An app that reads Parquet files generated by a user's query: it needs to wait until the file is ready and then show the Parquet data with interactive graphics.
Drop files here
or click to upload
import streamlit as st
import pandas as pd
import plotly.express as px
import time
import os
# Path of the parquet file written by the (simulated) query step.
RESULT_PATH = 'query_result.parquet'
# st.session_state key under which the last generated DataFrame is kept.
GENERATED_KEY = 'generated_df'
# Helper column that carries each row's position into Plotly selection events.
ROW_POSITION_COLUMN = '__row_position__'


def build_demo_frame() -> pd.DataFrame:
    """Return the 100-row sample frame that stands in for a real query result."""
    categories = ('A', 'B', 'C')
    return pd.DataFrame({
        'Date': pd.date_range(start='2023-01-01', periods=100),
        'Value': range(100),
        'Category': [categories[i % 3] for i in range(100)],
    })


def run_query(query: str, path: str = RESULT_PATH) -> pd.DataFrame:
    """Run *query*, wait for its parquet output and return it as a DataFrame.

    Execution is simulated: the function sleeps, writes the demo frame to
    *path* and reads it back. *query* is not interpreted yet. The file is
    removed once loaded because the caller keeps the data in memory.
    """
    # In a real application, you would execute the query here
    time.sleep(2)  # Simulating query execution time
    try:
        build_demo_frame().to_parquet(path)
        return pd.read_parquet(path)
    finally:
        # Cleanup temporary file; it may not exist if the write itself failed.
        try:
            os.remove(path)
        except FileNotFoundError:
            pass


def numeric_column_names(df: pd.DataFrame) -> list:
    """Return the names of every numeric column (any width, not only 64-bit)."""
    return list(df.select_dtypes(include='number').columns)


def selected_row_positions(points, row_count, *, trust_point_index=False):
    """Translate Plotly selection points into positional row numbers.

    Args:
        points: The ``points`` list of a chart selection (may be None/empty).
        row_count: Number of rows in the plotted frame; positions outside
            ``[0, row_count)`` are dropped (e.g. a stale selection).
        trust_point_index: Fall back to ``point_index`` when a point has no
            ``customdata``. Only valid for single-trace charts, because
            ``point_index`` is counted per trace.

    Returns:
        A list of integer row positions, in selection order.
    """
    positions = []
    for point in points or []:
        custom = point.get('customdata')
        if isinstance(custom, (list, tuple)) and custom:
            position = custom[0]
        elif trust_point_index and point.get('point_index') is not None:
            position = point['point_index']
        else:
            continue
        position = int(position)
        if 0 <= position < row_count:
            positions.append(position)
    return positions


def clear_generated_data():
    """Forget the generated dataset so a newly uploaded file takes over."""
    st.session_state.pop(GENERATED_KEY, None)


def render_chart(df: pd.DataFrame) -> None:
    """Render the axis/chart pickers, the Plotly figure and the selected rows."""
    st.subheader("Interactive Visualization")

    numeric_columns = numeric_column_names(df)
    if not numeric_columns:
        st.info("No numeric columns available to plot on the Y-axis.")
        return

    # Select columns for x and y axis
    left, right = st.columns(2)
    with left:
        x_axis = st.selectbox('Select X-axis', list(df.columns))
    with right:
        y_axis = st.selectbox('Select Y-axis', numeric_columns)

    # Chart type selection; insertion order defines the order in the picker.
    builders = {
        'Scatter': px.scatter,
        'Line': px.line,
        'Bar': px.bar,
        'Box': px.box,
    }
    chart_type = st.selectbox('Select Chart Type', list(builders))

    # Optional color grouping. A real None is the "no color" option so that a
    # column literally named "None" remains selectable.
    color_column = st.selectbox(
        'Select Color Column (optional)',
        [None] + list(df.columns),
        format_func=lambda name: 'None' if name is None else str(name),
    )

    st.subheader("Visualization")
    # Attach each row's position as custom data: with a color column the
    # figure has one trace per group and point_index restarts in each trace.
    plot_df = df.assign(**{ROW_POSITION_COLUMN: range(len(df))})
    try:
        fig = builders[chart_type](
            plot_df,
            x=x_axis,
            y=y_axis,
            color=color_column,
            custom_data=[ROW_POSITION_COLUMN],
        )
    except (ValueError, TypeError) as e:
        st.error(f"Error creating the chart: {e}")
        return

    # Make the plot interactive
    event = st.plotly_chart(fig, use_container_width=True, key="chart", on_select="rerun")

    # If points are selected, show the selected data
    selection = event.selection or {}
    positions = selected_row_positions(
        selection.get("points"),
        len(df),
        trust_point_index=color_column is None,
    )
    if positions:
        st.subheader("Selected Data Points")
        st.dataframe(df.iloc[positions])


def render_dataset(df: pd.DataFrame) -> None:
    """Show summary, preview, interactive chart and the full table for *df*."""
    # Display basic information about the dataset
    st.subheader("Dataset Information")
    st.write(f"Number of rows: {len(df)}")
    st.write(f"Number of columns: {len(df.columns)}")

    # Display the first few rows
    st.subheader("Preview of the Data")
    st.dataframe(df.head())

    render_chart(df)

    st.subheader("Full Dataset (with filtering)")
    st.dataframe(df, use_container_width=True)


def main() -> None:
    """Streamlit entry point: pick a data source, load it and render it."""
    st.title("Interactive Parquet File Viewer")

    # File uploader for parquet files; changing the upload discards any
    # previously generated dataset so the most recent action wins.
    uploaded_file = st.file_uploader(
        "Upload a Parquet file",
        type=['parquet'],
        on_change=clear_generated_data,
    )

    # SQL Query input
    query = st.text_area("Enter your SQL query to generate data:", height=100)

    if st.button("Generate and Load Data"):
        if query.strip():
            with st.spinner('Executing query and generating parquet file...'):
                try:
                    generated = run_query(query)
                except Exception as e:  # UI boundary: report instead of crashing
                    st.error(f"Error generating the parquet file: {e}")
                else:
                    # Keep the result across reruns: every widget interaction
                    # re-executes the script and the button is True only once.
                    st.session_state[GENERATED_KEY] = generated
                    st.success('Data generated successfully!')
        else:
            st.warning("Please enter a query first.")

    df = st.session_state.get(GENERATED_KEY)
    if df is None and uploaded_file is not None:
        try:
            # Read the parquet file
            df = pd.read_parquet(uploaded_file)
        except Exception as e:  # UI boundary: many reader error types possible
            st.error(f"Error reading the parquet file: {str(e)}")
            df = None

    if df is not None:
        render_dataset(df)


if __name__ == "__main__":
    main()
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?