Ploomber AI Editor | parquet-viewer-9eea

To upload files, please first save the app
Code Editor for app.py

import io
import os
import time

import pandas as pd
import plotly.express as px
import streamlit as st

# Page heading
st.title("Interactive Parquet File Viewer")

# Data source 1: a parquet file supplied by the user (None until one is chosen)
uploaded_file = st.file_uploader(
    label="Upload a Parquet file",
    type=["parquet"],
)

# Data source 2: free-text SQL used by the "Generate and Load Data" action
query = st.text_area(
    label="Enter your SQL query to generate data:",
    height=100,
)

# Streamlit re-executes this script on every widget interaction, and
# st.button() is only True during the run triggered by the click itself.
# The generated result is therefore stored in st.session_state as parquet
# bytes so it is still available on later reruns (e.g. when the user changes
# a chart option). Keeping it in memory, per session, also avoids several
# concurrent sessions sharing one scratch file on disk.
just_generated = False

if st.button("Generate and Load Data"):
    if query.strip():
        # Simulating query execution and file generation
        with st.spinner('Executing query and generating parquet file...'):
            # In a real application, you would execute the query here
            time.sleep(2)  # Simulating query execution time

            # For demo purposes, we'll create a sample dataframe
            # In real application, this would be your query result
            df = pd.DataFrame({
                'Date': pd.date_range(start='2023-01-01', periods=100),
                'Value': range(100),
                'Category': ['A' if i % 3 == 0 else 'B' if i % 3 == 1 else 'C' for i in range(100)]
            })

            # Serialize as parquet into an in-memory buffer
            parquet_buffer = io.BytesIO()
            df.to_parquet(parquet_buffer)
            st.session_state["generated_parquet"] = parquet_buffer.getvalue()
            just_generated = True
            st.success('Data generated successfully!')
    else:
        # An empty (or whitespace-only) query has nothing to execute
        st.warning("Please enter a SQL query before generating data.")

# Use the generated data as the data source when it was just produced (it
# then takes priority over an uploaded file, as a click always did), or when
# it was produced on an earlier run and no file is currently uploaded.
generated_parquet = st.session_state.get("generated_parquet")
if generated_parquet is not None and (just_generated or uploaded_file is None):
    uploaded_file = io.BytesIO(generated_parquet)

def _render_visualization(df):
    """Render the chart controls, the chart, and the rows selected on it.

    Args:
        df: The loaded DataFrame to visualize.
    """
    st.subheader("Interactive Visualization")

    # "number" covers every numeric dtype (int32, float32, nullable ints, ...),
    # not just int64/float64.
    numeric_columns = df.select_dtypes(include="number").columns
    all_columns = df.columns

    if len(numeric_columns) == 0:
        # Without a numeric column there is nothing valid to put on the Y-axis
        st.info("This dataset has no numeric columns to plot on the Y-axis.")
        return

    # Select columns for x and y axis
    col1, col2 = st.columns(2)
    with col1:
        x_axis = st.selectbox('Select X-axis', all_columns)
    with col2:
        y_axis = st.selectbox('Select Y-axis', numeric_columns)

    # Chart type selection
    chart_builders = {
        'Scatter': px.scatter,
        'Line': px.line,
        'Bar': px.bar,
        'Box': px.box,
    }
    chart_type = st.selectbox('Select Chart Type', list(chart_builders))

    # Optional color grouping
    color_column = st.selectbox('Select Color Column (optional)',
                                ['None'] + list(df.columns))
    if color_column == 'None':
        color_column = None

    # Create visualization
    st.subheader("Visualization")

    # Attach each row's position as custom data. When a color column is set,
    # Plotly splits the data into one trace per group and "point_index" is
    # relative to that trace, so it cannot be used to look rows up in `df`.
    row_position_column = "__row_position__"
    plot_df = df.assign(**{row_position_column: range(len(df))})

    fig = chart_builders[chart_type](
        plot_df,
        x=x_axis,
        y=y_axis,
        color=color_column,
        custom_data=[row_position_column],
    )

    # Make the plot interactive
    event = st.plotly_chart(fig, use_container_width=True, key="chart", on_select="rerun")

    # If points are selected, show the selected data
    selected_points = event.selection.get("points") if event.selection else None
    if selected_points:
        st.subheader("Selected Data Points")
        selected_positions = []
        for point in selected_points:
            custom = point.get("customdata")
            if isinstance(custom, (list, tuple)) and custom:
                selected_positions.append(int(custom[0]))
            else:
                # Fall back to the trace-relative index if no custom data
                # came back with the selection event.
                selected_positions.append(point["point_index"])
        st.dataframe(df.iloc[selected_positions])


if uploaded_file is not None:
    # `uploaded_file` may be a filesystem path or a file-like object;
    # pd.read_parquet accepts either, so no type dispatch is needed.
    df = None
    try:
        df = pd.read_parquet(uploaded_file)
    except Exception as e:
        st.error(f"Error reading the parquet file: {str(e)}")

    if df is not None:
        # Display errors are reported separately from read errors so the
        # message does not blame the file for a rendering problem. The broad
        # catch is kept so the rest of the script still runs afterwards.
        try:
            # Display basic information about the dataset
            st.subheader("Dataset Information")
            st.write(f"Number of rows: {len(df)}")
            st.write(f"Number of columns: {len(df.columns)}")

            # Display the first few rows
            st.subheader("Preview of the Data")
            st.dataframe(df.head())

            _render_visualization(df)

            # Show full dataframe with filtering capabilities
            st.subheader("Full Dataset (with filtering)")
            st.dataframe(df, use_container_width=True)
        except Exception as e:
            st.error(f"Error displaying the data: {str(e)}")

# Cleanup temporary files
# Remove the scratch file directly rather than checking for it first: the
# file may be deleted by another session between the check and the removal.
# A missing file simply means there is nothing to clean up.
try:
    os.remove('query_result.parquet')
except FileNotFoundError:
    pass
Loading code editor...
Click Save & Run to preview your app
Terminal