Ploomber AI Editor | data-visualizer-9068

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Configure the page: title and wide layout
st.set_page_config(layout="wide", page_title="Data Visualizer")

# Heading and one-line description for the main area
st.title("📊 Data Visualizer")
st.markdown(
    "Upload a CSV from URL and explore your data with interactive visualizations!"
)

# Sidebar section that holds the data-source controls
st.sidebar.header("Data Controls")

# Text box for the CSV location, pre-filled with a sample dataset URL
url = st.sidebar.text_input(
    label="CSV URL",
    help="Enter the URL of a CSV file to load",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
)

@st.cache_data
def _read_csv_cached(url):
    """Read the CSV at ``url`` with pandas and cache the resulting DataFrame.

    Exceptions are allowed to propagate so that a failed read does not
    produce a return value for ``st.cache_data`` to store.
    """
    # NOTE(review): ``url`` comes straight from a user-editable text box and
    # pandas.read_csv also accepts local paths; consider restricting the
    # accepted schemes if this app is exposed to untrusted users.
    return pd.read_csv(url)


def load_data(url):
    """Load CSV data from a URL, caching successful loads only.

    Args:
        url: Location handed to ``pandas.read_csv``.

    Returns:
        A ``(df, error)`` tuple: ``(DataFrame, None)`` on success, or
        ``(None, message)`` on failure, where ``message`` is never empty.
    """
    try:
        return _read_csv_cached(url), None
    except Exception as e:
        # Deliberately broad: any failure is reported to the user rather
        # than crashing the app. The failure itself is not cached, so a
        # later rerun with the same URL retries the read.
        # Fall back to the exception class name so callers testing
        # ``if error:`` never mistake a failure for a success.
        return None, str(e) or type(e).__name__


# Keep the cache-clearing hook reachable under the original public name,
# in case other code calls ``load_data.clear()``.
load_data.clear = _read_csv_cached.clear

def _scatter_figure(data, var1, var2, color_var):
    """Build a scatter plot of two numeric columns.

    An OLS trendline is requested only when no color grouping is selected.
    Plotly's OLS trendline relies on the optional ``statsmodels`` package,
    which this file does not import; if it is missing, fall back to a plain
    scatter plot instead of crashing the app.
    """
    scatter_kwargs = dict(
        x=var1,
        y=var2,
        color=color_var,
        title=f"{var1} vs {var2}",
    )
    if color_var is None:
        try:
            return px.scatter(data, trendline="ols", **scatter_kwargs)
        except ImportError:
            pass
    return px.scatter(data, **scatter_kwargs)


def _build_figure(data, var1, var2, color_var, numeric_cols):
    """Pick and build the chart matching the selected variables' types.

    Args:
        data: DataFrame to plot (the rows currently displayed).
        var1: Name of the first selected column (required).
        var2: Name of the second selected column, or ``None``.
        color_var: Name of the column used for coloring, or ``None``.
        numeric_cols: Names of the columns considered numeric; every other
            selectable column is treated as categorical.

    Returns:
        A ``(figure, caption)`` tuple.
    """
    var1_is_numeric = var1 in numeric_cols

    if not var2:
        # Single variable: histogram for numeric, count bar chart otherwise.
        if var1_is_numeric:
            fig = px.histogram(
                data,
                x=var1,
                color=color_var,
                title=f"Distribution of {var1}",
                marginal="box",
            )
            return fig, f"Histogram: {var1}"

        value_counts = data[var1].value_counts()
        fig = px.bar(
            x=value_counts.index,
            y=value_counts.values,
            title=f"Count of {var1}",
            labels={'x': var1, 'y': 'Count'},
        )
        return fig, f"Bar Chart: {var1}"

    var2_is_numeric = var2 in numeric_cols

    if var1_is_numeric and var2_is_numeric:
        fig = _scatter_figure(data, var1, var2, color_var)
        return fig, f"Scatter Plot: {var1} vs {var2}"

    if var1_is_numeric != var2_is_numeric:
        # Mixed types: box plot with the categorical column on the x axis
        # and the numeric column on the y axis, whichever order was chosen.
        if var1_is_numeric:
            numeric_var, category_var = var1, var2
        else:
            numeric_var, category_var = var2, var1
        fig = px.box(
            data,
            x=category_var,
            y=numeric_var,
            color=color_var,
            title=f"{numeric_var} by {category_var}",
        )
        return fig, f"Box Plot: {numeric_var} by {category_var}"

    # Both categorical: heatmap of the co-occurrence counts.
    crosstab = pd.crosstab(data[var1], data[var2])
    fig = px.imshow(
        crosstab.values,
        x=crosstab.columns,
        y=crosstab.index,
        aspect="auto",
        title=f"Heatmap: {var1} vs {var2}",
        labels={'x': var2, 'y': var1, 'color': 'Count'},
    )
    return fig, f"Heatmap: {var1} vs {var2}"


# Load data
if url:
    with st.spinner("Loading data..."):
        df, error = load_data(url)

    if error:
        st.error(f"Error loading data: {error}")
        st.stop()

    if df is None:
        st.warning("No data loaded. Please check your URL.")
    elif df.empty:
        # A zero-row frame would give the row slider max_value=0 with
        # min_value=1 (an inconsistent range) and there is nothing to plot.
        st.warning("The CSV was loaded but contains no data rows to display.")
    else:
        # Data controls
        st.sidebar.subheader("Display Options")

        # Number of rows control; with a single row there is nothing to
        # choose, so the slider is only shown for two or more rows.
        max_rows = len(df)
        if max_rows > 1:
            num_rows = st.sidebar.slider(
                "Number of rows to display",
                min_value=1,
                max_value=max_rows,
                value=min(100, max_rows),
                help="Select how many rows to display"
            )
        else:
            num_rows = max_rows

        # Shuffle checkbox
        shuffle_data = st.sidebar.checkbox(
            "Randomly shuffle data",
            value=False,
            help="Randomly shuffle the DataFrame rows"
        )

        # Apply controls to the dataframe. No defensive copy is needed:
        # sample(), reset_index() and head() return new objects and nothing
        # below writes to the frame.
        display_df = df
        if shuffle_data:
            # Fixed seed keeps the shuffled order stable across reruns.
            display_df = display_df.sample(frac=1, random_state=42).reset_index(drop=True)
        display_df = display_df.head(num_rows)

        # Main content
        col1, col2 = st.columns([2, 1])

        with col2:
            st.subheader("📈 Visualization Controls")

            # Get numeric and categorical columns
            numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
            categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
            all_cols = numeric_cols + categorical_cols

            # Variable selection
            st.markdown("**Select Variables:**")
            var1 = st.selectbox(
                "First variable",
                options=all_cols,
                index=0 if all_cols else None,
                help="Select the first variable for visualization"
            )

            var2 = st.selectbox(
                "Second variable (optional)",
                options=[None] + all_cols,
                index=0,
                help="Select a second variable for bivariate analysis"
            )

            # Color variable for enhanced visualizations
            color_var = st.selectbox(
                "Color by (optional)",
                options=[None] + categorical_cols,
                index=0,
                help="Select a categorical variable to color the plot"
            )

        with col1:
            st.subheader("📋 Data Preview")
            st.dataframe(display_df, use_container_width=True)

            # Data info
            st.markdown(f"**Dataset Info:** {len(df)} rows × {len(df.columns)} columns")
            if shuffle_data:
                st.info("Data has been shuffled randomly")

        # Visualization section
        st.subheader("📊 Data Visualization")

        if var1:
            fig, chart_title = _build_figure(
                display_df, var1, var2, color_var, numeric_cols
            )
            fig.update_layout(height=500)
            st.plotly_chart(fig, use_container_width=True)
            st.caption(f"📈 {chart_title}")

            # Summary statistics for the selected numeric columns.
            # dict.fromkeys de-duplicates while keeping order, so choosing
            # the same column twice does not yield duplicate column names.
            numeric_vars = [
                v for v in dict.fromkeys([var1, var2])
                if v and v in numeric_cols
            ]
            if numeric_vars:
                st.subheader("📊 Summary Statistics")
                summary_stats = display_df[numeric_vars].describe()
                st.dataframe(summary_stats, use_container_width=True)

        else:
            st.info("👆 Select at least one variable to create a visualization")
else:
    st.info("👆 Please enter a CSV URL to get started")
Loading code editor...
Click Save & Run to preview your app
Terminal