Ploomber AI Editor | data-explorer-9cae

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import plotly.express as px
import seaborn as sns
from typing import Optional, Tuple, List

@st.cache_data
def _read_csv_cached(url: str) -> pd.DataFrame:
    """Download and parse the CSV at *url*, caching the resulting DataFrame.

    Errors are deliberately NOT handled here: letting the exception propagate
    means nothing is stored in the cache for a failed download, so the next
    rerun with the same URL tries again instead of replaying a cached failure.
    """
    return pd.read_csv(url)


def load_data(url: str) -> Optional[pd.DataFrame]:
    """Load a CSV from *url*, reusing a cached copy when one is available.

    Args:
        url: Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` when loading failed. On failure an
        error message is rendered in the app via ``st.error``.
    """
    try:
        return _read_csv_cached(url)
    except Exception as e:
        # Broad on purpose: this is the UI boundary, and pd.read_csv can fail
        # with network, HTTP, decoding or parsing errors. All of them are
        # reported to the user rather than crashing the app.
        st.error(f"Error loading data: {e}")
        return None


# Keep ``load_data.clear()`` working for callers that relied on the cache
# decorator having been applied to ``load_data`` itself.
load_data.clear = _read_csv_cached.clear

def _is_numeric(series: pd.Series) -> bool:
    """Return True when *series* should be plotted as a numerical variable."""
    # Covers every integer/float width and the nullable dtypes, not just
    # int64/float64. pandas also counts booleans as numeric, but they read
    # better as categories, so they are excluded.
    return bool(
        pd.api.types.is_numeric_dtype(series)
        and not pd.api.types.is_bool_dtype(series)
    )


def plot_data(df: pd.DataFrame, variables: List[str]) -> None:
    """Plot the selected variables using the most appropriate visualization.

    Chart choice:
        * one numerical variable            -> histogram
        * one categorical variable          -> bar chart of value counts
        * two numerical variables           -> scatter plot
        * one numerical + one categorical   -> box plot (numerical on y)
        * two categorical variables         -> heatmap of the contingency table

    Args:
        df: Data to plot.
        variables: Names of one or two columns of *df*. Any other number of
            names results in no chart being drawn.

    Returns:
        None. The chart is rendered with ``st.plotly_chart``.
    """
    if len(variables) == 1:
        var = variables[0]
        if _is_numeric(df[var]):
            # Numerical variable - show histogram
            fig = px.histogram(df, x=var, title=f"Distribution of {var}")
        else:
            # Categorical variable - show count plot.
            # The counts frame is built with explicit column names because
            # the columns produced by value_counts().reset_index() differ
            # between pandas 1.x and 2.x.
            counts = df[var].value_counts()
            count_df = pd.DataFrame(
                {"value": counts.index, "count": counts.to_numpy()}
            )
            fig = px.bar(
                count_df,
                x="value",
                y="count",
                labels={"value": str(var), "count": "Count"},
                title=f"Count plot of {var}",
            )

    elif len(variables) == 2:
        var1, var2 = variables
        first_numeric = _is_numeric(df[var1])
        second_numeric = _is_numeric(df[var2])

        if first_numeric and second_numeric:
            # Both numerical - scatter plot
            fig = px.scatter(df, x=var1, y=var2, title=f"{var1} vs {var2}")
        elif first_numeric or second_numeric:
            # One numerical, one categorical - box plot
            num_var, cat_var = (var1, var2) if first_numeric else (var2, var1)
            fig = px.box(df, x=cat_var, y=num_var, title=f"{num_var} by {cat_var}")
        else:
            # Both categorical - heatmap
            contingency = pd.crosstab(df[var1], df[var2])
            fig = px.imshow(
                contingency,
                title=f"Relationship between {var1} and {var2}",
                labels=dict(x=var2, y=var1, color="Count"),
            )

    else:
        # Zero or more than two variables: nothing to draw.
        return

    st.plotly_chart(fig)

# Main app: ask for a CSV URL, preview the data, and plot the chosen columns.
st.title("Data Explorer")

# URL input
url = st.text_input(
    "Enter CSV URL",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)

# Load data
if url:
    df = load_data(url)

    if df is not None and df.empty:
        # A CSV with no rows would make the row-count widget below invalid
        # (its maximum of 0 would be lower than its minimum of 1), so stop
        # here with a message instead of raising.
        st.warning("The dataset contains no rows to display.")

    elif df is not None:
        # Controls
        col1, col2 = st.columns(2)

        with col1:
            n_rows = st.number_input("Number of rows to display",
                                     min_value=1,
                                     max_value=len(df),
                                     value=min(5, len(df)))

        with col2:
            shuffle = st.checkbox("Randomly shuffle data")

        # Display data; shuffling only affects the preview table, the plots
        # below always use the full, unshuffled frame.
        if shuffle:
            display_df = df.sample(frac=1).reset_index(drop=True)
        else:
            display_df = df

        st.dataframe(display_df.head(n_rows))

        # Variable selection
        st.subheader("Visualization")
        cols = df.columns.tolist()
        vars_to_plot = st.multiselect(
            "Select 1 or 2 variables to visualize",
            options=cols,
            max_selections=2
        )

        if vars_to_plot:
            plot_data(df, vars_to_plot)
Loading code editor...
Click Save & Run to preview your app
Terminal