Ploomber AI Editor | data-explorer-da18

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np

@st.cache_data
def load_data(url):
    """Read the CSV located at ``url`` into a DataFrame, going through corsproxy.io.

    Args:
        url: Address of the CSV file. A URL that already starts with the
            corsproxy.io origin is used unchanged; any other URL gets the
            proxy prefix prepended.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    proxy_origin = "https://corsproxy.io"

    # NOTE: The proxy is used because the app runs in a WASM environment.
    # When running locally the prefix can be dropped. Some websites do not
    # work through the proxy; for those, try the URL without the prefix.
    if url.startswith(proxy_origin):
        proxied_url = url
    else:
        proxied_url = f"{proxy_origin}/?{url}"

    return pd.read_csv(proxied_url)

# Ask the user for the CSV location; the seaborn penguins dataset is pre-filled.
_DEFAULT_CSV_URL = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
url = st.text_input("Enter CSV URL", value=_DEFAULT_CSV_URL)

def _is_numeric(series):
    """Return True when ``series`` has a numeric dtype.

    A test such as ``series.dtype in [np.number]`` compares a concrete dtype
    (int64, float64, ...) with the abstract ``np.number`` class and is not a
    reliable numeric check, so pandas' own predicate is used instead.
    """
    return pd.api.types.is_numeric_dtype(series)


def _build_figure(df, var1, var2=None):
    """Build the Plotly figure that fits the dtypes of the chosen column(s).

    Args:
        df: DataFrame holding the data to plot.
        var1: Name of the first column.
        var2: Name of the second column, or ``None`` for a single-variable plot.

    Returns:
        A histogram (numeric) or bar chart of counts (categorical) for one
        variable; a scatter plot (both numeric), box plot (one numeric, one
        categorical) or count heatmap (both categorical) for two variables.
    """
    first_numeric = _is_numeric(df[var1])

    if var2 is None:
        if first_numeric:
            return px.histogram(df, x=var1, title=f"Distribution of {var1}")

        # Name both columns explicitly: the column names produced by
        # value_counts().reset_index() differ between pandas 1.x and 2.x.
        # Avoid a clash if the plotted column is itself called "count".
        count_col = "count" if var1 != "count" else "n"
        counts = df[var1].value_counts().rename_axis(var1).reset_index(name=count_col)
        return px.bar(counts, x=var1, y=count_col, title=f"Count of {var1}")

    second_numeric = _is_numeric(df[var2])

    if first_numeric and second_numeric:
        return px.scatter(df, x=var1, y=var2, title=f"{var2} vs {var1}")
    if first_numeric:
        # Numeric var1 grouped by categorical var2.
        return px.box(df, x=var2, y=var1, title=f"Distribution of {var1} by {var2}")
    if second_numeric:
        # Numeric var2 grouped by categorical var1.
        return px.box(df, x=var1, y=var2, title=f"Distribution of {var2} by {var1}")

    # Both categorical: heatmap of co-occurrence counts.
    heatmap_data = pd.crosstab(df[var1], df[var2])
    return px.imshow(
        heatmap_data,
        title=f"Relationship between {var1} and {var2}",
        labels=dict(x=var2, y=var1, color="Count")
    )


try:
    # Load the data
    df = load_data(url)

    if df.empty:
        # An empty frame would make the row-count input invalid
        # (min_value=1 > max_value=0), so report it instead.
        st.warning("The CSV contains no data to display.")
    else:
        # Controls
        col1, col2 = st.columns(2)

        with col1:
            n_rows = st.number_input(
                "Number of rows to display",
                min_value=1,
                max_value=len(df),
                value=min(10, len(df)),
            )

        with col2:
            shuffle = st.checkbox("Randomly shuffle data")

        if shuffle:
            # Fixed seed keeps the shuffled order stable across reruns.
            df = df.sample(frac=1, random_state=42).reset_index(drop=True)

        # Display the dataframe
        st.dataframe(df.head(n_rows))

        # Variable selection
        st.subheader("Visualization")
        columns = df.select_dtypes(include=[np.number, 'object', 'category']).columns

        if len(columns) == 0:
            # Without options the select box yields no column to plot.
            st.info("No numeric or categorical columns available to plot.")
        else:
            col1, col2 = st.columns(2)

            with col1:
                var1 = st.selectbox("Select first variable", columns)

            with col2:
                remaining_cols = [col for col in columns if col != var1]
                var2 = st.selectbox("Select second variable (optional)", ["None"] + remaining_cols)

            # "None" is the sentinel option meaning "no second variable".
            fig = _build_figure(df, var1, None if var2 == "None" else var2)
            st.plotly_chart(fig, use_container_width=True)

except Exception as e:
    # Top-level boundary of the app: surface any failure in the UI.
    st.error(f"Error: {e}")
Loading code editor...
Click Save & Run to preview your app
Terminal