Ploomber AI Editor | data-explorer-60a9

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

# Memoise the download with st.cache_data so the CSV is not re-read on every rerun
@st.cache_data
def load_data(url):
    """Read the CSV located at *url* into a pandas DataFrame.

    Args:
        url: Location string handed unchanged to ``pandas.read_csv``.

    Returns:
        The parsed ``DataFrame``.
    """
    # NOTE(review): pandas.read_csv also accepts local file paths, not only
    # http(s) URLs -- confirm whether user input should be restricted.
    frame = pd.read_csv(url)
    return frame

# Prompt for the dataset location, pre-filled with the seaborn penguins sample
DEFAULT_CSV_URL = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
url = st.text_input("Enter CSV URL", value=DEFAULT_CSV_URL)

def _is_numeric(series):
    """Return True when *series* holds numeric data of any width.

    Comparing the dtype against ``['int64', 'float64']`` misses the other
    numeric dtypes that ``select_dtypes(np.number)`` lets through (int32,
    float32, unsigned, ...), which would then be charted as categories.
    """
    return pd.api.types.is_numeric_dtype(series)


def _build_figure(df, var1, var2):
    """Build the Plotly figure that suits the selected variable(s).

    Args:
        df: The loaded DataFrame.
        var1: Name of the first column to plot.
        var2: Name of the second column, or the string ``"None"`` when only
            one variable was chosen.

    Returns:
        A Plotly figure: histogram or bar chart for one variable; scatter,
        box plot or heatmap for two, depending on which are numeric.
    """
    first_numeric = _is_numeric(df[var1])

    if var2 == "None":
        # Single variable visualization
        if first_numeric:
            return px.histogram(df, x=var1, title=f"Distribution of {var1}")
        counts = df[var1].value_counts()
        return px.bar(x=counts.index, y=counts.values, title=f"Distribution of {var1}")

    # Two variable visualization
    second_numeric = _is_numeric(df[var2])

    if first_numeric and second_numeric:
        return px.scatter(df, x=var1, y=var2, title=f"{var2} vs {var1}")

    # Exactly one numeric: the categorical column always goes on the x axis
    if first_numeric:
        return px.box(df, x=var2, y=var1, title=f"Distribution of {var1} by {var2}")
    if second_numeric:
        return px.box(df, x=var1, y=var2, title=f"Distribution of {var2} by {var1}")

    # Both categorical - heatmap of co-occurrence counts
    crosstab = pd.crosstab(df[var1], df[var2])
    fig = go.Figure(data=go.Heatmap(
        z=crosstab.values,
        x=crosstab.columns,
        y=crosstab.index,
        colorscale='Viridis'))
    fig.update_layout(title=f"Relationship between {var1} and {var2}")
    return fig


def _render_explorer(df):
    """Draw the shuffle/row controls, the data preview and the chart for *df*.

    Returns early with a message instead of raising when the frame has no
    rows or no plottable (numeric/object) columns.
    """
    if df.empty:
        # Without rows the row-count slider would get a 1..0 range and there
        # would be nothing to plot, so explain instead of erroring out.
        st.warning("The dataset has no rows, so there is nothing to preview or plot.")
        return

    # Control for shuffling (fixed seed keeps the order stable across reruns)
    if st.checkbox("Randomly shuffle data"):
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Control number of rows to display
    total_rows = len(df)
    if total_rows > 1:
        num_rows = st.slider("Number of rows to display", 1, total_rows, min(10, total_rows))
    else:
        # With a single row there is nothing to choose; skip the widget
        # rather than hand st.slider a degenerate 1..1 range.
        num_rows = total_rows

    # Display the dataframe
    st.write("### Data Preview")
    st.dataframe(df.head(num_rows))

    # Variable selection
    cols = df.select_dtypes(include=[np.number, 'object']).columns
    if cols.empty:
        # selectbox would yield None here and df[None] would raise KeyError
        st.info("No numeric or text columns are available to visualize.")
        return

    var1 = st.selectbox("Select first variable", cols)
    var2 = st.selectbox("Select second variable (optional)", ["None"] + list(cols))

    st.write("### Visualization")
    st.plotly_chart(_build_figure(df, var1, var2))


# Top-level boundary: any load/render failure is reported in the page
# instead of surfacing as a raw traceback.
try:
    _render_explorer(load_data(url))
except Exception as e:
    st.error(f"Error loading or processing data: {str(e)}")
Loading code editor...
Click Save & Run to preview your app
Terminal