Ploomber AI Editor | csv-explorer-ee3c

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np

@st.cache_data
def load_data(url):
    """Fetch a CSV file through the corsproxy.io proxy and parse it.

    The function is wrapped in ``st.cache_data``, so the download/parse step
    is cached per distinct ``url`` argument.

    Args:
        url: Address of the CSV file. Leading/trailing whitespace is ignored.
            A value that already starts with the corsproxy.io prefix is
            requested unchanged.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.

    Raises:
        Any exception ``pd.read_csv`` raises for an unreachable URL or
        unparsable content; nothing is caught here.
    """
    # Function-scope import so the block does not depend on the file header.
    from urllib.parse import quote

    # NOTE: Using corsproxy.io because we're in a WASM environment. If running locally,
    # you can remove the corsproxy.io prefix.
    proxy_prefix = 'https://corsproxy.io/?'

    # Pasted URLs often carry stray spaces/newlines that would break the
    # prefix check below and the request itself.
    url = url.strip()
    if not url.startswith(proxy_prefix):
        # Percent-encode the whole target so its own '?', '&' and '#'
        # characters are not mistaken for parts of the proxy's query string.
        url = f'{proxy_prefix}{quote(url, safe="")}'
    return pd.read_csv(url)

# Page heading
st.title('CSV Explorer')

# Text box for the CSV location, pre-filled with the seaborn penguins dataset
DEFAULT_CSV_URL = 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv'
url = st.text_input('Enter CSV URL', value=DEFAULT_CSV_URL)

def _build_figure(df, var1, var2, numeric_cols):
    """Choose a Plotly Express chart for the selected column(s).

    Args:
        df: DataFrame holding the data to plot.
        var1: Name of the first selected column.
        var2: Name of the second selected column, or ``None`` to plot
            ``var1`` on its own.
        numeric_cols: Names of the columns treated as numeric; every other
            column is handled as categorical.

    Returns:
        The Plotly figure for the selection:
        one numeric column -> histogram, one categorical column -> bar chart
        of value counts, numeric + numeric -> scatter, numeric + categorical
        (either order) -> box plot, categorical + categorical -> heatmap of
        the cross-tabulation.
    """
    var1_numeric = var1 in numeric_cols

    if var2 is None:
        if var1_numeric:
            # For numeric variables, show histogram
            return px.histogram(df, x=var1, title=f'Distribution of {var1}')
        # For categorical variables, show bar chart
        counts = df[var1].value_counts()
        return px.bar(x=counts.index, y=counts.values,
                      title=f'Distribution of {var1}')

    var2_numeric = var2 in numeric_cols

    if var1_numeric and var2_numeric:
        # Both numeric: scatter plot
        return px.scatter(df, x=var1, y=var2, title=f'{var2} vs {var1}')
    if var1_numeric:
        # Numeric + Categorical: box plot, categories on the x axis
        return px.box(df, x=var2, y=var1, title=f'{var1} by {var2}')
    if var2_numeric:
        # Categorical + Numeric: box plot, categories on the x axis
        return px.box(df, x=var1, y=var2, title=f'{var2} by {var1}')

    # Both categorical: heatmap of the contingency table
    contingency = pd.crosstab(df[var1], df[var2])
    return px.imshow(contingency,
                     title=f'Relationship between {var1} and {var2}')


def _render_explorer(df):
    """Draw the preview table, the controls and the chart for ``df``.

    Args:
        df: The loaded CSV data.
    """
    # With no rows, max_value and value below would both be 0, which is under
    # min_value=1, so the row-count input could not be created; with no
    # columns there is nothing to select. Tell the user plainly instead.
    if df.empty:
        st.warning('The CSV file contains no data to explore.')
        return

    # Controls
    col1, col2 = st.columns(2)

    with col1:
        n_rows = st.number_input('Number of rows to display',
                                 min_value=1,
                                 max_value=len(df),
                                 value=min(10, len(df)))

    with col2:
        shuffle = st.checkbox('Randomly shuffle data')

    # Process DataFrame
    if shuffle:
        # Fixed seed: the shuffled order stays the same on every rerun.
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Display DataFrame
    st.subheader('Data Preview')
    st.dataframe(df.head(n_rows))

    # Variable selection
    st.subheader('Visualization')
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()

    col1, col2 = st.columns(2)

    with col1:
        var1 = st.selectbox('Select first variable', df.columns)

    with col2:
        remaining_cols = [col for col in df.columns if col != var1]
        # Python None (not the string 'None') marks "no second variable", so
        # a column that is literally named "None" can still be selected.
        var2 = st.selectbox(
            'Select second variable (optional)',
            [None] + remaining_cols,
            format_func=lambda col: 'None' if col is None else str(col),
        )

    # Visualization logic
    st.plotly_chart(_build_figure(df, var1, var2, numeric_cols))


if not url.strip():
    # Nothing to fetch yet; avoid sending an empty request through the proxy.
    st.info('Enter a CSV URL to load data.')
else:
    try:
        # Load data
        df = load_data(url)
        _render_explorer(df)
    except Exception as e:
        # Top-level boundary of the script: surface any load/plot failure in
        # the page instead of a raw traceback.
        st.error(f'Error: {str(e)}')
Loading code editor...
Click Save & Run to preview your app
Terminal