Ploomber AI Editor | penguins-visualization-313c

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from scipy import stats

@st.cache_data
def load_data(url):
    """Read the CSV at *url* into a DataFrame, fetching it through corsproxy.io.

    The proxy prefix is prepended unless *url* already starts with it. The
    result is cached by Streamlit via ``st.cache_data``.
    """
    # NOTE: The proxy is used because the app runs in a WASM environment. When
    # running locally the prefix can be removed. Some websites don't work with
    # the proxy; in those cases try removing the prefix as well.
    proxy_prefix = 'https://corsproxy.io/?'
    proxied_url = url if url.startswith(proxy_prefix) else f'{proxy_prefix}{url}'
    return pd.read_csv(proxied_url)

def _is_numeric(series):
    """Return True when *series* has any numeric dtype.

    Checking against the literal names 'int64'/'float64' would misclassify
    other numeric widths (int32, float32, uint8, ...) as categorical.
    """
    return pd.api.types.is_numeric_dtype(series)


def _show_preview(df):
    """Render the row-count and shuffle controls, then the data preview."""
    col1, col2 = st.columns(2)

    with col1:
        n_rows = st.number_input('Number of rows to display',
                                 min_value=1,
                                 max_value=len(df),
                                 value=min(10, len(df)))

    with col2:
        shuffle = st.checkbox('Randomly shuffle data')

    # sample(frac=1) returns every row in random order; the index is reset so
    # the preview is numbered from 0 again.
    display_df = df.sample(frac=1).reset_index(drop=True) if shuffle else df

    st.subheader('Data Preview')
    st.dataframe(display_df.head(n_rows))


def _show_single_variable(df, var):
    """Plot one column: histogram + summary table if numeric, else a bar chart."""
    column = df[var]

    if _is_numeric(column):
        # Numeric variable -> histogram
        fig = px.histogram(df, x=var, title=f'Distribution of {var}')
        st.plotly_chart(fig)

        # Display summary statistics
        st.write(f"Summary statistics for {var}:")
        stats_df = pd.DataFrame({
            'Metric': ['Mean', 'Median', 'Std Dev', 'Min', 'Max'],
            'Value': [
                column.mean(),
                column.median(),
                column.std(),
                column.min(),
                column.max(),
            ],
        })
        st.table(stats_df)
    else:
        # Categorical variable -> bar chart of value frequencies
        value_counts = column.value_counts()
        fig = px.bar(x=value_counts.index,
                     y=value_counts.values,
                     title=f'Distribution of {var}')
        st.plotly_chart(fig)


def _show_variable_pair(df, var1, var2):
    """Plot two columns: scatter, box plot or heatmap depending on their dtypes."""
    var1_numeric = _is_numeric(df[var1])
    var2_numeric = _is_numeric(df[var2])

    if var1_numeric and var2_numeric:
        # Both numeric -> scatter plot
        fig = px.scatter(df, x=var1, y=var2,
                         title=f'Scatter plot: {var1} vs {var2}')
        st.plotly_chart(fig)

        # Correlation is computed on complete pairs only: missing values in
        # either column would otherwise make pearsonr fail or return NaN.
        pairs = df[[var1, var2]].dropna()
        enough_data = (
            len(pairs) >= 2
            and pairs[var1].nunique() > 1
            and pairs[var2].nunique() > 1
        )
        if enough_data:
            correlation = stats.pearsonr(pairs[var1], pairs[var2])[0]
            st.write(f"Correlation coefficient: {correlation:.3f}")
        else:
            st.write("Correlation coefficient: not available "
                     "(fewer than two complete, non-constant pairs)")

    elif var1_numeric or var2_numeric:
        # One numeric, one categorical -> box plot
        if var1_numeric:
            numeric_var, categorical_var = var1, var2
        else:
            numeric_var, categorical_var = var2, var1

        fig = px.box(df, x=categorical_var, y=numeric_var,
                     title=f'Box plot: {numeric_var} by {categorical_var}')
        st.plotly_chart(fig)
    else:
        # Both categorical -> heatmap of the contingency table
        contingency = pd.crosstab(df[var1], df[var2])
        fig = px.imshow(contingency,
                        title=f'Heatmap: {var1} vs {var2}')
        st.plotly_chart(fig)


def _show_visualization(df):
    """Render the variable selectors and the chart matching the selection."""
    st.subheader('Visualization')
    cols = list(df.select_dtypes(include=[np.number, 'object']).columns)

    if not cols:
        # With no options the selectbox yields None and df[None] would fail.
        st.info('No numeric or text columns available to visualize.')
        return

    col1, col2 = st.columns(2)
    with col1:
        var1 = st.selectbox('Select first variable', cols)
    with col2:
        remaining_cols = [col for col in cols if col != var1]
        var2 = st.selectbox('Select second variable (optional)',
                            ['None'] + remaining_cols)

    if var2 == 'None':
        _show_single_variable(df, var1)
    else:
        _show_variable_pair(df, var1, var2)


# App title
st.title('CSV Data Visualizer')

# URL input
url = st.text_input(
    'Enter CSV URL',
    value='https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv'
)

try:
    # Load data
    df = load_data(url)

    if df.empty:
        # A zero-row frame would make the row-count input invalid
        # (min_value=1 > max_value=0), so report it explicitly instead.
        st.warning('The CSV file contains no data rows.')
    else:
        _show_preview(df)
        _show_visualization(df)

except Exception as e:
    # Top-level UI boundary: show any load/plot failure in the page.
    st.error(f"Error: {str(e)}")
Loading code editor...
Click Save & Run to preview your app
Terminal