Ploomber AI Editor | data-visualization-8d5b

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np

# Cache only successful downloads: the try/except lives outside the cached
# function so a failed request is never stored as a `None` result and is
# retried on the next rerun instead.
@st.cache_data
def _fetch_csv(url):
    """Download the CSV at *url* through the CORS proxy and parse it.

    Any exception raised by ``pd.read_csv`` propagates to the caller.
    """
    # NOTE: Using corsproxy.io because we're in a WASM environment. If running locally,
    # you can remove the corsproxy.io prefix. Some websites don't work with the proxy,
    # in those cases try removing the proxy prefix.
    proxy_url = f"https://corsproxy.io/?{url}"
    return pd.read_csv(proxy_url)


def load_data(url):
    """Return the CSV at *url* as a DataFrame, or ``None`` if loading fails.

    Successful downloads are cached per URL by ``_fetch_csv``. On failure an
    error message is shown in the app and ``None`` is returned.

    Args:
        url: Address of the CSV file to load.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when loading failed.
    """
    try:
        return _fetch_csv(url)
    except Exception as e:
        # Broad catch is deliberate: this is the UI boundary, and any failure
        # (network, HTTP, parsing) should be reported rather than crash the app.
        st.error(f"Error loading data: {str(e)}")
        return None

# Page heading
st.title("Data Visualization App")

# Dataset source: a text box pre-filled with a sample CSV address
DEFAULT_CSV_URL = (
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)
url = st.text_input("Enter CSV URL", value=DEFAULT_CSV_URL)

def _build_figure(df, var1, var2, numeric_cols):
    """Build the Plotly figure that suits the types of the selected columns.

    Args:
        df: DataFrame holding the data to plot.
        var1: Name of the first selected column.
        var2: Name of the second selected column, or the string ``"None"``
            when only one variable should be plotted.
        numeric_cols: Names of the columns treated as numeric; any other
            column is treated as categorical.

    Returns:
        A Plotly figure:
        * one numeric column      -> histogram
        * one categorical column  -> bar chart of value counts
        * numeric vs numeric      -> scatter plot
        * numeric vs categorical  -> box plot (categorical on the x axis)
        * two categorical columns -> heatmap of the contingency table
    """
    first_is_numeric = var1 in numeric_cols

    if var2 == "None":
        if first_is_numeric:
            return px.histogram(df, x=var1, title=f"Distribution of {var1}")
        counts = df[var1].value_counts()
        # Arrays passed without a data frame are labelled "x"/"y" by default;
        # give the axes meaningful names instead.
        return px.bar(x=counts.index, y=counts.values,
                      labels={"x": var1, "y": "count"},
                      title=f"Distribution of {var1}")

    second_is_numeric = var2 in numeric_cols

    if first_is_numeric and second_is_numeric:
        return px.scatter(df, x=var1, y=var2, title=f"{var2} vs {var1}")
    if first_is_numeric:
        return px.box(df, x=var2, y=var1, title=f"{var1} by {var2}")
    if second_is_numeric:
        return px.box(df, x=var1, y=var2, title=f"{var2} by {var1}")

    contingency = pd.crosstab(df[var1], df[var2])
    return px.imshow(contingency,
                     title=f"Relationship between {var1} and {var2}")


if url:
    # Load the data (None means loading failed and an error was already shown)
    df = load_data(url)

    if df is not None and df.empty:
        # A CSV with no data rows would make the row-count input invalid
        # (min_value=1 but max_value=0), so stop here with a clear message.
        st.warning("The CSV file contains no rows to display.")
    elif df is not None:
        # Controls
        col1, col2 = st.columns(2)
        with col1:
            n_rows = st.number_input("Number of rows to display",
                                     min_value=1,
                                     max_value=len(df),
                                     value=min(10, len(df)))

        with col2:
            shuffle = st.checkbox("Randomly shuffle data")

        if shuffle:
            # Fixed seed keeps the shuffled order stable across reruns
            df = df.sample(frac=1, random_state=42).reset_index(drop=True)

        # Display the dataframe
        st.subheader("Data Preview")
        st.dataframe(df.head(n_rows))

        # Variable selection for visualization: numeric columns first,
        # everything else is treated as categorical
        numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
        categorical_cols = df.select_dtypes(exclude=[np.number]).columns.tolist()
        all_cols = numeric_cols + categorical_cols

        st.subheader("Visualization")
        col1, col2 = st.columns(2)

        with col1:
            var1 = st.selectbox("Select first variable", all_cols)

        with col2:
            remaining_cols = [col for col in all_cols if col != var1]
            # NOTE: "None" is the sentinel for "no second variable"; a column
            # literally named "None" cannot be told apart from it.
            var2 = st.selectbox("Select second variable (optional)",
                                ["None"] + remaining_cols)

        # Determine the best visualization based on variable types
        fig = _build_figure(df, var1, var2, numeric_cols)
        st.plotly_chart(fig)
Loading code editor...
Click Save & Run to preview your app
Terminal