Ploomber AI Editor | data-visualizer-2ad5

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

st.set_page_config(page_title="Data Visualizer", layout="wide")

@st.cache_data
def _read_csv_cached(url):
    """Read the CSV at ``url`` into a DataFrame (cached by Streamlit per URL).

    Exceptions propagate to the caller, so a failed read is never stored in
    the cache and the next rerun retries the download.
    """
    return pd.read_csv(url)

def load_data(url):
    """Load data from a URL, caching successful reads only.

    Args:
        url: Location of the CSV file; passed straight to ``pandas.read_csv``.

    Returns:
        A ``(df, error)`` tuple: ``(DataFrame, None)`` on success, or
        ``(None, message)`` when the file could not be read.
    """
    try:
        return _read_csv_cached(url), None
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any failure
        # (network, parsing, bad URL) is reported to the user as text.
        # Fall back to the exception type so the message is never empty,
        # which the caller's truthiness check would treat as "no error".
        return None, str(e) or type(e).__name__

def shuffle_dataframe(df):
    """Return a new dataframe holding the rows of ``df`` in random order.

    The result carries a fresh 0..n-1 index; the input is not modified.
    """
    # frac=1 draws every row once (sampling is without replacement by default).
    permuted = df.sample(frac=1)
    # Discard the original labels so rows are numbered in their new order.
    return permuted.reset_index(drop=True)

def _single_variable_figure(df, var):
    """Build a histogram for a numeric column, or a count bar chart otherwise."""
    if pd.api.types.is_numeric_dtype(df[var]):
        return px.histogram(df, x=var, title=f"Distribution of {var}")

    # Non-numeric column: plot how often each distinct value occurs.
    value_counts = df[var].value_counts()
    fig = px.bar(x=value_counts.index, y=value_counts.values,
                 title=f"Count of {var}")
    # The Figure methods are the plural ``update_xaxes`` / ``update_yaxes``;
    # the singular spellings do not exist and raise AttributeError.
    fig.update_xaxes(title_text=var)
    fig.update_yaxes(title_text="Count")
    return fig

def _two_variable_figure(df, var1, var2):
    """Build a scatter, box plot, or cross-tab heatmap based on the column types."""
    var1_numeric = pd.api.types.is_numeric_dtype(df[var1])
    var2_numeric = pd.api.types.is_numeric_dtype(df[var2])

    if var1_numeric and var2_numeric:
        return px.scatter(df, x=var1, y=var2, title=f"{var1} vs {var2}")
    if var1_numeric:
        # Numeric by categorical: the category always goes on the x axis.
        return px.box(df, x=var2, y=var1, title=f"{var1} by {var2}")
    if var2_numeric:
        return px.box(df, x=var1, y=var2, title=f"{var2} by {var1}")

    # Two categorical variables: heatmap of their co-occurrence counts.
    crosstab = pd.crosstab(df[var1], df[var2])
    return px.imshow(crosstab,
                     title=f"Cross-tabulation: {var1} vs {var2}",
                     labels=dict(x=var2, y=var1, color="Count"),
                     aspect="auto")

def create_visualization(df, selected_vars):
    """Render a chart suited to the selected variables into the Streamlit page.

    Args:
        df: DataFrame containing the columns to plot.
        selected_vars: Sequence of one or two column names from ``df``.

    Returns:
        None. The chart (or an info/warning message) is written to the page.
        One variable gives a histogram or count bar chart; two give a scatter
        plot, a box plot, or a cross-tabulation heatmap depending on which of
        them are numeric.
    """
    if len(selected_vars) == 0:
        st.info("Please select at least one variable to visualize.")
        return

    if len(selected_vars) == 1:
        fig = _single_variable_figure(df, selected_vars[0])
    elif len(selected_vars) == 2:
        var1, var2 = selected_vars
        fig = _two_variable_figure(df, var1, var2)
    else:
        # Say so explicitly instead of silently rendering nothing.
        st.warning("Please select at most two variables to visualize.")
        return

    st.plotly_chart(fig, use_container_width=True)

# Main app: page header, CSV URL input, then (once data loads) the row
# controls, the visualization picker, and the data preview sections.
st.title("📊 Data Visualizer")
st.markdown("Load data from a URL and create interactive visualizations")

# URL input
url = st.text_input(
    "Enter CSV URL:",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
    help="Enter the URL of a CSV file to analyze"
)

if url:
    # Load data
    with st.spinner("Loading data..."):
        df, error = load_data(url)

    if error:
        st.error(f"Error loading data: {error}")
        st.stop()

    if df is not None:
        st.success(f"✅ Data loaded successfully! Shape: {df.shape}")

        # A header-only CSV has zero rows, which would give the row slider
        # below min_value=1 and max_value=0. Stop early with a clear message.
        if len(df) == 0:
            st.warning("The dataset contains no rows, so there is nothing to display.")
            st.stop()

        # Controls
        col1, col2 = st.columns(2)

        with col1:
            max_rows = len(df)
            num_rows = st.slider(
                "Number of rows to display:",
                min_value=1,
                max_value=max_rows,
                value=min(100, max_rows),
                help=f"Select how many rows to display (max: {max_rows})"
            )

        with col2:
            shuffle = st.checkbox(
                "Randomly shuffle data",
                help="Shuffle the dataframe before displaying"
            )

        # Prepare data for display. No defensive copy is needed: shuffling
        # and head() both return new objects and df is never mutated here.
        display_df = shuffle_dataframe(df) if shuffle else df
        display_df = display_df.head(num_rows)

        # Variable selection for visualization
        st.subheader("📈 Visualization Controls")

        # Column names offered to the picker, and the numeric subset used
        # for the summary statistics at the bottom of the page.
        numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
        all_cols = df.columns.tolist()

        selected_variables = st.multiselect(
            "Select variables to visualize (1-2 variables):",
            options=all_cols,
            max_selections=2,
            help="Choose 1 variable for distribution or 2 variables for relationship analysis"
        )

        if selected_variables:
            st.subheader("📊 Visualization")
            # Charts use the full dataset, not the row-limited preview.
            create_visualization(df, selected_variables)

        # Display dataframe
        st.subheader("📋 Data Preview")

        # Show data info
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Total Rows", len(df))
        with col2:
            st.metric("Total Columns", len(df.columns))
        with col3:
            st.metric("Displaying", len(display_df))

        # Show column types
        with st.expander("📊 Column Information"):
            col_info = pd.DataFrame({
                'Column': df.columns,
                'Type': df.dtypes.astype(str),
                'Non-Null Count': df.count(),
                'Null Count': df.isnull().sum()
            })
            st.dataframe(col_info, use_container_width=True)

        # Display the data
        st.dataframe(display_df, use_container_width=True)

        # Quick stats for numeric columns
        if numeric_cols:
            with st.expander("📈 Quick Statistics (Numeric Columns)"):
                st.dataframe(df[numeric_cols].describe(), use_container_width=True)
Loading code editor...
Click Save & Run to preview your app
Terminal