Ploomber AI Editor | csv-visualizer-2eac

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Page setup: wide layout, with the app name as the page title.
st.set_page_config(
    layout="wide",
    page_title="CSV Data Visualizer",
)

@st.cache_data
def _read_csv_cached(url):
    """Read the CSV at *url* into a DataFrame; any exception propagates."""
    return pd.read_csv(url)


def load_data(url):
    """Load CSV data from a URL, caching successful loads only.

    The cache decorator lives on the private reader rather than on this
    function: a cached function's return value is what gets stored, so
    returning the ``(None, error)`` tuple from inside it would pin a
    transient failure (network hiccup, temporary 404) to that URL until
    the cache is cleared. Catching the exception out here lets the next
    rerun retry the download.

    Args:
        url: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        ``(df, None)`` on success, or ``(None, message)`` where *message*
        is the string form of the exception raised while reading.
    """
    try:
        return _read_csv_cached(url), None
    except Exception as e:
        # UI boundary: the caller shows the message instead of crashing.
        return None, str(e)

def determine_column_type(series):
    """Classify a column as ``'numeric'`` or ``'categorical'`` for plotting.

    Args:
        series: The pandas Series (DataFrame column) to inspect.

    Returns:
        ``'numeric'`` for integer/float-like dtypes, ``'categorical'`` for
        everything else, including boolean columns.
    """
    # pandas counts bool dtypes as numeric, but a True/False column is a
    # two-level category for charting purposes, so test for it first.
    if pd.api.types.is_bool_dtype(series):
        return 'categorical'
    if pd.api.types.is_numeric_dtype(series):
        return 'numeric'
    return 'categorical'

def create_visualization(df, var1, var2=None):
    """Create a Plotly figure suited to the types of the selected columns.

    Chart selection:
      * one numeric column        -> histogram
      * one categorical column    -> bar chart of value counts
      * numeric vs numeric        -> scatter plot
      * categorical vs numeric    -> box plot, categorical column on x
      * categorical + categorical -> bar chart of the cross-tabulation

    Args:
        df: DataFrame holding the data.
        var1: Name of the first column.
        var2: Optional name of a second column; ``None`` plots *var1* alone.

    Returns:
        The Plotly figure for the chosen chart.
    """
    if var2 is None:
        # Single variable visualization
        if determine_column_type(df[var1]) == 'numeric':
            return px.histogram(df, x=var1, title=f'Distribution of {var1}')

        # value_counts() leaves out missing values by default.
        value_counts = df[var1].value_counts()
        fig = px.bar(x=value_counts.index, y=value_counts.values,
                     title=f'Count of {var1}')
        # The Figure methods are update_xaxes / update_yaxes (plural); the
        # singular spellings do not exist and raise AttributeError.
        fig.update_xaxes(title_text=var1)
        fig.update_yaxes(title_text='Count')
        return fig

    # Two variable visualization
    var1_type = determine_column_type(df[var1])
    var2_type = determine_column_type(df[var2])

    if var1_type == 'numeric' and var2_type == 'numeric':
        return px.scatter(df, x=var1, y=var2, title=f'{var1} vs {var2}')
    if var1_type == 'categorical' and var2_type == 'numeric':
        return px.box(df, x=var1, y=var2, title=f'{var2} by {var1}')
    if var1_type == 'numeric' and var2_type == 'categorical':
        # Swap the axes so the categorical column is always the grouping axis.
        return px.box(df, x=var2, y=var1, title=f'{var1} by {var2}')

    # Both categorical: one bar per var1 value, segmented by var2 counts.
    crosstab = pd.crosstab(df[var1], df[var2])
    return px.bar(crosstab, title=f'{var1} vs {var2}')

# ---- Page body -------------------------------------------------------------
st.title("📊 CSV Data Visualizer")
st.markdown("Load CSV data from a URL and create interactive visualizations")

# ---- Sidebar: data source ---------------------------------------------------
st.sidebar.header("Data Controls")

url = st.sidebar.text_input(
    "CSV URL:",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
    help="Enter the URL of a CSV file"
)

if not url:
    # Nothing to load until the user supplies a URL.
    st.info("👆 Please enter a CSV URL in the sidebar to get started.")
else:
    df, error = load_data(url)

    # A failed load is reported and ends this script run.
    if error:
        st.error(f"Error loading data: {error}")
        st.stop()

    if df is None or df.empty:
        st.warning("No data found in the CSV file.")
    else:
        st.sidebar.success(f"✅ Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")

        # Sidebar: how much of the table to show, and in what order.
        max_rows = st.sidebar.slider(
            "Number of rows to display:",
            min_value=1,
            max_value=len(df),
            value=min(100, len(df))
        )
        shuffle_data = st.sidebar.checkbox("Randomly shuffle data", value=False)

        # The preview is a (possibly shuffled) copy cut down to max_rows;
        # df itself stays intact for the charts and summaries below.
        display_df = (
            df.sample(frac=1).reset_index(drop=True) if shuffle_data else df.copy()
        )
        display_df = display_df.head(max_rows)

        # Two panes: wide table preview on the left, chart controls on the right.
        preview_pane, controls_pane = st.columns([2, 1])

        with preview_pane:
            st.subheader("📋 Data Preview")
            st.dataframe(display_df, use_container_width=True)

        with controls_pane:
            st.subheader("📈 Visualization Controls")

            columns = df.columns.tolist()
            var1 = st.selectbox("Select first variable:", columns, index=0)

            # The second variable is optional and never repeats the first.
            var2 = None
            use_second_var = st.checkbox("Use second variable")
            if use_second_var:
                other_columns = [name for name in columns if name != var1]
                if other_columns:
                    var2 = st.selectbox("Select second variable:", other_columns)

            # The chart is drawn only on the run in which the button is clicked.
            if st.button("📊 Create Visualization", type="primary") and var1:
                fig = create_visualization(df, var1, var2)
                st.plotly_chart(fig, use_container_width=True)

        # Headline numbers, one metric per column.
        st.subheader("ℹ️ Dataset Information")
        metrics = [
            ("Total Rows", df.shape[0]),
            ("Total Columns", df.shape[1]),
            ("Displayed Rows", len(display_df)),
        ]
        for slot, (label, value) in zip(st.columns(3), metrics):
            with slot:
                st.metric(label, value)

        # Per-column summary: dtype, missing-value count, distinct-value count.
        st.subheader("📋 Column Information")
        col_info_df = pd.DataFrame([
            {
                'Column': name,
                'Data Type': str(df[name].dtype),
                'Null Values': df[name].isnull().sum(),
                'Unique Values': df[name].nunique(),
            }
            for name in df.columns
        ])
        st.dataframe(col_info_df, use_container_width=True)
Loading code editor...
Click Save & Run to preview your app
Terminal