Ploomber AI Editor | csv-visualizer-cb58

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

st.set_page_config(page_title="CSV Data Visualizer", layout="wide")

@st.cache_data
def _read_csv_cached(url):
    """Read the CSV at ``url`` into a DataFrame; successful reads are cached."""
    return pd.read_csv(url)


def load_data(url):
    """Load CSV data from a URL, reusing the cached result when available.

    Args:
        url: Location of the CSV, passed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` if reading failed (the failure is
        reported to the user through ``st.error``).
    """
    # The try/except deliberately lives outside the cached helper: if the
    # cached function itself returned None on failure, that None would be
    # stored for the URL and a transient error would keep being served
    # until the cache is cleared. Exceptions are not cached, so a later
    # rerun retries the download.
    try:
        return _read_csv_cached(url)
    except Exception as e:
        st.error(f"Error loading data: {str(e)}")
        return None

def get_column_type(series):
    """Classify a pandas Series as ``'numeric'`` or ``'categorical'``.

    The decision is delegated to ``pd.api.types.is_numeric_dtype``; every
    dtype it rejects is reported as ``'categorical'``.
    """
    is_numeric = pd.api.types.is_numeric_dtype(series)
    return 'numeric' if is_numeric else 'categorical'

def create_visualization(df, var1, var2=None):
    """Build a Plotly figure suited to the types of the selected column(s).

    Chart selection, based on ``get_column_type``:
      * one numeric column           -> histogram
      * one categorical column       -> bar chart of value counts
      * numeric vs numeric           -> scatter plot
      * categorical vs categorical   -> heatmap of the cross-tabulation
      * numeric vs categorical       -> box plot, categorical on the x-axis

    Args:
        df: DataFrame holding the data.
        var1: Name of the first column.
        var2: Optional name of a second column; ``None`` requests a
            single-variable chart.

    Returns:
        The Plotly figure.
    """
    if var2 is None:
        # Single variable visualization
        if get_column_type(df[var1]) == 'numeric':
            return px.histogram(df, x=var1, title=f"Distribution of {var1}")

        # Build the counts table with fixed column names instead of relying
        # on value_counts().reset_index(), whose output column names differ
        # between pandas 1.x and 2.x.
        counts = df[var1].value_counts()
        counts_df = pd.DataFrame({
            'category': counts.index,
            'count': counts.to_numpy(),
        })
        # Axis titles are set through `labels`; the figure has no
        # update_xaxis/update_yaxis methods (the plural update_xaxes /
        # update_yaxes are the real ones).
        return px.bar(counts_df, x='category', y='count',
                      title=f"Count of {var1}",
                      labels={'category': str(var1), 'count': "Count"})

    # Two variable visualization
    var1_type = get_column_type(df[var1])
    var2_type = get_column_type(df[var2])

    if var1_type == 'numeric' and var2_type == 'numeric':
        return px.scatter(df, x=var1, y=var2, title=f"{var1} vs {var2}")

    if var1_type == 'categorical' and var2_type == 'categorical':
        crosstab = pd.crosstab(df[var1], df[var2])
        return px.imshow(crosstab,
                         title=f"{var1} vs {var2} (Cross-tabulation)",
                         labels=dict(x=var2, y=var1, color="Count"))

    # Mixed types: box plot with the categorical column on the x-axis.
    if var1_type == 'categorical':
        cat_var, num_var = var1, var2
    else:
        cat_var, num_var = var2, var1
    return px.box(df, x=cat_var, y=num_var, title=f"{num_var} by {cat_var}")

# ---- Page header ----
st.title("CSV Data Visualizer")
st.write("Load CSV data from a URL and create visualizations")

# ---- Data source ----
# The text box is pre-filled with a default CSV URL.
DEFAULT_CSV_URL = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
url = st.text_input(label="Enter CSV URL:", value=DEFAULT_CSV_URL)

def _show_column_summary(frame, column):
    """Write one column's summary: describe() if numeric, else value counts."""
    st.write(f"**{column}** summary:")
    if get_column_type(frame[column]) == 'numeric':
        st.write(frame[column].describe())
    else:
        st.write(frame[column].value_counts())


if url:
    # Load data; load_data returns None after reporting a failure.
    df = load_data(url)

    if df is not None:
        st.success(f"Data loaded successfully! Shape: {df.shape}")

        # Controls
        col1, col2 = st.columns(2)

        with col1:
            # Number of rows to display
            max_rows = len(df)
            if max_rows > 1:
                num_rows = st.slider("Number of rows to display:",
                                     min_value=1, max_value=max_rows,
                                     value=min(100, max_rows))
            else:
                # With 0 or 1 rows there is nothing to choose between, and a
                # header-only CSV would otherwise request a slider whose
                # max_value (0) and default (0) lie below min_value (1).
                num_rows = max_rows

        with col2:
            # Shuffle checkbox
            shuffle = st.checkbox("Randomly shuffle DataFrame")

        # Apply controls. Neither sample() nor head() modifies df in place,
        # so no defensive copy is needed.
        display_df = df
        if shuffle:
            display_df = display_df.sample(frac=1).reset_index(drop=True)
        display_df = display_df.head(num_rows)

        # Display DataFrame
        st.subheader("DataFrame")
        st.dataframe(display_df, use_container_width=True)

        # Variable selection for visualization
        st.subheader("Create Visualization")

        # Get column names
        columns = df.columns.tolist()

        col1, col2 = st.columns(2)

        with col1:
            var1 = st.selectbox("Select first variable:",
                                options=columns,
                                index=0 if columns else None)

        with col2:
            # A leading None entry makes the second variable optional.
            var2 = st.selectbox("Select second variable (optional):",
                                options=[None] + columns,
                                index=0)

        # Create and display visualization
        if var1 is not None:
            try:
                fig = create_visualization(df, var1, var2)
                st.plotly_chart(fig, use_container_width=True)

                # Show data summary
                st.subheader("Data Summary")
                if var2 is None:
                    _show_column_summary(df, var1)
                else:
                    left, right = st.columns(2)
                    with left:
                        _show_column_summary(df, var1)
                    with right:
                        _show_column_summary(df, var2)

            except Exception as e:
                # Script-level boundary: report the problem in the page
                # instead of letting the whole app crash.
                st.error(f"Error creating visualization: {str(e)}")

        # Show data info
        with st.expander("Data Info"):
            st.write("**Column Information:**")
            # Converted to plain lists so the table keeps a default integer
            # index instead of being aligned on the column names.
            info_df = pd.DataFrame({
                'Column': df.columns,
                'Type': df.dtypes.astype(str).tolist(),
                'Non-Null Count': df.count().tolist(),
                'Null Count': df.isnull().sum().tolist(),
            })
            st.dataframe(info_df, use_container_width=True)

else:
    st.info("Please enter a CSV URL to get started.")
Loading code editor...
Click Save & Run to preview your app
Terminal