Ploomber AI Editor | penguin-data-viewer-6dfc

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Page heading shown at the top of the app
st.title("Penguin Data Viewer and Visualizer")

# Text box for the CSV location, pre-filled with the seaborn penguins dataset
DEFAULT_CSV_URL = (
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)
url = st.text_input("Enter CSV URL", value=DEFAULT_CSV_URL)

# Function to load data with caching
@st.cache_data
def _read_csv_cached(url):
    """Download and parse the CSV at *url*, caching the resulting DataFrame.

    Errors are deliberately not handled here: when this function raises,
    nothing is stored in the cache, so the next call with the same URL
    tries the download again.
    """
    return pd.read_csv(url)


def load_data(url):
    """Return the CSV at *url* as a DataFrame, or ``None`` if loading fails.

    On failure the error is reported on the page with ``st.error``. The
    try/except lives outside the cached helper so that a failed attempt
    (for example a transient network error) is never cached as ``None``.

    Args:
        url: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or ``None`` when the CSV could not be read.
    """
    try:
        return _read_csv_cached(url)
    except Exception as e:
        # Broad on purpose: this is the UI boundary, and any failure
        # (network, parsing, bad URL) should be shown to the user.
        st.error(f"Error loading data: {e}")
        return None

# Load data
def _render_figure(fig):
    """Show a Matplotlib figure in the app, then close it.

    Figures created through ``plt.subplots`` stay registered with pyplot
    until they are closed explicitly. This script is re-executed on every
    widget interaction, so leaving them open would accumulate figures in
    memory.
    """
    st.pyplot(fig)
    plt.close(fig)


def _render_preview(data):
    """Draw the row-count and shuffle controls plus the data preview table.

    Expects *data* to contain at least one row.
    """
    st.subheader("Data Controls")

    slider_col, shuffle_col = st.columns(2)

    with slider_col:
        max_rows = min(100, len(data))
        if max_rows > 1:
            num_rows = st.slider(
                "Number of rows to display",
                min_value=1,
                max_value=max_rows,
                value=min(10, max_rows),
            )
        else:
            # Only one row exists, so there is nothing to choose between.
            num_rows = max_rows

    with shuffle_col:
        shuffle = st.checkbox("Randomly shuffle data")

    # sample() already returns a new frame, so no defensive copy is needed;
    # the fixed seed keeps the shuffled order stable across reruns.
    if shuffle:
        display_data = data.sample(frac=1, random_state=42).reset_index(drop=True)
    else:
        display_data = data

    st.subheader("Data Preview")
    st.dataframe(display_data.head(num_rows))


def _render_visualization(data):
    """Draw the variable pickers and the chart matching the chosen columns.

    Chart type by selection:
      * one numeric column        -> histogram with KDE + summary statistics
      * one non-numeric column    -> count plot + value counts
      * numeric vs numeric        -> scatter plot + correlation
      * numeric vs non-numeric    -> box plot (category on the x axis)
      * non-numeric vs non-numeric -> heatmap of cross-tabulated counts
    """
    st.subheader("Data Visualization")

    # Every column that is not numeric is treated as categorical.
    numeric_cols = set(data.select_dtypes(include=np.number).columns)
    all_cols = data.columns.tolist()

    st.write("Select variables to visualize:")

    first_col, second_col = st.columns(2)
    with first_col:
        var1 = st.selectbox("Variable 1", options=all_cols, index=0)
    with second_col:
        # The leading None lets the user ask for a single-variable chart.
        var2 = st.selectbox("Variable 2", options=[None] + all_cols, index=0)

    var1_numeric = var1 in numeric_cols

    if var2 is None:
        # Single variable visualization
        if var1_numeric:
            st.subheader(f"Distribution of {var1}")
            fig, ax = plt.subplots(figsize=(10, 6))
            sns.histplot(data=data, x=var1, kde=True, ax=ax)
            _render_figure(fig)

            st.write(f"Summary statistics for {var1}:")
            st.write(data[var1].describe())
        else:
            st.subheader(f"Count of {var1} categories")
            fig, ax = plt.subplots(figsize=(10, 6))
            sns.countplot(data=data, x=var1, ax=ax)
            ax.tick_params(axis="x", labelrotation=45)
            _render_figure(fig)

            st.write(f"Value counts for {var1}:")
            st.write(data[var1].value_counts())
        return

    # Two-variable visualization
    var2_numeric = var2 in numeric_cols

    if var1_numeric and var2_numeric:
        # Numeric vs Numeric -> Scatter plot
        st.subheader(f"Relationship between {var1} and {var2}")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.scatterplot(data=data, x=var1, y=var2, ax=ax)
        _render_figure(fig)

        # Also show correlation
        corr = data[[var1, var2]].corr().iloc[0, 1]
        st.write(f"Correlation between {var1} and {var2}: {corr:.4f}")

    elif var1_numeric or var2_numeric:
        # Exactly one numeric column -> Box plot, whichever order was picked
        if var1_numeric:
            numeric_var, category_var = var1, var2
        else:
            numeric_var, category_var = var2, var1

        st.subheader(f"Distribution of {numeric_var} by {category_var}")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.boxplot(data=data, x=category_var, y=numeric_var, ax=ax)
        ax.tick_params(axis="x", labelrotation=45)
        _render_figure(fig)

    else:
        # Categorical vs Categorical -> Heatmap of counts
        st.subheader(f"Count of {var1} vs {var2}")
        crosstab = pd.crosstab(data[var1], data[var2])
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.heatmap(crosstab, annot=True, cmap="YlGnBu", fmt="d", ax=ax)
        _render_figure(fig)


# A blank URL is handled like a failed load so the user always gets feedback
# instead of an empty page.
source_url = url.strip() if url else ""
data = load_data(source_url) if source_url else None

if data is None:
    st.warning("Please enter a valid CSV URL")
else:
    # Display data info
    st.write(f"Dataset loaded with {data.shape[0]} rows and {data.shape[1]} columns")

    if data.empty:
        # No rows or no columns: the slider and the pickers have nothing to
        # work with, so stop here rather than building invalid widgets.
        st.warning("The dataset has no rows or no columns, so there is nothing to display")
    else:
        _render_preview(data)
        _render_visualization(data)
Loading code editor...
Click Save & Run to preview your app
Terminal