Ploomber AI Editor | data-viewer-e97f

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Cache only successful reads. The cached helper lets errors propagate, so a
# failed load is never stored and the next rerun retries the URL instead of
# replaying a stale failure.
@st.cache_data
def _read_csv_cached(url):
    """Read the CSV at ``url`` into a DataFrame; any error propagates."""
    return pd.read_csv(url)


def load_data(url):
    """Load the CSV at ``url``, reporting any failure in the app.

    Args:
        url: Location of the CSV, passed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` if loading failed (an error
        message is shown via ``st.error`` in that case).
    """
    try:
        return _read_csv_cached(url)
    # Broad on purpose: this is the UI boundary, and read_csv can fail with
    # network, file, decoding, or parsing errors alike.
    except Exception as e:
        st.error(f"Error loading data: {e}")
        return None

# URL input with default value
url = st.text_input(
    "Enter CSV URL",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)

# Load the data; load_data returns None when loading fails.
df = load_data(url)

if df is not None and len(df) == 0:
    # The row-count input below uses min_value=1 and max_value=len(df), which
    # is an invalid range for a frame with no rows, so stop here instead.
    st.warning("The CSV contains no rows to display.")
elif df is not None:
    # Controls
    col1, col2 = st.columns(2)

    with col1:
        num_rows = st.number_input(
            "Number of rows to display",
            min_value=1,
            max_value=len(df),
            value=min(10, len(df))
        )

    with col2:
        shuffle = st.checkbox("Randomly shuffle data")

    # Shuffle if requested (sample(frac=1) returns all rows in random order)
    if shuffle:
        df = df.sample(frac=1).reset_index(drop=True)

    # Display the dataframe
    st.subheader("Data Preview")
    st.dataframe(df.head(num_rows))

    # Variable selection
    st.subheader("Visualization")
    # "number" selects every numeric dtype, not only float64/int64.
    cols = df.select_dtypes(include="number").columns.tolist()

    if not cols:
        # Without a numeric column there is nothing to put on an axis.
        st.info("No numeric columns available to visualize.")
    else:
        col1, col2 = st.columns(2)

        with col1:
            var1 = st.selectbox("Select first variable", cols)

        with col2:
            remaining_cols = [col for col in cols if col != var1]
            var2 = st.selectbox("Select second variable (optional)",
                                ["None"] + remaining_cols)

        # Visualization logic. Draw on the explicit Axes rather than pyplot's
        # global "current axes" so the plot always lands on this figure.
        fig, ax = plt.subplots()
        if var2 == "None":
            # Single variable visualization
            sns.histplot(data=df, x=var1, kde=True, ax=ax)
        else:
            # Two variable visualization
            sns.scatterplot(data=df, x=var1, y=var2, ax=ax)
            ax.set_xlabel(var1)
            ax.set_ylabel(var2)
        st.pyplot(fig)
        # pyplot keeps every figure it creates until it is closed; close it
        # so figures do not accumulate across script reruns.
        plt.close(fig)

        if var2 != "None":
            # Show correlation
            correlation = df[var1].corr(df[var2])
            st.write(f"Correlation between {var1} and {var2}: {correlation:.3f}")
Loading code editor...
Click Save & Run to preview your app
Terminal