Build a Streamlit app for CSV data exploration with URL input (default: penguins dataset). Include:
- Row display control
- Shuffle checkbox with random seed
- Error handling for bad URLs/files
- Auto-detect best visualization for 1-2 selected variables
- Support numerical/categorical plotting
- Handle edge cases: large files, mixed types, missing data
- Cache data & computations
- Add loading states & error messages
- Test with 1M+ rows & 100+ columns
- Implement responsive design

Test all error states and verify the visualization logic.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
import random
# Function to load data with error handling
@st.cache_data
def _read_csv_cached(url):
    """Read the CSV at *url* into a DataFrame, caching successful reads.

    Exceptions propagate to the caller and are therefore never stored in the
    cache, so a failed load is retried on the next rerun.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load the CSV at *url*, reporting failures in the Streamlit UI.

    Args:
        url: URL or path understood by ``pandas.read_csv``.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when loading failed (an
        error message is rendered with ``st.error`` in that case).
    """
    try:
        return _read_csv_cached(url)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and read_csv can fail
        # with network, parsing, encoding or file-system errors alike.
        st.error(f"Error loading data: {e}")
        return None
# Page heading shown at the top of the app.
st.title("CSV Data Exploration App")

# Address of the CSV to explore; pre-filled with the seaborn penguins dataset.
PENGUINS_CSV_URL = (
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)
url = st.text_input(
    "Enter CSV URL (default: penguins dataset):",
    value=PENGUINS_CSV_URL,
)
# Main exploration flow: load the CSV, preview rows (optionally shuffled),
# let the user pick one or two columns, and draw a plot suited to their types.

# Upper bound on rows handed to the plotting code so very large files stay
# responsive; larger frames are randomly down-sampled for plotting only.
MAX_PLOT_ROWS = 50_000
# Largest seed accepted by NumPy's RandomState (used by DataFrame.sample).
MAX_RANDOM_SEED = 2**32 - 1

# Load data
csv_url = url.strip() if url else ""
if csv_url:
    data = load_data(csv_url)
    if data is not None and data.empty:
        # No rows or no columns: there is nothing to slide over, select or plot.
        st.write(f"Data loaded successfully with shape: {data.shape}")
        st.warning("The loaded CSV contains no data to explore.")
    elif data is not None:
        n_rows = len(data)
        st.write(f"Data loaded successfully with shape: {data.shape}")

        # Display row control. The default must not exceed the maximum, and the
        # slider is skipped entirely when there is only a single row to show.
        max_rows = min(1000, n_rows)
        if max_rows > 1:
            rows = st.slider(
                "Select number of rows to display:", 1, max_rows, min(10, max_rows)
            )
        else:
            rows = 1

        # Shuffle checkbox
        shuffle = st.checkbox("Shuffle data")
        random_seed = int(
            st.number_input(
                "Select random seed:",
                min_value=0,
                max_value=MAX_RANDOM_SEED,
                value=42,
            )
        )
        if shuffle:
            data = data.sample(frac=1, random_state=random_seed).reset_index(drop=True)

        # Render the preview only after shuffling so the checkbox is reflected
        # in the rows the user actually sees.
        st.write(data.head(rows))

        # Select variables
        columns = data.columns.tolist()
        x_var = st.selectbox("Select X variable:", columns)
        y_var = st.selectbox("Select Y variable (optional):", [None] + columns)

        # Handle missing data
        if data[x_var].isnull().any():
            st.warning(f"{x_var} has missing values.")
        if y_var is not None and data[y_var].isnull().any():
            st.warning(f"{y_var} has missing values.")

        # Down-sample for plotting only; the preview above uses the full frame.
        if n_rows > MAX_PLOT_ROWS:
            plot_data = data.sample(n=MAX_PLOT_ROWS, random_state=random_seed)
            st.info(
                f"Plotting a random sample of {MAX_PLOT_ROWS:,} of {n_rows:,} rows."
            )
        else:
            plot_data = data

        # Visualization: choose the plot from the dtypes of the selection.
        x_numeric = pd.api.types.is_numeric_dtype(data[x_var])
        y_numeric = y_var is not None and pd.api.types.is_numeric_dtype(data[y_var])

        # Draw on an explicit figure instead of pyplot's global state, and
        # always close it so figures do not accumulate across reruns.
        fig, ax = plt.subplots()
        try:
            if y_var is None:
                st.subheader("Distribution Plot")
                if x_numeric:
                    sns.histplot(plot_data[x_var], kde=True, ax=ax)
                else:
                    # A density estimate is meaningless for categories.
                    sns.countplot(x=x_var, data=plot_data, ax=ax)
            elif x_numeric and y_numeric:
                st.subheader("Scatter Plot")
                sns.scatterplot(x=x_var, y=y_var, data=plot_data, ax=ax)
            elif x_numeric or y_numeric:
                # Exactly one numeric variable: box plot per category.
                st.subheader("Box Plot")
                sns.boxplot(x=x_var, y=y_var, data=plot_data, ax=ax)
            else:
                # Two categorical variables: grouped counts instead of a box plot.
                st.subheader("Count Plot")
                sns.countplot(x=x_var, hue=y_var, data=plot_data, ax=ax)
            st.pyplot(fig)
        except Exception as e:
            # UI boundary: show plotting failures (e.g. unsupported dtypes)
            # instead of crashing the whole app.
            st.error(f"Could not draw plot: {e}")
        finally:
            plt.close(fig)
# Responsive Design: inject a stylesheet that sets the body font size and
# reduces it on viewports up to 768px wide.
_RESPONSIVE_CSS = (
    "\n\n<style>\n"
    "body {font-size: 1em;}\n"
    "@media (max-width: 768px) {\n"
    "body {font-size: 0.8em;}\n"
    "}\n"
    "</style>"
)
st.markdown(_RESPONSIVE_CSS, unsafe_allow_html=True)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?