Build a Streamlit app for CSV data exploration with URL input (default: penguins dataset). Include:
- Row display control
- Shuffle checkbox with random seed
- Error handling for bad URLs/files
- Auto-detect best visualization for 1-2 selected variables
- Support numerical/categorical plotting
- Handle edge cases: large files, mixed types, missing data
- Cache data & computations
- Add loading states & error messages
- Test with 1M+ rows & 100+ columns
- Implement responsive design

Test all error states and verify the visualization logic.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
import random
# Function to load data with error handling
@st.cache_data
def _read_csv(url):
    """Read the CSV at *url* into a DataFrame.

    Deliberately lets exceptions propagate: a call that raises is not stored
    in the cache, so a transient failure is retried on the next rerun instead
    of being remembered as a permanent "no data" result.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load a CSV from a URL or path, reporting failures in the app.

    Args:
        url: Location of the CSV file (anything ``pd.read_csv`` accepts).

    Returns:
        The loaded DataFrame, or ``None`` if loading failed. On failure an
        error message is shown to the user via ``st.error``.
    """
    try:
        return _read_csv(url)
    except Exception as e:
        # Top-level UI boundary: read_csv can fail with network, parser,
        # encoding or file errors; all of them are surfaced to the user.
        st.error(f"Error loading data: {e}")
        return None


# Keep the cache-clearing hook available under the public name.
load_data.clear = _read_csv.clear
def _render_chart(data, x_var, y_var, chart_type, edge_color):
    """Draw the selected chart on its own figure and render it in the app.

    Args:
        data: DataFrame holding the columns to plot.
        x_var: Name of the column used for the X axis.
        y_var: Name of the column used for the Y axis, or ``None``.
        chart_type: One of "Scatter Plot", "Box Plot", "Distribution Plot".
        edge_color: Colour applied to the edges of the chart's patches.

    When the selection cannot be plotted, an explanatory message is shown
    instead of silently rendering nothing.
    """
    x_is_numeric = pd.api.types.is_numeric_dtype(data[x_var])

    if chart_type == "Scatter Plot":
        if not y_var:
            st.info("Select a Y variable to draw a scatter plot.")
            return
        if not (x_is_numeric and pd.api.types.is_numeric_dtype(data[y_var])):
            st.info("Scatter plots require numeric X and Y variables.")
            return
    elif chart_type == "Box Plot" and not y_var:
        st.info("Select a Y variable to draw a box plot.")
        return

    st.subheader(chart_type)
    # Use an explicit figure rather than matplotlib's implicit global one.
    fig, ax = plt.subplots()
    try:
        if chart_type == "Scatter Plot":
            sns.scatterplot(x=x_var, y=y_var, data=data, ax=ax)
        elif chart_type == "Box Plot":
            sns.boxplot(x=x_var, y=y_var, data=data, ax=ax)
        else:
            # A density estimate is only meaningful for numeric data.
            sns.histplot(data[x_var], kde=x_is_numeric, ax=ax)
        # Apply the picked colour before rendering so it is actually visible.
        for patch in ax.patches:
            patch.set_edgecolor(edge_color)
        st.pyplot(fig)
    except (TypeError, ValueError) as e:
        # Raised for column combinations the chart type cannot handle.
        st.error(f"Could not draw {chart_type}: {e}")
    finally:
        # Figures otherwise accumulate across Streamlit reruns.
        plt.close(fig)


# App Title
st.title("CSV Data Exploration App")

# URL input for CSV
url = st.text_input(
    "Enter CSV URL (default: penguins dataset):",
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
)

# Load data
if url:
    data = load_data(url)
    if data is not None and data.empty:
        st.warning("The dataset contains no rows to explore.")
    elif data is not None:
        st.write(f"Data loaded successfully with shape: {data.shape}")

        # Display row control. A slider needs min < max and a default inside
        # the range, so clamp the default and skip the slider for one row.
        max_rows = min(1000, len(data))
        if max_rows > 1:
            rows = st.slider(
                "Select number of rows to display:", 1, max_rows, min(10, max_rows)
            )
        else:
            rows = max_rows

        # Shuffle checkbox
        shuffle = st.checkbox("Shuffle data")
        random_seed = st.number_input("Select random seed:", min_value=0, value=42)
        if shuffle:
            data = data.sample(frac=1, random_state=random_seed).reset_index(drop=True)

        # Preview after shuffling so the displayed rows reflect the new order.
        st.write(data.head(rows))

        # Select variables
        columns = data.columns.tolist()
        x_var = st.selectbox("Select X variable:", columns)
        y_var = st.selectbox("Select Y variable (optional):", [None] + columns)

        # Handle missing data
        if data[x_var].isnull().any():
            st.warning(f"{x_var} has missing values.")
        if y_var and data[y_var].isnull().any():
            st.warning(f"{y_var} has missing values.")

        # Visualization type selection
        chart_type = st.selectbox(
            "Select Chart Type:", ["Scatter Plot", "Box Plot", "Distribution Plot"]
        )

        # Custom color selection (read before plotting so it takes effect)
        color = st.color_picker("Pick a color for the plot", "#1f77b4")

        # Visualization
        _render_chart(data, x_var, y_var, chart_type, color)

        # Display summary statistics
        st.subheader("Summary Statistics")
        st.write(data.describe())

        # Data download option
        st.download_button(
            label="Download Data as CSV",
            data=data.to_csv(index=False).encode('utf-8'),
            file_name='data.csv',
            mime='text/csv',
        )

# Responsive Design
st.markdown("\n\n<style>\nbody {font-size: 1em;}\n@media (max-width: 768px) {\nbody {font-size: 0.8em;}\n}\n</style>", unsafe_allow_html=True)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?