Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (and cache it) and display a DataFrame. Add controls to do the following: - Control how many rows are displayed - A checkbox to determine whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Default data source offered in the sidebar URL field.
DEFAULT_CSV_URL = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"

# Page-wide configuration (wide layout, browser tab title).
st.set_page_config(layout="wide", page_title="Data Visualizer")

# Page header.
st.title("📊 Data Visualizer")
st.markdown("Upload a CSV from URL and explore your data with interactive visualizations!")

# Sidebar: where the user points the app at a CSV file.
sidebar = st.sidebar
sidebar.header("Data Controls")
url = sidebar.text_input(
    label="CSV URL",
    value=DEFAULT_CSV_URL,
    help="Enter the URL of a CSV file to load",
)
@st.cache_data
def _read_csv_cached(url):
    """Read the CSV at *url* into a DataFrame, caching successful reads.

    Any exception from ``pd.read_csv`` propagates to the caller. Keeping the
    error handling outside this function means only a successfully parsed
    DataFrame is ever returned (and therefore cached) for a URL.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load CSV data from *url*, reusing a cached copy when available.

    Args:
        url: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A ``(df, error)`` tuple: ``(DataFrame, None)`` on success, or
        ``(None, message)`` when the file could not be read or parsed.
    """
    try:
        return _read_csv_cached(url), None
    except Exception as exc:  # UI boundary: report the failure, don't crash
        # Fall back to the exception class name so the message is never an
        # empty string; callers test the message for truthiness.
        return None, str(exc) or type(exc).__name__


# Keep the cache-clearing hook reachable under the public name, as it was
# when the decorator sat directly on load_data.
load_data.clear = _read_csv_cached.clear
def _split_columns(frame):
    """Return ``(numeric, categorical)`` lists of column names in *frame*.

    Every column that is not numeric is treated as categorical, so boolean
    columns remain selectable instead of being dropped from both lists.
    """
    numeric = frame.select_dtypes(include=[np.number]).columns.tolist()
    categorical = frame.select_dtypes(exclude=[np.number]).columns.tolist()
    return numeric, categorical


def _scatter_figure(data, var1, var2, color_var):
    """Build a scatter plot of *var1* vs *var2*.

    An OLS trendline is added only when no colour variable is selected.
    """
    scatter_kwargs = dict(
        x=var1,
        y=var2,
        color=color_var,
        title=f"{var1} vs {var2}",
    )
    if color_var is None:
        try:
            return px.scatter(data, trendline="ols", **scatter_kwargs)
        except ImportError:
            # Plotly's OLS trendline relies on the optional statsmodels
            # package; without it, show the plain scatter instead of failing.
            pass
    return px.scatter(data, **scatter_kwargs)


def _build_figure(data, var1, var2, color_var, numeric_cols):
    """Choose and build the chart for the selected variable(s).

    Args:
        data: DataFrame holding the rows to plot.
        var1: Name of the first selected column.
        var2: Name of the optional second column, or ``None``.
        color_var: Optional column used to colour the plot, or ``None``.
        numeric_cols: Names of the columns considered numeric.

    Returns:
        A ``(figure, caption)`` tuple.
    """
    first_numeric = var1 in numeric_cols

    # One variable: histogram for numeric, bar chart of counts otherwise.
    if var2 is None:
        if first_numeric:
            fig = px.histogram(
                data,
                x=var1,
                color=color_var,
                title=f"Distribution of {var1}",
                marginal="box",
            )
            return fig, f"Histogram: {var1}"
        value_counts = data[var1].value_counts()
        fig = px.bar(
            x=value_counts.index,
            y=value_counts.values,
            title=f"Count of {var1}",
            labels={'x': var1, 'y': 'Count'},
        )
        return fig, f"Bar Chart: {var1}"

    second_numeric = var2 in numeric_cols

    # Two numeric variables: scatter plot.
    if first_numeric and second_numeric:
        fig = _scatter_figure(data, var1, var2, color_var)
        return fig, f"Scatter Plot: {var1} vs {var2}"

    # One numeric, one categorical: box plot, categorical always on x.
    if first_numeric != second_numeric:
        cat_var, num_var = (var2, var1) if first_numeric else (var1, var2)
        fig = px.box(
            data,
            x=cat_var,
            y=num_var,
            color=color_var,
            title=f"{num_var} by {cat_var}",
        )
        return fig, f"Box Plot: {num_var} by {cat_var}"

    # Two categorical variables: heatmap of co-occurrence counts.
    crosstab = pd.crosstab(data[var1], data[var2])
    fig = px.imshow(
        crosstab.values,
        x=crosstab.columns,
        y=crosstab.index,
        aspect="auto",
        title=f"Heatmap: {var1} vs {var2}",
        labels={'x': var2, 'y': var1, 'color': 'Count'},
    )
    return fig, f"Heatmap: {var1} vs {var2}"


# Load data
if url:
    with st.spinner("Loading data..."):
        df, error = load_data(url)
    if error:
        st.error(f"Error loading data: {error}")
        st.stop()

    if df is None:
        st.warning("No data loaded. Please check your URL.")
    elif df.empty:
        # A header-only CSV would otherwise give the row slider an invalid
        # range (min_value=1, max_value=0).
        st.warning("The CSV was loaded but contains no rows to display.")
    else:
        # Data controls
        st.sidebar.subheader("Display Options")

        # Number of rows control
        max_rows = len(df)
        if max_rows > 1:
            num_rows = st.sidebar.slider(
                "Number of rows to display",
                min_value=1,
                max_value=max_rows,
                value=min(100, max_rows),
                help="Select how many rows to display",
            )
        else:
            # Only one row: there is nothing to choose, so skip the slider
            # (its range would collapse to min_value == max_value).
            num_rows = max_rows

        # Shuffle checkbox
        shuffle_data = st.sidebar.checkbox(
            "Randomly shuffle data",
            value=False,
            help="Randomly shuffle the DataFrame rows",
        )

        # Apply controls to the DataFrame. The fixed seed keeps the shuffled
        # order identical across reruns of the script.
        display_df = df
        if shuffle_data:
            display_df = df.sample(frac=1, random_state=42).reset_index(drop=True)
        display_df = display_df.head(num_rows)

        # Main content: data preview on the left, chart controls on the right.
        col1, col2 = st.columns([2, 1])

        with col2:
            st.subheader("📈 Visualization Controls")

            # Column lists come from the full dataset, not the displayed rows.
            numeric_cols, categorical_cols = _split_columns(df)
            all_cols = numeric_cols + categorical_cols

            # Variable selection
            st.markdown("**Select Variables:**")
            var1 = st.selectbox(
                "First variable",
                options=all_cols,
                index=0 if all_cols else None,
                help="Select the first variable for visualization",
            )
            var2 = st.selectbox(
                "Second variable (optional)",
                options=[None] + all_cols,
                index=0,
                help="Select a second variable for bivariate analysis",
            )

            # Color variable for enhanced visualizations
            color_var = st.selectbox(
                "Color by (optional)",
                options=[None] + categorical_cols,
                index=0,
                help="Select a categorical variable to color the plot",
            )

        with col1:
            st.subheader("📋 Data Preview")
            st.dataframe(display_df, use_container_width=True)

            # Data info
            st.markdown(f"**Dataset Info:** {len(df)} rows × {len(df.columns)} columns")
            if shuffle_data:
                st.info("Data has been shuffled randomly")

        # Visualization section
        st.subheader("📊 Data Visualization")
        if var1 is not None:
            # Charts are drawn from the rows currently displayed.
            fig, chart_title = _build_figure(
                display_df, var1, var2, color_var, numeric_cols
            )
            fig.update_layout(height=500)
            st.plotly_chart(fig, use_container_width=True)
            st.caption(f"📈 {chart_title}")

            # Summary statistics for the selected numeric variable(s).
            # De-duplicated so picking the same column twice does not yield
            # a table with two identically named columns.
            numeric_vars = list(
                dict.fromkeys(v for v in (var1, var2) if v in numeric_cols)
            )
            if numeric_vars:
                st.subheader("📊 Summary Statistics")
                summary_stats = display_df[numeric_vars].describe()
                st.dataframe(summary_stats, use_container_width=True)
        else:
            st.info("👆 Select at least one variable to create a visualization")
else:
    st.info("👆 Please enter a CSV URL to get started")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?