Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (caching the result) and display it as a DataFrame. Add controls to do the following: - Control how many rows are displayed. - A checkbox to determine whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import altair as alt
# Download and parse the CSV; the decorator lets Streamlit reuse the result.
@st.cache_data
def load_data(url):
    """Read the CSV located at ``url`` into a pandas DataFrame.

    The function is wrapped in ``st.cache_data``, so the parsed DataFrame
    is cached by Streamlit rather than fetched again on every script rerun.
    Any error raised by ``pd.read_csv`` propagates to the caller.
    """
    frame = pd.read_csv(url)
    return frame
# --- Helpers -----------------------------------------------------------------
def _is_numeric(series):
    """Return True when ``series`` holds numeric data.

    Uses pandas' dtype inspection so every numeric dtype (int32, float32,
    unsigned, nullable ``Int64``/``Float64``, ...) is recognised, not only
    ``int64``/``float64``. Booleans are excluded on purpose: they are better
    visualised as categories.
    """
    dtype = series.dtype
    return (
        pd.api.types.is_numeric_dtype(dtype)
        and not pd.api.types.is_bool_dtype(dtype)
    )


def _build_chart(df, var1, var2):
    """Choose and build the Altair chart for the selected variable(s).

    Args:
        df: DataFrame holding the data to plot.
        var1: Name of the first (mandatory) column.
        var2: Name of the second column, or ``None`` for a single-variable
            chart.

    Returns:
        An Altair chart: histogram (one numeric), bar chart (one
        categorical), scatter plot (two numeric), box plot (one numeric and
        one categorical) or heatmap of counts (two categorical).
    """
    first_is_numeric = _is_numeric(df[var1])

    if var2 is None:
        if first_is_numeric:
            # Numeric - show histogram
            return alt.Chart(df).mark_bar().encode(
                x=alt.X(f'{var1}:Q', bin=True),
                y='count()'
            ).properties(
                title=f'Distribution of {var1}'
            )
        # Categorical - show bar chart
        return alt.Chart(df).mark_bar().encode(
            x=f'{var1}:N',
            y='count()'
        ).properties(
            title=f'Count of {var1}'
        )

    second_is_numeric = _is_numeric(df[var2])

    if first_is_numeric and second_is_numeric:
        # Both numeric - scatter plot
        return alt.Chart(df).mark_circle().encode(
            x=f'{var1}:Q',
            y=f'{var2}:Q'
        ).properties(
            title=f'{var2} vs {var1}'
        )

    if first_is_numeric or second_is_numeric:
        # One numeric, one categorical - box plot
        # Put the numeric variable on the y-axis
        if first_is_numeric:
            num_var, cat_var = var1, var2
        else:
            num_var, cat_var = var2, var1
        return alt.Chart(df).mark_boxplot().encode(
            x=f'{cat_var}:N',
            y=f'{num_var}:Q'
        ).properties(
            title=f'Distribution of {num_var} by {cat_var}'
        )

    # Both categorical - heatmap
    return alt.Chart(df).mark_rect().encode(
        x=f'{var1}:N',
        y=f'{var2}:N',
        color='count()'
    ).properties(
        title=f'Heatmap of {var2} vs {var1}'
    )


# --- Page --------------------------------------------------------------------
# Title
st.title("CSV Data Visualizer")

# URL input
url = st.text_input(
    "Enter CSV URL",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)

# Top-level boundary: any failure (bad URL, unparsable CSV, chart error) is
# reported to the user as a single error message.
try:
    # Load data
    df = load_data(url)

    if df.empty:
        # number_input below requires min_value <= max_value, which cannot
        # hold for zero rows; report it explicitly instead of failing there.
        st.warning("The CSV file contains no data to display.")
    else:
        # Controls
        col1, col2 = st.columns(2)
        with col1:
            n_rows = st.number_input(
                "Number of rows to display",
                min_value=1,
                max_value=len(df),
                value=min(5, len(df)),
            )
        with col2:
            shuffle = st.checkbox("Randomly shuffle data")

        if shuffle:
            # Fixed seed keeps the shuffled order stable across Streamlit
            # reruns (every widget interaction re-executes this script).
            df = df.sample(frac=1, random_state=42).reset_index(drop=True)

        # Display dataframe
        st.subheader("Data Preview")
        st.dataframe(df.head(n_rows))

        # Variable selection
        st.subheader("Visualization")
        all_cols = list(df.columns)

        col1, col2 = st.columns(2)
        with col1:
            var1 = st.selectbox("Select first variable", options=all_cols)
        with col2:
            # A real None is the "no second variable" option so it cannot
            # collide with a CSV column that is literally named "None".
            var2 = st.selectbox(
                "Select second variable (optional)",
                options=[None] + all_cols,
                format_func=lambda col: 'None' if col is None else str(col),
            )

        # Plotting a column against itself carries no information; fall
        # back to the single-variable chart.
        if var2 == var1:
            var2 = None

        chart = _build_chart(df, var1, var2)
        st.altair_chart(chart, use_container_width=True)
except Exception as e:
    st.error(f"Error: {str(e)}")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?