Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (caching the result) and display it as a DataFrame. Add controls to do the following: - Control how many rows are displayed - A checkbox to determine whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import altair as alt
# Cache the data loading function
@st.cache_data
def load_data(url: str) -> pd.DataFrame:
    """Download a CSV file through the CORS proxy and parse it.

    The function is wrapped in ``st.cache_data``, so repeated calls with the
    same ``url`` reuse the previously parsed result.

    Args:
        url: Address of the CSV file. Surrounding whitespace is ignored.

    Returns:
        The CSV contents as a DataFrame.

    Raises:
        ValueError: If ``url`` is empty or contains only whitespace.
    """
    # Imported locally so this block does not rely on a new file-level import.
    from urllib.parse import quote

    url = url.strip()
    if not url:
        # Fail early with a clear message instead of asking the proxy for
        # nothing and trying to parse whatever it answers with.
        raise ValueError("Please enter a CSV URL.")

    # NOTE: Using corsproxy.io because we're in a WASM environment. If running locally,
    # you can remove the corsproxy.io prefix. Some websites don't work with the proxy,
    # in those cases try removing the proxy prefix.
    # The target is percent-encoded so that its own "?", "&" and "#" characters
    # are delivered to the proxy as part of the target URL rather than being
    # interpreted as part of the proxy request itself.
    proxy_url = f"https://corsproxy.io/?{quote(url, safe='')}"
    return pd.read_csv(proxy_url)
def _split_columns(df: pd.DataFrame) -> tuple[list, list]:
    """Return ``(numeric, categorical)`` column-name lists for *df*.

    Every numeric dtype counts as numeric; every remaining column (text,
    category, bool, ...) is treated as categorical so no column is silently
    left out of the variable selectors.
    """
    numeric = list(df.select_dtypes(include="number").columns)
    categorical = list(df.select_dtypes(exclude="number").columns)
    return numeric, categorical


def _build_chart(df: pd.DataFrame, var1, var2, numeric_cols) -> alt.Chart:
    """Choose a chart type from the kinds of the selected variable(s).

    Args:
        df: Data to plot.
        var1: Name of the first selected column.
        var2: Name of the second selected column, or ``None`` when only one
            variable is selected. Picking the same column twice is treated
            as a single-variable selection.
        numeric_cols: Names of the numeric columns of *df*.

    Returns:
        A histogram or count bar chart for one variable; a scatter plot,
        box plot, or stacked count bar chart for two variables.
    """
    numeric_names = set(numeric_cols)
    first_is_numeric = var1 in numeric_names

    if var2 is None or var2 == var1:
        # One variable: show its distribution. Numeric values are binned
        # into a histogram; categories get one bar per value.
        x_axis = alt.X(var1, bin=True) if first_is_numeric else alt.X(var1)
        return alt.Chart(df).mark_bar().encode(x=x_axis, y="count()")

    second_is_numeric = var2 in numeric_names

    if first_is_numeric and second_is_numeric:
        # Scatter plot for two numerical variables
        return alt.Chart(df).mark_circle().encode(
            x=var1,
            y=var2,
            tooltip=[var1, var2],
        ).interactive()

    if first_is_numeric != second_is_numeric:
        # Box plot for one numerical and one categorical variable; the
        # categorical one always goes on the x axis.
        if first_is_numeric:
            category, measure = var2, var1
        else:
            category, measure = var1, var2
        return alt.Chart(df).mark_boxplot().encode(x=category, y=measure)

    # Bar chart for two categorical variables
    return alt.Chart(df).mark_bar().encode(
        x=var1,
        y="count()",
        color=var2,
    )


def _render(df: pd.DataFrame) -> None:
    """Draw the row controls, the data preview and the chart for *df*.

    *df* must be non-empty (at least one row and one column).
    """
    # Controls
    col1, col2 = st.columns(2)
    with col1:
        n_rows = st.number_input(
            "Number of rows to display",
            min_value=1,
            max_value=len(df),
            value=min(5, len(df)),
        )
    with col2:
        shuffle = st.checkbox("Randomly shuffle data")
    if shuffle:
        # Fixed seed keeps the shuffled order stable across Streamlit reruns.
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Display dataframe
    st.subheader("Data Preview")
    st.dataframe(df.head(n_rows))

    # Variable selection
    st.subheader("Visualization")
    numeric_cols, categorical_cols = _split_columns(df)
    # Non-empty here: df has at least one column and every column lands in
    # exactly one of the two lists.
    all_cols = numeric_cols + categorical_cols

    col1, col2 = st.columns(2)
    with col1:
        var1 = st.selectbox("Select first variable", all_cols)
    with col2:
        # None is offered first so a single-variable chart is the default.
        var2 = st.selectbox(
            "Select second variable",
            [None] + all_cols,
            format_func=lambda name: "(none)" if name is None else str(name),
        )

    # Create visualization based on variable types
    chart = _build_chart(df, var1, var2, numeric_cols)
    st.altair_chart(chart, use_container_width=True)


# App title
st.title("Data Visualization App")

# URL input
url = st.text_input(
    "Enter CSV URL",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
)

try:
    # Load the data
    df = load_data(url)
    if df.empty:
        # A frame without rows or columns cannot drive the row-count input
        # (its minimum of 1 would exceed the maximum) or the selectors.
        st.warning("The CSV file contains no data to display.")
    else:
        _render(df)
except Exception as e:
    # Top-level boundary: report any failure in the page instead of a traceback.
    st.error(f"Error loading or processing data: {str(e)}")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?