Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file (and cache it) from the URL and display a DataFrame. Add controls to do the following: - Control how many rows are displayed. - A checkbox to determine whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
Drop files here
or click to upload
import streamlit as st
import pandas as pd
import numpy as np
import altair as alt
import seaborn as sns
# Cache only successful downloads. The cached function lets exceptions
# propagate, so a failed fetch is never stored and the same URL is retried on
# the next rerun instead of a cached ``None`` being served for it.
@st.cache_data
def _read_csv_cached(url):
    """Read and parse the CSV at *url*; raise on any failure."""
    return pd.read_csv(url)


def load_data(url):
    """Return the CSV at *url* as a DataFrame, or ``None`` if loading fails.

    Args:
        url: Location handed to ``pd.read_csv``.

    Returns:
        The parsed DataFrame on success. On any failure an error box is shown
        in the app and ``None`` is returned, so callers can simply test
        ``data is not None``.
    """
    try:
        return _read_csv_cached(url)
    except Exception as e:  # UI boundary: report every load failure to the user
        st.error(f"Error loading data: {e}")
        return None


# Keep ``load_data.clear()`` working for code that used it to reset the cache
# when ``load_data`` itself carried the cache decorator.
load_data.clear = _read_csv_cached.clear
# URL input with default value. Surrounding whitespace (easy to pick up when
# pasting) is stripped so it is not passed on to the CSV reader verbatim.
# NOTE(review): pd.read_csv also accepts local file paths, so this field can
# be used to read files on the machine running the app -- confirm that is
# acceptable for the intended deployment.
url = st.text_input(
    "Enter CSV URL",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
).strip()

if url:
    data = load_data(url)
else:
    # Nothing to fetch: prompt the user rather than surfacing a file-not-found
    # error from pandas for an empty path.
    st.info("Enter the URL of a CSV file to load.")
    data = None
def _build_chart(frame, var1, var2=None):
    """Choose an Altair chart that suits the dtypes of one or two columns.

    Args:
        frame: DataFrame holding the columns to plot.
        var1: Name of the first column.
        var2: Name of the optional second column, or ``None``.

    Returns:
        An ``alt.Chart``:
        * one numeric column      -> binned histogram
        * one categorical column  -> horizontal count bars, sorted by count
        * numeric + numeric       -> scatter plot
        * numeric + categorical   -> box plot (category on x, numeric on y)
        * categorical + categorical -> count heatmap
    """
    first_is_numeric = pd.api.types.is_numeric_dtype(frame[var1])

    if var2 is None:
        if first_is_numeric:
            # Histogram for numeric variables
            return alt.Chart(frame).mark_bar().encode(
                alt.X(f"{var1}:Q", bin=True),
                y="count()",
            ).properties(title=f"Distribution of {var1}")
        # Bar chart for categorical variables
        return alt.Chart(frame).mark_bar().encode(
            x="count()",
            y=alt.Y(f"{var1}:N", sort="-x"),
        ).properties(title=f"Distribution of {var1}")

    second_is_numeric = pd.api.types.is_numeric_dtype(frame[var2])

    if first_is_numeric and second_is_numeric:
        # Scatter plot for two numeric variables
        return alt.Chart(frame).mark_circle().encode(
            x=f"{var1}:Q",
            y=f"{var2}:Q",
        ).properties(title=f"{var1} vs {var2}")

    if first_is_numeric != second_is_numeric:
        # Exactly one column is numeric: box plot with the categorical column
        # on x and the numeric one on y, whichever order they were picked in.
        if first_is_numeric:
            numeric, categorical = var1, var2
        else:
            numeric, categorical = var2, var1
        return alt.Chart(frame).mark_boxplot().encode(
            x=f"{categorical}:N",
            y=f"{numeric}:Q",
        ).properties(title=f"{numeric} by {categorical}")

    # Heatmap for two categorical variables
    return alt.Chart(frame).mark_rect().encode(
        x=f"{var1}:N",
        y=f"{var2}:N",
        color="count()",
    ).properties(title=f"Frequency of {var1} vs {var2}")


if data is not None and data.empty:
    # With no rows the row-count control below would be configured with
    # value=0 and max_value=0 against min_value=1, which is not a valid range,
    # so report the situation instead of rendering the controls.
    st.warning("The CSV file contains no data to display.")
elif data is not None:
    # Controls for data display
    col1, col2 = st.columns(2)
    with col1:
        n_rows = st.number_input(
            "Number of rows to display",
            min_value=1,
            max_value=len(data),
            value=min(10, len(data)),
        )
    with col2:
        shuffle = st.checkbox("Randomly shuffle data")

    # Shuffle data if requested. The fixed seed keeps the shuffled order the
    # same on every call, so it does not change between script runs.
    if shuffle:
        data = data.sample(frac=1, random_state=42).reset_index(drop=True)

    # Display the dataframe
    st.subheader("Data Preview")
    st.dataframe(data.head(n_rows))

    # Variable selection for visualization
    st.subheader("Visualization")
    variables = list(
        data.select_dtypes(include=[np.number, 'category', 'object']).columns
    )

    if not variables:
        # Without any option the first selectbox has nothing to return and
        # indexing the frame with its result would fail, so skip the chart.
        st.info("No numeric or categorical columns are available to visualize.")
    else:
        col1, col2 = st.columns(2)
        with col1:
            var1 = st.selectbox("Select first variable", variables)
        with col2:
            # ``None`` is offered first so a single-variable chart is the default.
            var2 = st.selectbox(
                "Select second variable",
                [None] + [v for v in variables if v != var1],
            )

        chart = _build_chart(data, var1, var2)
        st.altair_chart(chart, use_container_width=True)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?