Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (and cache it) and display it as a DataFrame. Add controls to do the following:
- Control how many rows are displayed
- A checkbox that determines whether to randomly shuffle the DataFrame
Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import plotly.express as px
import seaborn as sns
from typing import Optional, Tuple, List
@st.cache_data
def _read_csv_cached(url: str) -> pd.DataFrame:
    """Read the CSV at ``url`` with ``pd.read_csv``; the result is cached.

    Exceptions from ``pd.read_csv`` are deliberately not caught here: they
    propagate to the caller so that a failed read does not produce a cached
    return value.
    """
    return pd.read_csv(url)


def load_data(url: str) -> Optional[pd.DataFrame]:
    """Load the CSV at ``url`` into a DataFrame, caching successful loads.

    Args:
        url: Location of the CSV file, passed unchanged to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` if reading failed. On failure the
        error message is shown in the app via ``st.error``.
    """
    try:
        return _read_csv_cached(url)
    except Exception as e:
        # The failure is handled outside the cached function on purpose: if
        # the cached function itself returned None, that None would be stored
        # for this URL and a transient network error would never be retried.
        st.error(f"Error loading data: {str(e)}")
        return None


# Keep ``load_data.clear()`` working for callers that reset the cache through
# the public function, as they could when it was decorated directly.
load_data.clear = _read_csv_cached.clear  # type: ignore[attr-defined]
def _is_numeric(series: pd.Series) -> bool:
    """Return True for numeric columns of any width, excluding booleans."""
    # Booleans count as numeric for pandas, but a histogram of True/False is
    # not useful, so they are treated as categorical here.
    return pd.api.types.is_numeric_dtype(series) and not pd.api.types.is_bool_dtype(series)


def plot_data(df: pd.DataFrame, variables: List[str]) -> None:
    """Plot one or two columns of ``df`` with a chart suited to their types.

    Chart selection:
        * one numeric column            -> histogram
        * one non-numeric column        -> bar chart of value counts
        * two numeric columns           -> scatter plot
        * one numeric, one non-numeric  -> box plot of the numeric column
          grouped by the other
        * two non-numeric columns       -> heatmap of their cross-tabulation

    Args:
        df: Data to plot.
        variables: Names of the columns to plot. Nothing is drawn unless
            exactly one or two names are given.
    """
    if len(variables) == 1:
        var = variables[0]
        if _is_numeric(df[var]):
            # Numerical variable - show histogram
            fig = px.histogram(df, x=var, title=f"Distribution of {var}")
        else:
            # Categorical variable - show count plot.
            # Name both output columns explicitly: the column names produced by
            # ``value_counts().reset_index()`` differ between pandas 1.x and
            # 2.x, so relying on them ('index' / var) breaks on newer versions.
            count_col = "count" if var != "count" else "n"
            counts = (
                df[var]
                .value_counts()
                .rename_axis(var)
                .reset_index(name=count_col)
            )
            fig = px.bar(counts, x=var, y=count_col, title=f"Count plot of {var}")
    elif len(variables) == 2:
        var1, var2 = variables
        first_numeric = _is_numeric(df[var1])
        second_numeric = _is_numeric(df[var2])
        if first_numeric and second_numeric:
            # Both numerical - scatter plot
            fig = px.scatter(df, x=var1, y=var2, title=f"{var1} vs {var2}")
        elif first_numeric or second_numeric:
            # One numerical, one categorical - box plot
            num_var, cat_var = (var1, var2) if first_numeric else (var2, var1)
            fig = px.box(df, x=cat_var, y=num_var, title=f"{num_var} by {cat_var}")
        else:
            # Both categorical - heatmap
            contingency = pd.crosstab(df[var1], df[var2])
            fig = px.imshow(
                contingency,
                title=f"Relationship between {var1} and {var2}",
                labels=dict(x=var2, y=var1, color="Count"),
            )
    else:
        # Zero or more than two variables: nothing to draw.
        return
    st.plotly_chart(fig)
# Main app: URL input -> cached CSV load -> table preview -> visualization.
st.title("Data Explorer")

# URL input
url = st.text_input(
    "Enter CSV URL",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
)

# Load data
if url:
    df = load_data(url)
    if df is not None and df.empty:
        # A header-only CSV has zero rows. The row-count control below would
        # then be built with min_value=1 and max_value=0, which
        # st.number_input rejects, so show the empty table and stop here.
        st.warning("The CSV file contains no rows.")
        st.dataframe(df)
    elif df is not None:
        # Controls
        col1, col2 = st.columns(2)
        with col1:
            n_rows = st.number_input(
                "Number of rows to display",
                min_value=1,
                max_value=len(df),
                value=min(5, len(df)),
            )
        with col2:
            shuffle = st.checkbox("Randomly shuffle data")

        # Display data. sample(frac=1) returns every row in random order;
        # the index is reset so the preview is numbered from 0.
        if shuffle:
            display_df = df.sample(frac=1).reset_index(drop=True)
        else:
            display_df = df
        st.dataframe(display_df.head(n_rows))

        # Variable selection
        st.subheader("Visualization")
        cols = df.columns.tolist()
        vars_to_plot = st.multiselect(
            "Select 1 or 2 variables to visualize",
            options=cols,
            max_selections=2,
        )
        # Plots always use the full, unshuffled frame, not the preview.
        if vars_to_plot:
            plot_data(df, vars_to_plot)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?