Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (caching the result) and display it as a DataFrame. Add controls to do the following: - A control for how many rows are displayed - A checkbox that determines whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
@st.cache_data
def _read_csv_cached(url):
    """Fetch and parse the CSV at ``url``; Streamlit caches the result per URL.

    Exceptions raised by ``pd.read_csv`` propagate to the caller, so only
    successful reads are stored in the cache.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load the CSV at ``url`` as a DataFrame, reporting failures in the UI.

    The error handling deliberately lives outside the cached function: if
    the ``try/except`` were inside it, a failed read would cache ``None``
    and the same URL would keep failing even after a transient problem
    (network hiccup, file not yet published) had cleared.

    Args:
        url: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` if reading failed (in which case
        an ``st.error`` message has been shown).
    """
    try:
        return _read_csv_cached(url)
    except Exception as e:
        # Broad on purpose: this is the UI boundary, and read_csv can fail
        # with URL, HTTP, parser, or value errors depending on the input.
        st.error(f"Error loading data: {str(e)}")
        return None


# Keep the cache-clearing handle that the decorated function used to expose.
load_data.clear = _read_csv_cached.clear
# URL input with default value
url = st.text_input(
    "Enter CSV URL",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
)

# Load the data; load_data returns None after showing an error on failure.
df = load_data(url)

if df is not None and df.empty:
    # The row slider and every plot below need at least one row of data.
    st.warning("The loaded CSV contains no data to display.")
elif df is not None:
    # Shuffle control. The fixed seed keeps the shuffled order stable while
    # the user interacts with the other widgets (each interaction reruns
    # this script from the top).
    shuffle = st.checkbox("Randomly shuffle data")
    if shuffle:
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Number of rows to display
    total_rows = len(df)
    if total_rows > 1:
        n_rows = st.slider(
            "Number of rows to display", 1, total_rows, min(10, total_rows)
        )
    else:
        # With a single row there is nothing to choose, and a 1..1 range is
        # not a usable slider range, so skip the widget.
        n_rows = total_rows

    # Display the dataframe
    st.write("### Data Preview")
    st.dataframe(df.head(n_rows))

    # Variable selection: numeric columns first, then everything else
    # (strings, booleans, dates, ...) treated as categorical.
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = df.select_dtypes(exclude=[np.number]).columns.tolist()
    all_cols = numeric_cols + categorical_cols

    col1, col2 = st.columns(2)
    with col1:
        var1 = st.selectbox("Select first variable", all_cols)
    with col2:
        # The second variable is optional and can never equal the first.
        remaining_cols = [col for col in all_cols if col != var1]
        var2 = st.selectbox(
            "Select second variable (optional)", ["None"] + remaining_cols
        )

    st.write("### Visualization")
    var1_is_numeric = var1 in numeric_cols
    fig, ax = plt.subplots()
    try:
        if var2 == "None":
            # Single variable: distribution for numbers, counts for categories.
            if var1_is_numeric:
                sns.histplot(data=df, x=var1, ax=ax)
            else:
                sns.countplot(data=df, x=var1, ax=ax)
                ax.tick_params(axis="x", labelrotation=45)
        else:
            var2_is_numeric = var2 in numeric_cols
            if var1_is_numeric and var2_is_numeric:
                # Both numeric: scatter plot.
                sns.scatterplot(data=df, x=var1, y=var2, ax=ax)
            elif not var1_is_numeric and not var2_is_numeric:
                # Both categorical: heatmap of co-occurrence counts.
                cross_tab = pd.crosstab(df[var1], df[var2])
                sns.heatmap(cross_tab, annot=True, cmap="YlGnBu", ax=ax)
                ax.tick_params(axis="x", labelrotation=45)
                ax.tick_params(axis="y", labelrotation=0)
            else:
                # One of each: box plot of the numeric variable per category.
                num_var = var1 if var1_is_numeric else var2
                cat_var = var2 if var1_is_numeric else var1
                sns.boxplot(data=df, x=cat_var, y=num_var, ax=ax)
                ax.tick_params(axis="x", labelrotation=45)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this, each rerun of the script would leak one figure.
        plt.close(fig)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?