Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (caching the result) and display it as a DataFrame. Add controls to do the following: - Control how many rows are displayed - A checkbox to determine whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np
# Wrapped in Streamlit's data cache so the CSV loading is cached
@st.cache_data
def load_data(url):
    """Read the CSV located at *url* and return it as a pandas DataFrame."""
    frame = pd.read_csv(url)
    return frame
# Page heading
st.title("Data Visualization App")

# Text box for the CSV location, pre-filled with the sample penguins dataset
default_csv_url = (
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)
url = st.text_input("Enter CSV URL", value=default_csv_url)
def _is_numeric(series):
    """Return True when *series* should be plotted as a numeric variable.

    pandas reports booleans as numeric, but they are treated as categories
    here: a count per value reads better than a histogram of 0/1.
    """
    dtype_checks = pd.api.types
    return (
        dtype_checks.is_numeric_dtype(series)
        and not dtype_checks.is_bool_dtype(series)
    )


def _single_variable_figure(df, var):
    """Build the chart for one variable: histogram if numeric, else bar chart."""
    if _is_numeric(df[var]):
        return px.histogram(df, x=var, title=f"Distribution of {var}")

    # Name both columns of the counts table explicitly; the names produced by
    # a bare ``value_counts().reset_index()`` differ between pandas versions.
    count_col = "count" if var != "count" else "n"
    counts = df[var].value_counts().rename_axis(var).reset_index(name=count_col)
    return px.bar(counts, x=var, y=count_col, title=f"Distribution of {var}")


def _two_variable_figure(df, var1, var2):
    """Build the chart for a pair of variables based on their kinds.

    numeric/numeric -> scatter, numeric/categorical -> box plot grouped by the
    categorical variable, categorical/categorical -> contingency heatmap.
    """
    first_numeric = _is_numeric(df[var1])
    second_numeric = _is_numeric(df[var2])

    if first_numeric and second_numeric:
        return px.scatter(
            df, x=var1, y=var2, title=f"Scatter plot: {var1} vs {var2}"
        )
    if first_numeric:
        return px.box(df, x=var2, y=var1, title=f"Box plot: {var1} by {var2}")
    if second_numeric:
        return px.box(df, x=var1, y=var2, title=f"Box plot: {var2} by {var1}")

    contingency = pd.crosstab(df[var1], df[var2])
    return px.imshow(
        contingency, title=f"Relationship between {var1} and {var2}"
    )


def _render_app(df):
    """Draw the shuffle/row controls, the data preview and the chart for *df*."""
    # Shuffle control. The seed is kept in session state while the box stays
    # ticked, so touching another widget (which reruns the script) does not
    # reshuffle the rows; unticking and re-ticking draws a new order.
    shuffle = st.checkbox("Randomly shuffle data")
    if shuffle:
        if "shuffle_seed" not in st.session_state:
            st.session_state["shuffle_seed"] = int(
                np.random.randint(0, 2**31 - 1)
            )
        df = df.sample(
            frac=1, random_state=st.session_state["shuffle_seed"]
        ).reset_index(drop=True)
    elif "shuffle_seed" in st.session_state:
        del st.session_state["shuffle_seed"]

    if df.empty:
        # Nothing to preview or plot; also avoids an invalid slider range.
        st.warning("The CSV file contains no data.")
        st.dataframe(df)
        return

    # Number of rows to display. A slider needs min < max, so a one-row frame
    # is shown in full without a slider.
    total_rows = len(df)
    if total_rows > 1:
        n_rows = st.slider(
            "Number of rows to display", 1, total_rows, min(10, total_rows)
        )
    else:
        n_rows = total_rows

    # Display the dataframe
    st.subheader("Data Preview")
    st.dataframe(df.head(n_rows))

    # Variable selection: offer every column; the numeric/categorical split
    # is decided per column when the chart is built.
    columns = list(df.columns)
    var1 = st.selectbox("Select first variable", columns)
    var2 = st.selectbox("Select second variable (optional)", ["None"] + columns)

    # Picking the same column twice is handled as a single-variable plot.
    if var2 == "None" or var2 == var1:
        fig = _single_variable_figure(df, var1)
    else:
        fig = _two_variable_figure(df, var1, var2)
    st.plotly_chart(fig)


# Top-level boundary of the app: any failure (bad URL, unparsable CSV,
# plotting error) is reported in the page instead of a raw traceback.
try:
    _render_app(load_data(url))
except Exception as e:
    st.error(f"Error: {str(e)}")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?