Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (caching the result) and display it as a DataFrame. Add controls to do the following:
- A control for how many rows are displayed
- A checkbox that determines whether to randomly shuffle the DataFrame
Users select one or two variables, and you must determine the best way to visualize them.
Drop files here
or click to upload
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
# Memoize the download so repeated calls with the same URL reuse the result
@st.cache_data
def load_data(url):
    """Read the CSV located at *url* and return it as a pandas DataFrame.

    The function is decorated with ``st.cache_data``; any error raised by
    ``pd.read_csv`` propagates to the caller.
    """
    frame = pd.read_csv(url)
    return frame
# Ask the user for the CSV location, pre-filled with the penguins dataset
DEFAULT_CSV_URL = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
url = st.text_input("Enter CSV URL", value=DEFAULT_CSV_URL)
def _is_numeric(series):
    """Return True when *series* has any numeric dtype.

    Uses pandas' dtype introspection rather than comparing against the
    strings 'int64'/'float64', so int32, float32, unsigned and nullable
    numeric columns are classified as numeric too.
    """
    return pd.api.types.is_numeric_dtype(series)


def _build_figure(df, var1, var2=None):
    """Choose and build a Plotly figure for one or two columns of *df*.

    Args:
        df: DataFrame holding the data to plot.
        var1: Name of the first column.
        var2: Name of the optional second column, or None for a
            single-variable chart.

    Returns:
        A histogram (one numeric column), a bar chart of value counts (one
        non-numeric column), a scatter plot (two numeric columns), a box
        plot (one numeric and one non-numeric column), or a heatmap of the
        cross-tabulation (two non-numeric columns).
    """
    first_is_numeric = _is_numeric(df[var1])

    if var2 is None:
        if first_is_numeric:
            return px.histogram(df, x=var1, title=f"Distribution of {var1}")
        counts = df[var1].value_counts()
        return px.bar(x=counts.index, y=counts.values, title=f"Distribution of {var1}")

    second_is_numeric = _is_numeric(df[var2])

    if first_is_numeric and second_is_numeric:
        return px.scatter(df, x=var1, y=var2, title=f"{var2} vs {var1}")
    if first_is_numeric:
        # Numeric var1 grouped by the non-numeric var2.
        return px.box(df, x=var2, y=var1, title=f"Distribution of {var1} by {var2}")
    if second_is_numeric:
        # Numeric var2 grouped by the non-numeric var1.
        return px.box(df, x=var1, y=var2, title=f"Distribution of {var2} by {var1}")

    # Both non-numeric: show co-occurrence counts as a heatmap.
    crosstab = pd.crosstab(df[var1], df[var2])
    fig = go.Figure(data=go.Heatmap(
        z=crosstab.values,
        x=crosstab.columns,
        y=crosstab.index,
        colorscale='Viridis'))
    fig.update_layout(title=f"Relationship between {var1} and {var2}")
    return fig


# Top-level UI boundary: any failure while loading or rendering is reported
# to the user through st.error instead of a raw traceback.
try:
    # Load the data
    df = load_data(url)

    # Control for shuffling. The fixed seed means the shuffled order is the
    # same every time the box is ticked.
    if st.checkbox("Randomly shuffle data"):
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Control number of rows to display. With fewer than two rows there is
    # no range to pick from (and 1..0 would be an invalid slider range), so
    # the slider is skipped and every available row is shown.
    total_rows = len(df)
    if total_rows > 1:
        num_rows = st.slider(
            "Number of rows to display", 1, total_rows, min(10, total_rows)
        )
    else:
        num_rows = total_rows

    # Display the dataframe
    st.write("### Data Preview")
    st.dataframe(df.head(num_rows))

    # Variable selection: only numeric and object columns are offered.
    cols = list(df.select_dtypes(include=[np.number, 'object']).columns)
    if cols:
        var1 = st.selectbox("Select first variable", cols)
        # A real None marks "no second variable", so a column that happens
        # to be named "None" stays selectable; format_func keeps the label.
        var2 = st.selectbox(
            "Select second variable (optional)",
            [None] + cols,
            format_func=lambda col: "None" if col is None else str(col),
        )

        st.write("### Visualization")
        st.plotly_chart(_build_figure(df, var1, var2))
    else:
        st.warning("No numeric or text columns are available to visualize.")
except Exception as e:
    st.error(f"Error loading or processing data: {str(e)}")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?