Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (and cache it) and display it as a DataFrame. Add controls to do the following: - Control how many rows are displayed - A checkbox to determine whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
Drop files here
or click to upload
import streamlit as st
import pandas as pd
import plotly.express as px
# Cache only successful downloads. An exception raised inside the cached
# function propagates to the caller and no value is stored, so a transient
# network failure is retried on the next rerun instead of being remembered.
@st.cache_data
def _fetch_csv(url):
    """Download the CSV at *url* through the CORS proxy and parse it.

    Raises whatever ``pd.read_csv`` raises (network or parse errors).
    """
    from urllib.parse import quote

    # NOTE: Using corsproxy.io because we're in a WASM environment. If running locally,
    # you can remove the corsproxy.io prefix. Some websites don't work with the proxy,
    # in those cases try removing the proxy prefix.
    # The target is percent-encoded so that '&', '#', etc. inside it are not
    # interpreted as part of the proxy's own query string.
    return pd.read_csv(f"https://corsproxy.io/?{quote(url, safe='')}")


def load_data(url):
    """Load a CSV file from *url* into a DataFrame.

    Args:
        url: Address of the CSV file; must start with ``http://`` or
            ``https://`` (surrounding whitespace is ignored).

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` if the URL is invalid or
        the download/parse failed. In the failure cases an error message is
        shown in the app via ``st.error``.
    """
    url = url.strip()
    if not url.startswith(("http://", "https://")):
        st.error('Please enter a valid URL starting with http:// or https://')
        return None
    try:
        return _fetch_csv(url)
    except Exception as e:  # UI boundary: report any failure instead of crashing
        st.error(f"Error loading data: {str(e)}")
        return None
st.title("CSV Viewer and Plotter")

# URL input; the default is the CSV named in the app's specification.
url = st.text_input(
    "Enter CSV URL",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
)

data = load_data(url)

if data is not None and data.empty:
    # A header-only CSV has zero rows; the row-count input below requires
    # min_value (1) <= max_value (len(data)), so bail out with a message.
    st.warning("The CSV file contains no data to display.")
elif data is not None:
    # Controls
    col1, col2 = st.columns(2)
    with col1:
        n_rows = st.number_input(
            "Number of rows to display",
            min_value=1,
            max_value=len(data),
            value=min(10, len(data)),
        )
    with col2:
        shuffle = st.checkbox("Randomly shuffle data")

    # Shuffle if requested (a new random order is drawn on every rerun)
    if shuffle:
        data = data.sample(frac=1).reset_index(drop=True)

    # Display data
    st.subheader("Data Preview")
    st.dataframe(data.head(n_rows))

    # Variable selection for plotting
    st.subheader("Data Visualization")

    # Split columns into numeric and categorical. Every numeric dtype counts
    # as numeric (not only float64/int64), and every remaining column is
    # treated as categorical so that no column is dropped from the selectors.
    numeric_cols = data.select_dtypes(include="number").columns.tolist()
    categorical_cols = [col for col in data.columns if col not in numeric_cols]
    all_cols = numeric_cols + categorical_cols

    col1, col2 = st.columns(2)
    with col1:
        var1 = st.selectbox("Select first variable", all_cols)
    with col2:
        # NOTE: "None" is the sentinel for "no second variable"; a column
        # literally named "None" cannot be told apart from it.
        remaining_cols = [col for col in all_cols if col != var1]
        var2 = st.selectbox(
            "Select second variable (optional)",
            ["None"] + remaining_cols,
        )

    # Plotting logic: the chart type follows from the kinds of the variables.
    var1_is_numeric = var1 in numeric_cols
    if var2 == "None":
        # Single variable visualization
        if var1_is_numeric:
            # For numeric variables, show histogram
            fig = px.histogram(data, x=var1, title=f"Distribution of {var1}")
        else:
            # For categorical variables, show bar chart of value counts
            counts = data[var1].value_counts()
            fig = px.bar(
                x=counts.index,
                y=counts.values,
                title=f"Distribution of {var1}",
            )
    else:
        # Two variable visualization
        var2_is_numeric = var2 in numeric_cols
        if var1_is_numeric and var2_is_numeric:
            # Both numeric: scatter plot
            fig = px.scatter(data, x=var1, y=var2, title=f"{var2} vs {var1}")
        elif var1_is_numeric:
            # One numeric, one categorical: box plot of var1 grouped by var2
            fig = px.box(
                data, x=var2, y=var1,
                title=f"Distribution of {var1} by {var2}",
            )
        elif var2_is_numeric:
            # One categorical, one numeric: box plot of var2 grouped by var1
            fig = px.box(
                data, x=var1, y=var2,
                title=f"Distribution of {var2} by {var1}",
            )
        else:
            # Both categorical: heatmap of the contingency table
            contingency = pd.crosstab(data[var1], data[var2])
            fig = px.imshow(
                contingency,
                title=f"Relationship between {var1} and {var2}",
            )

    st.plotly_chart(fig)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?