Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (caching the result) and display it as a DataFrame. Add controls to do the following: - Control how many rows are displayed - A checkbox to determine whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from scipy import stats
@st.cache_data
def load_data(url):
    """Fetch the CSV at *url* through the CORS proxy and return it as a DataFrame.

    The result is cached by Streamlit (``st.cache_data``), so the same URL is
    not re-read on every script rerun.
    """
    # NOTE: Using corsproxy.io because we're in a WASM environment. If running locally,
    # you can remove the corsproxy.io prefix. Some websites don't work with the proxy,
    # in those cases try removing the proxy prefix.
    proxy_prefix = 'https://corsproxy.io/?'
    # Only prepend the proxy when the caller has not already done so.
    proxied_url = url if url.startswith(proxy_prefix) else f'{proxy_prefix}{url}'
    return pd.read_csv(proxied_url)
# Page heading shown at the top of the app
st.title('CSV Data Visualizer')

# Text field for the CSV location, pre-filled with the seaborn penguins dataset
url = st.text_input(
    label='Enter CSV URL',
    value='https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv',
)
def _is_numeric(series):
    """Return True when *series* has any numeric dtype, not just int64/float64."""
    return pd.api.types.is_numeric_dtype(series)


def _plot_single_variable(df, var):
    """Chart one column: histogram plus summary table if numeric, else a bar chart of counts."""
    if _is_numeric(df[var]):
        # Numeric variable -> histogram
        fig = px.histogram(df, x=var, title=f'Distribution of {var}')
        st.plotly_chart(fig)
        # Display summary statistics
        st.write(f"Summary statistics for {var}:")
        stats_df = pd.DataFrame({
            'Metric': ['Mean', 'Median', 'Std Dev', 'Min', 'Max'],
            'Value': [
                df[var].mean(),
                df[var].median(),
                df[var].std(),
                df[var].min(),
                df[var].max(),
            ],
        })
        st.table(stats_df)
    else:
        # Categorical variable -> bar chart of value frequencies
        value_counts = df[var].value_counts()
        fig = px.bar(x=value_counts.index,
                     y=value_counts.values,
                     title=f'Distribution of {var}')
        st.plotly_chart(fig)


def _plot_variable_pair(df, var1, var2):
    """Chart two columns: scatter (both numeric), box plot (mixed) or heatmap (both categorical)."""
    var1_numeric = _is_numeric(df[var1])
    var2_numeric = _is_numeric(df[var2])
    if var1_numeric and var2_numeric:
        # Both numeric -> scatter plot
        fig = px.scatter(df, x=var1, y=var2,
                         title=f'Scatter plot: {var1} vs {var2}')
        st.plotly_chart(fig)
        # Pearson's r is computed on complete pairs only; rows with a missing
        # value in either column would otherwise poison the result.
        complete_pairs = df[[var1, var2]].dropna()
        if len(complete_pairs) < 2:
            st.write("Correlation coefficient: not enough complete rows to compute")
        else:
            correlation = stats.pearsonr(complete_pairs[var1], complete_pairs[var2])[0]
            st.write(f"Correlation coefficient: {correlation:.3f}")
    elif var1_numeric or var2_numeric:
        # One numeric, one categorical -> box plot
        numeric_var, categorical_var = (var1, var2) if var1_numeric else (var2, var1)
        fig = px.box(df, x=categorical_var, y=numeric_var,
                     title=f'Box plot: {numeric_var} by {categorical_var}')
        st.plotly_chart(fig)
    else:
        # Both categorical -> heatmap of the contingency table
        contingency = pd.crosstab(df[var1], df[var2])
        fig = px.imshow(contingency,
                        title=f'Heatmap: {var1} vs {var2}')
        st.plotly_chart(fig)


# Top-level boundary: any failure (bad URL, unparsable CSV, plotting error) is
# reported in the page via st.error instead of crashing the script.
try:
    # Load data
    df = load_data(url)
    if df.empty:
        # number_input below requires min_value <= max_value, which an empty
        # frame (len 0) cannot satisfy, so bail out with a clear message.
        st.warning('The CSV file contains no rows to display.')
    else:
        # Controls
        col1, col2 = st.columns(2)
        with col1:
            n_rows = st.number_input('Number of rows to display',
                                     min_value=1,
                                     max_value=len(df),
                                     value=min(10, len(df)))
        with col2:
            shuffle = st.checkbox('Randomly shuffle data')
        # Process data based on controls
        if shuffle:
            display_df = df.sample(frac=1).reset_index(drop=True)
        else:
            display_df = df.copy()
        # Display dataframe
        st.subheader('Data Preview')
        st.dataframe(display_df.head(n_rows))
        # Variable selection
        st.subheader('Visualization')
        cols = df.select_dtypes(include=[np.number, 'object']).columns
        if len(cols) == 0:
            # Without options selectbox yields None and df[None] would raise.
            st.info('No numeric or text columns are available to visualize.')
        else:
            col1, col2 = st.columns(2)
            with col1:
                var1 = st.selectbox('Select first variable', cols)
            with col2:
                remaining_cols = [col for col in cols if col != var1]
                # The string 'None' is the "no second variable" sentinel.
                var2 = st.selectbox('Select second variable (optional)',
                                    ['None'] + list(remaining_cols))
            if var2 == 'None':
                # Single variable visualization
                _plot_single_variable(df, var1)
            else:
                # Two variable visualization
                _plot_variable_pair(df, var1, var2)
except Exception as e:
    st.error(f"Error: {str(e)}")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?