Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (and cache it) and display a DataFrame. Add controls to do the following: - Control how many rows are displayed - A checkbox to determine whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import plotly.express as px
import numpy as np
@st.cache_data
def load_data(url):
    """Read the CSV located at *url* and return it as a pandas DataFrame.

    The function is wrapped in ``st.cache_data``, so Streamlit caches the
    returned DataFrame per distinct ``url`` argument.
    """
    # NOTE: Requests go through corsproxy.io because the app runs in a WASM
    # environment. When running locally, the proxy prefix can be removed.
    proxy_prefix = 'https://corsproxy.io/?'
    proxied_url = url if url.startswith(proxy_prefix) else f'{proxy_prefix}{url}'
    return pd.read_csv(proxied_url)
def _build_figure(df, var1, var2, numeric_cols):
    """Return a Plotly figure suited to the dtypes of the selected column(s).

    Args:
        df: DataFrame holding the data to plot.
        var1: Name of the first selected column.
        var2: Name of the second selected column, or ``None`` when only one
            variable should be visualized.
        numeric_cols: Names of the columns treated as numeric; every other
            column is treated as categorical.

    Returns:
        A histogram (one numeric), bar chart of counts (one categorical),
        scatter plot (two numeric), box plot (one numeric, one categorical)
        or heatmap of the contingency table (two categorical).
    """
    first_is_numeric = var1 in numeric_cols

    if var2 is None:
        if first_is_numeric:
            return px.histogram(df, x=var1, title=f'Distribution of {var1}')
        # Name both output columns explicitly: the column names produced by
        # value_counts().reset_index() changed in pandas 2.0, so relying on
        # the implicit 'index' column breaks on newer versions.
        count_col = 'count' if var1 != 'count' else 'n'
        counts = (
            df[var1]
            .value_counts()
            .rename_axis(var1)
            .reset_index(name=count_col)
        )
        return px.bar(counts, x=var1, y=count_col, title=f'Count of {var1}')

    second_is_numeric = var2 in numeric_cols

    if first_is_numeric and second_is_numeric:
        # Two numeric variables: scatter plot
        return px.scatter(df, x=var1, y=var2,
                          title=f'Scatter plot of {var1} vs {var2}')

    if first_is_numeric != second_is_numeric:
        # One numeric, one categorical: box plot of the numeric variable
        # grouped by the categorical one, regardless of selection order.
        if first_is_numeric:
            numeric_var, group_var = var1, var2
        else:
            numeric_var, group_var = var2, var1
        return px.box(df, x=group_var, y=numeric_var,
                      title=f'Distribution of {numeric_var} by {group_var}')

    # Two categorical variables: heatmap of the contingency table
    contingency = pd.crosstab(df[var1], df[var2])
    return px.imshow(contingency,
                     title=f'Relationship between {var1} and {var2}')


st.title('Data Explorer')

# URL input
url = st.text_input(
    'Enter CSV URL',
    value='https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv'
)

# Top-level boundary of the app: any failure while loading or rendering is
# reported to the user through st.error instead of a raw traceback.
try:
    # Load data
    df = load_data(url)

    if df.empty:
        # A CSV with a header but no rows would otherwise make the row-count
        # input invalid (min_value=1 > max_value=0).
        st.warning('The CSV file contains no rows to display.')
    else:
        # Controls
        col1, col2 = st.columns(2)
        with col1:
            n_rows = st.number_input('Number of rows to display',
                                     min_value=1,
                                     max_value=len(df),
                                     value=min(10, len(df)))
        with col2:
            shuffle = st.checkbox('Randomly shuffle data')

        # Process data based on controls
        if shuffle:
            display_df = df.sample(frac=1).reset_index(drop=True)
        else:
            display_df = df

        # Display dataframe
        st.subheader('Data Preview')
        st.dataframe(display_df.head(n_rows))

        # Variable selection for visualization
        st.subheader('Visualization')
        numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
        categorical_cols = df.select_dtypes(exclude=[np.number]).columns.tolist()
        all_cols = numeric_cols + categorical_cols

        var1 = st.selectbox('Select first variable', all_cols)
        var2 = st.selectbox('Select second variable (optional)',
                            ['None'] + all_cols)

        # 'None' is the "no second variable" sentinel; picking the same column
        # twice carries no extra information, so treat it as a single-variable
        # plot as well.
        second_var = None if var2 in ('None', var1) else var2

        fig = _build_figure(df, var1, second_var, numeric_cols)
        st.plotly_chart(fig)
except Exception as e:
    st.error(f'Error: {str(e)}')
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?