Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (caching the result) and display it as a DataFrame. Add controls to do the following: - Control how many rows are displayed - A checkbox that determines whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
Drop files here
or click to upload
import streamlit as st
import pandas as pd
import seaborn as sns
import numpy as np
# Cache only successful downloads. The cached helper lets exceptions
# propagate, so a failed fetch is never stored and the next rerun retries.
@st.cache_data
def _read_csv_cached(url):
    """Read the CSV at *url* into a DataFrame, raising on any failure."""
    return pd.read_csv(url)


def load_data(url):
    """Load the CSV located at *url*.

    Args:
        url: Location passed straight to ``pandas.read_csv``.

    Returns:
        A ``(df, error)`` pair: ``(DataFrame, None)`` on success, or
        ``(None, message)`` where ``message`` is ``str()`` of the exception
        raised while reading.
    """
    try:
        return _read_csv_cached(url), None
    except Exception as e:
        # UI boundary: any read/parse failure is reported to the caller as
        # text. This wrapper is deliberately NOT cached, so a transient
        # network error is not replayed from the cache for the same URL.
        return None, str(e)


# Keep ``load_data.clear()`` working for callers that reset the cache.
load_data.clear = _read_csv_cached.clear
# --- Page header and data source ---
DEFAULT_CSV_URL = (
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)

st.title("CSV Viewer & Plotter")

# Text box for the CSV location, pre-filled with the default dataset URL.
url = st.text_input("Enter CSV URL", value=DEFAULT_CSV_URL)

# load_data hands back a (DataFrame, error-message) pair; exactly one of the
# two is None, and the code that follows branches on `error`.
df, error = load_data(url)
# Render the page body: an error message, an empty-data notice, or the
# controls + preview table + automatically chosen plot.
if error:
    st.error(f"Error loading data: {error}")
elif df.empty:
    # With zero rows the row-count widget below would be asked for
    # min_value=1 > max_value=0 and raise; with zero columns there is
    # nothing to select or plot. Report it instead of crashing.
    st.warning("The CSV loaded but contains no rows or columns to display.")
else:
    # Controls
    col1, col2 = st.columns(2)
    with col1:
        n_rows = st.number_input(
            "Number of rows to display",
            min_value=1,
            max_value=len(df),
            value=min(10, len(df)),
        )
    with col2:
        shuffle = st.checkbox("Randomly shuffle data")

    # Process DataFrame. The fixed seed keeps the shuffled order stable
    # across reruns, so changing another widget does not reshuffle the table.
    display_df = df.sample(frac=1, random_state=42) if shuffle else df.copy()

    # Display DataFrame
    st.subheader("Data Preview")
    st.dataframe(display_df.head(n_rows))

    # Variable selection for plotting
    st.subheader("Data Visualization")
    numeric_cols = display_df.select_dtypes(include=[np.number]).columns
    categorical_cols = display_df.select_dtypes(exclude=[np.number]).columns
    all_cols = list(numeric_cols) + list(categorical_cols)

    col1, col2 = st.columns(2)
    with col1:
        var1 = st.selectbox("Select first variable", all_cols)
    with col2:
        remaining_cols = [col for col in all_cols if col != var1]
        var2 = st.selectbox(
            "Select second variable (optional)",
            ["None"] + remaining_cols,
        )

    # Plotting logic. Every column is either numeric or categorical (see
    # all_cols above), so "not numeric" means categorical.
    var1_is_numeric = var1 in numeric_cols
    if var2 == "None":
        # Single variable visualization
        if var1_is_numeric:
            st.subheader(f"Distribution of {var1}")
            plot = sns.displot(data=display_df, x=var1, kde=True)
        else:
            st.subheader(f"Count plot of {var1}")
            plot = sns.countplot(data=display_df, x=var1)
    else:
        # Two variable visualization
        var2_is_numeric = var2 in numeric_cols
        if var1_is_numeric and var2_is_numeric:
            # Scatter plot for two numeric variables
            st.subheader(f"Scatter plot: {var1} vs {var2}")
            plot = sns.scatterplot(data=display_df, x=var1, y=var2)
        elif var1_is_numeric:
            # Box plot for numeric vs categorical
            st.subheader(f"Box plot: {var1} by {var2}")
            plot = sns.boxplot(data=display_df, x=var2, y=var1)
        elif var2_is_numeric:
            # Box plot for categorical vs numeric
            st.subheader(f"Box plot: {var2} by {var1}")
            plot = sns.boxplot(data=display_df, x=var1, y=var2)
        else:
            # Heatmap of co-occurrence counts for two categorical variables
            st.subheader(f"Heatmap: {var1} vs {var2}")
            crosstab = pd.crosstab(display_df[var1], display_df[var2])
            plot = sns.heatmap(crosstab, annot=True, fmt='d', cmap='YlOrRd')

    # `plot` is a FacetGrid (displot) or an Axes (everything else); both
    # expose the underlying matplotlib Figure as `.figure`. The axes-level
    # seaborn functions draw on the current pyplot figure, so clear it after
    # rendering -- otherwise the next rerun would draw on top of this plot.
    st.pyplot(plot.figure, clear_figure=True)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?