Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (and cache it) and display it as a DataFrame. Add controls to do the following:
- Control how many rows are displayed
- A checkbox to determine whether to randomly shuffle the DataFrame

Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Page heading shown at the top of the app
st.title("Penguin Data Viewer and Visualizer")

# Ask for the CSV location; the field is pre-filled with the penguins CSV URL
DEFAULT_CSV_URL = (
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)
url = st.text_input(label="Enter CSV URL", value=DEFAULT_CSV_URL)
# Function to load data with caching
@st.cache_data
def _fetch_csv(url):
    """Read the CSV at *url* into a DataFrame; successful reads are cached.

    Exceptions are deliberately NOT caught here: a call that raises leaves
    nothing in the cache, so a transient failure (network hiccup, temporary
    404) is retried on the next rerun instead of being remembered as a
    permanent "no data" result for that URL.
    """
    return pd.read_csv(url)


def load_data(url):
    """Return the CSV at *url* as a DataFrame, or ``None`` if loading fails.

    Args:
        url: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` after showing an error message in
        the app when the file cannot be downloaded or parsed.
    """
    try:
        return _fetch_csv(url)
    except Exception as e:
        # UI boundary: any download/parse failure is reported to the user
        # rather than crashing the app.
        st.error(f"Error loading data: {e}")
        return None


# Keep the cache-clearing handle reachable through the public name, as it was
# when load_data itself carried the cache decorator.
load_data.clear = _fetch_csv.clear
# Load data, then render the controls, the table preview and the plots
def _show_and_close(fig):
    """Render *fig* in the app, then close it.

    Streamlit re-executes this script on every interaction; without an
    explicit close each rerun would leave another figure registered with
    pyplot.
    """
    st.pyplot(fig)
    plt.close(fig)


def _plot_one_variable(data, column, is_numeric):
    """Plot one column: histogram + KDE if numeric, otherwise a count plot.

    A short text summary (describe() or value_counts()) follows the figure.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    if is_numeric:
        st.subheader(f"Distribution of {column}")
        sns.histplot(data=data, x=column, kde=True, ax=ax)
        _show_and_close(fig)
        st.write(f"Summary statistics for {column}:")
        st.write(data[column].describe())
    else:
        st.subheader(f"Count of {column} categories")
        sns.countplot(data=data, x=column, ax=ax)
        # Rotate via the Axes itself instead of pyplot's "current axes" state.
        ax.tick_params(axis="x", rotation=45)
        _show_and_close(fig)
        st.write(f"Value counts for {column}:")
        st.write(data[column].value_counts())


def _plot_two_variables(data, var1, var2, numeric_cols):
    """Pick a plot for a pair of distinct columns based on their types.

    numeric/numeric -> scatter plot plus correlation,
    numeric/categorical (either order) -> box plot grouped by the category,
    categorical/categorical -> heatmap of the cross-tabulated counts.
    """
    first_numeric = var1 in numeric_cols
    second_numeric = var2 in numeric_cols
    fig, ax = plt.subplots(figsize=(10, 6))

    if first_numeric and second_numeric:
        st.subheader(f"Relationship between {var1} and {var2}")
        sns.scatterplot(data=data, x=var1, y=var2, ax=ax)
        _show_and_close(fig)
        corr = data[[var1, var2]].corr().iloc[0, 1]
        st.write(f"Correlation between {var1} and {var2}: {corr:.4f}")
    elif first_numeric or second_numeric:
        # Exactly one column is numeric: it goes on the y axis and the
        # categorical column defines the groups on the x axis.
        value_col, group_col = (var1, var2) if first_numeric else (var2, var1)
        st.subheader(f"Distribution of {value_col} by {group_col}")
        sns.boxplot(data=data, x=group_col, y=value_col, ax=ax)
        ax.tick_params(axis="x", rotation=45)
        _show_and_close(fig)
    else:
        st.subheader(f"Count of {var1} vs {var2}")
        crosstab = pd.crosstab(data[var1], data[var2])
        sns.heatmap(crosstab, annot=True, cmap="YlGnBu", fmt="d", ax=ax)
        _show_and_close(fig)


def _render_data_controls(data):
    """Draw the row-count and shuffle controls, then the table preview."""
    st.subheader("Data Controls")
    rows_col, shuffle_col = st.columns(2)
    row_cap = min(100, len(data))
    with rows_col:
        if row_cap > 1:
            num_rows = st.slider(
                "Number of rows to display",
                min_value=1,
                max_value=row_cap,
                value=min(10, row_cap),
            )
        else:
            # Only one row exists, so there is nothing to choose and the
            # slider range would be degenerate.
            num_rows = row_cap
    with shuffle_col:
        shuffle = st.checkbox("Randomly shuffle data")

    # sample() already returns a new frame, so no defensive copy is needed.
    # The fixed seed keeps the shuffled order stable across reruns.
    if shuffle:
        display_data = data.sample(frac=1, random_state=42).reset_index(drop=True)
    else:
        display_data = data

    st.subheader("Data Preview")
    st.dataframe(display_data.head(num_rows))


def _render_visualization(data):
    """Let the user pick one or two columns and draw a fitting plot."""
    st.subheader("Data Visualization")
    numeric_cols = data.select_dtypes(include=np.number).columns.tolist()
    all_cols = data.columns.tolist()

    st.write("Select variables to visualize:")
    left, right = st.columns(2)
    with left:
        var1 = st.selectbox("Variable 1", options=all_cols, index=0)
    with right:
        var2 = st.selectbox("Variable 2", options=[None] + all_cols, index=0)

    # Choosing the same column twice carries no more information than
    # choosing it once, so it gets the single-variable plot.
    if var2 is None or var2 == var1:
        _plot_one_variable(data, var1, var1 in numeric_cols)
    else:
        _plot_two_variables(data, var1, var2, numeric_cols)


if url:
    data = load_data(url)
    # load_data reports its own errors, so a None result needs no message here.
    if data is not None:
        st.write(f"Dataset loaded with {data.shape[0]} rows and {data.shape[1]} columns")
        if data.empty:
            # Zero rows would make the slider bounds invalid, and zero
            # columns leaves nothing to select for plotting.
            st.warning("The dataset has no rows or no columns, so there is nothing to display.")
        else:
            _render_data_controls(data)
            _render_visualization(data)
else:
    st.warning("Please enter a valid CSV URL")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?