Create an application that has a text field for entering a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (caching the result) and display it as a DataFrame. Add controls to do the following: (1) control how many rows are displayed; (2) a checkbox that determines whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Page-level settings: browser-tab title and full-width layout.
st.set_page_config(
    page_title="CSV Data Visualizer",
    layout="wide",
)
@st.cache_data
def _fetch_csv(url):
    """Download and parse the CSV at *url*.

    Only this helper is cached. Any exception propagates to the caller, so a
    failed download produces no cache entry and the next call retries.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load CSV data from a URL, caching successful loads.

    Args:
        url: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A ``(df, error)`` tuple: ``(DataFrame, None)`` on success, or
        ``(None, message)`` where ``message`` is ``str()`` of the exception
        raised while reading.
    """
    try:
        return _fetch_csv(url), None
    except Exception as e:
        # Deliberately broad: reading a remote CSV can fail with network,
        # parsing or decoding errors, and the caller only displays the text.
        # The conversion happens outside the cached helper so that a
        # transient failure is not memoised and replayed on later calls.
        return None, str(e)


# Keep ``load_data.clear()`` working for code that used it while the function
# itself carried the cache decorator.
load_data.clear = _fetch_csv.clear
def get_column_type(series):
    """Classify a pandas Series as ``'numeric'`` or ``'categorical'``.

    Boolean columns are reported as categorical. pandas counts ``bool`` as a
    numeric dtype, but a two-valued True/False column is better summarised by
    counts than by a histogram or scatter plot.

    Args:
        series: The column to classify.

    Returns:
        ``'numeric'`` for non-boolean numeric dtypes, ``'categorical'`` for
        everything else.
    """
    dtype_checks = pd.api.types
    if dtype_checks.is_numeric_dtype(series) and not dtype_checks.is_bool_dtype(series):
        return 'numeric'
    return 'categorical'
def create_visualization(df, selected_vars):
    """Build a Plotly figure suited to the types of one or two columns.

    Chart choice, based on ``get_column_type`` of each selected column:
      * one numeric column        -> histogram
      * one categorical column    -> bar chart of value counts
      * numeric + numeric         -> scatter plot
      * categorical + numeric     -> box plot, categorical on the x axis
                                     (whichever order they were selected in)
      * categorical + categorical -> bar chart of the cross-tabulated counts

    Args:
        df: DataFrame holding the columns.
        selected_vars: Sequence of one or two column names from ``df``.

    Returns:
        The Plotly Express figure.

    Raises:
        ValueError: If ``selected_vars`` does not hold exactly one or two
            names. Previously this fell through to an ``UnboundLocalError``.
    """
    if len(selected_vars) not in (1, 2):
        raise ValueError(
            f"Select 1 or 2 variables to visualize (got {len(selected_vars)})"
        )

    if len(selected_vars) == 1:
        col = selected_vars[0]
        if get_column_type(df[col]) == 'numeric':
            # Histogram for single numeric variable
            return px.histogram(df, x=col, title=f'Distribution of {col}')
        # Bar chart for single categorical variable
        value_counts = df[col].value_counts()
        return px.bar(x=value_counts.index, y=value_counts.values,
                      title=f'Count of {col}', labels={'x': col, 'y': 'Count'})

    col1, col2 = selected_vars
    col1_type = get_column_type(df[col1])
    col2_type = get_column_type(df[col2])

    if col1_type == 'numeric' and col2_type == 'numeric':
        # Scatter plot for two numeric variables
        return px.scatter(df, x=col1, y=col2, title=f'{col1} vs {col2}')
    if col1_type == 'categorical' and col2_type == 'numeric':
        # Box plot for categorical vs numeric
        return px.box(df, x=col1, y=col2, title=f'{col2} by {col1}')
    if col1_type == 'numeric' and col2_type == 'categorical':
        # Same box plot with the axes swapped so the category stays on x
        return px.box(df, x=col2, y=col1, title=f'{col1} by {col2}')

    # Two categorical variables: plot the contingency table of counts
    crosstab = pd.crosstab(df[col1], df[col2])
    return px.bar(crosstab, title=f'{col1} vs {col2}',
                  labels={'value': 'Count', 'index': col1})
# Main app: URL input -> cached load -> sidebar controls -> preview + chart.
st.title("🔍 CSV Data Visualizer")
st.markdown("Upload a CSV from a URL and explore your data with interactive visualizations!")

# URL input
url = st.text_input(
    "Enter CSV URL:",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
    help="Enter a direct URL to a CSV file",
)
# Pasted URLs often carry stray leading/trailing whitespace.
url = url.strip()

if url:
    # Load data
    with st.spinner("Loading data..."):
        df, error = load_data(url)

    # Test the frame rather than the truthiness of the message: an exception
    # whose text is empty would otherwise leave the page blank.
    if df is None:
        st.error(f"Error loading data: {error or 'unknown error'}")
    else:
        st.success(f"✅ Data loaded successfully! Shape: {df.shape}")

        # Data controls
        st.sidebar.header("Data Controls")

        # Number of rows to display
        total_rows = len(df)
        if total_rows > 1:
            num_rows = st.sidebar.slider(
                "Number of rows to display",
                min_value=1,
                max_value=total_rows,
                value=min(100, total_rows),
            )
        else:
            # With 0 or 1 rows, max_value would not exceed min_value, so there
            # is no valid slider range; skip the widget and show everything.
            num_rows = total_rows

        # Shuffle checkbox
        shuffle_data = st.sidebar.checkbox("Randomly shuffle data", value=False)

        # Apply controls. sample() and head() both return new frames, so the
        # loaded DataFrame is never modified.
        display_df = df
        if shuffle_data:
            # No random_state is passed, so each run draws a fresh order.
            display_df = display_df.sample(frac=1).reset_index(drop=True)
        display_df = display_df.head(num_rows)

        # Variable selection for visualization
        st.sidebar.header("Visualization Controls")
        st.sidebar.markdown("Select 1 or 2 variables to visualize:")
        selected_vars = st.sidebar.multiselect(
            "Select variables:",
            options=df.columns.tolist(),
            max_selections=2,
            help="Choose 1 variable for distribution plots or 2 variables for relationship plots",
        )

        # Two equal-width panes: data on the left, chart on the right
        left_pane, right_pane = st.columns([1, 1])

        with left_pane:
            st.subheader("📊 Data Preview")
            st.dataframe(display_df, use_container_width=True)

            # Data info
            st.subheader("📈 Dataset Info")
            info_col1, info_col2 = st.columns(2)
            with info_col1:
                st.metric("Total Rows", f"{len(df):,}")
                st.metric("Columns", len(df.columns))
            with info_col2:
                st.metric("Displayed Rows", f"{len(display_df):,}")
                # Cast the numpy scalar to a plain int for the metric widget
                st.metric("Missing Values", int(df.isnull().sum().sum()))

        with right_pane:
            if selected_vars:
                st.subheader("📊 Visualization")
                try:
                    # The chart and statistics use the full dataset, not just
                    # the rows shown in the preview.
                    fig = create_visualization(df, selected_vars)
                    st.plotly_chart(fig, use_container_width=True)

                    # Show basic statistics for selected variables
                    st.subheader("📋 Statistics")
                    for var in selected_vars:
                        if get_column_type(df[var]) == 'numeric':
                            st.write(f"**{var}** (Numeric)")
                            st.write(df[var].describe())
                        else:
                            st.write(f"**{var}** (Categorical)")
                            # Only the 10 most frequent values are listed
                            st.write(df[var].value_counts().head(10))
                        st.write("---")
                except Exception as e:
                    # Top-level UI boundary: report the problem in the page
                    # instead of letting the script run abort.
                    st.error(f"Error creating visualization: {str(e)}")
            else:
                st.info("👆 Select 1 or 2 variables from the sidebar to create a visualization")

                # Show column information when no variables selected
                st.subheader("📋 Column Information")
                col_info = [
                    {
                        'Column': col,
                        'Type': get_column_type(df[col]),
                        'Missing': df[col].isnull().sum(),
                        'Unique Values': df[col].nunique(),
                    }
                    for col in df.columns
                ]
                col_info_df = pd.DataFrame(col_info)
                st.dataframe(col_info_df, use_container_width=True)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?