Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (and cache it) and display it as a DataFrame. Add controls to do the following: (1) control how many rows are displayed; (2) a checkbox that determines whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Page setup: set the page title and request the "wide" layout for the app.
st.set_page_config(page_title="CSV Data Visualizer", layout="wide")
@st.cache_data
def _read_csv_cached(url):
    """Download and parse the CSV at *url*; only successful reads are cached.

    Any exception from ``pd.read_csv`` propagates, so a failed attempt stores
    nothing in the cache and the next call retries the download.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load (and cache) CSV data from a URL.

    Args:
        url: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        ``(df, None)`` when the file was read, or ``(None, message)`` when
        reading failed, where ``message`` is the text of the exception.
    """
    # The try/except deliberately lives OUTSIDE the cached function: if the
    # error tuple were produced inside it, the failure would be cached like
    # any other return value and a transient network error would stick to
    # this URL until the cache is cleared.
    try:
        return _read_csv_cached(url), None
    except Exception as e:  # UI boundary: report every failure as a message
        return None, str(e)
def get_column_type(series: pd.Series) -> str:
    """Classify a column as ``'numeric'`` or ``'categorical'``.

    Args:
        series: The column to inspect.

    Returns:
        ``'numeric'`` for integer/float-like dtypes, ``'categorical'`` for
        everything else, including boolean columns.
    """
    dtype_api = pd.api.types
    # pandas reports bool as a numeric dtype, but True/False are two
    # categories rather than a measurement; treat them as categorical so
    # they get count-based charts instead of histograms/scatter plots.
    if dtype_api.is_bool_dtype(series):
        return 'categorical'
    if dtype_api.is_numeric_dtype(series):
        return 'numeric'
    return 'categorical'
def create_visualization(df, selected_vars):
    """Create the Plotly figure best suited to the selected column(s).

    Chart selection:
      * one numeric column            -> histogram
      * one categorical column        -> bar chart of value counts
      * two numeric columns           -> scatter plot
      * one categorical + one numeric -> box plot (categories on the x axis)
      * two categorical columns       -> grouped bar chart of co-occurrence counts

    Args:
        df: DataFrame containing the selected columns.
        selected_vars: Sequence of one or two column names from ``df``.

    Returns:
        The Plotly figure for the selection.

    Raises:
        ValueError: If ``selected_vars`` does not hold exactly one or two names.
    """
    # Validate up front: without this, any other selection size falls through
    # every branch and fails with an UnboundLocalError on the return value.
    if len(selected_vars) not in (1, 2):
        raise ValueError(
            f"Expected 1 or 2 variables, got {len(selected_vars)}"
        )

    if len(selected_vars) == 1:
        var = selected_vars[0]
        if get_column_type(df[var]) == 'numeric':
            # Histogram for single numeric variable
            return px.histogram(df, x=var, title=f'Distribution of {var}')
        # Bar chart for single categorical variable
        value_counts = df[var].value_counts()
        return px.bar(
            x=value_counts.index,
            y=value_counts.values,
            title=f'Count of {var}',
            labels={'x': var, 'y': 'Count'},
        )

    var1, var2 = selected_vars
    var1_type = get_column_type(df[var1])
    var2_type = get_column_type(df[var2])

    if var1_type == 'numeric' and var2_type == 'numeric':
        # Scatter plot for two numeric variables
        return px.scatter(df, x=var1, y=var2, title=f'{var1} vs {var2}')
    if var1_type == 'categorical' and var2_type == 'numeric':
        # Box plot: categorical x, numeric y
        return px.box(df, x=var1, y=var2, title=f'{var2} by {var1}')
    if var1_type == 'numeric' and var2_type == 'categorical':
        # Box plot with the axes swapped so the categories stay on x
        return px.box(df, x=var2, y=var1, title=f'{var1} by {var2}')

    # Two categorical variables: counts of every (var1, var2) combination.
    # barmode='group' places the bars side by side; the px.bar default
    # stacks them, which is not the grouped chart this branch promises.
    crosstab = pd.crosstab(df[var1], df[var2])
    return px.bar(crosstab, title=f'{var1} vs {var2}', barmode='group')
# Main app
st.title("📊 CSV Data Visualizer")
st.markdown("Load CSV data from a URL and create interactive visualizations")

# URL input. Whitespace picked up by copy/paste is not part of the URL, so it
# is stripped before the value is used.
url = st.text_input(
    "Enter CSV URL:",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
    help="Enter the URL of a CSV file to load and visualize",
).strip()

if url:
    # load_data returns (DataFrame, None) on success or (None, message) on failure.
    with st.spinner("Loading data..."):
        df, error = load_data(url)

    # Compare against None rather than truthiness so that a failure whose
    # message happens to be empty is still reported instead of silently
    # rendering nothing.
    if error is not None:
        st.error(f"Error loading data: {error}")
    elif df is not None:
        st.success(f"Data loaded successfully! Shape: {df.shape}")

        # Controls
        total_rows = len(df)
        rows_col, shuffle_col = st.columns(2)
        with rows_col:
            if total_rows > 1:
                num_rows = st.slider(
                    "Number of rows to display:",
                    min_value=1,
                    max_value=total_rows,
                    value=min(100, total_rows),
                    help="Control how many rows are shown in the table",
                )
            else:
                # With 0 rows max_value would fall below min_value, and with
                # 1 row the range is a single point: there is nothing to
                # choose, so skip the slider and show whatever exists.
                num_rows = total_rows
                st.info(f"The dataset has {total_rows} row(s); all of them are shown.")
        with shuffle_col:
            shuffle = st.checkbox(
                "Randomly shuffle data",
                help="Randomly reorder the DataFrame rows",
            )

        # Process data based on controls. sample() and head() both return new
        # objects, so the loaded frame is never modified and needs no copy.
        # The fixed random_state makes the shuffled order reproducible.
        if shuffle:
            ordered_df = df.sample(frac=1, random_state=42).reset_index(drop=True)
        else:
            ordered_df = df
        display_df = ordered_df.head(num_rows)

        # Display DataFrame
        st.subheader("📋 Data Preview")
        st.dataframe(display_df, use_container_width=True)

        # Data info (always describes the full dataset, not the preview)
        with st.expander("📊 Data Information"):
            info_col, types_col = st.columns(2)
            with info_col:
                st.write("**Dataset Info:**")
                st.write(f"- Total rows: {len(df)}")
                st.write(f"- Total columns: {len(df.columns)}")
                st.write(f"- Memory usage: {df.memory_usage(deep=True).sum() / 1024:.1f} KB")
            with types_col:
                st.write("**Column Types:**")
                for column in df.columns:
                    st.write(f"- {column}: {get_column_type(df[column])}")

        # Variable selection for visualization
        st.subheader("📈 Create Visualization")
        st.markdown("Select one or two variables to create an appropriate visualization:")
        selected_vars = st.multiselect(
            "Select variables:",
            options=df.columns.tolist(),
            max_selections=2,
            help="Choose 1 variable for distribution plots or 2 variables for relationship plots",
        )

        if selected_vars:
            try:
                # The chart is built from the full dataset, not the preview rows.
                fig = create_visualization(df, selected_vars)
                st.plotly_chart(fig, use_container_width=True)

                # Show visualization explanation
                with st.expander("🔍 Visualization Details"):
                    if len(selected_vars) == 1:
                        var = selected_vars[0]
                        if get_column_type(df[var]) == 'numeric':
                            st.write(f"**Histogram** - Shows the distribution of the numeric variable '{var}'")
                        else:
                            st.write(f"**Bar Chart** - Shows the count of each category in '{var}'")
                    else:
                        var1, var2 = selected_vars
                        pair_types = [get_column_type(df[var1]), get_column_type(df[var2])]
                        if pair_types == ['numeric', 'numeric']:
                            st.write(f"**Scatter Plot** - Shows the relationship between two numeric variables '{var1}' and '{var2}'")
                        elif 'categorical' in pair_types and 'numeric' in pair_types:
                            st.write("**Box Plot** - Shows the distribution of the numeric variable across categories")
                        else:
                            st.write("**Grouped Bar Chart** - Shows the relationship between two categorical variables")
            except Exception as e:
                # UI boundary: surface any plotting failure instead of crashing the app.
                st.error(f"Error creating visualization: {str(e)}")
        else:
            st.info("👆 Select one or two variables above to create a visualization")

        # Summary statistics
        with st.expander("📊 Summary Statistics"):
            numeric_cols = df.select_dtypes(include=['number']).columns
            if len(numeric_cols) > 0:
                st.write("**Numeric Variables:**")
                st.dataframe(df[numeric_cols].describe())

            categorical_cols = df.select_dtypes(exclude=['number']).columns
            if len(categorical_cols) > 0:
                st.write("**Categorical Variables:**")
                for column in categorical_cols:
                    unique_count = df[column].nunique()
                    st.write(f"**{column}** - Unique values: {unique_count}")
                    if unique_count <= 10:
                        # Show every category: this branch admits at most 10
                        # of them, so truncating the counts would hide
                        # categories the check just allowed.
                        st.write(df[column].value_counts())
                    st.write("---")
else:
    st.info("👆 Enter a CSV URL above to get started!")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?