Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (and cache it) and display a DataFrame. Add controls to do the following: - Control how many rows are displayed - A checkbox that determines whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
# Page-wide settings: set the page title and request the "wide" layout.
st.set_page_config(
    page_title="Data Visualizer",
    layout="wide",
)
@st.cache_data
def _read_csv_cached(url):
    """Read the CSV at ``url`` into a DataFrame, letting any exception propagate.

    Only this helper is cached. Because a failure raises instead of
    returning, no error result is ever stored in the cache.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load a CSV from ``url``, caching successful reads only.

    Args:
        url: Location of the CSV file; passed unchanged to ``pd.read_csv``.

    Returns:
        A ``(df, error)`` tuple: ``(DataFrame, None)`` on success, or
        ``(None, message)`` on failure, where ``message`` is a non-empty
        string describing the exception.
    """
    try:
        return _read_csv_cached(url), None
    except Exception as e:
        # Broad catch is deliberate: this is the UI boundary, and reading a
        # remote CSV can fail with network, parsing, or decoding errors alike.
        # Fall back to the exception type name so the message is never empty
        # (callers test the message for truthiness).
        return None, str(e) or type(e).__name__
def shuffle_dataframe(df):
    """Return a new DataFrame with the rows of ``df`` in random order.

    The result carries a fresh 0..n-1 index; the input frame is not modified.
    """
    # Sampling 100% of the rows without replacement yields a random permutation.
    permuted = df.sample(frac=1)
    # Discard the old (now scrambled) index labels.
    return permuted.reset_index(drop=True)
def create_visualization(df, selected_vars):
    """Render a Plotly chart chosen from the dtypes of the selected columns.

    Chart selection:
        * one numeric column           -> histogram
        * one non-numeric column       -> bar chart of value counts
        * two numeric columns          -> scatter plot
        * one numeric, one non-numeric -> box plot of the numeric column
          grouped by the non-numeric one
        * two non-numeric columns      -> heatmap of their cross-tabulation

    Args:
        df: DataFrame containing the columns to plot.
        selected_vars: Sequence of column names. An empty selection shows an
            info message; more than two names renders nothing.

    Returns:
        None. The chart (or info message) is written to the Streamlit page.
    """
    if len(selected_vars) == 0:
        st.info("Please select at least one variable to visualize.")
        return

    fig = None

    if len(selected_vars) == 1:
        var = selected_vars[0]
        if pd.api.types.is_numeric_dtype(df[var]):
            # Histogram for numeric variables
            fig = px.histogram(df, x=var, title=f"Distribution of {var}")
        else:
            # Bar chart for categorical variables
            value_counts = df[var].value_counts()
            fig = px.bar(x=value_counts.index, y=value_counts.values,
                         title=f"Count of {var}")
            # Plotly's Figure API is update_xaxes/update_yaxes (plural); the
            # singular spellings do not exist and raise AttributeError.
            fig.update_xaxes(title_text=var)
            fig.update_yaxes(title_text="Count")

    elif len(selected_vars) == 2:
        var1, var2 = selected_vars
        var1_numeric = pd.api.types.is_numeric_dtype(df[var1])
        var2_numeric = pd.api.types.is_numeric_dtype(df[var2])

        if var1_numeric and var2_numeric:
            # Scatter plot for two numeric variables
            fig = px.scatter(df, x=var1, y=var2, title=f"{var1} vs {var2}")
        elif var1_numeric != var2_numeric:
            # Box plot: numeric on the y-axis, grouped by the categorical one,
            # regardless of the order in which the user picked them.
            if var1_numeric:
                numeric_var, category_var = var1, var2
            else:
                numeric_var, category_var = var2, var1
            fig = px.box(df, x=category_var, y=numeric_var,
                         title=f"{numeric_var} by {category_var}")
        else:
            # Heatmap for two categorical variables
            crosstab = pd.crosstab(df[var1], df[var2])
            fig = px.imshow(crosstab,
                            title=f"Cross-tabulation: {var1} vs {var2}",
                            labels=dict(x=var2, y=var1, color="Count"),
                            aspect="auto")

    if fig is not None:
        st.plotly_chart(fig, use_container_width=True)
# Main app: URL input -> cached load -> display controls -> chart -> data preview.
st.title("📊 Data Visualizer")
st.markdown("Load data from a URL and create interactive visualizations")

# URL input
url = st.text_input(
    "Enter CSV URL:",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
    help="Enter the URL of a CSV file to analyze"
)

if url:
    # Load data
    with st.spinner("Loading data..."):
        df, error = load_data(url)

    # Also treat a missing DataFrame as a failure, so an empty error message
    # cannot leave the page blank with no explanation.
    if error or df is None:
        st.error(f"Error loading data: {error}")
        st.stop()

    st.success(f"✅ Data loaded successfully! Shape: {df.shape}")

    # Controls
    col1, col2 = st.columns(2)

    with col1:
        max_rows = len(df)
        if max_rows > 1:
            num_rows = st.slider(
                "Number of rows to display:",
                min_value=1,
                max_value=max_rows,
                value=min(100, max_rows),
                help=f"Select how many rows to display (max: {max_rows})"
            )
        else:
            # With 0 or 1 rows the range 1..max_rows is empty or a single
            # point, so there is nothing to choose; skip the slider.
            num_rows = max_rows

    with col2:
        shuffle = st.checkbox(
            "Randomly shuffle data",
            help="Shuffle the dataframe before displaying"
        )

    # Prepare data for display. shuffle_dataframe and head() both return new
    # objects, so no defensive copy of df is needed.
    display_df = shuffle_dataframe(df) if shuffle and max_rows > 1 else df
    display_df = display_df.head(num_rows)

    # Variable selection for visualization
    st.subheader("📈 Visualization Controls")

    # Column names used by the selector and the statistics panel
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    all_cols = df.columns.tolist()

    selected_variables = st.multiselect(
        "Select variables to visualize (1-2 variables):",
        options=all_cols,
        max_selections=2,
        help="Choose 1 variable for distribution or 2 variables for relationship analysis"
    )

    if selected_variables:
        st.subheader("📊 Visualization")
        # Charts are drawn from the full dataset, not the row-limited preview.
        create_visualization(df, selected_variables)

    # Display dataframe
    st.subheader("📋 Data Preview")

    # Show data info
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Total Rows", len(df))
    with col2:
        st.metric("Total Columns", len(df.columns))
    with col3:
        st.metric("Displaying", len(display_df))

    # Show column types
    with st.expander("📊 Column Information"):
        col_info = pd.DataFrame({
            'Column': df.columns,
            'Type': df.dtypes.astype(str),
            'Non-Null Count': df.count(),
            'Null Count': df.isnull().sum()
        })
        st.dataframe(col_info, use_container_width=True)

    # Display the data
    st.dataframe(display_df, use_container_width=True)

    # Quick stats for numeric columns
    if numeric_cols:
        with st.expander("📈 Quick Statistics (Numeric Columns)"):
            st.dataframe(df[numeric_cols].describe(), use_container_width=True)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?