Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (and cache it) and display it as a DataFrame. Add controls to do the following:
- Control how many rows are displayed.
- A checkbox that determines whether to randomly shuffle the DataFrame.
Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Page-level configuration: set the page title to "Data Explorer" and request the "wide" layout.
st.set_page_config(page_title="Data Explorer", layout="wide")
@st.cache_data
def _fetch_csv(url):
    """Download and parse the CSV at *url*, raising on any failure.

    Only this helper is cached. Because it raises instead of returning an
    error value, a failed download leaves nothing in the cache and the next
    rerun tries again.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load (and cache) a CSV from a URL without raising.

    Args:
        url: Location of the CSV file; anything ``pandas.read_csv`` accepts.
            Surrounding whitespace on string URLs is ignored.

    Returns:
        A ``(df, error)`` tuple: ``(DataFrame, None)`` on success, or
        ``(None, message)`` with a non-empty message on failure.
    """
    # Pasted URLs often carry stray spaces/newlines that would break the fetch.
    source = url.strip() if isinstance(url, str) else url
    try:
        return _fetch_csv(source), None
    except Exception as e:  # UI boundary: report every failure to the user
        # Some exceptions stringify to "", which callers testing `if error:`
        # would mistake for success; fall back to the exception class name.
        return None, str(e) or type(e).__name__
def determine_column_type(series):
    """Classify a column as ``'numeric'`` or ``'categorical'``.

    Args:
        series: The pandas Series (DataFrame column) to classify.

    Returns:
        ``'numeric'`` for integer/float-like dtypes, ``'categorical'`` for
        everything else (strings, objects, datetimes, booleans, ...).
    """
    # pandas reports bool as a numeric dtype, but a True/False flag is a
    # two-level category: it should get counts and bars, not histograms,
    # box plots or correlations.
    if pd.api.types.is_bool_dtype(series):
        return 'categorical'
    if pd.api.types.is_numeric_dtype(series):
        return 'numeric'
    return 'categorical'
def create_visualization(df, var1, var2=None):
    """Build the plotly figure best suited to the selected variable(s).

    Chart selection:
        * one numeric variable        -> histogram
        * one categorical variable    -> bar chart of value counts
        * numeric vs numeric          -> scatter plot
        * categorical vs numeric      -> box plot (category on the x axis)
        * categorical vs categorical  -> bar chart of the cross-tabulation

    Args:
        df: DataFrame holding the data.
        var1: Name of the first column.
        var2: Optional name of a second column. ``None`` (or the same column
            as ``var1``) produces a single-variable chart.

    Returns:
        The plotly figure.
    """
    # Picking the same column twice adds no information (x-vs-x scatter,
    # a crosstab of a column with itself), so treat it as a single variable.
    if var2 == var1:
        var2 = None

    var1_type = determine_column_type(df[var1])

    if var2 is None:
        # Single variable visualization
        if var1_type == 'numeric':
            fig = px.histogram(df, x=var1, title=f'Distribution of {var1}')
            fig.update_layout(showlegend=False)
        else:
            value_counts = df[var1].value_counts()
            fig = px.bar(x=value_counts.index, y=value_counts.values,
                         title=f'Count of {var1}')
            fig.update_xaxes(title=var1)
            fig.update_yaxes(title='Count')
        return fig

    # Two variable visualization
    var2_type = determine_column_type(df[var2])

    if var1_type == 'numeric' and var2_type == 'numeric':
        fig = px.scatter(df, x=var1, y=var2, title=f'{var1} vs {var2}')
    elif var1_type == 'categorical' and var2_type == 'numeric':
        fig = px.box(df, x=var1, y=var2, title=f'{var2} by {var1}')
    elif var1_type == 'numeric' and var2_type == 'categorical':
        # Swap the axes so the categories always run along x.
        fig = px.box(df, x=var2, y=var1, title=f'{var1} by {var2}')
    else:
        # Two categorical variables: one bar segment per (var1, var2) count.
        crosstab = pd.crosstab(df[var1], df[var2])
        fig = px.bar(crosstab, title=f'{var1} vs {var2}')
        fig.update_xaxes(title=var1)
        fig.update_yaxes(title='Count')
    return fig
# Main app
def _show_variable_summary(df, column):
    """Write describe() output for a numeric column, value counts otherwise."""
    if determine_column_type(df[column]) == 'numeric':
        st.write(f"**{column} Statistics:**")
        st.write(df[column].describe())
    else:
        st.write(f"**{column} Value Counts:**")
        st.write(df[column].value_counts())


st.title("📊 Data Explorer")
st.markdown("Load CSV data from a URL and explore it with interactive visualizations")

# URL input
url = st.text_input(
    "Enter CSV URL:",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
    help="Enter the URL of a CSV file to load and explore"
)

if url:
    # Load data
    with st.spinner("Loading data..."):
        df, error = load_data(url)

    if error:
        st.error(f"Error loading data: {error}")
    elif df is not None:
        st.success(f"Data loaded successfully! Shape: {df.shape}")

        # Display basic info about the dataset
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Rows", df.shape[0])
        with col2:
            st.metric("Columns", df.shape[1])
        with col3:
            # int(): hand st.metric a plain Python int, not a numpy scalar.
            st.metric("Missing Values", int(df.isnull().sum().sum()))

        # Controls
        st.markdown("---")
        total_rows = len(df)
        col1, col2 = st.columns(2)
        with col1:
            if total_rows > 1:
                num_rows = st.slider(
                    "Number of rows to display:",
                    min_value=1,
                    max_value=total_rows,
                    value=min(100, total_rows)
                )
            else:
                # With 0 rows the slider bounds would be invalid (min 1 > max 0)
                # and with 1 row there is nothing to choose, so skip the widget.
                num_rows = total_rows
        with col2:
            shuffle = st.checkbox("Randomly shuffle DataFrame")

        # Process data based on controls
        if shuffle:
            # The script reruns on every widget interaction. Keep one seed per
            # "checked" period so moving the slider or changing a variable does
            # not reshuffle the table underneath the user.
            if "shuffle_seed" not in st.session_state:
                st.session_state["shuffle_seed"] = int(
                    np.random.default_rng().integers(2**32)
                )
            display_df = df.sample(
                frac=1, random_state=st.session_state["shuffle_seed"]
            ).reset_index(drop=True)
        else:
            # Forget the seed so re-ticking the box yields a fresh order.
            if "shuffle_seed" in st.session_state:
                del st.session_state["shuffle_seed"]
            display_df = df
        display_df = display_df.head(num_rows)

        # Display DataFrame
        st.markdown("### 📋 Data Preview")
        st.dataframe(display_df, use_container_width=True)

        # Variable selection for visualization
        st.markdown("---")
        st.markdown("### 📈 Data Visualization")

        columns = list(df.columns)
        col1, col2 = st.columns(2)
        with col1:
            var1 = st.selectbox("Select first variable:", columns)
        with col2:
            var2 = st.selectbox("Select second variable (optional):", [None] + columns)

        # Choosing the same column twice is equivalent to choosing it once.
        if var2 == var1:
            var2 = None

        # `is not None` rather than truthiness: a column may be named 0 or "".
        if var1 is not None:
            try:
                # Create and display visualization
                fig = create_visualization(df, var1, var2)
                st.plotly_chart(fig, use_container_width=True)

                # Show some statistics
                st.markdown("### 📊 Variable Statistics")
                if var2 is None:
                    _show_variable_summary(df, var1)
                else:
                    col1, col2 = st.columns(2)
                    with col1:
                        _show_variable_summary(df, var1)
                    with col2:
                        _show_variable_summary(df, var2)

                    # Correlation is only meaningful when both are numeric.
                    both_numeric = (
                        determine_column_type(df[var1]) == 'numeric'
                        and determine_column_type(df[var2]) == 'numeric'
                    )
                    if both_numeric:
                        correlation = df[var1].corr(df[var2])
                        st.metric("Correlation", f"{correlation:.3f}")
            except Exception as e:  # UI boundary: show the failure, keep the app alive
                st.error(f"Error creating visualization: {e}")

        # Data info section
        with st.expander("📋 Dataset Information"):
            st.write("**Column Information:**")
            info_df = pd.DataFrame({
                'Column': df.columns,
                'Type': [str(dtype) for dtype in df.dtypes],
                # Whole-frame reductions instead of one Python-level pass per column.
                'Non-Null Count': df.count().to_numpy(),
                'Null Count': df.isnull().sum().to_numpy(),
            })
            st.dataframe(info_df, use_container_width=True)
            st.write("**Sample Data:**")
            st.write(df.head())
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?