Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (and cache it) and display it as a DataFrame. Add controls to do the following: - Control how many rows are displayed - A checkbox that determines whether to randomly shuffle the DataFrame. Users select one or two variables, and the app must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Page-level settings: browser tab title and icon, full-width layout.
# NOTE(review): the original icon was a mis-encoded emoji whose exact glyph
# could not be recovered; a chart emoji is used here -- confirm the choice.
st.set_page_config(
    page_title="CSV Data Visualizer",
    page_icon="📊",
    layout="wide",
)
@st.cache_data
def _read_csv_cached(url):
    """Download and parse the CSV at ``url``; successful results are cached.

    Any exception from ``pd.read_csv`` propagates to the caller. Keeping the
    error handling outside this cached function means only successful reads
    are stored, so a failed URL is fetched again on the next attempt.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load CSV data from a URL, caching successful reads.

    Args:
        url: Location of the CSV file (anything ``pd.read_csv`` accepts).

    Returns:
        A ``(df, error)`` tuple: ``(DataFrame, None)`` on success, or
        ``(None, message)`` on failure, where ``message`` is always a
        non-empty string.
    """
    try:
        return _read_csv_cached(url), None
    except Exception as e:  # UI boundary: report any load failure as text
        # Some exceptions carry no message; fall back to the class name so
        # callers testing the error for truthiness still see the failure.
        return None, str(e) or type(e).__name__
def shuffle_dataframe(df, random_state=None):
    """Return a row-shuffled copy of ``df`` with a fresh 0..n-1 index.

    Args:
        df: DataFrame to shuffle; it is not modified.
        random_state: Optional seed (or random generator) forwarded to
            ``DataFrame.sample``. The default ``None`` gives a different
            order on each call; pass a fixed value for a reproducible order.

    Returns:
        A new DataFrame containing the same rows in random order.
    """
    # Sampling 100% of the rows without replacement is a permutation.
    shuffled = df.sample(frac=1, random_state=random_state)
    return shuffled.reset_index(drop=True)
def determine_column_type(series):
    """Classify a column as ``'numeric'`` or ``'categorical'``.

    Boolean columns are reported as categorical: pandas counts ``bool`` as a
    numeric dtype, but a True/False flag is better charted as two categories
    than as a numeric distribution.

    Args:
        series: The pandas Series (column) to classify.

    Returns:
        ``'numeric'`` for non-boolean numeric dtypes, ``'categorical'`` for
        everything else (strings, booleans, datetimes, ...).
    """
    is_bool = pd.api.types.is_bool_dtype(series)
    if pd.api.types.is_numeric_dtype(series) and not is_bool:
        return 'numeric'
    return 'categorical'
def create_visualization(df, selected_vars):
    """Build a Plotly figure suited to the one or two selected columns.

    One column: a histogram when it is numeric, otherwise a bar chart of its
    value counts. Two columns: a scatter plot when both are numeric, a box
    plot of the numeric column grouped by the categorical one when the types
    are mixed, and a heatmap of the cross-tabulation otherwise.

    Args:
        df: DataFrame holding the data.
        selected_vars: Sequence of column names chosen by the user.

    Returns:
        A Plotly figure, or ``None`` when the selection does not contain
        exactly one or two columns.
    """
    n_selected = len(selected_vars)

    if n_selected == 1:
        column = selected_vars[0]
        if determine_column_type(df[column]) == 'numeric':
            # Single numeric column: distribution.
            chart = px.histogram(df, x=column, title=f'Distribution of {column}')
            chart.update_layout(showlegend=False)
            return chart

        # Single categorical column: frequency of each category.
        counts = df[column].value_counts()
        chart = px.bar(
            x=counts.index,
            y=counts.values,
            title=f'Count of {column}',
        )
        chart.update_xaxes(title=column)
        chart.update_yaxes(title='Count')
        return chart

    if n_selected != 2:
        return None

    first, second = selected_vars
    kinds = (
        determine_column_type(df[first]),
        determine_column_type(df[second]),
    )

    if kinds == ('numeric', 'numeric'):
        # Two numeric columns: relationship as a scatter plot.
        return px.scatter(df, x=first, y=second, title=f'{first} vs {second}')

    if 'numeric' in kinds and 'categorical' in kinds:
        # Mixed pair: categories go on the x axis regardless of the order
        # in which the columns were picked.
        if kinds[0] == 'categorical':
            group_col, value_col = first, second
        else:
            group_col, value_col = second, first
        return px.box(
            df,
            x=group_col,
            y=value_col,
            title=f'{value_col} by {group_col}',
        )

    # Remaining case (two categorical columns): heatmap of co-occurrence counts.
    table = pd.crosstab(df[first], df[second])
    chart = px.imshow(
        table,
        text_auto=True,
        aspect="auto",
        title=f'{first} vs {second} (Heatmap)',
    )
    chart.update_xaxes(title=second)
    chart.update_yaxes(title=first)
    return chart
# Main app: URL input -> load -> table controls -> summary -> chart.
# NOTE(review): heading emoji were restored from mis-encoded text. The icons
# for the title, "Data Preview", "Data Summary" and "Column Information"
# could not be decoded exactly and are best guesses -- confirm them.
st.title("📊 CSV Data Visualizer")
st.markdown("Load CSV data from a URL and create interactive visualizations")

# URL input
st.subheader("📥 Data Source")
url = st.text_input(
    "Enter CSV URL:",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
    help="Enter the URL of a CSV file to analyze",
)

if url:
    df, error = load_data(url)

    if error is not None:
        # Compare against None so an empty error message still counts.
        st.error(f"Error loading data: {error}")
    elif df.empty:
        # Nothing to preview or plot; also avoids an invalid slider range.
        st.warning("The CSV file was loaded but contains no data to display.")
    else:
        st.success(f"✅ Data loaded successfully! Shape: {df.shape}")
        total_rows = len(df)

        # Controls
        st.subheader("⚙️ Controls")
        rows_col, shuffle_col = st.columns(2)
        with rows_col:
            if total_rows > 1:
                max_rows = st.slider(
                    "Number of rows to display:",
                    min_value=1,
                    max_value=total_rows,
                    value=min(100, total_rows),
                    help="Select how many rows to show in the table",
                )
            else:
                # A one-row frame would give the slider min == max; there
                # is nothing to choose, so skip the widget.
                max_rows = total_rows
        with shuffle_col:
            shuffle = st.checkbox(
                "Randomly shuffle DataFrame",
                value=False,
                help="Shuffle the order of rows in the DataFrame",
            )

        # Apply the controls; shuffle first so the preview is a random sample.
        display_df = shuffle_dataframe(df) if shuffle else df
        display_df = display_df.head(max_rows)

        # Display DataFrame
        st.subheader("📋 Data Preview")
        st.dataframe(display_df, use_container_width=True)

        # Data summary (always computed on the full frame, not the preview)
        st.subheader("📈 Data Summary")
        rows_metric, cols_metric, missing_metric = st.columns(3)
        with rows_metric:
            st.metric("Total Rows", total_rows)
        with cols_metric:
            st.metric("Total Columns", len(df.columns))
        with missing_metric:
            st.metric("Missing Values", int(df.isnull().sum().sum()))

        # Variable selection for visualization
        st.subheader("🎨 Visualization")
        st.markdown("Select one or two variables to visualize:")

        all_cols = df.columns.tolist()
        # Classify every column once; reused for labels and explanations.
        col_types = {name: determine_column_type(df[name]) for name in all_cols}

        selected_vars = st.multiselect(
            "Choose variables:",
            options=all_cols,
            format_func=lambda name: f"{name} ({col_types[name]})",
            max_selections=2,
            help="Select 1-2 variables to create an appropriate visualization",
        )

        if selected_vars:
            fig = create_visualization(df, selected_vars)
            if fig is not None:
                st.plotly_chart(fig, use_container_width=True)

                # Explain why this chart type was chosen.
                with st.expander("ℹ️ Visualization Info"):
                    chosen_types = [col_types[name] for name in selected_vars]
                    if len(selected_vars) == 1:
                        if chosen_types[0] == 'numeric':
                            st.write("**Histogram**: Shows the distribution of a numeric variable.")
                        else:
                            st.write("**Bar Chart**: Shows the count of each category in a categorical variable.")
                    elif chosen_types == ['numeric', 'numeric']:
                        st.write("**Scatter Plot**: Shows the relationship between two numeric variables.")
                    elif 'numeric' in chosen_types and 'categorical' in chosen_types:
                        st.write("**Box Plot**: Shows the distribution of a numeric variable across categories.")
                    else:
                        st.write("**Heatmap**: Shows the relationship between two categorical variables.")

        # Column information
        with st.expander("📋 Column Information"):
            col_info_df = pd.DataFrame({
                'Column': all_cols,
                'Type': [col_types[name] for name in all_cols],
                'Non-Null Count': [df[name].count() for name in all_cols],
                'Null Count': [df[name].isnull().sum() for name in all_cols],
            })
            st.dataframe(col_info_df, use_container_width=True)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?