Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (and cache it) and display a DataFrame. Add controls to do the following: (1) control how many rows are displayed; (2) a checkbox to determine whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Set the page title and select the wide layout for the app.
st.set_page_config(page_title="CSV Data Visualizer", layout="wide")
@st.cache_data
def _read_csv_cached(url):
    """Read the CSV at ``url`` into a DataFrame; the result is cached per URL.

    Exceptions raised by ``pd.read_csv`` are deliberately not caught here so
    that a failed read never becomes a cached return value.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load CSV data from a URL, caching successful loads.

    Args:
        url: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` if loading failed. On failure an
        error message is shown in the app via ``st.error``.
    """
    # The try/except lives outside the cached helper on purpose: catching
    # inside the cached function would turn ``None`` into a cached result, so
    # a transient failure would keep being returned for that URL.
    try:
        return _read_csv_cached(url)
    except Exception as e:  # app boundary: report any download/parse failure
        st.error(f"Error loading data: {str(e)}")
        return None


# Keep ``load_data.clear()`` available, as it was when ``load_data`` itself
# carried the ``st.cache_data`` decorator.
load_data.clear = _read_csv_cached.clear
def get_column_type(series):
    """Classify a column as ``'numeric'`` or ``'categorical'``.

    Args:
        series: The pandas Series (DataFrame column) to inspect.

    Returns:
        ``'numeric'`` for numeric dtypes, ``'categorical'`` for everything
        else. Boolean columns are reported as ``'categorical'``.
    """
    dtype_checks = pd.api.types
    # ``is_numeric_dtype`` is also true for booleans, but a True/False flag is
    # a two-level category and is better served by the categorical charts.
    if dtype_checks.is_bool_dtype(series):
        return 'categorical'
    if dtype_checks.is_numeric_dtype(series):
        return 'numeric'
    return 'categorical'
def create_visualization(df, var1, var2=None):
    """Create a Plotly figure suited to the types of the selected variables.

    Chart selection:
        * one numeric variable -> histogram
        * one categorical variable -> bar chart of value counts
        * two numeric variables -> scatter plot
        * two categorical variables -> heatmap of their cross-tabulation
        * one numeric and one categorical variable -> box plot per category

    Args:
        df: DataFrame holding the data.
        var1: Name of the first column to plot.
        var2: Optional name of a second column. ``None``, or the same column
            as ``var1``, produces the single-variable chart for ``var1``.

    Returns:
        The Plotly figure for the selected variable(s).
    """
    # Single variable visualization (a column paired with itself carries no
    # extra information, so it is handled here as well).
    if var2 is None or var2 == var1:
        if get_column_type(df[var1]) == 'numeric':
            # Histogram for numeric variables
            return px.histogram(df, x=var1, title=f"Distribution of {var1}")

        # Bar chart for categorical variables. Plain lists are passed instead
        # of ``value_counts().reset_index()`` because the column names that
        # call produces differ between pandas versions.
        counts = df[var1].value_counts()
        fig = px.bar(
            x=counts.index.tolist(),
            y=counts.tolist(),
            title=f"Count of {var1}",
            labels={"x": var1, "y": "Count"},
        )
        # ``update_xaxes`` / ``update_yaxes`` are the figure-level axis
        # updaters; there is no singular ``update_xaxis`` method.
        fig.update_xaxes(title_text=var1)
        fig.update_yaxes(title_text="Count")
        return fig

    # Two variable visualization
    var1_type = get_column_type(df[var1])
    var2_type = get_column_type(df[var2])

    if var1_type == 'numeric' and var2_type == 'numeric':
        # Scatter plot for two numeric variables
        return px.scatter(df, x=var1, y=var2, title=f"{var1} vs {var2}")

    if var1_type == 'categorical' and var2_type == 'categorical':
        # Heatmap of co-occurrence counts for two categorical variables
        crosstab = pd.crosstab(df[var1], df[var2])
        return px.imshow(
            crosstab,
            title=f"{var1} vs {var2} (Cross-tabulation)",
            labels=dict(x=var2, y=var1, color="Count"),
        )

    # Box plot for numeric vs categorical: the categorical variable always
    # goes on the x axis, whichever position it was selected in.
    if var1_type == 'categorical':
        return px.box(df, x=var1, y=var2, title=f"{var2} by {var1}")
    return px.box(df, x=var2, y=var1, title=f"{var1} by {var2}")
# Main app


def _show_column_summary(frame, column):
    """Write a heading plus a summary of one column to the app.

    Numeric columns get their ``describe()`` statistics; categorical columns
    get their value counts.
    """
    st.write(f"**{column}** summary:")
    if get_column_type(frame[column]) == 'numeric':
        st.write(frame[column].describe())
    else:
        st.write(frame[column].value_counts())


st.title("CSV Data Visualizer")
st.write("Load CSV data from a URL and create visualizations")

# URL input
url = st.text_input(
    "Enter CSV URL:",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)

if url:
    # Load data (load_data returns None when loading failed)
    df = load_data(url)

    if df is not None:
        st.success(f"Data loaded successfully! Shape: {df.shape}")

        # Controls
        col1, col2 = st.columns(2)
        with col1:
            # Number of rows to display
            max_rows = len(df)
            if max_rows > 1:
                num_rows = st.slider("Number of rows to display:",
                                     min_value=1, max_value=max_rows,
                                     value=min(100, max_rows))
            else:
                # With zero or one row there is nothing to choose between,
                # and a 1..0 range would make the slider raise; show
                # whatever rows exist.
                num_rows = max_rows
        with col2:
            # Shuffle checkbox
            shuffle = st.checkbox("Randomly shuffle DataFrame")

        # Apply controls. ``sample`` and ``head`` both return new objects, so
        # the loaded frame itself is never modified.
        display_df = df.sample(frac=1).reset_index(drop=True) if shuffle else df
        display_df = display_df.head(num_rows)

        # Display DataFrame
        st.subheader("DataFrame")
        st.dataframe(display_df, use_container_width=True)

        # Variable selection for visualization
        st.subheader("Create Visualization")
        columns = df.columns.tolist()

        col1, col2 = st.columns(2)
        with col1:
            var1 = st.selectbox("Select first variable:",
                                options=columns,
                                index=0 if columns else None)
        with col2:
            # The leading None entry makes the second variable optional.
            var2 = st.selectbox("Select second variable (optional):",
                                options=[None] + columns,
                                index=0)

        # Create and display visualization
        if var1 is not None:
            try:
                fig = create_visualization(df, var1, var2)
                st.plotly_chart(fig, use_container_width=True)

                # Show data summary: one column of output per selected variable
                st.subheader("Data Summary")
                if var2 is None:
                    _show_column_summary(df, var1)
                else:
                    col1, col2 = st.columns(2)
                    with col1:
                        _show_column_summary(df, var1)
                    with col2:
                        _show_column_summary(df, var2)
            except Exception as e:  # app boundary: show the failure in the UI
                st.error(f"Error creating visualization: {str(e)}")

        # Show data info
        with st.expander("Data Info"):
            st.write("**Column Information:**")
            info_df = pd.DataFrame({
                'Column': df.columns,
                'Type': df.dtypes.astype(str).tolist(),
                'Non-Null Count': df.count().tolist(),
                'Null Count': df.isnull().sum().tolist(),
            })
            st.dataframe(info_df, use_container_width=True)
else:
    st.info("Please enter a CSV URL to get started.")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?