Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (caching the result) and display it as a DataFrame. Add controls to do the following: (1) control how many rows are displayed; (2) a checkbox that determines whether to randomly shuffle the DataFrame. Users select one or two variables, and the app must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Set the page title and use the wide page layout.
st.set_page_config(page_title="Data Visualization App", layout="wide")
@st.cache_data
def _read_csv_cached(url):
    """Download and parse the CSV at ``url``.

    Kept separate from ``load_data`` so that only successful reads are
    stored in the cache: an exception raised here propagates to the caller
    and nothing is cached for that URL.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load a CSV from a URL, caching successful reads.

    Args:
        url: Location of the CSV file; passed unchanged to ``pd.read_csv``.

    Returns:
        A ``(df, error)`` pair. On success ``df`` is the loaded DataFrame
        and ``error`` is ``None``; on failure ``df`` is ``None`` and
        ``error`` is ``str()`` of the exception that was raised.
    """
    # The try/except deliberately lives OUTSIDE the cached function. When it
    # was inside, a failed load returned ``(None, message)`` normally, so the
    # failure itself was cached and a transient network error kept being
    # replayed for that URL until the cache was cleared.
    try:
        return _read_csv_cached(url), None
    except Exception as exc:  # report any fetch/parse failure to the UI
        return None, str(exc)


# Keep the cache-clearing hook that a directly decorated function exposes.
load_data.clear = _read_csv_cached.clear
def determine_column_types(df, columns):
    """Label each requested column as ``'numeric'`` or ``'categorical'``.

    Args:
        df: DataFrame that contains the columns.
        columns: Iterable of column names to classify.

    Returns:
        Dict mapping each column name to ``'numeric'`` when
        ``pd.api.types.is_numeric_dtype`` accepts the column, otherwise
        ``'categorical'``.
    """
    is_numeric = pd.api.types.is_numeric_dtype
    return {
        name: 'numeric' if is_numeric(df[name]) else 'categorical'
        for name in columns
    }
def create_visualization(df, selected_columns):
    """Build a Plotly figure suited to the selected column(s).

    Chart choice, driven by ``determine_column_types``:

    * one numeric column            -> histogram
    * one categorical column        -> bar chart of value counts
    * numeric + numeric             -> scatter plot
    * categorical + numeric (any order) -> box plot, category on the x-axis
    * categorical + categorical     -> grouped bar chart of the crosstab

    Args:
        df: DataFrame holding the data.
        selected_columns: Sequence of one or two column names from ``df``.

    Returns:
        The figure, or ``None`` when the selection is empty or has more
        than two columns.
    """
    if not selected_columns:
        return None

    column_types = determine_column_types(df, selected_columns)

    if len(selected_columns) == 1:
        col = selected_columns[0]
        if column_types[col] == 'numeric':
            # Histogram for single numeric variable
            fig = px.histogram(df, x=col, title=f'Distribution of {col}')
            fig.update_layout(showlegend=False)
            return fig

        # Bar chart for single categorical variable
        value_counts = df[col].value_counts()
        fig = px.bar(
            x=value_counts.index,
            y=value_counts.values,
            title=f'Count of {col}',
        )
        fig.update_xaxes(title=col)
        fig.update_yaxes(title='Count')
        return fig

    if len(selected_columns) == 2:
        col1, col2 = selected_columns
        kinds = (column_types[col1], column_types[col2])

        if kinds == ('numeric', 'numeric'):
            # Scatter plot for two numeric variables
            return px.scatter(df, x=col1, y=col2, title=f'{col1} vs {col2}')

        if kinds == ('categorical', 'numeric'):
            # Box plot: categorical x, numeric y
            return px.box(df, x=col1, y=col2, title=f'{col2} by {col1}')

        if kinds == ('numeric', 'categorical'):
            # Box plot with the axes swapped so the category stays on x
            return px.box(df, x=col2, y=col1, title=f'{col1} by {col2}')

        # Grouped bar chart for two categorical variables. barmode='group'
        # is required: without it px.bar stacks the crosstab columns, which
        # contradicts the "grouped" chart this branch is meant to produce.
        crosstab = pd.crosstab(df[col1], df[col2])
        fig = px.bar(crosstab, barmode='group', title=f'{col1} vs {col2}')
        fig.update_xaxes(title=col1)
        fig.update_yaxes(title='Count')
        return fig

    return None
# Main app
# NOTE(review): the emoji in the UI strings below were mojibake in the source
# (one of them split the success message's string literal across two lines,
# which is a syntax error). The check mark and info glyphs decode exactly;
# the others lost bytes and are plausible reconstructions -- confirm.
st.title("📊 Data Visualization App")
st.markdown("Load CSV data from a URL and create interactive visualizations")

# URL input
url = st.text_input(
    "Enter CSV URL:",
    value="https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
    help="Enter the URL of a CSV file to load and visualize",
)

if url:
    # Load data
    df, error = load_data(url)

    if error:
        st.error(f"Error loading data: {error}")
    elif df is not None:
        st.success(f"✅ Data loaded successfully! Shape: {df.shape}")

        # Controls
        rows_col, shuffle_col = st.columns(2)

        with rows_col:
            # Number of rows to display
            max_rows = len(df)
            if max_rows > 1:
                num_rows = st.slider(
                    "Number of rows to display:",
                    min_value=1,
                    max_value=max_rows,
                    value=min(100, max_rows),
                    help="Select how many rows to display in the table",
                )
            else:
                # With 0 rows the slider's default (0) would fall below its
                # minimum (1), and with 1 row there is nothing to choose,
                # so skip the widget and show whatever rows exist.
                num_rows = max_rows

        with shuffle_col:
            # Shuffle checkbox
            shuffle_data = st.checkbox(
                "Randomly shuffle data",
                help="Check to randomly shuffle the DataFrame",
            )

        # Process data based on controls. sample() and head() both return
        # new objects, so the loaded frame is never modified here.
        display_df = df
        if shuffle_data:
            display_df = display_df.sample(frac=1).reset_index(drop=True)
        display_df = display_df.head(num_rows)

        # Display DataFrame
        st.subheader("📋 Data Preview")
        st.dataframe(display_df, use_container_width=True)

        # Data info
        with st.expander("ℹ️ Data Information"):
            dtypes_col, missing_col = st.columns(2)
            with dtypes_col:
                st.write("**Data Types:**")
                st.write(df.dtypes)
            with missing_col:
                st.write("**Missing Values:**")
                st.write(df.isnull().sum())

        # Variable selection for visualization
        st.subheader("📈 Create Visualization")
        columns = df.columns.tolist()
        selected_columns = st.multiselect(
            "Select 1 or 2 variables to visualize:",
            options=columns,
            max_selections=2,
            help="Choose one variable for distribution or two variables for relationship analysis",
        )

        if selected_columns:
            # Classify once and reuse, so the labels and the explanation
            # always agree with the chart create_visualization picks.
            column_types = determine_column_types(df, selected_columns)

            # Show column information
            st.write("**Selected Variables:**")
            for name in selected_columns:
                type_label = column_types[name].capitalize()
                unique_vals = df[name].nunique()
                st.write(f"- **{name}**: {type_label} ({unique_vals} unique values)")

            # Create and display visualization
            fig = create_visualization(df, selected_columns)

            # Compare against None explicitly rather than relying on the
            # truthiness of a figure object.
            if fig is not None:
                st.plotly_chart(fig, use_container_width=True)

                # Show visualization explanation
                with st.expander("📝 Visualization Explanation"):
                    if len(selected_columns) == 1:
                        name = selected_columns[0]
                        if column_types[name] == 'numeric':
                            st.write(f"**Histogram**: Shows the distribution of values for the numeric variable '{name}'. The x-axis represents the values, and the y-axis shows the frequency/count.")
                        else:
                            st.write(f"**Bar Chart**: Shows the count of each category in the categorical variable '{name}'. Each bar represents a different category.")
                    else:
                        first, second = selected_columns
                        kinds = {column_types[first], column_types[second]}
                        if kinds == {'numeric'}:
                            st.write(f"**Scatter Plot**: Shows the relationship between two numeric variables '{first}' and '{second}'. Each point represents one observation.")
                        elif kinds == {'numeric', 'categorical'}:
                            st.write("**Box Plot**: Shows the distribution of the numeric variable across different categories. The box shows the quartiles, and whiskers show the range.")
                        else:
                            st.write(f"**Grouped Bar Chart**: Shows the relationship between two categorical variables '{first}' and '{second}' by displaying counts for each combination.")
            else:
                st.warning("Could not create visualization for the selected variables.")
        else:
            st.info("👆 Select one or two variables above to create a visualization")
    else:
        st.warning("No data loaded. Please check your URL.")
else:
    st.info("👆 Enter a CSV URL above to get started")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?