Create an application that has a text field to enter a URL (default value: https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv). The app should read the CSV file from the URL (caching the result) and display it as a DataFrame. Add controls to do the following: - Control how many rows are displayed - A checkbox that determines whether to randomly shuffle the DataFrame. Users select one or two variables, and you must determine the best way to visualize them.
To upload files, please first save the app
import streamlit as st
import pandas as pd
import plotly.express as px
# Cache only successful downloads. Error handling lives in the uncached
# load_data wrapper so that a failed attempt is never stored in the cache.
@st.cache_data
def _read_csv_cached(url):
    """Read the CSV at ``url`` into a DataFrame; results are cached per URL.

    Any exception raised by ``pd.read_csv`` propagates to the caller, so only
    successfully parsed DataFrames end up in the cache.
    """
    return pd.read_csv(url)


def load_data(url):
    """Load the CSV at ``url`` and return it, or ``None`` if loading fails.

    Args:
        url: Location of the CSV file, passed to ``pd.read_csv``. Surrounding
            whitespace is stripped from string values so that a pasted URL
            with a stray space or newline still loads.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when reading failed. On
        failure the error is shown in the app via ``st.error``.
    """
    if isinstance(url, str):
        url = url.strip()
    try:
        return _read_csv_cached(url)
    except Exception as e:
        # UI boundary: report any failure to the user instead of crashing.
        # Because this handler is outside the cached function, retrying the
        # same URL later performs a fresh download rather than replaying a
        # cached failure.
        st.error(f"Error loading data: {str(e)}")
        return None


# Keep ``load_data.clear()`` working for callers that relied on the cache
# decorator being applied directly to load_data.
load_data.clear = _read_csv_cached.clear
# Dataset offered as the pre-filled value of the URL field.
DEFAULT_CSV_URL = (
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
)

# Page heading.
st.title("Data Visualization Explorer")

# Let the user point the app at any CSV; the field starts with the default.
url = st.text_input(label="Enter CSV URL", value=DEFAULT_CSV_URL)

# Fetch the DataFrame for the entered URL (None when loading failed).
df = load_data(url)
# Render the preview and the visualization for the loaded DataFrame.
# Nothing is rendered here when loading failed (df is None).
if df is not None and df.empty:
    # A header-only CSV parses to zero rows. The row-count input below would
    # then get min_value=1 > max_value=0 and raise, so report it instead.
    st.warning("The CSV file contains no rows to display.")
elif df is not None:
    # Controls: number of preview rows and optional shuffling.
    rows_col, shuffle_col = st.columns(2)
    with rows_col:
        n_rows = st.number_input(
            "Number of rows to display",
            min_value=1,
            max_value=len(df),
            value=min(5, len(df)),
        )
    with shuffle_col:
        shuffle = st.checkbox("Randomly shuffle data")

    # Shuffle if requested. random_state=42 makes the shuffled order the same
    # on every script rerun.
    if shuffle:
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Display dataframe
    st.subheader("Data Preview")
    st.dataframe(df.head(n_rows))

    # Variable selection (numeric columns only; "number" already covers
    # every float and int dtype).
    st.subheader("Visualization")
    columns = df.select_dtypes(include="number").columns.tolist()

    if not columns:
        # With no options the selectbox would yield None and plotting would
        # fail, so explain the situation instead.
        st.info("No numeric columns available to visualize.")
    else:
        first_col, second_col = st.columns(2)
        with first_col:
            var1 = st.selectbox("Select first variable", columns)
        with second_col:
            remaining_cols = [col for col in columns if col != var1]
            var2 = st.selectbox(
                "Select second variable (optional)",
                ["None"] + remaining_cols,
            )

        if var2 == "None":
            # One numeric variable: show its distribution.
            fig = px.histogram(df, x=var1, title=f"Distribution of {var1}")
            st.plotly_chart(fig)
        else:
            # Two numeric variables: scatter plot with an OLS trend line,
            # built in a single call rather than copying the trend trace
            # from a second, throwaway figure.
            title = f"Relationship between {var1} and {var2}"
            try:
                fig = px.scatter(
                    df, x=var1, y=var2, trendline="ols", title=title
                )
            except ImportError:
                # Plotly imports statsmodels to fit the OLS line; fall back
                # to a plain scatter plot when it is not installed.
                fig = px.scatter(df, x=var1, y=var2, title=title)
                st.warning(
                    "Trend line unavailable: install statsmodels to enable it."
                )
            st.plotly_chart(fig)

            # Show correlation
            correlation = df[var1].corr(df[var2])
            st.info(
                f"Correlation coefficient between {var1} and {var2}: "
                f"{correlation:.3f}"
            )
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?