Build me a data visualization tool that loads its data from S3
To upload files, please first save the app
import streamlit as st
import boto3
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import altair as alt
import numpy as np
from io import StringIO, BytesIO
import json
# Page-level settings: page title and wide layout.
st.set_page_config(
    page_title="S3 Data Visualization Tool",
    layout="wide",
)
def create_s3_client(access_key, secret_key, region):
    """Build a boto3 S3 client from explicitly supplied credentials.

    Args:
        access_key: Value passed to boto3 as ``aws_access_key_id``.
        secret_key: Value passed to boto3 as ``aws_secret_access_key``.
        region: Value passed to boto3 as ``region_name``.

    Returns:
        The client object returned by ``boto3.client``, or ``None`` if
        creating it raised; in that case the error is shown via ``st.error``.
    """
    client_options = {
        'aws_access_key_id': access_key,
        'aws_secret_access_key': secret_key,
        'region_name': region,
    }
    try:
        return boto3.client('s3', **client_options)
    except Exception as exc:
        st.error(f"Error creating S3 client: {exc!s}")
        return None
def list_s3_buckets(s3_client):
    """Return the names of the buckets reported by ``s3_client.list_buckets()``.

    Args:
        s3_client: Object exposing a ``list_buckets()`` method whose result
            has a ``'Buckets'`` list of dicts with a ``'Name'`` key.

    Returns:
        A list of bucket names in the order returned by the client, or an
        empty list if anything raised (the error is shown via ``st.error``).
    """
    try:
        bucket_entries = s3_client.list_buckets()['Buckets']
        return [entry['Name'] for entry in bucket_entries]
    except Exception as exc:
        st.error(f"Error listing buckets: {exc!s}")
        return []
def list_s3_objects(s3_client, bucket_name, prefix=""):
    """List the keys of supported data files in an S3 bucket.

    A single ``list_objects_v2`` call returns at most one page of keys, so
    this keeps requesting pages (via ``ContinuationToken``) until the
    response is no longer marked as truncated.

    Args:
        s3_client: Object exposing ``list_objects_v2(**kwargs)``.
        bucket_name: Name of the bucket to list.
        prefix: Only keys starting with this prefix are requested.

    Returns:
        Keys ending in ``.csv``, ``.json``, ``.xlsx``, ``.xls`` or
        ``.parquet``, in listing order. An empty list is returned when
        nothing matches or when any request raised (the error is shown via
        ``st.error``; partial results are discarded).
    """
    supported_suffixes = ('.csv', '.json', '.xlsx', '.xls', '.parquet')
    request = {'Bucket': bucket_name, 'Prefix': prefix}
    keys = []
    try:
        while True:
            response = s3_client.list_objects_v2(**request)
            # 'Contents' is absent when the page holds no objects.
            keys.extend(
                obj['Key']
                for obj in response.get('Contents', [])
                if obj['Key'].endswith(supported_suffixes)
            )
            next_token = response.get('NextContinuationToken')
            # Stop on the last page, or if a token is missing, to avoid
            # re-requesting the same page forever.
            if not response.get('IsTruncated') or not next_token:
                break
            request['ContinuationToken'] = next_token
        return keys
    except Exception as e:
        st.error(f"Error listing objects: {str(e)}")
        return []
def read_s3_file(s3_client, bucket_name, object_key):
    """Download one S3 object and parse it into a pandas DataFrame.

    The parser is chosen from the key's suffix: ``.csv``, ``.json``,
    ``.xlsx``/``.xls`` or ``.parquet``.

    Args:
        s3_client: Object exposing ``get_object(Bucket=..., Key=...)`` whose
            result has a readable ``'Body'``.
        bucket_name: Bucket holding the object.
        object_key: Key of the object; its suffix selects the parser.

    Returns:
        The parsed DataFrame, or ``None`` when the suffix is not supported
        or when downloading/parsing raised (both are shown via ``st.error``).
    """
    try:
        payload = s3_client.get_object(Bucket=bucket_name, Key=object_key)['Body'].read()

        # Checked top to bottom; the first matching suffix decides the parser.
        parsers = (
            ('.csv', pd.read_csv),
            ('.json', pd.read_json),
            (('.xlsx', '.xls'), pd.read_excel),
            ('.parquet', pd.read_parquet),
        )
        for suffixes, parse in parsers:
            if object_key.endswith(suffixes):
                return parse(BytesIO(payload))

        st.error("Unsupported file format")
        return None
    except Exception as exc:
        st.error(f"Error reading file: {exc!s}")
        return None
def create_visualization(df, chart_type, x_col, y_col, color_col=None):
    """Build a Plotly Express figure for the requested chart type.

    Args:
        df: Data passed straight through to the Plotly Express function.
        chart_type: One of "Bar Chart", "Line Chart", "Scatter Plot",
            "Histogram", "Box Plot" or "Pie Chart".
        x_col: Column for the x-axis (the slice names for a pie chart).
        y_col: Column for the y-axis (the slice values for a pie chart);
            not used for histograms.
        color_col: Optional column to colour by. A falsy value or the
            string "None" means no colouring; pie charts never use it.

    Returns:
        The figure produced by Plotly Express, or ``None`` when
        ``chart_type`` is not one of the names listed above.
    """
    # Only pass ``color`` when a real column was chosen.
    color_kwargs = {"color": color_col} if color_col and color_col != "None" else {}

    if chart_type == "Histogram":
        return px.histogram(df, x=x_col, **color_kwargs)
    if chart_type == "Pie Chart":
        return px.pie(df, names=x_col, values=y_col)

    # The remaining chart types all share the same x/y/colour call shape.
    xy_charts = (
        ("Bar Chart", "bar"),
        ("Line Chart", "line"),
        ("Scatter Plot", "scatter"),
        ("Box Plot", "box"),
    )
    for label, plotter_name in xy_charts:
        if chart_type == label:
            return getattr(px, plotter_name)(df, x=x_col, y=y_col, **color_kwargs)
    return None
# Main app: page heading and short description
st.title("🗂️ S3 Data Visualization Tool")
st.markdown("Connect to your S3 bucket and visualize your data with interactive charts")

# Sidebar: collect AWS credentials and create the S3 client on request
with st.sidebar:
    st.header("🔑 AWS S3 Configuration")
    access_key = st.text_input("AWS Access Key ID", type="password")
    secret_key = st.text_input("AWS Secret Access Key", type="password")
    region = st.text_input("AWS Region", value="us-east-1")

    connect_clicked = st.button("Connect to S3")
    credentials_complete = all((access_key, secret_key, region))
    if connect_clicked and not credentials_complete:
        st.error("Please provide all required credentials")
    elif connect_clicked:
        # Stored in session state so the main content area can find the client.
        st.session_state.s3_client = create_s3_client(access_key, secret_key, region)
        if st.session_state.s3_client:
            st.success("✅ Connected to S3!")
# Main content area: rendered only once a truthy S3 client is present in
# session state; otherwise the else-branch at the bottom shows a hint.
if 's3_client' in st.session_state and st.session_state.s3_client:
    # Two columns with relative widths 1 and 2: selection left, preview right.
    col1, col2 = st.columns([1, 2])
    with col1:
        st.subheader("📁 Data Selection")
        # Bucket selection
        buckets = list_s3_buckets(st.session_state.s3_client)
        if buckets:
            selected_bucket = st.selectbox("Select S3 Bucket:", buckets)
            # Object selection (only files with a supported extension are listed)
            objects = list_s3_objects(st.session_state.s3_client, selected_bucket)
            if objects:
                selected_object = st.selectbox("Select File:", objects)
                if st.button("Load Data"):
                    with st.spinner("Loading data from S3..."):
                        df = read_s3_file(st.session_state.s3_client, selected_bucket, selected_object)
                        # read_s3_file returns None on failure; keep any previously
                        # loaded DataFrame in that case.
                        if df is not None:
                            st.session_state.df = df
                            st.success(f"✅ Loaded {len(df)} rows and {len(df.columns)} columns")
            else:
                st.info("No supported files found in this bucket. Supported formats: CSV, JSON, Excel, Parquet")
        else:
            st.warning("No buckets found or insufficient permissions")
    with col2:
        # Preview is driven by the DataFrame kept in session state, not by the
        # local variable assigned when "Load Data" is clicked.
        if 'df' in st.session_state:
            df = st.session_state.df
            st.subheader("📊 Data Preview")
            st.dataframe(df.head(), use_container_width=True)
            # Data info
            col_info1, col_info2, col_info3 = st.columns(3)
            with col_info1:
                st.metric("Rows", len(df))
            with col_info2:
                st.metric("Columns", len(df.columns))
            with col_info3:
                # Total is divided by 1024 twice and labelled as MB.
                st.metric("Memory Usage", f"{df.memory_usage(deep=True).sum() / 1024 / 1024:.2f} MB")
    # Visualization section
    if 'df' in st.session_state:
        df = st.session_state.df
        st.header("📈 Data Visualization")
        # Chart configuration (col1/col2 are rebound here; the two-column
        # layout above is no longer referenced after this point)
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            chart_type = st.selectbox(
                "Chart Type:",
                ["Bar Chart", "Line Chart", "Scatter Plot", "Histogram", "Box Plot", "Pie Chart"]
            )
        with col2:
            x_col = st.selectbox("X-axis:", df.columns.tolist())
        with col3:
            # Histograms take no Y-axis, so the selector is not shown for them.
            if chart_type != "Histogram":
                numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
                # Offer numeric columns when there are any, otherwise all columns.
                y_col = st.selectbox("Y-axis:", numeric_cols if numeric_cols else df.columns.tolist())
            else:
                y_col = None
        with col4:
            # "None" (the string) is the "no colour grouping" choice.
            color_options = ["None"] + df.columns.tolist()
            color_col = st.selectbox("Color by:", color_options)
        # Generate visualization
        if st.button("Generate Chart", type="primary"):
            try:
                if chart_type == "Histogram":
                    fig = create_visualization(df, chart_type, x_col, None, color_col)
                elif chart_type == "Pie Chart":
                    # Pie charts are built without the colour selection.
                    if y_col:
                        fig = create_visualization(df, chart_type, x_col, y_col)
                    else:
                        st.error("Please select a Y-axis column for Pie Chart")
                        fig = None
                else:
                    if y_col:
                        fig = create_visualization(df, chart_type, x_col, y_col, color_col)
                    else:
                        st.error("Please select a Y-axis column")
                        fig = None
                # fig is None when a required Y-axis column was missing.
                if fig:
                    st.plotly_chart(fig, use_container_width=True)
            except Exception as e:
                st.error(f"Error creating visualization: {str(e)}")
        # Data analysis section
        st.header("🔍 Data Analysis")
        analysis_tab1, analysis_tab2, analysis_tab3 = st.tabs(["Summary Statistics", "Data Types", "Missing Values"])
        with analysis_tab1:
            st.subheader("Summary Statistics")
            st.dataframe(df.describe(), use_container_width=True)
        with analysis_tab2:
            st.subheader("Data Types")
            # One row per column: dtype name and number of non-null values.
            dtype_df = pd.DataFrame({
                'Column': df.columns,
                'Data Type': df.dtypes.astype(str),
                'Non-Null Count': df.count()
            })
            st.dataframe(dtype_df, use_container_width=True)
        with analysis_tab3:
            st.subheader("Missing Values")
            # One row per column: null count and its share of all rows in percent.
            missing_df = pd.DataFrame({
                'Column': df.columns,
                'Missing Count': df.isnull().sum(),
                'Missing Percentage': (df.isnull().sum() / len(df) * 100).round(2)
            })
            # Keep only the columns that actually have missing values.
            missing_df = missing_df[missing_df['Missing Count'] > 0]
            if not missing_df.empty:
                st.dataframe(missing_df, use_container_width=True)
                # Missing values visualization
                fig_missing = px.bar(
                    missing_df,
                    x='Column',
                    y='Missing Percentage',
                    title="Missing Values by Column"
                )
                st.plotly_chart(fig_missing, use_container_width=True)
            else:
                st.success("✅ No missing values found in the dataset!")
else:
    # No S3 client in session state yet.
    st.info("👈 Please configure your S3 connection in the sidebar and load data to start visualizing!")
# Footer (always rendered, whether or not a client is connected)
st.markdown("---")
st.markdown("Built with ❤️ using Streamlit | Upload your data to S3 and visualize it instantly!")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?