generate a sample dashboard with vertical and horizontal layout to analyze the data in 's3://clickhouse-public-datasets/hits_compatible/athena_partitioned/hits_1.parquet using sql
To upload files, please first save the app
import streamlit as st
import duckdb
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import pandas as pd
st.set_page_config(layout="wide")
# Initialize connection to DuckDB
@st.cache_resource
def init_connection():
return duckdb.connect()
# Perform query and return data
@st.cache_data
def get_data(query):
conn = init_connection()
return conn.execute(query).fetchdf()
st.title("Website Analytics Dashboard")
# Main metrics
def load_main_metrics():
query = """
SELECT
COUNT(*) as total_hits,
COUNT(DISTINCT UserID) as unique_users,
COUNT(DISTINCT URL) as unique_urls,
AVG(PageViews) as avg_page_views
FROM read_parquet('s3://clickhouse-public-datasets/hits_compatible/athena_partitioned/hits_1.parquet')
"""
return get_data(query)
# Time series data
def load_time_series():
query = """
SELECT
DATE_TRUNC('hour', EventTime) as hour,
COUNT(*) as hits
FROM read_parquet('s3://clickhouse-public-datasets/hits_compatible/athena_partitioned/hits_1.parquet')
GROUP BY 1
ORDER BY 1
"""
return get_data(query)
# Top pages
def load_top_pages():
query = """
SELECT
URL,
COUNT(*) as visits
FROM read_parquet('s3://clickhouse-public-datasets/hits_compatible/athena_partitioned/hits_1.parquet')
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10
"""
return get_data(query)
# Browser statistics
def load_browser_stats():
query = """
SELECT
BrowserName,
COUNT(*) as count
FROM read_parquet('s3://clickhouse-public-datasets/hits_compatible/athena_partitioned/hits_1.parquet')
GROUP BY 1
ORDER BY 2 DESC
LIMIT 5
"""
return get_data(query)
# Load all data
with st.spinner("Loading data..."):
metrics_df = load_main_metrics()
time_series_df = load_time_series()
top_pages_df = load_top_pages()
browser_df = load_browser_stats()
# Display metrics in columns
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric("Total Hits", f"{metrics_df['total_hits'].iloc[0]:,}")
with col2:
st.metric("Unique Users", f"{metrics_df['unique_users'].iloc[0]:,}")
with col3:
st.metric("Unique URLs", f"{metrics_df['unique_urls'].iloc[0]:,}")
with col4:
st.metric("Avg Page Views", f"{metrics_df['avg_page_views'].iloc[0]:.2f}")
# Create two columns for charts
left_column, right_column = st.columns(2)
with left_column:
st.subheader("Traffic Over Time")
fig_time = px.line(time_series_df,
x='hour',
y='hits',
title="Hits per Hour")
st.plotly_chart(fig_time, use_container_width=True)
st.subheader("Top Pages")
fig_pages = px.bar(top_pages_df,
x='visits',
y='URL',
orientation='h',
title="Most Visited Pages")
st.plotly_chart(fig_pages, use_container_width=True)
with right_column:
st.subheader("Browser Distribution")
fig_browser = px.pie(browser_df,
values='count',
names='BrowserName',
title="Browser Usage")
st.plotly_chart(fig_browser, use_container_width=True)
# Display raw data in tables
st.subheader("Raw Data")
tab1, tab2 = st.tabs(["Top Pages", "Browser Stats"])
with tab1:
st.dataframe(top_pages_df)
with tab2:
st.dataframe(browser_df)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?