Ploomber AI Editor | sql-dashboard-335e

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import duckdb
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import pandas as pd

st.set_page_config(layout="wide")

# Initialize connection to DuckDB
@st.cache_resource
def init_connection():
    return duckdb.connect()

# Perform query and return data
@st.cache_data
def get_data(query):
    conn = init_connection()
    return conn.execute(query).fetchdf()

st.title("Website Analytics Dashboard")

# Main metrics
def load_main_metrics():
    query = """
    SELECT 
        COUNT(*) as total_hits,
        COUNT(DISTINCT UserID) as unique_users,
        COUNT(DISTINCT URL) as unique_urls,
        AVG(PageViews) as avg_page_views
    FROM read_parquet('s3://clickhouse-public-datasets/hits_compatible/athena_partitioned/hits_1.parquet')
    """
    return get_data(query)

# Time series data
def load_time_series():
    query = """
    SELECT 
        DATE_TRUNC('hour', EventTime) as hour,
        COUNT(*) as hits
    FROM read_parquet('s3://clickhouse-public-datasets/hits_compatible/athena_partitioned/hits_1.parquet')
    GROUP BY 1
    ORDER BY 1
    """
    return get_data(query)

# Top pages
def load_top_pages():
    query = """
    SELECT 
        URL,
        COUNT(*) as visits
    FROM read_parquet('s3://clickhouse-public-datasets/hits_compatible/athena_partitioned/hits_1.parquet')
    GROUP BY 1
    ORDER BY 2 DESC
    LIMIT 10
    """
    return get_data(query)

# Browser statistics
def load_browser_stats():
    query = """
    SELECT 
        BrowserName,
        COUNT(*) as count
    FROM read_parquet('s3://clickhouse-public-datasets/hits_compatible/athena_partitioned/hits_1.parquet')
    GROUP BY 1
    ORDER BY 2 DESC
    LIMIT 5
    """
    return get_data(query)

# Load all data
with st.spinner("Loading data..."):
    metrics_df = load_main_metrics()
    time_series_df = load_time_series()
    top_pages_df = load_top_pages()
    browser_df = load_browser_stats()

# Display metrics in columns
col1, col2, col3, col4 = st.columns(4)
with col1:
    st.metric("Total Hits", f"{metrics_df['total_hits'].iloc[0]:,}")
with col2:
    st.metric("Unique Users", f"{metrics_df['unique_users'].iloc[0]:,}")
with col3:
    st.metric("Unique URLs", f"{metrics_df['unique_urls'].iloc[0]:,}")
with col4:
    st.metric("Avg Page Views", f"{metrics_df['avg_page_views'].iloc[0]:.2f}")

# Create two columns for charts
left_column, right_column = st.columns(2)

with left_column:
    st.subheader("Traffic Over Time")
    fig_time = px.line(time_series_df, 
                      x='hour', 
                      y='hits',
                      title="Hits per Hour")
    st.plotly_chart(fig_time, use_container_width=True)

    st.subheader("Top Pages")
    fig_pages = px.bar(top_pages_df,
                      x='visits',
                      y='URL',
                      orientation='h',
                      title="Most Visited Pages")
    st.plotly_chart(fig_pages, use_container_width=True)

with right_column:
    st.subheader("Browser Distribution")
    fig_browser = px.pie(browser_df,
                        values='count',
                        names='BrowserName',
                        title="Browser Usage")
    st.plotly_chart(fig_browser, use_container_width=True)

    # Display raw data in tables
    st.subheader("Raw Data")
    tab1, tab2 = st.tabs(["Top Pages", "Browser Stats"])
    
    with tab1:
        st.dataframe(top_pages_df)
    with tab2:
        st.dataframe(browser_df)
Loading code editor...
Click Save & Run to preview your app
Terminal