# Ploomber AI Editor | knowledge-inventory-dashboard-9787
#
# (Editor UI text captured with the file, not part of the program:
# "Drop files here or click to upload".)
# Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import random

# Page setup: wide layout with a custom browser-tab title and icon.
st.set_page_config(layout="wide", page_icon="📚", page_title="Knowledge Inventory Dashboard")

# Generate sample data
@st.cache_data
def generate_sample_data():
    """Build a synthetic knowledge-article inventory for the dashboard.

    Every random draw comes from one private, seeded generator, so the
    generated offsets, statuses, categories, authors and view counts are the
    same on every run. Dates are anchored to the moment of the call.

    Returns:
        pandas.DataFrame: one row per article (150 rows) with the columns
        ``article_id``, ``title``, ``status``, ``created_date``,
        ``published_date``, ``last_review_date``, ``category``, ``author``,
        ``views`` and ``time_to_publish``. ``published_date`` and
        ``time_to_publish`` are missing for articles without a publication
        date; ``last_review_date`` is missing for unreviewed articles.
    """
    # A local generator keeps the data reproducible without reseeding (and
    # thereby disturbing) the process-wide `random` / `np.random` state.
    rng = random.Random(42)

    n_articles = 150

    # Article statuses and their relative frequencies
    statuses = ['Published', 'Active', 'Retired', 'Draft', 'Under Review']
    status_weights = [0.4, 0.3, 0.15, 0.1, 0.05]
    categories = ['Technical', 'Process', 'Product', 'Support', 'Training']

    # Articles are created at some point within the last 365 days
    start_date = datetime.now() - timedelta(days=365)

    articles_data = []
    for i in range(n_articles):
        created_date = start_date + timedelta(days=rng.randint(0, 365))

        # Roughly 80% of articles get a publication date 1-30 days after creation
        published_date = (
            created_date + timedelta(days=rng.randint(1, 30))
            if rng.random() > 0.2
            else None
        )
        # Roughly 70% of articles have been reviewed 30-300 days after creation
        last_review_date = (
            created_date + timedelta(days=rng.randint(30, 300))
            if rng.random() > 0.3
            else None
        )

        # Derive the publishing lead time from the two dates so the metric
        # can never contradict the dates shown next to it.
        time_to_publish = (
            (published_date - created_date).days
            if published_date is not None
            else None
        )

        articles_data.append({
            'article_id': f'KBA-{1000 + i}',
            'title': f'Knowledge Article {i+1}',
            'status': rng.choices(statuses, weights=status_weights)[0],
            'created_date': created_date,
            'published_date': published_date,
            'last_review_date': last_review_date,
            'category': rng.choice(categories),
            'author': f'Author {rng.randint(1, 20)}',
            'views': rng.randint(0, 1000),
            'time_to_publish': time_to_publish,
        })

    return pd.DataFrame(articles_data)

def create_metrics_cards(df):
    """Render the headline KPI cards and the status donut chart.

    Draws four columns: published/active counts, retired/total counts,
    review coverage and average time to publish, and a donut chart of the
    status distribution.

    Args:
        df: Article inventory. Reads the ``status``, ``last_review_date``
            and ``time_to_publish`` columns.
    """
    sla_target_days = 14

    # Calculate KPIs
    total_articles = len(df)
    published_kbas = len(df[df['status'] == 'Published'])
    active_articles = len(df[df['status'] == 'Active'])
    retired_articles = len(df[df['status'] == 'Retired'])
    reviewed_articles = len(df[df['last_review_date'].notna()])
    # NaN when no article has a time-to-publish value
    avg_time_to_publish = df[df['time_to_publish'].notna()]['time_to_publish'].mean()

    def pct_of_total(count):
        """Return *count* as a percentage of all articles (0 when there are none)."""
        # Guard: an empty inventory must not raise ZeroDivisionError.
        return (count / total_articles) * 100 if total_articles else 0.0

    review_percentage = pct_of_total(reviewed_articles)

    # Create columns for metrics
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric(
            label="📄 Published KBAs",
            value=published_kbas,
            delta=f"{pct_of_total(published_kbas):.1f}% of total"
        )

        st.metric(
            label="✅ Active Articles",
            value=active_articles,
            delta=f"{pct_of_total(active_articles):.1f}% of total"
        )

    with col2:
        st.metric(
            label="🗄️ Retired Articles",
            value=retired_articles,
            delta=f"{pct_of_total(retired_articles):.1f}% of total"
        )

        st.metric(
            label="📊 Total Articles",
            value=total_articles
        )

    with col3:
        st.metric(
            label="🔍 Articles Reviewed",
            value=reviewed_articles,
            delta=f"{review_percentage:.1f}% coverage"
        )

        if pd.isna(avg_time_to_publish):
            # No data: show neither an average nor an SLA verdict.
            st.metric(label="⏱️ Avg Time to Publish", value="N/A")
        else:
            sla_breached = avg_time_to_publish > sla_target_days
            st.metric(
                label="⏱️ Avg Time to Publish",
                value=f"{avg_time_to_publish:.1f} days",
                delta=f"SLA Target: {sla_target_days} days" if sla_breached else "Within SLA",
                # The delta text has no sign, so Streamlit treats it as a
                # positive change; invert the colours on a breach so it is
                # drawn red instead of green.
                delta_color="inverse" if sla_breached else "normal"
            )

    with col4:
        # Status breakdown
        st.subheader("Status Distribution")
        status_counts = df['status'].value_counts()
        fig_donut = px.pie(
            values=status_counts.values,
            names=status_counts.index,
            hole=0.6,
            height=200
        )
        fig_donut.update_traces(textposition='inside', textinfo='percent+label')
        fig_donut.update_layout(showlegend=False, margin=dict(l=0, r=0, t=0, b=0))
        st.plotly_chart(fig_donut, use_container_width=True)

def create_trend_charts(df):
    """Render the trend charts: monthly creation counts and publish-time histogram.

    The input DataFrame is only read; no columns are added to it.

    Args:
        df: Article inventory. Reads the ``created_date`` (datetime) and
            ``time_to_publish`` columns.
    """
    sla_target_days = 14

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("📈 Article Creation Trend")

        # Group by month using a standalone key Series, so the caller's
        # DataFrame does not gain a stray 'created_month' column.
        created_month = df['created_date'].dt.to_period('M').rename('created_month')
        monthly_creation = df.groupby(created_month).size().reset_index(name='count')
        # Periods are converted to strings for plotting on the x axis
        monthly_creation['created_month'] = monthly_creation['created_month'].astype(str)

        fig_trend = px.line(
            monthly_creation,
            x='created_month',
            y='count',
            title="Articles Created Per Month",
            markers=True
        )
        fig_trend.update_layout(xaxis_title="Month", yaxis_title="Articles Created")
        st.plotly_chart(fig_trend, use_container_width=True)

    with col2:
        st.subheader("⏰ Time to Publish Distribution")

        # Filter out null values
        publish_times = df[df['time_to_publish'].notna()]['time_to_publish']

        fig_hist = px.histogram(
            x=publish_times,
            nbins=20,
            title="Distribution of Time to Publish",
            labels={'x': 'Days to Publish', 'y': 'Number of Articles'}
        )
        # The histogram's y axis is a computed count, so the 'y' label key
        # above does not reach it; set both axis titles explicitly.
        fig_hist.update_layout(
            xaxis_title="Days to Publish",
            yaxis_title="Number of Articles"
        )

        # Add SLA line
        fig_hist.add_vline(x=sla_target_days, line_dash="dash", line_color="red",
                           annotation_text=f"SLA Target ({sla_target_days} days)")

        st.plotly_chart(fig_hist, use_container_width=True)

def create_category_analysis(df):
    """Render the per-category charts: article counts and review coverage.

    Args:
        df: Article inventory. Reads the ``category``, ``last_review_date``
            and ``article_id`` columns.
    """
    left_panel, right_panel = st.columns(2)

    with left_panel:
        st.subheader("📚 Articles by Category")

        category_counts = df['category'].value_counts()
        fig_bar = px.bar(
            x=category_counts.values,
            y=category_counts.index,
            orientation='h',
            title="Distribution of Articles by Category"
        )
        fig_bar.update_layout(xaxis_title="Number of Articles", yaxis_title="Category")
        st.plotly_chart(fig_bar, use_container_width=True)

    with right_panel:
        st.subheader("📊 Review Status by Category")

        # Per category: non-null review dates (reviewed articles) and
        # non-null article ids (articles). count() skips missing values,
        # which is exactly the notna().sum() tally.
        review_by_category = (
            df.groupby('category')[['last_review_date', 'article_id']]
            .count()
            .reset_index()
        )

        reviewed = review_by_category['last_review_date']
        totals = review_by_category['article_id']
        review_by_category['review_percentage'] = reviewed / totals * 100

        fig_review = px.bar(
            review_by_category,
            x='category',
            y='review_percentage',
            title="Review Coverage by Category (%)",
            color='review_percentage',
            color_continuous_scale='RdYlGn'
        )
        fig_review.update_layout(xaxis_title="Category", yaxis_title="Review Coverage (%)")
        st.plotly_chart(fig_review, use_container_width=True)

def create_detailed_table(df):
    """Render the filterable article table and its summary statistics.

    Shows three select boxes (status, category, review status), the filtered
    table with dates formatted as ``YYYY-MM-DD``, and four summary metrics
    computed over the filtered rows.

    Args:
        df: Article inventory. Reads the columns listed in
            ``display_columns`` below; the three date columns must be
            datetime-typed.
    """
    st.subheader("📋 Detailed Article Information")

    # Add filters
    col1, col2, col3 = st.columns(3)

    with col1:
        status_filter = st.selectbox("Filter by Status", ["All"] + df['status'].unique().tolist())

    with col2:
        category_filter = st.selectbox("Filter by Category", ["All"] + df['category'].unique().tolist())

    with col3:
        review_filter = st.selectbox("Filter by Review Status", ["All", "Reviewed", "Not Reviewed"])

    # Apply filters
    filtered_df = df.copy()

    if status_filter != "All":
        filtered_df = filtered_df[filtered_df['status'] == status_filter]

    if category_filter != "All":
        filtered_df = filtered_df[filtered_df['category'] == category_filter]

    if review_filter == "Reviewed":
        filtered_df = filtered_df[filtered_df['last_review_date'].notna()]
    elif review_filter == "Not Reviewed":
        filtered_df = filtered_df[filtered_df['last_review_date'].isna()]

    # Format dates for display (missing dates stay missing)
    display_df = filtered_df.copy()
    for date_column in ('created_date', 'published_date', 'last_review_date'):
        display_df[date_column] = display_df[date_column].dt.strftime('%Y-%m-%d')

    # Select columns to display
    display_columns = ['article_id', 'title', 'status', 'category', 'author',
                       'created_date', 'published_date', 'last_review_date',
                       'time_to_publish', 'views']

    # Scale the views bar against the whole inventory so bar lengths stay
    # comparable when filters change; fall back to 1 to keep the range valid.
    max_views = df['views'].max() if len(df) > 0 else 0
    max_views = int(max_views) if not pd.isna(max_views) and max_views > 0 else 1

    st.dataframe(
        display_df[display_columns],
        use_container_width=True,
        hide_index=True,
        column_config={
            "article_id": "Article ID",
            "title": "Title",
            "status": "Status",
            "category": "Category",
            "author": "Author",
            "created_date": "Created Date",
            "published_date": "Published Date",
            "last_review_date": "Last Review Date",
            "time_to_publish": "Time to Publish (Days)",
            # 'views' holds one number per row. BarChartColumn expects a
            # list of numbers per cell, so a progress bar is the fitting
            # column type for a single scalar.
            "views": st.column_config.ProgressColumn(
                "Views",
                help="Number of article views",
                format="%d",
                min_value=0,
                max_value=max_views
            )
        }
    )

    # Summary statistics
    st.subheader("📊 Summary Statistics")
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric("Filtered Articles", len(filtered_df))

    with col2:
        avg_views = filtered_df['views'].mean()
        # The mean is NaN when the filters match no rows; avoid printing "nan".
        st.metric("Avg Views", f"{avg_views:.0f}" if not pd.isna(avg_views) else "N/A")

    with col3:
        publish_times = filtered_df[filtered_df['time_to_publish'].notna()]['time_to_publish']
        if not publish_times.empty:
            st.metric("Avg Time to Publish", f"{publish_times.mean():.1f} days")
        else:
            st.metric("Avg Time to Publish", "N/A")

    with col4:
        reviewed_count = filtered_df['last_review_date'].notna().sum()
        review_rate = (reviewed_count / len(filtered_df)) * 100 if len(filtered_df) > 0 else 0
        st.metric("Review Rate", f"{review_rate:.1f}%")

# Main app
def main():
    """Assemble the dashboard page: title, four tabs and a timestamp footer."""
    st.title("📚 Knowledge Inventory Dashboard")
    st.markdown("Track the volume, status, and lifecycle of content in the knowledge base")

    # Sample inventory shared by every tab
    df = generate_sample_data()

    overview_tab, trends_tab, category_tab, detail_tab = st.tabs([
        "📊 Overview",
        "📈 Trends",
        "📚 Category Analysis",
        "📋 Detailed View"
    ])

    with overview_tab:
        st.header("Knowledge Base Overview")
        create_metrics_cards(df)

        # Static guidance callouts below the KPI cards
        st.markdown("---")
        st.subheader("🎯 Key Insights")

        left_notes, right_notes = st.columns(2)
        with left_notes:
            st.info("**Article Health**: Monitor the balance between active and retired content")
            st.warning("**Review Coverage**: Ensure regular content reviews for accuracy")

        with right_notes:
            st.success("**Publishing Efficiency**: Track time from creation to publication")
            st.error("**SLA Compliance**: Monitor articles exceeding publication deadlines")

    # The remaining tabs all follow the same pattern: a header, then one renderer.
    remaining_tabs = (
        (trends_tab, "Trend Analysis", create_trend_charts),
        (category_tab, "Category Analysis", create_category_analysis),
        (detail_tab, "Detailed Article View", create_detailed_table),
    )
    for tab, heading, render in remaining_tabs:
        with tab:
            st.header(heading)
            render(df)

    # Footer
    st.markdown("---")
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.markdown(f"*Dashboard last updated: {timestamp}*")

# Build the dashboard only when this file is run as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# (Editor UI text captured with the file, not part of the program:
# "Loading code editor..."; "Terminal".)