Create a Streamlit app that performs an interactive April 2025 Earned Wage Access (EWA) drawdown analysis for our payroll-integrated financial platform.

Input Files:
- April Money.csv – Drawdown transactions
- Employee Data.csv – All opted-in employees
- Payroll Excel files for different organizations, located in a Payroll/ folder

Key Features:
1. Statistical Summary
- Show total disbursed, total access fee collected, and number of active users
2. Time Trends
- Daily transaction count over April (line/bar chart)
- Heatmap of transactions by hour vs. day of week
- Most active transaction date
3. Inactive User Analysis
- Identify employees who opted in but did not transact
- Display this list and export to CSV

App Requirements:
- User-uploadable CSVs (or use default files if not uploaded)
- Interactive filters for selecting organization
- Clean UI with tabs or expandable sections
- Export buttons for reports (CSV or Excel)
- Matplotlib or Seaborn for plots
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import io
import os
from pathlib import Path
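# Assumed input schemas (inferred from the sample-data generator and column usage below):
#   April Money.csv   -> transaction_id, employee_id, organization, transaction_date,
#                        amount_disbursed, access_fee, transaction_type
#   Employee Data.csv -> employee_id, organization, first_name, last_name, email,
#                        opt_in_date, department, salary_range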
# Page configuration
st.set_page_config(
    page_title="EWA Drawdown Analysis - April 2025",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Custom CSS for better styling
st.markdown("""
<style>
.metric-container {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
margin: 0.5rem 0;
}
.stTabs [data-baseweb="tab-list"] {
gap: 2px;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
padding-left: 20px;
padding-right: 20px;
}
</style>
""", unsafe_allow_html=True)
# Title and header
st.title("EWA Drawdown Analysis - April 2025")
st.markdown("### Interactive Analysis of Earned Wage Access Transactions")
st.markdown("---")
# Helper functions
@st.cache_data
def load_sample_data():
    """Generate sample data for demonstration"""
    np.random.seed(42)

    # Sample April Money.csv (drawdown transactions)
    start_date = datetime(2025, 4, 1)
    end_date = datetime(2025, 4, 30)
    dates = pd.date_range(start_date, end_date, freq='H')

    # Generate realistic transaction patterns
    transactions = []
    employee_ids = [f"EMP{i:04d}" for i in range(1, 501)]  # 500 employees
    organizations = ["TechCorp", "RetailPlus", "HealthSystem", "ManufacturingInc", "ServiceGroup"]

    for _ in range(2000):  # 2000 transactions in April
        # np.random.choice returns np.datetime64, so wrap in pd.Timestamp
        # to get .weekday() and .replace()
        date = pd.Timestamp(np.random.choice(dates))
        # More transactions during weekdays and business hours
        if date.weekday() < 5:  # Weekday
            hour_weights = [0.5, 0.3, 0.2, 0.1, 0.1, 0.2, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0,
                            3.5, 3.0, 2.5, 2.0, 1.8, 1.5, 1.2, 1.0, 0.8, 0.6, 0.4, 0.3]
        else:  # Weekend
            hour_weights = [0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.3, 0.5, 0.8, 1.0, 1.2, 1.5,
                            1.8, 2.0, 1.8, 1.5, 1.2, 1.0, 0.8, 0.6, 0.4, 0.3, 0.2, 0.2]
        hour = int(np.random.choice(range(24), p=np.array(hour_weights) / np.sum(hour_weights)))
        transaction_date = date.replace(hour=hour, minute=int(np.random.randint(0, 60)))

        transactions.append({
            'transaction_id': f"TXN{len(transactions):06d}",
            'employee_id': np.random.choice(employee_ids),
            'organization': np.random.choice(organizations),
            'transaction_date': transaction_date,
            'amount_disbursed': np.random.uniform(20, 500),
            'access_fee': np.random.uniform(1, 15),
            'transaction_type': 'drawdown'
        })

    april_money_df = pd.DataFrame(transactions)

    # Sample Employee Data.csv (all opted-in employees)
    employees = []
    for emp_id in employee_ids:
        employees.append({
            'employee_id': emp_id,
            'organization': np.random.choice(organizations),
            'first_name': f"FirstName{emp_id[-3:]}",
            'last_name': f"LastName{emp_id[-3:]}",
            'email': f"{emp_id.lower()}@company.com",
            'opt_in_date': start_date - timedelta(days=np.random.randint(1, 90)),
            'department': np.random.choice(['Sales', 'Marketing', 'Operations', 'HR', 'Finance', 'IT']),
            'salary_range': np.random.choice(['30k-40k', '40k-50k', '50k-60k', '60k-70k', '70k+'])
        })
    employee_data_df = pd.DataFrame(employees)

    return april_money_df, employee_data_df
@st.cache_data
def process_data(april_money_df, employee_data_df, selected_org=None):
    """Process and filter data based on organization selection"""
    if selected_org and selected_org != "All Organizations":
        april_money_filtered = april_money_df[april_money_df['organization'] == selected_org].copy()
        employee_data_filtered = employee_data_df[employee_data_df['organization'] == selected_org].copy()
    else:
        april_money_filtered = april_money_df.copy()
        employee_data_filtered = employee_data_df.copy()

    # Convert date columns
    april_money_filtered['transaction_date'] = pd.to_datetime(april_money_filtered['transaction_date'])
    employee_data_filtered['opt_in_date'] = pd.to_datetime(employee_data_filtered['opt_in_date'])

    # Add derived columns
    april_money_filtered['date'] = april_money_filtered['transaction_date'].dt.date
    april_money_filtered['hour'] = april_money_filtered['transaction_date'].dt.hour
    april_money_filtered['day_of_week'] = april_money_filtered['transaction_date'].dt.day_name()
    april_money_filtered['weekday'] = april_money_filtered['transaction_date'].dt.weekday

    return april_money_filtered, employee_data_filtered
def create_download_link(df, filename, file_format='csv'):
    """Create a download button for a dataframe"""
    if file_format == 'csv':
        csv = df.to_csv(index=False)
        return st.download_button(
            label=f"Download {filename}",
            data=csv,
            file_name=filename,
            mime='text/csv'
        )
    elif file_format == 'excel':
        output = io.BytesIO()
        # Requires the xlsxwriter package to be installed
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            df.to_excel(writer, index=False, sheet_name='Data')
        excel_data = output.getvalue()
        return st.download_button(
            label=f"Download {filename}",
            data=excel_data,
            file_name=filename,
            mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )
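# The original request also mentions per-organization payroll Excel files in a Payroll/ folder.
# A minimal sketch of how those could be pulled in, assuming one workbook per organization
# named after it; this helper is illustrative and is not wired into the analysis below.
@st.cache_data
def load_payroll_files(payroll_dir="Payroll"):
    """Load every Excel workbook in the Payroll/ folder into a single dataframe."""
    frames = []
    for path in Path(payroll_dir).glob("*.xlsx"):
        payroll_df = pd.read_excel(path)        # requires openpyxl
        payroll_df["organization"] = path.stem  # assumption: file name identifies the organization
        frames.append(payroll_df)
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()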
# Sidebar for file uploads and filters
st.sidebar.header("Data Input")

# File upload options
use_sample_data = st.sidebar.checkbox("Use Sample Data", value=True, help="Check to use generated sample data for demonstration")

if not use_sample_data:
    st.sidebar.subheader("Upload CSV Files")
    april_money_file = st.sidebar.file_uploader("April Money.csv", type=['csv'])
    employee_data_file = st.sidebar.file_uploader("Employee Data.csv", type=['csv'])

    if april_money_file and employee_data_file:
        april_money_df = pd.read_csv(april_money_file)
        employee_data_df = pd.read_csv(employee_data_file)
    else:
        st.warning("Please upload both CSV files to proceed.")
        st.stop()
else:
    april_money_df, employee_data_df = load_sample_data()

# Organization filter
st.sidebar.subheader("Filters")
organizations = ["All Organizations"] + sorted(april_money_df['organization'].unique().tolist())
selected_org = st.sidebar.selectbox("Select Organization", organizations)
# Process data
april_money_filtered, employee_data_filtered = process_data(april_money_df, employee_data_df, selected_org)
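# Defensive check (assumption): a selected organization may have no April transactions,
# which would break the charts and per-user averages below, so stop early with a message.
if april_money_filtered.empty:
    st.warning("No transactions found for the selected organization.")
    st.stop()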
# Main content area
col1, col2, col3 = st.columns(3)

# Statistical Summary
with col1:
    total_disbursed = april_money_filtered['amount_disbursed'].sum()
    st.metric("Total Disbursed", f"${total_disbursed:,.2f}")

with col2:
    total_fees = april_money_filtered['access_fee'].sum()
    st.metric("Total Access Fees", f"${total_fees:,.2f}")

with col3:
    active_users = april_money_filtered['employee_id'].nunique()
    st.metric("Active Users", f"{active_users:,}")
st.markdown("---")
# Create tabs for different analyses
tab1, tab2, tab3, tab4 = st.tabs(["Overview", "Time Trends", "Heatmap Analysis", "Inactive Users"])
with tab1:
    st.subheader("Transaction Overview")

    col1, col2 = st.columns(2)

    with col1:
        # Daily transaction summary
        daily_summary = april_money_filtered.groupby('date').agg({
            'transaction_id': 'count',
            'amount_disbursed': 'sum',
            'access_fee': 'sum'
        }).rename(columns={'transaction_id': 'transaction_count'})

        st.write("**Daily Transaction Summary**")
        st.dataframe(daily_summary.tail(10), use_container_width=True)
        create_download_link(daily_summary.reset_index(), "daily_summary.csv")

    with col2:
        # Organization breakdown
        if selected_org == "All Organizations":
            org_summary = april_money_filtered.groupby('organization').agg({
                'transaction_id': 'count',
                'amount_disbursed': 'sum',
                'employee_id': 'nunique'
            }).rename(columns={'transaction_id': 'transactions', 'employee_id': 'active_users'})

            st.write("**Organization Breakdown**")
            st.dataframe(org_summary, use_container_width=True)
        else:
            # Employee breakdown for the selected organization
            emp_summary = april_money_filtered.groupby('employee_id').agg({
                'transaction_id': 'count',
                'amount_disbursed': 'sum',
                'access_fee': 'sum'
            }).rename(columns={'transaction_id': 'transactions'}).sort_values('transactions', ascending=False)

            st.write(f"**Top Users - {selected_org}**")
            st.dataframe(emp_summary.head(10), use_container_width=True)
with tab2:
    st.subheader("Time Trends Analysis")

    # Daily transaction count
    daily_counts = april_money_filtered.groupby('date')['transaction_id'].count().reset_index()
    daily_counts.columns = ['date', 'transaction_count']

    fig = px.line(daily_counts, x='date', y='transaction_count',
                  title='Daily Transaction Count - April 2025',
                  labels={'transaction_count': 'Number of Transactions', 'date': 'Date'})
    fig.update_layout(height=400)
    st.plotly_chart(fig, use_container_width=True)

    # Most active transaction date
    most_active_date = daily_counts.loc[daily_counts['transaction_count'].idxmax()]
    st.success(f"**Most Active Day:** {most_active_date['date']} with {most_active_date['transaction_count']} transactions")

    # Hourly and day-of-week distributions
    col1, col2 = st.columns(2)

    with col1:
        hourly_dist = april_money_filtered.groupby('hour')['transaction_id'].count().reset_index()
        fig = px.bar(hourly_dist, x='hour', y='transaction_id',
                     title='Transactions by Hour of Day',
                     labels={'transaction_id': 'Number of Transactions', 'hour': 'Hour'})
        fig.update_layout(height=300)
        st.plotly_chart(fig, use_container_width=True)

    with col2:
        day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
        daily_dist = april_money_filtered.groupby('day_of_week')['transaction_id'].count().reindex(day_order).reset_index()
        fig = px.bar(daily_dist, x='day_of_week', y='transaction_id',
                     title='Transactions by Day of Week',
                     labels={'transaction_id': 'Number of Transactions', 'day_of_week': 'Day'})
        fig.update_layout(height=300)
        st.plotly_chart(fig, use_container_width=True)
with tab3:
    st.subheader("Transaction Heatmap Analysis")

    # Create heatmap data (rows = weekday 0-6, columns = hour 0-23)
    heatmap_data = april_money_filtered.groupby(['weekday', 'hour']).size().reset_index(name='count')
    heatmap_pivot = heatmap_data.pivot(index='weekday', columns='hour', values='count').fillna(0)

    # Ensure all seven weekdays are present before relabeling; otherwise the label
    # assignment fails when the filtered data is missing a weekday
    day_labels = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    heatmap_pivot = heatmap_pivot.reindex(range(7), fill_value=0)
    heatmap_pivot.index = day_labels

    # Create plotly heatmap
    fig = px.imshow(heatmap_pivot,
                    labels=dict(x="Hour of Day", y="Day of Week", color="Transactions"),
                    title="Transaction Heatmap: Day of Week vs Hour",
                    color_continuous_scale="YlOrRd")
    fig.update_layout(height=400)
    st.plotly_chart(fig, use_container_width=True)

    # Peak activity insights: find the hour column containing the overall maximum,
    # then the day on which that maximum occurs
    max_activity = int(heatmap_pivot.max().max())
    peak_hour = heatmap_pivot.max().idxmax()
    peak_day = heatmap_pivot.idxmax().loc[peak_hour]
    st.info(f"**Peak Activity:** {peak_day} at {peak_hour}:00 with {max_activity} transactions")
with tab4:
    st.subheader("Inactive User Analysis")

    # Find inactive users (opted in but no transactions)
    active_employees = set(april_money_filtered['employee_id'].unique())
    all_employees = set(employee_data_filtered['employee_id'].unique())
    inactive_employees = all_employees - active_employees

    # Get inactive user details
    inactive_df = employee_data_filtered[employee_data_filtered['employee_id'].isin(inactive_employees)].copy()

    col1, col2 = st.columns([2, 1])

    with col1:
        st.write(f"**Found {len(inactive_df)} inactive users out of {len(employee_data_filtered)} total opted-in employees**")

        if len(inactive_df) > 0:
            # Display inactive users
            display_cols = ['employee_id', 'first_name', 'last_name', 'organization', 'department', 'opt_in_date']
            st.dataframe(inactive_df[display_cols], use_container_width=True)

            # Download button
            create_download_link(inactive_df, "inactive_users.csv")
        else:
            st.success("All opted-in users have made at least one transaction!")

    with col2:
        # Inactive users by department
        if len(inactive_df) > 0:
            dept_inactive = inactive_df['department'].value_counts()
            fig = px.pie(values=dept_inactive.values, names=dept_inactive.index,
                         title="Inactive Users by Department")
            fig.update_layout(height=300)
            st.plotly_chart(fig, use_container_width=True)

        # Activity rate
        activity_rate = (len(active_employees) / len(all_employees)) * 100
        st.metric("Activity Rate", f"{activity_rate:.1f}%")
# Additional insights section
st.markdown("---")
st.subheader("Key Insights")

insights_col1, insights_col2 = st.columns(2)

with insights_col1:
    avg_transaction = april_money_filtered['amount_disbursed'].mean()
    avg_fee = april_money_filtered['access_fee'].mean()
    st.write(f"**Average Transaction:** ${avg_transaction:.2f}")
    st.write(f"**Average Fee:** ${avg_fee:.2f}")
    st.write(f"**Fee Rate:** {(avg_fee / avg_transaction) * 100:.2f}%")

with insights_col2:
    transactions_per_user = len(april_money_filtered) / active_users
    st.write(f"**Avg Transactions per User:** {transactions_per_user:.1f}")
    st.write(f"**Total Transactions:** {len(april_money_filtered):,}")
    st.write("**Analysis Period:** April 1-30, 2025")
# Footer
st.markdown("---")
st.markdown("*EWA Drawdown Analysis Dashboard - Built with Streamlit*")