Create a Streamlit app that performs an interactive April 2025 Earned Wage Access (EWA) drawdown analysis for our payroll-integrated financial platform.

Input Files:
- April Money.csv – Drawdown transactions
- Employee Data.csv – All opted-in employees
- Payroll Excel files for different organizations, located in a Payroll/ folder

Key Features:
1. Statistical Summary
- Show total disbursed, total access fee collected, and number of active users
2. Time Trends
- Daily transaction count over April (line/bar chart)
- Heatmap of transactions by hour vs. day of week
- Most active transaction date
3. Inactive User Analysis
- Identify employees who opted in but did not transact
- Display this list and export to CSV

App Requirements:
- User-uploadable CSVs (or use default files if not uploaded)
- Interactive filters for selecting organization
- Clean UI with tabs or expandable sections
- Export buttons for reports (CSV or Excel)
- Matplotlib or Seaborn for plots
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import io
import os
from pathlib import Path
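# Assumed input schemas (inferred from the sample-data generator and column usage below):
#   April Money.csv   -> transaction_id, employee_id, organization, transaction_date,
#                        amount_disbursed, access_fee, transaction_type
#   Employee Data.csv -> employee_id, organization, first_name, last_name, email,
#                        opt_in_date, department, salary_range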
# Page configuration
st.set_page_config(
    page_title="EWA Drawdown Analysis - April 2025",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Custom CSS for better styling
st.markdown("""
<style>
.metric-container {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
margin: 0.5rem 0;
}
.stTabs [data-baseweb="tab-list"] {
gap: 2px;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
padding-left: 20px;
padding-right: 20px;
}
</style>
""", unsafe_allow_html=True)
# Title and header
st.title("EWA Drawdown Analysis - April 2025")
st.markdown("### Interactive Analysis of Earned Wage Access Transactions")
st.markdown("---")
# Helper functions
@st.cache_data
def load_sample_data():
    """Generate sample data for demonstration"""
    np.random.seed(42)

    # Sample April Money.csv (drawdown transactions)
    start_date = datetime(2025, 4, 1)
    end_date = datetime(2025, 4, 30)
    dates = pd.date_range(start_date, end_date, freq='H')

    # Generate realistic transaction patterns
    transactions = []
    employee_ids = [f"EMP{i:04d}" for i in range(1, 501)]  # 500 employees
    organizations = ["TechCorp", "RetailPlus", "HealthSystem", "ManufacturingInc", "ServiceGroup"]

    for _ in range(2000):  # 2000 transactions in April
        # np.random.choice returns np.datetime64, so wrap in pd.Timestamp
        # to get .weekday() and .replace()
        date = pd.Timestamp(np.random.choice(dates))
        # More transactions during weekdays and business hours
        if date.weekday() < 5:  # Weekday
            hour_weights = [0.5, 0.3, 0.2, 0.1, 0.1, 0.2, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0,
                            3.5, 3.0, 2.5, 2.0, 1.8, 1.5, 1.2, 1.0, 0.8, 0.6, 0.4, 0.3]
        else:  # Weekend
            hour_weights = [0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.3, 0.5, 0.8, 1.0, 1.2, 1.5,
                            1.8, 2.0, 1.8, 1.5, 1.2, 1.0, 0.8, 0.6, 0.4, 0.3, 0.2, 0.2]
        hour = int(np.random.choice(range(24), p=np.array(hour_weights) / np.sum(hour_weights)))
        transaction_date = date.replace(hour=hour, minute=int(np.random.randint(0, 60)))

        transactions.append({
            'transaction_id': f"TXN{len(transactions):06d}",
            'employee_id': np.random.choice(employee_ids),
            'organization': np.random.choice(organizations),
            'transaction_date': transaction_date,
            'amount_disbursed': np.random.uniform(20, 500),
            'access_fee': np.random.uniform(1, 15),
            'transaction_type': 'drawdown'
        })

    april_money_df = pd.DataFrame(transactions)

    # Sample Employee Data.csv (all opted-in employees)
    employees = []
    for emp_id in employee_ids:
        employees.append({
            'employee_id': emp_id,
            'organization': np.random.choice(organizations),
            'first_name': f"FirstName{emp_id[-3:]}",
            'last_name': f"LastName{emp_id[-3:]}",
            'email': f"{emp_id.lower()}@company.com",
            'opt_in_date': start_date - timedelta(days=np.random.randint(1, 90)),
            'department': np.random.choice(['Sales', 'Marketing', 'Operations', 'HR', 'Finance', 'IT']),
            'salary_range': np.random.choice(['30k-40k', '40k-50k', '50k-60k', '60k-70k', '70k+'])
        })
    employee_data_df = pd.DataFrame(employees)

    return april_money_df, employee_data_df
@st.cache_data
def process_data(april_money_df, employee_data_df, selected_org=None):
    """Process and filter data based on organization selection"""
    if selected_org and selected_org != "All Organizations":
        april_money_filtered = april_money_df[april_money_df['organization'] == selected_org].copy()
        employee_data_filtered = employee_data_df[employee_data_df['organization'] == selected_org].copy()
    else:
        april_money_filtered = april_money_df.copy()
        employee_data_filtered = employee_data_df.copy()

    # Convert date columns
    april_money_filtered['transaction_date'] = pd.to_datetime(april_money_filtered['transaction_date'])
    employee_data_filtered['opt_in_date'] = pd.to_datetime(employee_data_filtered['opt_in_date'])

    # Add derived columns
    april_money_filtered['date'] = april_money_filtered['transaction_date'].dt.date
    april_money_filtered['hour'] = april_money_filtered['transaction_date'].dt.hour
    april_money_filtered['day_of_week'] = april_money_filtered['transaction_date'].dt.day_name()
    april_money_filtered['weekday'] = april_money_filtered['transaction_date'].dt.weekday

    return april_money_filtered, employee_data_filtered
def create_download_link(df, filename, file_format='csv'):
    """Create a download button for a dataframe"""
    if file_format == 'csv':
        csv = df.to_csv(index=False)
        return st.download_button(
            label=f"Download {filename}",
            data=csv,
            file_name=filename,
            mime='text/csv'
        )
    elif file_format == 'excel':
        output = io.BytesIO()
        # Requires the xlsxwriter package to be installed
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            df.to_excel(writer, index=False, sheet_name='Data')
        excel_data = output.getvalue()
        return st.download_button(
            label=f"Download {filename}",
            data=excel_data,
            file_name=filename,
            mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )
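# The original request also mentions per-organization payroll Excel files in a Payroll/ folder.
# A minimal sketch of how those could be pulled in, assuming one workbook per organization
# named after it; this helper is illustrative and is not wired into the analysis below.
@st.cache_data
def load_payroll_files(payroll_dir="Payroll"):
    """Load every Excel workbook in the Payroll/ folder into a single dataframe."""
    frames = []
    for path in Path(payroll_dir).glob("*.xlsx"):
        payroll_df = pd.read_excel(path)        # requires openpyxl
        payroll_df["organization"] = path.stem  # assumption: file name identifies the organization
        frames.append(payroll_df)
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()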
# Sidebar for file uploads and filters
st.sidebar.header("Data Input")

# File upload options
use_sample_data = st.sidebar.checkbox("Use Sample Data", value=True, help="Check to use generated sample data for demonstration")

if not use_sample_data:
    st.sidebar.subheader("Upload CSV Files")
    april_money_file = st.sidebar.file_uploader("April Money.csv", type=['csv'])
    employee_data_file = st.sidebar.file_uploader("Employee Data.csv", type=['csv'])

    if april_money_file and employee_data_file:
        april_money_df = pd.read_csv(april_money_file)
        employee_data_df = pd.read_csv(employee_data_file)
    else:
        st.warning("Please upload both CSV files to proceed.")
        st.stop()
else:
    april_money_df, employee_data_df = load_sample_data()

# Organization filter
st.sidebar.subheader("Filters")
organizations = ["All Organizations"] + sorted(april_money_df['organization'].unique().tolist())
selected_org = st.sidebar.selectbox("Select Organization", organizations)
# Process data
april_money_filtered, employee_data_filtered = process_data(april_money_df, employee_data_df, selected_org)
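# Defensive check (assumption): a selected organization may have no April transactions,
# which would break the charts and per-user averages below, so stop early with a message.
if april_money_filtered.empty:
    st.warning("No transactions found for the selected organization.")
    st.stop()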
# Main content area
col1, col2, col3 = st.columns(3)

# Statistical Summary
with col1:
    total_disbursed = april_money_filtered['amount_disbursed'].sum()
    st.metric("Total Disbursed", f"${total_disbursed:,.2f}")

with col2:
    total_fees = april_money_filtered['access_fee'].sum()
    st.metric("Total Access Fees", f"${total_fees:,.2f}")

with col3:
    active_users = april_money_filtered['employee_id'].nunique()
    st.metric("Active Users", f"{active_users:,}")
st.markdown("---")
# Create tabs for different analyses
tab1, tab2, tab3, tab4 = st.tabs(["Overview", "Time Trends", "Heatmap Analysis", "Inactive Users"])
with tab1:
    st.subheader("Transaction Overview")

    col1, col2 = st.columns(2)

    with col1:
        # Daily transaction summary
        daily_summary = april_money_filtered.groupby('date').agg({
            'transaction_id': 'count',
            'amount_disbursed': 'sum',
            'access_fee': 'sum'
        }).rename(columns={'transaction_id': 'transaction_count'})

        st.write("**Daily Transaction Summary**")
        st.dataframe(daily_summary.tail(10), use_container_width=True)
        create_download_link(daily_summary.reset_index(), "daily_summary.csv")

    with col2:
        # Organization breakdown
        if selected_org == "All Organizations":
            org_summary = april_money_filtered.groupby('organization').agg({
                'transaction_id': 'count',
                'amount_disbursed': 'sum',
                'employee_id': 'nunique'
            }).rename(columns={'transaction_id': 'transactions', 'employee_id': 'active_users'})

            st.write("**Organization Breakdown**")
            st.dataframe(org_summary, use_container_width=True)
        else:
            # Employee breakdown for the selected organization
            emp_summary = april_money_filtered.groupby('employee_id').agg({
                'transaction_id': 'count',
                'amount_disbursed': 'sum',
                'access_fee': 'sum'
            }).rename(columns={'transaction_id': 'transactions'}).sort_values('transactions', ascending=False)

            st.write(f"**Top Users - {selected_org}**")
            st.dataframe(emp_summary.head(10), use_container_width=True)
with tab2:
    st.subheader("Time Trends Analysis")

    # Daily transaction count
    daily_counts = april_money_filtered.groupby('date')['transaction_id'].count().reset_index()
    daily_counts.columns = ['date', 'transaction_count']

    fig = px.line(daily_counts, x='date', y='transaction_count',
                  title='Daily Transaction Count - April 2025',
                  labels={'transaction_count': 'Number of Transactions', 'date': 'Date'})
    fig.update_layout(height=400)
    st.plotly_chart(fig, use_container_width=True)

    # Most active transaction date
    most_active_date = daily_counts.loc[daily_counts['transaction_count'].idxmax()]
    st.success(f"**Most Active Day:** {most_active_date['date']} with {most_active_date['transaction_count']} transactions")

    # Hourly and day-of-week distributions
    col1, col2 = st.columns(2)

    with col1:
        hourly_dist = april_money_filtered.groupby('hour')['transaction_id'].count().reset_index()
        fig = px.bar(hourly_dist, x='hour', y='transaction_id',
                     title='Transactions by Hour of Day',
                     labels={'transaction_id': 'Number of Transactions', 'hour': 'Hour'})
        fig.update_layout(height=300)
        st.plotly_chart(fig, use_container_width=True)

    with col2:
        day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
        daily_dist = april_money_filtered.groupby('day_of_week')['transaction_id'].count().reindex(day_order).reset_index()
        fig = px.bar(daily_dist, x='day_of_week', y='transaction_id',
                     title='Transactions by Day of Week',
                     labels={'transaction_id': 'Number of Transactions', 'day_of_week': 'Day'})
        fig.update_layout(height=300)
        st.plotly_chart(fig, use_container_width=True)
with tab3:
    st.subheader("Transaction Heatmap Analysis")

    # Create heatmap data (rows = weekday 0-6, columns = hour 0-23)
    heatmap_data = april_money_filtered.groupby(['weekday', 'hour']).size().reset_index(name='count')
    heatmap_pivot = heatmap_data.pivot(index='weekday', columns='hour', values='count').fillna(0)

    # Ensure all seven weekdays are present before relabeling; otherwise the label
    # assignment fails when the filtered data is missing a weekday
    day_labels = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    heatmap_pivot = heatmap_pivot.reindex(range(7), fill_value=0)
    heatmap_pivot.index = day_labels

    # Create plotly heatmap
    fig = px.imshow(heatmap_pivot,
                    labels=dict(x="Hour of Day", y="Day of Week", color="Transactions"),
                    title="Transaction Heatmap: Day of Week vs Hour",
                    color_continuous_scale="YlOrRd")
    fig.update_layout(height=400)
    st.plotly_chart(fig, use_container_width=True)

    # Peak activity insights: find the hour column containing the overall maximum,
    # then the day on which that maximum occurs
    max_activity = int(heatmap_pivot.max().max())
    peak_hour = heatmap_pivot.max().idxmax()
    peak_day = heatmap_pivot.idxmax().loc[peak_hour]
    st.info(f"**Peak Activity:** {peak_day} at {peak_hour}:00 with {max_activity} transactions")
with tab4:
    st.subheader("Inactive User Analysis")

    # Find inactive users (opted in but no transactions)
    active_employees = set(april_money_filtered['employee_id'].unique())
    all_employees = set(employee_data_filtered['employee_id'].unique())
    inactive_employees = all_employees - active_employees

    # Get inactive user details
    inactive_df = employee_data_filtered[employee_data_filtered['employee_id'].isin(inactive_employees)].copy()

    col1, col2 = st.columns([2, 1])

    with col1:
        st.write(f"**Found {len(inactive_df)} inactive users out of {len(employee_data_filtered)} total opted-in employees**")

        if len(inactive_df) > 0:
            # Display inactive users
            display_cols = ['employee_id', 'first_name', 'last_name', 'organization', 'department', 'opt_in_date']
            st.dataframe(inactive_df[display_cols], use_container_width=True)

            # Download button
            create_download_link(inactive_df, "inactive_users.csv")
        else:
            st.success("All opted-in users have made at least one transaction!")

    with col2:
        # Inactive users by department
        if len(inactive_df) > 0:
            dept_inactive = inactive_df['department'].value_counts()
            fig = px.pie(values=dept_inactive.values, names=dept_inactive.index,
                         title="Inactive Users by Department")
            fig.update_layout(height=300)
            st.plotly_chart(fig, use_container_width=True)

        # Activity rate
        activity_rate = (len(active_employees) / len(all_employees)) * 100
        st.metric("Activity Rate", f"{activity_rate:.1f}%")
# Additional insights section
st.markdown("---")
st.subheader("Key Insights")

insights_col1, insights_col2 = st.columns(2)

with insights_col1:
    avg_transaction = april_money_filtered['amount_disbursed'].mean()
    avg_fee = april_money_filtered['access_fee'].mean()
    st.write(f"**Average Transaction:** ${avg_transaction:.2f}")
    st.write(f"**Average Fee:** ${avg_fee:.2f}")
    st.write(f"**Fee Rate:** {(avg_fee / avg_transaction) * 100:.2f}%")

with insights_col2:
    transactions_per_user = len(april_money_filtered) / active_users
    st.write(f"**Avg Transactions per User:** {transactions_per_user:.1f}")
    st.write(f"**Total Transactions:** {len(april_money_filtered):,}")
    st.write("**Analysis Period:** April 1-30, 2025")
# Footer
st.markdown("---")
st.markdown("*EWA Drawdown Analysis Dashboard - Built with Streamlit*")