Ploomber AI Editor | ensemble-placement-manager-da66

Drop files here
or click to upload
Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
import re
from st_aggrid import AgGrid, GridOptionsBuilder, JsCode
from st_aggrid.grid_options_builder import GridOptionsBuilder
import plotly.express as px
import plotly.graph_objects as go
import io
import base64

# Set page config to light theme only
st.set_page_config(
    page_title="Ensemble Placement Manager",
    page_icon="🎵",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Force light theme
st.markdown("""
<style>
    .stApp {
        background-color: #FFFFFF;
        color: #000000;
    }
    .main {
        background-color: #FFFFFF;
    }
    h1, h2, h3, h4, h5, h6 {
        color: #0E1117;
    }
    .st-bw {
        color: #0E1117;
    }
    .css-145kmo2 {
        color: #0E1117;
    }
    .css-1aehpvj {
        color: #0E1117;
    }
    /* Badge styles */
    .badge {
        color: white;
        padding: 4px 8px;
        text-align: center;
        border-radius: 5px;
        font-size: 12px;
        font-weight: bold;
        display: inline-block;
        margin: 2px;
    }
    .ft-badge {
        background-color: #FF4B4B;
    }
    .pt3yr-badge {
        background-color: #1E88E5;
    }
    .ptspecial-badge {
        background-color: #FF9800;
    }
    .adjunct-badge {
        background-color: #795548;
    }
    /* Bucket header styles */
    .bucket-header {
        background-color: #f0f2f6;
        padding: 10px;
        border-radius: 5px;
        margin-bottom: 10px;
    }
</style>
""", unsafe_allow_html=True)

# Helper functions
def extract_rating_level(style):
    """Extract rating level from style string"""
    if pd.isna(style):
        return None
    match = re.search(r'(\d+)$', str(style))
    if match:
        return int(match.group(1))
    return None

def determine_faculty_type(faculty_email):
    """Determine faculty type based on email domain"""
    if pd.isna(faculty_email):
        return "Unknown"
    email = str(faculty_email).lower()
    if "ft@" in email:
        return "FT"
    elif "pt3yr@" in email:
        return "PT3yr"
    elif "ptspecial@" in email:
        return "PTSpecial"
    else:
        return "Adjunct"

def get_badge_html(faculty_type):
    """Generate HTML for faculty type badge"""
    badge_class = ""
    if faculty_type == "FT":
        badge_class = "ft-badge"
    elif faculty_type == "PT3yr":
        badge_class = "pt3yr-badge"
    elif faculty_type == "PTSpecial":
        badge_class = "ptspecial-badge"
    elif faculty_type == "Adjunct":
        badge_class = "adjunct-badge"
    
    return f'<div class="badge {badge_class}">{faculty_type}</div>'

def faculty_type_priority(faculty_type):
    """Return priority value for faculty type for sorting"""
    if faculty_type == "FT":
        return 1
    elif faculty_type == "PT3yr":
        return 2
    elif faculty_type == "PTSpecial":
        return 3
    elif faculty_type == "Adjunct":
        return 4
    return 5

def download_link(df, filename, link_text):
    """Generate a download link for a dataframe"""
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    href = f'<a href="data:file/csv;base64,{b64}" download="{filename}" target="_blank">{link_text}</a>'
    return href

def load_data():
    """Load data from Google Sheets URL or file upload"""
    st.sidebar.header("Data Source")
    data_source = st.sidebar.radio("Select data source:", ["Google Sheet", "File Upload"])
    
    if data_source == "Google Sheet":
        sheet_url = st.sidebar.text_input(
            "Google Sheet URL",
            value="https://docs.google.com/spreadsheets/d/1ZGTodlRycGFKB8OwSfikXt_X0MGKUDnxla1qb4EPc/edit?usp=sharing"
        )
        if sheet_url:
            try:
                # Extract the key from the URL
                sheet_id = sheet_url.split("/d/")[1].split("/")[0]
                # Create a URL for downloading the sheet as CSV
                csv_export_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
                df = pd.read_csv(csv_export_url)
                st.sidebar.success("Data loaded successfully!")
                return df
            except Exception as e:
                st.sidebar.error(f"Error loading data: {e}")
                return None
    else:
        uploaded_file = st.sidebar.file_uploader("Upload CSV or Excel file", type=["csv", "xlsx"])
        if uploaded_file is not None:
            try:
                if uploaded_file.name.endswith('.csv'):
                    df = pd.read_csv(uploaded_file)
                else:
                    df = pd.read_excel(uploaded_file)
                st.sidebar.success("Data loaded successfully!")
                return df
            except Exception as e:
                st.sidebar.error(f"Error loading data: {e}")
                return None
    
    return None

def load_student_data():
    """Load optional student data"""
    st.sidebar.header("Student Data (Optional)")
    student_data_source = st.sidebar.radio("Include student data?", ["No", "Yes"])
    
    if student_data_source == "Yes":
        uploaded_file = st.sidebar.file_uploader("Upload Student CSV", type=["csv", "xlsx"])
        if uploaded_file is not None:
            try:
                if uploaded_file.name.endswith('.csv'):
                    df = pd.read_csv(uploaded_file)
                else:
                    df = pd.read_excel(uploaded_file)
                
                # Check if the required columns exist
                required_cols = ["Student_ID", "Instrument", "Rating"]
                missing_cols = [col for col in required_cols if col not in df.columns]
                
                if missing_cols:
                    st.sidebar.error(f"Missing required columns: {', '.join(missing_cols)}")
                    return None
                
                st.sidebar.success("Student data loaded successfully!")
                return df
            except Exception as e:
                st.sidebar.error(f"Error loading student data: {e}")
                return None
    
    return None

def prepare_data(df):
    """Prepare and transform the data"""
    if df is None:
        return None
    
    # Ensure required columns exist
    required_cols = ["Class", "Style", "Faculty", "Fac_email", "DAYS", "Start_time", "End_time", "student_enrolled", "student_total_need"]
    missing_cols = [col for col in required_cols if col not in df.columns]
    
    if missing_cols:
        st.error(f"Missing required columns: {', '.join(missing_cols)}")
        return None
    
    # Make a copy to avoid modifying original data
    processed_df = df.copy()
    
    # Convert numeric columns if they aren't already
    processed_df['student_enrolled'] = pd.to_numeric(processed_df['student_enrolled'], errors='coerce').fillna(0).astype(int)
    processed_df['student_total_need'] = pd.to_numeric(processed_df['student_total_need'], errors='coerce').fillna(0).astype(int)
    
    # Calculate open seats
    processed_df['open_seats'] = processed_df['student_total_need'] - processed_df['student_enrolled']
    
    # Extract rating level from Style
    processed_df['rating_level'] = processed_df['Style'].apply(extract_rating_level)
    
    # Determine faculty type
    processed_df['faculty_type'] = processed_df['Fac_email'].apply(determine_faculty_type)
    
    # Add faculty badge HTML
    processed_df['faculty_badge'] = processed_df['faculty_type'].apply(get_badge_html)
    
    # Add priority for sorting buckets
    processed_df['faculty_priority'] = processed_df['faculty_type'].apply(faculty_type_priority)
    
    # Determine bucket based on open seats
    def assign_bucket(open_seats):
        if open_seats >= 3:
            return 3
        elif open_seats == 2:
            return 2
        elif open_seats == 1:
            return 1
        else:
            return 0
    
    processed_df['bucket'] = processed_df['open_seats'].apply(assign_bucket)
    
    return processed_df

def filter_data(df):
    """Apply filters to the data"""
    st.sidebar.header("Filters")
    
    # Quick filter for classes that need students
    needs_students = st.sidebar.checkbox("Show only classes that need students", value=False)
    if needs_students:
        df = df[df['open_seats'] > 0]
    
    # Style filter
    all_styles = ['All'] + sorted(df['Style'].dropna().unique().tolist())
    selected_style = st.sidebar.selectbox("Style", all_styles)
    if selected_style != 'All':
        df = df[df['Style'] == selected_style]
    
    # Days filter
    all_days = ['All'] + sorted(df['DAYS'].dropna().unique().tolist())
    selected_days = st.sidebar.selectbox("Days", all_days)
    if selected_days != 'All':
        df = df[df['DAYS'] == selected_days]
    
    # Time window filter
    st.sidebar.subheader("Time Window")
    min_time = df['Start_time'].min() if not df.empty else "8:00"
    max_time = df['End_time'].max() if not df.empty else "22:00"
    
    time_range = st.sidebar.slider(
        "Select time range",
        min_value=pd.to_datetime(min_time, format="%H:%M").time() if isinstance(min_time, str) else min_time,
        max_value=pd.to_datetime(max_time, format="%H:%M").time() if isinstance(max_time, str) else max_time,
        value=(
            pd.to_datetime(min_time, format="%H:%M").time() if isinstance(min_time, str) else min_time,
            pd.to_datetime(max_time, format="%H:%M").time() if isinstance(max_time, str) else max_time
        ),
        format="HH:mm"
    )
    
    # Convert times for comparison
    start_time = time_range[0].strftime("%H:%M")
    end_time = time_range[1].strftime("%H:%M")
    
    df = df[
        (df['Start_time'] >= start_time) & 
        (df['End_time'] <= end_time)
    ]
    
    # Faculty filter
    all_faculty = ['All'] + sorted(df['Faculty'].dropna().unique().tolist())
    selected_faculty = st.sidebar.selectbox("Faculty", all_faculty)
    if selected_faculty != 'All':
        df = df[df['Faculty'] == selected_faculty]
    
    # Faculty type (seniority) filter
    all_faculty_types = ['All', 'FT', 'PT3yr', 'PTSpecial', 'Adjunct']
    selected_faculty_type = st.sidebar.selectbox("Faculty Type", all_faculty_types)
    if selected_faculty_type != 'All':
        df = df[df['faculty_type'] == selected_faculty_type]
    
    # Rating level filter
    all_rating_levels = ['All'] + sorted([str(x) for x in df['rating_level'].dropna().unique().tolist()])
    selected_rating_level = st.sidebar.selectbox("Rating Level", all_rating_levels)
    if selected_rating_level != 'All':
        df = df[df['rating_level'] == int(selected_rating_level)]
    
    return df

def create_ag_grid(df, key):
    """Create an AgGrid interactive table"""
    gb = GridOptionsBuilder.from_dataframe(df)
    
    # Configure columns
    gb.configure_column("Class", header_name="Class", editable=True)
    gb.configure_column("Style", header_name="Style", editable=True)
    gb.configure_column("Faculty", header_name="Faculty", editable=True)
    gb.configure_column("Fac_email", header_name="Faculty Email", editable=True)
    gb.configure_column("DAYS", header_name="Days", editable=True)
    gb.configure_column("Start_time", header_name="Start Time", editable=True)
    gb.configure_column("End_time", header_name="End Time", editable=True)
    gb.configure_column("student_enrolled", header_name="Enrolled", editable=True, type=["numericColumn"])
    gb.configure_column("student_total_need", header_name="Total Need", editable=True, type=["numericColumn"])
    gb.configure_column("open_seats", header_name="Open Seats", editable=False, type=["numericColumn"])
    gb.configure_column("rating_level", header_name="Rating", editable=False, type=["numericColumn"])
    
    # Add faculty badge column with custom renderer
    gb.configure_column(
        "faculty_badge", 
        header_name="Faculty Type",
        cellRenderer=JsCode("""
        function(params) {
            return params.value;
        }
        """),
        editable=False
    )
    
    # Hide faculty_type and faculty_priority columns (used for internal sorting)
    gb.configure_column("faculty_type", hide=True)
    gb.configure_column("faculty_priority", hide=True)
    gb.configure_column("bucket", hide=True)
    
    # Set basic grid options
    gb.configure_default_column(resizable=True, filterable=True, sortable=True)
    gb.configure_selection(selection_mode='multiple', use_checkbox=True)
    gb.configure_grid_options(domLayout='normal', enableRangeSelection=True)
    
    # Generate grid options
    grid_options = gb.build()
    
    # Create the AgGrid
    grid_response = AgGrid(
        df,
        gridOptions=grid_options,
        data_return_mode='AS_INPUT',
        update_mode='MODEL_CHANGED',
        fit_columns_on_grid_load=False,
        theme='light',  # Enforce light theme
        height=300,
        width='100%',
        reload_data=False,
        allow_unsafe_jscode=True,
        enable_enterprise_modules=False,
        key=key
    )
    
    return grid_response

def display_buckets(df):
    """Display data organized in priority buckets"""
    if df.empty:
        st.warning("No data to display after applying filters.")
        return df
    
    # Sort data by bucket (descending) and faculty priority (ascending)
    sorted_df = df.sort_values(by=['bucket', 'faculty_priority'], ascending=[False, True])
    
    # Create tabs for each bucket
    bucket_tabs = st.tabs([
        "3+ Open Seats",
        "2 Open Seats",
        "1 Open Seat",
        "0 Open Seats"
    ])
    
    # Process each bucket
    for i, bucket_value in enumerate([3, 2, 1, 0]):
        bucket_data = sorted_df[sorted_df['bucket'] == bucket_value] if bucket_value < 3 else sorted_df[sorted_df['bucket'] >= bucket_value]
        
        with bucket_tabs[i]:
            if bucket_data.empty:
                st.info(f"No classes with {'3 or more' if bucket_value == 3 else bucket_value} open seat{'s' if bucket_value != 1 else ''}.")
            else:
                st.markdown(f"<div class='bucket-header'><h3>{'3 or more' if bucket_value == 3 else bucket_value} Open Seat{'s' if bucket_value != 1 else ''} • {len(bucket_data)} Classes</h3></div>", unsafe_allow_html=True)
                
                # Create AgGrid for this bucket
                grid_response = create_ag_grid(bucket_data, f"grid_bucket_{bucket_value}")
                updated_df = grid_response['data']
                
                # Add download button
                st.markdown(
                    download_link(updated_df, f"bucket_{bucket_value}_classes.csv", "Download CSV"),
                    unsafe_allow_html=True
                )
    
    return sorted_df

def create_dashboard(df):
    """Create dashboard visualizations"""
    st.header("Ensemble Dashboard")
    
    if df.empty:
        st.warning("No data to display after applying filters.")
        return
    
    col1, col2 = st.columns(2)
    
    with col1:
        # Create bar chart for open seats by faculty type
        faculty_seats = df.groupby('faculty_type').agg({
            'open_seats': 'sum',
            'Class': 'count'
        }).reset_index()
        faculty_seats.columns = ['Faculty Type', 'Open Seats', 'Class Count']
        
        # Sort by faculty priority
        faculty_order = {'FT': 0, 'PT3yr': 1, 'PTSpecial': 2, 'Adjunct': 3, 'Unknown': 4}
        faculty_seats['order'] = faculty_seats['Faculty Type'].map(faculty_order)
        faculty_seats = faculty_seats.sort_values('order').drop('order', axis=1)
        
        fig1 = px.bar(
            faculty_seats,
            x='Faculty Type',
            y='Open Seats',
            text='Open Seats',
            color='Faculty Type',
            color_discrete_map={
                'FT': '#FF4B4B',
                'PT3yr': '#1E88E5',
                'PTSpecial': '#FF9800',
                'Adjunct': '#795548',
                'Unknown': '#9E9E9E'
            },
            title='Open Seats by Faculty Type'
        )
        fig1.update_layout(
            height=400,
            template='plotly_white',
            xaxis_title='',
            yaxis_title='Number of Open Seats'
        )
        st.plotly_chart(fig1, use_container_width=True)
    
    with col2:
        # Create pie chart for open seats by rating level
        rating_seats = df.groupby('rating_level').agg({
            'open_seats': 'sum'
        }).reset_index()
        rating_seats.columns = ['Rating Level', 'Open Seats']
        
        # Handle NaN values
        rating_seats = rating_seats.fillna('Unknown')
        
        fig2 = px.pie(
            rating_seats,
            values='Open Seats',
            names='Rating Level',
            title='Open Seats by Rating Level',
            hole=0.4,
            color_discrete_sequence=px.colors.qualitative.Pastel
        )
        fig2.update_layout(
            height=400,
            template='plotly_white'
        )
        st.plotly_chart(fig2, use_container_width=True)
    
    # Create calendar heatmap for classes by day and time
    st.subheader("Class Schedule Heatmap")
    
    # Create day-time matrix
    days_order = ['M', 'T', 'W', 'R', 'F', 'SA', 'SU']
    
    # Process the days and times
    calendar_data = []
    for _, row in df.iterrows():
        days = str(row['DAYS'])
        for day in days_order:
            if day in days:
                calendar_data.append({
                    'Day': day,
                    'Start Time': str(row['Start_time']),
                    'Open Seats': row['open_seats'],
                    'Class': row['Class']
                })
    
    if calendar_data:
        calendar_df = pd.DataFrame(calendar_data)
        
        # Group by day and time
        calendar_pivot = calendar_df.pivot_table(
            index='Day',
            columns='Start Time',
            values='Open Seats',
            aggfunc='sum'
        ).fillna(0)
        
        # Reindex to ensure all days appear in correct order
        available_days = [day for day in days_order if day in calendar_pivot.index]
        calendar_pivot = calendar_pivot.reindex(available_days)
        
        # Create heatmap
        fig3 = go.Figure(data=go.Heatmap(
            z=calendar_pivot.values,
            x=calendar_pivot.columns,
            y=calendar_pivot.index,
            colorscale='YlOrRd',
            showscale=True,
            colorbar=dict(title='Open Seats')
        ))
        
        fig3.update_layout(
            title='Class Schedule Heatmap (Open Seats)',
            xaxis_title='Start Time',
            yaxis_title='Day',
            height=400,
            template='plotly_white'
        )
        
        st.plotly_chart(fig3, use_container_width=True)
    else:
        st.info("Not enough data to display the schedule heatmap.")

def suggest_student_placements(class_df, student_df):
    """Suggest student placements based on ratings and open seats"""
    if student_df is None or class_df.empty:
        return
    
    st.header("Student Placement Suggestions")
    
    # Filter classes with open seats
    open_classes = class_df[class_df['open_seats'] > 0]
    
    if open_classes.empty:
        st.warning("No classes with open seats available for placement suggestions.")
        return
    
    # Match students to classes based on rating
    placement_suggestions = []
    
    for _, student in student_df.iterrows():
        student_rating = student['Rating']
        matching_classes = open_classes[open_classes['rating_level'] == student_rating]
        
        for _, class_row in matching_classes.iterrows():
            placement_suggestions.append({
                'Student_ID': student['Student_ID'],
                'Instrument': student['Instrument'],
                'Student_Rating': student['Rating'],
                'Class': class_row['Class'],
                'Style': class_row['Style'],
                'Faculty': class_row['Faculty'],
                'Days': class_row['DAYS'],
                'Time': f"{class_row['Start_time']} - {class_row['End_time']}",
                'Open_Seats': class_row['open_seats'],
                'Class_Rating': class_row['rating_level'],
                'Faculty_Type': class_row['faculty_type']
            })
    
    if placement_suggestions:
        suggestions_df = pd.DataFrame(placement_suggestions)
        
        # Sort by faculty priority
        faculty_order = {'FT': 1, 'PT3yr': 2, 'PTSpecial': 3, 'Adjunct': 4, 'Unknown': 5}
        suggestions_df['Faculty_Priority'] = suggestions_df['Faculty_Type'].map(faculty_order)
        suggestions_df = suggestions_df.sort_values('Faculty_Priority')
        
        # Drop the priority column
        suggestions_df = suggestions_df.drop('Faculty_Priority', axis=1)
        
        # Display the suggestions
        st.subheader("Recommended Placements")
        st.dataframe(suggestions_df)
        
        # Add download button
        st.markdown(
            download_link(suggestions_df, "student_placement_suggestions.csv", "Download Suggestions as CSV"),
            unsafe_allow_html=True
        )
    else:
        st.info("No matching placements found based on student ratings.")

def main():
    """Main application function"""
    st.title("Ensemble Placement Manager")
    
    # Load raw data
    raw_data = load_data()
    
    if raw_data is not None:
        # Process the data
        processed_data = prepare_data(raw_data)
        
        if processed_data is not None:
            # Apply filters
            filtered_data = filter_data(processed_data)
            
            # Create dashboard visualizations
            create_dashboard(filtered_data)
            
            # Display data in buckets
            bucket_data = display_buckets(filtered_data)
            
            # Load student data (optional)
            student_data = load_student_data()
            
            # Show student placement suggestions if student data is available
            if student_data is not None:
                suggest_student_placements(bucket_data, student_data)
            
            # Add full data view
            st.header("All Data (Editable)")
            grid_response = create_ag_grid(filtered_data, "grid_all_data")
            updated_df = grid_response['data']
            
            # Download button for all data
            st.markdown(
                download_link(updated_df, "all_ensemble_data.csv", "Download All Data as CSV"),
                unsafe_allow_html=True
            )
    else:
        # Display sample data message
        st.info(
            """
            👈 Please select a data source from the sidebar to get started.
            
            You can use the provided Google Sheet URL or upload your own CSV/Excel file.
            
            The data should contain the following columns:
            - Class: Name of the ensemble class
            - Style: Style of music (with rating level at the end, e.g., "Jazz 2")
            - Faculty: Name of the instructor
            - Fac_email: Faculty email address (used to determine seniority)
            - DAYS: Days when the class meets
            - Start_time: Class start time
            - End_time: Class end time
            - student_enrolled: Number of currently enrolled students
            - student_total_need: Total number of students needed
            """
        )

if __name__ == "__main__":
    main()
Loading code editor...
Terminal