Ploomber AI Editor | loan-default-classifier-583b

Drop files here
or click to upload
Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Render the page heading; this is the first element the Streamlit script emits.
st.title('Loan Default Prediction')

# Create sample data
@st.cache_data
def generate_sample_data(n_samples=1000):
    """Build a reproducible synthetic loan dataset with a binary ``default`` label.

    Six numeric features are drawn independently (uniform integers for age,
    credit score and employment length; normal draws for income, loan amount
    and debt-to-income ratio). A logistic function of those features gives a
    per-row probability, and the label is a Bernoulli draw against it.

    Args:
        n_samples: Number of rows to generate.

    Returns:
        A ``pandas.DataFrame`` with columns ``age``, ``income``,
        ``loan_amount``, ``credit_score``, ``employment_length``,
        ``debt_to_income`` and the 0/1 target ``default``.
    """
    # A private seeded generator keeps the data reproducible without
    # reseeding NumPy's global RNG for everything else in the process.
    rng = np.random.RandomState(42)

    # Draw order matters for reproducibility: keep the columns in this order.
    df = pd.DataFrame({
        'age': rng.randint(18, 70, n_samples),
        'income': rng.normal(50000, 20000, n_samples),
        'loan_amount': rng.normal(200000, 100000, n_samples),
        'credit_score': rng.randint(300, 850, n_samples),
        'employment_length': rng.randint(0, 40, n_samples),
        'debt_to_income': rng.normal(0.3, 0.1, n_samples),
    })

    # Normal draws have unbounded tails; clamp them to the ranges the input
    # form accepts so no row carries a negative amount or an impossible ratio.
    df['income'] = df['income'].clip(lower=0)
    df['loan_amount'] = df['loan_amount'].clip(lower=0)
    df['debt_to_income'] = df['debt_to_income'].clip(lower=0, upper=1)

    # NOTE(review): with these signs the "default" probability rises with
    # income, credit score and employment length and falls with loan amount
    # and debt ratio -- this looks inverted for a default label; confirm intent.
    logit = (
        -5
        + 0.02 * df['credit_score'] / 100
        + 0.3 * df['income'] / 10000
        - 0.2 * df['loan_amount'] / 10000
        + 0.1 * df['employment_length']
        - 2 * df['debt_to_income']
    )
    probability = 1 / (1 + np.exp(-logit))

    # Bernoulli draw per row against its probability.
    df['default'] = (rng.random_sample(n_samples) < probability).astype(int)

    return df

# Generate the synthetic dataset and separate the features from the target.
df = generate_sample_data()
X = df.drop(columns='default')
y = df['default']

# Hold out 20% for evaluation. Stratifying on the label keeps the
# default/non-default ratio the same in both splits, so the test metrics are
# not skewed by an unlucky draw when one class is rare.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the preprocessing step and the classifier.
# st.cache_resource lets Streamlit reuse the fitted objects instead of
# refitting them every time the script reruns.
@st.cache_resource
def train_model(X_train, y_train):
    """Fit a standard scaler and a logistic regression on the training split.

    Args:
        X_train: Training feature table.
        y_train: Training labels aligned with ``X_train``.

    Returns:
        A ``(model, scaler)`` tuple: the fitted ``LogisticRegression`` and the
        ``StandardScaler`` that must be applied to any features before
        calling the model.
    """
    feature_scaler = StandardScaler().fit(X_train)
    classifier = LogisticRegression(random_state=42)
    # The classifier only ever sees standardized features.
    classifier.fit(feature_scaler.transform(X_train), y_train)
    return classifier, feature_scaler

model, scaler = train_model(X_train, y_train)

# Collect the customer's details in a two-column layout.
st.subheader('Enter Customer Information')

col1, col2 = st.columns(2)

# Left column: age and monetary amounts.
age = col1.number_input('Age', min_value=18, max_value=100, value=30)
income = col1.number_input(
    'Annual Income ($)', min_value=0, max_value=1000000, value=50000
)
loan_amount = col1.number_input(
    'Loan Amount ($)', min_value=0, max_value=1000000, value=200000
)

# Right column: credit history and leverage.
credit_score = col2.number_input(
    'Credit Score', min_value=300, max_value=850, value=700
)
employment_length = col2.number_input(
    'Years of Employment', min_value=0, max_value=50, value=5
)
debt_to_income = col2.number_input(
    'Debt to Income Ratio', min_value=0.0, max_value=1.0, value=0.3
)

# Score the entered customer when the button is pressed.
if st.button('Predict Default Risk'):
    # Single-row frame; the keys are listed in the same order as the training
    # columns so the fitted scaler sees the features it expects.
    input_data = pd.DataFrame({
        'age': [age],
        'income': [income],
        'loan_amount': [loan_amount],
        'credit_score': [credit_score],
        'employment_length': [employment_length],
        'debt_to_income': [debt_to_income]
    })

    # Apply the same scaling used at training time before predicting.
    input_scaled = scaler.transform(input_data)
    prediction = model.predict(input_scaled)
    probability = model.predict_proba(input_scaled)

    # Column 1 of predict_proba is the probability of class 1 (default).
    # Report that same quantity in both branches so "Probability" always
    # means "probability of default", never the complement.
    default_probability = probability[0][1]

    st.subheader('Prediction Results')
    if prediction[0] == 1:
        st.error(f'High risk of default (Probability: {default_probability:.2%})')
    else:
        st.success(f'Low risk of default (Probability: {default_probability:.2%})')

    # Show feature importance: absolute logistic-regression coefficients on
    # the standardized features, largest first.
    st.subheader('Feature Importance')
    importance_df = pd.DataFrame({
        'Feature': X.columns,
        'Importance': np.abs(model.coef_[0])
    }).sort_values('Importance', ascending=False)
    st.bar_chart(importance_df.set_index('Feature'))

# Show model performance metrics on the held-out test split.
st.subheader('Model Performance Metrics')
# The test features must go through the scaler fitted on the training data.
X_test_scaled = scaler.transform(X_test)
y_pred = model.predict(X_test_scaled)
st.write(f'Model Accuracy: {accuracy_score(y_test, y_pred):.2%}')
st.text('Classification Report:')
# If the model never predicts one of the classes, its precision is undefined.
# zero_division=0 reports 0 for it explicitly instead of raising an
# UndefinedMetricWarning on every rerun.
st.text(classification_report(y_test, y_pred, zero_division=0))
Loading code editor...
Click Run to preview your app
Terminal