A Streamlit app for binary classification of whether a customer will default on a loan payment.
Drop files here
or click to upload
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
# Set page title
st.title('Loan Default Prediction')
# Create sample data
@st.cache_data
def generate_sample_data(n_samples: int = 1000) -> pd.DataFrame:
    """Build a reproducible synthetic loan dataset with a binary ``default`` label.

    Args:
        n_samples: Number of customer rows to generate.

    Returns:
        A DataFrame with the feature columns ``age``, ``income``,
        ``loan_amount``, ``credit_score``, ``employment_length`` and
        ``debt_to_income`` plus the 0/1 target column ``default``.
    """
    # A private, seeded generator keeps the data reproducible without
    # reseeding NumPy's process-wide random state. RandomState(42) yields the
    # same stream that np.random.seed(42) followed by np.random.* would.
    rng = np.random.RandomState(42)
    data = {
        'age': rng.randint(18, 70, n_samples),
        # Normal draws can fall below zero; clip so income, loan amount and
        # debt ratio stay non-negative, like the values the input form accepts.
        'income': np.clip(rng.normal(50000, 20000, n_samples), 0, None),
        'loan_amount': np.clip(rng.normal(200000, 100000, n_samples), 0, None),
        'credit_score': rng.randint(300, 850, n_samples),
        'employment_length': rng.randint(0, 40, n_samples),
        'debt_to_income': np.clip(rng.normal(0.3, 0.1, n_samples), 0, None),
    }
    df = pd.DataFrame(data)

    # Log-odds of default. Risk falls with credit score, income and employment
    # length, and rises with loan amount and debt-to-income ratio. (The signs
    # were previously reversed, which labelled the safest customers as the
    # most likely to default.)
    log_odds = (
        -5
        - 0.02 * df['credit_score'] / 100
        - 0.3 * df['income'] / 10000
        + 0.2 * df['loan_amount'] / 10000
        - 0.1 * df['employment_length']
        + 2 * df['debt_to_income']
    )
    probability = 1 / (1 + np.exp(-log_odds))

    # Draw each label as a Bernoulli trial with the row's default probability.
    df['default'] = (rng.random_sample(n_samples) < probability).astype(int)
    return df
# Build the synthetic dataset, separate the target column from the features,
# and hold out 20% of the rows for evaluation (fixed seed for repeatability).
df = generate_sample_data()
y = df['default']
X = df.drop('default', axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Train model
@st.cache_resource
def train_model(X_train, y_train):
    """Fit a standardizing scaler and a logistic-regression classifier.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels aligned with ``X_train``.

    Returns:
        A ``(model, scaler)`` tuple: the fitted ``LogisticRegression`` and the
        ``StandardScaler`` that was fitted on ``X_train``. The model is trained
        on scaled features, so the same scaler has to be applied to any data
        passed to the model afterwards.
    """
    feature_scaler = StandardScaler()
    scaled_features = feature_scaler.fit_transform(X_train)
    classifier = LogisticRegression(random_state=42).fit(scaled_features, y_train)
    return classifier, feature_scaler
# Fit (or fetch from Streamlit's resource cache) the classifier and its scaler.
model, scaler = train_model(X_train, y_train)

# Create input form: six numeric inputs laid out in two side-by-side columns.
st.subheader('Enter Customer Information')
col1, col2 = st.columns(2)

# Left column
age = col1.number_input('Age', min_value=18, max_value=100, value=30)
income = col1.number_input(
    'Annual Income ($)', min_value=0, max_value=1000000, value=50000
)
loan_amount = col1.number_input(
    'Loan Amount ($)', min_value=0, max_value=1000000, value=200000
)

# Right column
credit_score = col2.number_input(
    'Credit Score', min_value=300, max_value=850, value=700
)
employment_length = col2.number_input(
    'Years of Employment', min_value=0, max_value=50, value=5
)
debt_to_income = col2.number_input(
    'Debt to Income Ratio', min_value=0.0, max_value=1.0, value=0.3
)
# Make prediction: score the customer entered in the form when the button is pressed.
if st.button('Predict Default Risk'):
    # Column names and order mirror the training features so the fitted
    # scaler receives the frame in the layout it was fitted on.
    input_data = pd.DataFrame({
        'age': [age],
        'income': [income],
        'loan_amount': [loan_amount],
        'credit_score': [credit_score],
        'employment_length': [employment_length],
        'debt_to_income': [debt_to_income],
    })
    input_scaled = scaler.transform(input_data)
    prediction = model.predict(input_scaled)
    probability = model.predict_proba(input_scaled)

    # Column 1 of predict_proba is the probability of class 1 (default).
    # Report that same quantity in both branches: the low-risk message used to
    # print P(no default) under the identical "Probability" label, so a safe
    # customer appeared to have a ~99% chance of defaulting.
    default_probability = probability[0][1]

    st.subheader('Prediction Results')
    if prediction[0] == 1:
        st.error(f'High risk of default (Probability: {default_probability:.2%})')
    else:
        st.success(f'Low risk of default (Probability: {default_probability:.2%})')
# Show feature importance: rank features by the absolute value of their fitted
# logistic-regression coefficient (the model is trained on standardized inputs).
# NOTE(review): the source's indentation was lost; this section is written at
# top level (always rendered) -- confirm it was not meant to sit under the
# 'Predict Default Risk' button.
st.subheader('Feature Importance')
coef_magnitude = np.abs(model.coef_[0])
importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': coef_magnitude})
importance_df = importance_df.sort_values('Importance', ascending=False)
st.bar_chart(importance_df.set_index('Feature'))
# Show model performance metrics, evaluated on the held-out test split.
# NOTE(review): the source's indentation was lost; this section is written at
# top level (always rendered) -- confirm it was not meant to sit under the
# 'Predict Default Risk' button.
st.subheader('Model Performance Metrics')

# The test features go through the scaler fitted on the training data.
X_test_scaled = scaler.transform(X_test)
y_pred = model.predict(X_test_scaled)

test_accuracy = accuracy_score(y_test, y_pred)
st.write(f'Model Accuracy: {test_accuracy:.2%}')

st.text('Classification Report:')
report_text = classification_report(y_test, y_pred)
st.text(report_text)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?