Ploomber AI Editor | titanic-survival-predictor-b9c5

Drop files here
or click to upload
Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

# Page heading rendered at the top of the app.
st.title('Titanic Survival Predictor')

# Load and prepare data
@st.cache_data
def load_data():
    """Download the Titanic passenger dataset and return it as a DataFrame.

    NOTE: The URL below points directly at raw.githubusercontent.com; no CORS
    proxy prefix (such as corsproxy.io) is applied. If a browser/WASM runtime
    refuses the request, such a prefix may need to be added in front of it.
    """
    dataset_url = "https://raw.githubusercontent.com/datasciencedojo/datasets/refs/heads/master/titanic.csv"
    return pd.read_csv(dataset_url)

# Load the passenger data; load_data is wrapped in st.cache_data.
df = load_data()


# Data preprocessing
def preprocess_data(df):
    """Build the model's feature matrix and target from the passenger data.

    Args:
        df: DataFrame containing the columns Pclass, Sex, Age, SibSp, Parch,
            Fare, Embarked and Survived.

    Returns:
        A ``(X, y)`` tuple. ``X`` is a DataFrame of the seven feature columns
        with the categorical columns integer-encoded and missing values
        imputed; it keeps the index of ``df``. ``y`` is ``df['Survived']``.
    """
    # Select features for the model
    features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
    categorical = ['Sex', 'Embarked']
    X = df[features].copy()
    y = df['Survived']

    # Handle categorical variables
    for column in categorical:
        # Fill gaps with the most frequent label *before* encoding. Casting a
        # missing value to str would otherwise produce the literal label
        # 'nan', which gets its own integer code and is never imputed.
        most_frequent = X[column].mode()
        if not most_frequent.empty:
            X[column] = X[column].fillna(most_frequent.iloc[0])
        # One fresh encoder per column; codes follow the sorted label order.
        X[column] = LabelEncoder().fit_transform(X[column].astype(str))

    # Median-impute whatever gaps remain in the numeric columns. Passing the
    # original index keeps X aligned with y, which still carries df's index.
    imputer = SimpleImputer(strategy='median')
    X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns, index=X.index)

    return X, y

# Feature matrix and target used to fit the model below.
X, y = preprocess_data(df)

# Train model
@st.cache_resource
def train_model(X, y):
    """Fit a 100-tree random forest (random_state=42) on X, y and return it."""
    forest = RandomForestClassifier(random_state=42, n_estimators=100)
    # fit() returns the estimator itself, so the fitted model is handed back.
    return forest.fit(X, y)

# Fit the classifier; train_model is wrapped in st.cache_resource.
model = train_model(X, y)

# Passenger input form, laid out in two side-by-side columns
st.subheader('Enter Passenger Information')

left_col, right_col = st.columns(2)

with left_col:
    pclass = st.selectbox(
        'Passenger Class',
        [1, 2, 3],
        help='1 = 1st Class, 2 = 2nd Class, 3 = 3rd Class',
    )
    sex = st.selectbox('Sex', ['male', 'female'])
    age = st.number_input(
        'Age',
        min_value=0,
        max_value=100,
        value=30,
    )
    sibsp = st.number_input(
        'Number of Siblings/Spouses Aboard',
        min_value=0,
        max_value=10,
        value=0,
    )

with right_col:
    parch = st.number_input(
        'Number of Parents/Children Aboard',
        min_value=0,
        max_value=10,
        value=0,
    )
    fare = st.number_input(
        'Fare',
        min_value=0.0,
        max_value=600.0,
        value=32.2,
    )
    embarked = st.selectbox(
        'Port of Embarkation',
        ['C', 'Q', 'S'],
        help='C = Cherbourg, Q = Queenstown, S = Southampton',
    )

# Make prediction
if st.button('Predict Survival'):
    # Single-row frame whose columns match the training features, in order.
    input_data = pd.DataFrame({
        'Pclass': [pclass],
        'Sex': [sex],
        'Age': [age],
        'SibSp': [sibsp],
        'Parch': [parch],
        'Fare': [fare],
        'Embarked': [embarked]
    })

    # Encode the categorical inputs with encoders fitted on the training
    # labels, so each label maps to the same integer code the model was
    # trained on. Fitting an encoder on the single input row instead would
    # always yield code 0, making the chosen sex and port irrelevant.
    for column in ('Sex', 'Embarked'):
        encoder = LabelEncoder().fit(df[column].dropna().astype(str))
        input_data[column] = encoder.transform(input_data[column].astype(str))

    # predict_proba returns one row per sample; index 1 is the probability
    # of the positive class (Survived == 1).
    prediction = model.predict_proba(input_data)[0]

    # Display results
    st.subheader('Prediction Results')
    col1, col2 = st.columns(2)

    with col1:
        st.metric(label="Survival Probability", value=f"{prediction[1]:.1%}")

    with col2:
        if prediction[1] >= 0.5:
            st.success("This passenger would likely SURVIVE!")
        else:
            st.error("This passenger would likely NOT SURVIVE!")

    # Feature importance; names come from the input frame so they cannot
    # drift from the columns actually fed to the model.
    st.subheader('Feature Importance')
    feature_importance = pd.DataFrame({
        'Feature': list(input_data.columns),
        'Importance': model.feature_importances_
    }).sort_values('Importance', ascending=False)

    st.bar_chart(feature_importance.set_index('Feature'))

# Headline figures computed from the loaded dataset
st.subheader('Dataset Statistics')
rate_col, count_col = st.columns(2)

# Share of passengers with Survived == 1, expressed as a percentage.
survival_rate = df['Survived'].mean() * 100
rate_col.metric("Overall Survival Rate", f"{survival_rate:.1f}%")

# Number of rows in the dataset.
total_passengers = len(df)
count_col.metric("Total Passengers", total_passengers)
Loading code editor...
Click Run to preview your app
Terminal