Create a Streamlit interface for predicting whether someone is likely to make it out of the Titanic alive. The data is located here: https://raw.githubusercontent.com/datasciencedojo/datasets/refs/heads/master/titanic.csv This is a sample of the CSV: PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S 2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C There might be some missing data in the dataset.
Drop files here
or click to upload
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
# Page heading rendered at the top of the app.
st.title('Titanic Survival Predictor')
# Load and prepare data
@st.cache_data
def load_data(url="https://raw.githubusercontent.com/datasciencedojo/datasets/refs/heads/master/titanic.csv"):
    """Download the Titanic passenger table and return it as a DataFrame.

    The result is wrapped in ``st.cache_data``, so the CSV is read once per
    distinct ``url`` instead of on every script rerun.

    Args:
        url: Location of the CSV (anything ``pandas.read_csv`` accepts).
            Defaults to the public datasciencedojo copy of the dataset.

    Returns:
        The raw, unprocessed passenger DataFrame.
    """
    # NOTE: the URL is fetched directly. An earlier comment here said the
    # request went through corsproxy.io for WASM environments, but no such
    # prefix was ever present; pass a proxied URL via `url` if your runtime
    # requires one.
    return pd.read_csv(url)
# Raw passenger table; load_data is wrapped in st.cache_data above.
df = load_data()
# Data preprocessing
def preprocess_data(df):
    """Turn the raw Titanic frame into a numeric feature matrix and target.

    Categorical columns are mapped to fixed integer codes so the exact same
    encoding can be reproduced for a single passenger at prediction time:
    ``Sex`` -> female=0, male=1 and ``Embarked`` -> C=0, Q=1, S=2 (the same
    codes an alphabetical label encoding assigns). Missing or unrecognised
    categories stay NaN and are then filled, like every other gap, with the
    column median. ``df`` itself is not modified.

    Args:
        df: DataFrame containing at least the columns ``Pclass``, ``Sex``,
            ``Age``, ``SibSp``, ``Parch``, ``Fare``, ``Embarked`` and
            ``Survived``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a float DataFrame of the seven
        features (same index as ``df``) and ``y`` is ``df['Survived']``.
    """
    features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
    X = df[features].copy()
    y = df['Survived']

    # Explicit codes instead of LabelEncoder: casting to str first would turn
    # a missing port into the literal category 'nan', hiding it from the
    # imputation step below.
    sex_codes = {'female': 0, 'male': 1}
    embarked_codes = {'C': 0, 'Q': 1, 'S': 2}
    X['Sex'] = X['Sex'].map(sex_codes)
    X['Embarked'] = X['Embarked'].map(embarked_codes)

    # Median imputation per column; operating on the frame directly keeps the
    # original index so X stays aligned with y.
    X = X.astype(float)
    X = X.fillna(X.median())
    return X, y
# Feature matrix and survival labels derived from the loaded passenger data.
X, y = preprocess_data(df)
# Train model
@st.cache_resource
def train_model(X, y):
    """Fit a random-forest classifier on the prepared training data.

    Args:
        X: Feature matrix to train on.
        y: Target labels, one per row of ``X``.

    Returns:
        The fitted ``RandomForestClassifier`` (100 trees, ``random_state=42``).
    """
    forest = RandomForestClassifier(random_state=42, n_estimators=100)
    # fit() returns the estimator itself, so the fitted forest is handed back.
    return forest.fit(X, y)
# Classifier fitted on the preprocessed data; train_model is wrapped in st.cache_resource above.
model = train_model(X, y)
# Passenger details form, laid out as two side-by-side columns of widgets.
st.subheader('Enter Passenger Information')
left_col, right_col = st.columns(2)

# Left column: ticket class, sex, age and siblings/spouses count.
pclass = left_col.selectbox(
    'Passenger Class',
    [1, 2, 3],
    help='1 = 1st Class, 2 = 2nd Class, 3 = 3rd Class',
)
sex = left_col.selectbox('Sex', ['male', 'female'])
age = left_col.number_input('Age', min_value=0, max_value=100, value=30)
sibsp = left_col.number_input(
    'Number of Siblings/Spouses Aboard',
    min_value=0,
    max_value=10,
    value=0,
)

# Right column: parents/children count, fare and embarkation port.
parch = right_col.number_input(
    'Number of Parents/Children Aboard',
    min_value=0,
    max_value=10,
    value=0,
)
fare = right_col.number_input('Fare', min_value=0.0, max_value=600.0, value=32.2)
embarked = right_col.selectbox(
    'Port of Embarkation',
    ['C', 'Q', 'S'],
    help='C = Cherbourg, Q = Queenstown, S = Southampton',
)
# Make prediction
if st.button('Predict Survival'):
    # Integer codes for the categorical inputs. They mirror the alphabetical
    # order a label encoding assigns on the full training columns
    # (female < male; C < Q < S). Fitting a fresh LabelEncoder on this
    # one-row frame, as was done previously, always yields code 0, so the
    # selected sex and port were silently ignored by the model.
    sex_codes = {'female': 0, 'male': 1}
    embarked_codes = {'C': 0, 'Q': 1, 'S': 2}

    # Single-row frame with the same column names and order used in training.
    input_data = pd.DataFrame({
        'Pclass': [pclass],
        'Sex': [sex_codes[sex]],
        'Age': [age],
        'SibSp': [sibsp],
        'Parch': [parch],
        'Fare': [fare],
        'Embarked': [embarked_codes[embarked]],
    })

    # One row of class probabilities; index 1 is read as the survival class.
    prediction = model.predict_proba(input_data)[0]

    # Display results
    st.subheader('Prediction Results')
    col1, col2 = st.columns(2)
    with col1:
        st.metric(label="Survival Probability", value=f"{prediction[1]:.1%}")
    with col2:
        if prediction[1] >= 0.5:
            st.success("This passenger would likely SURVIVE!")
        else:
            st.error("This passenger would likely NOT SURVIVE!")

    # Feature importance, most influential feature first.
    st.subheader('Feature Importance')
    feature_importance = pd.DataFrame({
        'Feature': ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked'],
        'Importance': model.feature_importances_,
    }).sort_values('Importance', ascending=False)
    st.bar_chart(feature_importance.set_index('Feature'))
# Headline figures computed from the raw dataset, shown side by side.
st.subheader('Dataset Statistics')
rate_col, count_col = st.columns(2)

# Share of passengers whose 'Survived' flag is set, as a percentage.
survival_rate = df['Survived'].mean() * 100
rate_col.metric("Overall Survival Rate", f"{survival_rate:.1f}%")

# Number of rows in the dataset.
total_passengers = len(df)
count_col.metric("Total Passengers", total_passengers)
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?