Use this link to grab the Titanic dataset from Kaggle, then create a model: https://www.kaggle.com/datasets/rahulsah06/titanic?select=train.csv
Drop files here
or click to upload
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
st.title('Titanic Survival Prediction')
# Load data
@st.cache_data
def load_data():
    """Download the Titanic passenger table and return it as a DataFrame.

    The Kaggle dataset page (https://www.kaggle.com/datasets/rahulsah06/titanic)
    is an HTML page behind a login, so it cannot be handed to ``pd.read_csv``.
    A directly downloadable CSV mirror is read instead.

    Returns:
        pandas.DataFrame: the parsed CSV. Downstream code expects the columns
        PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket,
        Fare, Cabin and Embarked.
    """
    # NOTE: Using corsproxy.io because we're in a WASM environment. If running
    # locally, you can remove the corsproxy.io prefix.
    url = (
        "https://corsproxy.io/?"
        "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
    )
    return pd.read_csv(url)
# Fetch the dataset once and show its first rows as a quick sanity check
df = load_data()
raw_preview = df.head()
st.subheader('Raw Data')
st.write(raw_preview)
# Data preprocessing
def preprocess_data(df):
    """Return a model-ready copy of the raw Titanic frame.

    Steps:
      * drop the Name, Ticket, Cabin and PassengerId columns;
      * fill missing Age with the column median and missing Embarked with
        the most frequent port;
      * encode Sex and Embarked as integers using fixed mappings.

    The mappings are fixed (female=0, male=1; C=0, Q=1, S=2) so the codes do
    not depend on which labels happen to be present in ``df``.

    Args:
        df: raw passenger data; it is not modified.

    Returns:
        pandas.DataFrame: a new frame with the remaining columns, no missing
        Age/Embarked values, and integer-coded Sex/Embarked.

    Raises:
        ValueError: if Sex or Embarked contains a label outside the mappings.
    """
    sex_codes = {'female': 0, 'male': 1}
    embarked_codes = {'C': 0, 'Q': 1, 'S': 2}

    # Work on a copy so the caller's frame is left untouched
    data = df.copy()
    data = data.drop(columns=['Name', 'Ticket', 'Cabin', 'PassengerId'])

    # Assign the filled column back instead of calling fillna(inplace=True) on
    # a column selection: that chained form is deprecated and does not update
    # the frame when pandas Copy-on-Write is active.
    data['Age'] = data['Age'].fillna(data['Age'].median())
    data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

    for column, codes in (('Sex', sex_codes), ('Embarked', embarked_codes)):
        encoded = data[column].map(codes)
        unmapped = encoded.isna()
        if unmapped.any():
            # Fail loudly rather than passing NaN codes on to the model
            unknown = sorted(set(data.loc[unmapped, column].astype(str)))
            raise ValueError(f"Unexpected {column} value(s): {unknown}")
        data[column] = encoded.astype('int64')

    return data
# Clean the raw frame, then separate the target column from the features
processed_data = preprocess_data(df)
y = processed_data['Survived']
X = processed_data.drop(columns='Survived')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across reruns
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Train model
@st.cache_resource
def train_model():
    """Fit a 100-tree random forest on the module-level training split.

    Reads ``X_train`` and ``y_train`` from module scope. The seed is fixed so
    repeated fits on the same data give the same forest.

    Returns:
        RandomForestClassifier: the fitted classifier.
    """
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    # fit() returns the estimator itself, so the fitted model can be returned directly
    return forest.fit(X_train, y_train)
model = train_model()

# Score the held-out rows
y_pred = model.predict(X_test)

# Report accuracy and the per-class metrics
st.subheader('Model Performance')
st.write('Accuracy:', accuracy_score(y_test, y_pred))
st.write('Classification Report:')
st.text(classification_report(y_test, y_pred))

# Pair each feature with its importance, most important first, and chart it
st.subheader('Feature Importance')
feature_importance = (
    pd.DataFrame({'feature': X.columns, 'importance': model.feature_importances_})
    .sort_values('importance', ascending=False)
)
st.bar_chart(feature_importance.set_index('feature'))
# Interactive prediction form
st.subheader('Predict Survival')

# Integer codes must match the ones preprocess_data produces for the training
# data, where labels are numbered in alphabetical order: female=0, male=1 and
# C=0, Q=1, S=2. Using male=0 here would score every passenger as the
# opposite sex.
SEX_CODES = {'female': 0, 'male': 1}
EMBARKED_CODES = {'C': 0, 'Q': 1, 'S': 2}

col1, col2 = st.columns(2)
with col1:
    pclass = st.selectbox('Passenger Class', [1, 2, 3])
    sex = st.selectbox('Sex', ['male', 'female'])
    age = st.number_input('Age', min_value=0, max_value=100, value=30)
    sibsp = st.number_input('Number of Siblings/Spouses', min_value=0, value=0)
with col2:
    parch = st.number_input('Number of Parents/Children', min_value=0, value=0)
    # Float bounds make the widget accept fractional fares; integer arguments
    # would restrict the input to whole numbers.
    fare = st.number_input('Fare', min_value=0.0, value=50.0)
    embarked = st.selectbox('Embarked', ['C', 'Q', 'S'])

if st.button('Predict'):
    # Single-row frame with the same columns, in the same order, as the
    # training features
    input_data = pd.DataFrame({
        'Pclass': [pclass],
        'Sex': [SEX_CODES[sex]],
        'Age': [age],
        'SibSp': [sibsp],
        'Parch': [parch],
        'Fare': [fare],
        'Embarked': [EMBARKED_CODES[embarked]],
    })

    # Predicted class plus the per-class probabilities
    prediction = model.predict(input_data)
    probability = model.predict_proba(input_data)

    st.write('Survival Prediction:', 'Survived' if prediction[0] == 1 else 'Did Not Survive')
    # Column 1 of predict_proba is the probability of class 1 (survived)
    st.write('Survival Probability:', f"{probability[0][1]:.2%}")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?