Ploomber AI Editor | titanic-eda-72c9

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import seaborn as sns
import numpy as np

# Page-level settings: browser tab title "Titanic EDA" and the wide layout.
# This is the first Streamlit call in the script.
st.set_page_config(page_title="Titanic EDA", layout="wide")

# Load the Titanic dataset
@st.cache_data
def load_data() -> pd.DataFrame:
    """Download the Titanic passenger CSV and return it as a DataFrame.

    The function is wrapped in ``st.cache_data`` so Streamlit can reuse the
    parsed result instead of fetching the file again.
    """
    # NOTE: The corsproxy.io prefix is here because the app runs in a WASM
    # environment. When running locally, the prefix can be removed.
    dataset_url = "https://corsproxy.io/?https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
    passengers = pd.read_csv(dataset_url)
    return passengers

# Fetch the passenger table used by every section below
df = load_data()

# Page heading and short introduction
st.title("Titanic Dataset Explorer")
st.write("Explore the famous Titanic dataset with interactive visualizations")

# Dataset overview: shape and sample rows on the left,
# per-column missing-value summary on the right
st.header("Dataset Overview")
overview_left, overview_right = st.columns(2)

overview_left.write("Dataset Shape:", df.shape)
overview_left.write("### Data Sample")
overview_left.dataframe(df.head())

# Count nulls per column and express them as a percentage of all rows
null_counts = df.isnull().sum()
null_share = (null_counts.values / len(df) * 100).round(2)
missing_table = pd.DataFrame({
    'Column': null_counts.index,
    'Missing Values': null_counts.values,
    'Percentage': null_share,
})

overview_right.write("### Missing Values")
overview_right.dataframe(missing_table)

# Survival Distribution: overall pie chart (left) and per-class histogram (right)
st.header("Survival Analysis")
col3, col4 = st.columns(2)

with col3:
    # value_counts() orders its result by frequency, not by value, so a fixed
    # positional label list would be swapped whenever survivors outnumber
    # non-survivors. Sort by the 0/1 code and look each label up from the code
    # itself; unknown codes fall back to their string form.
    survival_labels = {0: 'Did not Survive', 1: 'Survived'}
    survival_count = df['Survived'].value_counts().sort_index()
    fig_survival = px.pie(values=survival_count.values,
                          names=[survival_labels.get(code, str(code))
                                 for code in survival_count.index],
                          title='Survival Distribution')
    st.plotly_chart(fig_survival)

with col4:
    # Grouped bars: one bar per survival outcome within each passenger class
    fig_survival_class = px.histogram(df,
                                      x='Pclass',
                                      color='Survived',
                                      barmode='group',
                                      title='Survival by Passenger Class',
                                      labels={'Pclass': 'Passenger Class', 'count': 'Count'})
    st.plotly_chart(fig_survival_class)

# Age Distribution: overall histogram (left) and box plot split by survival (right)
st.header("Age Distribution")
age_left, age_right = st.columns(2)

age_histogram = px.histogram(
    df,
    x='Age',
    nbins=30,
    title='Age Distribution',
    labels={'Age': 'Age', 'count': 'Count'},
)
age_left.plotly_chart(age_histogram)

age_by_survival = px.box(
    df,
    x='Survived',
    y='Age',
    title='Age Distribution by Survival',
    labels={'Survived': 'Survived', 'Age': 'Age'},
)
age_right.plotly_chart(age_by_survival)

# Gender Analysis: survival counts per gender (left) and
# gender counts per passenger class (right)
st.header("Gender Analysis")
gender_left, gender_right = st.columns(2)

# Cross-tabulate Sex against Survived; the resulting wide table is plotted directly
survival_by_sex = pd.crosstab(df['Sex'], df['Survived'])
survival_by_sex_chart = px.bar(
    survival_by_sex,
    title='Survival by Gender',
    labels={'Sex': 'Gender', 'value': 'Count'},
    barmode='group',
)
gender_left.plotly_chart(survival_by_sex_chart)

sex_by_class_chart = px.histogram(
    df,
    x='Sex',
    color='Pclass',
    title='Gender Distribution by Class',
    barmode='group',
    labels={'Sex': 'Gender', 'count': 'Count'},
)
gender_right.plotly_chart(sex_by_class_chart)

# Fare Analysis: fare spread per passenger class (left) and
# fare-vs-age scatter coloured by survival (right)
st.header("Fare Analysis")
col9, col10 = st.columns(2)

with col9:
    fig_fare = px.box(df,
                      x='Pclass',
                      y='Fare',
                      title='Fare Distribution by Passenger Class',
                      labels={'Pclass': 'Passenger Class', 'Fare': 'Fare'})
    st.plotly_chart(fig_fare)

with col10:
    # Survived is a 0/1 code. Passed as a number, px.scatter would draw a
    # continuous colour scale; casting it to text on a plotting copy gives one
    # discrete legend colour per outcome. df itself is left untouched.
    fare_plot_df = df.assign(Survived=df['Survived'].astype(str))
    fig_fare_survival = px.scatter(fare_plot_df,
                                   x='Fare',
                                   y='Age',
                                   color='Survived',
                                   # Fix the legend order instead of relying on row order
                                   category_orders={'Survived': sorted(fare_plot_df['Survived'].unique())},
                                   title='Fare vs Age by Survival',
                                   labels={'Fare': 'Fare', 'Age': 'Age'})
    st.plotly_chart(fig_fare_survival)

# Interactive Feature Analysis: user-chosen numeric X/Y axes and colour grouping
st.header("Interactive Feature Analysis")

# Compute the numeric column list once and share it between both axis selectors
numeric_features = df.select_dtypes(include=['int64', 'float64']).columns
feature_x = st.selectbox('Select X-axis feature:', numeric_features)

# Default the Y axis to the second numeric column (when there is one) so the
# initial chart is not a column plotted against itself
default_y_index = 1 if len(numeric_features) > 1 else 0
feature_y = st.selectbox('Select Y-axis feature:', numeric_features, index=default_y_index)
color_by = st.selectbox('Color by:', ['Survived', 'Pclass', 'Sex'])

# Survived and Pclass are integer codes; passed as numbers they would get a
# continuous colour scale. Colour by a text copy held in a helper column so the
# groups are discrete and the chosen X/Y columns keep their numeric dtype even
# when the same column is also selected as the colour.
color_key = '_color_group'
interactive_plot_df = df.assign(**{color_key: df[color_by].astype(str)})

fig_interactive = px.scatter(interactive_plot_df,
                             x=feature_x,
                             y=feature_y,
                             color=color_key,
                             # Fix the legend order instead of relying on row order
                             category_orders={color_key: sorted(interactive_plot_df[color_key].unique())},
                             # Show the selected column name, not the helper name
                             labels={color_key: color_by},
                             title=f'{feature_x} vs {feature_y} by {color_by}')
st.plotly_chart(fig_interactive)

# Correlation Heatmap: pairwise correlations between the int64/float64 columns
st.header("Correlation Analysis")
corr_matrix = df.select_dtypes(include=['int64', 'float64']).corr()
corr_heatmap = px.imshow(
    corr_matrix,
    labels={"color": "Correlation"},
    title="Correlation Heatmap",
)
st.plotly_chart(corr_heatmap)

# Data filtering: three optional multiselects narrow the table shown below
st.header("Data Filter")
st.write("Filter the dataset based on your criteria:")

class_col, sex_col, survival_col = st.columns(3)
selected_class = class_col.multiselect('Passenger Class:', sorted(df['Pclass'].unique()))
selected_sex = sex_col.multiselect('Gender:', sorted(df['Sex'].unique()))
selected_survival = survival_col.multiselect('Survival:', sorted(df['Survived'].unique()))

# Apply each filter in turn; an empty selection leaves that column unfiltered
selections_by_column = {
    'Pclass': selected_class,
    'Sex': selected_sex,
    'Survived': selected_survival,
}
filtered_df = df.copy()
for column_name, chosen_values in selections_by_column.items():
    if chosen_values:
        filtered_df = filtered_df[filtered_df[column_name].isin(chosen_values)]

st.write("Filtered Dataset:")
st.dataframe(filtered_df)
Loading code editor...
Click Save & Run to preview your app
Terminal