# Create a Streamlit app to show outliers in numeric columns.
# To upload files, please first save the app.
import streamlit as st
import pandas as pd
import numpy as np
def iqr_bounds(series, k=1.5):
    """Return the ``(lower, upper)`` outlier fences of *series* (IQR rule).

    Args:
        series: Numeric pandas Series; missing values are ignored by
            ``Series.quantile``.
        k: Multiplier applied to the inter-quartile range (1.5 is the
            conventional Tukey fence).

    Returns:
        Tuple ``(Q1 - k * IQR, Q3 + k * IQR)``.
    """
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    iqr = q3 - q1
    return q1 - k * iqr, q3 + k * iqr


def outlier_mask(series, lower_bound, upper_bound):
    """Return a boolean Series that is True where *series* is an outlier.

    A value is an outlier when it lies strictly outside
    ``[lower_bound, upper_bound]``. NaN compares False on both sides and is
    therefore never flagged.
    """
    return (series < lower_bound) | (series > upper_bound)


def _render_column_report(df, column):
    """Render bounds, outlier table and scatter chart for one column."""
    st.subheader(f"Outlier Analysis for {column}")

    series = df[column]
    lower_bound, upper_bound = iqr_bounds(series)
    is_outlier = outlier_mask(series, lower_bound, upper_bound)
    outliers = series[is_outlier]

    # Display statistics
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Lower Bound", f"{lower_bound:.2f}")
    with col2:
        st.metric("Upper Bound", f"{upper_bound:.2f}")
    with col3:
        st.metric("Number of Outliers", len(outliers))

    if outliers.empty:
        st.success("No outliers found in this column!")
        return

    # Show outlier values
    st.write("Outlier Values:")
    st.table(pd.DataFrame({
        'Index': outliers.index,
        'Value': outliers.values,
    }))

    # Scatter plot: label every row from the boolean mask in one vectorised
    # step instead of testing each value for membership in the outlier list.
    scatter_data = pd.DataFrame({
        'index': df.index,
        'value': series,
        'color': np.where(is_outlier, 'Outlier', 'Normal'),
    })
    st.scatter_chart(
        data=scatter_data,
        x='index',
        y='value',
        color='color',
        x_label='Index',
        y_label=column,
    )


def main():
    """Run the app: upload a CSV, pick numeric columns, show IQR outliers."""
    st.title('Numeric Column Outlier Detector')

    # File uploader
    uploaded_file = st.file_uploader("Choose a CSV file", type='csv')
    if uploaded_file is None:
        st.info("Please upload a CSV file to begin analysis.")
        return

    # Load the data; report unreadable uploads instead of showing a traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        return

    # Get numeric columns
    numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
    if not numeric_columns:
        st.error("No numeric columns found in the dataset!")
        return

    # Let user select columns (first numeric column preselected)
    selected_columns = st.multiselect(
        "Select numeric columns to analyze",
        numeric_columns,
        default=numeric_columns[:1],
    )

    # Nothing is rendered when the selection is empty.
    for column in selected_columns:
        _render_column_report(df, column)


# Streamlit executes the script as ``__main__``, so the app still runs under
# ``streamlit run`` while the helpers above stay importable without side effects.
if __name__ == "__main__":
    main()
# Hi! I can help you with any questions about Streamlit and Python. What would you like to know?