Ploomber AI Editor

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans, DBSCAN
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

# Streamlit App Title
st.title("3D Data Clustering Visualizer")

# Sidebar Controls for Data Generation
st.sidebar.header("Data Generation")
n_samples = st.sidebar.slider("Number of Samples", 100, 1000, 300, step=50)
cluster_std = st.sidebar.slider("Cluster Standard Deviation", 0.1, 3.0, 1.0, step=0.1)

# Generate synthetic data.
# n_features=3 is required: make_blobs defaults to 2 features, which cannot
# fill the three "x"/"y"/"z" columns this script builds from X further down.
# random_state is fixed so identical slider values always yield the same points.
X, _ = make_blobs(
    n_samples=n_samples,
    n_features=3,
    centers=4,
    cluster_std=cluster_std,
    random_state=42,
)
X = StandardScaler().fit_transform(X)  # Normalize the data (per-feature standardization)

# Sidebar Controls for Clustering Algorithm Selection
st.sidebar.header("Clustering Settings")
algorithm = st.sidebar.selectbox("Choose Clustering Algorithm", ["K-means", "DBSCAN"])

# Build the estimator from the sidebar widgets specific to the chosen algorithm
if algorithm != "K-means":
    eps = st.sidebar.slider("DBSCAN eps", 0.1, 1.0, 0.5, step=0.1)
    min_samples = st.sidebar.slider("DBSCAN min_samples", 1, 10, 5, step=1)
    model = DBSCAN(eps=eps, min_samples=min_samples)
else:
    n_clusters = st.sidebar.slider("Number of Clusters", 2, 10, 3, step=1)
    model = KMeans(n_clusters=n_clusters, random_state=42)

# Both estimators are fitted the same way, so the call is shared
labels = model.fit_predict(X)

# Only the K-means model is read for centers; DBSCAN does not compute centroids
centroids = model.cluster_centers_ if algorithm == "K-means" else None

# Gather the points and their cluster labels into a single DataFrame.
# Labels are converted to strings for better coloring in Plotly.
df = pd.DataFrame(X, columns=["x", "y", "z"]).assign(Cluster=labels.astype(str))

# Plotting 3D Scatter Plot
st.header("3D Scatter Plot of Clusters")
plot_title = f"{algorithm} Clustering Results"
fig = px.scatter_3d(df, x="x", y="y", z="z", color="Cluster", title=plot_title)

# Overlay the K-means centroids as a separate "Centroids" trace
if algorithm == "K-means" and centroids is not None:
    centers = pd.DataFrame(centroids, columns=["x", "y", "z"])
    centroid_marker = {"size": 10, "color": "black", "symbol": "x"}
    fig.add_scatter3d(
        x=centers["x"],
        y=centers["y"],
        z=centers["z"],
        mode="markers",
        marker=centroid_marker,
        name="Centroids",
    )

st.plotly_chart(fig)

# Display information about the clustering
st.header("Clustering Information")
st.write("### Data Points Per Cluster")
# The "Cluster" column holds the labels as strings, so a plain sort_index()
# would order them lexicographically ("0", "1", "10", "2", ...). Sort on the
# integer value instead so the table reads -1 (DBSCAN noise), 0, 1, 2, ..., 10.
cluster_counts = df["Cluster"].value_counts().sort_index(
    key=lambda index: index.astype(int)
)
st.write(cluster_counts)

# Algorithm-specific summary statistic read from the fitted model
if algorithm == "K-means":
    st.write("### K-means Inertia (Sum of squared distances to centroids)")
    st.write(model.inertia_)
elif algorithm == "DBSCAN":
    st.write("### DBSCAN Core Sample Count")
    core_samples = len(model.core_sample_indices_)
    st.write(f"Number of core samples: {core_samples}")
Loading code editor...
Click Save & Run to preview your app
Terminal