Create an app with the following description: 3D Data Clustering Visualizer. Description: Visualize clustering algorithms (like k-means) on 3D data using interactive Plotly 3D scatter plots. Features: Allow users to set the number of clusters, see cluster centroids, and color-code points based on cluster assignment.
To upload files, please first save the app
# viz
import streamlit as st
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans, DBSCAN
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
# --- Page title ---
st.title("3D Data Clustering Visualizer")

# --- Sidebar: controls for the synthetic data set ---
st.sidebar.header("Data Generation")
n_samples = st.sidebar.slider(
    "Number of Samples", min_value=100, max_value=1000, value=300, step=50
)
cluster_std = st.sidebar.slider(
    "Cluster Standard Deviation", min_value=0.1, max_value=3.0, value=1.0, step=0.1
)

# Sample 3-feature points around 4 blob centers. The seed is fixed, so the
# same slider values always yield the same points. The generated blob labels
# are discarded; only the coordinates are kept.
X, _ = make_blobs(
    n_samples=n_samples,
    centers=4,
    cluster_std=cluster_std,
    n_features=3,
    random_state=42,
)

# Standardize the coordinates before they are handed to the clustering step.
X = StandardScaler().fit_transform(X)
# --- Sidebar: choice of clustering algorithm and its parameters ---
st.sidebar.header("Clustering Settings")
algorithm = st.sidebar.selectbox("Choose Clustering Algorithm", ["K-means", "DBSCAN"])

# Fit the selected algorithm on X. Both branches define `model`, `labels` and
# `centroids` for the code that follows; `centroids` is None for DBSCAN.
if algorithm == "K-means":
    n_clusters = st.sidebar.slider("Number of Clusters", 2, 10, 3, step=1)
    # n_init is pinned explicitly: scikit-learn changed its default from 10 to
    # "auto", so leaving it implicit gives version-dependent results (and a
    # FutureWarning on the releases in between).
    model = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    labels = model.fit_predict(X)
    centroids = model.cluster_centers_
else:
    eps = st.sidebar.slider("DBSCAN eps", 0.1, 1.0, 0.5, step=0.1)
    min_samples = st.sidebar.slider("DBSCAN min_samples", 1, 10, 5, step=1)
    model = DBSCAN(eps=eps, min_samples=min_samples)
    labels = model.fit_predict(X)
    centroids = None
# --- 3D scatter plot of the clustered points ---
# One row per point. Cluster labels are stored as strings so Plotly treats
# them as discrete categories instead of a continuous colour scale.
df = pd.DataFrame(X, columns=["x", "y", "z"])
df["Cluster"] = labels.astype(str)

st.header("3D Scatter Plot of Clusters")

# Order the categories numerically; otherwise the legend and colour assignment
# follow the order in which labels first appear in the data.
cluster_order = sorted(df["Cluster"].unique(), key=int)
fig = px.scatter_3d(
    df,
    x="x",
    y="y",
    z="z",
    color="Cluster",
    category_orders={"Cluster": cluster_order},
    title=f"{algorithm} Clustering Results",
)

# Overlay the centroids as large black "x" markers (K-means only).
if algorithm == "K-means" and centroids is not None:
    centroid_df = pd.DataFrame(centroids, columns=["x", "y", "z"])
    fig.add_scatter3d(
        x=centroid_df["x"],
        y=centroid_df["y"],
        z=centroid_df["z"],
        mode="markers",
        marker=dict(size=10, color="black", symbol="x"),
        name="Centroids",
    )

st.plotly_chart(fig)
# --- Summary statistics for the fitted model ---
st.header("Clustering Information")

st.write("### Data Points Per Cluster")
# Count on integer labels so the table is sorted numerically. Sorting the
# string labels would place "10" before "2" once there are ten or more
# clusters, which DBSCAN readily produces with a small eps.
cluster_counts = df["Cluster"].astype(int).value_counts().sort_index()
st.write(cluster_counts)

if algorithm == "K-means":
    st.write("### K-means Inertia (Sum of squared distances to centroids)")
    st.write(model.inertia_)
elif algorithm == "DBSCAN":
    st.write("### DBSCAN Core Sample Count")
    core_samples = len(model.core_sample_indices_)
    st.write(f"Number of core samples: {core_samples}")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?