Create an app with the following description: 3D Data Clustering Visualizer. Description: Visualize clustering algorithms (like k-means) on 3D data using interactive Plotly 3D scatter plots. Features: Allow users to set the number of clusters, see cluster centroids, and color-code points based on cluster assignment.
Drop files here
or click to upload
import streamlit as st
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans, DBSCAN
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
# Page title shown at the top of the app.
st.title("3D Data Clustering Visualizer")

# Sidebar controls for the synthetic dataset: how many points to draw and
# how widely each blob is spread.
st.sidebar.header("Data Generation")
n_samples = st.sidebar.slider(
    "Number of Samples", min_value=100, max_value=1000, value=300, step=50
)
cluster_std = st.sidebar.slider(
    "Cluster Standard Deviation", min_value=0.1, max_value=3.0, value=1.0, step=0.1
)

# Generate four 3-feature blobs with a fixed seed so the same slider values
# always produce the same points; the ground-truth blob ids are discarded.
X, _ = make_blobs(
    n_samples=n_samples,
    n_features=3,
    centers=4,
    cluster_std=cluster_std,
    random_state=42,
)

# Standardise the three features before clustering.
X = StandardScaler().fit_transform(X)
# Sidebar controls for the clustering step. Whichever algorithm is chosen,
# this section leaves three names defined for the code that follows:
#   model     - the fitted estimator
#   labels    - one integer cluster id per row of X
#   centroids - the K-means cluster centres, or None when DBSCAN is selected
st.sidebar.header("Clustering Settings")
algorithm = st.sidebar.selectbox("Choose Clustering Algorithm", ["K-means", "DBSCAN"])

if algorithm == "K-means":
    n_clusters = st.sidebar.slider("Number of Clusters", 2, 10, 3, step=1)
    # n_init is set explicitly: its default changed between scikit-learn
    # releases, so leaving it out triggers a FutureWarning on some versions
    # and changes the number of restarts (and possibly the result) on others.
    model = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    labels = model.fit_predict(X)
    centroids = model.cluster_centers_
else:
    eps = st.sidebar.slider("DBSCAN eps", 0.1, 1.0, 0.5, step=0.1)
    min_samples = st.sidebar.slider("DBSCAN min_samples", 1, 10, 5, step=1)
    model = DBSCAN(eps=eps, min_samples=min_samples)
    labels = model.fit_predict(X)
    # No centroids are drawn for DBSCAN; None tells the plotting code to skip them.
    centroids = None
# Build the plotting frame: one row per point, with the cluster id stored as
# a string so Plotly treats it as a discrete category (one colour per
# cluster) rather than a continuous colour scale.
df = pd.DataFrame(X, columns=["x", "y", "z"])
df["Cluster"] = labels.astype(str)

st.header("3D Scatter Plot of Clusters")
fig = px.scatter_3d(
    df,
    x="x",
    y="y",
    z="z",
    color="Cluster",
    # Without an explicit order the legend follows the order in which labels
    # first appear in the data; sort by numeric value so it reads -1, 0, 1, ...
    category_orders={"Cluster": sorted(df["Cluster"].unique(), key=int)},
    title=f"{algorithm} Clustering Results",
)

# centroids is only non-None when K-means was run, so this single check is
# enough to decide whether to overlay the centre markers.
if centroids is not None:
    centroid_df = pd.DataFrame(centroids, columns=["x", "y", "z"])
    fig.add_scatter3d(
        x=centroid_df["x"],
        y=centroid_df["y"],
        z=centroid_df["z"],
        mode="markers",
        marker=dict(size=10, color="black", symbol="x"),
        name="Centroids",
    )

st.plotly_chart(fig)
# Summary section below the plot: per-cluster point counts plus one
# algorithm-specific statistic.
st.header("Clustering Information")

st.write("### Data Points Per Cluster")
# Cluster ids are stored as strings, so sort_index() would order them
# lexicographically ("10" before "2"). Reorder by numeric value instead.
cluster_counts = df["Cluster"].value_counts()
cluster_counts = cluster_counts.loc[sorted(cluster_counts.index, key=int)]
st.write(cluster_counts)

if algorithm == "K-means":
    st.write("### K-means Inertia (Sum of squared distances to centroids)")
    st.write(model.inertia_)
elif algorithm == "DBSCAN":
    st.write("### DBSCAN Core Sample Count")
    core_samples = len(model.core_sample_indices_)
    st.write(f"Number of core samples: {core_samples}")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?