Ploomber AI Editor | kmeans-demo-db2a

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
import plotly.express as px

# Page heading shown at the top of the app
st.title("K-Means Clustering Demo")

# Sidebar inputs. Slider arguments are (label, min, max, default).
st.sidebar.header("Parameters")
n_clusters = st.sidebar.slider("Number of Clusters (K)", 2, 10, 3)
n_points = st.sidebar.slider("Number of Points", 50, 1000, 300)
random_state = st.sidebar.slider("Random Seed", 0, 100, 42)

# Generate random data
def generate_data(n_points, random_state):
    """Return a shuffled ``(n_points, 2)`` array sampled from three Gaussian blobs.

    The blobs are centred at (2, 2), (-2, -2) and (2, -2), each with a
    standard deviation of 0.5 on both axes.

    Args:
        n_points: Total number of rows to generate.
        random_state: Seed handed to ``np.random.seed``; the same seed and
            ``n_points`` always produce the same array.

    Returns:
        A 2-D NumPy array with exactly ``n_points`` rows and two columns,
        with the rows in shuffled order.
    """
    # Reseeds NumPy's global generator; the draws and the shuffle below use it.
    np.random.seed(random_state)

    blob_centers = ([2, 2], [-2, -2], [2, -2])

    # Give the remainder of n_points / 3 to the first blobs so the sizes sum to
    # exactly n_points (plain floor division dropped up to two rows). When
    # n_points is divisible by 3 every blob has n_points // 3 rows, so the
    # random draws are the same as with equal floor-divided sizes.
    base, extra = divmod(n_points, len(blob_centers))
    blob_sizes = [base + (1 if i < extra else 0) for i in range(len(blob_centers))]

    blobs = [
        np.random.normal(loc=center, scale=0.5, size=(size, 2))
        for center, size in zip(blob_centers, blob_sizes)
    ]

    # Stack the blobs and shuffle rows so blob membership is not encoded in order.
    data = np.vstack(blobs)
    np.random.shuffle(data)
    return data

# Sample the synthetic points for the current sidebar settings
data = generate_data(n_points, random_state)

# Fit K-means on the raw array; the fitted estimator is reused further down
# for its cluster centers and inertia.
kmeans = KMeans(n_clusters=n_clusters, random_state=random_state).fit(data)

# Tabular view of the points, plus the cluster label assigned to each row
df = pd.DataFrame(data, columns=['X', 'Y'])
df['Cluster'] = kmeans.labels_

# Create scatter plot
# Cluster ids are labels, not magnitudes. Passing them as integers makes Plotly
# Express colour them on a continuous scale with a colorbar; casting to strings
# gives one legend entry per cluster instead. The cast is done on a copy so
# `df['Cluster']` itself stays numeric.
fig = px.scatter(
    df.assign(Cluster=df['Cluster'].astype(str)),
    x='X',
    y='Y',
    color='Cluster',
    title='K-Means Clustering Visualization',
    # Pin legend/colour order to 0..K-1 instead of order of first appearance.
    category_orders={'Cluster': [str(i) for i in range(n_clusters)]},
    # Sample viridis at evenly spaced points so cluster i keeps the colour the
    # continuous scale would have given it. max(..., 1) guards against K == 1.
    color_discrete_sequence=px.colors.sample_colorscale(
        'viridis',
        [i / max(n_clusters - 1, 1) for i in range(n_clusters)],
    ),
)

# Update layout
fig.update_layout(
    plot_bgcolor='white',
    showlegend=True,
    width=800,
    height=600
)

# Display plot
st.plotly_chart(fig)

# Table of fitted centroids: one row per cluster, indexed by cluster id
st.subheader("Cluster Centers")
centers = pd.DataFrame(
    kmeans.cluster_centers_,
    columns=['X', 'Y'],
).rename_axis('Cluster')
st.dataframe(centers)

# Report the fitted model's inertia (within-cluster sum of squares), 2 decimals
st.subheader("Clustering Metrics")
st.write(
    f"Inertia (Within-cluster sum of squares): {kmeans.inertia_:.2f}"
)

# Add explanation
# The Random Seed entry mentions K-means as well because `random_state` is
# passed both to generate_data() and to the KMeans constructor.
st.markdown("""
### How it works
1. K-means clustering partitions n observations into k clusters.
2. Each observation belongs to the cluster with the nearest mean (cluster center).
3. The algorithm aims to minimize the within-cluster sum of squares (inertia).

### Parameters
- **Number of Clusters (K)**: The number of clusters to form
- **Number of Points**: Total number of data points to generate
- **Random Seed**: Controls the random generation of data points and the K-means centroid initialization

Try adjusting the parameters in the sidebar to see how they affect the clustering!
""")
Loading code editor...
Click Save & Run to preview your app
Terminal