Create an R app using bslib for Market Basket Analysis
Drop files here
or click to upload
import streamlit as st
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
# Page configuration; kept ahead of the other Streamlit calls below.
st.set_page_config(page_title="Market Basket Analysis", layout="wide")

# Heading followed by a short description of what the app does.
APP_DESCRIPTION = """
This app performs Market Basket Analysis on transaction data to discover relationships
between products that are frequently bought together.
"""
st.title("Market Basket Analysis Tool")
st.markdown(APP_DESCRIPTION)
# Sample data generation function
def generate_sample_data(n_transactions=1000):
    """Return a random one-hot encoded transaction table.

    Each row is one transaction and each column one product; a cell is 1 when
    the product is part of the transaction and 0 otherwise. Every transaction
    contains between 1 and 5 distinct products drawn with ``np.random``, so
    seeding NumPy's global generator makes the output reproducible.

    Args:
        n_transactions: Number of transactions (rows) to generate. Values
            below 1 produce an empty table that still has the product columns.

    Returns:
        A ``pandas.DataFrame`` with integer 0/1 values, one column per product
        and a default integer index.
    """
    products = ['Bread', 'Milk', 'Cheese', 'Eggs', 'Yogurt', 'Coffee', 'Tea',
                'Juice', 'Cookies', 'Fruits']
    column_of = {product: position for position, product in enumerate(products)}

    # Fill a preallocated integer matrix instead of appending rows to a
    # DataFrame one at a time: row-wise ``df.loc[idx] = ...`` on an empty frame
    # is quadratic and leaves every column with ``object`` dtype.
    n_rows = max(n_transactions, 0)
    one_hot = np.zeros((n_rows, len(products)), dtype=int)
    for row in range(n_rows):
        n_products = np.random.randint(1, 6)  # Each transaction has 1-5 products
        basket = np.random.choice(products, size=n_products, replace=False)
        one_hot[row, [column_of[item] for item in basket.tolist()]] = 1

    return pd.DataFrame(one_hot, columns=products)
# Sidebar controls
st.sidebar.header("Settings")

# Data options: either generate a random sample or read a user-supplied CSV.
data_option = st.sidebar.selectbox(
    "Choose data source",
    ["Use sample data", "Upload your own data"],
)

if data_option == "Use sample data":
    n_transactions = st.sidebar.slider(
        "Number of sample transactions",
        min_value=100,
        max_value=5000,
        value=1000,
        step=100,
    )
    df = generate_sample_data(n_transactions)
else:
    uploaded_file = st.sidebar.file_uploader(
        "Upload CSV file",
        type=['csv'],
    )
    if uploaded_file is None:
        st.warning("Please upload a CSV file or switch to sample data.")
        st.stop()
    try:
        df = pd.read_csv(uploaded_file, index_col=None)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # An empty, malformed or non-UTF-8 upload would otherwise surface as a
        # raw traceback; report it and halt this run instead.
        st.error(f"Could not read the uploaded CSV file: {exc}")
        st.stop()
# Analysis parameters: thresholds chosen in the sidebar and used by the
# frequent-itemset and rule-generation steps further down.
min_support = st.sidebar.slider(
    "Minimum Support", min_value=0.01, max_value=1.0, value=0.1, step=0.01
)
min_confidence = st.sidebar.slider(
    "Minimum Confidence", min_value=0.0, max_value=1.0, value=0.5, step=0.01
)
def _format_itemset(items):
    """Join the members of an itemset into a comma-separated label, sorted by name."""
    # Itemsets are frozensets, whose iteration order is arbitrary; sorting
    # keeps the displayed rule text stable between runs.
    return ', '.join(sorted(str(item) for item in items))


def _render_analysis(data, support_threshold, confidence_threshold):
    """Render the dataset overview, frequent itemsets and association rules.

    Args:
        data: One-hot encoded transactions (rows) by products (columns).
        support_threshold: Minimum support passed to ``apriori``.
        confidence_threshold: Minimum confidence passed to ``association_rules``.

    Returns:
        None. Output is written to the Streamlit page; the function returns
        early with an on-page message when the data is unusable or when no
        itemsets / rules meet the thresholds.
    """
    preview_col, freq_col = st.columns(2)
    with preview_col:
        st.subheader("Dataset Preview")
        st.dataframe(data.head())
        st.text(f"Total transactions: {len(data)}")

    # apriori only accepts 0/1 or boolean cells; check up front so that an
    # unsuitable upload gets a readable message instead of a traceback.
    if data.empty or not data.isin([0, 1]).all().all():
        st.error(
            "The dataset must be a non-empty one-hot encoded table: one row per "
            "transaction, one column per product, and only 0/1 (or True/False) values."
        )
        return

    with freq_col:
        st.subheader("Product Frequency")
        product_freq = data.sum().sort_values(ascending=False)
        st.bar_chart(product_freq)

    # Generate frequent itemsets
    st.subheader("Frequent Itemsets")
    frequent_itemsets = apriori(
        data.astype(bool),  # boolean input is the form mlxtend recommends
        min_support=support_threshold,
        use_colnames=True,
    )
    frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
    st.dataframe(frequent_itemsets)

    # Generate association rules
    st.subheader("Association Rules")
    if frequent_itemsets.empty:
        # association_rules raises ValueError on an empty itemset table.
        st.warning("No itemsets reach the selected minimum support. Try lowering it.")
        return

    rules = association_rules(
        frequent_itemsets, metric="confidence", min_threshold=confidence_threshold
    )
    if rules.empty:
        st.warning("No rules reach the selected minimum confidence. Try lowering it.")
        return

    rules = rules.sort_values(['confidence', 'lift'], ascending=[False, False])

    # Human-readable "A, B → C" label for each rule.
    rules['rule'] = [
        f"{_format_itemset(lhs)} → {_format_itemset(rhs)}"
        for lhs, rhs in zip(rules['antecedents'], rules['consequents'])
    ]

    # Display rules with metrics
    rules_display = rules[['rule', 'support', 'confidence', 'lift']]
    st.dataframe(rules_display.style.format({
        'support': '{:.3f}',
        'confidence': '{:.3f}',
        'lift': '{:.3f}',
    }))

    # Visualization of top rules
    st.subheader("Top 10 Rules by Lift")
    top_rules = rules.nlargest(10, 'lift')
    st.bar_chart(top_rules.set_index('rule')['lift'])


# Perform analysis
if st.sidebar.button("Run Analysis"):
    _render_analysis(df, min_support, min_confidence)
else:
    st.info("Adjust the parameters in the sidebar and click 'Run Analysis' to start.")
# Add some helpful information about the metrics.
# The Markdown text stays at column 0 (indented lines would render as a code
# block). Blank lines separate the two lists from the "A lift value:" line, and
# the comparisons are wrapped in code spans because a list item starting with
# ">" is otherwise parsed as a block quote.
with st.expander("Understanding the Metrics"):
    st.markdown("""
* **Support**: The frequency of an itemset in the transactions
* **Confidence**: How likely item Y is purchased when item X is purchased
* **Lift**: How likely item Y is purchased when item X is purchased, while controlling for Y's popularity

A lift value:

* `> 1` indicates that the items are likely to be bought together
* `= 1` indicates that the items are independent
* `< 1` indicates that the items are unlikely to be bought together
""")
Hi! I can help you with any questions about Streamlit and Python. What would you like to know?