Ploomber AI Editor | market-basket-analysis-9267

To upload files, please first save the app
Code Editor for app.py

import streamlit as st
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Browser-tab title and full-width layout for the app
st.set_page_config(page_title="Market Basket Analysis", layout="wide")

# Page heading followed by a short introduction to the tool
st.title("Market Basket Analysis Tool")
st.markdown(
    """
This app performs Market Basket Analysis on transaction data to discover relationships 
between products that are frequently bought together.
"""
)

# Sample data generation function
def generate_sample_data(n_transactions: int = 1000) -> pd.DataFrame:
    """Build a random one-hot encoded transaction table.

    Each transaction contains between 1 and 5 distinct products drawn
    uniformly at random from a fixed catalogue of ten products.

    Args:
        n_transactions: Number of transactions (rows) to generate.

    Returns:
        A DataFrame with one row per transaction and one column per
        product; a cell is 1 when the product is in the transaction and
        0 otherwise.
    """
    products = ['Bread', 'Milk', 'Cheese', 'Eggs', 'Yogurt', 'Coffee', 'Tea',
                'Juice', 'Cookies', 'Fruits']

    rows = []
    for _ in range(n_transactions):
        n_products = np.random.randint(1, 6)  # upper bound is exclusive: 1-5 products
        basket = set(np.random.choice(products, size=n_products, replace=False))
        rows.append([1 if product in basket else 0 for product in products])

    # Build the frame in one go: growing it row by row with ``df.loc[idx] = ...``
    # re-allocates on every insertion and gets very slow for large inputs.
    return pd.DataFrame(rows, columns=products)

# Sidebar controls
st.sidebar.header("Settings")

# Data options: either synthetic transactions or a user-supplied CSV
data_option = st.sidebar.selectbox(
    "Choose data source",
    ["Use sample data", "Upload your own data"]
)

if data_option == "Use sample data":
    n_transactions = st.sidebar.slider(
        "Number of sample transactions",
        min_value=100,
        max_value=5000,
        value=1000,
        step=100
    )
    df = generate_sample_data(n_transactions)
else:
    uploaded_file = st.sidebar.file_uploader(
        "Upload CSV file",
        type=['csv']
    )
    if uploaded_file is None:
        st.warning("Please upload a CSV file or switch to sample data.")
        st.stop()

    # Report unreadable uploads as a message instead of a raw traceback.
    try:
        df = pd.read_csv(uploaded_file, index_col=None)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        st.stop()

    # The frame is later passed to apriori, which only accepts one-hot data
    # (0/1 or True/False); check here so the user gets a clear explanation.
    is_one_hot = all(
        pd.api.types.is_bool_dtype(column) or column.isin([0, 1]).all()
        for _, column in df.items()
    )
    if df.empty or not is_one_hot:
        st.error(
            "The uploaded data must be one-hot encoded: one row per transaction, "
            "one column per product, and only 0/1 (or True/False) values."
        )
        st.stop()

# Thresholds for the analysis, both adjustable from the sidebar

# Lower bound on itemset support (0.01 to 1.0, starts at 0.1)
min_support = st.sidebar.slider(
    "Minimum Support", min_value=0.01, max_value=1.0, value=0.1, step=0.01
)

# Lower bound on rule confidence (0.0 to 1.0, starts at 0.5)
min_confidence = st.sidebar.slider(
    "Minimum Confidence", min_value=0.0, max_value=1.0, value=0.5, step=0.01
)

# Perform analysis: runs only on the script pass triggered by the button click
if st.sidebar.button("Run Analysis"):
    # Display the dataset next to the per-product totals
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Dataset Preview")
        st.dataframe(df.head())
        st.text(f"Total transactions: {len(df)}")

    with col2:
        st.subheader("Product Frequency")
        product_freq = df.sum().sort_values(ascending=False)
        st.bar_chart(product_freq)

    # Generate frequent itemsets
    st.subheader("Frequent Itemsets")
    frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)

    if frequent_itemsets.empty:
        # association_rules raises ValueError on an empty itemset frame, so
        # stop here with a hint instead of letting the app crash.
        st.warning(
            "No frequent itemsets found at this minimum support. "
            "Try lowering the 'Minimum Support' value."
        )
    else:
        frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
        st.dataframe(frequent_itemsets)

        # Generate association rules
        st.subheader("Association Rules")
        rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)

        if rules.empty:
            # Row-wise apply on an empty frame does not yield a Series, so the
            # 'rule' column below could not be assigned; report it instead.
            st.warning(
                "No association rules satisfy the current thresholds. "
                "Try lowering 'Minimum Support' or 'Minimum Confidence'."
            )
        else:
            rules = rules.sort_values(['confidence', 'lift'], ascending=[False, False])

            # Format rules for better display
            def format_rule(row):
                """Return 'antecedents → consequents' as a readable label."""
                # Sort the items: frozenset iteration order is arbitrary and
                # would otherwise make the labels unstable between runs.
                antecedents = ', '.join(sorted(map(str, row['antecedents'])))
                consequents = ', '.join(sorted(map(str, row['consequents'])))
                return f"{antecedents} → {consequents}"

            rules['rule'] = rules.apply(format_rule, axis=1)

            # Display rules with metrics
            rules_display = rules[['rule', 'support', 'confidence', 'lift']]
            st.dataframe(rules_display.style.format({
                'support': '{:.3f}',
                'confidence': '{:.3f}',
                'lift': '{:.3f}'
            }))

            # Visualization of top rules
            st.subheader("Top 10 Rules by Lift")
            top_rules = rules.nlargest(10, 'lift')
            st.bar_chart(top_rules.set_index('rule')['lift'])

else:
    st.info("Adjust the parameters in the sidebar and click 'Run Analysis' to start.")

# Add some helpful information about the metrics
with st.expander("Understanding the Metrics"):
    # The lift thresholds are spelled out in words: a bullet that starts with
    # ">" is parsed by Markdown as a block quote, which drops the symbol.
    st.markdown("""
    * **Support**: The frequency of an itemset in the transactions
    * **Confidence**: How likely item Y is purchased when item X is purchased
    * **Lift**: How likely item Y is purchased when item X is purchased, while controlling for Y's popularity
    
    A lift value:
    * **greater than 1** indicates that the items are likely to be bought together
    * **equal to 1** indicates that the items are independent
    * **less than 1** indicates that the items are unlikely to be bought together
    """)
Loading code editor...
Click Save & Run to preview your app
Terminal