Frequent Itemset Mining with the Apriori Algorithm¶

Keywords: Association Rule Mining, Market Basket Analysis, Itemset, Support, Confidence, Lift, Apriori Principle (Anti-monotone Property), Frequent Itemsets, Association Rules.

Objectives:¶

  • Students will understand the goal of association rule mining and its classic application in market basket analysis.
  • Students will define, calculate, and interpret the key metrics for evaluating rules: support, confidence, and lift.
  • Students will grasp the Apriori principle (anti-monotone property) and understand how it enables the efficient, level-wise generation of frequent itemsets.
  • Students will implement the Apriori algorithm using the mlxtend library to generate frequent itemsets from transactional data.
  • Students will derive association rules from the frequent itemsets and filter them based on interest metrics.
  • Students will critically evaluate the strength, predictability, and "interestingness" of the generated rules to derive actionable insights.

Setup: Install and Import Libraries¶

For this lab, we will use the mlxtend library, which provides a robust and easy-to-use implementation of the Apriori algorithm.

In [13]:
# Install the necessary libraries
!pip install mlxtend networkx -q

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import sklearn
import mlxtend

# MLxtend for Apriori and association rules
from mlxtend.frequent_patterns import apriori, association_rules
# TransactionEncoder converts lists of transactions into the one-hot format (used in Part 2)
from mlxtend.preprocessing import TransactionEncoder

# Set plot style and context for better aesthetics
sns.set_theme(style="whitegrid")
sns.set_context("notebook", font_scale=1.2)


# Suppress warnings for cleaner output
import warnings
warnings.filterwarnings('ignore')

print(f"Pandas Version: {pd.__version__}")
print(f"MLxtend Version: {mlxtend.__version__}")
print(f"Scikit-learn Version: {sklearn.__version__}")
Pandas Version: 2.2.2
MLxtend Version: 0.23.4
Scikit-learn Version: 1.6.1

Part 1: Core Concepts of Association Rule Mining¶

Before implementing the algorithm, it's crucial to understand the metrics used to evaluate the strength of association rules.
These rules typically take the form "If A, then B" or {Antecedent} → {Consequent}.

Support¶

This is the most basic metric. It measures the popularity of an itemset.

Formula:

$$ Support(A) = \frac{\text{Number of transactions containing A}}{\text{Total number of transactions}} $$

Meaning:

  • A low support means the itemset is rare.
  • We use a min_support threshold to filter out itemsets that occur too rarely to be worth analyzing.

Confidence¶

This measures the predictability of a rule.
It is the conditional probability that a transaction contains the consequent, given that it contains the antecedent.

Formula:

$$ Confidence(A \rightarrow B) = \frac{Support(A \cup B)}{Support(A)} $$

Meaning:

  • A high confidence suggests that the consequent is very likely to be purchased when the antecedent is purchased.

Lift¶

This metric measures the "interestingness" of a rule by comparing confidence to the baseline probability of the consequent appearing.

Formula:

$$ Lift(A \rightarrow B) = \frac{Confidence(A \rightarrow B)}{Support(B)} $$

Meaning:

  • Lift = 1 → Antecedent and consequent are independent → Not interesting.
  • Lift > 1 → Presence of antecedent increases likelihood of consequent → Positive association.
  • Lift < 1 → Presence of antecedent decreases likelihood of consequent → Negative association.
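
To make these formulas concrete, here is a small hand computation of all three metrics for the rule {Bread} → {Butter} on a hypothetical five-transaction log (the items and values are purely illustrative, not taken from the datasets used later in this lab):

In [ ]:
# Hand-computing support, confidence, and lift for the rule {Bread} -> {Butter}
# on a tiny, made-up transaction log (illustrative values only)
transactions = [{'Bread', 'Butter'}, {'Bread'}, {'Bread', 'Butter', 'Jam'},
                {'Butter'}, {'Bread', 'Butter'}]
n = len(transactions)

support_A  = sum('Bread' in t for t in transactions) / n              # Support(A)     = 0.8
support_B  = sum('Butter' in t for t in transactions) / n             # Support(B)     = 0.8
support_AB = sum({'Bread', 'Butter'} <= t for t in transactions) / n  # Support(A u B) = 0.6

confidence = support_AB / support_A   # P(Butter | Bread) = 0.75
lift = confidence / support_B         # 0.75 / 0.8 = 0.9375

print(f"support={support_AB:.2f}, confidence={confidence:.2f}, lift={lift:.3f}")

Notice that a seemingly high confidence of 0.75 still yields a lift below 1 here, because Butter is popular on its own. This is exactly why lift, not confidence alone, is the better filter for "interesting" rules.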

Part 2: The Apriori Algorithm: Generating Frequent Itemsets¶

The Apriori algorithm efficiently finds all itemsets whose support is greater than or equal to a min_support threshold. It relies on the Apriori Principle:

If an itemset is frequent, then all of its subsets must also be frequent.

This allows the algorithm to work level by level. It first finds all frequent 1-itemsets, then uses only those to generate candidate 2-itemsets, prunes every candidate that has an infrequent subset (by the contrapositive: if an itemset is infrequent, so is every superset of it), and so on. This avoids a brute-force search of every possible itemset. The sketch below illustrates the loop.
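
To make the join-and-prune loop concrete, here is a minimal pure-Python sketch of the level-wise search. This is an illustration of the principle, not mlxtend's actual implementation:

In [ ]:
from itertools import combinations

def apriori_sketch(transactions, min_support):
    """Level-wise frequent itemset search (toy sketch of the Apriori idea)."""
    tx_sets = [set(t) for t in transactions]
    n = len(tx_sets)

    def support(itemset):
        # Fraction of transactions containing every item in the itemset
        return sum(itemset <= t for t in tx_sets) / n

    # Level 1: frequent 1-itemsets
    items = {i for t in tx_sets for i in t}
    frequent = {fs: s for fs in (frozenset([i]) for i in items)
                if (s := support(fs)) >= min_support}
    all_frequent = dict(frequent)

    k = 2
    while frequent:
        # Join step: union pairs of frequent (k-1)-itemsets into candidate k-itemsets
        prev = list(frequent)
        candidates = {a | b for a in prev for b in prev if len(a | b) == k}
        # Prune step (Apriori principle): discard any candidate that has an
        # infrequent (k-1)-subset before counting its support in the data
        candidates = {c for c in candidates
                      if all(frozenset(sub) in frequent for sub in combinations(c, k - 1))}
        frequent = {c: s for c in candidates if (s := support(c)) >= min_support}
        all_frequent.update(frequent)
        k += 1
    return all_frequent

Run with min_support=0.6 on the five toy transactions defined in the next cell, this sketch recovers the same eleven frequent itemsets that mlxtend's apriori reports below.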

Data Preparation: The algorithm requires data in a one-hot encoded format where each row is a transaction and each column is an item.

In [3]:
print("--- Part 2: Generating Frequent Itemsets with Apriori ---")

# 1. Create a sample transactional dataset
transactions = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
                ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
                ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
                ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
                ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]

print(f"Sample transaction: {transactions[0]}")

# 2. Preprocess the data into a one-hot encoded DataFrame
# The TransactionEncoder transforms the data into the required format
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

print("\nOne-Hot Encoded DataFrame:")
print(df)

# 3. Run the Apriori algorithm to find frequent itemsets
# We set a min_support of 0.6, meaning the item(s) must appear in at least 60% of the transactions.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)

print("\nFrequent Itemsets (with support >= 0.6):")
print(frequent_itemsets)

# INSIGHT: The algorithm found five frequent 1-itemsets: {Eggs}, {Kidney Beans}, {Milk}, {Onion}, and {Yogurt}.
# It also found five frequent 2-itemsets, such as {Kidney Beans, Eggs} and {Eggs, Onion}, plus
# one frequent 3-itemset, {Kidney Beans, Eggs, Onion}, which just meets the 0.6 support threshold.
--- Part 2: Generating Frequent Itemsets with Apriori ---
Sample transaction: ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt']

One-Hot Encoded DataFrame:
   Apple   Corn   Dill   Eggs  Ice cream  Kidney Beans   Milk  Nutmeg  Onion  \
0  False  False  False   True      False          True   True    True   True   
1  False  False   True   True      False          True  False    True   True   
2   True  False  False   True      False          True   True   False  False   
3  False   True  False  False      False          True   True   False  False   
4  False   True  False   True       True          True  False   False   True   

   Unicorn  Yogurt  
0    False    True  
1    False    True  
2    False   False  
3     True    True  
4    False   False  

Frequent Itemsets (with support >= 0.6):
    support                     itemsets
0       0.8                       (Eggs)
1       1.0               (Kidney Beans)
2       0.6                       (Milk)
3       0.6                      (Onion)
4       0.6                     (Yogurt)
5       0.8         (Kidney Beans, Eggs)
6       0.6                (Eggs, Onion)
7       0.6         (Kidney Beans, Milk)
8       0.6        (Kidney Beans, Onion)
9       0.6       (Kidney Beans, Yogurt)
10      0.6  (Kidney Beans, Eggs, Onion)

Part 3: Generating and Interpreting Association Rules¶

Once we have the frequent itemsets, we can use them to generate association rules. We can then filter these rules by a metric like confidence or lift to find the most significant ones.

Tasks:

  • Generate rules from our frequent itemsets using a minimum confidence threshold.
  • Sort the rules by lift to find the most "interesting" associations.
In [4]:
print("\n--- Part 3: Generating and Interpreting Association Rules ---")

# 1. Generate association rules from the frequent itemsets
# We will set a minimum confidence threshold of 0.7
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)

print("\nGenerated Association Rules (with confidence >= 0.7):")
# The 'antecedents' and 'consequents' columns contain frozensets, which are immutable sets
print(rules)

# 2. Filter and sort the rules to find the most interesting ones
# Let's look for rules with a high lift (greater than 1) and sort them.
interesting_rules = rules[rules['lift'] > 1].sort_values(by='lift', ascending=False)

print("\n'Interesting' Rules (sorted by Lift):")
print(interesting_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])

# INSIGHT & INTERPRETATION:
# Several rules tie for the highest lift (1.25). Among them, {Onion} -> {Eggs} also has a confidence of 1.0,
# meaning that 100% of the time a customer buys an Onion, they also buy Eggs.
# - Its lift of 1.25 means that a customer is 1.25 times more likely to buy Eggs if they buy an Onion,
#   compared to a random customer. This suggests a meaningful, positive association.
--- Part 3: Generating and Interpreting Association Rules ---

Generated Association Rules (with confidence >= 0.7):
              antecedents            consequents  antecedent support  \
0          (Kidney Beans)                 (Eggs)                 1.0   
1                  (Eggs)         (Kidney Beans)                 0.8   
2                  (Eggs)                (Onion)                 0.8   
3                 (Onion)                 (Eggs)                 0.6   
4                  (Milk)         (Kidney Beans)                 0.6   
5                 (Onion)         (Kidney Beans)                 0.6   
6                (Yogurt)         (Kidney Beans)                 0.6   
7    (Kidney Beans, Eggs)                (Onion)                 0.8   
8   (Kidney Beans, Onion)                 (Eggs)                 0.6   
9           (Eggs, Onion)         (Kidney Beans)                 0.6   
10                 (Eggs)  (Kidney Beans, Onion)                 0.8   
11                (Onion)   (Kidney Beans, Eggs)                 0.6   

    consequent support  support  confidence  lift  representativity  leverage  \
0                  0.8      0.8        0.80  1.00               1.0      0.00   
1                  1.0      0.8        1.00  1.00               1.0      0.00   
2                  0.6      0.6        0.75  1.25               1.0      0.12   
3                  0.8      0.6        1.00  1.25               1.0      0.12   
4                  1.0      0.6        1.00  1.00               1.0      0.00   
5                  1.0      0.6        1.00  1.00               1.0      0.00   
6                  1.0      0.6        1.00  1.00               1.0      0.00   
7                  0.6      0.6        0.75  1.25               1.0      0.12   
8                  0.8      0.6        1.00  1.25               1.0      0.12   
9                  1.0      0.6        1.00  1.00               1.0      0.00   
10                 0.6      0.6        0.75  1.25               1.0      0.12   
11                 0.8      0.6        1.00  1.25               1.0      0.12   

    conviction  zhangs_metric  jaccard  certainty  kulczynski  
0          1.0            0.0     0.80      0.000       0.900  
1          inf            0.0     0.80      0.000       0.900  
2          1.6            1.0     0.75      0.375       0.875  
3          inf            0.5     0.75      1.000       0.875  
4          inf            0.0     0.60      0.000       0.800  
5          inf            0.0     0.60      0.000       0.800  
6          inf            0.0     0.60      0.000       0.800  
7          1.6            1.0     0.75      0.375       0.875  
8          inf            0.5     0.75      1.000       0.875  
9          inf            0.0     0.60      0.000       0.800  
10         1.6            1.0     0.75      0.375       0.875  
11         inf            0.5     0.75      1.000       0.875  

'Interesting' Rules (sorted by Lift):
              antecedents            consequents  support  confidence  lift
3                 (Onion)                 (Eggs)      0.6        1.00  1.25
8   (Kidney Beans, Onion)                 (Eggs)      0.6        1.00  1.25
11                (Onion)   (Kidney Beans, Eggs)      0.6        1.00  1.25
2                  (Eggs)                (Onion)      0.6        0.75  1.25
7    (Kidney Beans, Eggs)                (Onion)      0.6        0.75  1.25
10                 (Eggs)  (Kidney Beans, Onion)      0.6        0.75  1.25

Lab Tasks & Exercises¶

Now, apply what you've learned. The following code cell contains three tasks to help you explore how the Apriori algorithm's parameters and metrics work.

In [5]:
# --- TASK 1: Adjusting the Support Threshold ---
# The support threshold is the most important parameter. Re-run the `apriori` algorithm on the
# one-hot encoded `df` but with a lower `min_support` of 0.5.
# How many frequent itemsets are generated now? What does this tell you about setting this threshold?

# YOUR CODE HERE
# frequent_itemsets_low_support = apriori(df, min_support=0.5, use_colnames=True)
# print("--- Task 1: Frequent itemsets with min_support=0.5 ---")
# print(f"Number of frequent itemsets found: {len(frequent_itemsets_low_support)}")
# print(frequent_itemsets_low_support)


# --- TASK 2: Adjusting the Confidence Threshold ---
# Using the original `frequent_itemsets` (from min_support=0.6), re-run the `association_rules`
# generation, but this time with a very high `min_threshold` for confidence, e.g., 0.9.
# How many rules are generated now? What types of rules are they?

# YOUR CODE HERE
# rules_high_confidence = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.9)
# print("\n--- Task 2: Rules with min_confidence=0.9 ---")
# print(f"Number of rules found: {len(rules_high_confidence)}")
# print(rules_high_confidence)


# --- TASK 3: Rule Interpretation ---
# Look at the full set of original rules (from min_confidence=0.7). Find the rule with the highest confidence.
# Is it the same as the rule with the highest lift?
# Interpret the rule `{Eggs} -> {Kidney Beans}` in plain English, using its support, confidence, and lift values.

# YOUR CODE HERE to find the rule and its values
# rule_to_interpret = rules[(rules['antecedents'] == {'Eggs'}) & (rules['consequents'] == {'Kidney Beans'})]
# print("\n--- Task 3: Interpreting the rule {Eggs} -> {Kidney Beans} ---")
# print(rule_to_interpret[['support', 'confidence', 'lift']])
# print("\nInterpretation:")
# print("This rule has a support of 0.8, meaning Eggs and Kidney Beans appear together in 80% of all transactions.")
# print("It has a confidence of 1.0, meaning that in 100% of the transactions where Eggs were bought, Kidney Beans were also bought.")
# print("It has a lift of 1.0, which means that buying Eggs does not make a customer any more or less likely to buy Kidney Beans than a random customer. The items are statistically independent, so the rule is not very 'interesting'.")

Part 4: Real-World Dataset & Preprocessing¶

We will use the "Online Retail" dataset from the UCI Machine Learning Repository. This is a transnational dataset containing all the transactions that occurred between 01/12/2010 and 09/12/2011 for a UK-based online retailer.

Data Preparation: This is the most critical part of any real-world analysis. We need to:

  1. Load the data.
  2. Clean it by removing null values and credit transactions (returns).
  3. Transform it from a sales log format into a one-hot encoded transactional format, where each row represents a unique invoice and columns represent the items.
In [15]:
print("--- Part 4: Real-World Dataset & Preprocessing ---")

# 1. Load the dataset
# This may take a moment to download
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx'
df = pd.read_excel(url)

print(f"Original data shape: {df.shape}")
print(df.head())

# 2. Clean the data
# Remove rows with missing InvoiceNo or Description
df.dropna(axis=0, subset=['InvoiceNo', 'Description'], inplace=True)
# Remove leading/trailing spaces from Description
df['Description'] = df['Description'].str.strip()
# Convert InvoiceNo to string to handle both numeric and non-numeric invoices
df['InvoiceNo'] = df['InvoiceNo'].astype('str')
# Remove credit transactions (returns), which are identified by invoices starting with 'C'
df = df[~df['InvoiceNo'].str.contains('C')]

print(f"\nData shape after cleaning: {df.shape}")

# 3. Transform the data into a one-hot encoded transactional format
# We will focus on transactions from a single country for simplicity (e.g., France)
basket = (df[df['Country'] =="France"]
          .groupby(['InvoiceNo', 'Description'])['Quantity']
          .sum().unstack().reset_index().fillna(0)
          .set_index('InvoiceNo'))

# Convert the positive quantities to 1 (item was bought) and 0 otherwise
def encode_units(x):
    return 1 if x > 0 else 0

# Note: DataFrame.applymap is deprecated since pandas 2.1; DataFrame.map is its replacement
basket_sets = basket.map(encode_units)
# Drop the 'POSTAGE' column as it's not a sold item
basket_sets.drop('POSTAGE', inplace=True, axis=1)

print("\nSample of the final one-hot encoded transactional data for France:")
print(basket_sets.head())
--- Part 4: Real-World Dataset & Preprocessing ---
Original data shape: (541909, 8)
  InvoiceNo StockCode                          Description  Quantity  \
0    536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER         6   
1    536365     71053                  WHITE METAL LANTERN         6   
2    536365    84406B       CREAM CUPID HEARTS COAT HANGER         8   
3    536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE         6   
4    536365    84029E       RED WOOLLY HOTTIE WHITE HEART.         6   

          InvoiceDate  UnitPrice  CustomerID         Country  
0 2010-12-01 08:26:00       2.55     17850.0  United Kingdom  
1 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  
2 2010-12-01 08:26:00       2.75     17850.0  United Kingdom  
3 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  
4 2010-12-01 08:26:00       3.39     17850.0  United Kingdom  

Data shape after cleaning: (531167, 8)

Sample of the final one-hot encoded transactional data for France:
Description  10 COLOUR SPACEBOY PEN  12 COLOURED PARTY BALLOONS  \
InvoiceNo                                                         
536370                            0                           0   
536852                            0                           0   
536974                            0                           0   
537065                            0                           0   
537463                            0                           0   

Description  12 EGG HOUSE PAINTED WOOD  12 MESSAGE CARDS WITH ENVELOPES  \
InvoiceNo                                                                 
536370                               0                                0   
536852                               0                                0   
536974                               0                                0   
537065                               0                                0   
537463                               0                                0   

Description  12 PENCIL SMALL TUBE WOODLAND  \
InvoiceNo                                    
536370                                   0   
536852                                   0   
536974                                   0   
537065                                   0   
537463                                   0   

Description  12 PENCILS SMALL TUBE RED RETROSPOT  12 PENCILS SMALL TUBE SKULL  \
InvoiceNo                                                                       
536370                                         0                            0   
536852                                         0                            0   
536974                                         0                            0   
537065                                         0                            0   
537463                                         0                            0   

Description  12 PENCILS TALL TUBE POSY  12 PENCILS TALL TUBE RED RETROSPOT  \
InvoiceNo                                                                    
536370                               0                                   0   
536852                               0                                   0   
536974                               0                                   0   
537065                               0                                   0   
537463                               0                                   0   

Description  12 PENCILS TALL TUBE WOODLAND  ...  WRAP VINTAGE PETALS  DESIGN  \
InvoiceNo                                   ...                                
536370                                   0  ...                            0   
536852                                   0  ...                            0   
536974                                   0  ...                            0   
537065                                   0  ...                            0   
537463                                   0  ...                            0   

Description  YELLOW COAT RACK PARIS FASHION  YELLOW GIANT GARDEN THERMOMETER  \
InvoiceNo                                                                      
536370                                    0                                0   
536852                                    0                                0   
536974                                    0                                0   
537065                                    0                                0   
537463                                    0                                0   

Description  YELLOW SHARK HELICOPTER  ZINC  STAR T-LIGHT HOLDER  \
InvoiceNo                                                         
536370                             0                          0   
536852                             0                          0   
536974                             0                          0   
537065                             0                          0   
537463                             0                          0   

Description  ZINC FOLKART SLEIGH BELLS  ZINC HERB GARDEN CONTAINER  \
InvoiceNo                                                            
536370                               0                           0   
536852                               0                           0   
536974                               0                           0   
537065                               0                           0   
537463                               0                           0   

Description  ZINC METAL HEART DECORATION  ZINC T-LIGHT HOLDER STAR LARGE  \
InvoiceNo                                                                  
536370                                 0                               0   
536852                                 0                               0   
536974                                 0                               0   
537065                                 0                               0   
537463                                 0                               0   

Description  ZINC T-LIGHT HOLDER STARS SMALL  
InvoiceNo                                     
536370                                     0  
536852                                     0  
536974                                     0  
537065                                     0  
537463                                     0  

[5 rows x 1562 columns]

Part 5: EDA - Visualizing Top Items¶

Before mining for rules, let's explore the data. A simple but effective first step is to identify and visualize the most frequently purchased items. This gives us a sense of the "popular" items that are likely to appear in our frequent itemsets.

In [16]:
print("\n--- Part 5: EDA - Visualizing Top Items ---")

# Sum up the item frequencies
item_frequencies = basket_sets.sum().sort_values(ascending=False)

# Import matplotlib and seaborn for plotting
import matplotlib.pyplot as plt
import seaborn as sns

# Visualize the top 20 most frequent items
plt.figure(figsize=(12, 8))
sns.barplot(x=item_frequencies.head(20).values, y=item_frequencies.head(20).index, palette='viridis')
plt.title('Top 20 Most Frequent Items (France)')
plt.xlabel('Frequency')
plt.ylabel('Item Description')
plt.show()
--- Part 5: EDA - Visualizing Top Items ---
[Figure: bar chart of the top 20 most frequent items in the France subset]

Part 6: Apriori for Frequent Itemset Generation¶

Now we apply the Apriori algorithm to our prepared transactional data. The most crucial parameter is min_support. For a large dataset with many unique items, this value will typically be very low.

In [17]:
print("\n--- Part 6: Apriori for Frequent Itemset Generation ---")

# 1. Run the Apriori algorithm
# We will set a low min_support of 0.05
frequent_itemsets = apriori(basket_sets, min_support=0.05, use_colnames=True)

# Sort by support for better readability
frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)

print("\nTop 10 Frequent Itemsets:")
print(frequent_itemsets.head(10))

# Visualize the support of the top 10 frequent itemsets
plt.figure(figsize=(12, 6))
sns.barplot(x=frequent_itemsets['support'].head(10), y=[str(item) for item in frequent_itemsets['itemsets'].head(10)], palette='mako')
plt.title('Top 10 Frequent Itemsets by Support')
plt.xlabel('Support')
plt.ylabel('Itemsets')
plt.show()
--- Part 6: Apriori for Frequent Itemset Generation ---

Top 10 Frequent Itemsets:
     support                              itemsets
46  0.188776                  (RABBIT NIGHT LIGHT)
52  0.181122       (RED TOADSTOOL LED NIGHT LIGHT)
44  0.170918    (PLASTERS IN TIN WOODLAND ANIMALS)
40  0.168367       (PLASTERS IN TIN CIRCUS PARADE)
59  0.158163  (ROUND SNACK BOXES SET OF4 WOODLAND)
26  0.153061             (LUNCH BAG RED RETROSPOT)
31  0.142857    (LUNCH BOX WITH CUTLERY RETROSPOT)
42  0.137755            (PLASTERS IN TIN SPACEBOY)
50  0.137755            (RED RETROSPOT MINI CASES)
65  0.137755         (SET/6 RED SPOTTY PAPER CUPS)
[Figure: bar chart of the top 10 frequent itemsets by support]

Part 7: Generating and Visualizing Association Rules¶

With our frequent itemsets, we can now generate association rules. We'll filter these rules by a metric like lift to find the most interesting and non-obvious relationships. A powerful way to analyze the rules is to plot them on a scatter plot to see the interplay between support, confidence, and lift.

In [18]:
print("\n--- Part 7: Generating and Visualizing Association Rules ---")

# 1. Generate association rules
# We will filter for rules with a lift of at least 6 and a confidence of at least 0.8
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
strong_rules = rules[(rules['lift'] >= 6) & (rules['confidence'] >= 0.8)]

# Sort the strong rules by lift
strong_rules = strong_rules.sort_values(by='lift', ascending=False)

print("\nTop Strong Association Rules (Lift >= 6, Confidence >= 0.8):")
print(strong_rules)

# 2. Visualize the rules on a scatter plot
plt.figure(figsize=(12, 8))
sns.scatterplot(x=rules['support'], y=rules['confidence'], size=rules['lift'], hue=rules['lift'], palette='plasma', sizes=(20, 200))
plt.title('Association Rules: Support vs. Confidence (Size & Color by Lift)')
plt.xlabel('Support')
plt.ylabel('Confidence')
plt.legend(title='Lift')
plt.show()

# INSIGHT: The scatter plot helps us identify the "best" rules. We look for points in the top-right corner (high support & confidence)
# that also have a large size and bright color (high lift).
--- Part 7: Generating and Visualizing Association Rules ---

Top Strong Association Rules (Lift >= 6, Confidence >= 0.8):
                                          antecedents  \
79                       (PACK OF 6 SKULL PAPER CUPS)   
78                     (PACK OF 6 SKULL PAPER PLATES)   
36                       (CHILDRENS CUTLERY SPACEBOY)   
37                     (CHILDRENS CUTLERY DOLLY GIRL)   
47  (ALARM CLOCK BAKELIKE GREEN, ALARM CLOCK BAKEL...   
48  (ALARM CLOCK BAKELIKE RED, ALARM CLOCK BAKELIK...   
18                       (ALARM CLOCK BAKELIKE GREEN)   
19                         (ALARM CLOCK BAKELIKE RED)   
46  (ALARM CLOCK BAKELIKE GREEN, ALARM CLOCK BAKEL...   
12  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...   
10  (SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...   
1                       (SET/6 RED SPOTTY PAPER CUPS)   
0                     (SET/6 RED SPOTTY PAPER PLATES)   
11  (SET/6 RED SPOTTY PAPER PLATES, SET/6 RED SPOT...   
8                     (SET/6 RED SPOTTY PAPER PLATES)   

                             consequents  antecedent support  \
79        (PACK OF 6 SKULL PAPER PLATES)            0.063776   
78          (PACK OF 6 SKULL PAPER CUPS)            0.056122   
36        (CHILDRENS CUTLERY DOLLY GIRL)            0.068878   
37          (CHILDRENS CUTLERY SPACEBOY)            0.071429   
47            (ALARM CLOCK BAKELIKE RED)            0.073980   
48          (ALARM CLOCK BAKELIKE GREEN)            0.073980   
18            (ALARM CLOCK BAKELIKE RED)            0.096939   
19          (ALARM CLOCK BAKELIKE GREEN)            0.094388   
46           (ALARM CLOCK BAKELIKE PINK)            0.079082   
12       (SET/6 RED SPOTTY PAPER PLATES)            0.102041   
10         (SET/6 RED SPOTTY PAPER CUPS)            0.102041   
1        (SET/6 RED SPOTTY PAPER PLATES)            0.137755   
0          (SET/6 RED SPOTTY PAPER CUPS)            0.127551   
11  (SET/20 RED RETROSPOT PAPER NAPKINS)            0.122449   
8   (SET/20 RED RETROSPOT PAPER NAPKINS)            0.127551   

    consequent support   support  confidence       lift  representativity  \
79            0.056122  0.051020    0.800000  14.254545               1.0   
78            0.063776  0.051020    0.909091  14.254545               1.0   
36            0.071429  0.063776    0.925926  12.962963               1.0   
37            0.068878  0.063776    0.892857  12.962963               1.0   
47            0.094388  0.063776    0.862069   9.133271               1.0   
48            0.096939  0.063776    0.862069   8.892922               1.0   
18            0.094388  0.079082    0.815789   8.642959               1.0   
19            0.096939  0.079082    0.837838   8.642959               1.0   
46            0.102041  0.063776    0.806452   7.903226               1.0   
12            0.127551  0.099490    0.975000   7.644000               1.0   
10            0.137755  0.099490    0.975000   7.077778               1.0   
1             0.127551  0.122449    0.888889   6.968889               1.0   
0             0.137755  0.122449    0.960000   6.968889               1.0   
11            0.132653  0.099490    0.812500   6.125000               1.0   
8             0.132653  0.102041    0.800000   6.030769               1.0   

    leverage  conviction  zhangs_metric   jaccard  certainty  kulczynski  
79  0.047441    4.719388       0.993188  0.740741   0.788108    0.854545  
78  0.047441   10.298469       0.985135  0.740741   0.902898    0.854545  
36  0.058856   12.535714       0.991123  0.833333   0.920228    0.909392  
37  0.058856    8.690476       0.993846  0.833333   0.884932    0.909392  
47  0.056793    6.565689       0.961653  0.609756   0.847693    0.768872  
48  0.056604    6.547194       0.958457  0.595238   0.847263    0.759982  
18  0.069932    4.916181       0.979224  0.704545   0.796590    0.826814  
19  0.069932    5.568878       0.976465  0.704545   0.820431    0.826814  
46  0.055706    4.639456       0.948476  0.543478   0.784457    0.715726  
12  0.086474   34.897959       0.967949  0.764706   0.971345    0.877500  
10  0.085433   34.489796       0.956294  0.709091   0.971006    0.848611  
1   0.104878    7.852041       0.993343  0.857143   0.872645    0.924444  
0   0.104878   21.556122       0.981725  0.857143   0.953609    0.924444  
11  0.083247    4.625850       0.953488  0.639344   0.783824    0.781250  
8   0.085121    4.336735       0.956140  0.645161   0.769412    0.784615  
[Figure: scatter plot of rules, support vs. confidence, with point size and color indicating lift]

Part 8: Network Graph of Association Rules¶

Visualizing the rules as a network graph provides an intuitive way to understand the connections between items. Nodes in the graph represent items, and the directed edges represent the association rules.

In [19]:
print("\n--- Part 8: Network Graph of Association Rules ---")

# We will visualize a subset of the rules for clarity
graph_rules = rules.nlargest(15, 'lift') # Select top 15 rules by lift

# Import networkx for graph visualization
import networkx as nx
import matplotlib.pyplot as plt

# Create a directed graph
G = nx.DiGraph()

# Add nodes and edges from the rules
for i, row in graph_rules.iterrows():
    antecedent = ', '.join(list(row['antecedents']))
    consequent = ', '.join(list(row['consequents']))
    G.add_edge(antecedent, consequent, weight=row['lift'])

# Draw the network graph
plt.figure(figsize=(14, 10))
pos = nx.spring_layout(G, k=0.5, iterations=50) # Layout algorithm
nx.draw(G, pos, with_labels=True, node_size=2500, node_color='skyblue', font_size=10,
        width=[G[u][v]['weight']*0.3 for u, v in G.edges()], # Edge width proportional to lift
        edge_color='gray', arrowsize=20)

plt.title('Network Graph of Top 15 Association Rules by Lift', size=15)
plt.show()
--- Part 8: Network Graph of Association Rules ---
[Figure: network graph of the top 15 association rules by lift]

Lab Tasks & Exercises¶

Now, apply what you've learned to explore the dataset further.

In [14]:
# --- TASK 1: Experiment with Support Threshold ---
# The support threshold is the most critical parameter. In Part 6, we used min_support=0.05.
# Re-run the `apriori` algorithm with a slightly higher min_support=0.07.
# How many frequent itemsets are generated? How does this affect the number of final rules you can generate?

# YOUR CODE HERE
# frequent_itemsets_task1 = apriori(basket_sets, min_support=0.07, use_colnames=True)
# rules_task1 = association_rules(frequent_itemsets_task1, metric="lift", min_threshold=1)
# print("--- Task 1: Results with min_support=0.07 ---")
# print(f"Number of frequent itemsets: {len(frequent_itemsets_task1)}")
# print(f"Number of association rules: {len(rules_task1)}")


# --- TASK 2: Find Rules for a Specific Product ---
# Imagine the marketing team wants to create a promotion around the "JUMBO BAG RED RETROSPOT" product.
# Filter the original `rules` DataFrame (from min_support=0.05) to find all rules where
# "JUMBO BAG RED RETROSPOT" is the *antecedent* (the 'if' part of the rule).
# Which product is most strongly associated with it?

# YOUR CODE HERE
# red_retrospot_rules = rules[rules['antecedents'].apply(lambda x: 'JUMBO BAG RED RETROSPOT' in x)]
# print("\n--- Task 2: Rules for 'JUMBO BAG RED RETROSPOT' ---")
# print(red_retrospot_rules.sort_values(by='lift', ascending=False))


# --- TASK 3: Rule Interpretation in Context ---
# Look at the strongest rule you found in Part 7: {PACK OF 6 SKULL PAPER CUPS} -> {PACK OF 6 SKULL PAPER PLATES}.
# Interpret this rule using its support, confidence, and lift values in a way you would explain to a business manager.
# What kind of business action might this insight suggest?

# YOUR CODE HERE to display the rule's metrics
# specific_rule = strong_rules[strong_rules['antecedents'] == {'PACK OF 6 SKULL PAPER CUPS'}]
# print("\n--- Task 3: Interpretation of a Strong Rule ---")
# print(specific_rule)
# print("\nBusiness Interpretation:")
# print("The analysis shows that customers who buy the skull paper cups also buy the matching skull paper plates 80% of the time (confidence=0.80).")
# print("This is not just a coincidence; they are roughly 14 times more likely to do so than a random customer (lift=14.25).")
# print("Actionable Insight: These products are clearly bought as a party set. We could bundle them for a special price, or place them next to each other on the website under a 'Complete Your Set' banner to increase sales.")

Part 9: Advanced Topics & Discussion¶

  • Computational Complexity: The main weakness of the Apriori algorithm is its performance. In the worst case, it may need to scan the database many times and generate a massive number of candidate itemsets, especially if the min_support threshold is low. This makes it computationally expensive for very large datasets with many unique items.

  • Alternative Algorithms: FP-Growth: To address Apriori's performance issues, more efficient algorithms were developed. The most notable is FP-Growth (Frequent Pattern Growth). It is often significantly faster because it avoids the costly candidate-generation step. Instead, it compresses the database into a tree-like structure called an FP-tree and mines frequent itemsets directly from this structure, requiring only two passes over the data. A usage sketch follows this list.

  • Applications Beyond Market Baskets: While market basket analysis is the classic example, association rule mining is used in many other domains:

    • Bioinformatics: Finding relationships between genes or proteins (e.g., genes that are frequently co-expressed).
    • Web Usage Mining: Analyzing clickstream data to find which web pages are often visited together in a single session.
    • Medical Diagnosis: Identifying which symptoms are likely to co-occur or which symptoms are strong predictors of a particular disease.
  • The "Interestingness" Problem: A major challenge in association rule mining is that it can generate thousands of rules, many of which are trivial or obvious (e.g., {Laptop Bag} -> {Laptop}). High support and confidence are not enough. Lift is a better starting point for finding "interesting" rules because it accounts for the baseline popularity of the consequent. Domain knowledge is ultimately the best filter for identifying rules that are truly actionable and valuable.


Prepared By

Md. Atikuzzaman
Lecturer
Department of Computer Science and Engineering
Green University of Bangladesh
Email: atik@cse.green.edu.bd