I'm writing code that takes a small portion of a dataset (shopping baskets), converts it into a hot encoded dataframe and I want to run mlxtend's apriori algorithm on it to get frequent itemsets.
However, whenever I run the apriori algorithm, it seems to run instantly and it returns a generator object rather than a dataframe. I followed the instructions from the documentation, and in their example it shows that apriori returns a dataframe. What am I doing wrong?
Here is my code:
import numpy as np
import pandas as pd
import csv
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder
from apyori import apriori
def simpleRandomisedSample(filename, support_frac, sample_frac):
df1 = pd.read_csv("%s.csv" % filename, header=None) #Saving csv file into a dataframe in memory
size = len(df1)
support = support_frac * len(df1) #Sets the original support value to x% of the original dataset
sample_support = support * sample_frac #Support for our reduced sample as a fraction of the original support
sample = df1.sample(frac=sample_frac) #Saving x% (randomised) of the dataset as our sample
sample = sample.reset_index(drop = True) #Reseting indexes (which previously got randomised along with the data)
del df1 #Deleting original dataframe from memory to clear up space
sample_size = len(sample)
return size, support, sample_size, sample_support, sample
def main():
size, support, sample_size, sample_support, sample = simpleRandomisedSample("chess",0.01,0.1)
print("The original dataset had %d rows and a support of %.2f" % (size, support))
print("The dataset was reduced to %d rows and the sample has a support of %.2f" % (sample_size, sample_support))
sample_list = sample.values.tolist() #Converting Dataframe to list of lists for use with Apriori
te = TransactionEncoder()
te_ary = te.fit(sample_list).transform(sample_list) #Preprocessing our sample to work with Apriori algorithm
df = pd.DataFrame(te_ary, columns=te.columns_)
print(df)
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
print(frequent_itemsets)
if __name__ == "__main__":
main()
You have a name conflict in your imports:
from mlxtend.frequent_patterns import apriori
[...]
from apyori import apriori
Your code is not using the mlxtend
algorithm but the one provided by apyori
, the one that is imported lated overwrites the previous one.
You can remove the one you're not using or, if you want to have access to both later on, you can give one a different name:
from mlxtend.frequent_patterns import apriori as mlx_apriori
from apyori import apriori as apy_apriori