1919"""
2020
2121import logging
22+ import multiprocessing as mp
2223import numpy as np
24+ import time
2325
2426from collections import Counter
2527from itertools import chain
@@ -49,10 +51,18 @@ def _pre_processing(self, raw_transactions):
4951 counts = Counter (chain .from_iterable (raw_transactions ))
5052 self .unique_candidates = [tuple ([k ]) for k , c in counts .items ()]
5153
52- def is_slice_in_list (self , s , l ):
54+ def _is_slice_in_list (self , s , l ):
5355 len_s = len (s ) # so we don't recompute length of s on every iteration
5456 return any (s == l [i :len_s + i ] for i in range (len (l ) - len_s + 1 ))
5557
58+ def _calc_frequency (self , results , item , minsup ):
59+ # The number of times the item appears in the transactions
60+ frequency = len (
61+ [t for t in self .transactions if self ._is_slice_in_list (item , t )])
62+ if frequency >= minsup :
63+ results [item ] = frequency
64+ return results
65+
5666 def _support (self , items , minsup = 0 ):
5767 '''
5868 The support count (or simply support) for a sequence is defined as
@@ -65,14 +75,16 @@ def _support(self, items, minsup=0):
6575 items: set of items that will be evaluated
6676 minsup: minimum support
6777 '''
68- results = {}
78+ results = mp .Manager ().dict ()
79+ pool = mp .Pool (processes = mp .cpu_count ())
80+
6981 for item in items :
70- # The number of times the item appears in the transactions
71- frequency = len (
72- [ t for t in self . transactions if self . is_slice_in_list ( item , t )] )
73- if frequency >= minsup :
74- results [ item ] = frequency
75- return results
82+ pool . apply_async ( self . _calc_frequency ,
83+ args = ( results , item , minsup ))
84+ pool . close ( )
85+ pool . join ()
86+
87+ return dict ( results )
7688
7789 def _print_status (self , run , candidates ):
7890 logging .debug ("""
@@ -116,6 +128,7 @@ def search(self, minsup=0.2):
116128 # by minimum support
117129 items = np .unique (
118130 list (set (self .freq_patterns [k_items - 2 ].keys ())))
131+
119132 candidates = list (product (items , repeat = k_items ))
120133
121134 # candidate pruning - eliminates candidates who are not potentially
0 commit comments