Skip to content

Commit 8e829b0

Browse files
change gsp to use multiprocessing
1 parent 4516e0a commit 8e829b0

File tree

2 files changed

+23
-10
lines changed

2 files changed

+23
-10
lines changed

gsp.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
"""
2020

2121
import logging
22+
import multiprocessing as mp
2223
import numpy as np
24+
import time
2325

2426
from collections import Counter
2527
from itertools import chain
@@ -49,10 +51,18 @@ def _pre_processing(self, raw_transactions):
4951
counts = Counter(chain.from_iterable(raw_transactions))
5052
self.unique_candidates = [tuple([k]) for k, c in counts.items()]
5153

52-
def is_slice_in_list(self, s, l):
54+
def _is_slice_in_list(self, s, l):
5355
len_s = len(s) # so we don't recompute length of s on every iteration
5456
return any(s == l[i:len_s+i] for i in range(len(l) - len_s+1))
5557

58+
def _calc_frequency(self, results, item, minsup):
59+
# The number of times the item appears in the transactions
60+
frequency = len(
61+
[t for t in self.transactions if self._is_slice_in_list(item, t)])
62+
if frequency >= minsup:
63+
results[item] = frequency
64+
return results
65+
5666
def _support(self, items, minsup=0):
5767
'''
5868
The support count (or simply support) for a sequence is defined as
@@ -65,14 +75,16 @@ def _support(self, items, minsup=0):
6575
items: set of items that will be evaluated
6676
minsup: minimum support
6777
'''
68-
results = {}
78+
results = mp.Manager().dict()
79+
pool = mp.Pool(processes=mp.cpu_count())
80+
6981
for item in items:
70-
# The number of times the item appears in the transactions
71-
frequency = len(
72-
[t for t in self.transactions if self.is_slice_in_list(item, t)])
73-
if frequency >= minsup:
74-
results[item] = frequency
75-
return results
82+
pool.apply_async(self._calc_frequency,
83+
args=(results, item, minsup))
84+
pool.close()
85+
pool.join()
86+
87+
return dict(results)
7688

7789
def _print_status(self, run, candidates):
7890
logging.debug("""
@@ -116,6 +128,7 @@ def search(self, minsup=0.2):
116128
# by minimum support
117129
items = np.unique(
118130
list(set(self.freq_patterns[k_items - 2].keys())))
131+
119132
candidates = list(product(items, repeat=k_items))
120133

121134
# candidate pruning - eliminates candidates that are not potentially

main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ def create_transactions(minsize, maxsize, minvalue, maxvalue):
1414
minsize, maxsize, minvalue, maxvalue = 2, 256, 0, 5
1515

1616
transactions = [create_transactions(
17-
minsize, maxsize, minvalue, maxvalue) for _ in range(100)]
17+
minsize, maxsize, minvalue, maxvalue) for _ in range(10000)]
1818

1919
# transactions = [
20-
# ['Bread', 'Milk'],
20+
# ['Bread', 'Milk'],
2121
# ['Bread', 'Diaper', 'Beer', 'Eggs'],
2222
# ['Milk', 'Diaper', 'Beer', 'Coke'],
2323
# ['Bread', 'Milk', 'Diaper', 'Beer'],

0 commit comments

Comments
 (0)