-
Notifications
You must be signed in to change notification settings - Fork 1
/
armpy.py
115 lines (99 loc) · 3.27 KB
/
armpy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import sys
import time
from argparse import ArgumentParser
from argparse import ArgumentTypeError
from fptree import mine_fp_tree
from generaterules import generate_rules
from index import InvertedIndex
from item import item_str
from datasetreader import DatasetReader
def set_to_string(s):
    """Render a collection of items as a single space-separated string.

    Every item is converted with ``item_str``; the converted values are
    sorted and then joined with one space between neighbours.

    Args:
        s: Iterable of items accepted by ``item_str``.

    Returns:
        The sorted item labels joined by single spaces, or ``""`` when
        ``s`` is empty.
    """
    # str.join builds the result in one pass instead of repeated "+=".
    return " ".join(str(label) for label in sorted(map(item_str, s)))
def float_between_0_and_1(string):
    """Parse ``string`` as a float in the closed interval [0, 1].

    Usable as an argparse ``type=`` callable.

    Args:
        string: Text to convert.

    Returns:
        The parsed float.

    Raises:
        ArgumentTypeError: If the parsed value is outside [0, 1] or is NaN.
        ValueError: If ``float`` cannot parse ``string``.
    """
    value = float(string)
    # Written as a positive range test: NaN fails every comparison, so
    # "value < 0.0 or value > 1.0" would let it through as valid.
    if not 0.0 <= value <= 1.0:
        msg = "%r is not in range [0,1]" % string
        raise ArgumentTypeError(msg)
    return value
def float_gteq_1(string):
    """Parse ``string`` as a float that is greater than or equal to 1.

    Usable as an argparse ``type=`` callable.

    Args:
        string: Text to convert.

    Returns:
        The parsed float.

    Raises:
        ArgumentTypeError: If the parsed value is below 1 or is NaN.
        ValueError: If ``float`` cannot parse ``string``.
    """
    value = float(string)
    # "not value >= 1.0" rather than "value < 1.0": NaN compares False both
    # ways, so only the positive form rejects it.
    if not value >= 1.0:
        msg = "%r is not in range [1,∞]" % string
        raise ArgumentTypeError(msg)
    return value
def main():
    """Run the association-rule mining pipeline from the command line.

    Parses the command-line options, mines frequent itemsets with
    ``mine_fp_tree``, derives rules with ``generate_rules``, and writes the
    rules to the output file as comma-separated lines. Progress and timing
    information is printed to standard output after each stage.

    Returns:
        0 on completion, suitable for passing to ``sys.exit``.
    """
    parser = ArgumentParser(
        description="Association rule data mining in Python")
    parser.add_argument("--input", dest="input", required=True)
    parser.add_argument("--output", dest="output", required=True)
    parser.add_argument(
        "--min-confidence",
        dest="min_confidence",
        type=float_between_0_and_1,
        required=True)
    parser.add_argument(
        "--min-support",
        dest="min_support",
        type=float_between_0_and_1,
        required=True)
    parser.add_argument(
        "--min-lift",
        dest="min_lift",
        type=float_gteq_1,
        required=True)
    args = parser.parse_args()

    program_start = time.time()
    start = program_start
    print("ARMPY - Association Rule Mining using Python.")
    print("Input file: {}".format(args.input))
    print("Output file: {}".format(args.output))
    # Each label is paired with its own option; these two were previously
    # printed with the values swapped.
    print("Minimum support: {}".format(args.min_support))
    print("Minimum confidence: {}".format(args.min_confidence))
    print("Minimum lift: {}".format(args.min_lift))

    # Stage 1: frequent itemset mining.
    print("Generating frequent itemsets using FPGrowth...", flush=True)
    reader = DatasetReader(args.input)
    (itemsets, itemset_counts, num_transactions) = mine_fp_tree(
        reader, args.min_support)
    duration = time.time() - start
    print(
        "FPGrowth mined {} items in {:.2f} seconds".format(
            len(itemsets),
            duration),
        flush=True)

    # Stage 2: rule generation from the mined itemsets.
    start = time.time()
    rules = generate_rules(
        itemsets,
        itemset_counts,
        num_transactions,
        args.min_confidence,
        args.min_lift)
    duration = time.time() - start
    print(
        "Generated {} rules in {:.2f} seconds".format(
            len(rules),
            duration),
        flush=True)

    # Stage 3: write one line per rule under a header row.
    start = time.time()
    with open(args.output, "w") as f:
        f.write("Antecedent->Consequent,Confidence,Lift,Support\n")
        for (antecedent,
             consequent,
             confidence,
             lift,
             support) in rules:
            f.write("{} -> {},{:.4f},{:.4f},{:.4f}\n".format(
                set_to_string(antecedent),
                set_to_string(consequent),
                confidence,
                lift,
                support))
    # Measure the write stage itself; without this the message below would
    # repeat the rule-generation time.
    duration = time.time() - start
    print(
        "Wrote rules to disk in {:.2f} seconds".format(
            duration),
        flush=True)

    duration = time.time() - program_start
    print("Total runtime {:.2f} seconds".format(duration))
    return 0
# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())