-
Notifications
You must be signed in to change notification settings - Fork 0
/
detector.py
45 lines (34 loc) · 856 Bytes
/
detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import pickle as c
import os
from collections import Counter
from sklearn.naive_bayes import MultinomialNB
def load(clf_file):
    """Return the object unpickled from ``text-classifier.mdl``.

    Args:
        clf_file: Accepted for interface compatibility but not used; the
            model is always read from ``text-classifier.mdl`` in the current
            working directory, whatever name is passed here.

    Returns:
        Whatever object was pickled into ``text-classifier.mdl``.
    """
    # NOTE(review): unpickling can execute arbitrary code, so the model file
    # must come from a trusted source.
    with open('text-classifier.mdl', 'rb') as model_file:
        return c.load(model_file)
def make_dict():
    """Build the word-frequency vocabulary from the files under ``emails/``.

    Every entry in the ``emails/`` directory (relative to the current working
    directory) is read and split on single spaces. Tokens that are not purely
    alphabetic are discarded and the 3000 most frequent remaining tokens are
    returned. A countdown of the files still to be processed is printed, one
    number per file, as progress output.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent, as produced by ``Counter.most_common(3000)``.

    Raises:
        OSError: If ``emails/`` cannot be listed or a file cannot be read.
    """
    direc = "emails/"
    emails = [direc + name for name in os.listdir(direc)]
    words = []
    remaining = len(emails)
    for email in emails:
        # Close each file as soon as it has been read instead of leaving the
        # handle open until garbage collection.
        with open(email) as f:
            # Split on single spaces only: tokens that still contain a
            # newline or tab are not alphabetic and get filtered out below.
            words += f.read().split(" ")
        print(remaining)
        remaining -= 1
    # "".isalpha() is False, so empty tokens are dropped together with
    # anything containing digits, punctuation or whitespace.
    dictionary = Counter(word for word in words if word.isalpha())
    return dictionary.most_common(3000)
# Script entry: load the pickled classifier, rebuild the vocabulary from the
# emails/ directory, then classify one line typed by the user.
clf = load("text-classifier.pkl")
d = make_dict()
inp = input("Enter to Classify : ")
# One feature per vocabulary entry, in vocabulary order.
# NOTE(review): str.count counts substring occurrences, so a short word is
# also counted inside longer words - confirm this matches how the model's
# training features were built.
features = [inp.count(token) for token, _ in d]
res = clf.predict([features])
# The first predicted label is used as an index: 0 -> "Not Spam!", 1 -> "Spam!".
print(["Not Spam!", "Spam!"][res[0]])