-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathbuildModels.py
41 lines (32 loc) · 1.59 KB
/
buildModels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import kindred
import argparse
import pickle
import os
if __name__ == '__main__':
    # Script entry point: train one relation classifier per relation type
    # (Driver, Oncogene, Tumor_Suppressor) from the same standoff-format
    # training corpus, and pickle each trained classifier to its own path.
    #
    # Command-line arguments:
    #   --inTrain                   path handed to kindred.load (standoff format)
    #   --outModel_Driver           output pickle for the Driver model
    #   --outModel_Oncogene         output pickle for the Oncogene model
    #   --outModel_TumorSuppressor  output pickle for the Tumor_Suppressor model
    #   --conservativeThresholds    use the higher per-relation thresholds
    parser = argparse.ArgumentParser(description='Build and save a classifier')
    parser.add_argument('--inTrain',type=str,required=True)
    parser.add_argument('--outModel_Driver',type=str,required=True)
    parser.add_argument('--outModel_Oncogene',type=str,required=True)
    parser.add_argument('--outModel_TumorSuppressor',type=str,required=True)
    parser.add_argument('--conservativeThresholds',action='store_true')
    args = parser.parse_args()

    # Threshold passed to each classifier, keyed by relation type.
    if args.conservativeThresholds:
        thresholds = {'Driver':0.80, 'Oncogene': 0.76, 'Tumor_Suppressor': 0.92}
    else:
        thresholds = {'Driver':0.5, 'Oncogene': 0.5, 'Tumor_Suppressor': 0.5}

    # The feature set is identical for every model, so define it once.
    features = [
        'entityTypes',
        'unigramsBetweenEntities',
        'bigrams',
        'dependencyPathEdges',
        'dependencyPathEdgesNearEntities',
    ]

    # Keep each relation type next to its output path so the two cannot get
    # out of step (a list of pairs also fixes the processing order).
    targets = [
        ('Driver', args.outModel_Driver),
        ('Oncogene', args.outModel_Oncogene),
        ('Tumor_Suppressor', args.outModel_TumorSuppressor),
    ]

    for relationType, outModel in targets:
        print("Building %s model" % relationType)

        print(" Loading training")
        # The corpus is reloaded for every relation type on purpose: the
        # filter below overwrites doc.relations in place, discarding the
        # relations of the other types that later iterations need.
        trainCorpus = kindred.load('standoff',args.inTrain)
        for doc in trainCorpus.documents:
            doc.relations = [ r for r in doc.relations if r.relationType == relationType ]

        print(" Doing training")
        classifier = kindred.RelationClassifier(
            classifierType='LogisticRegression',
            threshold=thresholds[relationType],
            # Fresh copy so no two classifiers share one mutable list.
            features=list(features),
            acceptedEntityTypes=[('cancer','gene')],
        )
        classifier.train(trainCorpus)

        print(" Saving classifer")
        with open(outModel,'wb') as f:
            pickle.dump(classifier,f)
        print(" Output done!")