forked from qgzang/ComputationalHealthcare
-
Notifications
You must be signed in to change notification settings - Fork 0
/
syncinit.py
123 lines (109 loc) · 3.77 KB
/
syncinit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import logging,json,glob,os
from CHL.models import SCount,STCount,Code,SYTCount,CodeCount,Dataset,TextSearch
from chlib.codes import Coder
from collections import defaultdict
def init_db(datasets, heroku=False):
    """Wipe the summary tables and reload them from ``datasets``.

    Every row of Dataset, Code, CodeCount, STCount, SCount and SYTCount is
    deleted first (in that order); ``init_selected`` is then called once for
    each dataset.

    :param datasets: mapping whose values are the dataset objects handed to
        ``init_selected``; the keys are not used.
    :param heroku: passed through unchanged to ``init_selected``.
    """
    for model in (Dataset, Code, CodeCount, STCount, SCount, SYTCount):
        model.objects.all().delete()
    # Only the values are needed, and ``values()`` exists on both Python 2
    # and Python 3 mappings (``iteritems()`` is Python 2 only).
    for d in datasets.values():
        init_selected(d, heroku)
def init_selected(d, heroku=False):
    """Create the Dataset row for ``d`` and load its dependent count tables.

    Nothing is written unless a ``counts.txt`` file exists inside
    ``d.base_dir``; in that case a warning is logged and the function
    returns. Otherwise a Dataset row is populated from the attributes of
    ``d``, saved, and passed to ``init_scount``, ``init_sytcount``,
    ``init_stcount`` and ``init_codes`` (in that order).

    :param d: source dataset object providing the attributes read below.
    :param heroku: passed through unchanged to the four loader helpers.
    """
    counts_path = os.path.join(d.base_dir, "counts.txt")
    if not os.path.isfile(counts_path):
        # Make the skip visible instead of silently loading nothing.
        logging.warning("skipping {}: {} not found".format(d.identifier, counts_path))
        return
    logging.info("starting {}".format(d.identifier))
    dm = Dataset()
    dm.identifier = d.identifier
    dm.linked = d.linked
    dm.base_dir = d.base_dir
    dm.years = d.years
    dm.states = d.states
    dm.patients_count = d.patients
    dm.linked_count = d.linked_visits
    dm.unlinked_count = d.unlinked_visits
    dm.aggregate_patients = d.aggregate_patients
    dm.aggregate_readmits = d.aggregate_readmits
    dm.aggregate_visits = d.aggregate_visits
    dm.aggregate_revisits = d.aggregate_revisits
    dm.name = d.name
    # The row must be saved before the helpers attach rows that point at it.
    dm.save()
    init_scount(d, dm, heroku)
    init_sytcount(d, dm, heroku)
    init_stcount(d, dm, heroku)
    init_codes(d, dm, heroku)
def init_scount(d, dm, heroku):
    """Save one SCount row for every state listed in ``d.states``.

    The patient, unlinked-visit and linked-visit totals are read from
    ``d.state_counts[state]``. ``heroku`` is accepted but not used here.
    """
    for state in d.states:
        totals = d.state_counts[state]
        row = SCount()
        row.state = state
        row.dataset = dm
        row.linked_count = totals['linked_visits']
        row.unlinked_count = totals['unlinked_visits']
        row.patients_count = totals['patients']
        row.save()
    logging.info("finished states")
def init_sytcount(d, dm, heroku):
    """Save one SYTCount row for every entry of ``d.year_counts``.

    Each key is unpacked as ``(state, visit type, year, linked flag)`` and
    the mapped value is stored as the count. The row's ``linked`` field is
    True only when the flag equals the string ``'linked'``.
    ``heroku`` is accepted but not used here.
    """
    # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
    for (state, vtype, year, linked), count in d.year_counts.items():
        syt = SYTCount()
        syt.state = state
        syt.dataset = dm
        syt.visit_type = vtype
        syt.year = year
        syt.linked = linked == 'linked'
        syt.count = count
        syt.save()
    logging.info("finished SYT")
def init_stcount(d, dm, heroku):
    """Save one STCount row for every entry of ``d.type_counts``.

    Each key is unpacked as ``(state, visit type, linked flag)`` and the
    mapped value is stored as the count. The row's ``linked`` field is True
    only when the flag equals the string ``'linked'``.
    ``heroku`` is accepted but not used here.
    """
    # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
    for (state, vtype, linked), count in d.type_counts.items():
        st = STCount()
        st.state = state
        st.dataset = dm
        st.visit_type = vtype
        st.linked = linked == 'linked'
        st.count = count
        st.save()
    logging.info("finished ST")
def init_codes(d, dm, heroku):
    """Load Code and CodeCount rows from ``d.iter_code_counts()``.

    First pass: save one Code row (linked to ``dm``) for each distinct code
    whose code type is not ``'pdx'``, with the description looked up in a
    ``Coder`` instance. Second pass: build a CodeCount for every tuple and
    insert them with a single ``bulk_create`` call in batches of 5000.
    ``heroku`` is accepted but not used here.
    """
    lookup = Coder()
    seen = set()
    for state, year, linked, vtype, ctype, code, count in d.iter_code_counts():
        # Skip 'pdx' entries and codes that already have a row for this dataset.
        if ctype == 'pdx' or code in seen:
            continue
        entry = Code()
        entry.code = code
        entry.description = lookup[code]
        entry.code_type = ctype
        entry.dataset = dm
        entry.save()
        seen.add(code)
    logging.info("finished Codes")
    pending = []
    for state, year, linked, vtype, ctype, code, count in d.iter_code_counts():
        pending.append(
            CodeCount(
                year=year,
                state=state,
                linked=linked,
                visit_type=vtype,
                count=count,
                code=code,
                code_type=ctype,
                dataset_identifier=dm.identifier,
            )
        )
    logging.info("starting code counts batch")
    CodeCount.objects.bulk_create(pending, batch_size=5000)
    logging.info("finished code counts")
def sync_text():
    """Rebuild the TextSearch table from the current Code rows.

    All existing TextSearch rows are deleted, then one row is saved per
    distinct code. Its description is the code followed by a space and the
    Code row's description, and ``datasets_count`` is the number of Code
    rows sharing that code. When several Code rows share a code, the
    description and code type of the last one iterated are kept.
    """
    TextSearch.objects.all().delete()
    descriptions = {}
    code_types = {}
    occurrences = defaultdict(int)
    for entry in Code.objects.all():
        key = entry.code
        descriptions[key] = key + " " + entry.description
        occurrences[key] += 1
        code_types[key] = entry.code_type
    for key in descriptions:
        row = TextSearch()
        row.code = key
        row.description = descriptions[key]
        row.datasets_count = occurrences[key]
        row.code_type = code_types[key]
        row.save()