This repository has been archived by the owner on Jan 31, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
reims_dbf_extractor.py
159 lines (134 loc) · 5.85 KB
/
reims_dbf_extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import asyncio
import csv
import json
from pathlib import Path
import aiohttp
import pandas as pd
from dbfread import DBF
# SKUs known to be corrupt/duplicated in the source DBF exports; any row
# carrying one of these is silently dropped during conversion.
invalid_skus = [3621, 3229, 8638, 9366, 546, 1769, 2960]
def map_size(x):
    """Translate a one-letter DBF size code into its REIMS2 size name.

    Unrecognized codes map to ``'unknown'``.
    """
    codes = {'S': 'small', 'C': 'child', 'L': 'large', 'M': 'medium'}
    return codes.get(x, 'unknown')
def map_gender(x):
    """Translate a one-letter DBF gender code into a REIMS2 appearance name.

    Unrecognized codes map to ``'unknown'``.
    """
    codes = {'M': 'masculine', 'F': 'feminine', 'U': 'neutral'}
    return codes.get(x, 'unknown')
def convert_to_dict(df):
    """Normalize the raw DBF frame and convert it into a list of glasses dicts.

    Expects the columns SKU, TYPE, SIZE, GENDER, TINT, MATERIAL, the OD/OS
    prescription columns (ODSPHERE, ODAXIS, ODCYLINDER, ODADD and the OS
    equivalents) and ENTERDATE.  Rows with an empty SKU or a SKU listed in
    ``invalid_skus`` are skipped.  Note: mutates ``df`` in place (column
    renames/drops) as a side effect.

    Returns:
        list[dict]: glasses dicts sorted ascending by SKU.

    Raises:
        ValueError: if a SKU contains letters (indicates corrupt source data).
    """
    # Remove the "RXX:" prefix from SKU (not needed) and any trailing text
    # after the first space.
    df['SKU'] = df['SKU'].apply(lambda x: x.split(":")[-1].split(" ")[0])
    df['TYPE'] = df['TYPE'].apply(lambda x: 'single' if x == 'S' else 'multifocal' if x == 'B' else 'unknown')
    df['SIZE'] = df['SIZE'].apply(map_size)
    df['APPEARANCE'] = df['GENDER'].apply(map_gender)
    df = df.drop(columns=['TINT', 'GENDER', 'MATERIAL'])
    df.columns = map(str.lower, df.columns)

    final = []
    for _, row in df.iterrows():
        raw_sku = row['sku']
        if raw_sku == '':
            continue
        if any(c.isalpha() for c in raw_sku):
            # Letters in a SKU mean the prefix-stripping above failed or the
            # source row is corrupt — fail loudly rather than import garbage.
            raise ValueError(f"bad sku {raw_sku}")
        sku = int(raw_sku)  # convert once; reused for filtering and output
        if sku in invalid_skus:
            continue
        od = {'sphere': float(row['odsphere']), 'axis': int(row['odaxis']), 'cylinder': float(row['odcylinder'])}
        os_eye = {'sphere': float(row['ossphere']), 'axis': int(row['osaxis']), 'cylinder': float(row['oscylinder'])}
        if row['type'] != 'single':
            # The "add" power only exists for multifocal (and unknown) glasses.
            od['add'] = float(row['odadd'])
            os_eye['add'] = float(row['osadd'])
        # SKUs from 5001 upward belong to the SM location, lower ones to SA.
        location = 'sm' if sku >= 5001 else 'sa'
        final.append({'sku': sku, 'glassesType': row['type'], 'od': od, 'os': os_eye,
                      'appearance': row['appearance'], 'glassesSize': row['size'], 'location': location,
                      'creationDate': row['enterdate'].isoformat() + ' 00:00:00'})
    final.sort(key=lambda d: d['sku'])
    print(f"Total glasses {len(final)}")
    return final
def convert_to_mysql(glasses):
    """Render the glasses dicts as a list of MySQL INSERT statements.

    For each glass, four statements are produced: two ``eye`` rows (OD
    first, then OS), one empty ``dispense`` row, and the ``glasses`` row
    referencing them.  Ids are assigned sequentially starting at 1.  A
    trailing empty string terminates the list.
    """
    statements = []
    for index, glass in enumerate(glasses):
        # Each glass consumes two eye ids and one dispense id (all 1-based).
        od_id = 2 * index + 1
        os_id = od_id + 1
        dispense_id = index + 1
        od = glass['od']
        os_eye = glass['os']
        statements.append(
            f"INSERT INTO eye (id, sphere, cylinder, axis, additional) VALUES({od_id}, {od['sphere']}, "
            f" {od['cylinder']}, {od['axis']}, {od.get('add', 0.0)});")
        statements.append(
            f"INSERT INTO eye (id, sphere, cylinder, axis, additional) VALUES({os_id}, {os_eye['sphere']}, "
            f" {os_eye['cylinder']}, {os_eye['axis']}, {os_eye.get('add', 0.0)});")
        statements.append(
            f"INSERT INTO dispense (id, modify_date, previous_sku) VALUES ({dispense_id}, null, null);")
        statements.append(
            f"INSERT INTO glasses (sku, glasses_type, glasses_size, appearance, dispense_id, location, dispensed, creation_date, os_id, od_id) "
            f"VALUES ({glass['sku']}, '{glass['glassesType']}', "
            f" '{glass['glassesSize']}', '{glass['appearance']}', {dispense_id}, "
            f" '{glass['location']}', 0, '{glass['creationDate']}', {os_id}, {od_id});")
    statements.append("")
    return statements
# ---------------------------------------------------------------------------
# Script body: load the 2022 DBF exports, convert them, and dump the SQL
# needed to seed the REIMS2 database.
# (An earlier iteration of this script inspected Dispense.dbf / Glsku.dbf /
# NOTFOUND.dbf from a local SM_2020 folder and printed the raw frames.)
# ---------------------------------------------------------------------------

# Regular 2022 inventory for both store locations.
frame_sa = pd.DataFrame(iter(DBF(Path("files/GLSKU_SA22.dbf"))))
frame_sm = pd.DataFrame(iter(DBF(Path("files/GLSKU_SM22.dbf"))))
# NOTE: This is here because the glasses for 2022 were wrongly added using "Add Readers"
# Please beware this also when comparing Philscore results in REIMS1!!!
frame_readers_sa = pd.DataFrame(iter(DBF(Path("files/READD_SA22.dbf"))))
frame_readers_sm = pd.DataFrame(iter(DBF(Path("files/READD_SM22.dbf"))))

final = convert_to_dict(pd.concat([frame_sa, frame_sm, frame_readers_sa, frame_readers_sm]))
sql_queries = convert_to_mysql(final)

# Wipe the target tables and reset their auto-increment counters so the
# generated inserts start from a clean slate.
sql_prepend = """DELETE FROM glasses;
ALTER TABLE glasses AUTO_INCREMENT = 1;
DELETE FROM dispense;
ALTER TABLE dispense AUTO_INCREMENT = 1;
DELETE FROM eye;
ALTER TABLE eye AUTO_INCREMENT = 1;
"""

export_path = Path("../reims2-ansible-playbook/dump.sql").resolve()
with open(export_path, 'w', encoding='utf-8') as f:
    f.write(sql_prepend + "\n")
    f.writelines(f"{query}\n" for query in sql_queries)
print(f"Exported successfully to {export_path}")

# Alternative outputs kept for reference:
#  - dump ``final`` as JSON for testing in the frontend:
#      with open(Path("../").resolve() / "reims2-frontend/assets/out.json", 'w', encoding='utf-8') as f:
#          json.dump(final, f, ensure_ascii=False, indent=4)
#  - POST each entry in a chosen SKU range to the backend
#    (https://api.reims2.app/pvh/api/glasses) concurrently via aiohttp/asyncio.