-
Notifications
You must be signed in to change notification settings - Fork 0
/
recovery.py
237 lines (194 loc) · 8.65 KB
/
recovery.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import tkinter as tk
import os
from utils import *
from tkinter import messagebox
from tkinter import filedialog
from process_description import PD_handler
def extract_keywords(path, skip_pd=False):
    """
    Function to extract the keywords from a given Metadata file

    The first line of the file is always skipped, and so is the second line
    if it is blank. From every remaining non-blank line the text in front of
    the first ":" is taken as a keyword.

    path - string ... path to the file
    skip_pd - bool ... if True one further leading line (the process description entry) is skipped

    returns:
    keys - list ... a list containing the keywords found in the file
                    (empty if the file has no keyword lines)
    """
    with open(path, "r") as f:
        lines = f.readlines()

    # number of leading lines to drop: the first line plus an optional blank
    # separator line; the length check keeps very short files from raising
    start = 2 if len(lines) > 1 and lines[1] == "\n" else 1
    if skip_pd:
        start += 1

    # blank lines carry no keyword and would otherwise end up as "\n" entries
    keys = [line.split(":")[0] for line in lines[start:] if line.strip()]
    return keys
def extract_process_description(path):
    """
    Function to extract the process description from a given Metadata file

    The description is read from the second line of the file, or from the
    third line if the second one is blank. It is everything that follows the
    first ":: " on that line.

    path - string ... path to the file

    returns:
    pd - string ... contains the extracted process description
                    ("" if the file is too short or the line has no ":: ")
    """
    with open(path, "r") as f:
        lines = [line.replace("\n", "") for line in f]

    # the description sits on the second line, or on the third one if a blank
    # separator line follows the first line
    index = 2 if len(lines) > 1 and lines[1] == "" else 1
    if index >= len(lines):
        return ""

    # partition keeps everything behind the first ":: ", so a description that
    # itself contains ":: " is not cut off, and a missing separator gives ""
    _, _, pd = lines[index].partition(":: ")
    return pd
def recover_keywords():
    """
    The purpose of this Function is to recover keywords from a given metadata file
    or initialize new empty keywords

    If keywords.pkl already exists in the current working directory a message
    is printed and the program exits.

    returns:
    saved - bool ... True if save_keywords was called, False otherwise
    """
    # hidden root window so that only the dialogs are shown
    root = tk.Tk()
    root.withdraw()
    try:
        if os.path.isfile("keywords.pkl"):
            print("You do not want to go there yet!")
            # same effect as exit() without relying on the helper injected by the site module
            raise SystemExit

        if messagebox.askyesno("Recovering Keywords", "Yes: to try and recover from Metadata from metadata file\nNo: to create a new empty keyword file"):
            md_path = filedialog.askopenfilename()
            if not md_path:
                # the file dialog was cancelled, there is nothing to read
                return False
            keys = extract_keywords(md_path)
            if messagebox.askyesno("Found Keys", "Found the following keys:\n"+str(keys)+"\nDo you want to save them?"):
                save_keywords(keys)
                return True
        elif messagebox.askyesno("Recovering Keywords", "Warning: this could overwrite existing Metadata\n\nYes: to initialize new empty keywords.pkl\nNo: to cancel"):
            save_keywords(["process description"])
            return True
        return False
    finally:
        # do not leave the hidden root window behind once the dialogs are done
        root.destroy()
def recover_processes():
    """
    The purpose of this Function is to recover processes from a given working directory
    or initialize new empty processes

    If processes.pkl already exists in the current working directory a message
    is printed and the program exits. Recovered descriptions are stored under
    the placeholder names "descr_1", "descr_2", ...

    returns:
    saved - bool ... True if save_processes was called, False otherwise
    """
    # hidden root window so that only the dialogs are shown
    root = tk.Tk()
    root.withdraw()
    try:
        if os.path.isfile("processes.pkl"):
            print("You do not want to go there yet!")
            # same effect as exit() without relying on the helper injected by the site module
            raise SystemExit

        if not messagebox.askyesno("Recovering Process Description", "Yes: to try and recover them from a data directory\nNo: to create an empty processes.pkl"):
            if messagebox.askyesno("Recovering Process Descriptions", "Warning: this could overwrite existing Process Descriptions\n\nYes: to initialize an empty processes.pkl\nNo: to cancel"):
                save_processes(PD_handler())
                return True
            return False

        # a cancelled dialog returns "", for which os.walk yields nothing, so
        # the "could not find" question below is asked in that case
        directory = filedialog.askdirectory()

        # collect every file with "metadata" in its name or in the name of the directory above
        metadata_file_list = []
        for current_dir, _, files in os.walk(directory):
            in_metadata_dir = "metadata" in os.path.split(current_dir)[1]
            for f in files:
                if in_metadata_dir or "metadata" in f:
                    metadata_file_list.append(os.path.normpath(os.path.join(current_dir, f)))

        # keep each non empty description once, in the order it was found,
        # so the same description does not get several placeholder names
        descriptions = []
        for md in metadata_file_list:
            pd = extract_process_description(md)
            if pd != "" and pd not in descriptions:
                descriptions.append(pd)

        # create empty process handler
        processes = PD_handler()
        if not descriptions:
            if messagebox.askyesno("Recovering Process Descriptions", "Could not find existing descriptions\n\nDo you want to initalize an empty processes.pkl?"):
                save_processes(processes)
                return True
            return False

        if messagebox.askyesno("Recovering Process Descriptions", "Found the following descriptions:\n" + str(descriptions) + "\nDo you want to save them?"):
            for i, descr in enumerate(descriptions, start=1):
                processes["descr_"+str(i)] = descr
            save_processes(processes)
            messagebox.showinfo("Attention", "The descriptions where saved with placeholder names!\nTo change the names use the edit description button in the tool.")
            return True
        return False
    finally:
        # do not leave the hidden root window behind once the dialogs are done
        root.destroy()
def recover_from_other_users(path, keywords, processes):
    """
    Function which tries to recover process descriptions and keywords from other
    users or instances of this tool which use their own keywords.pkl and processes.pkl.

    Every file below path with "metadata" in its name or in the name of its
    directory is read. If the first keyword of such a file differs from
    keywords[0], the file is re-read through MD_file, its first keyword is
    renamed to keywords[0] and it is written back. Unknown keywords and
    process descriptions are only saved after the user confirmed a dialog.

    path - string ... path to the working directory
    keywords - list ... containing the known keywords (extended in place if the user agrees)
    processes - PD_handler ... contains the known processes (extended in place if the user agrees)

    returns
    bools - tuple ... should keywords.pkl and processes.pkl be reloaded
    """
    # collect every file with "metadata" in its name or in the name of the directory above
    metadata_file_list = []
    for current_dir, _, files in os.walk(path):
        in_metadata_dir = "metadata" in os.path.split(current_dir)[1]
        for f in files:
            if in_metadata_dir or "metadata" in f:
                metadata_file_list.append(os.path.normpath(os.path.join(current_dir, f)))

    keys = []
    pds = []
    # iterate over the found md_files and save the found keys/pds
    for md_file in metadata_file_list:
        k = extract_keywords(md_file, skip_pd=False)
        if not k:
            # a file without keyword lines has nothing to recover
            continue
        # check process description is differently named:
        if k[0] != keywords[0]:
            # calculate path to the data file: "<name>-<ext>-metadata.txt" inside
            # the metadata directory belongs to "<name>.<ext>" one directory above
            md_dir, md_name = os.path.split(md_file)
            name_parts = md_name.replace("-metadata.txt", "").split("-")
            data_path = os.path.join(os.path.split(md_dir)[0],
                                     "-".join(name_parts[:-1]) + "." + name_parts[-1])
            # create md file and overwrite the name
            tmp = MD_file(data_path, k)
            tmp.read()
            tmp.update_keyword(0, keywords[0])
            tmp.write()
        # the first keyword is the process description entry and is handled separately;
        # the remaining ones are collected once, in the order they were found
        for key in k[1:]:
            if key not in keys:
                keys.append(key)
        p = extract_process_description(md_file)
        # empty descriptions are not worth recovering (same rule as in recover_processes)
        if p != "" and p not in pds:
            pds.append(p)

    # get the elements not already found in processes.pkl and keywords.pkl;
    # list comprehensions keep the discovery order so the saved result is reproducible
    not_saved_keys = [key for key in keys if key not in keywords]
    known_pds = set(processes.get_process_descriptions())
    not_saved_pds = [p for p in pds if p not in known_pds]

    reload_keywords = False
    # if keys were found show them to the user and ask if he wants to save them
    if not_saved_keys and messagebox.askyesno("Found unknown Keywords!", "The following unkown keywords were found in the working directory:\n" + str(not_saved_keys)
            + "\nDo you want to update your keywords.pkl?\n\nWarning: not updating will delete metadata in these unkown keywords."):
        keywords.extend(not_saved_keys)
        save_keywords(keywords)
        # true as in reload keywords
        reload_keywords = True

    reload_processes = False
    if not_saved_pds and messagebox.askyesno("Found unknown Process Descriptions!", "The following unkown process descriptions were found in the working directory:\n" + str(not_saved_pds)
            + "\nDo you want to update your processes.pkl?\n\nWarning: not updating will delete these process descriptions (metadata could be lost). Saving them will save them with placeholder names."):
        for i, pd in enumerate(not_saved_pds):
            name = "descr_"+str(i)
            p_names = processes.get_process_names()
            # append "(1)", "(2)", ... until the placeholder name is unused
            j = 1
            while name in p_names:
                name = name.split("(")[0]
                name = name+"("+str(j)+")"
                j += 1
            # NOTE(review): here the key is a one element list holding the description and
            # the value is the name, the reverse of recover_processes - confirm against
            # PD_handler.__setitem__
            processes[[pd]] = name
        save_processes(processes)
        # true as in reload processes
        reload_processes = True

    return reload_keywords, reload_processes