Skip to content

Commit

Permalink
Version 1.5.3
Browse files Browse the repository at this point in the history
  • Loading branch information
filak committed Oct 6, 2023
1 parent 88dedcf commit 7e3e550
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 73 deletions.
2 changes: 1 addition & 1 deletion flask-app/application/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def getPath(path):
## SeaSurf (csrf) & Talisman
csrf.init_app(app)

if not relax:
if not relax and not app.debug:
## Paranoid
paranoid.init_app(app)
paranoid.redirect_view = getPath('/')
Expand Down
10 changes: 9 additions & 1 deletion flask-app/application/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,13 @@ def manage(action):
exp_files[f]['fname'] = fp.name
exp_files[f]['fdate'] = mtu.getFpathDate(fp)


# 2024_umls.tsv
show_umls_exports = False
umls_tsv = mtu.getTempFpath('umls', ext='tsv')
if umls_tsv.is_file():
show_umls_exports = True

show_lookups_exports = False
lookups_base = mtu.getTempFpath('lookups')
if lookups_base.is_file():
Expand All @@ -1400,7 +1407,8 @@ def manage(action):
msg = 'Background worker is RUNNING...'
flash(msg, 'info')

return render_template('manage.html', users=users, exports=exp_files, show_lookups=show_lookups_exports, show_marc=show_marc_exports)
return render_template('manage.html', users=users, exports=exp_files,
show_lookups=show_lookups_exports, show_marc=show_marc_exports, show_umls_exports=show_umls_exports)


@app.route(getPath('/download/<fname>'))
Expand Down
29 changes: 13 additions & 16 deletions flask-app/application/templates/manage.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
<div class="row">

<div class="col-md-3 col-sm">

{% include 'snips/users.html' %}

</div>

<div class="col-md-3 col-sm">
Expand Down Expand Up @@ -232,7 +230,7 @@ <h4>MARC21 Exports</h4>
</li>
</ul>

{% if show_marc %}
{% if show_marc %}

<div class="row mb-3">
<div class="col">
Expand Down Expand Up @@ -341,7 +339,7 @@ <h5>Settings</h5>
</div>
</div>

{% endif %}
{% endif %}


</div>
Expand Down Expand Up @@ -484,7 +482,6 @@ <h5>JSON <small class="text-info">for data parsers</small></h5>
</div>
</div>


<div class="row mb-3">
<div class="col">
<h5>JSON <small class="text-info">for Elastic</small></h5>
Expand Down Expand Up @@ -518,14 +515,16 @@ <h5>JSON <small class="text-info">for Elastic</small></h5>

<div class="row mb-3">
<div class="col">
<h5>UMLS TSV <small class="text-info">for MSSQL - includes inactive terms</small></h5>
<h5>UMLS TSV MSSQL <small class="text-info">[includes inactive terms]</small></h5>

{% if show_marc %}
{% if show_umls_exports %}
<form action="{{ url_for('update_stats', stat='umls_all') }}" method="post">
<input type="hidden" name="_csrf_token" value="{{ csrf_token() }}">

{% if exports['umls_all_tsv'] %}
<button class="btn btn-sm btn-info" type="submit" title="Refresh">
<button class="btn btn-sm btn-info" type="submit"
data-toggle="tooltip" data-placement="bottom"
title="To refresh You need to regenerate UMLS TSV first">
Refresh
</button>
{% else %}
Expand All @@ -549,14 +548,16 @@ <h5>UMLS TSV <small class="text-info">for MSSQL - includes inactive terms</small

<div class="row mb-3">
<div class="col">
<h5>UMLS TSV RAW <small class="text-info">Includes inactive terms; chars NOT normalized</small></h5>
<h5>UMLS TSV RAW <small class="text-info">[includes inactive terms; chars NOT normalized]</small></h5>

{% if show_marc %}
{% if show_umls_exports %}
<form action="{{ url_for('update_stats', stat='umls_raw') }}" method="post">
<input type="hidden" name="_csrf_token" value="{{ csrf_token() }}">

{% if exports['umls_raw_tsv'] %}
<button class="btn btn-sm btn-info" type="submit" title="Refresh">
<button class="btn btn-sm btn-info" type="submit"
data-toggle="tooltip" data-placement="bottom"
title="To refresh You need to regenerate UMLS TSV first">
Refresh
</button>
{% else %}
Expand All @@ -578,12 +579,8 @@ <h5>UMLS TSV RAW <small class="text-info">Includes inactive terms; chars NOT nor
</div>
</div>


{% endif %}

{% endif %}
</div>


</div>

</div>
Expand Down
103 changes: 49 additions & 54 deletions flask-app/application/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,15 @@ def exportData(export):
if export in ['umls_all','umls_raw']:
fpath = getTempFpath('umls', ext=ext)
epath = getStatsFpath(export, ext=ext)
exportTsv(export, fpath, epath)
exportTsvFile(export, fpath, epath)
else:
fpath = getTempFpath(export, ext=ext)
if not lpath.is_file():
resp = startStats(export, fpath, lpath)

if resp and fpath.is_file():
epath = getStatsFpath(export, ext=ext)
exportTsv(export, fpath, epath)
exportTsvFile(export, fpath, epath)


def startStats(stat, fpath, lpath, interval=None):
Expand Down Expand Up @@ -1210,6 +1210,12 @@ def writeOutputGzip(fpath, data, mode='wt'):
ft.write(data)


def writeOutputGzipTsv(fpath, lines, mode='wt'):
    """Write rows as tab-separated values into a gzip file at ``<fpath>.gz``.

    fpath -- base output path; the '.gz' suffix is appended to str(fpath)
    lines -- iterable of rows, each row an iterable of field values
    mode  -- gzip text mode: 'wt' (default) starts a new file, 'at' appends

    Every field is quoted (csv.QUOTE_ALL) and embedded double quotes are
    doubled. newline='' leaves the csv writer's own line endings untouched.
    """
    target = str(fpath) + '.gz'
    with gzip.open(target, mode=mode, encoding='utf-8', newline='') as gz_out:
        tsv_writer = csv.writer(
            gz_out,
            delimiter='\t',
            doublequote=True,
            quoting=csv.QUOTE_ALL,
        )
        tsv_writer.writerows(lines)


def loadJsonFile(fpath, default=None):
try:
with open(str(fpath), mode='r', encoding='utf-8') as json_file:
Expand All @@ -1236,7 +1242,7 @@ def sanitize_input(text, normalize=True):
t = ' '.join(t.split())
t = t.replace('?','')
if normalize:
t = normalize_str(t)
t = normalize_str(t, clear_escaping=True)
t = t.replace('"','\\"')
return t

Expand All @@ -1249,13 +1255,13 @@ def sanitize_text_query(text):
return t


def normalize_str(text, escape_double_chars_tsv=False, skip_normalization=False):
def normalize_str(text, skip_normalization=False, clear_escaping=False):
if text:
if not skip_normalization:
for src, trg in app.config['CHAR_NORM_MAP']:
text = text.replace(src, trg)
if escape_double_chars_tsv:
text = text.replace('\\"','""')
if clear_escaping:
text = text.replace('\\"','"')
return text


Expand Down Expand Up @@ -1531,81 +1537,70 @@ def getDescClass(dtype):
return dtype_dict.get(dtype, '-1')


def exportTsv(export, inputFile, outputFile):

count = 0
found = 0
batch = 0

bsize = 30000
def exportTsvFile(export, inputFile, outputFile):

ext = os.path.splitext(inputFile)[1]
fext = ext.lower()

head = ''
tab = '\t'

if export == 'umls':
### ?dui ?cui ?lang ?tty ?str ?tui ?scn
cols = 'DescriptorUI,ConceptUI,Language,TermType,String,TermUI,ScopeNote'.split(',')
head = (tab).join(cols) + '\n'

elif export in ['umls_all','umls_raw']:
### ?status ?tstatus ?dui ?cui ?lang ?tty ?str ?tui ?scn
cols = 'Dstatus,Tstatus,DescriptorUI,ConceptUI,Language,TermType,String,TermUI,ScopeNote'.split(',')
head = (tab).join(cols) + '\n'

writeOutputGzip(outputFile, head)
writeOutputGzipTsv(outputFile, [cols], mode='wt')

if fext == '.gz':
fh = gzip.open(str(inputFile), mode='rt', encoding='utf-8')
else:
fh = open(str(inputFile), mode='r', encoding='utf-8')

s = io.StringIO()
docs = []
count = 0

for line in fh:
count += 1
found += 1
batch += 1

if count > 1:
if count > 0:
line = line.replace('@'+app.config['TARGET_LANG'], '')
line = line.replace('^^xsd:boolean', '')
row = line.split(tab)

row = line.strip().split(tab)

if export in ['umls_all','umls_raw']:
if row[1] == 'false':
#print(line)
pass
else:
line = (tab).join(row)
clean_row = (tab).join( clearQuotes(row) )

if export == 'umls_raw':
s.write( normalize_str(line, escape_double_chars_tsv=True, skip_normalization=True) )
docs.append( normalize_str( clean_row, clear_escaping=True, skip_normalization=True ).split(tab) )
else:
s.write( normalize_str(line, escape_double_chars_tsv=True) )
docs.append( normalize_str( clean_row, clear_escaping=True ).split(tab) )

elif export == 'umls':

if row[0] == 'false' or row[1] == 'false':
#print(line)
pass
else:
line = (tab).join(row[2:])
s.write( normalize_str(line, escape_double_chars_tsv=True) )
clean_row = (tab).join( clearQuotes(row[2:]) )
docs.append( normalize_str( clean_row, clear_escaping=True ).split(tab) )

else:
line = (tab).join(row)
s.write(line)
count += 1

writeOutputGzipTsv(outputFile, docs, mode='at')

if batch == bsize:
writeOutputGzip(outputFile, s.getvalue(), mode='at')
batch = 0
s.close()
s = io.StringIO()

fh.close()
writeOutputGzip(outputFile, s.getvalue(), mode='at')
s.close()
def clearQuotes(row):
    """Return a copy of row with one enclosing pair of double quotes removed from each field.

    row -- iterable of strings (the fields of one TSV line)

    A field is unwrapped only when it both starts and ends with '"' and is
    at least two characters long. Unquoted fields, and fields with an
    unmatched leading quote, are returned unchanged, so no real character
    is dropped from the end of such a value.
    """
    cleaned = []
    for item in row:
        # Require a matched pair: slicing [1:-1] on '"abc' would drop the 'c'.
        if len(item) >= 2 and item.startswith('"') and item.endswith('"'):
            cleaned.append(item[1:-1])
        else:
            cleaned.append(item)
    return cleaned


def cleanDescView(dview):
Expand All @@ -1627,18 +1622,18 @@ def cleanDescView(dview):
line = line.strip()

if line.startswith('type:'):
if 'MeSH TopicalDescriptor' in line:
pass
elif 'MeSH GeographicalDescriptor' in line:
pass
elif 'MeSH PublicationType' in line:
pass
elif 'MeSH CheckTag' in line:
pass
elif 'MeSH Qualifier' in line:
pass
elif line:
out += line + '\n'
if 'MeSH TopicalDescriptor' in line:
pass
elif 'MeSH GeographicalDescriptor' in line:
pass
elif 'MeSH PublicationType' in line:
pass
elif 'MeSH CheckTag' in line:
pass
elif 'MeSH Qualifier' in line:
pass
elif line:
out += line + '\n'
else:
if line:
out += line + '\n'
Expand Down
2 changes: 1 addition & 1 deletion flask-app/mtw_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
### Version 1.5.2
### Version 1.5.3
### Python 3.11 ###
arrow==1.3.0
bcrypt==4.0.1 ## NOT python-bcrypt !!!
Expand Down

0 comments on commit 7e3e550

Please sign in to comment.