Skip to content

Commit

Permalink
Merge pull request #2 from FAIRplus/dev
Browse files (browse the repository at this point in the history)
Dev
  • Loading branch information
EvaMart authored Dec 20, 2023
2 parents 1bc5e12 + cca8b5c commit 6b8902e
Show file tree
Hide file tree
Showing 9 changed files with 3,126 additions and 183 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ The `api` directory contains the source code of the API. It is a Flask applicati

The database is a MongoDB database hosted on a remote server.


### Database preparation
The directory `database/processing` contains scripts that process the data in the Software Observatory and insert new entries suitable for the Tool Discoverer. These scripts also create the indexes used for querying.

Expand Down
4 changes: 2 additions & 2 deletions api/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ RUN python3 -m pip install -r requirements.txt
COPY . .

# Expose correct port
EXPOSE 5000
EXPOSE 3500

# Executable commands
CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0", "--port=3500"]
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "3500"]
138 changes: 0 additions & 138 deletions api/app.py

This file was deleted.

44 changes: 18 additions & 26 deletions api/biotools_API_querying.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import pandas as pd
import logging

import db_retrieval
import zooma_api as za
Expand All @@ -18,17 +19,14 @@ class bcolors:

class tools_discoverer(object):

def __init__(self, label, kw_w, out_path, default_unspecified_keyword_score, custom_weights, verbosity):
self.verbosity = verbosity
def __init__(self, label, kw_w, out_path, default_unspecified_keyword_score, custom_weights):
self.label = label
self.custom_weights = custom_weights

if self.verbosity:
prompt_text = f"Loading input files..."
print(f"{bcolors.OKBLUE}{prompt_text}{bcolors.ENDC}")
logging.info(f"Loading input files...")

self.terms_input = kw_w # list(dict('keyword', 'ClassId','weight'))
print(self.terms_input)
logging.debug(self.terms_input)
# keywords in:
self.keywords_weights = []
# zooma terms in:
Expand All @@ -52,27 +50,26 @@ def __init__(self, label, kw_w, out_path, default_unspecified_keyword_score, cus

def run_pipeline(self):
self.query_terms()
print('Query done')
logging.info('Query done')
if self.results.empty:
print('No tools found')
logging.info('No tools found')
self.result_found = False
else:
self.rank_tools()
print('Tools ranked')
logging.info('Tools ranked')
self.result_found = True


def query_zooma(self):
'''
keywords is a set of strings to look up in zooma
'''
if self.verbosity:
print("Looking up terms in ZOOMA")
logging.info("Looking up terms in ZOOMA")

for term in self.terms_input:
keyword = term['keyword']
# If edam term, directly add to edam list
print(keyword)
# print(keyword)
if term['classId']:
self.edam_terms.append(term['classId'])
term['weight']= term['weight'] + 1
Expand All @@ -81,14 +78,11 @@ def query_zooma(self):
self.keywords_weights.append({'keyword':keyword, 'classId':None, 'weight':term['weight']})
self.free_terms.append(keyword)
else:
if self.verbosity:
print(f"{bcolors.BOLD}{keyword}{bcolors.ENDC}")
confident_matches = za.zooma_single_lookup(keyword)
w = term['weight']
if confident_matches:
if self.verbosity:
print(f"Matches found in EDAM:")
[print(f"{match['label']} - {match['confidence']} - {match['edam_term']}") for match in confident_matches]
logging.debug(f"Matches found in EDAM:")
[logging.debug(f"{match['label']} - {match['confidence']} - {match['edam_term']}") for match in confident_matches]

for match in confident_matches:
term_id = match['edam_term'].strip('\n')
Expand All @@ -105,29 +99,27 @@ def query_zooma(self):
self.free_terms.append(keyword)

self.keywords_weights = pd.DataFrame(self.keywords_weights)
print(self.keywords_weights)
print('Zooma lookup done')
logging.info('Zooma lookup done')


def query_terms(self):
print('edam terms: ' + str(self.edam_terms))
print('free terms: ' + str(self.free_terms))
logging.info('edam terms: ' + str(self.edam_terms))
logging.info('free terms: ' + str(self.free_terms))
query = db_retrieval.query(self.edam_terms, self.free_terms)
query.getData() # perform db search
self.results = query.results

def rank_tools(self):
if self.verbosity:
promp_text = 'Ranking results...'
print(f'{bcolors.OKBLUE}{promp_text}{bcolors.ENDC}')
promp_text = 'Ranking results...'
logging.info(f'{promp_text}')

# sorting
# print(self.results)
self.results['raw_score'] = self.results.apply (lambda row: self.compute_score(row), axis=1)
max_score = max(self.results['raw_score'])
self.results['score'] = self.results.apply (lambda row: row['raw_score']/max_score, axis=1)
self.results = self.results.sort_values('score', ascending=False)
print(self.results)
logging.debug(self.results)

def compute_score(self, row):
if self.custom_weights == False:
Expand All @@ -154,7 +146,7 @@ def generate_outputs(self):
return(self.json_result_parsed)

except Exception as err:
raise(err)
logging.error(err)



9 changes: 5 additions & 4 deletions api/db_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dotenv import load_dotenv
from pymongo import MongoClient
import pandas as pd
import logging


load_dotenv()
Expand All @@ -19,9 +20,9 @@ def query_by_id(identifier):
collection = connect_mongo()
result = collection.find_one({'run_id':identifier})
if result:
print('Result found')
logging.info('Result found')
else:
print('No result found')
logging.warning('No result found')
return(result)

def push(data):
Expand All @@ -30,9 +31,9 @@ def push(data):
collection.insert_one(data)
data.pop('_id')
except Exception as err:
print('Could not insert in collection')
logging.warning('Could not insert in collection')
raise(err)
else:
print('Inserted in collection')
logging.info('Inserted in collection')


12 changes: 6 additions & 6 deletions api/db_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dotenv import load_dotenv
from pymongo import MongoClient
import pandas as pd
import logging

load_dotenv()
DBHOST = os.getenv('DBHOST')
Expand Down Expand Up @@ -57,7 +58,7 @@ def __init__(self, edam_terms, free_terms):


def match_edam_label(self, uri):
print('Matching EDAM label')
logging.info('Matching EDAM label')
if uri == 'http://edamontology.org/topic_3557':
uri = 'http://edamontology.org/operation_3557'
try:
Expand All @@ -67,7 +68,7 @@ def match_edam_label(self, uri):
return(label)

def match_data(self, doc, td):
print('Data types matching ...')
logging.info('Data types matching ...')
newd = []
if td in doc.keys():
for data in doc[td]:
Expand All @@ -83,7 +84,7 @@ def match_data(self, doc, td):


def add_to_results(self, matches, topic):
print(f'Adding to results ...')
logging.info(f'Adding to results ...')
for doc in matches:
#print(f"- {doc['name']}")
doc['_id'] = str(doc['_id'])
Expand All @@ -97,11 +98,11 @@ def add_to_results(self, matches, topic):
self.results.loc[doc_id] = doc
self.results_ids.add(doc_id)
else:
print('hey')
logging.warning('No @id in doc')

def query_edam(self):
for term in self.edam_terms:
print(f'Querying EDAM term {term} ...')
logging.debug(f'Querying EDAM term {term} ...')
if 'operation' in term:
matches = self.collection.find({
'edam_operations.uri' : term
Expand Down Expand Up @@ -132,7 +133,6 @@ def full_text_query(self, term):

def query_description(self):
for term in self.free_terms:
print(term)
term = term.lower()
l = len(term.split(' '))
if l==1:
Expand Down
Loading

0 comments on commit 6b8902e

Please sign in to comment.