Merge pull request #2 from FAIRplus/dev

Dev
FAIRplus · Dec 20, 2023 · 6b8902e · 6b8902e
2 parents 1bc5e12 + cca8b5c
commit 6b8902e
Show file tree

Hide file tree

Showing 9 changed files with 3,126 additions and 183 deletions.
diff --git a/README.md b/README.md
@@ -5,7 +5,6 @@ The `api` directory contains the source code of the API. It is a Flask applicati
 
 The database is a MongoDB database hosted on a remote server. 
 
-
 ### Database preparation
 The directory `database/processing` contains scripts to process the data in the Software Observatory and insert new entries suitable for the Tool Discoverer. In addition, indexes for querying are created.
 

diff --git a/api/Dockerfile b/api/Dockerfile
@@ -13,7 +13,7 @@ RUN python3 -m pip install -r requirements.txt
 COPY . .
 
 # Expose correct port
-EXPOSE 5000
+EXPOSE 3500
 
 # Executable commands
-CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0", "--port=3500"]
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "3500"]
diff --git a/api/app.py b/api/app.py
diff --git a/api/biotools_API_querying.py b/api/biotools_API_querying.py
@@ -1,5 +1,6 @@
 import json
 import pandas as pd
+import logging
 
 import db_retrieval
 import zooma_api as za
@@ -18,17 +19,14 @@ class bcolors:
 
 class tools_discoverer(object):
 
-    def __init__(self, label, kw_w, out_path, default_unspecified_keyword_score, custom_weights, verbosity):
-        self.verbosity = verbosity
+    def __init__(self, label, kw_w, out_path, default_unspecified_keyword_score, custom_weights):
         self.label = label
         self.custom_weights = custom_weights
 
-        if self.verbosity:
-            prompt_text = f"Loading input files..."
-            print(f"{bcolors.OKBLUE}{prompt_text}{bcolors.ENDC}")    
+        logging.info(f"Loading input files...")    
 
         self.terms_input = kw_w # list(dict('keyword', 'ClassId','weight'))
-        print(self.terms_input)
+        logging.debug(self.terms_input)
         # keywords in:
         self.keywords_weights = []
         # zooma terms in:
@@ -52,27 +50,26 @@ def __init__(self, label, kw_w, out_path, default_unspecified_keyword_score, cus
 
     def run_pipeline(self):
         self.query_terms()
-        print('Query done')
+        logging.info('Query done')
         if self.results.empty:
-            print('No tools found')
+            logging.info('No tools found')
             self.result_found = False
         else:
             self.rank_tools()
-            print('Tools ranked')
+            logging.info('Tools ranked')
             self.result_found = True
 
 
     def query_zooma(self):
         '''
         keywords is a set of strings to look up in zooma
         '''
-        if self.verbosity:
-            print("Looking up terms in ZOOMA")
+        logging.info("Looking up terms in ZOOMA")
 
         for term in self.terms_input:
             keyword = term['keyword']
             # If edam term, directly add to edam list
-            print(keyword)
+            # print(keyword)
             if term['classId']:
                 self.edam_terms.append(term['classId'])
                 term['weight']= term['weight'] + 1
@@ -81,14 +78,11 @@ def query_zooma(self):
                 self.keywords_weights.append({'keyword':keyword, 'classId':None, 'weight':term['weight']})
                 self.free_terms.append(keyword)
             else:
-                if self.verbosity:
-                    print(f"{bcolors.BOLD}{keyword}{bcolors.ENDC}")
                 confident_matches = za.zooma_single_lookup(keyword)
                 w = term['weight']
                 if confident_matches:
-                    if self.verbosity:
-                        print(f"Matches found in EDAM:")
-                        [print(f"{match['label']} - {match['confidence']} - {match['edam_term']}") for match in confident_matches]
+                    logging.debug(f"Matches found in EDAM:")
+                    [logging.debug(f"{match['label']} - {match['confidence']} - {match['edam_term']}") for match in confident_matches]
 
                     for match in confident_matches:
                         term_id = match['edam_term'].strip('\n')
@@ -105,29 +99,27 @@ def query_zooma(self):
                     self.free_terms.append(keyword)
 
         self.keywords_weights = pd.DataFrame(self.keywords_weights)
-        print(self.keywords_weights)
-        print('Zooma lookup done')
+        logging.info('Zooma lookup done')
 
 
     def query_terms(self):
-        print('edam terms: ' + str(self.edam_terms))
-        print('free terms: ' + str(self.free_terms))
+        logging.info('edam terms: ' + str(self.edam_terms))
+        logging.info('free terms: ' + str(self.free_terms))
         query = db_retrieval.query(self.edam_terms, self.free_terms)
         query.getData() # perform db search
         self.results = query.results
 
     def rank_tools(self):
-        if self.verbosity:
-            promp_text = 'Ranking results...'
-            print(f'{bcolors.OKBLUE}{promp_text}{bcolors.ENDC}')
+        promp_text = 'Ranking results...'
+        logging.info(f'{promp_text}')
 
         # sorting
         # print(self.results)
         self.results['raw_score'] = self.results.apply (lambda row: self.compute_score(row), axis=1)
         max_score = max(self.results['raw_score'])
         self.results['score'] = self.results.apply (lambda row: row['raw_score']/max_score, axis=1)
         self.results = self.results.sort_values('score', ascending=False)    
-        print(self.results)
+        logging.debug(self.results)
 
     def compute_score(self, row):
         if self.custom_weights == False:
@@ -154,7 +146,7 @@ def generate_outputs(self):
             return(self.json_result_parsed)
 
         except Exception as err:
-            raise(err)
+            logging.error(err)
 
 
 
diff --git a/api/db_results.py b/api/db_results.py
@@ -2,6 +2,7 @@
 from dotenv import load_dotenv
 from pymongo import MongoClient
 import pandas as pd
+import logging
 
 
 load_dotenv()
@@ -19,9 +20,9 @@ def query_by_id(identifier):
     collection = connect_mongo()
     result = collection.find_one({'run_id':identifier})
     if result:
-        print('Result found')
+        logging.info('Result found')
     else:
-        print('No result found')
+        logging.warning('No result found')
     return(result)
 
 def push(data):
@@ -30,9 +31,9 @@ def push(data):
         collection.insert_one(data)
         data.pop('_id')
     except Exception as err:
-        print('Could not insert in collection')
+        logging.warning('Could not insert in collection')
         raise(err)
     else:
-        print('Inserted in collection') 
+        logging.info('Inserted in collection') 
 
 
diff --git a/api/db_retrieval.py b/api/db_retrieval.py
@@ -2,6 +2,7 @@
 from dotenv import load_dotenv
 from pymongo import MongoClient
 import pandas as pd
+import logging 
 
 load_dotenv()
 DBHOST = os.getenv('DBHOST')
@@ -57,7 +58,7 @@ def __init__(self, edam_terms, free_terms):
 
 
     def match_edam_label(self, uri):
-        print('Matching EDAM label')
+        logging.info('Matching EDAM label')
         if uri == 'http://edamontology.org/topic_3557':
             uri = 'http://edamontology.org/operation_3557'
         try:
@@ -67,7 +68,7 @@ def match_edam_label(self, uri):
         return(label)
 
     def match_data(self, doc, td):
-        print('Data types matching ...')
+        logging.info('Data types matching ...')
         newd = []
         if td in doc.keys():
             for data in doc[td]:
@@ -83,7 +84,7 @@ def match_data(self, doc, td):
 
 
     def add_to_results(self, matches, topic):
-        print(f'Adding to results ...')
+        logging.info(f'Adding to results ...')
         for doc in matches:
             #print(f"- {doc['name']}")
             doc['_id'] = str(doc['_id'])
@@ -97,11 +98,11 @@ def add_to_results(self, matches, topic):
                     self.results.loc[doc_id] = doc
                     self.results_ids.add(doc_id)
             else:
-                print('hey')
+                logging.warning('No @id in doc')
 
     def query_edam(self):
         for term in self.edam_terms:
-            print(f'Querying EDAM term {term} ...')
+            logging.debug(f'Querying EDAM term {term} ...')
             if 'operation' in term:
                 matches = self.collection.find({
                     'edam_operations.uri' : term
@@ -132,7 +133,6 @@ def full_text_query(self, term):
 
     def query_description(self):
         for term in self.free_terms:
-            print(term)
             term = term.lower()
             l = len(term.split(' '))
             if l==1:
@@ -5,7 +5,6 @@ The `api` directory contains the source code of the API. It is a Flask applicati
 
 The database is a MongoDB database hosted on a remote server.
 
-
 ### Database preparation
 The directory `database/processing` contains scripts to process the data in the Software Observatory and insert new entries suitable for the Tool Discoverer. In addition, indexes for querying are created.
 