From 377d6a868ec3fef1618d0fcc15af5bb605329220 Mon Sep 17 00:00:00 2001
From: kikass13 <nickfiege999@gmail.com>
Date: Sat, 24 Oct 2020 17:02:52 +0200
Subject: [PATCH] contribution engine will now work with proper dicts instead
 of gathering all steps into one single dict; helper function for splitting
 dict into key-val-lists added for convenience; normalize_ step added after
 merge_ step in the engine, to be more transparent #164

---
 .../contribution_distribution_engine.py       | 48 +++++++++----------
 .../contribution_distribution_engine_types.py |  7 +++
 libreselery/libreselery.py                    | 15 +-----
 3 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/libreselery/contribution_distribution_engine.py b/libreselery/contribution_distribution_engine.py
index edfa96b..d791e20 100644
--- a/libreselery/contribution_distribution_engine.py
+++ b/libreselery/contribution_distribution_engine.py
@@ -7,7 +7,6 @@
 
 class ContributionDistributionEngine(object):
     def __init__(self, config):
-        print("\n\nLOOK, BUT DONT TOUCH!")
         super(ContributionDistributionEngine, self).__init__()
         ###grab relevant entries from selery cfg
         self.domains = self._extractContributionDomains(config)
@@ -27,56 +26,57 @@ def updateGlobals(self, config=None, connectors=None):
         for domain in self.domains:
             domain.updateGlobals(config=config, connectors=connectors)
 
+    def splitDictKeyVals(self, d):
+        return cdetypes.splitDictKeyVals(d)
+
     def gather_(self):
         ### our task is to apply whatever ContributionType was configured
         ### for a specific domain and extract all
         ### contributors + their weights that fit into this domain
-        print("\n\nLOOK, BUT DONT TOUCH!")
-        cachedContributors = []
-
-        contributorData = {"gather": {}}
+        contributorDataScored = {}
         for domain in self.domains:
             ### execute all activities of every domain
             ### this should identify the contributos that
             ### fit the activity description /
             ### that have done the configured activity successfully
-            contributorScores = domain.gather_(cachedContributors=cachedContributors)
+            contributorScores = domain.gather_()
             ### every domain has to weight it's activities
-            contributorData["gather"][domain.name] = contributorScores
+            contributorDataScored[domain.name] = contributorScores
             ###
-        return contributorData
+        return contributorDataScored
 
-    def weight_(self, contributorData):
+    def weight_(self, contributorDataScored):
         ### domains have to weight activity scores in relation to each other
-        contributorData["weight"] = {}
+        contributorDataWeighted = {}
         for domain in self.domains:
-            domainContent = contributorData.get("gather").get(domain.name)
+            domainContent = contributorDataScored.get(domain.name)
             ### normalize contributor weights based on contributor scores
             contributors, weights = domain.weight_(domainContent)
-            contributorData["weight"][domain.name] = (contributors, weights)
-        return contributorData
+            contributorDataWeighted[domain.name] = (contributors, weights)
+        return contributorDataWeighted
 
-    def merge_(self, contributorData):
+    def merge_(self, contributorDataWeighted):
         ### after all domains are processed, we now have to weight the domains
         ### in relation to each other using the "weight" attribute given
         ### via the ContributionDomain configuration
-        contributorData["merge"] = {}
+        contributorDataMerged = {}
         for domain in self.domains:
             ### merge weights/scores of contributors over all domains
-            contributors, weights = contributorData.get("weight").get(domain.name)
+            contributors, weights = contributorDataWeighted.get(domain.name)
             for contributor, weight in zip(contributors, weights):
-                if contributor in contributorData["merge"]:
-                    contributorData["merge"][contributor] += weight * domain.weight
+                if contributor in contributorDataMerged:
+                    contributorDataMerged[contributor] += weight * domain.weight
                 else:
-                    contributorData["merge"][contributor] = weight * domain.weight
+                    contributorDataMerged[contributor] = weight * domain.weight
+        return contributorDataMerged
 
+    def normalize_(self, contributorDataMerged):
         ### because we potentially downgraded our weights by multiplying with
         ### the given domain weight ... we have to re-normalize the weights
         ### of every contributor to be within [0 ... 1] again
-        contributorData["merge_norm"] = {}
-        blob = [*contributorData.get("merge").items()]
-        contributors, weights = ([c for c, w in blob], [w for c, w in blob])
+        contributorDataNormalized = {}
+        contributors, weights = cdetypes.splitDictKeyVals(contributorDataMerged)
         newWeights = cdetypes.normalizeSum(weights)
         for contributor, weight in zip(contributors, newWeights):
-            contributorData["merge_norm"][contributor] = weight
-        return contributorData
+            contributorDataNormalized[contributor] = weight
+        return contributorDataNormalized
diff --git a/libreselery/contribution_distribution_engine_types.py b/libreselery/contribution_distribution_engine_types.py
index fde2474..ca3dc72 100644
--- a/libreselery/contribution_distribution_engine_types.py
+++ b/libreselery/contribution_distribution_engine_types.py
@@ -7,6 +7,13 @@
 ACTIVITY_PLUGIN_MODULE_PREFIX = "libreselery.contribution_activity_plugins"
 
 
+def splitDictKeyVals(d):
+    ### split up the dicts to create contributors and weight lists
+    blob = [*d.items()]
+    keys, vals = ([c for c, w in blob], [w for c, w in blob])
+    return keys, vals
+
+
 def normalizeR(v):
     v = np.array(v) if type(v) != np.array else v
     return v / np.sqrt(np.sum(v ** 2))
diff --git a/libreselery/libreselery.py b/libreselery/libreselery.py
index 5ddbdd7..bae990d 100755
--- a/libreselery/libreselery.py
+++ b/libreselery/libreselery.py
@@ -390,21 +390,10 @@ def weight(
 
     def run(self):
         contributorData_scored = self.cde.gather_()
-        # print("1___________________________")
-        # print(contributorData_scored["gather"])
-        # print("2___________________________")
         domainContributors_weighted = self.cde.weight_(contributorData_scored)
-        # print(domainContributors_weighted["weight"])
-        # print("3.1_________________________")
         domainContributors_merged = self.cde.merge_(domainContributors_weighted)
-        # print(domainContributors_merged["merge"])
-        # print("3.2_________________________")
-        print("")
-        print(domainContributors_merged["merge_norm"])
-        print("")
-        ### split up the dicts to create contributors and weight lists
-        blob = [*domainContributors_merged["merge_norm"].items()]
-        contributors, weights = ([c for c, w in blob], [w for c, w in blob])
+        domainContributors_normalized = self.cde.normalize_(domainContributors_merged)
+        contributors, weights = self.cde.splitDictKeyVals(domainContributors_normalized)
         return contributors, weights
 
     def split(self, contributors, weights):