From 377d6a868ec3fef1618d0fcc15af5bb605329220 Mon Sep 17 00:00:00 2001 From: kikass13 Date: Sat, 24 Oct 2020 17:02:52 +0200 Subject: [PATCH] contribution engine will now work with proper dicts instead of gathering all steps into one single dict; helper function for splitting dict into key-val-lists added for convenience; normalize_ step added after merge_ step in the engine, to be more transparent #164 --- .../contribution_distribution_engine.py | 48 +++++++++---------- .../contribution_distribution_engine_types.py | 7 +++ libreselery/libreselery.py | 15 +----- 3 files changed, 33 insertions(+), 37 deletions(-) diff --git a/libreselery/contribution_distribution_engine.py b/libreselery/contribution_distribution_engine.py index edfa96b..d791e20 100644 --- a/libreselery/contribution_distribution_engine.py +++ b/libreselery/contribution_distribution_engine.py @@ -7,7 +7,6 @@ class ContributionDistributionEngine(object): def __init__(self, config): - print("\n\nLOOK, BUT DONT TOUCH!") super(ContributionDistributionEngine, self).__init__() ###grab relevant entries from selery cfg self.domains = self._extractContributionDomains(config) @@ -27,56 +26,57 @@ def updateGlobals(self, config=None, connectors=None): for domain in self.domains: domain.updateGlobals(config=config, connectors=connectors) + def splitDictKeyVals(self, d): + return cdetypes.splitDictKeyVals(d) + def gather_(self): ### our task is to apply whatever ContributionType was configured ### for a specific domain and extract all ### contributors + their weights that fit into this domain - print("\n\nLOOK, BUT DONT TOUCH!") - cachedContributors = [] - - contributorData = {"gather": {}} + contributorDataScored = {} for domain in self.domains: ### execute all activities of every domain ### this should identify the contributos that ### fit the activity description / ### that have done the configured activity successfully - contributorScores = domain.gather_(cachedContributors=cachedContributors) + contributorScores 
= domain.gather_() ### every domain has to weight it's activities - contributorData["gather"][domain.name] = contributorScores + contributorDataScored[domain.name] = contributorScores ### - return contributorData + return contributorDataScored - def weight_(self, contributorData): + def weight_(self, contributorDataScored): ### domains have to weight activity scores in relation to each other - contributorData["weight"] = {} + contributorDataWeighted = {} for domain in self.domains: - domainContent = contributorData.get("gather").get(domain.name) + domainContent = contributorDataScored.get(domain.name) ### normalize contributor weights based on contributor scores contributors, weights = domain.weight_(domainContent) - contributorData["weight"][domain.name] = (contributors, weights) - return contributorData + contributorDataWeighted[domain.name] = (contributors, weights) + return contributorDataWeighted - def merge_(self, contributorData): + def merge_(self, contributorDataWeighted): ### after all domains are processed, we now have to weight the domains ### in relation to each other using the "weight" attribute given ### via the ContributionDomain configuration - contributorData["merge"] = {} + contributorDataMerged = {} for domain in self.domains: ### merge weights/scores of contributors over all domains - contributors, weights = contributorData.get("weight").get(domain.name) + contributors, weights = contributorDataWeighted.get(domain.name) for contributor, weight in zip(contributors, weights): - if contributor in contributorData["merge"]: - contributorData["merge"][contributor] += weight * domain.weight + if contributor in contributorDataMerged: + contributorDataMerged[contributor] += weight * domain.weight else: - contributorData["merge"][contributor] = weight * domain.weight + contributorDataMerged[contributor] = weight * domain.weight + return contributorDataMerged + def normalize_(self, contributorDataMerged): ### because we potentially downgraded our weights 
by multiplying with ### the given domain weight ... we have to re-normalize the weights ### of every contributor to be within [0 ... 1] again - contributorData["merge_norm"] = {} - blob = [*contributorData.get("merge").items()] - contributors, weights = ([c for c, w in blob], [w for c, w in blob]) + contributorDataNormalized = {} + contributors, weights = cdetypes.splitDictKeyVals(contributorDataMerged) newWeights = cdetypes.normalizeSum(weights) for contributor, weight in zip(contributors, newWeights): - contributorData["merge_norm"][contributor] = weight - return contributorData + contributorDataNormalized[contributor] = weight + return contributorDataNormalized diff --git a/libreselery/contribution_distribution_engine_types.py b/libreselery/contribution_distribution_engine_types.py index fde2474..ca3dc72 100644 --- a/libreselery/contribution_distribution_engine_types.py +++ b/libreselery/contribution_distribution_engine_types.py @@ -7,6 +7,13 @@ ACTIVITY_PLUGIN_MODULE_PREFIX = "libreselery.contribution_activity_plugins" +def splitDictKeyVals(d): + ### split up the dicts to create contributors and weight lists + blob = [*d.items()] + keys, vals = ([c for c, w in blob], [w for c, w in blob]) + return keys, vals + + def normalizeR(v): v = np.array(v) if type(v) != np.array else v return v / np.sqrt(np.sum(v ** 2)) diff --git a/libreselery/libreselery.py b/libreselery/libreselery.py index 5ddbdd7..bae990d 100755 --- a/libreselery/libreselery.py +++ b/libreselery/libreselery.py @@ -390,21 +390,10 @@ def weight( def run(self): contributorData_scored = self.cde.gather_() - # print("1___________________________") - # print(contributorData_scored["gather"]) - # print("2___________________________") domainContributors_weighted = self.cde.weight_(contributorData_scored) - # print(domainContributors_weighted["weight"]) - # print("3.1_________________________") domainContributors_merged = self.cde.merge_(domainContributors_weighted) - # 
print(domainContributors_merged["merge"]) - # print("3.2_________________________") - print("") - print(domainContributors_merged["merge_norm"]) - print("") - ### split up the dicts to create contributors and weight lists - blob = [*domainContributors_merged["merge_norm"].items()] - contributors, weights = ([c for c, w in blob], [w for c, w in blob]) + domainContributors_normalized = self.cde.normalize_(domainContributors_merged) + contributors, weights = self.cde.splitDictKeyVals(domainContributors_normalized) return contributors, weights def split(self, contributors, weights):