Untested/dirty end of the week commit

imallona · imallona · commit 7254b94e80ac · 2024-02-23T17:53:23.000+01:00
diff --git a/Makefile b/Makefile
@@ -7,3 +7,5 @@ benchmark:
 	snakemake -p --cores 1
 dry:
 	snakemake -p --cores 1 -n -p -F
+clean:
+	rm -rf ./out ./log
diff --git a/README.md b/README.md
@@ -8,6 +8,7 @@ Powered by Snakemake
 - `make activate` to activate the environment with the dependencies (pipenv shell)
 - `make benchmark` to run the benchmark (= trigger the Snakefile)
 - `make dry` to dry run the benchmark (= dry run the Snakefile)
+- `make clean` to delete the `./out` and `./log` output folders
 
 # Rationale
 
diff --git a/Snakefile b/Snakefile
@@ -30,6 +30,7 @@ for stage in get_benchmark_stages():
         print('    Params:',  get_module_parameters(stage, module))
     print('------')
 
+
 ## benchmark seeding (datasets and wildcard generation) ##############################################
 
 # print(list(get_stage_outputs('out').values()))
@@ -116,8 +117,70 @@ rule done:
         op.join('log', 'done.txt')
     shell:
         "date > {output}"
-        
-## sandbox
+
+
+
+
+
+
+
+## sandbox ------------------------------------------------------------------------------
 ## not tested yet
 # wildcard_constraints:
 #     dataset='/'.join([re.escape(x) for x in get_initial_datasets()])
+
+
+silence_sandbox = True
+if silence_sandbox:
+    sys.stdout = open(os.devnull, "w")
+    sys.stderr = open(os.devnull, "w")
+
+### sandbox start
+print('--------------------------------------------------------here')
+
+# print(get_deepest_input_dirname('methods'))
+# print(get_deepest_input_dirname('metrics'))
+
+
+# building a lookup dict mapping each tag (format, e.g. 'counts') to its deliverable (full path)
+#
+# print(get_stage_output_dict('data'))
+## tp stands for template
+lookup = dict()
+for stage in get_benchmark_stages():
+    print(stage)
+    if is_initial(stage):
+        o_tps = get_stage_output_dict(stage)
+        for o_tp in o_tps:
+            for module in get_modules_by_stage(stage):            
+                for output_key in o_tp.keys():
+                    lookup.update({output_key : o_tp[output_key].format(mod = module,
+                                                                              stage = stage,
+                                                                              params = 'default',
+                                                                              id = module)})
+    elif is_terminal(stage):
+        ## todo update
+        pass
+    else:
+        ## implicit means explicit - not intuitive at all
+        i_tps = get_stage_implicit_inputs(stage)
+        o_tps = get_stage_outputs(stage)        
+        print('inputs are', i_tps)
+        print('outputs are', o_tps)
+        for i_tp in i_tps:
+            excl = get_module_excludes(stage, module)
+            for module in list(set(modules) - set(excl)):
+                print('here')
+    print(lookup)
+            
+    # else:
+    #     for module in get_modules_by_stage(stage):
+    #         ii = get_stage_implicit_inputs(stage)
+    #         print(ii.keys())
+    #         print(ii.values())
+
+
+print('--------------------------------------------------------here')
+
+
+### sandbox end
diff --git a/src/workflow_helpers.py b/src/workflow_helpers.py
@@ -50,13 +50,19 @@ def get_stage_implicit_inputs(stage):
         return(None)
     return(config['stages'][stage]['inputs'])
 
-def get_stage_outputs(stage):
+def get_stage_output_dict(stage):
      if 'terminal' in config['stages'][stage].keys() and config['stages'][stage]['terminal']:
          return(None)
      L = config['stages'][stage]['outputs']
-     return(dict(pair for d in L for pair in d.items()))
+     return(L)
 
+def get_stage_outputs(stage):
+    if 'terminal' in config['stages'][stage].keys() and config['stages'][stage]['terminal']:
+        return(None)
+    L = config['stages'][stage]['outputs']
+    return(dict(pair for d in L for pair in d.items()))
 
+ 
 def get_stage_explicit_inputs(stage):
     implicit = get_stage_implicit_inputs(stage)
     explicit = implicit
@@ -128,17 +134,20 @@ def count_path_depth(path):
 ##   and 'meta' from raw, then we have to nest outputs after the longest (deepest) folder -
 ##   that is, raw/processed/here, and not to raw/here
 def get_deepest_input_dirname(stage):
-    i = get_stage_explicit_inputs(stage)
-    deepest_input = '.'
-    if i is not None:
+    ii = get_stage_implicit_inputs(stage)
+    deepest_inputs = []
+    if ii is not None:
+        deepest_input = '.'
         deepest_input_depth = 0
-        for item in i.keys():
-            curr_depth = count_path_depth(i[item])
-            if curr_depth > deepest_input_depth:
-                deepest_input_depth = curr_depth
-                deepest_input = op.dirname(i[item])
-    return('this breaks because explicit inputs are lists - raw vs processed; iterate instead')
-
+        for input_dict in ii:
+            for item in input_dict.keys():
+                curr_depth = count_path_depth(input_dict[item])
+                if curr_depth > deepest_input_depth:
+                    deepest_input_depth = curr_depth
+                    deepest_input = op.dirname(input_dict[item])
+                    deepest_inputs.append(deepest_input)
+    
+    return(deepest_inputs)
 
 ## with substituted module/stage/ids    
 def fill_explicit_outputs(stage, module):
@@ -151,3 +160,12 @@ def fill_explicit_outputs(stage, module):
     
 def nest_deliverable_path(parent, path):
     return(op.join(parent, path))
+
+## using the input identifiers, excludes and parameters and not 'after' clauses        
+def traverse_yaml():
+    lookup = ''
+    for stage in get_benchmark_stages():
+        for module in get_modules_by_stage(stage):
+            ii = get_stage_implicit_inputs(stage)
+    return(todo)
+