This repository has been archived by the owner on Jun 20, 2022. It is now read-only.

Worker optimization #5

Open · wants to merge 43 commits into base: master
Changes from 28 commits

Commits (43)
7089124
Added dask distributed to required packages
bobluppes Apr 13, 2021
916412d
Fixed syntax error in setup.py
bobluppes Apr 13, 2021
eb33ed6
Added jupyter server proxy to required packages
bobluppes Apr 16, 2021
ffdb453
Added jupyter notebooks for scheduler and worker plugins
bobluppes Apr 16, 2021
245ee28
Removed deprecated workers.ipynb
bobluppes Apr 16, 2021
89ad246
Added dask distributed worker space to .gitignore
bobluppes Apr 16, 2021
2da94fe
Added some cells for testing the connection between a scheduler and w…
bobluppes Apr 23, 2021
80a9a94
Ignore all dask-worker-space directories in project
bobluppes Apr 23, 2021
fba0e9f
Added tornado and flake8 to required packages
bobluppes Apr 23, 2021
847a18b
Basic setup for an accelerated worker in dask distributed
bobluppes Apr 23, 2021
a246de1
accelerated_worker.py substitutes re2 operator
bobluppes Apr 23, 2021
03d1414
Fixed bug in which accelerated worker did not get any arguments at cu…
bobluppes Apr 23, 2021
057d7b8
Added config for Flake8
bobluppes Apr 25, 2021
aae071a
Added data_generator dir to Flake8 exclude since it is a submodule
bobluppes Apr 25, 2021
a68b999
Added workflow to check flake8 on PR
bobluppes Apr 25, 2021
81e90c3
Fixed Flake8 warnings
bobluppes Apr 25, 2021
bc449fe
Added linter action badge to README.md
bobluppes Apr 25, 2021
ca2f51b
Added simple benchmark for multiple accelerated workers to main.py
bobluppes Apr 26, 2021
38b049e
Added tabulate to plots.ipynb
bobluppes Apr 28, 2021
f76ab60
Changed CMakeLists.txt ABI to 1
bobluppes Apr 30, 2021
bf72776
Streamlined testing on AWS
bobluppes May 2, 2021
ba3bbca
Added bokeh to setup.py
bobluppes May 2, 2021
e94e7b3
Added option to spin up vanilla workers from start_worker.py
bobluppes May 5, 2021
9278880
Added batch size benchmark
bobluppes May 5, 2021
e69d0f7
Specify scheduler address on cli for main.py
bobluppes May 5, 2021
08c0880
Fixed lintly E503
bobluppes May 8, 2021
f70761e
Start scheduler explicitly such that cluster is not running in local …
bobluppes May 8, 2021
785c917
Cleaned main.py and extracted scheduler specific methods to helpers.py
bobluppes May 8, 2021
af00024
Remove benchmark dry run
bobluppes May 8, 2021
d69da99
Updated benchmark config
bobluppes May 8, 2021
5be4ab6
Fixed keyerror in main.py
bobluppes May 8, 2021
622f23e
Dont overwrite data dict and set repeats to 1
bobluppes May 8, 2021
23ad475
Set repeats to 5
bobluppes May 8, 2021
75abf66
Made plots for both cluster benchmarks in plots-worker.ipynb
bobluppes May 8, 2021
c4dfc8f
Added newline to end of utils.py
bobluppes May 8, 2021
40e8384
Added option to start an RE2 accelerated worker
bobluppes May 8, 2021
28183cf
Cleaned up main.py and extracted functionalities to benchmarks.py and…
bobluppes May 8, 2021
c9b6159
Added method to warm workers, effectively scattering input parquet files
bobluppes May 8, 2021
e8f1b8d
6 repeats and dont consider first 3 runs in benchmark
bobluppes May 9, 2021
1c05e77
Warm workers in main loop and do consider first 3 runs
bobluppes May 9, 2021
8cf1608
Use underscore variable for dry run result in benchmarks.py
bobluppes May 9, 2021
d3cfeba
Newest plots in plots-worker.ipynb
bobluppes May 12, 2021
702d9a2
Open changes in plots-worker.ipynb
bobluppes Jun 10, 2021
19 changes: 19 additions & 0 deletions .github/workflows/lint.yml
@@ -0,0 +1,19 @@
name: lint

on: [pull_request]

jobs:
flake8:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
submodules: true
- uses: grantmcconnaughey/[email protected]
with:
# The GitHub API token to create reviews with
token: ${{ secrets.GITHUB_TOKEN }}
# Fail if "new" violations detected or "any", default "new"
failIf: new
# Additional arguments to pass to flake8, default "." (current directory)
args: ""
5 changes: 4 additions & 1 deletion .gitignore
@@ -150,4 +150,7 @@ cython_debug/
*.pdf

# Profiler results
profiler/*.txt
profiler/*.txt

# Dask distributed
dask-worker-space/
1 change: 1 addition & 0 deletions README.md
@@ -1,6 +1,7 @@
# Dask Accelerated

[![test](https://github.com/teratide/dask-accelerated/actions/workflows/test.yml/badge.svg)](https://github.com/teratide/dask-accelerated/actions/workflows/test.yml)
[![lint](https://github.com/teratide/dask-accelerated/actions/workflows/lint.yml/badge.svg)](https://github.com/teratide/dask-accelerated/actions/workflows/lint.yml)

An accelerated version of Dask which substitutes operators in the Dask task graph with an accelerated version.
This new operator can do its evaluation using native libraries or by offloading the computation to an FPGA accelerator.
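As context for the README's description, a toy sketch of what operator substitution in a low-level Dask task graph looks like; the names `double` and `accelerated_double` are purely illustrative and not the project's code:

```python
# Toy illustration of operator substitution in a Dask task graph.
# 'double' and 'accelerated_double' are made-up stand-ins; the real project
# swaps the pandas str.match callable for an RE2- or Tidre-backed filter.
import dask


def double(x):
    return 2 * x


def accelerated_double(x):
    # Pretend this is backed by a native library or an FPGA kernel.
    return x + x


# A hand-written low-level task graph: key -> (callable, *dependencies).
dsk = {
    "a": 21,
    "b": (double, "a"),
}

# Replace every task whose callable is `double` with the accelerated version.
optimized = {
    key: (accelerated_double,) + task[1:]
    if isinstance(task, tuple) and task[0] is double
    else task
    for key, task in dsk.items()
}

print(dask.get(optimized, "b"))  # 42, computed by accelerated_double
```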
3 changes: 0 additions & 3 deletions benchmark/benchmarks.py
@@ -124,6 +124,3 @@ def benchmark_tidre_in_size(in_sizes, batch_size, batch_aggregate, repeats):
}

return data, name



12 changes: 11 additions & 1 deletion benchmark/helpers.py
@@ -2,7 +2,17 @@
from dask_accelerated import helpers


def run_repeats(in_size, batch_size, batch_aggregate, repeats, key, vanilla_filter, re2_filter, tidre_filter=None, tidre_filter_unaligned=None):
def run_repeats(
in_size,
batch_size,
batch_aggregate,
repeats,
key,
vanilla_filter,
re2_filter,
tidre_filter=None,
tidre_filter_unaligned=None
):

# Single run to mitigate caching effects
(res, dur) = helpers.run_vanilla(in_size, batch_size, batch_aggregate)
30 changes: 25 additions & 5 deletions benchmark/main.py
@@ -13,6 +13,7 @@

args = parser.parse_args()


def benchmark_re2(in_sizes, batch_aggregates, repeats):

# Constants when varying a single parameter
@@ -28,10 +29,20 @@ def benchmark_re2(in_sizes, batch_aggregates, repeats):

data = {}

(benchmark_data, benchmark_name) = bench.benchmark_re2_in_size(in_sizes, constant_batch_size_in_benchmark, constant_batch_aggregate, repeats)
(benchmark_data, benchmark_name) = bench.benchmark_re2_in_size(
in_sizes,
constant_batch_size_in_benchmark,
constant_batch_aggregate,
repeats
)
data[benchmark_name] = benchmark_data

(benchmark_data, benchmark_name) = bench.benchmark_re2_batch_size(constant_in_size, constant_batch_size_batch_benchmark, batch_aggregates, repeats)
(benchmark_data, benchmark_name) = bench.benchmark_re2_batch_size(
constant_in_size,
constant_batch_size_batch_benchmark,
batch_aggregates,
repeats
)
data[benchmark_name] = benchmark_data

benchmark_helpers.print_and_store_with_or_without_tidre(data, False)
@@ -52,10 +63,20 @@ def benchmark_tidre(in_sizes, batch_aggregates, repeats):

data = {}

(benchmark_data, benchmark_name) = bench.benchmark_tidre_in_size(in_sizes, constant_batch_size_in_benchmark, constant_batch_aggregate, repeats)
(benchmark_data, benchmark_name) = bench.benchmark_tidre_in_size(
in_sizes,
constant_batch_size_in_benchmark,
constant_batch_aggregate,
repeats
)
data[benchmark_name] = benchmark_data

(benchmark_data, benchmark_name) = bench.benchmark_tidre_batch_size(constant_in_size, constant_batch_size_batch_benchmark, batch_aggregates, repeats)
(benchmark_data, benchmark_name) = bench.benchmark_tidre_batch_size(
constant_in_size,
constant_batch_size_batch_benchmark,
batch_aggregates,
repeats
)
data[benchmark_name] = benchmark_data

benchmark_helpers.print_and_store_with_or_without_tidre(data, True)
@@ -75,4 +96,3 @@ def benchmark_tidre(in_sizes, batch_aggregates, repeats):
end = time.time()

print("Ran all benchmarks in ", (end - start) / 60, " minutes")

2 changes: 1 addition & 1 deletion benchmark/pickler.py
@@ -21,4 +21,4 @@ def load_from_notebooks():
with open(data_root + 'data.pickle', 'rb') as f:
data = pickle.load(f)

return data
return data
14 changes: 11 additions & 3 deletions dask_accelerated/helpers.py
@@ -110,7 +110,9 @@ def get_lazy_result(in_size, batch_size, split_row_groups):
parquet_engine = "pyarrow" # Valid engines: ['fastparquet', 'pyarrow', 'pyarrow-dataset', 'pyarrow-legacy']
file_root = "../data_generator/diving/data-"
file_ext = ".parquet"
regex = '.*[tT][eE][rR][aA][tT][iI][dD][eE][ \t\n]+[dD][iI][vV][iI][nN][gG][ \t\n]+([sS][uU][bB])+[sS][uU][rR][fF][aA][cC][eE].*'
regex = '.*[tT][eE][rR][aA][tT][iI][dD][eE][ \t\n]+' \
'[dD][iI][vV][iI][nN][gG][ \t\n]+' \
'([sS][uU][bB])+[sS][uU][rR][fF][aA][cC][eE].*'

# Load the dataframe
columns = ["value", "string"]
@@ -151,7 +153,11 @@ def run_and_record_durations(dsk, result, substitute_operator):
filter_durations = np.array(substitute_operator.durations)
durations = construct_durations(total_duration_in_seconds, filter_durations)

print("Computed ", res, " in ", total_duration_in_seconds, " seconds\tfilter: ", durations['filter']['total'], " seconds")
print(
"Computed ", res,
" in ", total_duration_in_seconds, " seconds",
"\tfilter: ", durations['filter']['total'], " seconds"
)

return res, durations

@@ -191,7 +197,9 @@ def generate_datasets_if_needed(sizes, chunksize=1e6):
print("Missing datasets found, these will be generated")
match_percentage = 0.05
data_length = 100
regex = '.*[tT][eE][rR][aA][tT][iI][dD][eE][ \t\n]+[dD][iI][vV][iI][nN][gG][ \t\n]+([sS][uU][bB])+[sS][uU][rR][fF][aA][cC][eE].*'
regex = '.*[tT][eE][rR][aA][tT][iI][dD][eE][ \t\n]+' \
'[dD][iI][vV][iI][nN][gG][ \t\n]+' \
'([sS][uU][bB])+[sS][uU][rR][fF][aA][cC][eE].*'
parquet_chunksize = chunksize
parquet_compression = 'none'

3 changes: 0 additions & 3 deletions dask_accelerated/operators.py
@@ -150,9 +150,6 @@ def custom_tidre_unaligned(self, obj, accessor, attr, args, kwargs):
# The number of records in the current batch
number_of_records = obj.size

# The regular expression to be matched
regex = args[0]

# Add some padding to the pandas series, which will be removed from the buffers later
obj_with_padding = pandas.concat([pandas.Series(["a"]), obj])
arr = pyarrow.Array.from_pandas(obj_with_padding)
8 changes: 5 additions & 3 deletions dask_accelerated/optimization.py
@@ -1,6 +1,7 @@
import re
from dask.optimization import SubgraphCallable


# Unwrap the corresponding subgraph_callable in the task graph in order to insert a custom function
def compute_substitute(dsk, key, custom):
str_match = dsk[(key, 0)]
@@ -16,6 +17,7 @@ def compute_substitute(dsk, key, custom):

return SubgraphCallable(new_dsk, call.outkey, call.inkeys, "regex_callable")


# Substitute all string match operators in the graph with the custom re2 operator
def optimize_graph_re2(graph, substitute_function):

@@ -27,7 +29,7 @@ def optimize_graph_re2(graph, substitute_function):
# This key is used to target one of the operators in the task graph
# from which the regex_callable will be constructed
for key in graph.keys():
if re.match(regex, key[0]) != None:
if re.match(regex, key[0]) is not None:
key = key[0] # The keys are tuples and the operator name is the first value
break

@@ -39,9 +41,9 @@ def optimize_graph_re2(graph, substitute_function):

# Substitute the regex_callable if the operator name matches the str-match pattern
for k in dsk:
if re.match(regex, k[0]) != None:
if re.match(regex, k[0]) is not None:
target_op = list(dsk[k])
target_op[0] = regex_callable
dsk[k] = tuple(target_op)

return dsk
return dsk
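For reference, a hedged usage sketch of `optimize_graph_re2`: the parquet path and match pattern below are placeholders, `CustomFilter().custom_tidre` is the substitute used elsewhere in this PR, and how the optimized graph is fed back to Dask is omitted.

```python
# Hedged usage sketch for optimize_graph_re2. Assumes a parquet dataset with
# 'string' and 'value' columns (as in dask_accelerated/helpers.py); the file
# path and regex below are placeholders, not values taken from the project.
import dask.dataframe as dd

from dask_accelerated.operators import CustomFilter
from dask_accelerated.optimization import optimize_graph_re2

df = dd.read_parquet("path/to/data.parquet", engine="pyarrow")

# A lazy filter + aggregation; the str.match tasks end up in the graph under
# keys containing 'str-match', which is what optimize_graph_re2 targets.
lazy_result = df[df["string"].str.match(".*subsurface.*")]["value"].sum()

# Substitute the pandas str.match callable with the accelerated filter.
substitute = CustomFilter().custom_tidre
optimized_dsk = optimize_graph_re2(lazy_result.__dask_graph__(), substitute)
```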
Empty file.
60 changes: 60 additions & 0 deletions dask_accelerated_worker/accelerated_worker.py
@@ -0,0 +1,60 @@
import logging
import re
import pickle
from dask.distributed import Worker
from dask.distributed import worker
from dask.optimization import SubgraphCallable
from dask_accelerated.operators import CustomFilter

logger = logging.getLogger(__name__)


# Create an accelerated worker class based on the original worker class
class AcceleratedWorker(Worker):

def add_task(
self,
key,
function=None,
args=None,
kwargs=None,
task=worker.no_value,
who_has=None,
nbytes=None,
priority=None,
duration=None,
resource_restrictions=None,
actor=False,
**kwargs2,
):
regex = re.compile('.*str-match.*')
if re.match(regex, key) is not None:
# This task matches the operation we want to perform on fpga
func = pickle.loads(function)

substitute_op = CustomFilter().custom_tidre

dsk = func.dsk
vals = dsk[func.outkey]
vals_args = vals[3]
new_vals_args = (vals_args[0], [['_func', substitute_op], vals_args[1][1]])
new_vals = (vals[0], vals[1], vals[2], new_vals_args)
dsk[func.outkey] = new_vals

new_func = SubgraphCallable(dsk, func.outkey, func.inkeys, "regex_callable")
function = pickle.dumps(new_func)

super().add_task(
key,
function,
args,
kwargs,
task,
who_has,
nbytes,
priority,
duration,
resource_restrictions,
actor,
**kwargs2,
)
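A hedged sketch of how such a worker could be started and attached to a scheduler; the address and the `accelerated-<timestamp>` naming are assumptions based on `helpers.remove_non_accelerated_workers`, and the PR's `start_worker.py` remains the actual entry point:

```python
# Hedged sketch: start an AcceleratedWorker and connect it to a scheduler.
# The scheduler address is an assumption; the 'accelerated-<timestamp>' name
# matters because remove_non_accelerated_workers keeps only such workers.
import asyncio
import time

from dask_accelerated_worker.accelerated_worker import AcceleratedWorker


async def main(scheduler_address="tcp://localhost:8786"):
    worker = AcceleratedWorker(
        scheduler_address,
        name=f"accelerated-{int(time.time())}",
        nthreads=1,
    )
    await worker             # start and register with the scheduler
    await worker.finished()  # run until the scheduler shuts it down


if __name__ == "__main__":
    asyncio.get_event_loop().run_until_complete(main())
```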
65 changes: 65 additions & 0 deletions dask_accelerated_worker/helpers.py
@@ -0,0 +1,65 @@
from dask.distributed import Scheduler
from tornado.ioloop import IOLoop
import asyncio


def get_scheduler():
kwargs = {
'preload': (),
'preload_argv': (),
'interface': None,
'protocol': None,
'scheduler_file': '',
'idle_timeout': None
}

loop = IOLoop.current()
sec = {}
host = ''
port = 8786
dashboard = True
dashboard_address = 8787
dashboard_prefix = ''

scheduler = Scheduler(
loop=loop,
security=sec,
host=host,
port=port,
dashboard=dashboard,
dashboard_address=dashboard_address,
http_prefix=dashboard_prefix,
**kwargs
)

return scheduler, loop


def run_scheduler(scheduler, loop):

async def run():
await scheduler
await scheduler.finished()

loop.run_sync(run)


def remove_non_accelerated_workers(scheduler):

# New event loop to await async remove worker method
loop = asyncio.new_event_loop()

# TODO: fix this
# Somehow this does not always work on the first try
# A quick but messy fix is to run it more than once to
# ensure all non-accelerated workers get removed
for i in range(3):
workers = scheduler.workers
for worker in workers:
# All accelerated workers are called 'accelerated-[timestamp]'
if str(workers[worker].name).split('-')[0] != 'accelerated':
loop.run_until_complete(
scheduler.remove_worker(address=worker)
)

loop.close()
Loading