Merge pull request #54 from LUMC/release_1.0.0

Release 1.0.0
LUMC · Feb 6, 2019 · ff1d262 · ff1d262
2 parents 6944edb + 84af28d
commit ff1d262
Show file tree

Hide file tree

Showing 18 changed files with 361 additions and 217 deletions.
diff --git a/.pylintrc b/.pylintrc
@@ -4,6 +4,7 @@ disable=wrong-import-order,  # This conflicts with flake8-import-order
         W0511, # We can figure out the fixme's later.
         line-too-long,  # Already tested by flake8
         unused-import,  # Already tested by flake8
+        no-else-return, # This is pylint opinionated to the hilt and annoying.
         missing-docstring
         # Sometimes docstrings are missing because they add visual clutter on self-documenting methods.
         # Adding `# pylint: disable missing-docstrings because this method is self-documenting` does not help with the visual clutter!
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -7,6 +7,49 @@ Changelog
 .. NOTE: This document is user facing. Please word the changes in such a way
 .. that users understand how the changes affect the new version.
 
+Version 1.0.0
+---------------------------
+Lots of small fixes that improve the usability of pytest-workflow are included
+in version 1.0.0.
+
++ Gzipped files can now also be checked for contents. Files with '.gz' as
+  extension are automatically decompressed.
++ ``stdout`` and ``stderr`` of workflows are now streamed to a file instead of
+  being kept in memory. This means you can check the progress of a workflow by
+  running ``tail -f <stdout or stderr>``. The location of ``stdout`` and
+  ``stderr`` is now reported at the start of each workflow. If the
+  ``--keep-workflow-wd`` flag is not set, the ``stdout`` and ``stderr`` files
+  will be deleted with the rest of the workflow files.
++ The log now reports when a workflow is starting, instead of when it is added
+  to the queue. This makes it easier to see which workflows are currently
+  running and if you forgot to use the ``--workflow-threads`` or ``--wt`` flag.
++ Workflow exit code failures now mention the name of the workflow. Previously
+  the generic name "Workflow" was used, which made it harder to figure out
+  which workflows failed.
++ When tests of file content fail because the file does not exist, a different
+  error message is given compared to when the file exists, but the content is
+  not there, which makes debugging easier. Also the accompanying
+  "FileNotFound" error stacktrace is now suppressed, which keeps the test
+  output more pleasant.
++ When tests of stdout/stderr content or file content fail a more informative
+  error message is given to allow for easier debugging.
++ All workflows now get their own folder within the `same` temporary directory.
+  This fixes a bug where if ``basetemp`` was not set, each workflow would get
+  its own folder in a separate temp directory. For example running workflows
+  'workflow1' and 'workflow2' would create two temporary folders:
+
+  '/tmp/pytest_workflow\_\ **33mrz5a5**/workflow1' and
+  '/tmp/pytest_workflow\_\ **b8m1wzuf**/workflow2'
+
+  This is now changed to have all workflows in one temporary directory per
+  pytest run:
+
+  '/tmp/pytest_workflow\_\ **33mrz5a5**/workflow1' and
+  '/tmp/pytest_workflow\_\ **33mrz5a5**/workflow2'
+
++ Disallow empty ``command`` and ``name`` keys. An empty ``command`` caused
+  pytest-workflow to hang. Empty names are also disallowed.
+
 Version 0.4.0
 ---------------------------
 + Added more information to the manual on how to debug pipelines and use

diff --git a/README.rst b/README.rst
@@ -44,10 +44,14 @@ Run ``pytest`` from an environment with pytest-workflow installed.
 Pytest will automatically gather files in the ``tests`` directory starting with
 ``test`` and ending in ``.yaml`` or ``.yml``.
 
-For debugging pipelines running ``pytest -v --keep-workflow-wd`` is
-recommended. This will save the logs and the workflow directory so it is
-possible to check where the pipeline crashed. It will also give a better
-overview of succeeded and failed tests.
+To check the progress of a workflow while it is running you can use ``tail -f``
+on the ``stdout`` or ``stderr`` file of the workflow. The locations of these
+files are reported in the log as soon as a workflow is started.
+
+For debugging pipelines using the ``--keep-workflow-wd`` flag is
+recommended. This will keep the workflow directory and logs after the test run
+so it is possible to check where the pipeline crashed. The ``-v`` flag can come
+in handy as well as it gives a complete overview of succeeded and failed tests.
 
 Below is an example of a YAML file that defines a test:
 

diff --git a/docs/running_pytest_workflow.rst b/docs/running_pytest_workflow.rst
@@ -7,13 +7,12 @@ Pytest will automatically gather files in the ``tests`` directory starting with
 ``test`` and ending in ``.yaml`` or ``.yml``.
 
 The workflows are run automatically. Each workflow gets its own temporary
-directory to run. These directories are cleaned up after the tests are
-completed. If you wish to inspect the output of a failing workflow you can use
-the ``--keep-workflow-wd`` flag to disable cleanup. This will also make sure
-the logs of the pipeline are saved in the temporary directory. When
-``--keep-workflow-wd`` is set, the paths to the logs and the temporary
-directory are reported in pytest's output. The `--keep-workflow-wd`` flag is
-highly recommended when debugging pipelines.
+directory to run. The ``stdout`` and ``stderr`` of the workflow command are
+also saved to this directory. The temporary directories are cleaned up after
+the tests are completed. If you wish to inspect the output of a failing
+workflow you can use the ``--keep-workflow-wd`` flag to disable cleanup. This
+will also make sure the logs of the pipeline are not deleted. The
+``--keep-workflow-wd`` flag is highly recommended when debugging pipelines.
 
 If you wish to change the temporary directory in which the workflows are run
 use ``--basetemp <dir>`` to change pytest's base temp directory.
@@ -33,6 +32,10 @@ To run multiple workflows simultaneously you can use
 of workflows that can be run simultaneously. This will speed up things if
 you have enough resources to process these workflows simultaneously.
 
+To check the progress of a workflow while it is running you can use ``tail -f``
+on the ``stdout`` or ``stderr`` file of the workflow. The locations of these
+files are reported in the log as soon as a workflow is started.
+
 Running specific workflows
 ----------------------------
 To run a specific workflow use the ``--tag`` flag. Each workflow is tagged with

diff --git a/docs/writing_tests.rst b/docs/writing_tests.rst
@@ -55,7 +55,8 @@ A more advanced example:
     exit_code: 2                       # What the exit code should be (optional, if not given defaults to 0)
     files:
       - path: "fail.log"               # Multiple files can be tested for each workflow
-      - path: "TomCruise.txt"
+      - path: "TomCruise.txt.gz"       # Gzipped files can also be searched, provided their extension is '.gz'
+        contains: "starring"
     stderr:                            # Options for testing stderr (optional)
       contains:                        # A list of strings which should be in stderr (optional)
         - "BSOD error, please contact the IT crowd"

diff --git a/setup.py b/setup.py
@@ -21,7 +21,7 @@
 
 setup(
     name="pytest-workflow",
-    version="0.4.0",
+    version="1.0.0",
     description="A pytest plugin for configuring workflow/pipeline tests "
                 "using YAML files",
     author="Leiden University Medical Center",

diff --git a/src/pytest_workflow/content_tests.py b/src/pytest_workflow/content_tests.py
@@ -19,10 +19,11 @@
 
 The design philosophy here was that each piece of text should only be read
 once."""
-
+import functools
+import gzip
 import threading
 from pathlib import Path
-from typing import Callable, Iterable, List, Set
+from typing import Iterable, List, Optional, Set
 
 import pytest
 
@@ -78,56 +79,64 @@ def check_content(strings: List[str],
 
 def file_to_string_generator(filepath: Path) -> Iterable[str]:
     """
-    Turns a file into a line generator.
+    Turns a file into a line generator. Files ending with .gz are automatically
+    decompressed.
     :param filepath: the file path
     :return: yields lines of the file
     """
-    # Use 'r' here explicitly as opposed to 'rb'
-    with filepath.open("r") as file_handler:
+    file_open = (functools.partial(gzip.open, str(filepath))
+                 if filepath.suffix == ".gz" else
+                 filepath.open)
+    # Use 'rt' here explicitly as opposed to 'rb'
+    with file_open(mode='rt') as file_handler:
         for line in file_handler:
             yield line
 
 
 class ContentTestCollector(pytest.Collector):
     def __init__(self, name: str, parent: pytest.Collector,
-                 content_generator: Callable[[], Iterable[str]],
+                 filepath: Path,
                  content_test: ContentTest,
-                 workflow: Workflow):
+                 workflow: Workflow,
+                 content_name: Optional[str] = None):
         """
         Creates a content test collector
         :param name: Name of the thing which contents are tested
         :param parent: a pytest.Collector object
-        :param content_generator: a function that should return the content as
-        lines. This function is a placeholder for the content itself. In other
-        words: instead of passing the contents of a file directly to the
-        ContentTestCollector, you pass a function that when called will return
-        the contents. This allows the pytest collection phase to finish before
-        the file is read. This is useful because the workflows are run after
-        the collection phase.
+        :param filepath: the file that contains the content
         :param content_test: a ContentTest object.
         :param workflow: the workflow is running.
+        :param content_name: The name of the content that will be displayed if
+        the test fails. Defaults to filepath.
         """
         # pylint: disable=too-many-arguments
-        # it is still only 5 not counting self.
+        # Cannot think of a better way to do this.
         super().__init__(name, parent=parent)
-        self.content_generator = content_generator
+        self.filepath = filepath
         self.content_test = content_test
         self.workflow = workflow
         self.found_strings = None
         self.thread = None
+        # We check the contents of files. Sometimes files are not there. Then
+        # content can not be checked. We save FileNotFoundErrors in this
+        # boolean.
+        self.file_not_found = False
+        self.content_name = content_name or str(filepath)
 
     def find_strings(self):
-        """Find the strings that are looked for in the given content
-        The content_generator function shines here. It only starts looking
-        for lines of text AFTER the workflow is finished. So that is why a
-        function is needed here and not just a variable containing lines of
-        text."""
+        """Find the strings that are looked for in the given file
+
+        When a file we test is not produced, we save the FileNotFoundError so
+        we can give an accurate repr_failure."""
         self.workflow.wait()
         strings_to_check = (self.content_test.contains +
                             self.content_test.must_not_contain)
-        self.found_strings = check_content(
-            strings=strings_to_check,
-            text_lines=self.content_generator())
+        try:
+            self.found_strings = check_content(
+                strings=strings_to_check,
+                text_lines=file_to_string_generator(self.filepath))
+        except FileNotFoundError:
+            self.file_not_found = True
 
     def collect(self):
         # A thread is started that looks for the strings and collection can go
@@ -141,15 +150,17 @@ def collect(self):
             ContentTestItem(
                 parent=self,
                 string=string,
-                should_contain=True
+                should_contain=True,
+                content_name=self.content_name
             )
             for string in self.content_test.contains]
 
         test_items += [
             ContentTestItem(
                 parent=self,
                 string=string,
-                should_contain=False
+                should_contain=False,
+                content_name=self.content_name
             )
             for string in self.content_test.must_not_contain]
 
@@ -160,7 +171,7 @@ class ContentTestItem(pytest.Item):
     """Item that reports if a string has been found in content."""
 
     def __init__(self, parent: ContentTestCollector, string: str,
-                 should_contain: bool):
+                 should_contain: bool, content_name: str):
         """
         Create a ContentTestItem
         :param parent: A ContentTestCollector. We use a ContentTestCollector
@@ -169,36 +180,50 @@ def __init__(self, parent: ContentTestCollector, string: str,
         finished.
         :param string: The string that was searched for.
         :param should_contain: Whether the string should have been there
+        :param content_name: the name of the content which allows for easier
+        debugging if the test fails
         """
         contain = "contains" if should_contain else "does not contain"
         name = "{0} '{1}'".format(contain, string)
         super().__init__(name, parent=parent)
         self.should_contain = should_contain
         self.string = string
+        self.content_name = content_name
 
     def runtest(self):
         """Only after a workflow is finished the contents of files and logs are
-        read. The ContentTestCollector parent reads each file/log once. This is
+        read. The ContentTestCollector parent reads each file once. This is
         done in its thread. We wait for this thread to complete. Then we check
         all the found strings in the parent.
         This way we do not have to read each file one time per ContentTestItem
         this makes content checking much faster on big files (NGS > 1 GB files)
         were we are looking for multiple words (variants / sequences). """
         # Wait for thread to complete.
         self.parent.thread.join()
+        assert not self.parent.file_not_found
         assert ((self.string in self.parent.found_strings) ==
                 self.should_contain)
 
     def repr_failure(self, excinfo):
         # pylint: disable=unused-argument
         # excinfo needed for pytest.
-        message = (
-            "'{string}' was {found} in {content} "
-            "while it {should} be there."
-        ).format(
-            string=self.string,
-            found="not found" if self.should_contain else "found",
-            content=self.parent.name,
-            should="should" if self.should_contain else "should not"
-        )
-        return message
+        if self.parent.file_not_found:
+            return (
+                "'{content}' does not exist and cannot be searched "
+                "for {containing} '{string}'."
+            ).format(
+                content=self.content_name,
+                containing="containing" if self.should_contain
+                else "not containing",
+                string=self.string)
+
+        else:
+            return (
+                "'{string}' was {found} in {content} "
+                "while it {should} be there."
+            ).format(
+                string=self.string,
+                found="not found" if self.should_contain else "found",
+                content=self.content_name,
+                should="should" if self.should_contain else "should not"
+            )
diff --git a/src/pytest_workflow/file_tests.py b/src/pytest_workflow/file_tests.py
@@ -15,13 +15,12 @@
 # along with pytest-workflow.  If not, see <https://www.gnu.org/licenses/
 
 """All tests for workflow files"""
-import functools
 import hashlib
 from pathlib import Path
 
 import pytest
 
-from .content_tests import ContentTestCollector, file_to_string_generator
+from .content_tests import ContentTestCollector
 from .schema import FileTest
 from .workflow import Workflow
 
@@ -63,12 +62,10 @@ def collect(self):
             tests += [ContentTestCollector(
                 name="content",
                 parent=self,
-                content_generator=functools.partial(file_to_string_generator,
-                                                    filepath),
+                filepath=filepath,
                 content_test=self.filetest,
                 # FileTest inherits from ContentTest. So this is valid.
-                workflow=self.workflow
-            )]
+                workflow=self.workflow)]
 
         if self.filetest.md5sum:
             tests += [FileMd5(