Skip to content

Commit

Permalink
Merge pull request #34 from LUMC/release_0.3.0
Browse files Browse the repository at this point in the history
Release 0.3.0
  • Loading branch information
rhpvorderman authored Jan 17, 2019
2 parents 55be78f + 5100bd1 commit 970c759
Show file tree
Hide file tree
Showing 16 changed files with 612 additions and 126 deletions.
17 changes: 15 additions & 2 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@ Changelog
.. NOTE: This document is user facing. Please word the changes in such a way
.. that users understand how the changes affect the new version.
Version 0.3.0
---------------------------
+ Improved the log output to look nicer and make workflow log paths easier to
find in the test output.
+ Fixed an error that polluted the log message with a pytest stacktrace when
running more than one workflow. Measures are taken in our test framework to
detect such issues in the future.
+ Added the possibility to run multiple workflows simultaneously with the
``--workflow-threads`` or ``--wt`` flag.
+ Made code easier to maintain by using stdlib instead of pytest's ``py`` lib
in all of the code.
+ Added a schema check to ensure that tests have unique names when whitespace
is removed.

Version 0.2.0
---------------------------
Expand All @@ -15,11 +28,11 @@ Version 0.2.0
+ Start using sphinx and readthedocs.org for creating project documentation.
+ The temporary directories in which workflows are run are automatically
cleaned up at the end of each workflow test. You can disable this behaviour
by using the `--keep-workflow-wd` flag, which allows you to inspect the working
by using the ``--keep-workflow-wd`` flag, which allows you to inspect the working
directory after the workflow tests have run. This is useful for debugging
workflows.
+ The temporary directories in which workflows are run can now be
changed by using the `--basetemp` flag. This is because pytest-workflow now
changed by using the ``--basetemp`` flag. This is because pytest-workflow now
uses the built-in tmpdir capabilities of pytest.
+ Save stdout and stderr of each workflow to a file and report their locations
to stdout when running ``pytest``.
Expand Down
5 changes: 5 additions & 0 deletions docs/manual.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ the ``--keep-workflow-wd`` flag to disable cleanup.
If you wish to change the temporary directory in which the workflows are run
use ``--basetemp <dir>`` to change pytest's base temp directory.

To run multiple workflows simultaneously you can use the
``--workflow-threads <int>`` or ``--wt <int>`` flag. This defines the number
of workflows that can be run simultaneously. This will speed up things if
you have enough resources to process these workflows simultaneously.

==================================
Writing tests with pytest-workflow
==================================
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@

setup(
name="pytest-workflow",
version="0.2.0",
version="0.3.0",
description="A pytest plugin for configuring workflow/pipeline tests "
"using YAML files",
author="Leiden University Medical Center, various departments",
author="Leiden University Medical Center",
author_email="[email protected]", # A placeholder for now
long_description=LONG_DESCRIPTION,
long_description_content_type="text/x-rst",
Expand Down
14 changes: 14 additions & 0 deletions src/pytest_workflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,17 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with pytest-workflow. If not, see <https://www.gnu.org/licenses/

import re


# This function was created to ensure the same conversion is used throughout
# pytest-workflow.
def replace_whitespace(string: str, replace_with: str = '_') -> str:
    """
    Replaces all whitespace with the string in replace_with.

    Every run of one or more consecutive whitespace characters is replaced
    by a single copy of replace_with. The replacement is inserted literally:
    backslashes in replace_with are not interpreted as escape sequences or
    group references.
    :param string: input string
    :param replace_with: Replace whitespace with this string. Default: '_'
    :return: The string with whitespace converted.
    """
    # A callable is passed as the replacement on purpose. When re.sub gets a
    # plain string it processes backslash escapes in it, which raises
    # re.error for values such as '\\d' and silently alters values such as
    # a literal backslash followed by 'n'.
    return re.sub(r'\s+', lambda _match: replace_with, string)
80 changes: 63 additions & 17 deletions src/pytest_workflow/content_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@
The design philosophy here was that each piece of text should only be read
once."""

import threading
from pathlib import Path
from typing import Iterable, List, Set
from typing import Callable, Iterable, List, Set

import pytest

from .schema import ContentTest
from .workflow import Workflow


def check_content(strings: List[str],
Expand Down Expand Up @@ -88,33 +90,66 @@ def file_to_string_generator(filepath: Path) -> Iterable[str]:

class ContentTestCollector(pytest.Collector):
def __init__(self, name: str, parent: pytest.Collector,
content: Iterable[str], content_test: ContentTest):
content_generator: Callable[[], Iterable[str]],
content_test: ContentTest,
workflow: Workflow):
"""
Creates a content test collector
:param name: Name of the thing whose contents are tested
:param parent: a pytest.Collector object
:param content_generator: a function that should return the content as
lines. This function is a placeholder for the content itself. In other
words: instead of passing the contents of a file directly to the
ContentTestCollector, you pass a function that when called will return
the contents. This allows the pytest collection phase to finish before
the file is read. This is useful because the workflows are run after
the collection phase.
:param content_test: a ContentTest object.
:param workflow: the workflow that is running.
"""
# pylint: disable=too-many-arguments
# it is still only 5 not counting self.
super().__init__(name, parent=parent)
self.content = content
self.content_generator = content_generator
self.content_test = content_test
self.workflow = workflow
self.found_strings = None
self.thread = None

def find_strings(self):
"""Find the strings that are looked for in the given content
The content_generator function shines here. It only starts looking
for lines of text AFTER the workflow is finished. So that is why a
function is needed here and not just a variable containing lines of
text."""
self.workflow.wait()
strings_to_check = (self.content_test.contains +
self.content_test.must_not_contain)
self.found_strings = check_content(
strings=strings_to_check,
text_lines=self.content_generator())

def collect(self):
found_strings = check_content(
self.content_test.contains + self.content_test.must_not_contain,
self.content)

# A thread is started that looks for the strings and collection can go
# on without hindrance. The thread is passed to the items, so they can
# wait on the thread to complete.
self.thread = threading.Thread(target=self.find_strings)
self.thread.start()
test_items = []

test_items += [
ContentTestItem(
parent=self,
string=string,
should_contain=True,
contains=string in found_strings
should_contain=True
)
for string in self.content_test.contains]

test_items += [
ContentTestItem(
parent=self,
string=string,
should_contain=False,
contains=string in found_strings
should_contain=False
)
for string in self.content_test.must_not_contain]

Expand All @@ -124,24 +159,35 @@ def collect(self):
class ContentTestItem(pytest.Item):
"""Item that reports if a string has been found in content."""

def __init__(self, parent: pytest.Collector, string: str,
should_contain: bool, contains: bool):
def __init__(self, parent: ContentTestCollector, string: str,
should_contain: bool):
"""
Create a ContentTestItem
:param parent: A pytest collector
:param parent: A ContentTestCollector. We use a ContentTestCollector
here and not just any pytest collector because we now can wait on the
thread in the parent, and get its found strings when its thread is
finished.
:param string: The string that was searched for.
:param should_contain: Whether the string should have been there
:param result:
"""
contain = "contains" if should_contain else "does not contain"
name = "{0} '{1}'".format(contain, string)
super().__init__(name, parent=parent)
self.should_contain = should_contain
self.string = string
self.contains = contains

def runtest(self):
assert self.contains == self.should_contain
"""Only after a workflow is finished the contents of files and logs are
read. The ContentTestCollector parent reads each file/log once. This is
done in its thread. We wait for this thread to complete. Then we check
all the found strings in the parent.
This way we do not have to read each file one time per ContentTestItem.
This makes content checking much faster on big files (NGS > 1 GB files)
where we are looking for multiple words (variants / sequences). """
# Wait for thread to complete.
self.parent.thread.join()
assert ((self.string in self.parent.found_strings) ==
self.should_contain)

def repr_failure(self, excinfo):
# pylint: disable=unused-argument
Expand Down
41 changes: 30 additions & 11 deletions src/pytest_workflow/file_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,32 +15,33 @@
# along with pytest-workflow. If not, see <https://www.gnu.org/licenses/

"""All tests for workflow files"""
import functools
import hashlib
from pathlib import Path
from typing import Union

import pytest

from .content_tests import ContentTestCollector, file_to_string_generator
from .schema import FileTest
from .workflow import Workflow


class FileTestCollector(pytest.Collector):
"""This collector returns all tests for one particular file"""

def __init__(self, parent: pytest.Collector, filetest: FileTest,
cwd: Union[bytes, str]):
workflow: Workflow):
"""
Create a FiletestCollector
:param parent: The collector that started this collector
:param filetest: a FileTest object
:param cwd: the working directory from which relative filepaths should
be evaluated
:param workflow: the workflow that is running to generate this file
"""
name = str(filetest.path)
super().__init__(name, parent)
self.filetest = filetest
self.cwd = Path(cwd)
self.cwd = workflow.cwd
self.workflow = workflow

def collect(self):
"""Returns all tests for one file. Also the absolute path of the files
Expand All @@ -53,19 +54,28 @@ def collect(self):
# certain conditions are met.
tests = []

tests += [FileExists(self, filepath, self.filetest.should_exist)]
tests += [FileExists(parent=self,
filepath=filepath,
should_exist=self.filetest.should_exist,
workflow=self.workflow)]

if self.filetest.contains or self.filetest.must_not_contain:
tests += [ContentTestCollector(
name="content",
parent=self,
content=file_to_string_generator(filepath),
content_test=self.filetest
content_generator=functools.partial(file_to_string_generator,
filepath),
content_test=self.filetest,
# FileTest inherits from ContentTest. So this is valid.
workflow=self.workflow
)]

if self.filetest.md5sum:
tests += [FileMd5(self, filepath, self.filetest.md5sum)]
tests += [FileMd5(
parent=self,
filepath=filepath,
md5sum=self.filetest.md5sum,
workflow=self.workflow)]

return tests

Expand All @@ -74,18 +84,23 @@ class FileExists(pytest.Item):
"""A pytest file exists test."""

def __init__(self, parent: pytest.Collector, filepath: Path,
should_exist: bool):
should_exist: bool, workflow: Workflow):
"""
:param parent: Collector that started this test
:param filepath: A path to the file
:param should_exist: Whether the file should exist
:param workflow: The workflow running to generate the file
"""
name = "should exist" if should_exist else "should not exist"
super().__init__(name, parent)
self.file = filepath
self.should_exist = should_exist
self.workflow = workflow

def runtest(self):
# Wait for the workflow process to finish before checking if the file
# exists.
self.workflow.wait()
assert self.file.exists() == self.should_exist

def repr_failure(self, excinfo):
Expand All @@ -105,20 +120,24 @@ def repr_failure(self, excinfo):

class FileMd5(pytest.Item):
def __init__(self, parent: pytest.Collector, filepath: Path,
md5sum: str):
md5sum: str, workflow: Workflow):
"""
Create a test for the file md5sum.
:param parent: The collector that started this item
:param filepath: The path to the file
:param md5sum: The expected md5sum
:param workflow: The workflow running to generate the file
"""
name = "md5sum"
super().__init__(name, parent)
self.filepath = filepath
self.expected_md5sum = md5sum
self.observed_md5sum = None
self.workflow = workflow

def runtest(self):
# Wait for the workflow to finish before we check the md5sum of a file.
self.workflow.wait()
self.observed_md5sum = file_md5sum(self.filepath)
assert self.observed_md5sum == self.expected_md5sum

Expand Down
Loading

0 comments on commit 970c759

Please sign in to comment.