19
19
20
20
The design philosophy here was that each piece of text should only be read
21
21
once."""
22
-
22
+ import functools
23
+ import gzip
23
24
import threading
24
25
from pathlib import Path
25
- from typing import Callable , Iterable , List , Set
26
+ from typing import Iterable , List , Optional , Set
26
27
27
28
import pytest
28
29
@@ -78,56 +79,64 @@ def check_content(strings: List[str],
78
79
79
80
def file_to_string_generator(filepath: Path) -> Iterable[str]:
    """
    Turns a file into a line generator. Files ending with .gz are automatically
    decompressed.

    Because this is a generator, the file is only opened once the first line
    is requested. A missing file therefore raises FileNotFoundError during
    iteration, not when this function is called.

    :param filepath: the file path
    :return: yields lines of the file
    """
    # Both branches produce a callable that only still needs the mode:
    # gzip.open gets the path bound up front, Path.open already knows it.
    file_open = (functools.partial(gzip.open, str(filepath))
                 if filepath.suffix == ".gz" else
                 filepath.open)
    # Use 'rt' here explicitly as opposed to 'rb'. gzip.open defaults to
    # binary mode, so text mode must be requested to get str lines.
    with file_open(mode='rt') as file_handler:
        yield from file_handler
89
94
90
95
91
96
class ContentTestCollector (pytest .Collector ):
92
97
def __init__(self, name: str, parent: pytest.Collector,
             filepath: Path,
             content_test: ContentTest,
             workflow: Workflow,
             content_name: Optional[str] = None):
    """
    Creates a content test collector

    :param name: Name of the thing whose contents are tested
    :param parent: a pytest.Collector object
    :param filepath: the file that contains the content
    :param content_test: a ContentTest object.
    :param workflow: the workflow that is running.
    :param content_name: The name of the content that will be displayed if
    the test fails. Defaults to filepath.
    """
    # pylint: disable=too-many-arguments
    # Cannot think of a better way to do this.
    super().__init__(name, parent=parent)
    self.filepath = filepath
    self.content_test = content_test
    self.workflow = workflow
    # Filled in by find_strings once the file has been searched.
    self.found_strings = None
    # Not created here; child items join this thread before reading
    # the results, so it must be set before they run.
    self.thread = None
    # We check the contents of files. Sometimes files are not there. Then
    # content can not be checked. We save FileNotFoundErrors in this
    # boolean.
    self.file_not_found = False
    self.content_name = content_name or str(filepath)
118
125
119
126
def find_strings(self):
    """Find the strings that are looked for in the given file

    When a file we test is not produced, we save the FileNotFoundError so
    we can give an accurate repr_failure."""
    # The file can only be read after the workflow is done.
    self.workflow.wait()
    strings_to_check = (self.content_test.contains +
                        self.content_test.must_not_contain)
    try:
        # The generator opens the file lazily, so a missing file surfaces
        # while check_content iterates, inside this try block.
        self.found_strings = check_content(
            strings=strings_to_check,
            text_lines=file_to_string_generator(self.filepath))
    except FileNotFoundError:
        # Record the failure instead of raising; found_strings stays as
        # it was and the flag is inspected by the test items.
        self.file_not_found = True
131
140
132
141
def collect (self ):
133
142
# A thread is started that looks for the strings and collection can go
@@ -141,15 +150,17 @@ def collect(self):
141
150
ContentTestItem (
142
151
parent = self ,
143
152
string = string ,
144
- should_contain = True
153
+ should_contain = True ,
154
+ content_name = self .content_name
145
155
)
146
156
for string in self .content_test .contains ]
147
157
148
158
test_items += [
149
159
ContentTestItem (
150
160
parent = self ,
151
161
string = string ,
152
- should_contain = False
162
+ should_contain = False ,
163
+ content_name = self .content_name
153
164
)
154
165
for string in self .content_test .must_not_contain ]
155
166
@@ -160,7 +171,7 @@ class ContentTestItem(pytest.Item):
160
171
"""Item that reports if a string has been found in content."""
161
172
162
173
def __init__ (self , parent : ContentTestCollector , string : str ,
163
- should_contain : bool ):
174
+ should_contain : bool , content_name : str ):
164
175
"""
165
176
Create a ContentTestItem
166
177
:param parent: A ContentTestCollector. We use a ContentTestCollector
@@ -169,36 +180,50 @@ def __init__(self, parent: ContentTestCollector, string: str,
169
180
finished.
170
181
:param string: The string that was searched for.
171
182
:param should_contain: Whether the string should have been there
183
+ :param content_name: the name of the content which allows for easier
184
+ debugging if the test fails
172
185
"""
173
186
contain = "contains" if should_contain else "does not contain"
174
187
name = "{0} '{1}'" .format (contain , string )
175
188
super ().__init__ (name , parent = parent )
176
189
self .should_contain = should_contain
177
190
self .string = string
191
+ self .content_name = content_name
178
192
179
193
def runtest(self):
    """Only after a workflow is finished the contents of files and logs are
    read. The ContentTestCollector parent reads each file once. This is
    done in its thread. We wait for this thread to complete. Then we check
    all the found strings in the parent.

    This way we do not have to read each file one time per ContentTestItem.
    This makes content checking much faster on big files (NGS > 1 GB files)
    where we are looking for multiple words (variants / sequences)."""
    # Wait for thread to complete.
    self.parent.thread.join()
    # A missing file fails the test before found_strings is consulted.
    assert not self.parent.file_not_found
    assert ((self.string in self.parent.found_strings) ==
            self.should_contain)
191
206
192
207
def repr_failure(self, excinfo):
    """Build the failure message for this content test.

    :param excinfo: unused, but required by the pytest signature.
    :return: a message saying the file does not exist when the parent
    recorded that, otherwise a message saying whether the string was
    found and whether it should have been.
    """
    # pylint: disable=unused-argument
    # excinfo needed for pytest.
    if self.parent.file_not_found:
        return (
            "'{content}' does not exist and cannot be searched "
            "for {containing} '{string}'."
        ).format(
            content=self.content_name,
            containing="containing" if self.should_contain
            else "not containing",
            string=self.string)

    return (
        "'{string}' was {found} in {content} "
        "while it {should} be there."
    ).format(
        string=self.string,
        found="not found" if self.should_contain else "found",
        content=self.content_name,
        should="should" if self.should_contain else "should not"
    )
0 commit comments