@@ -173,11 +173,12 @@ def get_read_instruction(path: PathLikeOrFileInstruction) -> _ReadInstruction:
173
173
)
174
174
175
175
176
def _create_reader(
    filename: epath.PathLike, additional_reader_options: str = ""
):
  """Returns an ArrayRecordReader for the given filename.

  Args:
    filename: Path of the ArrayRecord file to open.
    additional_reader_options: Comma-separated `key:value` reader options
      appended after the fixed `readahead_buffer_size:0` option. Defaults to
      the empty string, in which case only the fixed option is passed, so
      callers using the one-argument form keep working.
  """
  reader_options = "readahead_buffer_size:0"
  if additional_reader_options:
    # Only add the separator when there is something to append, so the
    # options string never ends in a dangling comma.
    reader_options = f"{reader_options},{additional_reader_options}"
  return array_record_module.ArrayRecordReader(
      filename,
      options=reader_options,
      file_reader_buffer_size=32768,
  )
183
184
@@ -219,6 +220,7 @@ def __init__(
219
220
paths : Union [
220
221
PathLikeOrFileInstruction , Sequence [PathLikeOrFileInstruction ]
221
222
],
223
+ reader_options : dict [str , str ] | None = None ,
222
224
):
223
225
"""Creates a new ArrayRecordDataSource object.
224
226
@@ -238,6 +240,8 @@ def __init__(
238
240
paths/FileInstructions. When you want to read subsets or have a large
239
241
number of files prefer to pass FileInstructions. This makes the
240
242
initialization faster.
243
+ reader_options: optional dict of reader option names to values, used when
244
+ creating a reader.
241
245
"""
242
246
if isinstance (paths , (str , pathlib .Path , FileInstruction )):
243
247
paths = [paths ]
@@ -258,6 +262,12 @@ def __init__(
258
262
"Unsupported path format was used. Path format must be "
259
263
"a Sequence, String, pathlib.Path or FileInstruction."
260
264
)
265
+ if reader_options is None :
266
+ self ._reader_options_string = ""
267
+ else :
268
+ self ._reader_options_string = "," .join (
269
+ [f"{ k } :{ v } " for k , v in reader_options .items ()]
270
+ )
261
271
self ._read_instructions = _get_read_instructions (paths )
262
272
self ._paths = [ri .filename for ri in self ._read_instructions ]
263
273
# We open readers lazily when we need to read from them.
@@ -324,7 +334,7 @@ def _ensure_reader_exists(self, reader_idx: int) -> None:
324
334
if self ._readers [reader_idx ] is not None :
325
335
return
326
336
filename = self ._read_instructions [reader_idx ].filename
327
- reader = _create_reader (filename )
337
+ reader = _create_reader (filename , self . _reader_options_string )
328
338
_check_group_size (filename , reader )
329
339
self ._readers [reader_idx ] = reader
330
340
0 commit comments