5
5
import enum
6
6
import functools
7
7
import logging
8
- import os
9
8
import typing as t
10
9
from pathlib import Path
11
10
12
11
import fsspec
12
+ import fsspec .implementations
13
+ import fsspec .implementations .dirfs
13
14
14
15
import singer_sdk .typing as th
15
16
from singer_sdk import Tap
@@ -138,6 +139,11 @@ def read_mode(self) -> ReadMode:
138
139
"""Folder read mode."""
139
140
return ReadMode (self .config ["read_mode" ])
140
141
142
@functools.cached_property
def path(self) -> str:
    """Return the directory path taken from the tap configuration.

    Returns:
        The value stored under the ``"path"`` configuration key.
    """
    # Binding to an annotated local gives the untyped config value a
    # declared ``str`` type before it is returned.
    configured: str = self.config["path"]
    return configured
146
+
141
147
@functools.cached_property
def fs(self) -> fsspec.AbstractFileSystem:
    """Return the filesystem object, rooted at ``self.path``.

    The ``filesystem`` setting names the fsspec protocol. Every protocol
    other than ``"local"`` must have a configuration entry under its own
    name; that entry is passed on as the target filesystem options.

    Returns:
        A ``DirFileSystem`` built from ``self.path`` and the selected
        protocol.

    Raises:
        ConfigValidationError: If a non-local protocol has no configuration
            entry.
    """
    protocol = self.config["filesystem"]

    # Only "local" may be used without a protocol-specific config entry.
    needs_section = protocol != "local"
    if needs_section and protocol not in self.config:  # pragma: no cover
        msg = "Filesystem configuration is missing"
        detail = f"Missing configuration for filesystem {protocol}"
        raise ConfigValidationError(msg, errors=[detail])

    logger.info("Instantiating filesystem interface: '%s'", protocol)

    # ``None`` when the protocol has no entry (possible only for "local").
    target_options = self.config.get(protocol)
    return fsspec.implementations.dirfs.DirFileSystem(
        path=self.path,
        target_protocol=protocol,
        target_options=target_options,
    )
157
168
158
169
def discover_streams (self ) -> list :
159
170
"""Return a list of discovered streams.
@@ -162,24 +173,23 @@ def discover_streams(self) -> list:
162
173
ValueError: If the path does not exist or is not a directory.
163
174
"""
164
175
# A directory for now, but could be a glob pattern.
165
- path : str = self .config ["path" ]
166
-
167
- if not self .fs .exists (path ) or not self .fs .isdir (path ): # pragma: no cover
176
+ if not self .fs .exists ("." ) or not self .fs .isdir ("." ): # pragma: no cover
168
177
# Raise a more specific error if the path is not a directory.
169
- msg = f"Path { path } does not exist or is not a directory"
178
+ msg = f"Path { self . path } does not exist or is not a directory"
170
179
raise ValueError (msg )
171
180
172
181
# One stream per file
173
182
if self .read_mode == ReadMode .one_stream_per_file :
174
183
return [
175
184
self .default_stream_class (
176
185
tap = self ,
177
- name = file_path_to_stream_name (member ),
178
- filepaths = [os . path . join ( path , member )], # noqa: PTH118
186
+ name = file_path_to_stream_name (member [ "name" ] ),
187
+ filepaths = [member [ "name" ]],
179
188
filesystem = self .fs ,
180
189
)
181
- for member in os .listdir (path )
182
- if member .endswith (self .valid_extensions )
190
+ for member in self .fs .listdir ("." )
191
+ if member ["type" ] == "file"
192
+ and member ["name" ].endswith (self .valid_extensions )
183
193
]
184
194
185
195
# Merge
@@ -188,9 +198,10 @@ def discover_streams(self) -> list:
188
198
tap = self ,
189
199
name = self .config ["stream_name" ],
190
200
filepaths = [
191
- os .path .join (path , member ) # noqa: PTH118
192
- for member in os .listdir (path )
193
- if member .endswith (self .valid_extensions )
201
+ member ["name" ]
202
+ for member in self .fs .listdir ("." )
203
+ if member ["type" ] == "file"
204
+ and member ["name" ].endswith (self .valid_extensions )
194
205
],
195
206
filesystem = self .fs ,
196
207
)
0 commit comments