# Record describing one stored card file: its type, hash, optional id and
# the file name those components were taken from.
CardInfo = namedtuple("CardInfo", ("type", "hash", "id", "filename"))
3030
3131
class CardNameSuffix:
    """File-name suffixes used when naming the artifacts stored for a card."""

    # Suffix of the rendered card file.
    CARD = "html"
    # Suffix of the JSON file that holds a card's data.
    DATA = "data.json"
35+
36+
class CardPathSuffix:
    """Path-suffix values handed to the datastore's path builders."""

    # Suffix selecting the location of rendered cards.
    CARD = "cards"
    # Suffix selecting the location of card data files.
    DATA = "runtime"
40+
41+
3242def path_spec_resolver (pathspec ):
3343 splits = pathspec .split ("/" )
3444 splits .extend ([None ] * (4 - len (splits )))
@@ -86,18 +96,24 @@ def __init__(self, flow_datastore, pathspec=None):
8696 self ._run_id = run_id
8797 self ._step_name = step_name
8898 self ._pathspec = pathspec
89- self ._temp_card_save_path = self ._get_write_path (base_pth = TEMP_DIR_NAME )
99+ self ._temp_card_save_path = self ._get_write_path (
100+ base_pth = TEMP_DIR_NAME , suffix = CardPathSuffix .CARD
101+ )
90102
@classmethod
def get_card_location(
    cls, base_path, card_name, uuid, card_id=None, suffix=CardNameSuffix.CARD
):
    """
    Build the location of a card artifact inside `base_path`.

    Args:
        base_path (str): Directory the file is placed in.
        card_name: Leading component of the file name.
        uuid: Identifier placed last in the file name, right before the suffix.
        card_id: Optional id; when given it is inserted between `card_name` and `uuid`.
        suffix: File-name suffix appended after a dot.

    Returns:
        `base_path` joined with `NAME-UUID.SUFFIX`, or with
        `NAME-ID-UUID.SUFFIX` when `card_id` is provided.
    """
    if card_id is not None:
        file_name = "%s-%s-%s.%s" % (card_name, card_id, uuid, suffix)
    else:
        file_name = "%s-%s.%s" % (card_name, uuid, suffix)
    return os.path.join(base_path, file_name)
99113
100- def _make_path (self , base_pth , pathspec = None , with_steps = False , suffix = "cards" ):
114+ def _make_path (
115+ self , base_pth , pathspec = None , with_steps = False , suffix = CardPathSuffix .CARD
116+ ):
101117 sysroot = base_pth
102118 if pathspec is not None :
103119 # since most cards are at a task level there will always be 4 non-none values returned
@@ -138,16 +154,27 @@ def _make_path(self, base_pth, pathspec=None, with_steps=False, suffix="cards"):
138154 pth_arr .pop (0 )
139155 return os .path .join (* pth_arr )
140156
def _get_write_path(self, base_pth="", suffix=CardPathSuffix.CARD):
    """
    Return the path artifacts are written to for this datastore's pathspec.

    Delegates to `_make_path`, always requesting the `with_steps=True` layout.

    Args:
        base_pth: Root the path is built under.
        suffix: Path suffix forwarded to `_make_path`.
    """
    write_path = self._make_path(
        base_pth, pathspec=self._pathspec, with_steps=True, suffix=suffix
    )
    return write_path
145161
def _get_read_path(self, base_pth="", with_steps=False, suffix=CardPathSuffix.CARD):
    """
    Return the path artifacts are read from for this datastore's pathspec.

    Args:
        base_pth: Root the path is built under.
        with_steps: Layout flag forwarded to `_make_path`; ignored for data paths.
        suffix: Path suffix forwarded to `_make_path`.
    """
    # Data paths will always be under the path with steps
    use_steps = True if suffix == CardPathSuffix.DATA else with_steps
    return self._make_path(
        base_pth, pathspec=self._pathspec, with_steps=use_steps, suffix=suffix
    )
148175
149176 @staticmethod
150- def card_info_from_path (path ):
177+ def card_info_from_path (path , suffix = CardNameSuffix . CARD ):
151178 """
152179 Args:
153180 path (str): The path to the card
@@ -163,8 +190,8 @@ def card_info_from_path(path):
163190
164191 if len (file_split ) not in [2 , 3 ]:
165192 raise Exception (
166- "Invalid card file name %s. Card file names should be of form TYPE-HASH.html or TYPE-ID-HASH.html "
167- % card_file_name
193+ "Invalid file name %s. Card/Data file names should be of form TYPE-HASH.%s or TYPE-ID-HASH.%s "
194+ % ( card_file_name , suffix , suffix )
168195 )
169196 card_type , card_hash , card_id = None , None , None
170197
@@ -173,17 +200,17 @@ def card_info_from_path(path):
173200 else :
174201 card_type , card_id , card_hash = file_split
175202
176- card_hash = card_hash .split (".html" )[0 ]
203+ card_hash = card_hash .split ("." + suffix )[0 ]
177204 return CardInfo (card_type , card_hash , card_id , card_file_name )
178205
179206 def save_data (self , uuid , card_type , json_data , card_id = None ):
180207 card_file_name = card_type
181208 loc = self .get_card_location (
182- self ._get_write_path (suffix = "runtime" ),
209+ self ._get_write_path (suffix = CardPathSuffix . DATA ),
183210 card_file_name ,
184211 uuid ,
185212 card_id = card_id ,
186- suffix = "data.json" ,
213+ suffix = CardNameSuffix . DATA ,
187214 )
188215 self ._backend .save_bytes (
189216 [(loc , BytesIO (json .dumps (json_data ).encode ("utf-8" )))], overwrite = True
@@ -209,7 +236,11 @@ def save_card(self, uuid, card_type, card_html, card_id=None, overwrite=True):
209236 # It will also easily end up breaking the metaflow-ui (which may be using a client from an older version).
210237 # Hence, we are writing cards to both paths so that we can introduce breaking changes later in the future.
211238 card_path_with_steps = self .get_card_location (
212- self ._get_write_path (), card_file_name , uuid , card_id = card_id
239+ self ._get_write_path (suffix = CardPathSuffix .CARD ),
240+ card_file_name ,
241+ uuid ,
242+ card_id = card_id ,
243+ suffix = CardNameSuffix .CARD ,
213244 )
214245 if SKIP_CARD_DUALWRITE :
215246 self ._backend .save_bytes (
@@ -218,28 +249,31 @@ def save_card(self, uuid, card_type, card_html, card_id=None, overwrite=True):
218249 )
219250 else :
220251 card_path_without_steps = self .get_card_location (
221- self ._get_read_path (with_steps = False ),
252+ self ._get_read_path (with_steps = False , suffix = CardPathSuffix . CARD ),
222253 card_file_name ,
223254 uuid ,
224255 card_id = card_id ,
256+ suffix = CardNameSuffix .CARD ,
225257 )
226258 for cp in [card_path_with_steps , card_path_without_steps ]:
227259 self ._backend .save_bytes (
228260 [(cp , BytesIO (bytes (card_html , "utf-8" )))], overwrite = overwrite
229261 )
230262
231- return self .card_info_from_path (card_path_with_steps )
263+ return self .card_info_from_path (
264+ card_path_with_steps , suffix = CardNameSuffix .CARD
265+ )
232266
233267 def _list_card_paths (self , card_type = None , card_hash = None , card_id = None ):
234268 # Check for new cards first
235269 card_paths = []
236270 card_paths_with_steps = self ._backend .list_content (
237- [self ._get_read_path (with_steps = True )]
271+ [self ._get_read_path (with_steps = True , suffix = CardPathSuffix . CARD )]
238272 )
239273
240274 if len (card_paths_with_steps ) == 0 :
241275 card_paths_without_steps = self ._backend .list_content (
242- [self ._get_read_path (with_steps = False )]
276+ [self ._get_read_path (with_steps = False , suffix = CardPathSuffix . CARD )]
243277 )
244278 if len (card_paths_without_steps ) == 0 :
245279 # If there are no files found on the Path then raise an error of
@@ -256,7 +290,7 @@ def _list_card_paths(self, card_type=None, card_hash=None, card_id=None):
256290 cards_found = []
257291 for task_card_path in card_paths :
258292 card_path = task_card_path .path
259- card_info = self .card_info_from_path (card_path )
293+ card_info = self .card_info_from_path (card_path , suffix = CardNameSuffix . CARD )
260294 if card_type is not None and card_info .type != card_type :
261295 continue
262296 elif card_hash is not None :
@@ -270,11 +304,35 @@ def _list_card_paths(self, card_type=None, card_hash=None, card_id=None):
270304
271305 return cards_found
272306
def _list_card_data(self, card_type=None, card_hash=None, card_id=None):
    """
    List the stored data-file paths that match the given filters.

    Args:
        card_type: When not None, keep only entries whose parsed type equals it.
        card_hash: When not None, keep only entries whose parsed hash starts with it.
        card_id: When not None (and `card_hash` is None), keep only entries
            whose parsed id equals it.

    Returns:
        list: Paths of the matching entries that are files.
    """
    listing = self._backend.list_content(
        [self._get_read_path(suffix=CardPathSuffix.DATA)]
    )
    matches = []

    for entry in listing:
        entry_path = entry.path
        info = self.card_info_from_path(entry_path, suffix=CardNameSuffix.DATA)
        if card_type is not None and info.type != card_type:
            continue
        if card_hash is not None:
            # A hash filter takes precedence: card_id is not consulted
            # whenever card_hash is supplied.
            if not info.hash.startswith(card_hash):
                continue
        elif card_id is not None and info.id != card_id:
            continue
        if entry.is_file:
            matches.append(entry_path)

    return matches
327+
def create_full_path(self, card_path):
    """Return `card_path` joined onto the backend's `datastore_root`."""
    root = self._backend.datastore_root
    return os.path.join(root, card_path)
275330
def get_card_names(self, card_paths):
    """
    Parse every path in `card_paths` as a card file name.

    Args:
        card_paths: Iterable of card paths.

    Returns:
        list: One `card_info_from_path` result per input path, in input order.
    """
    card_infos = []
    for card_path in card_paths:
        card_infos.append(
            self.card_info_from_path(card_path, suffix=CardNameSuffix.CARD)
        )
    return card_infos
278336
279337 def get_card_html (self , path ):
280338 with self ._backend .load_bytes ([path ]) as get_results :
@@ -283,6 +341,13 @@ def get_card_html(self, path):
283341 with open (path , "r" ) as f :
284342 return f .read ()
285343
def get_card_data(self, path):
    """
    Load the JSON document stored at `path` in the backend.

    Args:
        path: Backend path of the data file.

    Returns:
        The decoded JSON of the first loaded result that has a local file,
        or None when no result has one.
    """
    with self._backend.load_bytes([path]) as loaded:
        for _, local_path, _ in loaded:
            if local_path is None:
                continue
            with open(local_path, "r") as f:
                return json.load(f)
    return None
350+
286351 def cache_locally (self , path , save_path = None ):
287352 """
288353 Saves the data present in the `path` the `metaflow_card_cache` directory or to the `save_path`.
@@ -308,6 +373,15 @@ def cache_locally(self, path, save_path=None):
308373 shutil .copy (path , main_path )
309374 return main_path
310375
def extract_data_paths(self, card_type=None, card_hash=None, card_id=None):
    """Return whatever `_list_card_data` yields for the given filters."""
    # card_hash is the unique identifier of the card.
    # It's no longer the actual hash!
    filters = {"card_type": card_type, "card_hash": card_hash, "card_id": card_id}
    return self._list_card_data(**filters)
384+
311385 def extract_card_paths (self , card_type = None , card_hash = None , card_id = None ):
312386 return self ._list_card_paths (
313387 card_type = card_type , card_hash = card_hash , card_id = card_id
0 commit comments