Skip to content

Commit c569d22

Browse files
committed
[card-server] cli command to expose a card server to view realtime updates
- Modified card datastore to accommodate retrieval of runtime data updates - Added a card viewer HTML file - Created a simple HTTP-based card server that helps showcase realtime cards by querying the server - Card datastore's read and write path retrieval methods are now explicitly given the suffix they retrieve from. We do this because the suffix determines whether we are extracting a card or a data update - Added a private method in the `Card` (user-interface) to get the data related to a card.
1 parent 751a36c commit c569d22

File tree

5 files changed

+521
-22
lines changed

5 files changed

+521
-22
lines changed

metaflow/plugins/cards/card_cli.py

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
from metaflow.client import Task
22
from metaflow import JSONType, namespace
3-
from metaflow.exception import CommandException
3+
from metaflow.util import resolve_identity
4+
from metaflow.exception import (
5+
CommandException,
6+
MetaflowNotFound,
7+
MetaflowNamespaceMismatch,
8+
)
49
import webbrowser
510
import re
611
from metaflow._vendor import click
@@ -776,3 +781,89 @@ def list(
776781
show_list_as_json=as_json,
777782
file=file,
778783
)
784+
785+
786+
@card.command(help="Run local card viewer server")
@click.option(
    "--run-id",
    default=None,
    show_default=True,
    type=str,
    help="Run ID of the flow",
)
@click.option(
    "--port",
    default=8324,
    show_default=True,
    type=int,
    help="Port on which Metaflow card server will run",
)
@click.option(
    "--namespace",
    "user_namespace",
    default=None,
    show_default=True,
    type=str,
    help="Namespace of the flow",
)
@click.option(
    "--max-cards",
    default=30,
    show_default=True,
    type=int,
    help="Maximum number of cards to be shown at any time by the server",
)
@click.pass_context
def server(ctx, run_id, port, user_namespace, max_cards):
    """Resolve the target run and hand it to the card server.

    When no namespace is supplied, the identity returned by
    `resolve_identity()` is used. The run is looked up through
    `_get_run_object`, wrapped in a `CardServerOptions` and passed to
    `create_card_server` together with the port and the CLI object.
    """
    # Imported lazily so the server module is only loaded for this command.
    from .card_server import CardServerOptions, create_card_server

    if user_namespace is None:
        user_namespace = resolve_identity()

    run_object = _get_run_object(ctx.obj, run_id, user_namespace)
    server_options = CardServerOptions(
        run_object=run_object,
        only_running=False,
        follow_resumed=False,
        flow_datastore=ctx.obj.flow_datastore,
        max_cards=max_cards,
    )
    create_card_server(server_options, port, ctx.obj)
829+
830+
831+
def _get_run_object(obj, run_id, user_namespace):
    """Return the `Run` object the card server should display.

    Args:
        obj: CLI state object; `obj.flow.name` and `obj.echo` are used.
        run_id (str or None): Bare run id (no flow-name prefix). When None,
            the latest run of the flow in `user_namespace` is used.
        user_namespace (str): Namespace to search when no run id is given.

    Returns:
        The resolved `Run`.

    Raises:
        CommandException: if the flow or run cannot be found, if it is not
            visible in the namespace, or if `run_id` is malformed.
    """
    from metaflow import Flow, Run

    flow_name = obj.flow.name
    try:
        if run_id is not None:
            # An explicit run id is looked up with the namespace check disabled.
            namespace(None)
        else:
            _msg = "Searching for runs in namespace: %s" % user_namespace
            obj.echo(_msg, fg="blue", bold=False)
            namespace(user_namespace)
        flow = Flow(pathspec=flow_name)
    except MetaflowNotFound:
        raise CommandException("No run found for *%s*." % flow_name)
    except MetaflowNamespaceMismatch:
        raise CommandException(
            "No run found for *%s* in namespace *%s*. You can switch the namespace using --namespace"
            % (flow_name, user_namespace)
        )

    if run_id is None:
        latest_run = flow.latest_run
        if latest_run is None:
            # The flow exists but has no run visible in this namespace;
            # report it instead of failing on `None.pathspec`.
            raise CommandException(
                "No run found for *%s* in namespace *%s*. You can switch the namespace using --namespace"
                % (flow_name, user_namespace)
            )
        run_id = latest_run.pathspec
    else:
        # User input is validated with an explicit error rather than an
        # `assert`, which is stripped when Python runs with -O.
        if "/" in run_id:
            raise CommandException("run_id should be of the form <runid>")
        run_id = "/".join([flow_name, run_id])

    try:
        run = Run(run_id)
    except MetaflowNotFound:
        raise CommandException("No run found for runid: *%s*." % run_id)
    except MetaflowNamespaceMismatch:
        raise CommandException(
            "No run found for runid: *%s* in namespace *%s*. You can switch the namespace using --namespace"
            % (run_id, user_namespace)
        )
    obj.echo("Using run-id %s" % run_id, fg="blue", bold=False)
    return run

metaflow/plugins/cards/card_client.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from metaflow.datastore import FlowDataStore
33
from metaflow.metaflow_config import CARD_SUFFIX
44
from .card_resolver import resolve_paths_from_task, resumed_info
5-
from .card_datastore import CardDatastore
5+
from .card_datastore import CardDatastore, CardNameSuffix
66
from .exception import (
77
UnresolvableDatastoreException,
88
IncorrectArguementException,
@@ -57,6 +57,15 @@ def __init__(
5757
# Tempfile to open stuff in browser
5858
self._temp_file = None
5959

60+
def _get_data(self) -> Optional[dict]:
61+
# currently an internal method to retrieve a card's data.
62+
data_paths = self._card_ds.extract_data_paths(
63+
card_type=self.type, card_hash=self.hash, card_id=self._card_id
64+
)
65+
if len(data_paths) == 0:
66+
return None
67+
return self._card_ds.get_card_data(data_paths[0])
68+
6069
def get(self) -> str:
6170
"""
6271
Retrieves the HTML contents of the card from the
@@ -172,7 +181,7 @@ def _get_card(self, index):
172181
if index >= self._high:
173182
raise IndexError
174183
path = self._card_paths[index]
175-
card_info = self._card_ds.card_info_from_path(path)
184+
card_info = self._card_ds.card_info_from_path(path, suffix=CardNameSuffix.CARD)
176185
# todo : find card creation date and put it in client.
177186
return Card(
178187
self._card_ds,
@@ -252,6 +261,7 @@ def get_cards(
252261
# Exception that the task argument should be of form `Task` or `str`
253262
raise IncorrectArguementException(_TYPE(task))
254263

264+
origin_taskpathspec = None
255265
if follow_resumed:
256266
origin_taskpathspec = resumed_info(task)
257267
if origin_taskpathspec:

metaflow/plugins/cards/card_datastore.py

Lines changed: 93 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,16 @@
2929
CardInfo = namedtuple("CardInfo", ["type", "hash", "id", "filename"])
3030

3131

32+
class CardNameSuffix:
    """File-name suffixes used by the card datastore."""

    # Suffix of rendered card files (TYPE-HASH.html / TYPE-ID-HASH.html).
    CARD = "html"
    # Suffix of the JSON data-update files written by `save_data`.
    DATA = "data.json"
35+
36+
37+
class CardPathSuffix:
    """Path suffixes handed to the datastore's path-building helpers."""

    # Suffix used for paths that hold rendered cards.
    CARD = "cards"
    # Suffix used for paths that hold data updates.
    DATA = "runtime"
40+
41+
3242
def path_spec_resolver(pathspec):
3343
splits = pathspec.split("/")
3444
splits.extend([None] * (4 - len(splits)))
@@ -86,18 +96,24 @@ def __init__(self, flow_datastore, pathspec=None):
8696
self._run_id = run_id
8797
self._step_name = step_name
8898
self._pathspec = pathspec
89-
self._temp_card_save_path = self._get_write_path(base_pth=TEMP_DIR_NAME)
99+
self._temp_card_save_path = self._get_write_path(
100+
base_pth=TEMP_DIR_NAME, suffix=CardPathSuffix.CARD
101+
)
90102

91103
@classmethod
def get_card_location(
    cls, base_path, card_name, uuid, card_id=None, suffix=CardNameSuffix.CARD
):
    """Build the storage path of a card (or card data) file.

    The file name is `NAME-UUID.SUFFIX`, or `NAME-ID-UUID.SUFFIX` when a
    `card_id` is given; it is joined onto `base_path`.
    """
    if card_id is not None:
        file_name = "%s-%s-%s.%s" % (card_name, card_id, uuid, suffix)
    else:
        file_name = "%s-%s.%s" % (card_name, uuid, suffix)
    return os.path.join(base_path, file_name)
99113

100-
def _make_path(self, base_pth, pathspec=None, with_steps=False, suffix="cards"):
114+
def _make_path(
115+
self, base_pth, pathspec=None, with_steps=False, suffix=CardPathSuffix.CARD
116+
):
101117
sysroot = base_pth
102118
if pathspec is not None:
103119
# since most cards are at a task level there will always be 4 non-none values returned
@@ -138,16 +154,27 @@ def _make_path(self, base_pth, pathspec=None, with_steps=False, suffix="cards"):
138154
pth_arr.pop(0)
139155
return os.path.join(*pth_arr)
140156

141-
def _get_write_path(self, base_pth="", suffix=CardPathSuffix.CARD):
    """Return the write path for this datastore's pathspec.

    Write paths are always built with `with_steps=True`; `suffix` is
    forwarded to `_make_path` unchanged.
    """
    path_options = dict(pathspec=self._pathspec, with_steps=True, suffix=suffix)
    return self._make_path(base_pth, **path_options)
145161

146-
def _get_read_path(self, base_pth="", with_steps=False, suffix=CardPathSuffix.CARD):
    """Return the read path for this datastore's pathspec.

    For the data suffix the caller's `with_steps` is ignored and the path
    is always built with steps; otherwise `with_steps` is honoured.
    """
    # Data paths will always be under the path with steps
    include_steps = True if suffix == CardPathSuffix.DATA else with_steps
    return self._make_path(
        base_pth,
        pathspec=self._pathspec,
        with_steps=include_steps,
        suffix=suffix,
    )
148175

149176
@staticmethod
150-
def card_info_from_path(path):
177+
def card_info_from_path(path, suffix=CardNameSuffix.CARD):
151178
"""
152179
Args:
153180
path (str): The path to the card
@@ -163,8 +190,8 @@ def card_info_from_path(path):
163190

164191
if len(file_split) not in [2, 3]:
165192
raise Exception(
166-
"Invalid card file name %s. Card file names should be of form TYPE-HASH.html or TYPE-ID-HASH.html"
167-
% card_file_name
193+
"Invalid file name %s. Card/Data file names should be of form TYPE-HASH.%s or TYPE-ID-HASH.%s"
194+
% (card_file_name, suffix, suffix)
168195
)
169196
card_type, card_hash, card_id = None, None, None
170197

@@ -173,17 +200,17 @@ def card_info_from_path(path):
173200
else:
174201
card_type, card_id, card_hash = file_split
175202

176-
card_hash = card_hash.split(".html")[0]
203+
card_hash = card_hash.split("." + suffix)[0]
177204
return CardInfo(card_type, card_hash, card_id, card_file_name)
178205

179206
def save_data(self, uuid, card_type, json_data, card_id=None):
180207
card_file_name = card_type
181208
loc = self.get_card_location(
182-
self._get_write_path(suffix="runtime"),
209+
self._get_write_path(suffix=CardPathSuffix.DATA),
183210
card_file_name,
184211
uuid,
185212
card_id=card_id,
186-
suffix="data.json",
213+
suffix=CardNameSuffix.DATA,
187214
)
188215
self._backend.save_bytes(
189216
[(loc, BytesIO(json.dumps(json_data).encode("utf-8")))], overwrite=True
@@ -209,7 +236,11 @@ def save_card(self, uuid, card_type, card_html, card_id=None, overwrite=True):
209236
# It will also easily end up breaking the metaflow-ui (which maybe using a client from an older version).
210237
# Hence, we are writing cards to both paths so that we can introduce breaking changes later in the future.
211238
card_path_with_steps = self.get_card_location(
212-
self._get_write_path(), card_file_name, uuid, card_id=card_id
239+
self._get_write_path(suffix=CardPathSuffix.CARD),
240+
card_file_name,
241+
uuid,
242+
card_id=card_id,
243+
suffix=CardNameSuffix.CARD,
213244
)
214245
if SKIP_CARD_DUALWRITE:
215246
self._backend.save_bytes(
@@ -218,28 +249,31 @@ def save_card(self, uuid, card_type, card_html, card_id=None, overwrite=True):
218249
)
219250
else:
220251
card_path_without_steps = self.get_card_location(
221-
self._get_read_path(with_steps=False),
252+
self._get_read_path(with_steps=False, suffix=CardPathSuffix.CARD),
222253
card_file_name,
223254
uuid,
224255
card_id=card_id,
256+
suffix=CardNameSuffix.CARD,
225257
)
226258
for cp in [card_path_with_steps, card_path_without_steps]:
227259
self._backend.save_bytes(
228260
[(cp, BytesIO(bytes(card_html, "utf-8")))], overwrite=overwrite
229261
)
230262

231-
return self.card_info_from_path(card_path_with_steps)
263+
return self.card_info_from_path(
264+
card_path_with_steps, suffix=CardNameSuffix.CARD
265+
)
232266

233267
def _list_card_paths(self, card_type=None, card_hash=None, card_id=None):
234268
# Check for new cards first
235269
card_paths = []
236270
card_paths_with_steps = self._backend.list_content(
237-
[self._get_read_path(with_steps=True)]
271+
[self._get_read_path(with_steps=True, suffix=CardPathSuffix.CARD)]
238272
)
239273

240274
if len(card_paths_with_steps) == 0:
241275
card_paths_without_steps = self._backend.list_content(
242-
[self._get_read_path(with_steps=False)]
276+
[self._get_read_path(with_steps=False, suffix=CardPathSuffix.CARD)]
243277
)
244278
if len(card_paths_without_steps) == 0:
245279
# If there are no files found on the Path then raise an error of
@@ -256,7 +290,7 @@ def _list_card_paths(self, card_type=None, card_hash=None, card_id=None):
256290
cards_found = []
257291
for task_card_path in card_paths:
258292
card_path = task_card_path.path
259-
card_info = self.card_info_from_path(card_path)
293+
card_info = self.card_info_from_path(card_path, suffix=CardNameSuffix.CARD)
260294
if card_type is not None and card_info.type != card_type:
261295
continue
262296
elif card_hash is not None:
@@ -270,11 +304,35 @@ def _list_card_paths(self, card_type=None, card_hash=None, card_id=None):
270304

271305
return cards_found
272306

307+
def _list_card_data(self, card_type=None, card_hash=None, card_id=None):
    """List the data-update file paths stored for this pathspec.

    Args:
        card_type (str, optional): keep only entries of this card type.
        card_hash (str, optional): keep only entries whose hash starts
            with this value.
        card_id (str, optional): keep only entries with this card id.

    Returns:
        list: paths of the matching data files.
    """
    card_data_paths = self._backend.list_content(
        [self._get_read_path(suffix=CardPathSuffix.DATA)]
    )
    data_found = []

    for data_path in card_data_paths:
        # Skip non-file entries before parsing their name: a directory
        # name need not follow the TYPE[-ID]-HASH pattern, and
        # `card_info_from_path` raises on names that do not.
        if not data_path.is_file:
            continue
        _pth = data_path.path
        card_info = self.card_info_from_path(_pth, suffix=CardNameSuffix.DATA)
        # NOTE(review): because this is an if/elif chain, the `card_id`
        # filter is not evaluated when `card_hash` is provided. This is
        # kept as-is here; confirm whether it is intended.
        if card_type is not None and card_info.type != card_type:
            continue
        elif card_hash is not None:
            if not card_info.hash.startswith(card_hash):
                continue
        elif card_id is not None and card_info.id != card_id:
            continue
        data_found.append(_pth)

    return data_found
327+
273328
def create_full_path(self, card_path):
    """Join `card_path` onto the storage backend's datastore root."""
    datastore_root = self._backend.datastore_root
    return os.path.join(datastore_root, card_path)
275330

276331
def get_card_names(self, card_paths):
    """Parse each card path into the info extracted from its file name.

    Returns a list with one `card_info_from_path` result per input path,
    in the same order.
    """
    card_infos = []
    for card_path in card_paths:
        info = self.card_info_from_path(card_path, suffix=CardNameSuffix.CARD)
        card_infos.append(info)
    return card_infos
278336

279337
def get_card_html(self, path):
280338
with self._backend.load_bytes([path]) as get_results:
@@ -283,6 +341,13 @@ def get_card_html(self, path):
283341
with open(path, "r") as f:
284342
return f.read()
285343

344+
def get_card_data(self, path):
    """Load the JSON document stored at `path` in the backend.

    Returns the parsed content of the first loaded result that has a local
    file path; returns None when no result has one.
    """
    with self._backend.load_bytes([path]) as loaded:
        for _, local_path, _ in loaded:
            if local_path is None:
                continue
            with open(local_path, "r") as handle:
                return json.load(handle)
350+
286351
def cache_locally(self, path, save_path=None):
287352
"""
288353
Saves the data present in the `path` the `metaflow_card_cache` directory or to the `save_path`.
@@ -308,6 +373,15 @@ def cache_locally(self, path, save_path=None):
308373
shutil.copy(path, main_path)
309374
return main_path
310375

376+
def extract_data_paths(self, card_type=None, card_hash=None, card_id=None):
    """Return the data-file paths matching the given filters.

    Thin wrapper that forwards all three filters to `_list_card_data`.
    """
    # card_hash is the unique identifier to the card.
    # Its no longer the actual hash!
    filters = dict(card_type=card_type, card_hash=card_hash, card_id=card_id)
    return self._list_card_data(**filters)
384+
311385
def extract_card_paths(self, card_type=None, card_hash=None, card_id=None):
312386
return self._list_card_paths(
313387
card_type=card_type, card_hash=card_hash, card_id=card_id

0 commit comments

Comments
 (0)