1
1
from __future__ import absolute_import
2
2
from __future__ import print_function
3
3
4
+ import logging as _logging
5
+ import math as _math
6
+ import os as _os
7
+
4
8
import click
5
9
6
10
from flytekit .clis .sdk_in_container .constants import CTX_PACKAGES , CTX_PROJECT , CTX_DOMAIN , CTX_VERSION
7
- from flytekit .common import workflow as _workflow , utils as _utils
11
+ from flytekit .common import utils as _utils
12
+ from flytekit .common .core import identifier as _identifier
8
13
from flytekit .common .exceptions .scopes import system_entry_point
9
14
from flytekit .common .tasks import task as _sdk_task
10
15
from flytekit .common .utils import write_proto_to_file as _write_proto_to_file
11
16
from flytekit .configuration import TemporaryConfiguration
12
- from flytekit .configuration .internal import CONFIGURATION_PATH
13
- from flytekit .configuration .internal import IMAGE as _IMAGE
14
- from flytekit .models .workflow_closure import WorkflowClosure as _WorkflowClosure
17
+ from flytekit .configuration import internal as _internal_configuration
15
18
from flytekit .tools .module_loader import iterate_registerable_entities_in_order
16
19
17
20
18
21
@system_entry_point
def serialize_tasks_only(project, domain, pkgs, version, folder=None):
    """
    Serialize every SdkTask discovered in the given packages to protobuf files.

    Each discovered task is assigned an Identifier built from its resource type, the given project/domain/version
    and its fully qualified name. Two files are then written per task, both prefixed with a zero-padded index that
    reflects discovery order:

        <index>_<name>.pb               the result of calling serialize() on the task
        <index>_<name>.identifier.pb    the Identifier assigned above

    :param Text project:
    :param Text domain:
    :param list[Text] pkgs: Packages to search for tasks.
    :param Text version:
    :param Text folder: Optional directory that both output files are written into. When falsy, the bare file
        names are handed to the writer unchanged.

    :return: None
    """
    # m = module (i.e. python file)
    # k = value of dir(m), type str
    # o = object (e.g. SdkTask)
    loaded_entities = []
    for m, k, o in iterate_registerable_entities_in_order(pkgs, include_entities={_sdk_task.SdkTask}):
        name = _utils.fqdn(m.__name__, k, entity_type=o.resource_type)
        _logging.debug("Found module {}\n K: {} Instantiated in {}".format(m, k, o._instantiated_in))
        o._id = _identifier.Identifier(
            o.resource_type,
            project,
            domain,
            name,
            version
        )
        loaded_entities.append(o)

    if not loaded_entities:
        # Nothing to write. Returning here also avoids asking for the padding width of an empty list.
        return

    zero_padded_length = _determine_text_chars(len(loaded_entities))
    for i, entity in enumerate(loaded_entities):
        serialized = entity.serialize()
        fname_index = str(i).zfill(zero_padded_length)
        base_name = '{}_{}'.format(fname_index, entity._id.name)

        # The serialized entity must land in the same folder as its identifier file; previously only the
        # identifier file honored `folder`.
        fname = '{}.pb'.format(base_name)
        if folder:
            fname = _os.path.join(folder, fname)
        click.echo(' Writing {} to\n {}'.format(entity._id, fname))
        _write_proto_to_file(serialized, fname)

        # Explicit identifier file recording the project/domain/name/version chosen for this serialize call.
        identifier_fname = '{}.identifier.pb'.format(base_name)
        if folder:
            identifier_fname = _os.path.join(folder, identifier_fname)
        _write_proto_to_file(entity._id.to_flyte_idl(), identifier_fname)
26
60
27
61
28
62
@system_entry_point
def serialize_all(project, domain, pkgs, version, folder=None):
    """
    Serialize every registerable entity discovered in the given packages to protobuf files.

    In order to register, we have to comply with Admin's endpoints. Those endpoints take the following objects:
        flyteidl.admin.launch_plan_pb2.LaunchPlanSpec
        flyteidl.admin.workflow_pb2.WorkflowSpec
        flyteidl.admin.task_pb2.TaskSpec

    However, if we were to merely call .to_flyte_idl() on all the discovered entities, what we would get are:
        flyteidl.admin.launch_plan_pb2.LaunchPlanSpec
        flyteidl.core.workflow_pb2.WorkflowTemplate
        flyteidl.core.tasks_pb2.TaskTemplate

    For Workflows and Tasks therefore, there is special logic in the serialize function that translates these objects.

    Two files are written per entity, both prefixed with a zero-padded index that reflects discovery order:

        <index>_<name>.pb               the result of calling serialize() on the entity
        <index>_<name>.identifier.pb    the Identifier assigned to the entity for this call

    :param Text project:
    :param Text domain:
    :param list[Text] pkgs: Packages to search for registerable entities.
    :param Text version:
    :param Text folder: Optional directory that both output files are written into. When falsy, the bare file
        names are handed to the writer unchanged.

    :return: None
    """
    # m = module (i.e. python file)
    # k = value of dir(m), type str
    # o = object (e.g. SdkWorkflow)
    loaded_entities = []
    for m, k, o in iterate_registerable_entities_in_order(pkgs):
        name = _utils.fqdn(m.__name__, k, entity_type=o.resource_type)
        _logging.debug("Found module {}\n K: {} Instantiated in {}".format(m, k, o._instantiated_in))
        o._id = _identifier.Identifier(
            o.resource_type,
            project,
            domain,
            name,
            version
        )
        loaded_entities.append(o)

    if not loaded_entities:
        # Nothing to write. Returning here also avoids asking for the padding width of an empty list.
        return

    zero_padded_length = _determine_text_chars(len(loaded_entities))
    for i, entity in enumerate(loaded_entities):
        serialized = entity.serialize()
        fname_index = str(i).zfill(zero_padded_length)
        base_name = '{}_{}'.format(fname_index, entity._id.name)

        # The serialized entity must land in the same folder as its identifier file; previously only the
        # identifier file honored `folder`.
        fname = '{}.pb'.format(base_name)
        if folder:
            fname = _os.path.join(folder, fname)
        click.echo(' Writing {} to\n {}'.format(entity._id, fname))
        _write_proto_to_file(serialized, fname)

        # Not everything serialized will necessarily have an identifier field in it, even though some do (like the
        # TaskTemplate). To be more rigorous, we write an explicit identifier file that reflects the choices (like
        # project/domain, etc.) made for this serialize call. We should not allow users to specify a different project
        # for instance come registration time, to avoid mismatches between potential internal ids like the TaskTemplate
        # and the registered entity.
        identifier_fname = '{}.identifier.pb'.format(base_name)
        if folder:
            identifier_fname = _os.path.join(folder, identifier_fname)
        _write_proto_to_file(entity._id.to_flyte_idl(), identifier_fname)
119
+
120
+
121
def _determine_text_chars(length):
    """
    This function is used to help prefix files. If there are only 10 entries, then we just need one digit (0-9) to be
    the prefix. If there are 11, then we'll need two (00-10).

    The width is computed with exact integer arithmetic (the number of digits in the largest index, length - 1)
    rather than a floating point logarithm, so it cannot be thrown off by rounding and does not fail for an empty
    collection.

    :param int length: Number of entries that will be indexed from 0 to length - 1.
    :rtype: int
    :return: Width to zero-pad indices to. 0 when there is at most one entry, since a lone index needs no padding.
    """
    if length <= 1:
        # Zero entries used to raise a math domain error (log of 0); a single entry has always yielded 0.
        return 0
    return len(str(length - 1))
48
130
49
131
50
132
@click .group ('serialize' )
@@ -57,37 +139,77 @@ def serialize(ctx):
57
139
object contains the WorkflowTemplate, along with the relevant tasks for that workflow. In lieu of Admin,
58
140
this serialization step will set the URN of the tasks to the fully qualified name of the task function.
59
141
"""
60
- click .echo ('Serializing Flyte elements with image {}' .format (_IMAGE .get ()))
142
+ click .echo ('Serializing Flyte elements with image {}' .format (_internal_configuration . IMAGE .get ()))
61
143
62
144
63
145
@click.command('tasks')
@click.option('-v', '--version', type=str, help='Version to serialize tasks with. This is normally parsed from the '
                                                'image, but you can override here.')
@click.option('-f', '--folder', type=click.Path(exists=True))
@click.pass_context
def tasks(ctx, version=None, folder=None):
    """
    CLI command that serializes only the tasks found in the configured packages.

    :param click.Context ctx: Click context; project, domain, packages and version are read from ctx.obj.
    :param Text version: Optional override. Falls back to the context's version, then to the version looked up
        from the configured image tag.
    :param Text folder: Optional existing directory, forwarded to serialize_tasks_only.
    """
    project = ctx.obj[CTX_PROJECT]
    domain = ctx.obj[CTX_DOMAIN]
    pkgs = ctx.obj[CTX_PACKAGES]

    if folder:
        click.echo(f"Writing output to {folder} ")

    # Precedence: explicit --version, then the version stored on the context, then the image tag.
    version = version or ctx.obj[CTX_VERSION] or _internal_configuration.look_up_version_from_image_tag(
        _internal_configuration.IMAGE.get())

    internal_settings = {
        'project': project,
        'domain': domain,
        'version': version,
    }
    # Populate internal settings for project/domain/version from the environment so that the file names are resolved
    # with the correct strings. The file itself doesn't need to change though.
    with TemporaryConfiguration(_internal_configuration.CONFIGURATION_PATH.get(), internal_settings):
        _logging.debug("Serializing with settings\n "
                       "\n Project: {}"
                       "\n Domain: {}"
                       "\n Version: {}"
                       "\n \n over the following packages {}".format(project, domain, version, pkgs)
                       )
        serialize_tasks_only(project, domain, pkgs, version, folder)
76
176
77
177
78
178
@click.command('workflows')
@click.option('-v', '--version', type=str, help='Version to serialize tasks with. This is normally parsed from the '
                                                'image, but you can override here.')
# For now let's just assume that the directory needs to exist. If you're docker run -v'ing, docker will create the
# directory for you so it shouldn't be a problem.
@click.option('-f', '--folder', type=click.Path(exists=True))
@click.pass_context
def workflows(ctx, version=None, folder=None):
    """
    CLI command that serializes every registerable entity found in the configured packages.

    :param click.Context ctx: Click context; project, domain, packages and version are read from ctx.obj.
    :param Text version: Optional override. Falls back to the context's version, then to the version looked up
        from the configured image tag.
    :param Text folder: Optional existing directory, forwarded to serialize_all.
    """
    # NOTE(review): this forces the root logger to DEBUG for every invocation, unlike the `tasks` command.
    # Kept as-is to preserve behavior; confirm whether it is intentional.
    _logging.getLogger().setLevel(_logging.DEBUG)

    if folder:
        click.echo(f"Writing output to {folder} ")

    project = ctx.obj[CTX_PROJECT]
    domain = ctx.obj[CTX_DOMAIN]
    pkgs = ctx.obj[CTX_PACKAGES]

    # Precedence: explicit --version, then the version stored on the context, then the image tag.
    version = version or ctx.obj[CTX_VERSION] or _internal_configuration.look_up_version_from_image_tag(
        _internal_configuration.IMAGE.get())

    internal_settings = {
        'project': project,
        'domain': domain,
        'version': version,
    }
    # Populate internal settings for project/domain/version from the environment so that the file names are resolved
    # with the correct strings. The file itself doesn't need to change though.
    with TemporaryConfiguration(_internal_configuration.CONFIGURATION_PATH.get(), internal_settings):
        _logging.debug("Serializing with settings\n "
                       "\n Project: {}"
                       "\n Domain: {}"
                       "\n Version: {}"
                       "\n \n over the following packages {}".format(project, domain, version, pkgs)
                       )
        serialize_all(project, domain, pkgs, version, folder)
91
213
92
214
93
215
# Attach the `tasks` command to the `serialize` click group.
serialize .add_command (tasks )
0 commit comments