@@ -78,22 +78,34 @@ def __init__(
         self._cluster_label = cluster_label
         self._tags = tags
 
-    def _generate_hive_queries(self, context, inputs_dict):
+    def _generate_plugin_objects(self, context, inputs_dict):
         """
         Runs user code and produces hive queries
         :param flytekit.engines.common.EngineContext context:
         :param dict[Text, T] inputs:
-        :rtype: _qubole.QuboleHiveJob
+        :rtype: list[_qubole.QuboleHiveJob]
         """
         queries_from_task = super(SdkHiveTask, self)._execute_user_code(context, inputs_dict) or []
         if not isinstance(queries_from_task, list):
             queries_from_task = [queries_from_task]
 
         self._validate_queries(queries_from_task)
-        queries = _qubole.HiveQueryCollection(
-            [_qubole.HiveQuery(query=q, timeout_sec=self.metadata.timeout.seconds,
-                               retry_count=self.metadata.retries.retries) for q in queries_from_task])
-        return _qubole.QuboleHiveJob(queries, self._cluster_label, self._tags)
+        plugin_objects = []
+
+        for q in queries_from_task:
+            hive_query = _qubole.HiveQuery(query=q, timeout_sec=self.metadata.timeout.seconds,
+                                           retry_count=self.metadata.retries.retries)
+
+            # TODO: Remove this after all users of older SDK versions that did the single node, multi-query pattern are
+            # deprecated. This is only here for backwards compatibility - in addition to writing the query to the
+            # query field, we also construct a QueryCollection with only one query. This will ensure that the
+            # older plugin will continue to work.
+            query_collection = _qubole.HiveQueryCollection([hive_query])
+
+            plugin_objects.append(_qubole.QuboleHiveJob(hive_query, self._cluster_label, self._tags,
+                                                        query_collection=query_collection))
+
+        return plugin_objects
 
     @staticmethod
     def _validate_task_parameters(cluster_label, tags):
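For illustration, a minimal runnable sketch of the per-query construction in the hunk above, using simplified stand-in dataclasses rather than the real _qubole model classes (the class fields and the generate_plugin_objects helper are hypothetical, written only to show the one-job-per-query shape and the backwards-compatible single-entry QueryCollection):

    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class HiveQuery:
        query: str
        timeout_sec: int
        retry_count: int

    @dataclass
    class HiveQueryCollection:
        queries: List[HiveQuery]

    @dataclass
    class QuboleHiveJob:
        query: HiveQuery
        cluster_label: str
        tags: List[str]
        # Backwards compatibility: older plugins read a collection, so each
        # job also carries a single-entry collection wrapping the same query.
        query_collection: Optional[HiveQueryCollection] = None

    def generate_plugin_objects(queries, cluster_label, tags,
                                timeout_sec=3600, retry_count=3):
        jobs = []
        for q in queries:
            hq = HiveQuery(query=q, timeout_sec=timeout_sec, retry_count=retry_count)
            jobs.append(QuboleHiveJob(hq, cluster_label, tags,
                                      query_collection=HiveQueryCollection([hq])))
        return jobs

    # Three queries now yield three plugin objects (one per future node),
    # instead of one object holding a three-query collection.
    jobs = generate_plugin_objects(["SELECT 1", "SELECT 2", "SELECT 3"],
                                   "default", ["team:data"])
    assert len(jobs) == 3
    assert jobs[0].query_collection.queries == [jobs[0].query]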
@@ -146,28 +158,29 @@ def _produce_dynamic_job_spec(self, context, inputs):
         # Add outputs to inputs
         inputs_dict.update(outputs_dict)
 
-        # Note: Today a hive task corresponds to a dynamic job spec with one node, which contains multiple
-        # queries. We may change this in future.
         nodes = []
         tasks = []
-        generated_queries = self._generate_hive_queries(context, inputs_dict)
+        # One node per query
+        generated_queries = self._generate_plugin_objects(context, inputs_dict)
 
         # Create output bindings always - this has to happen after user code has run
         output_bindings = [_literal_models.Binding(var=name, binding=_interface.BindingData.from_python_std(
             b.sdk_type.to_flyte_literal_type(), b.value))
             for name, b in _six.iteritems(outputs_dict)]
 
-        if len(generated_queries.query_collection.queries) > 0:
+        i = 0
+        for quboleHiveJob in generated_queries:
             hive_job_node = _create_hive_job_node(
-                "HiveQueries",
-                generated_queries.to_flyte_idl(),
+                "HiveQuery_{}".format(i),
+                quboleHiveJob.to_flyte_idl(),
                 self.metadata
             )
             nodes.append(hive_job_node)
             tasks.append(hive_job_node.executable_sdk_object)
+            i += 1
 
         dynamic_job_spec = _dynamic_job.DynamicJobSpec(
-            min_successes=len(nodes),  # At most we only have one node for now, see above comment
+            min_successes=len(nodes),
             tasks=tasks,
             nodes=nodes,
             outputs=output_bindings,
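The loop in this hunk fans each plugin object out into its own node. A small sketch of the resulting shape, with a hypothetical build_nodes helper standing in for the _create_hive_job_node and DynamicJobSpec machinery (the dict fields are illustrative, not the flytekit models):

    # Each generated QuboleHiveJob becomes its own node in the dynamic job
    # spec; node ids are suffixed with their position, and min_successes
    # requires every per-query node to succeed.
    def build_nodes(plugin_objects):
        nodes = [{"id": "HiveQuery_{}".format(i), "job": job}
                 for i, job in enumerate(plugin_objects)]
        return {"nodes": nodes, "min_successes": len(nodes)}

    spec = build_nodes(["job_a", "job_b"])
    assert [n["id"] for n in spec["nodes"]] == ["HiveQuery_0", "HiveQuery_1"]
    assert spec["min_successes"] == 2

Note that the manual i counter in the diff is equivalent to iterating with enumerate(generated_queries), as the sketch does.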