|
41 | 41 | from dagster._core.definitions.events import AssetKeyPartitionKey
|
42 | 42 | from dagster._core.definitions.partition import DynamicPartitionsDefinition, PartitionedConfig
|
43 | 43 | from dagster._core.definitions.selector import (
|
| 44 | + JobSubsetSelector, |
44 | 45 | PartitionRangeSelector,
|
45 | 46 | PartitionsByAssetSelector,
|
46 | 47 | PartitionsSelector,
|
|
53 | 54 | get_asset_backfill_run_chunk_size,
|
54 | 55 | )
|
55 | 56 | from dagster._core.execution.backfill import BulkActionStatus, PartitionBackfill
|
| 57 | +from dagster._core.execution.plan.resume_retry import ReexecutionStrategy |
56 | 58 | from dagster._core.remote_representation import (
|
57 | 59 | InProcessCodeLocationOrigin,
|
58 | 60 | RemoteRepository,
|
59 | 61 | RemoteRepositoryOrigin,
|
60 | 62 | )
|
| 63 | +from dagster._core.remote_representation.code_location import CodeLocation |
61 | 64 | from dagster._core.storage.compute_log_manager import ComputeIOType
|
62 | 65 | from dagster._core.storage.dagster_run import (
|
63 | 66 | IN_PROGRESS_RUN_STATUSES,
|
|
69 | 72 | ASSET_PARTITION_RANGE_END_TAG,
|
70 | 73 | ASSET_PARTITION_RANGE_START_TAG,
|
71 | 74 | BACKFILL_ID_TAG,
|
| 75 | + BACKFILL_TAGS, |
72 | 76 | MAX_RETRIES_TAG,
|
73 | 77 | PARTITION_NAME_TAG,
|
74 | 78 | )
|
@@ -3164,3 +3168,85 @@ def test_asset_backfill_retries_make_downstreams_runnable(
|
3164 | 3168 | backfill.asset_backfill_data.failed_and_downstream_subset.num_partitions_and_non_partitioned_assets
|
3165 | 3169 | == 0
|
3166 | 3170 | )
|
| 3171 | + |
| 3172 | + |
| 3173 | +def test_run_retry_not_part_of_completed_backfill( |
| 3174 | + instance: DagsterInstance, |
| 3175 | + workspace_context: WorkspaceProcessContext, |
| 3176 | + code_location: CodeLocation, |
| 3177 | + remote_repo: RemoteRepository, |
| 3178 | +): |
| 3179 | + backfill_id = "run_retries_backfill" |
| 3180 | + partition_keys = static_partitions.get_partition_keys() |
| 3181 | + asset_selection = [AssetKey("foo"), AssetKey("a1"), AssetKey("bar")] |
| 3182 | + instance.add_backfill( |
| 3183 | + PartitionBackfill.from_asset_partitions( |
| 3184 | + asset_graph=workspace_context.create_request_context().asset_graph, |
| 3185 | + backfill_id=backfill_id, |
| 3186 | + tags={"custom_tag_key": "custom_tag_value"}, |
| 3187 | + backfill_timestamp=get_current_timestamp(), |
| 3188 | + asset_selection=asset_selection, |
| 3189 | + partition_names=partition_keys, |
| 3190 | + dynamic_partitions_store=instance, |
| 3191 | + all_partitions=False, |
| 3192 | + title=None, |
| 3193 | + description=None, |
| 3194 | + ) |
| 3195 | + ) |
| 3196 | + assert instance.get_runs_count() == 0 |
| 3197 | + backfill = instance.get_backfill(backfill_id) |
| 3198 | + assert backfill |
| 3199 | + assert backfill.status == BulkActionStatus.REQUESTED |
| 3200 | + |
| 3201 | + list(execute_backfill_iteration(workspace_context, get_default_daemon_logger("BackfillDaemon"))) |
| 3202 | + assert instance.get_runs_count() == 3 |
| 3203 | + wait_for_all_runs_to_start(instance, timeout=30) |
| 3204 | + assert instance.get_runs_count() == 3 |
| 3205 | + wait_for_all_runs_to_finish(instance, timeout=30) |
| 3206 | + |
| 3207 | + assert instance.get_runs_count() == 3 |
| 3208 | + runs = reversed(list(instance.get_runs())) |
| 3209 | + for run in runs: |
| 3210 | + assert run.tags[BACKFILL_ID_TAG] == backfill_id |
| 3211 | + assert run.tags["custom_tag_key"] == "custom_tag_value" |
| 3212 | + assert step_succeeded(instance, run, "foo") |
| 3213 | + assert step_succeeded(instance, run, "reusable") |
| 3214 | + assert step_succeeded(instance, run, "bar") |
| 3215 | + |
| 3216 | + list(execute_backfill_iteration(workspace_context, get_default_daemon_logger("BackfillDaemon"))) |
| 3217 | + backfill = instance.get_backfill(backfill_id) |
| 3218 | + assert backfill |
| 3219 | + assert backfill.status == BulkActionStatus.COMPLETED_SUCCESS |
| 3220 | + |
| 3221 | + # simulate a retry of a run |
| 3222 | + run_to_retry = instance.get_runs()[0] |
| 3223 | + selector = JobSubsetSelector( |
| 3224 | + location_name=code_location.name, |
| 3225 | + repository_name=remote_repo.name, |
| 3226 | + job_name=run_to_retry.job_name, |
| 3227 | + asset_selection=run_to_retry.asset_selection, |
| 3228 | + op_selection=None, |
| 3229 | + ) |
| 3230 | + remote_job = code_location.get_job(selector) |
| 3231 | + retried_run = instance.create_reexecuted_run( |
| 3232 | + parent_run=run_to_retry, |
| 3233 | + code_location=code_location, |
| 3234 | + remote_job=remote_job, |
| 3235 | + strategy=ReexecutionStrategy.ALL_STEPS, |
| 3236 | + run_config=run_to_retry.run_config, |
| 3237 | + use_parent_run_tags=True, # ensures that the logic for not copying over backfill tags is tested |
| 3238 | + ) |
| 3239 | + |
| 3240 | + for tag in BACKFILL_TAGS: |
| 3241 | + assert tag not in retried_run.tags.keys() |
| 3242 | + |
| 3243 | + # Since the backfill is alerady complete, it should not be processed by the backfill daemon and |
| 3244 | + # should remain in a completed state |
| 3245 | + list(execute_backfill_iteration(workspace_context, get_default_daemon_logger("BackfillDaemon"))) |
| 3246 | + backfill = instance.get_backfill(backfill_id) |
| 3247 | + assert backfill |
| 3248 | + assert backfill.status == BulkActionStatus.COMPLETED_SUCCESS |
| 3249 | + |
| 3250 | + assert retried_run.run_id not in [ |
| 3251 | + r.run_id for r in instance.get_runs(filters=RunsFilter.for_backfill(backfill_id)) |
| 3252 | + ] |
0 commit comments