From 02226d404035db1e4382c84a23f99a3d6e32df26 Mon Sep 17 00:00:00 2001 From: Riven Date: Mon, 8 Jun 2026 01:26:28 +0900 Subject: [PATCH 1/4] Fix scheduler.ignore_first_depends_on_past_by_default being silently ignored MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `[scheduler] ignore_first_depends_on_past_by_default` option (added in 2.3.0 via #22491) became a dead config in Airflow 3: it is still declared in config.yml with default "True", but no code reads it. The Task SDK hardcoded `DEFAULT_IGNORE_FIRST_DEPENDS_ON_PAST = False`, so the regression that #22491 fixed came back — adding a new task to an existing DAG whose default_args set `depends_on_past=True` leaves the task in no-status forever (PrevDagrunDep: "previous task instance has not run yet") and the DAG run never completes. Wire the default back to the config, matching how 2.10.5 read it (and how DEFAULT_RETRIES and friends in the same module still read conf). The value flows through OPERATOR_DEFAULTS into the serialization client_defaults, so both regular and mapped operators pick up the configured default on the scheduler side. --- .../tests/unit/models/test_mappedoperator.py | 3 +- .../serialization/test_dag_serialization.py | 2 +- .../unit/ti_deps/deps/test_prev_dagrun_dep.py | 59 +++++++++++++++++++ .../definitions/_internal/abstractoperator.py | 4 +- .../tests/task_sdk/bases/test_operator.py | 31 ++++++++++ 5 files changed, 96 insertions(+), 3 deletions(-) diff --git a/airflow-core/tests/unit/models/test_mappedoperator.py b/airflow-core/tests/unit/models/test_mappedoperator.py index e00a821e056d7..9ae4f54164423 100644 --- a/airflow-core/tests/unit/models/test_mappedoperator.py +++ b/airflow-core/tests/unit/models/test_mappedoperator.py @@ -33,6 +33,7 @@ from airflow.models.taskmap import TaskMap from airflow.providers.standard.operators.python import PythonOperator from airflow.sdk import DAG, BaseOperator, TaskGroup, setup, task, task_group, teardown +from airflow.sdk.definitions._internal.abstractoperator import DEFAULT_IGNORE_FIRST_DEPENDS_ON_PAST from airflow.serialization.definitions.baseoperator import SerializedBaseOperator from airflow.task.trigger_rule import TriggerRule from airflow.utils.state import TaskInstanceState @@ -1536,7 +1537,7 @@ def test_properties( assert not op.is_setup assert not op.is_teardown assert not op.depends_on_past - assert op.ignore_first_depends_on_past == bool(SerializedBaseOperator.ignore_first_depends_on_past) + assert op.ignore_first_depends_on_past == DEFAULT_IGNORE_FIRST_DEPENDS_ON_PAST assert not op.wait_for_downstream assert op.retries == SerializedBaseOperator.retries assert op.queue == SerializedBaseOperator.queue diff --git a/airflow-core/tests/unit/serialization/test_dag_serialization.py b/airflow-core/tests/unit/serialization/test_dag_serialization.py index f70cbda319c6f..bdab087fd828c 100644 --- a/airflow-core/tests/unit/serialization/test_dag_serialization.py +++ b/airflow-core/tests/unit/serialization/test_dag_serialization.py @@ -1633,7 +1633,7 @@ def test_no_new_fields_added_to_base_operator(self): "has_on_skipped_callback": False, "has_on_success_callback": False, "has_retry_policy": False, - "ignore_first_depends_on_past": False, + "ignore_first_depends_on_past": True, "is_setup": False, "is_teardown": False, "inlets": [], diff --git a/airflow-core/tests/unit/ti_deps/deps/test_prev_dagrun_dep.py b/airflow-core/tests/unit/ti_deps/deps/test_prev_dagrun_dep.py index 59a2274c5feb8..3a1c55f348767 100644 --- a/airflow-core/tests/unit/ti_deps/deps/test_prev_dagrun_dep.py +++ b/airflow-core/tests/unit/ti_deps/deps/test_prev_dagrun_dep.py @@ -96,6 +96,65 @@ def test_first_task_run_of_new_task(self, testing_dag_bundle): assert dep.is_met(ti=ti, dep_context=dep_context) mock_has_any_prior_tis.assert_called_once_with(ti, session=ANY) + def test_first_task_run_of_new_task_uses_config_default(self, testing_dag_bundle): + """ + A new task added to an existing DAG should pass its first run without an + explicit ``ignore_first_depends_on_past`` because the default is driven by + ``[scheduler] ignore_first_depends_on_past_by_default`` (``True``). + + This is the behavior that regressed when the config became dead: with a + hardcoded ``False`` default, the new task stayed blocked forever. + """ + dag = DAG("test_dag", schedule=timedelta(days=1), start_date=START_DATE) + old_task = BaseOperator( + task_id="test_task", + dag=dag, + depends_on_past=True, + start_date=START_DATE, + wait_for_downstream=False, + ) + scheduler_dag = sync_dag_to_db(dag) + # Old DAG run will include only TaskInstance of old_task + scheduler_dag.create_dagrun( + run_id="old_run", + state=TaskInstanceState.SUCCESS, + logical_date=old_task.start_date, + run_type=DagRunType.SCHEDULED, + data_interval=(old_task.start_date, old_task.start_date), + run_after=old_task.start_date, + triggered_by=DagRunTriggeredByType.TEST, + ) + + # New task relies on the config default rather than setting the flag explicitly. + new_task = BaseOperator( + task_id="new_task", + dag=dag, + depends_on_past=True, + start_date=old_task.start_date, + ) + assert new_task.ignore_first_depends_on_past is True + + logical_date = convert_to_utc(datetime(2016, 1, 2)) + dr = create_scheduler_dag(dag).create_dagrun( + run_id="new_run", + state=DagRunState.RUNNING, + logical_date=logical_date, + run_type=DagRunType.SCHEDULED, + data_interval=(logical_date, logical_date), + run_after=logical_date, + triggered_by=DagRunTriggeredByType.TEST, + ) + + ti = dr.get_task_instance(new_task.task_id) + ti.task = new_task + + dep_context = DepContext(ignore_depends_on_past=False) + dep = PrevDagrunDep() + + with patch.object(dep, "_has_any_prior_tis", Mock(return_value=False)) as mock_has_any_prior_tis: + assert dep.is_met(ti=ti, dep_context=dep_context) + mock_has_any_prior_tis.assert_called_once_with(ti, session=ANY) + @pytest.mark.parametrize( "kwargs", diff --git a/task-sdk/src/airflow/sdk/definitions/_internal/abstractoperator.py b/task-sdk/src/airflow/sdk/definitions/_internal/abstractoperator.py index 2d4ade8212768..e5c98dfdb32fa 100644 --- a/task-sdk/src/airflow/sdk/definitions/_internal/abstractoperator.py +++ b/task-sdk/src/airflow/sdk/definitions/_internal/abstractoperator.py @@ -60,7 +60,9 @@ MAXIMUM_PRIORITY_WEIGHT: int = 2147483647 DEFAULT_EXECUTOR: str | None = None DEFAULT_QUEUE: str = conf.get_mandatory_value("operators", "default_queue") -DEFAULT_IGNORE_FIRST_DEPENDS_ON_PAST: bool = False +DEFAULT_IGNORE_FIRST_DEPENDS_ON_PAST: bool = conf.getboolean( + "scheduler", "ignore_first_depends_on_past_by_default" +) DEFAULT_WAIT_FOR_PAST_DEPENDS_BEFORE_SKIPPING: bool = False DEFAULT_RETRIES: int = conf.getint("core", "default_task_retries", fallback=0) DEFAULT_RETRY_DELAY: datetime.timedelta = datetime.timedelta( diff --git a/task-sdk/tests/task_sdk/bases/test_operator.py b/task-sdk/tests/task_sdk/bases/test_operator.py index dcb5240a83dc8..f59c01b02271c 100644 --- a/task-sdk/tests/task_sdk/bases/test_operator.py +++ b/task-sdk/tests/task_sdk/bases/test_operator.py @@ -222,6 +222,37 @@ def test_email_on_actions(self): assert test_task.email_on_retry is False assert test_task.email_on_failure is True + def test_default_ignore_first_depends_on_past(self): + """The default comes from ``[scheduler] ignore_first_depends_on_past_by_default`` (``True``).""" + test_task = BaseOperator(task_id="test_default_ignore_first_depends_on_past") + assert test_task.ignore_first_depends_on_past is True + + def test_ignore_first_depends_on_past_override(self): + test_task = BaseOperator(task_id="test_ignore_first_dop", ignore_first_depends_on_past=False) + assert test_task.ignore_first_depends_on_past is False + + def test_default_ignore_first_depends_on_past_follows_config(self): + """ + The module-level default must be read from config, not hardcoded. + + Guards against the regression where ``[scheduler] + ignore_first_depends_on_past_by_default`` became a dead config that no + code read (the default was hardcoded, silently ignoring the option). + """ + import importlib + + from airflow.sdk.definitions._internal import abstractoperator + + with mock.patch.dict( + "os.environ", {"AIRFLOW__SCHEDULER__IGNORE_FIRST_DEPENDS_ON_PAST_BY_DEFAULT": "False"} + ): + try: + reloaded = importlib.reload(abstractoperator) + assert reloaded.DEFAULT_IGNORE_FIRST_DEPENDS_ON_PAST is False + finally: + # Restore the module-level constant to its config default for other tests. + importlib.reload(abstractoperator) + def test_incorrect_default_args(self): default_args = {"test_param": True, "extra_param": True} op = FakeOperator(default_args=default_args) From 278acaccd5ca30042c4c8cccef9e0573663d8f71 Mon Sep 17 00:00:00 2001 From: Riven Date: Mon, 8 Jun 2026 01:30:07 +0900 Subject: [PATCH 2/4] Add newsfragment for #68180 --- airflow-core/newsfragments/68180.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 airflow-core/newsfragments/68180.bugfix.rst diff --git a/airflow-core/newsfragments/68180.bugfix.rst b/airflow-core/newsfragments/68180.bugfix.rst new file mode 100644 index 0000000000000..19d59e44f6e42 --- /dev/null +++ b/airflow-core/newsfragments/68180.bugfix.rst @@ -0,0 +1 @@ +Restore the ``[scheduler] ignore_first_depends_on_past_by_default`` option, which was silently ignored in Airflow 3. The Task SDK hardcoded the default to ``False``, so a new task added to an existing DAG with ``depends_on_past=True`` was blocked by ``PrevDagrunDep`` forever and the DAG run never completed. The default is now read from the config again (``True`` by default), as it was in Airflow 2. From eee3a8597dda8c4d5164df06d6359b50261b9003 Mon Sep 17 00:00:00 2001 From: Riven Date: Mon, 8 Jun 2026 02:03:31 +0900 Subject: [PATCH 3/4] Update OpenLineage AF3 task-info expectations for restored default The OpenLineage task facet now reports ignore_first_depends_on_past=True on Airflow 3 (matching the AF2 expectations), since the scheduler config default is honored again. --- .../openlineage_base_complex_dag__af3.json | 28 +++++++++---------- .../openlineage_base_simple_dag__af3.json | 4 +-- .../unit/openlineage/utils/test_utils.py | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_base_complex_dag__af3.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_base_complex_dag__af3.json index 1c0d509d3ba28..2c7df9e030611 100644 --- a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_base_complex_dag__af3.json +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_base_complex_dag__af3.json @@ -330,7 +330,7 @@ "downstream_task_ids": "{{ 'task_2' in result and 'section_1.section_2.section_3.task_7' in result }}", "execution_timeout": "456.0 seconds", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "multiple_outputs": false, @@ -503,7 +503,7 @@ "downstream_task_ids": "{{ 'task_2' in result and 'section_1.section_2.section_3.task_7' in result }}", "execution_timeout": "456.0 seconds", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "multiple_outputs": false, @@ -675,7 +675,7 @@ "depends_on_past": false, "downstream_task_ids": "{{ 'task_3' in result and 'check_events' in result }}", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[{'uri': 'file://host1/dir2/file2.txt', 'extra': {}, 'type': 'asset'}, {'uri': 'unknown_scheme://host1/dir2/file3.txt', 'extra': {}, 'type': 'asset'}]", "mapped": false, "max_retry_delay": "42.0 seconds", @@ -852,7 +852,7 @@ "depends_on_past": false, "downstream_task_ids": "{{ 'task_3' in result and 'check_events' in result }}", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[{'uri': 'file://host1/dir2/file2.txt', 'extra': {}, 'type': 'asset'}, {'uri': 'unknown_scheme://host1/dir2/file3.txt', 'extra': {}, 'type': 'asset'}]", "mapped": false, "max_retry_delay": "42.0 seconds", @@ -1023,7 +1023,7 @@ "depends_on_past": false, "downstream_task_ids": "{{ 'task_4' in result and 'section_1.task_5' in result }}", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "multiple_outputs": false, @@ -1193,7 +1193,7 @@ "depends_on_past": false, "downstream_task_ids": "{{ 'task_4' in result and 'section_1.task_5' in result }}", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "multiple_outputs": false, @@ -1360,7 +1360,7 @@ "executor_config": {}, "external_dag_id": "external_dag_id", "external_task_id": "external_task_id", - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "max_active_tis_per_dag": 7, @@ -1519,7 +1519,7 @@ "executor_config": {}, "external_dag_id": "external_dag_id", "external_task_id": "external_task_id", - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "max_active_tis_per_dag": 7, @@ -1675,7 +1675,7 @@ "depends_on_past": false, "downstream_task_ids": "['section_1.section_2.task_6']", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": "{{ result == true }}", "multiple_outputs": false, @@ -1838,7 +1838,7 @@ "depends_on_past": false, "downstream_task_ids": "['section_1.section_2.task_6']", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": "{{ result == true }}", "multiple_outputs": false, @@ -2001,7 +2001,7 @@ "depends_on_past": false, "downstream_task_ids": "['check_events']", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "multiple_outputs": false, @@ -2174,7 +2174,7 @@ "depends_on_past": false, "downstream_task_ids": "['check_events']", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "multiple_outputs": false, @@ -2347,7 +2347,7 @@ "depends_on_past": false, "downstream_task_ids": "['check_events']", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "multiple_outputs": false, @@ -2519,7 +2519,7 @@ "depends_on_past": false, "downstream_task_ids": "['check_events']", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "multiple_outputs": false, diff --git a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_base_simple_dag__af3.json b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_base_simple_dag__af3.json index 0652f3d3de873..5c03936485c75 100644 --- a/providers/openlineage/tests/system/openlineage/expected_events/openlineage_base_simple_dag__af3.json +++ b/providers/openlineage/tests/system/openlineage/expected_events/openlineage_base_simple_dag__af3.json @@ -151,7 +151,7 @@ "depends_on_past": false, "downstream_task_ids": "['check_events']", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "multiple_outputs": false, @@ -277,7 +277,7 @@ "depends_on_past": false, "downstream_task_ids": "['check_events']", "executor_config": {}, - "ignore_first_depends_on_past": false, + "ignore_first_depends_on_past": true, "inlets": "[]", "mapped": false, "multiple_outputs": false, diff --git a/providers/openlineage/tests/unit/openlineage/utils/test_utils.py b/providers/openlineage/tests/unit/openlineage/utils/test_utils.py index 36fe5125a7f0f..6ab8a6ccd86cf 100644 --- a/providers/openlineage/tests/unit/openlineage/utils/test_utils.py +++ b/providers/openlineage/tests/unit/openlineage/utils/test_utils.py @@ -3197,7 +3197,7 @@ def __init__(self, *args, **kwargs): "execution_timeout": None, "executor_config": {}, "hitl_summary": "hitl_summary", - "ignore_first_depends_on_past": False, + "ignore_first_depends_on_past": True, "inlets": "[{'uri': 'uri1', 'extra': {'a': 1}, 'type': 'asset'}]", "mapped": False, "max_active_tis_per_dag": None, From 7bc1f9602ddbc0f9179fd57b3a33d95b1a49a31e Mon Sep 17 00:00:00 2001 From: Riven Date: Mon, 8 Jun 2026 02:59:38 +0900 Subject: [PATCH 4/4] Fix serialization ground truth and OpenLineage cross-version test - Add the new ignore_first_depends_on_past=True entry to the serialized DAG client_defaults ground truth (it now differs from the schema default). - Derive the expected ignore_first_depends_on_past in the OpenLineage test_task_info_af3 from the operator instead of hardcoding, so it passes against both older 3.x cores (default False) and 3.2+ (default True). --- .../tests/unit/serialization/test_dag_serialization.py | 1 + .../openlineage/tests/unit/openlineage/utils/test_utils.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/airflow-core/tests/unit/serialization/test_dag_serialization.py b/airflow-core/tests/unit/serialization/test_dag_serialization.py index bdab087fd828c..1b5b316b5d98f 100644 --- a/airflow-core/tests/unit/serialization/test_dag_serialization.py +++ b/airflow-core/tests/unit/serialization/test_dag_serialization.py @@ -187,6 +187,7 @@ def _operator_defaults(overrides): VAR = Encoding.VAR serialized_simple_dag_ground_truth = { "__version": 3, + "client_defaults": {"tasks": {"ignore_first_depends_on_past": True}}, "dag": { "default_args": { "__type": "dict", diff --git a/providers/openlineage/tests/unit/openlineage/utils/test_utils.py b/providers/openlineage/tests/unit/openlineage/utils/test_utils.py index 6ab8a6ccd86cf..d7e99d889e25c 100644 --- a/providers/openlineage/tests/unit/openlineage/utils/test_utils.py +++ b/providers/openlineage/tests/unit/openlineage/utils/test_utils.py @@ -3197,7 +3197,10 @@ def __init__(self, *args, **kwargs): "execution_timeout": None, "executor_config": {}, "hitl_summary": "hitl_summary", - "ignore_first_depends_on_past": True, + # Default comes from the installed core's [scheduler] + # ignore_first_depends_on_past_by_default, so derive it rather than + # hardcoding (True on Airflow 3.2+, False on older 3.x). + "ignore_first_depends_on_past": task_10.ignore_first_depends_on_past, "inlets": "[{'uri': 'uri1', 'extra': {'a': 1}, 'type': 'asset'}]", "mapped": False, "max_active_tis_per_dag": None,