From 6498ceb6e4460155f2956363bd08cc3d64da5f02 Mon Sep 17 00:00:00 2001 From: Bhavani Ravi Date: Sun, 26 Oct 2025 22:35:23 +0530 Subject: [PATCH 01/17] chore: move standard examples to provider 1. loading example dags as bundles 2. fixing testcases to use bundles instead of dagbag folders 3. fixing testcases to use example_dags from standard module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: André Ahlert --- .../airflow/dag_processing/bundles/manager.py | 20 +++++++++++++++++ .../src/airflow/dag_processing/dagbag.py | 7 ------ .../src/airflow/example_dags/standard | 1 - .../tests/unit/dag_processing/test_dagbag.py | 18 ++++++++------- .../tests/unit/models/test_dagcode.py | 18 +++++++++------ .../tests/unit/models/test_serialized_dag.py | 22 +++++++++---------- .../src/tests_common/test_utils/db.py | 4 ++++ 7 files changed, 56 insertions(+), 34 deletions(-) delete mode 120000 airflow-core/src/airflow/example_dags/standard diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index f2f66cd7d2ad5..f23fad633f3c3 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -16,6 +16,7 @@ # under the License. 
from __future__ import annotations +import os import warnings from typing import TYPE_CHECKING @@ -106,6 +107,24 @@ def _add_example_dag_bundle(bundle_config_list: list[_ExternalBundleConfig]): ) +def _add_provider_example_dags_to_bundle(bundle_config_list: list[_ExternalBundleConfig]): + from airflow import providers + + for provider_path in providers.__path__: + for name in os.listdir(provider_path): + example_dag_folder = os.path.join(provider_path, name, "example_dags") + if os.path.isdir(example_dag_folder): + bundle_config_list.append( + _ExternalBundleConfig( + name=f"airflow-provider-{name}-example-dags", + classpath="airflow.dag_processing.bundles.local.LocalDagBundle", + kwargs={ + "path": example_dag_folder, + }, + ) + ) + + def _is_safe_bundle_url(url: str) -> bool: """ Check if a bundle URL is safe to use. @@ -191,6 +210,7 @@ def parse_config(self) -> None: bundle_config_list = _parse_bundle_config(config_list) if conf.getboolean("core", "LOAD_EXAMPLES"): _add_example_dag_bundle(bundle_config_list) + _add_provider_example_dags_to_bundle(bundle_config_list) for bundle_config in bundle_config_list: if bundle_config.team_name and not conf.getboolean("core", "multi_team"): diff --git a/airflow-core/src/airflow/dag_processing/dagbag.py b/airflow-core/src/airflow/dag_processing/dagbag.py index dcaab93f1ab25..ca7d34cf16c26 100644 --- a/airflow-core/src/airflow/dag_processing/dagbag.py +++ b/airflow-core/src/airflow/dag_processing/dagbag.py @@ -477,13 +477,6 @@ def collect_dags( registry = get_importer_registry() files_to_parse = registry.list_dag_files(dag_folder, safe_mode=safe_mode) - if include_examples: - from airflow import example_dags - - example_dag_folder = next(iter(example_dags.__path__)) - - files_to_parse.extend(registry.list_dag_files(example_dag_folder, safe_mode=safe_mode)) - for filepath in files_to_parse: try: file_parse_start_dttm = timezone.utcnow() diff --git a/airflow-core/src/airflow/example_dags/standard 
b/airflow-core/src/airflow/example_dags/standard deleted file mode 120000 index 3c2ef23d52c55..0000000000000 --- a/airflow-core/src/airflow/example_dags/standard +++ /dev/null @@ -1 +0,0 @@ -../../../../providers/standard/src/airflow/providers/standard/example_dags \ No newline at end of file diff --git a/airflow-core/tests/unit/dag_processing/test_dagbag.py b/airflow-core/tests/unit/dag_processing/test_dagbag.py index a673f9f1b0d2a..1951937b720e7 100644 --- a/airflow-core/tests/unit/dag_processing/test_dagbag.py +++ b/airflow-core/tests/unit/dag_processing/test_dagbag.py @@ -28,6 +28,7 @@ import zipfile from copy import deepcopy from datetime import datetime, timedelta, timezone +from pathlib import Path from unittest import mock from unittest.mock import patch @@ -46,6 +47,7 @@ from airflow.models.dag import DagModel from airflow.models.dagwarning import DagWarning, DagWarningType from airflow.models.serialized_dag import SerializedDagModel +from airflow.providers.standard import example_dags as standard_example_dags from airflow.sdk import DAG, BaseOperator from tests_common.pytest_plugin import AIRFLOW_ROOT_PATH @@ -56,7 +58,7 @@ pytestmark = pytest.mark.db_test -example_dags_folder = AIRFLOW_ROOT_PATH / "airflow-core" / "src" / "airflow" / "example_dags" / "standard" +standard_example_dags_folder = Path(standard_example_dags.__file__).parent PY311 = sys.version_info >= (3, 11) PY313 = sys.version_info >= (3, 13) @@ -345,9 +347,9 @@ def test_get_existing_dag(self, tmp_path): """ Test that we're able to parse some example DAGs and retrieve them """ - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=True, bundle_name="test_bundle") + dagbag = DagBag(dag_folder=standard_example_dags_folder, include_examples=False, bundle_name="test_bundle") - some_expected_dag_ids = ["example_bash_operator", "example_branch_operator"] + some_expected_dag_ids = ["example_bash_operator", "example_python_operator"] for dag_id in some_expected_dag_ids: dag = 
dagbag.get_dag(dag_id) @@ -733,7 +735,7 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): _TestDagBag.process_file_calls += 1 super().process_file(filepath, only_if_updated, safe_mode) - dagbag = _TestDagBag(include_examples=True) + dagbag = _TestDagBag(dag_folder=standard_example_dags_folder) dagbag.process_file_calls # Should not call process_file again, since it's already loaded during init. @@ -745,9 +747,9 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): ("file_to_load", "expected"), ( pytest.param( - pathlib.Path(example_dags_folder) / "example_bash_operator.py", + pathlib.Path(standard_example_dags_folder) / "example_bash_operator.py", { - "example_bash_operator": f"{example_dags_folder.relative_to(AIRFLOW_ROOT_PATH) / 'example_bash_operator.py'}" + "example_bash_operator": f"{standard_example_dags_folder.relative_to(AIRFLOW_ROOT_PATH) / 'example_bash_operator.py'}" }, id="example_bash_operator", ), @@ -809,7 +811,7 @@ def test_refresh_py_dag(self, mock_dagmodel, tmp_path): Test that we can refresh an ordinary .py DAG """ dag_id = "example_bash_operator" - fileloc = str(example_dags_folder / "example_bash_operator.py") + fileloc = str(standard_example_dags_folder / "example_bash_operator.py") mock_dagmodel.return_value = DagModel() mock_dagmodel.return_value.last_expired = datetime.max.replace(tzinfo=timezone.utc) @@ -823,7 +825,7 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): _TestDagBag.process_file_calls += 1 return super().process_file(filepath, only_if_updated, safe_mode) - dagbag = _TestDagBag(dag_folder=os.fspath(tmp_path), include_examples=True) + dagbag = _TestDagBag(dag_folder=standard_example_dags_folder, include_examples=False) assert dagbag.process_file_calls == 1 dag = dagbag.get_dag(dag_id) diff --git a/airflow-core/tests/unit/models/test_dagcode.py b/airflow-core/tests/unit/models/test_dagcode.py index 6bb6e412442b0..5fdade754ee5c 100644 --- 
a/airflow-core/tests/unit/models/test_dagcode.py +++ b/airflow-core/tests/unit/models/test_dagcode.py @@ -77,10 +77,10 @@ def teardown_method(self): def _write_two_example_dags(self, session): example_dags = make_example_dags(example_dags_module) - bash_dag = example_dags["example_bash_operator"] - sync_dag_to_db(bash_dag, session=session) - dag_version = DagVersion.get_latest_version("example_bash_operator") - x = DagCode(dag_version, bash_dag.fileloc) + xcomargs_dag = example_dags["example_xcom_args"] + sync_dag_to_db(xcomargs_dag, session=session) + dag_version = DagVersion.get_latest_version("example_xcom_args") + x = DagCode(dag_version, xcomargs_dag.fileloc) session.add(x) session.commit() xcom_dag = example_dags["example_xcom"] @@ -89,7 +89,7 @@ def _write_two_example_dags(self, session): x = DagCode(dag_version, xcom_dag.fileloc) session.add(x) session.commit() - return [bash_dag, xcom_dag] + return [xcomargs_dag, xcom_dag] def _write_example_dags(self): example_dags = make_example_dags(example_dags_module) @@ -133,7 +133,9 @@ def test_code_can_be_read_when_no_access_to_file(self, testing_dag_bundle): Test that code can be retrieved from DB when you do not have access to Code file. Source Code should at least exist in one of DB or File. 
""" - example_dag = make_example_dags(example_dags_module).get("example_bash_operator") + from airflow.providers.standard import example_dags + + example_dag = make_example_dags(example_dags).get("example_bash_operator") sync_dag_to_db(example_dag) # Mock that there is no access to the Dag File @@ -146,7 +148,9 @@ def test_code_can_be_read_when_no_access_to_file(self, testing_dag_bundle): def test_db_code_created_on_serdag_change(self, session, testing_dag_bundle): """Test new DagCode is created in DB when ser dag is changed""" - example_dag = make_example_dags(example_dags_module).get("example_bash_operator") + from airflow.providers.standard import example_dags + + example_dag = make_example_dags(example_dags).get("example_bash_operator") sync_dag_to_db(example_dag, session=session).create_dagrun( run_id="test1", run_after=pendulum.datetime(2025, 1, 1, tz="UTC"), diff --git a/airflow-core/tests/unit/models/test_serialized_dag.py b/airflow-core/tests/unit/models/test_serialized_dag.py index 765b2adf206ae..dec06a346ec82 100644 --- a/airflow-core/tests/unit/models/test_serialized_dag.py +++ b/airflow-core/tests/unit/models/test_serialized_dag.py @@ -158,14 +158,14 @@ def my_callable2(): def test_serialized_dag_is_updated_if_dag_is_changed(self, testing_dag_bundle): """Test Serialized DAG is updated if DAG is changed""" example_dags = make_example_dags(example_dags_module) - example_bash_op_dag = example_dags.get("example_bash_operator") + example_params_trigger_ui = example_dags.get("example_params_trigger_ui") dag_updated = SDM.write_dag( - dag=LazyDeserializedDAG.from_dag(example_bash_op_dag), + dag=LazyDeserializedDAG.from_dag(example_params_trigger_ui), bundle_name="testing", ) assert dag_updated is True - s_dag = SDM.get(example_bash_op_dag.dag_id) + s_dag = SDM.get(example_params_trigger_ui.dag_id) s_dag.dag.create_dagrun( run_id="test1", run_after=pendulum.datetime(2025, 1, 1, tz="UTC"), @@ -177,28 +177,28 @@ def 
test_serialized_dag_is_updated_if_dag_is_changed(self, testing_dag_bundle): # Test that if DAG is not changed, Serialized DAG is not re-written and last_updated # column is not updated dag_updated = SDM.write_dag( - dag=LazyDeserializedDAG.from_dag(example_bash_op_dag), + dag=LazyDeserializedDAG.from_dag(example_params_trigger_ui), bundle_name="testing", ) - s_dag_1 = SDM.get(example_bash_op_dag.dag_id) + s_dag_1 = SDM.get(example_params_trigger_ui.dag_id) assert s_dag_1.dag_hash == s_dag.dag_hash assert s_dag.created_at == s_dag_1.created_at assert dag_updated is False # Update DAG - example_bash_op_dag.tags.add("new_tag") - assert example_bash_op_dag.tags == {"example", "example2", "new_tag"} + example_params_trigger_ui.tags.add("new_tag") + assert example_params_trigger_ui.tags == {"example", "new_tag", "params"} dag_updated = SDM.write_dag( - dag=LazyDeserializedDAG.from_dag(example_bash_op_dag), + dag=LazyDeserializedDAG.from_dag(example_params_trigger_ui), bundle_name="testing", ) - s_dag_2 = SDM.get(example_bash_op_dag.dag_id) + s_dag_2 = SDM.get(example_params_trigger_ui.dag_id) assert s_dag.created_at != s_dag_2.created_at assert s_dag.dag_hash != s_dag_2.dag_hash - assert s_dag_2.data["dag"]["tags"] == ["example", "example2", "new_tag"] + assert s_dag_2.data["dag"]["tags"] == ["example", "new_tag", "params"] assert dag_updated is True def test_read_dags(self): @@ -217,7 +217,7 @@ def test_read_all_dags_only_picks_the_latest_serdags(self, session): serialized_dags = SDM.read_all_dags() assert len(example_dags) == len(serialized_dags) - dag = example_dags.get("example_bash_operator") + dag = example_dags.get("example_params_trigger_ui") create_scheduler_dag(dag=dag).create_dagrun( run_id="test1", run_after=pendulum.datetime(2025, 1, 1, tz="UTC"), diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index 726b13036332b..76d62c3508d29 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ 
b/devel-common/src/tests_common/test_utils/db.py @@ -206,6 +206,10 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): except ImportError: from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] + for bundle in DagBundlesManager().get_all_dag_bundles(): + dagbag = DagBag(dag_folder=bundle.path, include_examples=include_examples) + sync_bag_to_db(dagbag, bundle.name, None, session=session) + sync_bag_to_db(dagbag, "dags-folder", None, session=session) elif AIRFLOW_V_3_0_PLUS: dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] From 3c8d3528497bc5ef9b80a7807449060460dddaed Mon Sep 17 00:00:00 2001 From: Bhavani Ravi Date: Mon, 27 Oct 2025 14:17:20 +0530 Subject: [PATCH 02/17] fix: failing testcases to load examples from dagbundle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: André Ahlert --- .../routes/public/test_dag_parsing.py | 4 ++-- .../unit/cli/commands/test_dag_command.py | 23 ++++++++----------- .../unit/cli/commands/test_task_command.py | 3 ++- .../src/tests_common/test_utils/db.py | 3 +-- .../standard/operators/test_trigger_dagrun.py | 2 +- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_parsing.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_parsing.py index d2abb5e672a5a..763e5e88669e2 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_parsing.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_parsing.py @@ -56,7 +56,7 @@ def test_201_and_400_requests(self, url_safe_serializer, session, test_client): assert response.status_code == 201 parsing_requests = session.scalars(select(DagPriorityParsingRequest)).all() assert len(parsing_requests) == 1 - assert parsing_requests[0].bundle_name == "dags-folder" + assert parsing_requests[0].bundle_name == 
"example_dags" assert parsing_requests[0].relative_fileloc == test_dag.relative_fileloc _check_last_log(session, dag_id=None, event="reparse_dag_file", logical_date=None) @@ -65,7 +65,7 @@ def test_201_and_400_requests(self, url_safe_serializer, session, test_client): assert response.status_code == 409 parsing_requests = session.scalars(select(DagPriorityParsingRequest)).all() assert len(parsing_requests) == 1 - assert parsing_requests[0].bundle_name == "dags-folder" + assert parsing_requests[0].bundle_name == "example_dags" assert parsing_requests[0].relative_fileloc == test_dag.relative_fileloc _check_last_log(session, dag_id=None, event="reparse_dag_file", logical_date=None) diff --git a/airflow-core/tests/unit/cli/commands/test_dag_command.py b/airflow-core/tests/unit/cli/commands/test_dag_command.py index 6c8a0e74672d3..298cf516163ac 100644 --- a/airflow-core/tests/unit/cli/commands/test_dag_command.py +++ b/airflow-core/tests/unit/cli/commands/test_dag_command.py @@ -85,7 +85,8 @@ class TestCliDags: @classmethod def setup_class(cls): - parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + parse_and_sync_to_db(os.devnull, include_examples=True) cls.parser = cli_parser.get_parser() @classmethod @@ -274,7 +275,7 @@ def test_next_execution(self, dag_id, delta, schedule, catchup, first, second, t # Rebuild Test DB for other tests clear_db_dags() - parse_and_sync_to_db(os.devnull, include_examples=True) + self.setup_class() @conf_vars({("core", "load_examples"): "false"}) @pytest.mark.parametrize( @@ -315,7 +316,6 @@ def test_next_execution_table_flag_with_no_next_run( clear_db_dags() parse_and_sync_to_db(os.devnull, include_examples=True) - @conf_vars({("core", "load_examples"): "true"}) def test_cli_report(self, stdout_capture): args = self.parser.parse_args(["dags", "report", "--output", "json"]) with stdout_capture as temp_stdout: @@ -326,7 +326,6 @@ def test_cli_report(self, stdout_capture): assert 
any(item["file"].endswith("example_complex.py") for item in data) assert any("example_complex" in item["dags"] for item in data) - @conf_vars({("core", "load_examples"): "true"}) def test_cli_get_dag_details(self, stdout_capture): args = self.parser.parse_args(["dags", "details", "example_complex", "--output", "yaml"]) with stdout_capture as temp_stdout: @@ -343,7 +342,6 @@ def test_cli_get_dag_details(self, stdout_capture): for value in dag_details_values: assert value in out - @conf_vars({("core", "load_examples"): "true"}) def test_cli_list_dags(self, stdout_capture): args = self.parser.parse_args(["dags", "list", "--output", "json"]) with stdout_capture as temp_stdout: @@ -354,11 +352,12 @@ def test_cli_list_dags(self, stdout_capture): assert key in dag_list[0] assert any("airflow/example_dags/example_complex.py" in d["fileloc"] for d in dag_list) - @conf_vars({("core", "load_examples"): "true"}) def test_cli_list_local_dags(self, stdout_capture): # Clear the database clear_db_dags() - args = self.parser.parse_args(["dags", "list", "--output", "json", "--local"]) + args = self.parser.parse_args( + ["dags", "list", "--output", "json", "--local", "--bundle-name", "example_dags"] + ) with stdout_capture as temp_stdout: dag_command.dag_list_dags(args) out = temp_stdout.getvalue() @@ -367,7 +366,7 @@ def test_cli_list_local_dags(self, stdout_capture): assert key in dag_list[0] assert any("airflow/example_dags/example_complex.py" in d["fileloc"] for d in dag_list) # Rebuild Test DB for other tests - parse_and_sync_to_db(os.devnull, include_examples=True) + self.setup_class() @conf_vars({("core", "load_examples"): "false"}) def test_cli_list_local_dags_with_bundle_name(self, configure_testing_dag_bundle, stdout_capture): @@ -388,9 +387,8 @@ def test_cli_list_local_dags_with_bundle_name(self, configure_testing_dag_bundle str(TEST_DAGS_FOLDER / "test_example_bash_operator.py") in d["fileloc"] for d in dag_list ) # Rebuild Test DB for other tests - 
parse_and_sync_to_db(os.devnull, include_examples=True) + self.setup_class() - @conf_vars({("core", "load_examples"): "true"}) def test_cli_list_dags_custom_cols(self, stdout_capture): args = self.parser.parse_args( ["dags", "list", "--output", "json", "--columns", "dag_id,last_parsed_time"] @@ -404,7 +402,6 @@ def test_cli_list_dags_custom_cols(self, stdout_capture): for key in ["fileloc", "owners", "is_paused"]: assert key not in dag_list[0] - @conf_vars({("core", "load_examples"): "true"}) def test_cli_list_dags_invalid_cols(self, stderr_capture): args = self.parser.parse_args(["dags", "list", "--output", "json", "--columns", "dag_id,invalid_col"]) with stderr_capture as temp_stderr: @@ -448,9 +445,8 @@ def test_cli_list_dags_prints_local_import_errors( assert "Failed to load all files." in out # Rebuild Test DB for other tests - parse_and_sync_to_db(os.devnull, include_examples=True) + self.setup_class() - @conf_vars({("core", "load_examples"): "true"}) @mock.patch("airflow.models.DagModel.get_dagmodel") def test_list_dags_none_get_dagmodel(self, mock_get_dagmodel, stdout_capture): mock_get_dagmodel.return_value = None @@ -463,7 +459,6 @@ def test_list_dags_none_get_dagmodel(self, mock_get_dagmodel, stdout_capture): assert key in dag_list[0] assert any("airflow/example_dags/example_complex.py" in d["fileloc"] for d in dag_list) - @conf_vars({("core", "load_examples"): "true"}) def test_dagbag_dag_col(self, session): dagbag = DBDagBag() dag_details = dag_command._get_dagbag_dag_details( diff --git a/airflow-core/tests/unit/cli/commands/test_task_command.py b/airflow-core/tests/unit/cli/commands/test_task_command.py index b9747b9412e67..7c327d36c7ec9 100644 --- a/airflow-core/tests/unit/cli/commands/test_task_command.py +++ b/airflow-core/tests/unit/cli/commands/test_task_command.py @@ -451,7 +451,8 @@ def test_task_state(self): ) def test_task_states_for_dag_run(self): - dag2 = DagBag().dags["example_python_operator"] + from 
airflow.providers.standard.example_dags.example_python_operator import dag as dag2 + lazy_deserialized_dag2 = LazyDeserializedDAG.from_dag(dag2) SerializedDagModel.write_dag(lazy_deserialized_dag2, bundle_name="testing") diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index 76d62c3508d29..9f98c5a7344ce 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -205,12 +205,11 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): from airflow.dag_processing.dagbag import sync_bag_to_db except ImportError: from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] - + sync_bag_to_db(dagbag, "dags-folder", None, session=session) for bundle in DagBundlesManager().get_all_dag_bundles(): dagbag = DagBag(dag_folder=bundle.path, include_examples=include_examples) sync_bag_to_db(dagbag, bundle.name, None, session=session) - sync_bag_to_db(dagbag, "dags-folder", None, session=session) elif AIRFLOW_V_3_0_PLUS: dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] else: diff --git a/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py b/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py index f4908336ab32f..499fe03e60f07 100644 --- a/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py +++ b/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py @@ -105,7 +105,7 @@ def teardown_method(self): if AIRFLOW_V_3_0_PLUS: from airflow.models.dagbundle import DagBundleModel - session.execute(delete(DagBundleModel)) + session.query(DagBundleModel).filter_by(name="test_bundle").delete(synchronize_session=False) session.commit() @pytest.mark.skipif(not AIRFLOW_V_3_0_PLUS, reason="Implementation is different for Airflow 2 & 3") From d5a538d6e17db74de197757887a9354a223d7511 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Thu, 30 Apr 2026 09:49:08 -0300 Subject: [PATCH 03/17] Resolve provider example DAGs via ProvidersManager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the directory walk over airflow.providers.__path__ with a lookup based on ProvidersManager. The previous approach silently skipped: - nested providers like apache-airflow-providers-common-sql, whose module path is airflow.providers.common.sql (one level deeper); - providers installed outside the airflow.providers namespace package, since they are not visible via os.listdir. The new implementation iterates over the providers registered through the apache_airflow_provider entry point, imports each provider module and adds its example_dags folder when present. Bundle names are now keyed on the canonical package name to keep them unique and stable across deployments. Signed-off-by: André Ahlert --- .../airflow/dag_processing/bundles/manager.py | 62 ++++++++++++++----- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index f23fad633f3c3..5a43c3ae50902 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -108,21 +108,55 @@ def _add_example_dag_bundle(bundle_config_list: list[_ExternalBundleConfig]): def _add_provider_example_dags_to_bundle(bundle_config_list: list[_ExternalBundleConfig]): - from airflow import providers - - for provider_path in providers.__path__: - for name in os.listdir(provider_path): - example_dag_folder = os.path.join(provider_path, name, "example_dags") - if os.path.isdir(example_dag_folder): - bundle_config_list.append( - _ExternalBundleConfig( - name=f"airflow-provider-{name}-example-dags", - classpath="airflow.dag_processing.bundles.local.LocalDagBundle", - kwargs={ - "path": 
example_dag_folder, - }, - ) + """ + Add an ``example_dags`` folder of every installed provider as a bundle. + + Provider locations are resolved through ``ProvidersManager`` instead of + walking ``airflow.providers.__path__`` so that: + + - nested providers (e.g. ``apache-airflow-providers-common-sql`` whose + module path is ``airflow.providers.common.sql``) are discovered; + - providers installed outside the ``airflow.providers`` namespace package + are discovered via their entry point. + """ + import importlib + import logging + + from airflow.providers_manager import ProvidersManager + + log = logging.getLogger(__name__) + seen: set[str] = set() + + for package_name in ProvidersManager().providers: + # apache-airflow-providers-foo-bar -> airflow.providers.foo.bar + if not package_name.startswith("apache-airflow-providers-"): + module_name = package_name.replace("-", "_") + else: + suffix = package_name[len("apache-airflow-providers-") :] + module_name = "airflow.providers." + suffix.replace("-", ".") + try: + module = importlib.import_module(module_name) + except ImportError: + log.debug("Could not import provider module %s for example DAG discovery", module_name) + continue + + for module_path in getattr(module, "__path__", []): + example_dag_folder = os.path.join(module_path, "example_dags") + if not os.path.isdir(example_dag_folder): + continue + bundle_name = f"airflow-provider-{package_name}-example-dags" + if bundle_name in seen: + continue + seen.add(bundle_name) + bundle_config_list.append( + _ExternalBundleConfig( + name=bundle_name, + classpath="airflow.dag_processing.bundles.local.LocalDagBundle", + kwargs={ + "path": example_dag_folder, + }, ) + ) def _is_safe_bundle_url(url: str) -> bool: From 13b635795b7d55602e8b15de6dc20636392af1a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Thu, 30 Apr 2026 10:05:54 -0300 Subject: [PATCH 04/17] Tests: relax test_get_all_bundle_names assertion MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit The set of provider example DAG bundles depends on which providers expose an example_dags folder, which is environment specific. Pin only the built-in bundles and assert the prefix of any extra entry. Signed-off-by: André Ahlert --- .../dag_processing/bundles/test_dag_bundle_manager.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py index 2c00e8aa25907..9bae2c4acbafb 100644 --- a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py +++ b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py @@ -466,4 +466,13 @@ def test_multiple_bundles_one_fails(clear_db, session): def test_get_all_bundle_names(): - assert DagBundlesManager().get_all_bundle_names() == ["dags-folder", "example_dags"] + bundle_names = DagBundlesManager().get_all_bundle_names() + # Built-in bundles are always present. + assert "dags-folder" in bundle_names + assert "example_dags" in bundle_names + # Any other bundle exposed here comes from a provider's example_dags + # folder discovered via ProvidersManager. Their presence depends on + # which providers are installed in the environment, so we only check + # the naming prefix instead of pinning an exact list. 
+ extra = [n for n in bundle_names if n not in {"dags-folder", "example_dags"}] + assert all(n.startswith("airflow-provider-") for n in extra) From 1b9e49c071cbde6ba50d085f0c835dd59f5f2a88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Thu, 30 Apr 2026 10:27:41 -0300 Subject: [PATCH 05/17] Address review feedback on example DAG loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apply remaining suggestions from the original review of the closed PR #57320: - bundles/manager.py: document that sync_bundles_to_db only reconciles bundle metadata and does not parse or write DAG rows, clarifying the signature confusion raised by @bhavaniravi during review. - tests/unit/dag_processing/test_dagbag.py: stop importing the standard provider's example_dags module at collection time. Move the import into a small helper plus a pytest fixture so tests that need the folder request it explicitly and the module remains collectable when the standard provider is not yet importable. The single parametrize case that referenced the folder now passes a relative file name and resolves the absolute path inside the test. - tests/unit/cli/commands/test_task_command.py: build a minimal DAG inline in test_task_states_for_dag_run instead of importing one from the standard provider's example_dags. The test only checks CLI behaviour around a known dag_id/task_id, so reproducing the name and a single task is enough to keep the core test decoupled from the standard provider's example DAGs. 
Signed-off-by: André Ahlert --- .../airflow/dag_processing/bundles/manager.py | 9 ++++ .../unit/cli/commands/test_task_command.py | 14 +++++- .../tests/unit/dag_processing/test_dagbag.py | 50 +++++++++++++------ 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 5a43c3ae50902..199af8f52c5db 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -264,6 +264,15 @@ def parse_config(self) -> None: @provide_session def sync_bundles_to_db(self, *, session: Session = NEW_SESSION) -> None: + """ + Persist the configured DAG bundles into ``DagBundleModel`` rows. + + This only reconciles bundle metadata, not the DAGs contained in them. + Parsing each bundle's DAG files and writing the resulting + ``DagModel`` / ``SerializedDagModel`` rows is the responsibility of + ``DagBag`` plus ``sync_bag_to_db`` (or, in production, the DAG + processor); calling this method does not trigger that work. + """ self.log.debug("Syncing DAG bundles to the database") def _extract_and_sign_template(bundle_name: str) -> tuple[str | None, dict]: diff --git a/airflow-core/tests/unit/cli/commands/test_task_command.py b/airflow-core/tests/unit/cli/commands/test_task_command.py index 7c327d36c7ec9..e9331b8331797 100644 --- a/airflow-core/tests/unit/cli/commands/test_task_command.py +++ b/airflow-core/tests/unit/cli/commands/test_task_command.py @@ -451,7 +451,19 @@ def test_task_state(self): ) def test_task_states_for_dag_run(self): - from airflow.providers.standard.example_dags.example_python_operator import dag as dag2 + # Build a minimal DAG inline rather than importing one from the + # standard provider's example_dags. 
The test only asserts CLI + # behaviour around a known dag_id/task_id pair, so reproducing the + # name and a single task is enough and keeps this core test + # decoupled from the standard provider's example DAGs. + from airflow.sdk import DAG + + with DAG( + dag_id="example_python_operator", + schedule=None, + start_date=timezone.datetime(2021, 1, 1), + ) as dag2: + BashOperator(task_id="print_the_context", bash_command="echo hello") lazy_deserialized_dag2 = LazyDeserializedDAG.from_dag(dag2) diff --git a/airflow-core/tests/unit/dag_processing/test_dagbag.py b/airflow-core/tests/unit/dag_processing/test_dagbag.py index 1951937b720e7..49fa6f8ebe950 100644 --- a/airflow-core/tests/unit/dag_processing/test_dagbag.py +++ b/airflow-core/tests/unit/dag_processing/test_dagbag.py @@ -47,7 +47,6 @@ from airflow.models.dag import DagModel from airflow.models.dagwarning import DagWarning, DagWarningType from airflow.models.serialized_dag import SerializedDagModel -from airflow.providers.standard import example_dags as standard_example_dags from airflow.sdk import DAG, BaseOperator from tests_common.pytest_plugin import AIRFLOW_ROOT_PATH @@ -58,7 +57,25 @@ pytestmark = pytest.mark.db_test -standard_example_dags_folder = Path(standard_example_dags.__file__).parent + +def _standard_example_dags_folder() -> Path: + """ + Return the filesystem path of the standard provider's ``example_dags``. + + Importing the provider lazily keeps the test module collectable in + environments where the standard provider is not yet installed. The + tests that actually need the folder will fail explicitly when the + provider is missing, instead of breaking pytest collection. 
+ """ + from airflow.providers.standard import example_dags + + return Path(example_dags.__file__).parent + + +@pytest.fixture +def standard_example_dags_folder() -> Path: + return _standard_example_dags_folder() + PY311 = sys.version_info >= (3, 11) PY313 = sys.version_info >= (3, 13) @@ -343,11 +360,13 @@ def test_dagbag_with_bundle_name(self, tmp_path): dagbag2 = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) assert dagbag2.bundle_name is None - def test_get_existing_dag(self, tmp_path): + def test_get_existing_dag(self, tmp_path, standard_example_dags_folder): """ Test that we're able to parse some example DAGs and retrieve them """ - dagbag = DagBag(dag_folder=standard_example_dags_folder, include_examples=False, bundle_name="test_bundle") + dagbag = DagBag( + dag_folder=standard_example_dags_folder, include_examples=False, bundle_name="test_bundle" + ) some_expected_dag_ids = ["example_bash_operator", "example_python_operator"] @@ -716,7 +735,7 @@ def test_process_file_valid_param_check(self, tmp_path): assert len(dagbag.dags) == len(valid_dag_files) @patch.object(DagModel, "get_current") - def test_get_dag_without_refresh(self, mock_dagmodel): + def test_get_dag_without_refresh(self, mock_dagmodel, standard_example_dags_folder): """ Test that, once a DAG is loaded, it doesn't get refreshed again if it hasn't been expired. 
@@ -744,25 +763,24 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): assert dagbag.process_file_calls == 1 @pytest.mark.parametrize( - ("file_to_load", "expected"), + ("file_name", "expected_dag_id"), ( pytest.param( - pathlib.Path(standard_example_dags_folder) / "example_bash_operator.py", - { - "example_bash_operator": f"{standard_example_dags_folder.relative_to(AIRFLOW_ROOT_PATH) / 'example_bash_operator.py'}" - }, + "example_bash_operator.py", + "example_bash_operator", id="example_bash_operator", ), ), ) - def test_get_dag_registration(self, file_to_load, expected): + def test_get_dag_registration(self, file_name, expected_dag_id, standard_example_dags_folder): pytest.importorskip("system.standard") + file_to_load = standard_example_dags_folder / file_name + expected_path = standard_example_dags_folder.relative_to(AIRFLOW_ROOT_PATH) / file_name dagbag = DagBag(dag_folder=os.devnull, include_examples=False) dagbag.process_file(os.fspath(file_to_load)) - for dag_id, path in expected.items(): - dag = dagbag.get_dag(dag_id) - assert dag, f"{dag_id} was bagged" - assert dag.fileloc.endswith(path) + dag = dagbag.get_dag(expected_dag_id) + assert dag, f"{expected_dag_id} was bagged" + assert dag.fileloc.endswith(str(expected_path)) @pytest.mark.parametrize( ("expected"), @@ -806,7 +824,7 @@ def test_dag_registration_with_failure_zipped(self, zip_with_valid_dag_and_dup_t assert [dag.dag_id for dag in found] == ["test_example_bash_operator"] @patch.object(DagModel, "get_current") - def test_refresh_py_dag(self, mock_dagmodel, tmp_path): + def test_refresh_py_dag(self, mock_dagmodel, tmp_path, standard_example_dags_folder): """ Test that we can refresh an ordinary .py DAG """ From 552e1ab46d82062a91e1755a4c4f0c828e4956d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Thu, 30 Apr 2026 10:49:34 -0300 Subject: [PATCH 06/17] Tests: use SQLAlchemy 2.0 delete() in trigger_dagrun teardown MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The teardown deletes a single DagBundleModel row keyed by name. Use session.execute(delete(...).where(...)) instead of the deprecated session.query(...).filter_by(...).delete() form so prek's prevent-deprecated-sqlalchemy-usage hook stays clean. Signed-off-by: André Ahlert --- .../tests/unit/standard/operators/test_trigger_dagrun.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py b/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py index 499fe03e60f07..972503e6f1e20 100644 --- a/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py +++ b/providers/standard/tests/unit/standard/operators/test_trigger_dagrun.py @@ -105,7 +105,7 @@ def teardown_method(self): if AIRFLOW_V_3_0_PLUS: from airflow.models.dagbundle import DagBundleModel - session.query(DagBundleModel).filter_by(name="test_bundle").delete(synchronize_session=False) + session.execute(delete(DagBundleModel).where(DagBundleModel.name == "test_bundle")) session.commit() @pytest.mark.skipif(not AIRFLOW_V_3_0_PLUS, reason="Implementation is different for Airflow 2 & 3") From 8d96aade7b38f4b037b7e3c6be41bf92443713d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 1 May 2026 11:32:47 -0300 Subject: [PATCH 07/17] Tests: update bundle_name for example_python_operator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After moving the standard provider's example DAGs into their own bundle, example_python_operator no longer lives in the dags-folder bundle. Update the 24 expected payloads in test_task_instances.py that asserted bundle_name='dags-folder' to the new airflow-provider-apache-airflow-providers-standard-example-dags bundle name. The lone sync_bag_to_db('dags-folder', ...) call in this file is unrelated; it registers a synthetic dag built by dag_maker. 
Signed-off-by: André Ahlert --- .../routes/public/test_task_instances.py | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py index 4097a8682d587..3e84912af3b2a 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py @@ -201,7 +201,7 @@ def test_should_respond_200(self, test_client, session): assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -359,7 +359,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -423,7 +423,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -479,7 +479,7 @@ def test_should_respond_200_task_instance_with_rendered(self, test_client, sessi assert response.json() == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": 
"airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -599,7 +599,7 @@ def test_should_respond_200_mapped_task_instance_with_rtif(self, test_client, se assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2634,7 +2634,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2680,7 +2680,7 @@ def test_should_respond_200_with_different_try_numbers(self, test_client, try_nu "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2757,7 +2757,7 @@ def test_should_respond_200_with_mapped_task_at_different_try_numbers( "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2829,7 +2829,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": 
"airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2876,7 +2876,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -3682,7 +3682,7 @@ def test_should_respond_200_with_dag_run_id( "dag_id": "example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4174,7 +4174,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4211,7 +4211,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][1]["dag_version"]["created_at"], @@ -4282,7 +4282,7 @@ def test_ti_in_retry_state_not_returned(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": 
"airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4365,7 +4365,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4402,7 +4402,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][1]["dag_version"]["created_at"], @@ -4604,7 +4604,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4880,7 +4880,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "dag_id": "example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": mock.ANY, @@ -5018,7 +5018,7 @@ def test_update_mask_set_note_should_respond_200( "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + 
"bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5081,7 +5081,7 @@ def test_set_note_should_respond_200(self, test_client, session): "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5162,7 +5162,7 @@ def test_set_note_should_respond_200_mapped_task_with_rtif(self, test_client, se "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5245,7 +5245,7 @@ def test_set_note_should_respond_200_mapped_task_summary_with_rtif(self, test_cl "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_ti["dag_version"]["created_at"], @@ -5432,7 +5432,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5720,7 +5720,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "dag_id": 
"example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "dags-folder", + "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": mock.ANY, From 27d4a6fd5d41a4b69d3d9cd0f168a81999ca852d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 1 May 2026 13:26:26 -0300 Subject: [PATCH 08/17] Tests: avoid double-loading example DAGs in parse_and_sync_to_db MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Airflow 3.1+, parse_and_sync_to_db iterates every registered DAG bundle (including example_dags and airflow-provider-*-example-dags) and syncs them with their own bundle name. The leading DagBag(dag_folder=folder, include_examples=True) was also pulling example DAGs into the dags-folder bundle, so each example DAG ended up registered under two different bundles. The duplicated rows then violated the (asset_id, dag_id) unique constraint on dag_schedule_asset_reference and broke unrelated tests (notably the fab provider tests under compat). Force include_examples=False on the 3.1+ path; the bundle loop is already responsible for loading example DAGs from their own bundles. The 2.x and 3.0 paths are untouched. 
Signed-off-by: André Ahlert --- devel-common/src/tests_common/test_utils/db.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index 9f98c5a7344ce..a928b09ed58cb 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -199,20 +199,30 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): DagBundlesManager().sync_bundles_to_db(session=session) session.flush() - dagbag = DagBag(dag_folder=folder, include_examples=include_examples) if AIRFLOW_V_3_1_PLUS: try: from airflow.dag_processing.dagbag import sync_bag_to_db except ImportError: from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] + # On 3.1+, example DAGs are exposed as their own bundles + # (``example_dags`` for core, ``airflow-provider-*-example-dags`` + # for each provider that ships an ``example_dags`` folder). The + # bundle loop below already syncs every one of them, so the + # ``dags-folder`` DagBag must NOT pull example DAGs in too, + # otherwise the same DAG gets registered under two bundles and + # ``dag_schedule_asset_reference`` rows collide on the unique + # ``(asset_id, dag_id)`` constraint. 
+ dagbag = DagBag(dag_folder=folder, include_examples=False) sync_bag_to_db(dagbag, "dags-folder", None, session=session) for bundle in DagBundlesManager().get_all_dag_bundles(): - dagbag = DagBag(dag_folder=bundle.path, include_examples=include_examples) - sync_bag_to_db(dagbag, bundle.name, None, session=session) + bundle_dagbag = DagBag(dag_folder=bundle.path, include_examples=False) + sync_bag_to_db(bundle_dagbag, bundle.name, None, session=session) elif AIRFLOW_V_3_0_PLUS: + dagbag = DagBag(dag_folder=folder, include_examples=include_examples) dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] else: + dagbag = DagBag(dag_folder=folder, include_examples=include_examples) dagbag.sync_to_db(session=session) # type: ignore[attr-defined] return dagbag From b14c2ff637c9d0d9aa5f7a73cc3744c6b0eb3a07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 1 May 2026 16:22:58 -0300 Subject: [PATCH 09/17] Trigger CI rerun MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: André Ahlert From 4be51a755db191561fc7203db28683d146029608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Sat, 2 May 2026 03:07:21 -0300 Subject: [PATCH 10/17] Update devel-common/src/tests_common/test_utils/db.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jens Scheffler <95105677+jscheffl@users.noreply.github.com> Signed-off-by: André Ahlert --- devel-common/src/tests_common/test_utils/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index a928b09ed58cb..d1246b8b54654 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -204,7 +204,7 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): from 
airflow.dag_processing.dagbag import sync_bag_to_db except ImportError: from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] - # On 3.1+, example DAGs are exposed as their own bundles + # On 3.3+, example DAGs are exposed as their own bundles # (``example_dags`` for core, ``airflow-provider-*-example-dags`` # for each provider that ships an ``example_dags`` folder). The # bundle loop below already syncs every one of them, so the From 939d2ad4472156d6547a0943add8c854f21e7b34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Sat, 2 May 2026 03:07:45 -0300 Subject: [PATCH 11/17] Update airflow-core/src/airflow/dag_processing/bundles/manager.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jens Scheffler <95105677+jscheffl@users.noreply.github.com> Signed-off-by: André Ahlert --- airflow-core/src/airflow/dag_processing/bundles/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index 199af8f52c5db..da54d59ecf9a7 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -137,7 +137,7 @@ def _add_provider_example_dags_to_bundle(bundle_config_list: list[_ExternalBundl try: module = importlib.import_module(module_name) except ImportError: - log.debug("Could not import provider module %s for example DAG discovery", module_name) + log.warning("Could not import provider module %s for example DAG discovery", module_name) continue for module_path in getattr(module, "__path__", []): From bfc43c102cec888905d20382cebd5803ea466737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Sun, 3 May 2026 10:00:53 -0300 Subject: [PATCH 12/17] Address review feedback on provider example DAG bundle discovery MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Hoist `importlib`, `logging`, and `ProvidersManager` to module top in `dag_processing/bundles/manager.py` and add a module-level logger. - Reverse the `apache-airflow-providers-` prefix check so the canonical case reads first, per @jscheffl's nit. - Broaden the exception handler around `import_module` and the subsequent `__path__` access to `Exception` with `log.exception`, so a provider with a custom `__getattr__` can no longer crash config loading on the scheduler/api-server. - Switch the provider-example bundle dedup key from `bundle_name` to the resolved `example_dag_folder` and document the namespace-package scenario the guard exists for (multiple `airflow.providers.common.*` distributions sharing one namespace via `pkgutil.extend_path`). - Rename the per-provider bundle from `airflow-provider-{package}-example-dags` to `{package}-example-dags`, matching the package distribution name surfaced to users in REST API responses and `pip list`. - Emit a `DeprecationWarning` from `DagBag.collect_dags` and from `tests_common.test_utils.db.parse_and_sync_to_db` when callers pass `include_examples=True`, and update the docstrings to direct callers at the `[core] load_examples` configuration option. - Migrate the in-tree `parse_and_sync_to_db(..., include_examples=True)` callers to use `conf_vars({("core", "load_examples"): "true"})`. - Update `test_task_instances.py` bundle-name assertions and the `test_get_all_bundle_names` suffix check for the new format. - Add `airflow-core/newsfragments/66161.significant.rst` covering the user-visible REST-API bundle-name change. The package-name -> module-path heuristic is kept for now; replacing it with an authoritative field on `ProviderInfo` is tracked in #66305. 
Signed-off-by: André Ahlert --- .../newsfragments/66161.significant.rst | 58 +++++++++++++++++++ .../airflow/dag_processing/bundles/manager.py | 40 ++++++++----- .../src/airflow/dag_processing/dagbag.py | 22 ++++++- .../tests/unit/api_fastapi/conftest.py | 3 +- .../routes/public/test_task_instances.py | 48 +++++++-------- .../unit/cli/commands/test_asset_command.py | 4 +- .../cli/commands/test_backfill_command.py | 4 +- .../unit/cli/commands/test_dag_command.py | 2 +- .../unit/cli/commands/test_task_command.py | 3 +- .../bundles/test_dag_bundle_manager.py | 4 +- .../src/tests_common/test_utils/db.py | 25 +++++++- .../fab/tests/unit/fab/www/views/conftest.py | 3 +- 12 files changed, 165 insertions(+), 51 deletions(-) create mode 100644 airflow-core/newsfragments/66161.significant.rst diff --git a/airflow-core/newsfragments/66161.significant.rst b/airflow-core/newsfragments/66161.significant.rst new file mode 100644 index 0000000000000..e0722395aa4a4 --- /dev/null +++ b/airflow-core/newsfragments/66161.significant.rst @@ -0,0 +1,58 @@ +Provider example DAGs are exposed as dedicated bundles + +Example DAGs that ship with provider distributions are now discovered via +``ProvidersManager`` and registered as their own DAG bundles, one per +provider that ships an ``example_dags/`` folder. Bundle names follow the +shape ``apache-airflow-providers-<name>-example-dags`` (for +canonical Apache providers) or ``<package-name>-example-dags`` (for +third-party providers). The ``[core] load_examples`` configuration option +remains the single switch that controls whether any example bundles are +registered. + +**What changed:** + +- Example DAGs that previously came in under the implicit ``dags-folder`` + bundle are now persisted in ``DagBundleModel`` rows and emitted in REST + API responses (``GET /api/v2/dags/{dag_id}/dag-versions`` and the + ``bundle_name`` field on task-instance responses) under the new + per-provider bundle names.
+- Nested providers such as ``apache-airflow-providers-common-sql`` are + discovered correctly (previously they were missed because discovery + walked ``airflow.providers.__path__`` directly). + +**Behaviour changes:** + +- Clients filtering or tracking bundles by ``"dags-folder"`` for + previously-shipped example DAGs (e.g. ``example_python_operator``) need + to update to the new per-provider bundle names. The DAG identifiers + themselves are unchanged. + +**Deprecations:** + +- The ``include_examples`` parameter on ``DagBag.collect_dags`` and on + ``tests_common.test_utils.db.parse_and_sync_to_db`` is now back-compat + only. Passing ``include_examples=True`` emits a + :class:`DeprecationWarning`. Tests and external callers that need + example DAGs loaded should set the ``[core] load_examples`` config to + ``true`` (e.g. via ``conf_vars({("core", "load_examples"): "true"})``) + instead. + +* Types of change + + * [ ] Dag changes + * [x] Config changes + * [x] API changes + * [ ] CLI changes + * [x] Behaviour changes + * [ ] Plugin changes + * [ ] Dependency changes + * [x] Code interface changes + +* Migration rules needed + + * Update clients that filter REST API responses by ``bundle_name`` to + match the new per-provider bundle names for example DAGs. + * Replace ``include_examples=True`` calls to ``DagBag`` / + ``parse_and_sync_to_db`` with + ``conf_vars({("core", "load_examples"): "true"})`` (or equivalent + configuration) before the deprecated argument is removed. diff --git a/airflow-core/src/airflow/dag_processing/bundles/manager.py b/airflow-core/src/airflow/dag_processing/bundles/manager.py index da54d59ecf9a7..78c54266eda9f 100644 --- a/airflow-core/src/airflow/dag_processing/bundles/manager.py +++ b/airflow-core/src/airflow/dag_processing/bundles/manager.py @@ -16,6 +16,8 @@ # under the License. 
from __future__ import annotations +import importlib +import logging import os import warnings from typing import TYPE_CHECKING @@ -30,6 +32,7 @@ from airflow.exceptions import AirflowConfigException from airflow.models.dagbundle import DagBundleModel from airflow.models.team import Team +from airflow.providers_manager import ProvidersManager from airflow.utils.log.logging_mixin import LoggingMixin from airflow.utils.session import NEW_SESSION, provide_session @@ -38,6 +41,8 @@ from sqlalchemy.orm import Session +log = logging.getLogger(__name__) + _example_dag_bundle_name = "example_dags" @@ -119,35 +124,38 @@ def _add_provider_example_dags_to_bundle(bundle_config_list: list[_ExternalBundl - providers installed outside the ``airflow.providers`` namespace package are discovered via their entry point. """ - import importlib - import logging - - from airflow.providers_manager import ProvidersManager - - log = logging.getLogger(__name__) + # Dedup on the resolved on-disk folder rather than the bundle name: distributions + # under ``airflow.providers.common.*`` use ``pkgutil.extend_path``, so when several + # ``common-*`` packages are installed ``airflow.providers.common.__path__`` has + # multiple entries and the inner loop iterates more than once. Path-based dedup + # only skips when the same folder is seen twice; distinct folders are preserved. seen: set[str] = set() for package_name in ProvidersManager().providers: - # apache-airflow-providers-foo-bar -> airflow.providers.foo.bar - if not package_name.startswith("apache-airflow-providers-"): - module_name = package_name.replace("-", "_") - else: + # Heuristic: derive the import path from the canonical + # ``apache-airflow-providers-*`` distribution name. Tracked as a follow-up + # to record the provider module path on ``ProviderInfo`` (see + # https://github.com/apache/airflow/issues/66305). 
+ if package_name.startswith("apache-airflow-providers-"): suffix = package_name[len("apache-airflow-providers-") :] module_name = "airflow.providers." + suffix.replace("-", ".") + else: + module_name = package_name.replace("-", "_") try: module = importlib.import_module(module_name) - except ImportError: - log.warning("Could not import provider module %s for example DAG discovery", module_name) + module_paths = list(getattr(module, "__path__", [])) + except Exception: + log.exception("Could not load provider module %s for example DAG discovery", module_name) continue - for module_path in getattr(module, "__path__", []): + for module_path in module_paths: example_dag_folder = os.path.join(module_path, "example_dags") if not os.path.isdir(example_dag_folder): continue - bundle_name = f"airflow-provider-{package_name}-example-dags" - if bundle_name in seen: + if example_dag_folder in seen: continue - seen.add(bundle_name) + seen.add(example_dag_folder) + bundle_name = f"{package_name}-example-dags" bundle_config_list.append( _ExternalBundleConfig( name=bundle_name, diff --git a/airflow-core/src/airflow/dag_processing/dagbag.py b/airflow-core/src/airflow/dag_processing/dagbag.py index ca7d34cf16c26..7d25c4fa83dd9 100644 --- a/airflow-core/src/airflow/dag_processing/dagbag.py +++ b/airflow-core/src/airflow/dag_processing/dagbag.py @@ -172,8 +172,12 @@ class DagBag(LoggingMixin): that one system can run multiple, independent settings sets. :param dag_folder: the folder to scan to find DAGs - :param include_examples: whether to include the examples that ship - with airflow or not + :param include_examples: back-compat-only on Airflow 3.1+. Example DAGs are + now exposed as dedicated bundles (``example_dags`` for core, + ``apache-airflow-providers-*-example-dags`` for each provider that + ships an ``example_dags`` folder), and the ``[core] load_examples`` + config controls whether those bundles are registered. 
Passing + ``include_examples=True`` here emits a :class:`DeprecationWarning`. :param safe_mode: when ``False``, scans all python modules for dags. When ``True`` uses heuristics (files containing ``DAG`` and ``airflow`` strings) to filter python modules to scan for dags. @@ -465,7 +469,21 @@ def collect_dags( **Note**: The patterns in ``.airflowignore`` are interpreted as either un-anchored regexes or gitignore-like glob expressions, depending on the ``DAG_IGNORE_FILE_SYNTAX`` configuration parameter. + + ``include_examples`` is a back-compat-only argument on Airflow 3.1+; + example DAGs are loaded via dedicated bundles gated by the + ``[core] load_examples`` configuration option. Passing + ``include_examples=True`` emits a :class:`DeprecationWarning`. """ + if include_examples is True: + warnings.warn( + "include_examples=True is deprecated for DagBag.collect_dags. " + "Example DAGs are now loaded via dedicated bundles controlled by " + "the [core] load_examples configuration option. This argument " + "will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) self.log.info("Filling up the DagBag from %s", dag_folder) dag_folder = dag_folder or self.dag_folder # Used to store stats around DagBag processing diff --git a/airflow-core/tests/unit/api_fastapi/conftest.py b/airflow-core/tests/unit/api_fastapi/conftest.py index aace17f8a1479..03c43a178a090 100644 --- a/airflow-core/tests/unit/api_fastapi/conftest.py +++ b/airflow-core/tests/unit/api_fastapi/conftest.py @@ -187,7 +187,8 @@ def make_dag_with_multiple_versions(dag_maker, configure_git_connection_for_dag_ def dagbag(): from airflow.models.dagbag import DBDagBag - parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + parse_and_sync_to_db(os.devnull) return DBDagBag() diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py 
b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py index 3e84912af3b2a..91aeec2cd92a8 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py @@ -201,7 +201,7 @@ def test_should_respond_200(self, test_client, session): assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -359,7 +359,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -423,7 +423,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -479,7 +479,7 @@ def test_should_respond_200_task_instance_with_rendered(self, test_client, sessi assert response.json() == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": 
response_data["dag_version"]["created_at"], @@ -599,7 +599,7 @@ def test_should_respond_200_mapped_task_instance_with_rtif(self, test_client, se assert response_data == { "dag_id": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2634,7 +2634,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2680,7 +2680,7 @@ def test_should_respond_200_with_different_try_numbers(self, test_client, try_nu "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2757,7 +2757,7 @@ def test_should_respond_200_with_mapped_task_at_different_try_numbers( "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2829,7 +2829,7 @@ def test_should_respond_200_with_task_state_in_deferred(self, test_client, sessi "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + 
"bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -2876,7 +2876,7 @@ def test_should_respond_200_with_task_state_in_removed(self, test_client, sessio "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["dag_version"]["created_at"], @@ -3682,7 +3682,7 @@ def test_should_respond_200_with_dag_run_id( "dag_id": "example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4174,7 +4174,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4211,7 +4211,7 @@ def test_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][1]["dag_version"]["created_at"], @@ -4282,7 +4282,7 @@ def test_ti_in_retry_state_not_returned(self, test_client, session): "unixname": 
getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4365,7 +4365,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4402,7 +4402,7 @@ def test_mapped_task_should_respond_200(self, test_client, session): "unixname": getuser(), "dag_run_id": "TEST_DAG_RUN_ID", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][1]["dag_version"]["created_at"], @@ -4604,7 +4604,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -4880,7 +4880,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "dag_id": "example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": 
"apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": mock.ANY, @@ -5018,7 +5018,7 @@ def test_update_mask_set_note_should_respond_200( "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5081,7 +5081,7 @@ def test_set_note_should_respond_200(self, test_client, session): "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5162,7 +5162,7 @@ def test_set_note_should_respond_200_mapped_task_with_rtif(self, test_client, se "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5245,7 +5245,7 @@ def test_set_note_should_respond_200_mapped_task_summary_with_rtif(self, test_cl "dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_ti["dag_version"]["created_at"], @@ -5432,7 +5432,7 @@ def test_should_call_mocked_api(self, mock_set_ti_state, test_client, session): 
"dag_id": self.DAG_ID, "dag_display_name": self.DAG_DISPLAY_NAME, "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": response_data["task_instances"][0]["dag_version"]["created_at"], @@ -5720,7 +5720,7 @@ def test_should_raise_422_for_invalid_task_instance_state(self, payload, expecte "dag_id": "example_python_operator", "dag_display_name": "example_python_operator", "dag_version": { - "bundle_name": "airflow-provider-apache-airflow-providers-standard-example-dags", + "bundle_name": "apache-airflow-providers-standard-example-dags", "bundle_url": None, "bundle_version": None, "created_at": mock.ANY, diff --git a/airflow-core/tests/unit/cli/commands/test_asset_command.py b/airflow-core/tests/unit/cli/commands/test_asset_command.py index 7b17f2a5cea52..6efd8293534f3 100644 --- a/airflow-core/tests/unit/cli/commands/test_asset_command.py +++ b/airflow-core/tests/unit/cli/commands/test_asset_command.py @@ -28,6 +28,7 @@ from airflow.cli import cli_parser from airflow.cli.commands import asset_command +from tests_common.test_utils.config import conf_vars from tests_common.test_utils.db import clear_db_dags, clear_db_runs, parse_and_sync_to_db if typing.TYPE_CHECKING: @@ -38,7 +39,8 @@ @pytest.fixture(scope="module", autouse=True) def prepare_examples(): - parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + parse_and_sync_to_db(os.devnull) yield clear_db_runs() clear_db_dags() diff --git a/airflow-core/tests/unit/cli/commands/test_backfill_command.py b/airflow-core/tests/unit/cli/commands/test_backfill_command.py index faab928ddb6f3..ab6c937834609 100644 --- a/airflow-core/tests/unit/cli/commands/test_backfill_command.py +++ b/airflow-core/tests/unit/cli/commands/test_backfill_command.py @@ -30,6 +30,7 @@ from airflow.cli import cli_parser 
from airflow.models.backfill import ReprocessBehavior +from tests_common.test_utils.config import conf_vars from tests_common.test_utils.db import clear_db_backfills, clear_db_dags, clear_db_runs, parse_and_sync_to_db DEFAULT_DATE = timezone.make_aware(datetime(2015, 1, 1), timezone=timezone.utc) @@ -48,7 +49,8 @@ class TestCliBackfill: @classmethod def setup_class(cls): - parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + parse_and_sync_to_db(os.devnull) cls.parser = cli_parser.get_parser() @classmethod diff --git a/airflow-core/tests/unit/cli/commands/test_dag_command.py b/airflow-core/tests/unit/cli/commands/test_dag_command.py index 298cf516163ac..22f3612df7f45 100644 --- a/airflow-core/tests/unit/cli/commands/test_dag_command.py +++ b/airflow-core/tests/unit/cli/commands/test_dag_command.py @@ -86,7 +86,7 @@ class TestCliDags: @classmethod def setup_class(cls): with conf_vars({("core", "load_examples"): "True"}): - parse_and_sync_to_db(os.devnull, include_examples=True) + parse_and_sync_to_db(os.devnull) cls.parser = cli_parser.get_parser() @classmethod diff --git a/airflow-core/tests/unit/cli/commands/test_task_command.py b/airflow-core/tests/unit/cli/commands/test_task_command.py index e9331b8331797..c99e82ca21db4 100644 --- a/airflow-core/tests/unit/cli/commands/test_task_command.py +++ b/airflow-core/tests/unit/cli/commands/test_task_command.py @@ -79,7 +79,8 @@ class TestCliTasks: @classmethod def setup_class(cls): - parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + parse_and_sync_to_db(os.devnull) cls.parser = cli_parser.get_parser() clear_db_runs() diff --git a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py index 9bae2c4acbafb..0d17069c831dd 100644 --- a/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py 
+++ b/airflow-core/tests/unit/dag_processing/bundles/test_dag_bundle_manager.py @@ -473,6 +473,6 @@ def test_get_all_bundle_names(): # Any other bundle exposed here comes from a provider's example_dags # folder discovered via ProvidersManager. Their presence depends on # which providers are installed in the environment, so we only check - # the naming prefix instead of pinning an exact list. + # the naming suffix instead of pinning an exact list. extra = [n for n in bundle_names if n not in {"dags-folder", "example_dags"}] - assert all(n.startswith("airflow-provider-") for n in extra) + assert all(n.endswith("-example-dags") for n in extra) diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index d1246b8b54654..c612cdaeea1d5 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -21,6 +21,7 @@ import json import logging import os +import warnings from tempfile import gettempdir from typing import TYPE_CHECKING @@ -186,6 +187,28 @@ def initial_db_init(): def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): + """ + Parse DAGs in ``folder`` and sync them to the metadata database. + + On Airflow 3.1+, ``include_examples`` is back-compat-only: example DAGs + are exposed as dedicated bundles (``example_dags`` for core, + ``apache-airflow-providers-*-example-dags`` for each provider that ships + an ``example_dags`` folder), and whether they are loaded is controlled by + the ``[core] load_examples`` configuration option, not by this argument. + Tests that need example DAGs should set ``conf_vars({("core", "load_examples"): "true"})`` + instead. Passing ``include_examples=True`` on 3.1+ emits a + :class:`DeprecationWarning`. + """ + if AIRFLOW_V_3_1_PLUS and include_examples is True: + warnings.warn( + "include_examples=True is deprecated for parse_and_sync_to_db on " + "Airflow 3.1+. 
Example DAGs are now loaded via dedicated bundles " + "controlled by the [core] load_examples configuration option. Set " + "conf_vars({('core', 'load_examples'): 'true'}) in the test instead.", + DeprecationWarning, + stacklevel=2, + ) + if AIRFLOW_V_3_2_PLUS: from airflow.dag_processing.dagbag import DagBag else: @@ -205,7 +228,7 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): except ImportError: from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] # On 3.3+, example DAGs are exposed as their own bundles - # (``example_dags`` for core, ``airflow-provider-*-example-dags`` + # (``example_dags`` for core, ``apache-airflow-providers-*-example-dags`` # for each provider that ships an ``example_dags`` folder). The # bundle loop below already syncs every one of them, so the # ``dags-folder`` DagBag must NOT pull example DAGs in too, diff --git a/providers/fab/tests/unit/fab/www/views/conftest.py b/providers/fab/tests/unit/fab/www/views/conftest.py index 3c6e047deb994..96c423947e11b 100644 --- a/providers/fab/tests/unit/fab/www/views/conftest.py +++ b/providers/fab/tests/unit/fab/www/views/conftest.py @@ -43,7 +43,8 @@ def session(): @pytest.fixture(autouse=True, scope="module") def examples_dag_bag(session): - dag_bag = parse_and_sync_to_db(os.devnull, include_examples=True) + with conf_vars({("core", "load_examples"): "True"}): + dag_bag = parse_and_sync_to_db(os.devnull) session.commit() return dag_bag From 18d5abf5a0824e9b65424bdff955c62de5699feb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Mon, 4 May 2026 07:31:13 -0300 Subject: [PATCH 13/17] Remove include_examples parameter from DagBag and test helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drops the include_examples knob from DagBag.__init__, BundleDagBag.__init__, collect_dags, and parse_and_sync_to_db. 
Example DAGs now come in exclusively through the per-provider example bundles registered when [core] load_examples is enabled. Removes the deprecation warnings, updates all callers across core, devel-common, and providers, and gates the few provider tests that still need to read examples on older Airflow versions behind AIRFLOW_V_3_3_PLUS. Signed-off-by: André Ahlert --- .../newsfragments/66161.significant.rst | 25 +++-- .../src/airflow/dag_processing/dagbag.py | 37 ------- .../tests/integration/otel/test_otel.py | 2 +- .../tests/unit/always/test_example_dags.py | 3 - .../core_api/routes/public/test_backfills.py | 2 +- .../routes/public/test_dag_sources.py | 2 +- .../routes/public/test_task_instances.py | 2 +- .../unit/cli/commands/test_config_command.py | 2 - .../unit/cli/commands/test_dag_command.py | 10 +- .../unit/cli/commands/test_pool_command.py | 2 +- .../unit/cli/commands/test_team_command.py | 2 +- .../cli/commands/test_variable_command.py | 2 +- airflow-core/tests/unit/cli/conftest.py | 2 +- .../unit/core/test_impersonation_tests.py | 2 +- .../tests/unit/dag_processing/test_dagbag.py | 98 +++++++++---------- .../tests/unit/dag_processing/test_manager.py | 3 +- .../tests/unit/jobs/test_scheduler_job.py | 16 ++- airflow-core/tests/unit/models/test_dag.py | 2 +- airflow-core/tests/unit/models/test_dagrun.py | 2 +- .../serialization/test_dag_serialization.py | 6 +- .../src/tests_common/pytest_plugin.py | 4 +- .../src/tests_common/test_utils/db.py | 35 +++---- .../google/cloud/operators/test_dataproc.py | 2 +- .../google/cloud/operators/test_looker.py | 2 +- .../openlineage/plugins/test_execution.py | 2 - .../sensors/test_external_task_sensor.py | 18 ++-- .../unit/standard/sensors/test_time_delta.py | 14 ++- .../unit/standard/sensors/test_weekday.py | 12 ++- 28 files changed, 135 insertions(+), 176 deletions(-) diff --git a/airflow-core/newsfragments/66161.significant.rst b/airflow-core/newsfragments/66161.significant.rst index e0722395aa4a4..aa4db6ba9f9f7 
100644 --- a/airflow-core/newsfragments/66161.significant.rst +++ b/airflow-core/newsfragments/66161.significant.rst @@ -27,15 +27,20 @@ registered. to update to the new per-provider bundle names. The DAG identifiers themselves are unchanged. -**Deprecations:** +**Removals:** -- The ``include_examples`` parameter on ``DagBag.collect_dags`` and on - ``tests_common.test_utils.db.parse_and_sync_to_db`` is now back-compat - only. Passing ``include_examples=True`` emits a - :class:`DeprecationWarning`. Tests and external callers that need - example DAGs loaded should set the ``[core] load_examples`` config to - ``true`` (e.g. via ``conf_vars({("core", "load_examples"): "true"})``) - instead. +- The ``include_examples`` parameter has been removed from + ``DagBag.__init__``, ``DagBag.collect_dags``, ``BundleDagBag.__init__``, + and ``tests_common.test_utils.db.parse_and_sync_to_db``. Example DAG + loading is now controlled exclusively by the ``[core] load_examples`` + configuration option, which gates whether the per-provider example + bundles are registered. Callers that previously passed + ``include_examples=True`` should set + ``conf_vars({("core", "load_examples"): "true"})`` (or equivalent + configuration). Callers that previously passed + ``include_examples=False`` can drop the argument: it matches the new + default behaviour where ``DagBag`` only walks the configured + ``dag_folder`` and example DAGs come in via dedicated bundles. * Types of change @@ -55,4 +60,6 @@ registered. * Replace ``include_examples=True`` calls to ``DagBag`` / ``parse_and_sync_to_db`` with ``conf_vars({("core", "load_examples"): "true"})`` (or equivalent - configuration) before the deprecated argument is removed. + configuration). The argument has been removed. + * Drop ``include_examples=False`` arguments from ``DagBag`` / + ``parse_and_sync_to_db`` calls; the default behaviour is unchanged. 
diff --git a/airflow-core/src/airflow/dag_processing/dagbag.py b/airflow-core/src/airflow/dag_processing/dagbag.py index 7d25c4fa83dd9..34684810977c7 100644 --- a/airflow-core/src/airflow/dag_processing/dagbag.py +++ b/airflow-core/src/airflow/dag_processing/dagbag.py @@ -172,12 +172,6 @@ class DagBag(LoggingMixin): that one system can run multiple, independent settings sets. :param dag_folder: the folder to scan to find DAGs - :param include_examples: back-compat-only on Airflow 3.1+. Example DAGs are - now exposed as dedicated bundles (``example_dags`` for core, - ``apache-airflow-providers-*-example-dags`` for each provider that - ships an ``example_dags`` folder), and the ``[core] load_examples`` - config controls whether those bundles are registered. Passing - ``include_examples=True`` here emits a :class:`DeprecationWarning`. :param safe_mode: when ``False``, scans all python modules for dags. When ``True`` uses heuristics (files containing ``DAG`` and ``airflow`` strings) to filter python modules to scan for dags. 
@@ -191,7 +185,6 @@ class DagBag(LoggingMixin): def __init__( self, dag_folder: str | Path | None = None, # todo AIP-66: rename this to path - include_examples: bool | ArgNotSet = NOTSET, safe_mode: bool | ArgNotSet = NOTSET, load_op_links: bool = True, collect_dags: bool = True, @@ -222,11 +215,6 @@ def __init__( if collect_dags: self.collect_dags( dag_folder=dag_folder, - include_examples=( - include_examples - if is_arg_set(include_examples) - else conf.getboolean("core", "LOAD_EXAMPLES") - ), safe_mode=( safe_mode if is_arg_set(safe_mode) else conf.getboolean("core", "DAG_DISCOVERY_SAFE_MODE") ), @@ -455,7 +443,6 @@ def collect_dags( self, dag_folder: str | Path | None = None, only_if_updated: bool = True, - include_examples: bool = conf.getboolean("core", "LOAD_EXAMPLES"), safe_mode: bool = conf.getboolean("core", "DAG_DISCOVERY_SAFE_MODE"), ): """ @@ -469,21 +456,7 @@ def collect_dags( **Note**: The patterns in ``.airflowignore`` are interpreted as either un-anchored regexes or gitignore-like glob expressions, depending on the ``DAG_IGNORE_FILE_SYNTAX`` configuration parameter. - - ``include_examples`` is a back-compat-only argument on Airflow 3.1+; - example DAGs are loaded via dedicated bundles gated by the - ``[core] load_examples`` configuration option. Passing - ``include_examples=True`` emits a :class:`DeprecationWarning`. """ - if include_examples is True: - warnings.warn( - "include_examples=True is deprecated for DagBag.collect_dags. " - "Example DAGs are now loaded via dedicated bundles controlled by " - "the [core] load_examples configuration option. 
This argument " - "will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) self.log.info("Filling up the DagBag from %s", dag_folder) dag_folder = dag_folder or self.dag_folder # Used to store stats around DagBag processing @@ -565,17 +538,7 @@ def __init__(self, *args, bundle_path: Path | None = None, **kwargs): if str(bundle_path) not in sys.path: sys.path.append(str(bundle_path)) - # Warn if user explicitly set include_examples=True, since bundles never contain examples - if kwargs.get("include_examples") is True: - warnings.warn( - "include_examples=True is ignored for BundleDagBag. " - "Bundles do not contain example DAGs, so include_examples is always False.", - UserWarning, - stacklevel=2, - ) - kwargs["bundle_path"] = bundle_path - kwargs["include_examples"] = False super().__init__(*args, **kwargs) diff --git a/airflow-core/tests/integration/otel/test_otel.py b/airflow-core/tests/integration/otel/test_otel.py index c543f54a921d8..d19756094b6ce 100644 --- a/airflow-core/tests/integration/otel/test_otel.py +++ b/airflow-core/tests/integration/otel/test_otel.py @@ -257,7 +257,7 @@ def setup_class(cls): def serialize_and_get_dags(cls) -> dict[str, SerializedDAG]: log.info("Serializing Dags from directory %s", cls.dag_folder) # Load DAGs from the dag directory. 
- dag_bag = DagBag(dag_folder=cls.dag_folder, include_examples=False) + dag_bag = DagBag(dag_folder=cls.dag_folder) dag_ids = dag_bag.dag_ids assert len(dag_ids) == 1 diff --git a/airflow-core/tests/unit/always/test_example_dags.py b/airflow-core/tests/unit/always/test_example_dags.py index bd9c5478bd76d..b45845404ae53 100644 --- a/airflow-core/tests/unit/always/test_example_dags.py +++ b/airflow-core/tests/unit/always/test_example_dags.py @@ -213,7 +213,6 @@ def patch_get_dagbag_import_timeout(): def test_should_be_importable(example: str, patch_get_dagbag_import_timeout): dagbag = DagBag( dag_folder=example, - include_examples=False, ) if len(dagbag.import_errors) == 1 and "AirflowOptionalProviderFeatureException" in str( dagbag.import_errors @@ -232,7 +231,6 @@ def test_should_not_do_database_queries(example: str, patch_get_dagbag_import_ti with assert_queries_count(1, stacklevel_from_module=example.rsplit(os.sep, 1)[-1]): DagBag( dag_folder=example, - include_examples=False, ) @@ -244,7 +242,6 @@ def test_should_not_run_hook_connections(example: str, patch_get_dagbag_import_t mock_get_connection.return_value = Connection() DagBag( dag_folder=example, - include_examples=False, ) assert mock_get_connection.call_count == 0, ( f"BaseHook.get_connection() should not be called during DAG parsing. 
" diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_backfills.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_backfills.py index 33ff158aa2848..66be4e6325284 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_backfills.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_backfills.py @@ -86,7 +86,7 @@ def make_dags(): with DAG(DAG3_ID, schedule=None) as dag3: # DAG start_date set to None EmptyOperator(task_id=TASK_ID, start_date=datetime(2019, 6, 12)) - dag_bag = DagBag(os.devnull, include_examples=False) + dag_bag = DagBag(os.devnull) dag_bag.dags = {dag.dag_id: dag, dag2.dag_id: dag2, dag3.dag_id: dag3} diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_sources.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_sources.py index 029d74007ee37..7bcadfdb03df2 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_sources.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_dag_sources.py @@ -50,7 +50,7 @@ @pytest.fixture def real_dag_bag(): - return parse_and_sync_to_db(EXAMPLE_DAG_FILE, include_examples=False) + return parse_and_sync_to_db(EXAMPLE_DAG_FILE) @pytest.fixture diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py index 91aeec2cd92a8..6edae03cb090b 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/public/test_task_instances.py @@ -736,7 +736,7 @@ def create_dag_runs_with_mapped_tasks(self, dag_maker, session, dags=None): session.add(ti) DagBundlesManager().sync_bundles_to_db() - dagbag = DagBag(os.devnull, include_examples=False) + dagbag = DagBag(os.devnull) dagbag.dags = {dag_id: dag_maker.dag} sync_bag_to_db(dagbag, "dags-folder", None) 
session.flush() diff --git a/airflow-core/tests/unit/cli/commands/test_dag_command.py b/airflow-core/tests/unit/cli/commands/test_dag_command.py index 22f3612df7f45..6f76034a4218d 100644 --- a/airflow-core/tests/unit/cli/commands/test_dag_command.py +++ b/airflow-core/tests/unit/cli/commands/test_dag_command.py @@ -257,7 +257,7 @@ def test_next_execution(self, dag_id, delta, schedule, catchup, first, second, t print(file_content) with time_machine.travel(DEFAULT_DATE): clear_db_dags() - parse_and_sync_to_db(tmp_path, include_examples=False) + parse_and_sync_to_db(tmp_path) # Test num-executions = 1 (default) args = self.parser.parse_args(["dags", "next-execution", dag_id]) @@ -303,7 +303,7 @@ def test_next_execution_table_flag_with_no_next_run( with time_machine.travel(DEFAULT_DATE): clear_db_dags() - parse_and_sync_to_db(tmp_path, include_examples=False) + parse_and_sync_to_db(tmp_path) args = self.parser.parse_args(["dags", "next-execution", dag_id, "--table", "--num-executions", "2"]) # Must not raise AttributeError on None DagRunInfo @@ -314,7 +314,7 @@ def
test_next_execution_table_flag_with_no_next_run( # Rebuild Test DB for other tests clear_db_dags() - parse_and_sync_to_db(os.devnull, include_examples=True) + self.setup_class() def test_cli_report(self, stdout_capture): args = self.parser.parse_args(["dags", "report", "--output", "json"]) @@ -1014,7 +1014,7 @@ def test_dag_with_parsing_context( path_to_parse = TEST_DAGS_FOLDER / "test_dag_parsing_context.py" with configure_testing_dag_bundle(path_to_parse): - bag = DagBag(dag_folder=path_to_parse, include_examples=False) + bag = DagBag(dag_folder=path_to_parse) sync_bag_to_db(bag, "testing", None) cli_args = self.parser.parse_args( ["dags", "test", "test_dag_parsing_context", DEFAULT_DATE.isoformat()] @@ -1108,7 +1108,7 @@ def test_get_dag_excludes_examples_with_bundle(self, configure_testing_dag_bundl from airflow.utils.cli import get_dag as get_bagged_dag # type: ignore with configure_testing_dag_bundle(TEST_DAGS_FOLDER / "test_sensor.py"): - # example DAG should not be found since include_examples=False + # example DAG should not be found since the testing bundle only exposes test_sensor.py with pytest.raises(AirflowException, match="could not be found"): get_bagged_dag(bundle_names=["testing"], dag_id="example_simplest_dag") diff --git a/airflow-core/tests/unit/cli/commands/test_pool_command.py b/airflow-core/tests/unit/cli/commands/test_pool_command.py index 8fea33d7a7ffa..828497e9c2d38 100644 --- a/airflow-core/tests/unit/cli/commands/test_pool_command.py +++ b/airflow-core/tests/unit/cli/commands/test_pool_command.py @@ -35,7 +35,7 @@ class TestCliPools: @classmethod def setup_class(cls): - cls.dagbag = models.DagBag(include_examples=True) + cls.dagbag = models.DagBag() cls.parser = cli_parser.get_parser() settings.configure_orm() cls.session = Session diff --git a/airflow-core/tests/unit/cli/commands/test_team_command.py b/airflow-core/tests/unit/cli/commands/test_team_command.py index 49ade1c19bc7b..1df56d9c3cc3f 100644 --- 
a/airflow-core/tests/unit/cli/commands/test_team_command.py +++ b/airflow-core/tests/unit/cli/commands/test_team_command.py @@ -54,7 +54,7 @@ def _cleanup(cls): @classmethod def setup_class(cls): - cls.dagbag = models.DagBag(include_examples=True) + cls.dagbag = models.DagBag() cls.parser = cli_parser.get_parser() settings.configure_orm() cls.session = Session diff --git a/airflow-core/tests/unit/cli/commands/test_variable_command.py b/airflow-core/tests/unit/cli/commands/test_variable_command.py index 21d2fb66822b5..e9f4b94f30840 100644 --- a/airflow-core/tests/unit/cli/commands/test_variable_command.py +++ b/airflow-core/tests/unit/cli/commands/test_variable_command.py @@ -120,7 +120,7 @@ def _create(data, format="yaml", filename=None): class TestCliVariables: @classmethod def setup_class(cls): - cls.dagbag = models.DagBag(include_examples=True) + cls.dagbag = models.DagBag() cls.parser = cli_parser.get_parser() def setup_method(self): diff --git a/airflow-core/tests/unit/cli/conftest.py b/airflow-core/tests/unit/cli/conftest.py index cc3052c27cebb..7676a103b5363 100644 --- a/airflow-core/tests/unit/cli/conftest.py +++ b/airflow-core/tests/unit/cli/conftest.py @@ -54,7 +54,7 @@ def load_examples(): @pytest.fixture(scope="session") def dagbag(): - return DagBag(include_examples=True) + return DagBag() @pytest.fixture(scope="session") diff --git a/airflow-core/tests/unit/core/test_impersonation_tests.py b/airflow-core/tests/unit/core/test_impersonation_tests.py index 8165d1f6d73f6..7325cb3ef5ba2 100644 --- a/airflow-core/tests/unit/core/test_impersonation_tests.py +++ b/airflow-core/tests/unit/core/test_impersonation_tests.py @@ -167,7 +167,7 @@ def setup_impersonation_tests(self, create_airflow_home): @staticmethod def get_dagbag(dag_folder): """Get DagBag and print statistic into the log.""" - dagbag = DagBag(dag_folder=dag_folder, include_examples=False) + dagbag = DagBag(dag_folder=dag_folder) logger.info("Loaded DAGs:") logger.info(dagbag.dagbag_report()) 
return dagbag diff --git a/airflow-core/tests/unit/dag_processing/test_dagbag.py b/airflow-core/tests/unit/dag_processing/test_dagbag.py index 49fa6f8ebe950..99abd92a59f73 100644 --- a/airflow-core/tests/unit/dag_processing/test_dagbag.py +++ b/airflow-core/tests/unit/dag_processing/test_dagbag.py @@ -353,20 +353,18 @@ def teardown_class(self): def test_dagbag_with_bundle_name(self, tmp_path): """Test that DagBag constructor accepts and stores bundle_name parameter.""" - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False, bundle_name="test_bundle") + dagbag = DagBag(dag_folder=os.fspath(tmp_path), bundle_name="test_bundle") assert dagbag.bundle_name == "test_bundle" # Test with None (default) - dagbag2 = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag2 = DagBag(dag_folder=os.fspath(tmp_path)) assert dagbag2.bundle_name is None def test_get_existing_dag(self, tmp_path, standard_example_dags_folder): """ Test that we're able to parse some example DAGs and retrieve them """ - dagbag = DagBag( - dag_folder=standard_example_dags_folder, include_examples=False, bundle_name="test_bundle" - ) + dagbag = DagBag(dag_folder=standard_example_dags_folder, bundle_name="test_bundle") some_expected_dag_ids = ["example_bash_operator", "example_python_operator"] @@ -382,7 +380,7 @@ def test_get_non_existing_dag(self, tmp_path): """ test that retrieving a non existing dag id returns None without crashing """ - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) non_existing_dag_id = "non_existing_dag_id" assert dagbag.get_dag(non_existing_dag_id) is None @@ -398,7 +396,7 @@ def test_dont_load_example(self, tmp_path): """ test that the example are not loaded """ - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert dagbag.size() == 0 @@ -411,7 +409,7 @@ def test_safe_mode_heuristic_match(self, 
tmp_path): path.write_text("# airflow\n# DAG") with conf_vars({("core", "dags_folder"): os.fspath(path.parent)}): - dagbag = DagBag(include_examples=False, safe_mode=True) + dagbag = DagBag(safe_mode=True) assert len(dagbag.dagbag_stats) == 1 assert dagbag.dagbag_stats[0].file == path.name @@ -424,7 +422,7 @@ def test_safe_mode_heuristic_mismatch(self, tmp_path): path = tmp_path / "testfile.py" path.write_text("") with conf_vars({("core", "dags_folder"): os.fspath(path.parent)}): - dagbag = DagBag(include_examples=False, safe_mode=True) + dagbag = DagBag(safe_mode=True) assert len(dagbag.dagbag_stats) == 0 def test_safe_mode_disabled(self, tmp_path): @@ -432,7 +430,7 @@ def test_safe_mode_disabled(self, tmp_path): path = tmp_path / "testfile.py" path.write_text("") with conf_vars({("core", "dags_folder"): os.fspath(path.parent)}): - dagbag = DagBag(include_examples=False, safe_mode=False) + dagbag = DagBag(safe_mode=False) assert len(dagbag.dagbag_stats) == 1 assert dagbag.dagbag_stats[0].file == path.name @@ -454,7 +452,7 @@ def test_dagbag_stats_file_is_relative_path_with_mixed_separators(self, tmp_path # but the filesystem returns paths with backslashes dags_folder_with_forward_slashes = path.parent.as_posix() with conf_vars({("core", "dags_folder"): dags_folder_with_forward_slashes}): - dagbag = DagBag(include_examples=False, safe_mode=True) + dagbag = DagBag(safe_mode=True) assert len(dagbag.dagbag_stats) == 1 assert dagbag.dagbag_stats[0].file == path.name @@ -470,7 +468,6 @@ def test_dagbag_stats_includes_bundle_info(self, tmp_path): with conf_vars({("core", "dags_folder"): os.fspath(path.parent)}): dagbag = DagBag( - include_examples=False, safe_mode=True, bundle_path=bundle_path, bundle_name=bundle_name, @@ -487,7 +484,7 @@ def test_dagbag_stats_bundle_info_none_when_not_provided(self, tmp_path): path.write_text("# airflow\n# DAG") with conf_vars({("core", "dags_folder"): os.fspath(path.parent)}): - dagbag = DagBag(include_examples=False, safe_mode=True) + 
dagbag = DagBag(safe_mode=True) assert len(dagbag.dagbag_stats) == 1 stat = dagbag.dagbag_stats[0] @@ -501,12 +498,12 @@ def test_process_file_that_contains_multi_bytes_char(self, tmp_path): path = tmp_path / "testfile.py" path.write_text("\u3042") # write multi-byte char (hiragana) - dagbag = DagBag(dag_folder=os.fspath(path.parent), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(path.parent)) assert dagbag.process_file(os.fspath(path)) == [] def test_process_file_duplicated_dag_id(self, tmp_path): """Loading a DAG with ID that already existed in a DAG bag should result in an import error.""" - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) def create_dag(): from airflow.sdk import dag @@ -547,7 +544,6 @@ def test_import_errors_use_relative_path_with_bundle(self, tmp_path): dagbag = DagBag( dag_folder=os.fspath(dag_path), - include_examples=False, bundle_path=bundle_path, bundle_name="test_bundle", ) @@ -580,7 +576,6 @@ def my_flow(): dagbag = DagBag( dag_folder=os.fspath(bundle_path), - include_examples=False, bundle_path=bundle_path, bundle_name="test_bundle", ) @@ -606,7 +601,7 @@ def test_zip_skip_log(self, caplog, test_zip_path): it doesn't have "airflow" and "DAG" """ caplog.set_level(logging.INFO) - dagbag = DagBag(dag_folder=test_zip_path, include_examples=False) + dagbag = DagBag(dag_folder=test_zip_path) assert dagbag.has_logged assert ( @@ -619,7 +614,7 @@ def test_zip(self, tmp_path, test_zip_path): test the loading of a DAG within a zip file that includes dependencies """ syspath_before = deepcopy(sys.path) - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) dagbag.process_file(test_zip_path) assert dagbag.get_dag("test_zip_dag") assert sys.path == syspath_before # sys.path doesn't change @@ -635,12 +630,12 @@ def test_process_dag_file_without_timeout( """ 
mocked_get_dagbag_import_timeout.return_value = 0 - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) dagbag.process_file(os.path.join(TEST_DAGS_FOLDER, "test_sensor.py")) mocked_timeout.assert_not_called() mocked_get_dagbag_import_timeout.return_value = -1 - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) dagbag.process_file(os.path.join(TEST_DAGS_FOLDER, "test_sensor.py")) mocked_timeout.assert_not_called() @@ -658,7 +653,7 @@ def test_process_dag_file_with_non_default_timeout( # ensure the test value is not equal to the default value assert timeout_value != settings.conf.getfloat("core", "DAGBAG_IMPORT_TIMEOUT") - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) dagbag.process_file(os.path.join(TEST_DAGS_FOLDER, "test_sensor.py")) mocked_timeout.assert_called_once_with(timeout_value, error_message=mock.ANY) @@ -672,7 +667,7 @@ def test_check_value_type_from_get_dagbag_import_timeout( """ mocked_get_dagbag_import_timeout.return_value = "1" - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) with pytest.raises( TypeError, match=r"Value \(1\) from get_dagbag_import_timeout must be int or float" ): @@ -694,7 +689,7 @@ def test_process_file_cron_validity_check( self, request: pytest.FixtureRequest, invalid_dag_name: str, tmp_path ): """Test if an invalid cron expression as schedule interval can be identified""" - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert len(dagbag.import_errors) == 0 dagbag.process_file(request.getfixturevalue(invalid_dag_name)) assert len(dagbag.import_errors) == 1 @@ -710,7 +705,7 @@ def test_process_file_invalid_param_check(self, tmp_path): "test_invalid_param3.py", 
"test_invalid_param4.py", ] - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert len(dagbag.import_errors) == 0 for file in invalid_dag_files: @@ -726,7 +721,7 @@ def test_process_file_valid_param_check(self, tmp_path): "test_valid_param.py", "test_valid_param2.py", ] - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert len(dagbag.import_errors) == 0 for file in valid_dag_files: @@ -776,7 +771,7 @@ def test_get_dag_registration(self, file_name, expected_dag_id, standard_example pytest.importorskip("system.standard") file_to_load = standard_example_dags_folder / file_name expected_path = standard_example_dags_folder.relative_to(AIRFLOW_ROOT_PATH) / file_name - dagbag = DagBag(dag_folder=os.devnull, include_examples=False) + dagbag = DagBag(dag_folder=os.devnull) dagbag.process_file(os.fspath(file_to_load)) dag = dagbag.get_dag(expected_dag_id) assert dag, f"{expected_dag_id} was bagged" @@ -795,7 +790,7 @@ def test_get_dag_registration(self, file_name, expected_dag_id, standard_example ), ) def test_get_zip_dag_registration(self, test_zip_path, expected): - dagbag = DagBag(dag_folder=os.devnull, include_examples=False) + dagbag = DagBag(dag_folder=os.devnull) dagbag.process_file(test_zip_path) for dag_id, path in expected.items(): dag = dagbag.get_dag(dag_id) @@ -803,7 +798,7 @@ def test_get_zip_dag_registration(self, test_zip_path, expected): assert dag.fileloc.endswith(f"{pathlib.Path(test_zip_path).parent}/{path}") def test_dag_registration_with_failure(self): - dagbag = DagBag(dag_folder=os.devnull, include_examples=False) + dagbag = DagBag(dag_folder=os.devnull) found = dagbag.process_file(str(TEST_DAGS_FOLDER / "test_invalid_dup_task.py")) assert found == [] @@ -818,7 +813,7 @@ def zip_with_valid_dag_and_dup_tasks(self, tmp_path: pathlib.Path) -> str: return os.fspath(zipped) def 
test_dag_registration_with_failure_zipped(self, zip_with_valid_dag_and_dup_tasks): - dagbag = DagBag(dag_folder=os.devnull, include_examples=False) + dagbag = DagBag(dag_folder=os.devnull) found = dagbag.process_file(zip_with_valid_dag_and_dup_tasks) assert len(found) == 1 assert [dag.dag_id for dag in found] == ["test_example_bash_operator"] @@ -843,7 +838,7 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): _TestDagBag.process_file_calls += 1 return super().process_file(filepath, only_if_updated, safe_mode) - dagbag = _TestDagBag(dag_folder=standard_example_dags_folder, include_examples=False) + dagbag = _TestDagBag(dag_folder=standard_example_dags_folder) assert dagbag.process_file_calls == 1 dag = dagbag.get_dag(dag_id) @@ -871,7 +866,7 @@ def process_file(self, filepath, only_if_updated=True, safe_mode=True): _TestDagBag.process_file_calls += 1 return super().process_file(filepath, only_if_updated, safe_mode) - dagbag = _TestDagBag(dag_folder=os.path.realpath(test_zip_path), include_examples=False) + dagbag = _TestDagBag(dag_folder=os.path.realpath(test_zip_path)) assert dagbag.process_file_calls == 1 dag = dagbag.get_dag(dag_id) @@ -888,7 +883,7 @@ def process_dag(self, create_dag, tmp_path): path = tmp_path / "testfile.py" path.write_text(source) - dagbag = DagBag(dag_folder=os.fspath(path.parent), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(path.parent)) found_dags = dagbag.process_file(os.fspath(path)) return dagbag, found_dags, os.fspath(path) @@ -943,7 +938,7 @@ def test_process_file_with_none(self, tmp_path): """ test that process_file can handle Nones """ - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert dagbag.process_file(None) == [] @@ -969,7 +964,7 @@ def test_timeout_dag_errors_are_import_errors(self, tmp_path, caplog): """) with conf_vars({("core", "DAGBAG_IMPORT_TIMEOUT"): "0.01"}): - dagbag = 
DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert dag_file.as_posix() in dagbag.import_errors assert "DagBag import timeout for" in caplog.text @@ -999,7 +994,7 @@ def test_import_error_tracebacks(self, tmp_path, depth): with contextlib.ExitStack() as cm: if depth is not None: cm.enter_context(conf_vars({("core", "dagbag_import_error_traceback_depth"): str(depth)})) - dagbag = DagBag(dag_folder=unparseable_filename, include_examples=False) + dagbag = DagBag(dag_folder=unparseable_filename) import_errors = dagbag.import_errors assert unparseable_filename in import_errors @@ -1015,7 +1010,7 @@ def test_import_error_tracebacks_zip(self, tmp_path, depth): with contextlib.ExitStack() as cm: if depth is not None: cm.enter_context(conf_vars({("core", "dagbag_import_error_traceback_depth"): str(depth)})) - dagbag = DagBag(dag_folder=invalid_zip_filename, include_examples=False) + dagbag = DagBag(dag_folder=invalid_zip_filename) import_errors = dagbag.import_errors assert invalid_dag_filename in import_errors assert import_errors[invalid_dag_filename] == self._make_test_traceback(invalid_dag_filename, depth) @@ -1030,7 +1025,7 @@ def test_task_cluster_policy_violation(self): dag_id = "test_missing_owner" err_cls_name = "AirflowClusterPolicyViolation" - dagbag = DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) assert set() == set(dagbag.dag_ids) expected_import_errors = { dag_file: ( @@ -1052,7 +1047,7 @@ def test_task_cluster_policy_nonstring_owner(self): dag_id = "test_nonstring_owner" err_cls_name = "AirflowClusterPolicyViolation" - dagbag = DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) assert set() == set(dagbag.dag_ids) expected_import_errors = { dag_file: ( @@ -1071,7 +1066,7 @@ def test_task_cluster_policy_obeyed(self): """ dag_file = os.path.join(TEST_DAGS_FOLDER, "test_with_non_default_owner.py") - dagbag = 
DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) assert {"test_with_non_default_owner"} == set(dagbag.dag_ids) assert dagbag.import_errors == {} @@ -1080,14 +1075,13 @@ def test_task_cluster_policy_obeyed(self): def test_dag_cluster_policy_obeyed(self): dag_file = os.path.join(TEST_DAGS_FOLDER, "test_dag_with_no_tags.py") - dagbag = DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) assert len(dagbag.dag_ids) == 0 assert "has no tags" in dagbag.import_errors[dag_file] def test_dagbag_dag_collection(self): dagbag = DagBag( dag_folder=TEST_DAGS_FOLDER, - include_examples=False, collect_dags=False, bundle_name="test_collection", ) @@ -1098,15 +1092,15 @@ def test_dagbag_dag_collection(self): assert dagbag.dags # test that dagbag.dags is not empty if collect_dags is True - dagbag = DagBag(dag_folder=TEST_DAGS_FOLDER, include_examples=False, bundle_name="test_collection") + dagbag = DagBag(dag_folder=TEST_DAGS_FOLDER, bundle_name="test_collection") assert dagbag.dags def test_dabgag_captured_warnings(self): dag_file = os.path.join(TEST_DAGS_FOLDER, "test_dag_warnings.py") - dagbag = DagBag(dag_folder=dag_file, include_examples=False, collect_dags=False) + dagbag = DagBag(dag_folder=dag_file, collect_dags=False) assert dag_file not in dagbag.captured_warnings - dagbag.collect_dags(dag_folder=dagbag.dag_folder, include_examples=False, only_if_updated=False) + dagbag.collect_dags(dag_folder=dagbag.dag_folder, only_if_updated=False) assert dagbag.dagbag_stats[0].warning_num == 2 assert dagbag.captured_warnings == { dag_file: ( @@ -1118,14 +1112,14 @@ def test_dabgag_captured_warnings(self): with warnings.catch_warnings(): # Disable capture DeprecationWarning, and it should be reflected in captured warnings warnings.simplefilter("ignore", DeprecationWarning) - dagbag.collect_dags(dag_folder=dagbag.dag_folder, include_examples=False, only_if_updated=False) + 
dagbag.collect_dags(dag_folder=dagbag.dag_folder, only_if_updated=False) assert dag_file in dagbag.captured_warnings assert len(dagbag.captured_warnings[dag_file]) == 1 assert dagbag.dagbag_stats[0].warning_num == 1 # Disable all warnings, no captured warnings expected warnings.simplefilter("ignore") - dagbag.collect_dags(dag_folder=dagbag.dag_folder, include_examples=False, only_if_updated=False) + dagbag.collect_dags(dag_folder=dagbag.dag_folder, only_if_updated=False) assert dag_file not in dagbag.captured_warnings assert dagbag.dagbag_stats[0].warning_num == 0 @@ -1139,7 +1133,7 @@ def warning_zipped_dag_path(self, tmp_path: pathlib.Path) -> str: def test_dabgag_captured_warnings_zip(self, warning_zipped_dag_path: str): in_zip_dag_file = f"{warning_zipped_dag_path}/test_dag_warnings.py" - dagbag = DagBag(dag_folder=warning_zipped_dag_path, include_examples=False) + dagbag = DagBag(dag_folder=warning_zipped_dag_path) assert dagbag.dagbag_stats[0].warning_num == 2 assert dagbag.captured_warnings == { warning_zipped_dag_path: ( @@ -1175,7 +1169,7 @@ def test_dag_warnings_invalid_pool(self, known_pools, expected): BaseOperator(task_id="1") BaseOperator(task_id="2", pool="pool1") - dagbag = DagBag(dag_folder="", include_examples=False, collect_dags=False, known_pools=known_pools) + dagbag = DagBag(dag_folder="", collect_dags=False, known_pools=known_pools) dagbag.bag_dag(dag) assert dagbag.dag_warnings == expected @@ -1204,7 +1198,7 @@ def mytask(): ) ) - dagbag = DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(tmp_path)) assert "Received SIGSEGV signal while processing" in caplog.text assert dag_file.as_posix() in dagbag.import_errors @@ -1229,7 +1223,7 @@ def mytask(): ) with mock.patch("airflow.dag_processing.importers.python_importer.signal.signal") as mock_signal: mock_signal.side_effect = ValueError("Invalid signal setting") - DagBag(dag_folder=os.fspath(tmp_path), include_examples=False) + 
DagBag(dag_folder=os.fspath(tmp_path)) assert "SIGSEGV signal handler registration failed. Not in the main thread" in caplog.text @@ -1375,7 +1369,7 @@ def test_dagbag_no_bundle_path_no_syspath_modification(self, tmp_path): ) ) syspath_before = deepcopy(sys.path) - dagbag = DagBag(dag_folder=str(dag_file), include_examples=False) + dagbag = DagBag(dag_folder=str(dag_file)) dag = dagbag.get_dag("simple_dag") assert str(tmp_path) not in dag.description diff --git a/airflow-core/tests/unit/dag_processing/test_manager.py b/airflow-core/tests/unit/dag_processing/test_manager.py index c0c42fae1d088..17795bcc8870c 100644 --- a/airflow-core/tests/unit/dag_processing/test_manager.py +++ b/airflow-core/tests/unit/dag_processing/test_manager.py @@ -969,7 +969,6 @@ def test_scan_stale_dags(self, session): ) dagbag = DagBag( test_dag_path.absolute_path, - include_examples=False, bundle_path=test_dag_path.bundle_path, ) @@ -1484,7 +1483,7 @@ def test_refresh_dags_dir_doesnt_delete_zipped_dags( self, tmp_path, session, configure_testing_dag_bundle, test_zip_path ): """Test DagFileProcessorManager._refresh_dag_dir method""" - dagbag = DagBag(dag_folder=tmp_path, include_examples=False) + dagbag = DagBag(dag_folder=tmp_path) dagbag.process_file(test_zip_path) dag = dagbag.get_dag("test_zip_dag") sync_dag_to_db(dag) diff --git a/airflow-core/tests/unit/jobs/test_scheduler_job.py b/airflow-core/tests/unit/jobs/test_scheduler_job.py index cca74569bdf0b..89383233fad02 100644 --- a/airflow-core/tests/unit/jobs/test_scheduler_job.py +++ b/airflow-core/tests/unit/jobs/test_scheduler_job.py @@ -4293,7 +4293,7 @@ def test_dagrun_root_after_dagrun_unfinished(self, mock_executor, testing_dag_bu Noted: the DagRun state could be still in running state during CI. 
""" - dagbag = DagBag(TEST_DAG_FOLDER, include_examples=False) + dagbag = DagBag(TEST_DAG_FOLDER) sync_bag_to_db(dagbag, "testing", None) dag_id = "test_dagrun_states_root_future" @@ -4311,7 +4311,7 @@ def test_scheduler_start_date(self, testing_dag_bundle): """ Test that the scheduler respects start_dates, even when DAGs have run """ - dagbag = DagBag(TEST_DAG_FOLDER, include_examples=False) + dagbag = DagBag(TEST_DAG_FOLDER) with create_session() as session: dag_id = "test_start_date_scheduling" dag = dagbag.get_dag(dag_id) @@ -4368,7 +4368,6 @@ def test_scheduler_task_start_date_catchup_true(self, testing_dag_bundle): """ dagbag = DagBag( dag_folder=os.path.join(settings.DAGS_FOLDER, "test_scheduler_dags.py"), - include_examples=False, ) dag_id = "test_task_start_date_scheduling" dag = dagbag.get_dag(dag_id) @@ -4409,7 +4408,6 @@ def test_scheduler_task_start_date_catchup_false(self, testing_dag_bundle): """ dagbag = DagBag( dag_folder=os.path.join(settings.DAGS_FOLDER, "test_scheduler_dags.py"), - include_examples=False, ) dag_id = "test_task_start_date_scheduling" dag = dagbag.get_dag(dag_id) @@ -4453,7 +4451,7 @@ def test_scheduler_multiprocessing(self): """ Test that the scheduler can successfully queue multiple dags in parallel """ - dagbag = DagBag(TEST_DAG_FOLDER, include_examples=False) + dagbag = DagBag(TEST_DAG_FOLDER) dag_ids = [ "test_start_date_scheduling", "test_task_start_date_scheduling", @@ -7693,7 +7691,7 @@ def watch_heartbeat(*args, **kwargs): def test_mapped_dag(self, dag_id, session, testing_dag_bundle): """End-to-end test of a simple mapped dag""" - dagbag = DagBag(dag_folder=TEST_DAGS_FOLDER, include_examples=False) + dagbag = DagBag(dag_folder=TEST_DAGS_FOLDER) sync_bag_to_db(dagbag, "testing", None) dagbag.process_file(str(TEST_DAGS_FOLDER / f"{dag_id}.py")) dag = dagbag.get_dag(dag_id) @@ -7726,7 +7724,7 @@ def test_should_mark_empty_task_as_success(self, testing_dag_bundle): dag_file = Path(__file__).parents[1] / 
"dags/test_only_empty_tasks.py" # Write DAGs to dag and serialized_dag table - dagbag = DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) sync_bag_to_db(dagbag, "testing", None) scheduler_job = Job() @@ -9454,7 +9452,7 @@ def test_execute_queries_count_with_harvested_dags( ), ): dagruns = [] - dagbag = DagBag(dag_folder=ELASTIC_DAG_FILE, include_examples=False) + dagbag = DagBag(dag_folder=ELASTIC_DAG_FILE) sync_bag_to_db(dagbag, "testing", None) for i, dag in enumerate(dagbag.dags.values()): @@ -9546,7 +9544,7 @@ def test_process_dags_queries_count( } ), ): - dagbag = DagBag(dag_folder=ELASTIC_DAG_FILE, include_examples=False) + dagbag = DagBag(dag_folder=ELASTIC_DAG_FILE) sync_bag_to_db(dagbag, "testing", None) scheduler_job = Job(job_type=SchedulerJobRunner.job_type) diff --git a/airflow-core/tests/unit/models/test_dag.py b/airflow-core/tests/unit/models/test_dag.py index 19406d9b12e54..93ffaf74589ac 100644 --- a/airflow-core/tests/unit/models/test_dag.py +++ b/airflow-core/tests/unit/models/test_dag.py @@ -209,7 +209,7 @@ def test_dag_test_auto_parses_when_not_serialized(self, test_dags_bundle, sessio dag_id = "test_example_bash_operator" - dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER)) dag = dagbag.dags.get(dag_id) # Ensure not serialized yet diff --git a/airflow-core/tests/unit/models/test_dagrun.py b/airflow-core/tests/unit/models/test_dagrun.py index 4ceefc30dfd1b..4a919a311a202 100644 --- a/airflow-core/tests/unit/models/test_dagrun.py +++ b/airflow-core/tests/unit/models/test_dagrun.py @@ -104,7 +104,7 @@ async def empty_callback_for_deadline(): def dagbag(): from airflow.dag_processing.dagbag import DagBag - return DagBag(include_examples=True) + return DagBag() @pytest.fixture diff --git a/airflow-core/tests/unit/serialization/test_dag_serialization.py b/airflow-core/tests/unit/serialization/test_dag_serialization.py index 
f70cbda319c6f..5bc5b0ae84768 100644 --- a/airflow-core/tests/unit/serialization/test_dag_serialization.py +++ b/airflow-core/tests/unit/serialization/test_dag_serialization.py @@ -484,7 +484,7 @@ def collect_dags(dag_folder=None): for directory in glob(f"{AIRFLOW_REPO_ROOT_PATH}/{pattern}"): if any([directory.startswith(excluded_pattern) for excluded_pattern in excluded_patterns]): continue - dagbag = DagBag(directory, include_examples=False) + dagbag = DagBag(directory) dags.update(dagbag.dags) import_errors.update(dagbag.import_errors) return dags, import_errors @@ -1888,9 +1888,7 @@ def mytask(): @pytest.mark.db_test def test_basic_mapped_dag(self, dag_maker): - dagbag = DagBag( - "airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py", include_examples=False - ) + dagbag = DagBag("airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py") assert not dagbag.import_errors dag = dagbag.dags["example_dynamic_task_mapping"] ser_dag = DagSerialization.to_dict(dag) diff --git a/devel-common/src/tests_common/pytest_plugin.py b/devel-common/src/tests_common/pytest_plugin.py index 812b598f1fd71..773629819f1b0 100644 --- a/devel-common/src/tests_common/pytest_plugin.py +++ b/devel-common/src/tests_common/pytest_plugin.py @@ -941,7 +941,7 @@ def __init__(self): from airflow.models import DagBag # Keep all the serialized dags we've created in this test - self.dagbag = DagBag(os.devnull, include_examples=False) + self.dagbag = DagBag(os.devnull) def __enter__(self): self.serialized_model = None @@ -1749,7 +1749,7 @@ def _get(dag_id: str): from airflow.models.dagbag import DagBag # type: ignore[no-redef, attribute-defined] dag_file = AIRFLOW_CORE_TESTS_PATH / "unit" / "dags" / f"{dag_id}.py" - dagbag = DagBag(dag_folder=dag_file, include_examples=False) + dagbag = DagBag(dag_folder=dag_file) dag = dagbag.get_dag(dag_id) diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index 
c612cdaeea1d5..131d2e70360d0 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -21,7 +21,6 @@ import json import logging import os -import warnings from tempfile import gettempdir from typing import TYPE_CHECKING @@ -186,29 +185,17 @@ def initial_db_init(): _bootstrap_dagbag() -def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): +def parse_and_sync_to_db(folder: Path | str): """ Parse DAGs in ``folder`` and sync them to the metadata database. - On Airflow 3.1+, ``include_examples`` is back-compat-only: example DAGs - are exposed as dedicated bundles (``example_dags`` for core, - ``apache-airflow-providers-*-example-dags`` for each provider that ships - an ``example_dags`` folder), and whether they are loaded is controlled by - the ``[core] load_examples`` configuration option, not by this argument. - Tests that need example DAGs should set ``conf_vars({("core", "load_examples"): "true"})`` - instead. Passing ``include_examples=True`` on 3.1+ emits a - :class:`DeprecationWarning`. + On Airflow 3.3+, example DAGs are exposed as dedicated bundles + (``example_dags`` for core, ``apache-airflow-providers-*-example-dags`` + for each provider that ships an ``example_dags`` folder), and whether + they are loaded is controlled by the ``[core] load_examples`` + configuration option. Tests that need example DAGs should set + ``conf_vars({("core", "load_examples"): "true"})``. """ - if AIRFLOW_V_3_1_PLUS and include_examples is True: - warnings.warn( - "include_examples=True is deprecated for parse_and_sync_to_db on " - "Airflow 3.1+. Example DAGs are now loaded via dedicated bundles " - "controlled by the [core] load_examples configuration option. 
Set " - "conf_vars({('core', 'load_examples'): 'true'}) in the test instead.", - DeprecationWarning, - stacklevel=2, - ) - if AIRFLOW_V_3_2_PLUS: from airflow.dag_processing.dagbag import DagBag else: @@ -235,17 +222,17 @@ def parse_and_sync_to_db(folder: Path | str, include_examples: bool = False): # otherwise the same DAG gets registered under two bundles and # ``dag_schedule_asset_reference`` rows collide on the unique # ``(asset_id, dag_id)`` constraint. - dagbag = DagBag(dag_folder=folder, include_examples=False) + dagbag = DagBag(dag_folder=folder) sync_bag_to_db(dagbag, "dags-folder", None, session=session) for bundle in DagBundlesManager().get_all_dag_bundles(): - bundle_dagbag = DagBag(dag_folder=bundle.path, include_examples=False) + bundle_dagbag = DagBag(dag_folder=bundle.path) sync_bag_to_db(bundle_dagbag, bundle.name, None, session=session) elif AIRFLOW_V_3_0_PLUS: - dagbag = DagBag(dag_folder=folder, include_examples=include_examples) + dagbag = DagBag(dag_folder=folder, include_examples=False) dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] else: - dagbag = DagBag(dag_folder=folder, include_examples=include_examples) + dagbag = DagBag(dag_folder=folder, include_examples=False) dagbag.sync_to_db(session=session) # type: ignore[attr-defined] return dagbag diff --git a/providers/google/tests/unit/google/cloud/operators/test_dataproc.py b/providers/google/tests/unit/google/cloud/operators/test_dataproc.py index 7dbe93033fc75..7f0f13f77bd23 100644 --- a/providers/google/tests/unit/google/cloud/operators/test_dataproc.py +++ b/providers/google/tests/unit/google/cloud/operators/test_dataproc.py @@ -575,7 +575,7 @@ def assert_warning(msg: str, warnings): class DataprocTestBase: @classmethod def setup_class(cls): - cls.dagbag = DagBag(dag_folder="/dev/null", include_examples=False) + cls.dagbag = DagBag(dag_folder="/dev/null") cls.dag = DAG( dag_id=TEST_DAG_ID, schedule=None, diff --git 
a/providers/google/tests/unit/google/cloud/operators/test_looker.py b/providers/google/tests/unit/google/cloud/operators/test_looker.py index 6985fd6ec77d2..982f977d7504c 100644 --- a/providers/google/tests/unit/google/cloud/operators/test_looker.py +++ b/providers/google/tests/unit/google/cloud/operators/test_looker.py @@ -50,7 +50,7 @@ def test_data_studio_aliases(): class LookerTestBase: @classmethod def setUpClass(cls): - cls.dagbag = DagBag(dag_folder="/dev/null", include_examples=False) + cls.dagbag = DagBag(dag_folder="/dev/null") cls.dag = DAG(TEST_DAG_ID, default_args={"owner": "airflow", "start_date": DEFAULT_DATE}) def setup_method(self): diff --git a/providers/openlineage/tests/unit/openlineage/plugins/test_execution.py b/providers/openlineage/tests/unit/openlineage/plugins/test_execution.py index ffb32b6ff1d37..256e873f55e47 100644 --- a/providers/openlineage/tests/unit/openlineage/plugins/test_execution.py +++ b/providers/openlineage/tests/unit/openlineage/plugins/test_execution.py @@ -86,7 +86,6 @@ def setup_job(self, task_name, run_id, listener_manager): dagbag = DagBag( dag_folder=TEST_DAG_FOLDER, - include_examples=False, ) dag = dagbag.dags.get("test_openlineage_execution") task = dag.get_task(task_name) @@ -189,7 +188,6 @@ def test_success_overtime_kills_tasks(self, listener_manager): dagbag = DagBag( dag_folder=TEST_DAG_FOLDER, - include_examples=False, ) dag = dagbag.dags.get("test_openlineage_execution") task = dag.get_task("execute_long_stall") diff --git a/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py b/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py index 589bd32920e6f..8851711d73a6b 100644 --- a/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py +++ b/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py @@ -62,7 +62,11 @@ from tests_common.test_utils.dag import create_scheduler_dag, sync_dag_to_db, sync_dags_to_db from 
tests_common.test_utils.db import clear_db_runs from tests_common.test_utils.mock_operators import MockOperator -from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_1_PLUS, AIRFLOW_V_3_2_PLUS +from tests_common.test_utils.version_compat import ( + AIRFLOW_V_3_0_PLUS, + AIRFLOW_V_3_1_PLUS, + AIRFLOW_V_3_2_PLUS, +) if AIRFLOW_V_3_0_PLUS: from airflow.models.dag_version import DagVersion @@ -1721,7 +1725,7 @@ def dag_bag_ext(): """ clear_db_runs() - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule=None) task_a_0 = EmptyOperator(task_id="task_a_0", dag=dag_0) @@ -1785,7 +1789,7 @@ def dag_bag_parent_child(): """ clear_db_runs() - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) day_1 = DEFAULT_DATE @@ -2022,7 +2026,7 @@ def dag_bag_cyclic(): """ def _factory(depth: int) -> DagBag: - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) dags = [] @@ -2120,7 +2124,7 @@ def dag_bag_multiple(session): """ Create a DagBag containing two DAGs, linked by multiple ExternalTaskMarker. 
""" - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) daily_dag = DAG("daily_dag", start_date=DEFAULT_DATE, schedule="@daily") agg_dag = DAG("agg_dag", start_date=DEFAULT_DATE, schedule="@daily") if AIRFLOW_V_3_0_PLUS: @@ -2166,7 +2170,7 @@ def dag_bag_head_tail(session): | tail/| | tail/| / | tail | +------+ +------+ +------+ """ - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) with DAG("head_tail", start_date=DEFAULT_DATE, schedule="@daily") as dag: head = ExternalTaskSensor( @@ -2211,7 +2215,7 @@ def dag_bag_head_tail_mapped_tasks(session): | tail/| | tail/| / | tail | +------+ +------+ +------+ """ - dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False) + dag_bag = DagBag(dag_folder=DEV_NULL) with DAG("head_tail", start_date=DEFAULT_DATE, schedule="@daily") as dag: diff --git a/providers/standard/tests/unit/standard/sensors/test_time_delta.py b/providers/standard/tests/unit/standard/sensors/test_time_delta.py index b5c931e3dafbb..4a2e11a22f0a3 100644 --- a/providers/standard/tests/unit/standard/sensors/test_time_delta.py +++ b/providers/standard/tests/unit/standard/sensors/test_time_delta.py @@ -36,7 +36,12 @@ from airflow.utils.types import DagRunType from tests_common.test_utils import db -from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_2_PLUS, timezone +from tests_common.test_utils.version_compat import ( + AIRFLOW_V_3_0_PLUS, + AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, + timezone, +) if AIRFLOW_V_3_2_PLUS: from airflow.dag_processing.dagbag import DagBag @@ -63,7 +68,7 @@ def clear_db(): class TestTimedeltaSensor: def setup_method(self): - self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=False) + self.dagbag = DagBag(dag_folder=DEV_NULL) self.dag = DAG(TEST_DAG_ID, schedule=timedelta(days=1), start_date=DEFAULT_DATE) def test_timedelta_sensor(self, mocker): @@ -161,7 +166,10 @@ def 
test_timedelta_sensor_deferrable_run_after_vs_interval(run_after, interval_e class TestTimeDeltaSensorAsync: def setup_method(self): - self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True) + if AIRFLOW_V_3_3_PLUS: + self.dagbag = DagBag(dag_folder=DEV_NULL) + else: + self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True) self.args = {"owner": "airflow", "start_date": DEFAULT_DATE} self.dag = DAG(TEST_DAG_ID, schedule=timedelta(days=1), default_args=self.args) diff --git a/providers/standard/tests/unit/standard/sensors/test_weekday.py b/providers/standard/tests/unit/standard/sensors/test_weekday.py index 4f9bac530785d..69f2d82bf7bc8 100644 --- a/providers/standard/tests/unit/standard/sensors/test_weekday.py +++ b/providers/standard/tests/unit/standard/sensors/test_weekday.py @@ -27,7 +27,12 @@ from airflow.providers.standard.utils.weekday import WeekDay from tests_common.test_utils import db -from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_2_PLUS, timezone +from tests_common.test_utils.version_compat import ( + AIRFLOW_V_3_0_PLUS, + AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, + timezone, +) if AIRFLOW_V_3_2_PLUS: from airflow.dag_processing.dagbag import DagBag @@ -66,7 +71,10 @@ def clean_db(): def setup_method(self): self.clean_db() - self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True) + if AIRFLOW_V_3_3_PLUS: + self.dagbag = DagBag(dag_folder=DEV_NULL) + else: + self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True) self.args = {"owner": "airflow", "start_date": DEFAULT_DATE} dag = DAG(TEST_DAG_ID, schedule=timedelta(days=1), default_args=self.args) self.dag = dag From e445c0f41f1eaf6652df267ad48cc1cb51080888 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Mon, 4 May 2026 12:37:58 -0300 Subject: [PATCH 14/17] Fix mypy and test assertion after DagBag include_examples removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 
previous commit removed include_examples from DagBag callers but left two follow-ups that broke prek mypy and the config CLI test: - devel-common test_utils/db.py: pre-3.1 compat branches still call DagBag(include_examples=False) for older Airflow runtimes; mypy now flags it because the current source no longer accepts that kwarg. Add call-arg type-ignore (matches the existing attr-defined pattern on the sync_to_db calls below). - test_config_command.py: assertions for conf.write(...) lost the include_examples=False kwarg, but conf.write is AirflowConfigParser.write (config-file examples), unrelated to the DagBag flag, and still passes it. Restore the kwarg in the expected call. Signed-off-by: André Ahlert --- airflow-core/tests/unit/cli/commands/test_config_command.py | 2 ++ devel-common/src/tests_common/test_utils/db.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/airflow-core/tests/unit/cli/commands/test_config_command.py b/airflow-core/tests/unit/cli/commands/test_config_command.py index efe5ad5cf296e..008554422624b 100644 --- a/airflow-core/tests/unit/cli/commands/test_config_command.py +++ b/airflow-core/tests/unit/cli/commands/test_config_command.py @@ -45,6 +45,7 @@ def test_cli_show_config_should_write_data(self, mock_conf, mock_stringio): mock_conf.write.assert_called_once_with( mock_stringio.return_value.__enter__.return_value, section=None, + include_examples=False, include_descriptions=False, include_sources=False, include_env_vars=False, @@ -64,6 +65,7 @@ def test_cli_show_config_should_write_data_specific_section(self, mock_conf, moc mock_conf.write.assert_called_once_with( mock_stringio.return_value.__enter__.return_value, section="core", + include_examples=False, include_descriptions=False, include_sources=False, include_env_vars=False, diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index 131d2e70360d0..bea62f035b7eb 100644 --- 
a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -229,10 +229,10 @@ def parse_and_sync_to_db(folder: Path | str): sync_bag_to_db(bundle_dagbag, bundle.name, None, session=session) elif AIRFLOW_V_3_0_PLUS: - dagbag = DagBag(dag_folder=folder, include_examples=False) + dagbag = DagBag(dag_folder=folder, include_examples=False) # type: ignore[call-arg] dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] else: - dagbag = DagBag(dag_folder=folder, include_examples=False) + dagbag = DagBag(dag_folder=folder, include_examples=False) # type: ignore[call-arg] dagbag.sync_to_db(session=session) # type: ignore[attr-defined] return dagbag From 9f102c17d2a20b59f96e9b3b35564e79e1ae9c8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Mon, 4 May 2026 20:21:22 -0300 Subject: [PATCH 15/17] Drop include_examples from new DagBag callers and gate dag_maker for compat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The merge with main brought in tests in airflow-core/tests/unit/models/test_dag.py that still passed include_examples=False to DagBag, and the dag_maker fixture in pytest_plugin.py was loading examples by default on Airflow <3.3 because the removed kwarg flipped the effective default to True. That broke compat 3.0.6 runs because the cleanup path then tried to roll back a session that was never attached. Drop the kwarg from the new test_dag.py callers and gate both dag_maker and get_test_dag DagBag construction on AIRFLOW_V_3_3_PLUS so older Airflow versions still pass include_examples=False explicitly. 
Signed-off-by: André Ahlert --- airflow-core/tests/unit/models/test_dag.py | 8 ++++---- devel-common/src/tests_common/pytest_plugin.py | 17 ++++++++++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/airflow-core/tests/unit/models/test_dag.py b/airflow-core/tests/unit/models/test_dag.py index 93ffaf74589ac..965b3be88987e 100644 --- a/airflow-core/tests/unit/models/test_dag.py +++ b/airflow-core/tests/unit/models/test_dag.py @@ -237,7 +237,7 @@ def test_dag_test_syncs_sibling_for_trigger_dagrun(self, test_dags_bundle, sessi parent_id = "test_dag_test_trigger_parent" target_id = "test_dag_test_trigger_target" - dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER)) parent = dagbag.dags.get(parent_id) assert parent is not None @@ -270,7 +270,7 @@ def test_dag_test_syncs_sibling_for_dynamic_trigger_dagrun(self, test_dags_bundl parent_id = "test_dag_test_dynamic_trigger_parent" target_id = "test_dag_test_dynamic_trigger_target" - dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER)) parent = dagbag.dags.get(parent_id) assert parent is not None @@ -297,7 +297,7 @@ def test_dag_test_falls_back_when_recorded_bundle_no_longer_configured( parent_id = "test_dag_test_trigger_parent" target_id = "test_dag_test_trigger_target" - dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER)) parent = dagbag.dags.get(parent_id) assert parent is not None @@ -325,7 +325,7 @@ def test_dag_test_only_syncs_owning_bundle_when_parent_already_serialized( """ parent_id = "test_dag_test_trigger_parent" - dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER), include_examples=False) + dagbag = DagBag(dag_folder=os.fspath(TEST_DAGS_FOLDER)) parent = dagbag.dags.get(parent_id) assert parent is not None diff --git 
a/devel-common/src/tests_common/pytest_plugin.py b/devel-common/src/tests_common/pytest_plugin.py index 773629819f1b0..a36cb50a206a3 100644 --- a/devel-common/src/tests_common/pytest_plugin.py +++ b/devel-common/src/tests_common/pytest_plugin.py @@ -919,6 +919,7 @@ def dag_maker(request) -> Generator[DagMaker, None, None]: AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_1_PLUS, AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, NOTSET, ) @@ -941,7 +942,10 @@ def __init__(self): from airflow.models import DagBag # Keep all the serialized dags we've created in this test - self.dagbag = DagBag(os.devnull) + if AIRFLOW_V_3_3_PLUS: + self.dagbag = DagBag(os.devnull) + else: + self.dagbag = DagBag(os.devnull, include_examples=False) # type: ignore[call-arg] def __enter__(self): self.serialized_model = None @@ -1741,7 +1745,11 @@ def _get(dag_id: str): from airflow import settings from airflow.models.serialized_dag import SerializedDagModel - from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_2_PLUS + from tests_common.test_utils.version_compat import ( + AIRFLOW_V_3_0_PLUS, + AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, + ) if AIRFLOW_V_3_2_PLUS: from airflow.dag_processing.dagbag import DagBag @@ -1749,7 +1757,10 @@ def _get(dag_id: str): from airflow.models.dagbag import DagBag # type: ignore[no-redef, attribute-defined] dag_file = AIRFLOW_CORE_TESTS_PATH / "unit" / "dags" / f"{dag_id}.py" - dagbag = DagBag(dag_folder=dag_file) + if AIRFLOW_V_3_3_PLUS: + dagbag = DagBag(dag_folder=dag_file) + else: + dagbag = DagBag(dag_folder=dag_file, include_examples=False) # type: ignore[call-arg] dag = dagbag.get_dag(dag_id) From edebd750e62370a4d37caafcc811f245c0353325 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Tue, 5 May 2026 05:51:19 -0300 Subject: [PATCH 16/17] Gate DagBag include_examples on Airflow 3.3+ for older compat runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PR drops 
include_examples=False from a number of DagBag callers because DagBag in 3.3+ no longer accepts the kwarg and never loads examples by default. On compat runs against Airflow 3.1.x and 3.2.x, parse_and_sync_to_db hit a branch that called DagBag(folder) with the old default include_examples=True, which let the dags-folder DagBag pull in core example DAGs that the bundle loop also synced, producing duplicate dag_schedule_asset_reference rows and a UNIQUE constraint failure. On Airflow 2.11.1, DagBag callers in the standard external_task_sensor and time_delta tests defaulted to include_examples=True and pulled in example DAGs whose required Params lack defaults, breaking ExternalTaskMarker tests with ParamValidationError. Restrict the new bundle-aware sync path to AIRFLOW_V_3_3_PLUS and pass include_examples=False explicitly on older Airflow versions in the standard provider tests so compat 2.11.1, 3.1.8, and 3.2.1 keep their original behaviour. Signed-off-by: André Ahlert --- .../src/tests_common/test_utils/db.py | 22 +++++++++++----- .../sensors/test_external_task_sensor.py | 26 ++++++++++++++----- .../unit/standard/sensors/test_time_delta.py | 5 +++- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/devel-common/src/tests_common/test_utils/db.py b/devel-common/src/tests_common/test_utils/db.py index bea62f035b7eb..1525625375780 100644 --- a/devel-common/src/tests_common/test_utils/db.py +++ b/devel-common/src/tests_common/test_utils/db.py @@ -64,7 +64,12 @@ ParseImportError, TaskOutletAssetReference, ) -from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_1_PLUS, AIRFLOW_V_3_2_PLUS +from tests_common.test_utils.version_compat import ( + AIRFLOW_V_3_0_PLUS, + AIRFLOW_V_3_1_PLUS, + AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, +) log = logging.getLogger(__name__) @@ -209,11 +214,9 @@ def parse_and_sync_to_db(folder: Path | str): DagBundlesManager().sync_bundles_to_db(session=session) session.flush() - if AIRFLOW_V_3_1_PLUS: - 
try: - from airflow.dag_processing.dagbag import sync_bag_to_db - except ImportError: - from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] + if AIRFLOW_V_3_3_PLUS: + from airflow.dag_processing.dagbag import sync_bag_to_db + # On 3.3+, example DAGs are exposed as their own bundles # (``example_dags`` for core, ``apache-airflow-providers-*-example-dags`` # for each provider that ships an ``example_dags`` folder). The @@ -228,6 +231,13 @@ def parse_and_sync_to_db(folder: Path | str): bundle_dagbag = DagBag(dag_folder=bundle.path) sync_bag_to_db(bundle_dagbag, bundle.name, None, session=session) + elif AIRFLOW_V_3_1_PLUS: + try: + from airflow.dag_processing.dagbag import sync_bag_to_db + except ImportError: + from airflow.models.dagbag import sync_bag_to_db # type: ignore[no-redef, attribute-defined] + dagbag = DagBag(dag_folder=folder, include_examples=False) # type: ignore[call-arg] + sync_bag_to_db(dagbag, "dags-folder", None, session=session) elif AIRFLOW_V_3_0_PLUS: dagbag = DagBag(dag_folder=folder, include_examples=False) # type: ignore[call-arg] dagbag.sync_to_db("dags-folder", None, session) # type: ignore[attr-defined] diff --git a/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py b/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py index 8851711d73a6b..ef6bd33a76423 100644 --- a/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py +++ b/providers/standard/tests/unit/standard/sensors/test_external_task_sensor.py @@ -66,8 +66,22 @@ AIRFLOW_V_3_0_PLUS, AIRFLOW_V_3_1_PLUS, AIRFLOW_V_3_2_PLUS, + AIRFLOW_V_3_3_PLUS, ) + +def _make_dagbag(dag_folder): + """DagBag with examples disabled on Airflow <3.3. + + In 3.3+, ``include_examples`` was removed and example DAGs come from + provider example bundles instead. On older versions the default is True, + which loads example DAGs that can fail tests with their required Params. 
+ """ + if AIRFLOW_V_3_3_PLUS: + return DagBag(dag_folder=dag_folder) + return DagBag(dag_folder=dag_folder, include_examples=False) # type: ignore[call-arg] + + if AIRFLOW_V_3_0_PLUS: from airflow.models.dag_version import DagVersion from airflow.sdk import BaseOperator, task as task_deco @@ -1725,7 +1739,7 @@ def dag_bag_ext(): """ clear_db_runs() - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule=None) task_a_0 = EmptyOperator(task_id="task_a_0", dag=dag_0) @@ -1789,7 +1803,7 @@ def dag_bag_parent_child(): """ clear_db_runs() - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) day_1 = DEFAULT_DATE @@ -2026,7 +2040,7 @@ def dag_bag_cyclic(): """ def _factory(depth: int) -> DagBag: - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) dags = [] @@ -2124,7 +2138,7 @@ def dag_bag_multiple(session): """ Create a DagBag containing two DAGs, linked by multiple ExternalTaskMarker. 
""" - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) daily_dag = DAG("daily_dag", start_date=DEFAULT_DATE, schedule="@daily") agg_dag = DAG("agg_dag", start_date=DEFAULT_DATE, schedule="@daily") if AIRFLOW_V_3_0_PLUS: @@ -2170,7 +2184,7 @@ def dag_bag_head_tail(session): | tail/| | tail/| / | tail | +------+ +------+ +------+ """ - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) with DAG("head_tail", start_date=DEFAULT_DATE, schedule="@daily") as dag: head = ExternalTaskSensor( @@ -2215,7 +2229,7 @@ def dag_bag_head_tail_mapped_tasks(session): | tail/| | tail/| / | tail | +------+ +------+ +------+ """ - dag_bag = DagBag(dag_folder=DEV_NULL) + dag_bag = _make_dagbag(DEV_NULL) with DAG("head_tail", start_date=DEFAULT_DATE, schedule="@daily") as dag: diff --git a/providers/standard/tests/unit/standard/sensors/test_time_delta.py b/providers/standard/tests/unit/standard/sensors/test_time_delta.py index 4a2e11a22f0a3..5c4eeef2a8eb9 100644 --- a/providers/standard/tests/unit/standard/sensors/test_time_delta.py +++ b/providers/standard/tests/unit/standard/sensors/test_time_delta.py @@ -68,7 +68,10 @@ def clear_db(): class TestTimedeltaSensor: def setup_method(self): - self.dagbag = DagBag(dag_folder=DEV_NULL) + if AIRFLOW_V_3_3_PLUS: + self.dagbag = DagBag(dag_folder=DEV_NULL) + else: + self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=False) # type: ignore[call-arg] self.dag = DAG(TEST_DAG_ID, schedule=timedelta(days=1), start_date=DEFAULT_DATE) def test_timedelta_sensor(self, mocker): From 358e1eb41e191da33b7cd7d04c9055281477d069 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Tue, 5 May 2026 17:50:25 -0300 Subject: [PATCH 17/17] Trim 66161 newsfragment to user-visible facts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DagBag is not part of the Airflow public interface, so removing include_examples from 
DagBag/collect_dags/BundleDagBag/parse_and_sync_to_db does not need a release-notes migration recipe. Drop the internal removal block and the Types of change / Migration rules scaffolding, keep the bundle naming change and the bundle_name REST API behaviour change. Signed-off-by: André Ahlert --- .../newsfragments/66161.significant.rst | 71 +++---------------- 1 file changed, 9 insertions(+), 62 deletions(-) diff --git a/airflow-core/newsfragments/66161.significant.rst b/airflow-core/newsfragments/66161.significant.rst index aa4db6ba9f9f7..79b7313bb291d 100644 --- a/airflow-core/newsfragments/66161.significant.rst +++ b/airflow-core/newsfragments/66161.significant.rst @@ -1,65 +1,12 @@ Provider example DAGs are exposed as dedicated bundles -Example DAGs that ship with provider distributions are now discovered via +Example DAGs shipped by provider distributions are now discovered via ``ProvidersManager`` and registered as their own DAG bundles, one per -provider that ships an ``example_dags/`` folder. Bundle names follow the -shape ``apache-airflow-providers--example-dags`` (for -canonical Apache providers) or ``-example-dags`` (for -third-party providers). The ``[core] load_examples`` configuration option -remains the single switch that controls whether any example bundles are -registered. - -**What changed:** - -- Example DAGs that previously came in under the implicit ``dags-folder`` - bundle are now persisted in ``DagBundleModel`` rows and emitted in REST - API responses (``GET /api/v2/dags/{dag_id}/dag-versions`` and the - ``bundle_name`` field on task-instance responses) under the new - per-provider bundle names. -- Nested providers such as ``apache-airflow-providers-common-sql`` are - discovered correctly (previously they were missed because discovery - walked ``airflow.providers.__path__`` directly). - -**Behaviour changes:** - -- Clients filtering or tracking bundles by ``"dags-folder"`` for - previously-shipped example DAGs (e.g. 
``example_python_operator``) need - to update to the new per-provider bundle names. The DAG identifiers - themselves are unchanged. - -**Removals:** - -- The ``include_examples`` parameter has been removed from - ``DagBag.__init__``, ``DagBag.collect_dags``, ``BundleDagBag.__init__``, - and ``tests_common.test_utils.db.parse_and_sync_to_db``. Example DAG - loading is now controlled exclusively by the ``[core] load_examples`` - configuration option, which gates whether the per-provider example - bundles are registered. Callers that previously passed - ``include_examples=True`` should set - ``conf_vars({("core", "load_examples"): "true"})`` (or equivalent - configuration). Callers that previously passed - ``include_examples=False`` can drop the argument: it matches the new - default behaviour where ``DagBag`` only walks the configured - ``dag_folder`` and example DAGs come in via dedicated bundles. - -* Types of change - - * [ ] Dag changes - * [x] Config changes - * [x] API changes - * [ ] CLI changes - * [x] Behaviour changes - * [ ] Plugin changes - * [ ] Dependency changes - * [x] Code interface changes - -* Migration rules needed - - * Update clients that filter REST API responses by ``bundle_name`` to - match the new per-provider bundle names for example DAGs. - * Replace ``include_examples=True`` calls to ``DagBag`` / - ``parse_and_sync_to_db`` with - ``conf_vars({("core", "load_examples"): "true"})`` (or equivalent - configuration). The argument has been removed. - * Drop ``include_examples=False`` arguments from ``DagBag`` / - ``parse_and_sync_to_db`` calls; the default behaviour is unchanged. +provider, named ``apache-airflow-providers-<name>-example-dags`` +(or ``<distribution-name>-example-dags`` for third-party providers). The +``[core] load_examples`` option still gates whether they are registered. + +REST API clients that filtered ``bundle_name`` by ``"dags-folder"`` for +provider-shipped example DAGs (e.g.
``example_python_operator``) must +update to the new per-provider bundle names. DAG identifiers are +unchanged.