From 04a75a4d36391908995f70f8d88c68c94302d245 Mon Sep 17 00:00:00 2001
From: Maxime Beauchemin
Date: Wed, 26 Jun 2024 13:52:03 -0700
Subject: [PATCH 1/4] chore: deprecate tox in favor of act

---
 .gitattributes               |   1 +
 .github/workflows/docker.yml |   2 +-
 pyproject.toml               | 164 ------------------
 requirements/development.txt |  34 ++--
 .../advanced_data_type/types_tests.py | 4 -
 5 files changed, 18 insertions(+), 187 deletions(-)

diff --git a/.gitattributes b/.gitattributes
index 5f47e6acc8eec..5b51949ac1153 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,3 @@
 docker/**/*.sh text eol=lf
 *.svg binary
+*.ipynb binary
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index c8c4756ea543c..088befe83d23b 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -31,7 +31,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        build_preset: ${{fromJson(needs.setup_matrix.outputs.matrix_config)}}
+        build_preset: ${{ fromJson(github.event_name == 'pull_request' && '["dev"]' || '["dev", "lean", "py310", "websocket", "dockerize"]') }}
       fail-fast: false
     env:
       DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
diff --git a/pyproject.toml b/pyproject.toml
index 16b9c2cb61c16..4eccad2592dbb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -196,7 +196,6 @@ development = [
     "ruff",
     "sqloxide",
     "statsd",
-    "tox",
 ]

 [project.urls]
@@ -233,169 +232,6 @@ disallow_untyped_calls = false
 disallow_untyped_defs = false
 disable_error_code = "annotation-unchecked"

-[tool.tox]
-legacy_tox_ini = """
-# Remember to start celery workers to run celery tests, e.g.
-# celery --app=superset.tasks.celery_app:app worker -Ofair -c 2
-[testenv]
-basepython = python3.10
-ignore_basepython_conflict = true
-commands =
-    superset db upgrade
-    superset init
-    superset load-test-users
-    # use -s to be able to use break pointers.
- # no args or tests/* can be passed as an argument to run all tests - pytest -s {posargs} -deps = - -rrequirements/development.txt -setenv = - PYTHONPATH = {toxinidir} - SUPERSET_TESTENV = true - SUPERSET_CONFIG = tests.integration_tests.superset_test_config - SUPERSET_HOME = {envtmpdir} - mysql: SUPERSET__SQLALCHEMY_DATABASE_URI = mysql://mysqluser:mysqluserpassword@localhost/superset?charset=utf8 - postgres: SUPERSET__SQLALCHEMY_DATABASE_URI = postgresql+psycopg2://superset:superset@localhost/test - sqlite: SUPERSET__SQLALCHEMY_DATABASE_URI = sqlite:////{envtmpdir}/superset.db - sqlite: SUPERSET__SQLALCHEMY_EXAMPLES_URI = sqlite:////{envtmpdir}/examples.db - mysql-presto: SUPERSET__SQLALCHEMY_DATABASE_URI = mysql://mysqluser:mysqluserpassword@localhost/superset?charset=utf8 - # docker run -p 8080:8080 --name presto starburstdata/presto - mysql-presto: SUPERSET__SQLALCHEMY_EXAMPLES_URI = presto://localhost:8080/memory/default - # based on https://github.com/big-data-europe/docker-hadoop - # clone the repo & run docker compose up -d to test locally - mysql-hive: SUPERSET__SQLALCHEMY_DATABASE_URI = mysql://mysqluser:mysqluserpassword@localhost/superset?charset=utf8 - mysql-hive: SUPERSET__SQLALCHEMY_EXAMPLES_URI = hive://localhost:10000/default - # make sure that directory is accessible by docker - hive: UPLOAD_FOLDER = /tmp/.superset/app/static/uploads/ -usedevelop = true -allowlist_externals = - npm - pkill - -[testenv:cypress] -setenv = - PYTHONPATH = {toxinidir} - SUPERSET_TESTENV = true - SUPERSET_CONFIG = tests.integration_tests.superset_test_config - SUPERSET_HOME = {envtmpdir} -commands = - npm install -g npm@'>=6.5.0' - pip install -e {toxinidir}/ - {toxinidir}/superset-frontend/cypress_build.sh -commands_post = - pkill -if "python {envbindir}/flask" - -[testenv:cypress-dashboard] -setenv = - PYTHONPATH = {toxinidir} - SUPERSET_TESTENV = true - SUPERSET_CONFIG = tests.integration_tests.superset_test_config - SUPERSET_HOME = {envtmpdir} -commands = - npm install -g npm@'>=6.5.0' - pip install -e {toxinidir}/ - {toxinidir}/superset-frontend/cypress_build.sh dashboard -commands_post = - pkill -if "python {envbindir}/flask" - -[testenv:cypress-explore] -setenv = - PYTHONPATH = {toxinidir} - SUPERSET_TESTENV = true - SUPERSET_CONFIG = tests.integration_tests.superset_test_config - SUPERSET_HOME = {envtmpdir} -commands = - npm install -g npm@'>=6.5.0' - pip install -e {toxinidir}/ - {toxinidir}/superset-frontend/cypress_build.sh explore -commands_post = - pkill -if "python {envbindir}/flask" - -[testenv:cypress-sqllab] -setenv = - PYTHONPATH = {toxinidir} - SUPERSET_TESTENV = true - SUPERSET_CONFIG = tests.integration_tests.superset_test_config - SUPERSET_HOME = {envtmpdir} -commands = - npm install -g npm@'>=6.5.0' - pip install -e {toxinidir}/ - {toxinidir}/superset-frontend/cypress_build.sh sqllab -commands_post = - pkill -if "python {envbindir}/flask" - -[testenv:cypress-sqllab-backend-persist] -setenv = - PYTHONPATH = {toxinidir} - SUPERSET_TESTENV = true - SUPERSET_CONFIG = tests.integration_tests.superset_test_config - SUPERSET_HOME = {envtmpdir} -commands = - npm install -g npm@'>=6.5.0' - pip install -e {toxinidir}/ - {toxinidir}/superset-frontend/cypress_build.sh sqllab -commands_post = - pkill -if "python {envbindir}/flask" - -[testenv:eslint] -changedir = {toxinidir}/superset-frontend -commands = - npm run lint -deps = - -[testenv:fossa] -commands = - {toxinidir}/scripts/fossa.sh -deps = -passenv = * - -[testenv:javascript] -commands = - npm install -g npm@'>=6.5.0' - 
{toxinidir}/superset-frontend/js_build.sh -deps = - -[testenv:license-check] -commands = - {toxinidir}/scripts/check_license.sh -passenv = * -whitelist_externals = - {toxinidir}/scripts/check_license.sh -deps = - -[testenv:pre-commit] -commands = - pre-commit run --all-files -deps = - -rrequirements/development.txt -skip_install = true - -[testenv:pylint] -commands = - pylint superset -deps = - -rrequirements/development.txt - -[testenv:thumbnails] -setenv = - SUPERSET_CONFIG = tests.integration_tests.superset_test_config_thumbnails -deps = - -rrequirements/development.txt - -[tox] -envlist = - cypress-dashboard - cypress-explore - cypress-sqllab - cypress-sqllab-backend-persist - eslint - fossa - javascript - license-check - pre-commit - pylint -skipsdist = true -""" [tool.ruff] # Exclude a variety of commonly ignored directories. exclude = [ diff --git a/requirements/development.txt b/requirements/development.txt index b2ed1b8b3a1cc..f9ba56e97530d 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -6,7 +6,7 @@ # pip-compile-multi # -r base.txt --e file:. +-e file:///Users/max/code/superset # via # -r requirements/base.in # -r requirements/development.in @@ -27,9 +27,7 @@ cached-property==1.5.2 cfgv==3.3.1 # via pre-commit chardet==5.1.0 - # via - # dataflows-tabulator - # tox + # via dataflows-tabulator cmdstanpy==1.1.0 # via prophet contourpy==1.0.7 @@ -51,9 +49,7 @@ docker==7.0.0 et-xmlfile==1.1.0 # via openpyxl filelock==3.12.2 - # via - # tox - # virtualenv + # via virtualenv flask-cors==4.0.0 # via apache-superset flask-testing==0.8.1 @@ -156,9 +152,7 @@ pip-tools==7.4.1 playwright==1.42.0 # via apache-superset pluggy==1.4.0 - # via - # pytest - # tox + # via pytest pre-commit==3.7.1 # via apache-superset progress==1.6 @@ -192,8 +186,6 @@ pyinstrument==4.4.0 # via apache-superset pylint==3.1.0 # via apache-superset -pyproject-api==1.6.1 - # via tox pyproject-hooks==1.0.0 # via # build @@ -232,13 +224,21 @@ thrift==0.16.0 # apache-superset # thrift-sasl thrift-sasl==0.4.3 - # via apache-superset + # via + # apache-superset + # pyhive +tomli==2.0.1 + # via + # build + # coverage + # pip-tools + # pylint + # pyproject-hooks + # pytest tomlkit==0.12.5 # via pylint toposort==1.10 # via pip-compile-multi -tox==4.6.4 - # via apache-superset tqdm==4.66.4 # via # cmdstanpy @@ -252,9 +252,7 @@ unicodecsv==0.14.1 # dataflows-tabulator # tableschema virtualenv==20.23.1 - # via - # pre-commit - # tox + # via pre-commit wheel==0.43.0 # via pip-tools xlrd==2.0.1 diff --git a/tests/unit_tests/advanced_data_type/types_tests.py b/tests/unit_tests/advanced_data_type/types_tests.py index a8f8bcf813dc7..37c3ba5337cf1 100644 --- a/tests/unit_tests/advanced_data_type/types_tests.py +++ b/tests/unit_tests/advanced_data_type/types_tests.py @@ -29,10 +29,6 @@ from superset.advanced_data_type.plugins.internet_port import internet_port as port -# To run the unit tests below, use the following command in the root Superset folder: -# tox -e py38 -- tests/unit_tests/advanced_data_type/types_tests.py - - def test_ip_func_valid_ip(): """Test to see if the cidr_func behaves as expected when a valid IP is passed in""" cidr_request: AdvancedDataTypeRequest = { From e08dfe53d09db4d5e0337d4a023586f0143846a9 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Fri, 28 Jun 2024 12:43:35 -0700 Subject: [PATCH 2/4] improving docs --- docs/docs/contributing/development.mdx | 98 +++++++++++++++++++++----- requirements/development.txt | 2 +- 2 files changed, 80 insertions(+), 20 deletions(-) diff 
--git a/docs/docs/contributing/development.mdx b/docs/docs/contributing/development.mdx
index 14d6a526d630e..4b349bd162f08 100644
--- a/docs/docs/contributing/development.mdx
+++ b/docs/docs/contributing/development.mdx
@@ -496,50 +496,110 @@ If using the eslint extension with vscode, put the following in your workspace `
 ]
 ```
-## Testing
-### Python Testing
+## GitHub Actions and `act`
+
+For automation and CI/CD, Superset makes extensive use of GitHub Actions (GHA). You
+can find all of the workflows and other assets under the `.github/` folder. This includes:
+- running the backend unit test suites (`tests/`)
+- running the frontend test suites (`superset-frontend/src/**.*.test.*`)
+- running our Cypress end-to-end tests (`superset-frontend/cypress-base/`)
+- linting the codebase, including all Python, TypeScript, JavaScript, YAML, and more
+- checking for all sorts of other rules and conventions
+
+When you open a pull request (PR), the appropriate GitHub Actions (GHA) workflows will
+automatically run depending on the changes in your branch. It's perfectly reasonable
+(and required!) to rely on this automation. However, the downside is that it's mostly an
+all-or-nothing approach and doesn't provide much control to target specific tests or
+iterate quickly.
+
-All python tests are carried out in [tox](https://tox.readthedocs.io/en/latest/index.html)
-a standardized testing framework.
-All python tests can be run with any of the tox [environments](https://tox.readthedocs.io/en/latest/example/basic.html#a-simple-tox-ini-default-environments), via,
+At times, it may be more convenient to run GHA workflows locally. For that purpose
+we use [act](https://github.com/nektos/act), a tool that simulates the GitHub Actions
+environment, enabling developers to test and debug workflows on their own machines
+before pushing changes to the repository. More on how to use it in the next section.
+
+:::note
+In both GHA and `act`, we can run a more complex matrix for our tests, executing against different
+database engines (PostgreSQL, MySQL, SQLite) and different versions of Python.
+This enables us to ensure compatibility and stability across various environments.
+:::
+
+### Lower-Level Control
+
+Note that for any GHA workflow, it's also possible to go lower level and run its content
+more directly on a local environment of your own making.
+If you prefer this approach for the control it gives you, it's possible to leverage the
+Docker Compose setup as a backend to run integration tests. To run specific workflows or
+individual steps from them, refer to the workflows themselves and reverse-engineer them,
+as they act as a comprehensive recipe for lower-level execution.
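+
+For example, a minimal sketch of this lower-level approach might look like the following
+(the Docker Compose service names and the test module here are illustrative assumptions,
+not an official recipe; check `docker-compose.yml` and the workflow files under
+`.github/workflows/` for what a given job actually runs):
+
+```bash
+# Start only the backing services (assuming docker-compose.yml defines
+# services named `db` and `redis`)
+docker compose up -d db redis
+
+# Point pytest at the test configuration used by CI, then run a single module
+export SUPERSET_CONFIG=tests.integration_tests.superset_test_config
+pytest tests/integration_tests/charts/api_tests.py
+```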
+
+### Using `act`
+
+First, install `act` by following the instructions at https://nektosact.com/
+
+To list the workflows, simply:
+
+```bash
+act --list
+```
+
+To run a specific workflow:
+
+```bash
+act --job {workflow_name} --secret GITHUB_TOKEN=$GITHUB_TOKEN --event pull_request --container-architecture linux/amd64
+```
+
+In the example above, notice that:
+- we target a specific workflow, using `--job`
+- we pass a secret using `--secret`, as many jobs require read access (public) to the repo
+- we simulate a `pull_request` event using `--event`, here we could simulate a
+  `push` event or something else
+- we specify `--container-architecture`, which tends to emulate GHA more reliably
+
+:::note
+`act` is a rich tool that offers all sorts of features, allowing you to simulate different
+events (pull_request, push, ...), semantics around passing secrets where required and much
+more. For more information, refer to [act's documentation](https://nektosact.com/)
+:::
+
+---
+
+## Testing
+
+### Python Testing
+
+#### Unit Tests
+
+For unit tests located in `tests/unit_tests/`, it's usually easy to simply run them locally using:
+
+```bash
+pytest tests/unit_tests/*
+```
+
+#### Integration Tests
+
+For the more complex, pytest-defined integration tests (not to be confused with our end-to-end
+Cypress tests), you will usually need a working test environment: some tests require a database,
+Celery, and potentially other services or libraries installed.
+
+#### Running Tests with `act`
+
+To run integration tests locally using `act`, ensure you have followed the setup instructions from the [GitHub Actions and `act`](#github-actions-and-act) section. You can run specific workflows or jobs that include integration tests. For example:
+
+```bash
+act --job test-python-38 --secret GITHUB_TOKEN=$GITHUB_TOKEN --event pull_request --container-architecture linux/amd64
+```
+
+#### Running locally using a test script
+
+There is also a utility script included in the Superset codebase to run Python integration tests. The [readme can be found here](https://github.com/apache/superset/tree/master/scripts/tests).
+
+To run all integration tests, for example, run this script from the root directory:
+
+```bash
+scripts/tests/run.sh
+```
+
+You can run unit tests found in `./tests/unit_tests` with pytest. It is a simple way to run an isolated test that doesn't need any database setup:
+
+```bash
+pytest ./link_to_test.py
+```

diff --git a/requirements/development.txt b/requirements/development.txt
index f9ba56e97530d..03f07e2a0a2b1 100644
--- a/requirements/development.txt
+++ b/requirements/development.txt
@@ -6,7 +6,7 @@
 #    pip-compile-multi
 #
 -r base.txt
--e file:///Users/max/code/superset
+-e file:.
 # via
 #   -r requirements/base.in
 #   -r requirements/development.in

From 4ec6610489e88931f5c39c0c894bb4f538501c3a Mon Sep 17 00:00:00 2001
From: Maxime Beauchemin
Date: Sat, 14 Sep 2024 11:00:00 -0700
Subject: [PATCH 3/4] re-run pip-compile-multi

---
 .github/workflows/docker.yml | 2 +-
 requirements/development.txt | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 088befe83d23b..c8c4756ea543c 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -31,7 +31,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        build_preset: ${{ fromJson(github.event_name == 'pull_request' && '["dev"]' || '["dev", "lean", "py310", "websocket", "dockerize"]') }}
+        build_preset: ${{fromJson(needs.setup_matrix.outputs.matrix_config)}}
       fail-fast: false
     env:
       DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
diff --git a/requirements/development.txt b/requirements/development.txt
index 03f07e2a0a2b1..cc41decda921a 100644
--- a/requirements/development.txt
+++ b/requirements/development.txt
@@ -224,9 +224,7 @@ thrift==0.16.0
     #   apache-superset
     #   thrift-sasl
 thrift-sasl==0.4.3
-    # via
-    #   apache-superset
-    #   pyhive
+    # via apache-superset
 tomli==2.0.1
     # via
     #   build

From 1df5cb916e997cdb9d613086676f7861446e0d7f Mon Sep 17 00:00:00 2001
From: Maxime Beauchemin
Date: Sat, 14 Sep 2024 11:26:32 -0700
Subject: [PATCH 4/4] broke through detect-changes issues

---
 docs/docs/contributing/development.mdx | 13 ++++++++++---
 scripts/change_detector.py             | 14 ++++++++++----
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/docs/docs/contributing/development.mdx b/docs/docs/contributing/development.mdx
index 4b349bd162f08..1e49ad8abd148 100644
--- a/docs/docs/contributing/development.mdx
+++ b/docs/docs/contributing/development.mdx
@@ -547,14 +547,14 @@ act --list
 To run a specific workflow:
 
 ```bash
-act --job {workflow_name} --secret GITHUB_TOKEN=$GITHUB_TOKEN --event pull_request --container-architecture linux/amd64
+act pull_request --job {workflow_name} --secret GITHUB_TOKEN=$GITHUB_TOKEN --container-architecture linux/amd64
 ```
 
 In the example above, notice that:
 - we target a specific workflow, using `--job`
 - we pass a secret using `--secret`, as many jobs require read access (public) to the repo
-- we simulate a `pull_request` event using `--event`, here we could simulate a
-  `push` event or something else
+- we simulate a `pull_request` event by specifying it as the first arg;
+  similarly, we could simulate a `push` event or something else
 - we specify `--container-architecture`, which tends to emulate GHA more reliably
 
 :::note
@@ -563,6 +563,13 @@ events (pull_request, push, ...), semantics around passing secrets where require
 more. For more information, refer to [act's documentation](https://nektosact.com/)
 :::
 
+:::note
+Some jobs require secrets to interact with external systems and accounts that you may
+not have in your possession. In those cases, you may have to rely on the remote CI, or further
+parameterize the job to target a different environment or sandbox of your own, along with
+the related secrets.
+::: + --- ## Testing diff --git a/scripts/change_detector.py b/scripts/change_detector.py index f52cd59fec45f..df46538f1ee99 100755 --- a/scripts/change_detector.py +++ b/scripts/change_detector.py @@ -95,15 +95,21 @@ def print_files(files: List[str]) -> None: print("\n".join([f"- {s}" for s in files])) +def is_int(s: str) -> bool: + return bool(re.match(r"^-?\d+$", s)) + + def main(event_type: str, sha: str, repo: str) -> None: """Main function to check for file changes based on event context.""" print("SHA:", sha) print("EVENT_TYPE", event_type) + files = None if event_type == "pull_request": pr_number = os.getenv("GITHUB_REF", "").split("/")[-2] - files = fetch_changed_files_pr(repo, pr_number) - print("PR files:") - print_files(files) + if is_int(pr_number): + files = fetch_changed_files_pr(repo, pr_number) + print("PR files:") + print_files(files) elif event_type == "push": files = fetch_changed_files_push(repo, sha) @@ -119,7 +125,7 @@ def main(event_type: str, sha: str, repo: str) -> None: changes_detected = {} for group, regex_patterns in PATTERNS.items(): patterns_compiled = [re.compile(p) for p in regex_patterns] - changes_detected[group] = event_type == "workflow_dispatch" or detect_changes( + changes_detected[group] = files is None or detect_changes( files, patterns_compiled )