From add7129bd6722233fc0ffe67c07705ea32abdbb9 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Mon, 13 May 2024 11:07:53 -0700 Subject: [PATCH] new files --- .github/workflows/python-importable.yml | 27 +++ scripts/test_imports.py | 135 +++++++++++++ superset/extensions/event_logger_manager.py | 39 ++++ superset/initialization/bootstrap.py | 198 ++++++++++++++++++++ 4 files changed, 399 insertions(+) create mode 100644 .github/workflows/python-importable.yml create mode 100755 scripts/test_imports.py create mode 100644 superset/extensions/event_logger_manager.py create mode 100644 superset/initialization/bootstrap.py diff --git a/.github/workflows/python-importable.yml b/.github/workflows/python-importable.yml new file mode 100644 index 000000000000..153c91b112bc --- /dev/null +++ b/.github/workflows/python-importable.yml @@ -0,0 +1,27 @@ +name: pre-commit checks + +on: + push: + branches: + - "master" + - "[0-9].[0-9]" + pull_request: + types: [synchronize, opened, reopened, ready_for_review] + +# cancel previous workflow jobs for PRs +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }} + cancel-in-progress: true + +jobs: + test_python_imports: + runs-on: ubuntu-20.04 + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v4 + - name: Setup Python + uses: ./.github/actions/setup-backend/ + - name: Enable brew and helm-docs + run: | + echo "Checking that all python modules are importable" + python scripts/test_imports.py diff --git a/scripts/test_imports.py b/scripts/test_imports.py new file mode 100755 index 000000000000..9a5732b17e91 --- /dev/null +++ b/scripts/test_imports.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Test importability of all modules within a package with parallel +execution support. + +This was implemented to prevent usage of the app's context and configuration +located at app.config within module scope. It may also identify other issues +such as circular imports or anything else that may prevent a module from being +imported independently. +""" + +import argparse +import os +import re +import subprocess +import sys +from concurrent.futures import as_completed, ThreadPoolExecutor # Import as_completed +from typing import List + +EXCLUDE_FILE_PATTERNS: List[str] = [ + r"^superset/migrations/", + r"^tests/integration_tests/migrations/", +] +ROOT_FOLDERS = ["superset", "tests"] + + +def test_module_import(file_path: str) -> str | None: + """Test if a module can be imported independently""" + module_path = file_path.replace(".py", "").replace("/", ".") + splitted = module_path.split(".") + from_part = ".".join(splitted[:-1]) + import_part = splitted[-1] + import_statement = f"from {from_part} import {import_part}" + try: + subprocess.run( + ["python", "-c", import_statement], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + return None + except subprocess.CalledProcessError as e: + if e.stderr: + return e.stderr.decode("utf-8").strip() + return str(e) + + +def get_module_paths(package_path: str) -> list[str]: + paths = [] + for root, dirs, files in os.walk(package_path): + for file in files: + filepath = os.path.normpath(os.path.join(package_path, root, file)) + relative_path = os.path.relpath(filepath, package_path) + if file.endswith(".py") and all( + not re.match(pattern, relative_path) + for pattern in EXCLUDE_FILE_PATTERNS + ): + paths.append(relative_path) + return paths + + +def test_import( + path_pattern: str | None = None, max_workers: int | None = None +) -> None: + """Test importability of all modules within a package""" + error_count = 0 + processed_count = 0 + paths = [] + for folder in ROOT_FOLDERS: + paths += get_module_paths(folder) + if path_pattern: + filtered_path = [] + for path in paths: + if re.match(path_pattern, path): + filtered_path.append(path) + paths = filtered_path + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = {executor.submit(test_module_import, path): path for path in paths} + for future in as_completed(futures): # Use as_completed + path = futures[future] + processed_count += 1 + message = f"[{processed_count}/{len(paths)}] {path}" + error = future.result() + if error: + print(f"❌ {message}") + print(error) + error_count += 1 + else: + print(f"✅ {message}") + + print(f"Total errors: {error_count}") + if error_count: + sys.exit(1) + + +def parse_arguments() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Test importability of all modules within a package with parallel execution support." + ) + parser.add_argument( + "--workers", + type=int, + default=os.cpu_count(), + help="Number of worker threads for parallel execution (default is number of CPU cores)", + ) + parser.add_argument( + "path", + type=str, + default="*", + help="Path filter", + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_arguments() + test_import(args.path, args.workers) diff --git a/superset/extensions/event_logger_manager.py b/superset/extensions/event_logger_manager.py new file mode 100644 index 000000000000..05012fa7aa5e --- /dev/null +++ b/superset/extensions/event_logger_manager.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from superset.utils.log import AbstractEventLogger, StdOutEventLogger + + +class EventLoggerManager: + _instance = None + + def __new__(cls) -> "EventLoggerManager": + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._event_logger = ( + StdOutEventLogger() + ) # Initialize with default logger + return cls._instance + + def get_event_logger(self) -> AbstractEventLogger: + return self._event_logger + + def set_event_logger(self, logger: AbstractEventLogger) -> None: + self._event_logger = logger # pylint: disable=attribute-defined-outside-init + + +__all__ = ["EventLoggerManager"] diff --git a/superset/initialization/bootstrap.py b/superset/initialization/bootstrap.py new file mode 100644 index 000000000000..ecb55adfb060 --- /dev/null +++ b/superset/initialization/bootstrap.py @@ -0,0 +1,198 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import os +from typing import Any + +from babel import Locale +from flask import current_app as app, g, get_flashed_messages, session +from flask_appbuilder.security.sqla.models import User +from flask_babel import get_locale + +from superset.db_engine_specs import get_available_dialects +from superset.extensions import appbuilder, cache_manager, feature_flag_manager +from superset.reports.models import ReportRecipientType +from superset.translations.utils import get_language_pack +from superset.utils import core as utils + +FRONTEND_CONF_KEYS = ( + "SUPERSET_WEBSERVER_TIMEOUT", + "SUPERSET_DASHBOARD_POSITION_DATA_LIMIT", + "SUPERSET_DASHBOARD_PERIODICAL_REFRESH_LIMIT", + "SUPERSET_DASHBOARD_PERIODICAL_REFRESH_WARNING_MESSAGE", + "ENABLE_JAVASCRIPT_CONTROLS", + "DEFAULT_SQLLAB_LIMIT", + "DEFAULT_VIZ_TYPE", + "SQL_MAX_ROW", + "SUPERSET_WEBSERVER_DOMAINS", + "SQLLAB_SAVE_WARNING_MESSAGE", + "SQLLAB_DEFAULT_DBID", + "DISPLAY_MAX_ROW", + "GLOBAL_ASYNC_QUERIES_TRANSPORT", + "GLOBAL_ASYNC_QUERIES_POLLING_DELAY", + "SQL_VALIDATORS_BY_ENGINE", + "SQLALCHEMY_DOCS_URL", + "SQLALCHEMY_DISPLAY_TEXT", + "GLOBAL_ASYNC_QUERIES_WEBSOCKET_URL", + "DASHBOARD_AUTO_REFRESH_MODE", + "DASHBOARD_AUTO_REFRESH_INTERVALS", + "DASHBOARD_VIRTUALIZATION", + "SCHEDULED_QUERIES", + "EXCEL_EXTENSIONS", + "CSV_EXTENSIONS", + "COLUMNAR_EXTENSIONS", + "ALLOWED_EXTENSIONS", + "SAMPLES_ROW_LIMIT", + "DEFAULT_TIME_FILTER", + "HTML_SANITIZATION", + "HTML_SANITIZATION_SCHEMA_EXTENSIONS", + "WELCOME_PAGE_LAST_TAB", + "VIZ_TYPE_DENYLIST", + "ALERT_REPORTS_DEFAULT_CRON_VALUE", + "ALERT_REPORTS_DEFAULT_RETENTION", + "ALERT_REPORTS_DEFAULT_WORKING_TIMEOUT", + "NATIVE_FILTER_DEFAULT_ROW_LIMIT", + "PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET", + "JWT_ACCESS_CSRF_COOKIE_NAME", +) + + +def get_environment_tag() -> dict[str, Any]: + # Whether flask is in debug mode (--debug) + debug = app.config["DEBUG"] + + # Getting the configuration option for ENVIRONMENT_TAG_CONFIG + env_tag_config = app.config["ENVIRONMENT_TAG_CONFIG"] + + # These are the predefined templates define in the config + env_tag_templates = env_tag_config.get("values") + + # This is the environment variable name from which to select the template + # default is SUPERSET_ENV (from FLASK_ENV in previous versions) + env_envvar = env_tag_config.get("variable") + + # this is the actual name we want to use + env_name = os.environ.get(env_envvar) + + if not env_name or env_name not in env_tag_templates.keys(): + env_name = "debug" if debug else None + + env_tag = env_tag_templates.get(env_name) + return env_tag or {} + + +def menu_data(user: User) -> dict[str, Any]: + languages = { + lang: {**appbuilder.languages[lang], "url": appbuilder.get_url_for_locale(lang)} + for lang in appbuilder.languages + } + + if callable(brand_text := app.config["LOGO_RIGHT_TEXT"]): + brand_text = brand_text() + + return { + "menu": appbuilder.menu.get_data(), + "brand": { + "path": app.config["LOGO_TARGET_PATH"] or "/superset/welcome/", + "icon": appbuilder.app_icon, + "alt": appbuilder.app_name, + "tooltip": app.config["LOGO_TOOLTIP"], + "text": brand_text, + }, + "environment_tag": get_environment_tag(), + "navbar_right": { + # show the watermark if the default app icon has been overridden + "show_watermark": ("superset-logo-horiz" not in appbuilder.app_icon), + "bug_report_url": app.config["BUG_REPORT_URL"], + "bug_report_icon": app.config["BUG_REPORT_ICON"], + "bug_report_text": app.config["BUG_REPORT_TEXT"], + "documentation_url": app.config["DOCUMENTATION_URL"], + "documentation_icon": app.config["DOCUMENTATION_ICON"], + "documentation_text": app.config["DOCUMENTATION_TEXT"], + "version_string": app.config["VERSION_STRING"], + "version_sha": app.config["VERSION_SHA"], + "build_number": app.config["BUILD_NUMBER"], + "languages": languages, + "show_language_picker": len(languages) > 1, + "user_is_anonymous": user.is_anonymous, + "user_info_url": ( + None + if feature_flag_manager.is_feature_enabled("MENU_HIDE_USER_INFO") + else appbuilder.get_url_for_userinfo + ), + "user_logout_url": appbuilder.get_url_for_logout, + "user_login_url": appbuilder.get_url_for_login, + "locale": session.get("locale", "en"), + }, + } + + +@cache_manager.cache.memoize(timeout=60) +def cached_common_bootstrap_data( # pylint: disable=unused-argument + user_id: int | None, locale: Locale | None +) -> dict[str, Any]: + """Common data always sent to the client + + The function is memoized as the return value only changes when user permissions + or configuration values change. + """ + conf = app.config + + # should not expose API TOKEN to frontend + frontend_config = { + k: (list(conf.get(k)) if isinstance(conf.get(k), set) else conf.get(k)) + for k in FRONTEND_CONF_KEYS + } + + if conf.get("SLACK_API_TOKEN"): + frontend_config["ALERT_REPORTS_NOTIFICATION_METHODS"] = [ + ReportRecipientType.EMAIL, + ReportRecipientType.SLACK, + ] + else: + frontend_config["ALERT_REPORTS_NOTIFICATION_METHODS"] = [ + ReportRecipientType.EMAIL, + ] + + # TODO maybe we should pass all dialects to define what's visible in the UI (?) + + # verify client has google sheets installed + available_dialects = get_available_dialects() + frontend_config["HAS_GSHEETS_INSTALLED"] = bool("gsheets" in available_dialects) + + language = locale.language if locale else "en" + + bootstrap_data = { + "conf": frontend_config, + "locale": language, + "language_pack": get_language_pack(language), + "d3_format": conf.get("D3_FORMAT"), + "currencies": conf.get("CURRENCIES"), + "feature_flags": feature_flag_manager.get_feature_flags(), + "extra_sequential_color_schemes": conf["EXTRA_SEQUENTIAL_COLOR_SCHEMES"], + "extra_categorical_color_schemes": conf["EXTRA_CATEGORICAL_COLOR_SCHEMES"], + "theme_overrides": conf["THEME_OVERRIDES"], + "menu_data": menu_data(g.user), + } + bootstrap_data.update(conf["COMMON_BOOTSTRAP_OVERRIDES_FUNC"](bootstrap_data)) + return bootstrap_data + + +def common_bootstrap_payload() -> dict[str, Any]: + return { + **cached_common_bootstrap_data(utils.get_user_id(), get_locale()), + "flash_messages": get_flashed_messages(with_categories=True), + }