diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7b9a213cfd2..79d0afc4ba0 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -190,19 +190,10 @@ jobs: #!/usr/bin/env bash set -ev source ./env/bin/activate - git fetch origin --depth=1 $(System.PullRequest.TargetBranch) - declare -A secret_files - for FILE in `git diff --name-only --diff-filter=AM origin/$(System.PullRequest.TargetBranch)` ; do - detected=$(azdev scan -f $FILE --continue-on-failure| python -c "import sys, json; print(json.load(sys.stdin)['secrets_detected'])") - if [ $detected == 'True' ]; then - printf "\033[0;31mSecrets detected from %s, Please remove or replace it. You can run 'azdev scan'/'azdev mask' locally to fix.\033[0m\n" "$FILE" - secret_files+=$FILE - fi - done - if [ "${#secret_files[@]}" -gt 0 ]; then - exit 1 - fi + python scripts/ci/azdev_scan.py displayName: "azdev scan ( High Confidence ) on Modified Extensions" + env: + ADO_PULL_REQUEST_TARGET_BRANCH: $(System.PullRequest.TargetBranch) - job: AzdevScanProModifiedExtensionsMedium displayName: "azdev scan ( Medium Confidence ) on Modified Extensions" @@ -221,19 +212,10 @@ jobs: #!/usr/bin/env bash set -ev source ./env/bin/activate - git fetch origin --depth=1 $(System.PullRequest.TargetBranch) - declare -A secret_files - for FILE in `git diff --name-only --diff-filter=AM origin/$(System.PullRequest.TargetBranch)` ; do - detected=$(azdev scan --confidence-level MEDIUM -f $FILE --continue-on-failure| python -c "import sys, json; print(json.load(sys.stdin)['secrets_detected'])") - if [ $detected == 'True' ]; then - printf "\033[0;31mSecrets detected from %s, Please remove or replace it. 
You can run 'azdev scan --confidence-level MEDIUM'/'azdev mask --confidence-level MEDIUM' locally to fix.\033[0m\n" "$FILE" - secret_files+=$FILE - fi - done - if [ "${#secret_files[@]}" -gt 0 ]; then - exit 1 - fi + python scripts/ci/azdev_scan.py --confidence-level MEDIUM displayName: "azdev scan ( Medium Confidence ) on Modified Extensions" + env: + ADO_PULL_REQUEST_TARGET_BRANCH: $(System.PullRequest.TargetBranch) #- job: IndexRefDocVerify # displayName: "Verify Ref Docs" diff --git a/scripts/ci/azdev_linter_style.py b/scripts/ci/azdev_linter_style.py index 5f41839c85a..e47ce0f9c92 100644 --- a/scripts/ci/azdev_linter_style.py +++ b/scripts/ci/azdev_linter_style.py @@ -7,7 +7,7 @@ This script is used to run azdev linter and azdev style on extensions. It's only working on ADO by default. If want to run locally, -please update the target branch/commit to find diff in function find_modified_files_against_master_branch() +please update the target branch in find_modified_files_against_master_branch() in util.py. """ import json import logging @@ -18,7 +18,7 @@ import service_name from packaging.version import Version -from util import get_ext_metadata +from util import get_ext_metadata, find_modified_files_against_master_branch logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -119,30 +119,6 @@ def check_extension_name(self): f"Please fix the name in setup.py!") -def find_modified_files_against_master_branch(): - """ - Find modified files from src/ only. - A: Added, C: Copied, M: Modified, R: Renamed, T: File type changed. - Deleted files don't count in diff. 
- """ - ado_pr_target_branch = 'origin/' + os.environ.get('ADO_PULL_REQUEST_TARGET_BRANCH') - - separator_line() - logger.info('pull request target branch: %s', ado_pr_target_branch) - - cmd = 'git --no-pager diff --name-only --diff-filter=ACMRT {} -- src/'.format(ado_pr_target_branch) - files = check_output(cmd.split()).decode('utf-8').split('\n') - files = [f for f in files if len(f) > 0] - - if files: - logger.info('modified files:') - separator_line() - for f in files: - logger.info(f) - - return files - - def contain_index_json(files): return 'src/index.json' in files diff --git a/scripts/ci/azdev_scan.py b/scripts/ci/azdev_scan.py new file mode 100644 index 00000000000..3b2cffd9853 --- /dev/null +++ b/scripts/ci/azdev_scan.py @@ -0,0 +1,91 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. +# -------------------------------------------------------------------------------------------- + +""" +This script is used to run azdev scan on modified extensions in PR pipelines. + +It reuses find_modified_files_against_master_branch() from util.py to get an +accurate list of files changed in the PR (via merge-base), then runs +azdev scan on each file. 
import json
import logging
import sys
from subprocess import CalledProcessError, check_output

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)


def _scan_file(path, confidence_flag):
    """Run ``azdev scan`` on a single file and return its verdict.

    :param path: file to scan, passed to ``azdev scan -f``.
    :param confidence_flag: extra command-line arguments (possibly empty)
        selecting the confidence level.
    :return: ``True`` only when the JSON output carries
        ``"secrets_detected": true``.
    :raises CalledProcessError: ``azdev`` exited with a non-zero status.
    :raises OSError: ``azdev`` could not be started at all.
    :raises ValueError: the output is not valid JSON
        (``json.JSONDecodeError`` is a ``ValueError``) or is not a JSON object.
    :raises KeyError: the JSON object has no ``secrets_detected`` key.
    """
    cmd = ['azdev', 'scan', '-f', path, '--continue-on-failure'] + confidence_flag
    output = check_output(cmd).decode('utf-8', errors='replace')
    result = json.loads(output)
    if not isinstance(result, dict):
        raise ValueError('expected a JSON object, got {}'.format(type(result).__name__))
    # Index instead of .get(): output without a verdict must be reported as a
    # failed scan rather than silently counted as "no secrets".
    return result['secrets_detected'] is True


def run_scan(modified_files, confidence_level=None):
    """Run azdev scan on each modified file and report secrets.

    :param modified_files: iterable of file paths to scan.
    :param confidence_level: optional value forwarded as
        ``--confidence-level``; when falsy the flag is omitted.
    :return: ``None`` when every file was scanned and none reported secrets.
    :raises SystemExit: with code 1 (via ``sys.exit``) when at least one file
        reported secrets or at least one scan failed.
    """
    confidence_flag = []
    confidence_msg = ''
    if confidence_level:
        confidence_flag = ['--confidence-level', confidence_level]
        confidence_msg = ' --confidence-level {}'.format(confidence_level)

    secret_files = []
    failed_files = []
    for f in modified_files:
        logger.info('Scanning: %s', f)
        try:
            detected = _scan_file(f, confidence_flag)
        except (CalledProcessError, OSError) as e:
            # OSError covers an `azdev` executable that is missing or not
            # runnable; record it like any other failed scan.
            logger.error('azdev scan failed for %s: %s', f, e)
            failed_files.append(f)
        except (ValueError, KeyError) as e:
            logger.error('Failed to parse azdev scan output for %s: %s', f, e)
            failed_files.append(f)
        else:
            if detected:
                logger.error(
                    '\033[0;31mSecrets detected from %s, Please remove or replace it. '
                    'You can run \'azdev scan%s\'/\'azdev mask%s\' locally to fix.\033[0m',
                    f, confidence_msg, confidence_msg
                )
                secret_files.append(f)

    if secret_files:
        logger.error('Secrets detected in %d file(s): %s', len(secret_files), secret_files)
    if failed_files:
        logger.error('Scan failed for %d file(s): %s', len(failed_files), failed_files)
    if secret_files or failed_files:
        sys.exit(1)

    logger.info('-' * 100)
    logger.info('No secrets detected in any modified files.')
    logger.info('-' * 100)


def main():
    """Parse the command line and scan the files modified by the pull request."""
    import argparse
    # Imported here, like argparse, so that importing this module for
    # run_scan() alone does not require util.py to be importable.
    from util import find_modified_files_against_master_branch

    parser = argparse.ArgumentParser(description='azdev scan on modified extensions')
    parser.add_argument('--confidence-level',
                        type=str,
                        default=None,
                        help='Confidence level for azdev scan (e.g., MEDIUM). '
                             'Default: HIGH (azdev scan default).')
    args = parser.parse_args()

    modified_files = find_modified_files_against_master_branch()
    if not modified_files:
        logger.info('No modified files found, skipping scan.')
        return

    run_scan(modified_files, confidence_level=args.confidence_level)


if __name__ == '__main__':
    main()
def find_modified_files_against_master_branch():
    """
    Find modified files from src/ only, using merge-base for accurate PR diff.
    A: Added, C: Copied, M: Modified, R: Renamed, T: File type changed.
    Deleted files don't count in diff.

    The target branch is read from the ADO_PULL_REQUEST_TARGET_BRANCH
    environment variable; leading ``refs/remotes/origin/``, ``refs/heads/``
    and ``origin/`` prefixes are stripped before it is fetched from ``origin``.

    :return: list of changed paths under ``src/``; an empty list when the
        environment variable is unset, empty, or still the unexpanded
        ``$(System.PullRequest.TargetBranch)`` placeholder.
    :raises subprocess.CalledProcessError: when a git command fails (for
        ``merge-base`` in a shallow clone, only after the ``--unshallow``
        retry has failed as well).
    """
    # Local import: only check_call/check_output are known to be imported at
    # the top of this module.
    from subprocess import CalledProcessError

    ado_pr_target_branch = os.environ.get('ADO_PULL_REQUEST_TARGET_BRANCH')
    if not ado_pr_target_branch or ado_pr_target_branch == '$(System.PullRequest.TargetBranch)':
        logger.warning('ADO_PULL_REQUEST_TARGET_BRANCH is not available, skip diff.')
        return []

    normalized_branch = re.sub(
        r'^(?:refs/remotes/origin/|refs/heads/|origin/)+', '', ado_pr_target_branch
    )
    ado_pr_target_branch = 'origin/{}'.format(normalized_branch)
    refspec = 'refs/heads/{0}:refs/remotes/origin/{0}'.format(normalized_branch)
    merge_base_cmd = ['git', 'merge-base', 'HEAD', ado_pr_target_branch]

    logger.info('-' * 100)
    logger.info('pull request target branch: %s', ado_pr_target_branch)

    # Ensure target ref exists and has enough history for merge-base.
    # Only use --deepen when the repo is a shallow clone. Ask git itself
    # (git >= 2.15) instead of probing ./.git/shallow, which is wrong when the
    # working directory is not the repository root or .git is a file.
    is_shallow = check_output(
        ['git', 'rev-parse', '--is-shallow-repository']
    ).decode('utf-8').strip() == 'true'
    fetch_cmd = ['git', 'fetch', 'origin']
    if is_shallow:
        fetch_cmd.append('--deepen=50')
    fetch_cmd.append(refspec)
    check_call(fetch_cmd)

    try:
        merge_base = check_output(merge_base_cmd).decode('utf-8').strip()
    except CalledProcessError:
        if not is_shallow:
            raise
        logger.warning('merge-base failed after --deepen=50, falling back to --unshallow')
        check_call(['git', 'fetch', 'origin', '--unshallow', refspec])
        merge_base = check_output(merge_base_cmd).decode('utf-8').strip()

    logger.info('merge base: %s', merge_base)

    # -z: NUL-terminated, unquoted names. Without it git C-quotes paths that
    # contain non-ASCII or special characters, and the quoted form is not a
    # usable file name for callers.
    cmd = ['git', '--no-pager', 'diff', '--name-only', '-z', '--diff-filter=ACMRT',
           merge_base, 'HEAD', '--', 'src/']
    files = [f for f in check_output(cmd).decode('utf-8').split('\0') if f]

    if files:
        logger.info('modified files:')
        logger.info('-' * 100)
        for f in files:
            logger.info(f)

    return files