Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 6 additions & 24 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -190,19 +190,10 @@ jobs:
#!/usr/bin/env bash
set -ev
source ./env/bin/activate
git fetch origin --depth=1 $(System.PullRequest.TargetBranch)
declare -A secret_files
for FILE in `git diff --name-only --diff-filter=AM origin/$(System.PullRequest.TargetBranch)` ; do
detected=$(azdev scan -f $FILE --continue-on-failure| python -c "import sys, json; print(json.load(sys.stdin)['secrets_detected'])")
if [ $detected == 'True' ]; then
printf "\033[0;31mSecrets detected from %s, Please remove or replace it. You can run 'azdev scan'/'azdev mask' locally to fix.\033[0m\n" "$FILE"
secret_files+=$FILE
fi
done
if [ "${#secret_files[@]}" -gt 0 ]; then
exit 1
fi
python scripts/ci/azdev_scan.py
displayName: "azdev scan ( High Confidence ) on Modified Extensions"
env:
ADO_PULL_REQUEST_TARGET_BRANCH: $(System.PullRequest.TargetBranch)

- job: AzdevScanProModifiedExtensionsMedium
displayName: "azdev scan ( Medium Confidence ) on Modified Extensions"
Expand All @@ -221,19 +212,10 @@ jobs:
#!/usr/bin/env bash
set -ev
source ./env/bin/activate
git fetch origin --depth=1 $(System.PullRequest.TargetBranch)
declare -A secret_files
for FILE in `git diff --name-only --diff-filter=AM origin/$(System.PullRequest.TargetBranch)` ; do
detected=$(azdev scan --confidence-level MEDIUM -f $FILE --continue-on-failure| python -c "import sys, json; print(json.load(sys.stdin)['secrets_detected'])")
if [ $detected == 'True' ]; then
printf "\033[0;31mSecrets detected from %s, Please remove or replace it. You can run 'azdev scan --confidence-level MEDIUM'/'azdev mask --confidence-level MEDIUM' locally to fix.\033[0m\n" "$FILE"
secret_files+=$FILE
fi
done
if [ "${#secret_files[@]}" -gt 0 ]; then
exit 1
fi
python scripts/ci/azdev_scan.py --confidence-level MEDIUM
displayName: "azdev scan ( Medium Confidence ) on Modified Extensions"
env:
ADO_PULL_REQUEST_TARGET_BRANCH: $(System.PullRequest.TargetBranch)

#- job: IndexRefDocVerify
# displayName: "Verify Ref Docs"
Expand Down
28 changes: 2 additions & 26 deletions scripts/ci/azdev_linter_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
This script is used to run azdev linter and azdev style on extensions.

It only works on ADO by default. If you want to run it locally,
please update the target branch/commit to find diff in function find_modified_files_against_master_branch()
please update the target branch in find_modified_files_against_master_branch() in util.py.
"""
import json
import logging
Expand All @@ -18,7 +18,7 @@

import service_name
from packaging.version import Version
from util import get_ext_metadata
from util import get_ext_metadata, find_modified_files_against_master_branch

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -119,30 +119,6 @@ def check_extension_name(self):
f"Please fix the name in setup.py!")


def find_modified_files_against_master_branch():
    """
    Find modified files from src/ only.
    A: Added, C: Copied, M: Modified, R: Renamed, T: File type changed.
    Deleted files don't count in diff.

    The target branch name is read from the ADO_PULL_REQUEST_TARGET_BRANCH
    environment variable and diffed as ``origin/<branch>``.

    :return: list of changed paths under src/; an empty list when the
             environment variable is not set or nothing changed.
    """
    target_branch = os.environ.get('ADO_PULL_REQUEST_TARGET_BRANCH')
    if not target_branch:
        # Without this guard, 'origin/' + None raises an opaque TypeError.
        logger.warning('ADO_PULL_REQUEST_TARGET_BRANCH is not available, skip diff.')
        return []
    ado_pr_target_branch = 'origin/' + target_branch

    separator_line()
    logger.info('pull request target branch: %s', ado_pr_target_branch)

    # Build the argument list directly instead of formatting and splitting a string.
    # -z makes git emit NUL-terminated, unquoted paths, so names containing
    # non-ASCII or special characters are returned verbatim.
    cmd = ['git', '--no-pager', 'diff', '--name-only', '-z', '--diff-filter=ACMRT',
           ado_pr_target_branch, '--', 'src/']
    files = [f for f in check_output(cmd).decode('utf-8').split('\0') if f]

    if files:
        logger.info('modified files:')
        separator_line()
        for f in files:
            logger.info(f)

    return files


def contain_index_json(files):
    """Tell whether 'src/index.json' is a member of ``files``."""
    index_path = 'src/index.json'
    return index_path in files

Expand Down
91 changes: 91 additions & 0 deletions scripts/ci/azdev_scan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------

"""
This script is used to run azdev scan on modified extensions in PR pipelines.

It reuses find_modified_files_against_master_branch() from util.py to get an
accurate list of files changed in the PR (via merge-base), then runs
azdev scan on each file.
"""
import json
import logging
import sys
from subprocess import CalledProcessError, check_output

from util import find_modified_files_against_master_branch

# Module-level logger; the logger and its handler are both set to DEBUG,
# so every record logged by this script is emitted.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# StreamHandler() without an explicit stream writes to sys.stderr.
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)


def run_scan(modified_files, confidence_level=None):
    """Run azdev scan on each modified file and report secrets.

    :param modified_files: iterable of file paths; each one is passed to
        ``azdev scan -f <path> --continue-on-failure``.
    :param confidence_level: optional value forwarded as ``--confidence-level``;
        when falsy the flag is omitted.
    :raises SystemExit: with exit code 1 when secrets were detected in any file,
        or when any scan failed or produced output that could not be interpreted.
    """
    confidence_flag = []
    confidence_msg = ''
    if confidence_level:
        confidence_flag = ['--confidence-level', confidence_level]
        confidence_msg = ' --confidence-level {}'.format(confidence_level)

    secret_files = []
    failed_files = []
    for f in modified_files:
        cmd = ['azdev', 'scan', '-f', f, '--continue-on-failure'] + confidence_flag
        logger.info('Scanning: %s', f)
        try:
            output = check_output(cmd).decode('utf-8', errors='replace')
            # Index the key instead of using .get(): a result without
            # 'secrets_detected' (KeyError) or one that is not a JSON object
            # (TypeError) must count as a failed scan, never as a clean one.
            detected = json.loads(output)['secrets_detected']
        except CalledProcessError as e:
            logger.error('azdev scan failed for %s: %s', f, e)
            failed_files.append(f)
        except (json.JSONDecodeError, KeyError, TypeError) as e:
            logger.error('Failed to parse azdev scan output for %s: %s', f, e)
            failed_files.append(f)
        else:
            if detected is True:
                logger.error(
                    '\033[0;31mSecrets detected from %s, Please remove or replace it. '
                    'You can run \'azdev scan%s\'/\'azdev mask%s\' locally to fix.\033[0m',
                    f, confidence_msg, confidence_msg
                )
                secret_files.append(f)

    if secret_files:
        logger.error('Secrets detected in %d file(s): %s', len(secret_files), secret_files)
    if failed_files:
        logger.error('Scan failed for %d file(s): %s', len(failed_files), failed_files)
    if secret_files or failed_files:
        # Either condition must fail the pipeline step.
        sys.exit(1)

    logger.info('-' * 100)
    logger.info('No secrets detected in any modified files.')
    logger.info('-' * 100)


def main():
    """Parse the command line, then scan the files modified by the pull request.

    When find_modified_files_against_master_branch() returns nothing, a message
    is logged and no scan is run.
    """
    from argparse import ArgumentParser

    arg_parser = ArgumentParser(description='azdev scan on modified extensions')
    arg_parser.add_argument(
        '--confidence-level',
        type=str,
        default=None,
        help='Confidence level for azdev scan (e.g., MEDIUM). Default: HIGH (azdev scan default).',
    )
    options = arg_parser.parse_args()

    changed_files = find_modified_files_against_master_branch()
    if changed_files:
        run_scan(changed_files, confidence_level=options.confidence_level)
    else:
        logger.info('No modified files found, skipping scan.')


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
66 changes: 65 additions & 1 deletion scripts/ci/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import json
import zipfile

from subprocess import check_output
from subprocess import check_call, check_output

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -163,3 +163,67 @@ def diff_code(start, end):
f'end: {end}, '
f'diff_ref: {diff_ref}.')
return diff_ref


def find_modified_files_against_master_branch():
    """
    Find modified files from src/ only, using merge-base for accurate PR diff.
    A: Added, C: Copied, M: Modified, R: Renamed, T: File type changed.
    Deleted files don't count in diff.

    The target branch is read from the ADO_PULL_REQUEST_TARGET_BRANCH environment
    variable; any leading ``refs/remotes/origin/``, ``refs/heads/`` or ``origin/``
    prefixes are stripped before it is used as ``origin/<branch>``.

    :return: list of paths under src/ that differ between the merge base and HEAD;
             an empty list when the target branch is not available.
    :raises subprocess.CalledProcessError: when a git command fails and no
             fallback applies.
    """
    # Imported locally so this function does not depend on a module-level
    # import of CalledProcessError.
    from subprocess import CalledProcessError

    ado_pr_target_branch = os.environ.get('ADO_PULL_REQUEST_TARGET_BRANCH')
    # The literal macro text is also treated as "not set"; presumably the pipeline
    # passes it through unexpanded when the variable is undefined (verify against ADO docs).
    if not ado_pr_target_branch or ado_pr_target_branch == '$(System.PullRequest.TargetBranch)':
        logger.warning('ADO_PULL_REQUEST_TARGET_BRANCH is not available, skip diff.')
        return []

    normalized_branch = re.sub(
        r'^(?:refs/remotes/origin/|refs/heads/|origin/)+', '', ado_pr_target_branch
    )

    ado_pr_target_branch = 'origin/{}'.format(normalized_branch)
    refspec = 'refs/heads/{0}:refs/remotes/origin/{0}'.format(normalized_branch)

    logger.info('-' * 100)
    logger.info('pull request target branch: %s', ado_pr_target_branch)

    # Ensure target ref exists and has enough history for merge-base.
    # Only use --deepen when the repo is a shallow clone.
    is_shallow = os.path.isfile(os.path.join('.git', 'shallow'))
    fetch_cmd = ['git', 'fetch', 'origin']
    if is_shallow:
        fetch_cmd.append('--deepen=50')
    fetch_cmd.append(refspec)
    check_call(fetch_cmd)

    merge_base_cmd = ['git', 'merge-base', 'HEAD', ado_pr_target_branch]
    try:
        merge_base = check_output(merge_base_cmd).decode('utf-8').strip()
    except CalledProcessError:
        # Only a failed git invocation is worth retrying; on a full clone more
        # history cannot help, so the error is propagated unchanged.
        if not is_shallow:
            raise
        logger.warning('merge-base failed after --deepen=50, falling back to --unshallow')
        check_call(['git', 'fetch', 'origin', '--unshallow', refspec])
        merge_base = check_output(merge_base_cmd).decode('utf-8').strip()

    logger.info('merge base: %s', merge_base)

    # -z makes git emit NUL-terminated, unquoted paths, so names containing
    # non-ASCII or special characters are returned verbatim rather than C-quoted.
    cmd = ['git', '--no-pager', 'diff', '--name-only', '-z', '--diff-filter=ACMRT',
           merge_base, 'HEAD', '--', 'src/']
    files = [f for f in check_output(cmd).decode('utf-8').split('\0') if f]

    if files:
        logger.info('modified files:')
        logger.info('-' * 100)
        for f in files:
            logger.info(f)

    return files
Loading