Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Occurences to Potential Secrets #527

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions detect_secrets/core/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def format_for_output(secrets: SecretsCollection, is_slim_mode: bool = False) ->
).items():
for secret_dict in secret_list:
secret_dict.pop('line_number')
secret_dict.pop('occurrences')

return output

Expand Down
6 changes: 6 additions & 0 deletions detect_secrets/core/potential_secret.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(
line_number: int = 0,
is_secret: Optional[bool] = None,
is_verified: bool = False,
occurrences: int = 0,
) -> None:
"""
:param type: human-readable secret type, defined by the plugin
Expand All @@ -46,6 +47,7 @@ def __init__(
self.set_secret(secret)
self.is_secret = is_secret
self.is_verified = is_verified
self.occurrences = occurrences

# If two PotentialSecrets have the same values for these fields,
# they are considered equal. Note that line numbers aren't included
Expand Down Expand Up @@ -84,6 +86,7 @@ def load_secret_from_dict(cls, data: Dict[str, Union[str, int, bool]]) -> 'Poten
'line_number',
'is_secret',
'is_verified',
'occurrences',
}:
if parameter in data:
kwargs[parameter] = data[parameter]
Expand All @@ -109,6 +112,9 @@ def json(self) -> Dict[str, Union[str, int, bool]]:
if hasattr(self, 'is_secret') and self.is_secret is not None:
attributes['is_secret'] = self.is_secret

if self.occurrences:
attributes['occurrences'] = self.occurrences

return attributes

def __eq__(self, other: Any) -> bool:
Expand Down
31 changes: 21 additions & 10 deletions detect_secrets/core/secrets_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(self, root: str = '') -> None:
relative to root, since we're running as if it was in a different directory,
rather than scanning a different directory.
"""
self.data: Dict[str, Set[PotentialSecret]] = defaultdict(set)
self.data: Dict[str, List[PotentialSecret]] = defaultdict(list)
self.root = root

@classmethod
Expand All @@ -40,7 +40,7 @@ def load_from_baseline(cls, baseline: Dict[str, Any]) -> 'SecretsCollection':
for filename in baseline['results']:
for item in baseline['results'][filename]:
secret = PotentialSecret.load_secret_from_dict({'filename': filename, **item})
output[filename].add(secret)
output[filename].append(secret)

return output

Expand Down Expand Up @@ -69,25 +69,32 @@ def scan_files(self, *filenames: str, num_processors: Optional[int] = None) -> N
[os.path.join(self.root, filename) for filename in filenames],
):
for secret in secrets:
self[os.path.relpath(secret.filename, self.root)].add(secret)
self.add_secret(os.path.relpath(secret.filename, self.root), secret)

def scan_file(self, filename: str) -> None:
for secret in scan.scan_file(os.path.join(self.root, filename)):
self[filename].add(secret)
self.add_secret(filename, secret)

def scan_diff(self, diff: str) -> None:
"""
:raises: UnidiffParseError
"""
try:
for secret in scan.scan_diff(diff):
self[secret.filename].add(secret)
self.add_secret(secret.filename, secret)
except ImportError: # pragma: no cover
raise NotImplementedError(
'SecretsCollection.scan_diff requires `unidiff` to work. Try pip '
'installing that package, and try again.',
)

def add_secret(self, filename: str, secret: PotentialSecret) -> None:
if secret in self[filename]:
index = self[filename].index(secret)
self[filename][index].occurrences += 1
else:
self[filename].append(secret)

def merge(self, old_results: 'SecretsCollection') -> None:
"""
We operate under an assumption that the latest results are always more accurate,
Expand Down Expand Up @@ -161,7 +168,7 @@ def trim(

# Unfortunately, we can't merely do a set intersection since we want to update the line
# numbers (if applicable). Therefore, this does it manually.
result: Dict[str, Set[PotentialSecret]] = defaultdict(set)
result: Dict[str, List[PotentialSecret]] = defaultdict(list)

for filename in scanned_results.files:
if filename not in self.files:
Expand All @@ -180,7 +187,11 @@ def trim(
# Only update line numbers if we're tracking them.
existing_secret.line_number = secret.line_number
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at your implementation, it looks like it could be fairly trivial to track the line number of each of the occurrences found and report them as it was mentioned in #493.


result[filename].add(existing_secret)
if existing_secret.occurrences:
# Only update occurences if we're tracking them.
existing_secret.occurrences = secret.occurrences

result[filename].append(existing_secret)

for filename in self.files:
# If this is already populated by scanned_results, then the set intersection
Expand Down Expand Up @@ -211,10 +222,10 @@ def json(self) -> Dict[str, Any]:
def exactly_equals(self, other: Any) -> bool:
return self.__eq__(other, strict=True) # type: ignore

def __getitem__(self, filename: str) -> Set[PotentialSecret]:
def __getitem__(self, filename: str) -> List[PotentialSecret]:
return self.data[filename]

def __setitem__(self, filename: str, value: Set[PotentialSecret]) -> None:
def __setitem__(self, filename: str, value: List[PotentialSecret]) -> None:
self.data[filename] = value

def __iter__(self) -> Generator[Tuple[str, PotentialSecret], None, None]:
Expand Down Expand Up @@ -297,7 +308,7 @@ def __sub__(self, other: Any) -> 'SecretsCollection':
if filename not in self.files:
continue

output[filename] = self[filename] - other[filename]
output[filename] = list(set(self[filename]) - set(other[filename]))

for filename in self.files:
if filename in other.files:
Expand Down
2 changes: 1 addition & 1 deletion detect_secrets/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def handle_scan_action(args: argparse.Namespace) -> None:
root=args.custom_root,
):
for secret in scan_for_allowlisted_secrets_in_file(filename):
secrets[secret.filename].add(secret)
secrets[secret.filename].append(secret)

print(json.dumps(baseline.format_for_output(secrets), indent=2))
return
Expand Down
1 change: 1 addition & 0 deletions detect_secrets/plugins/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def analyze_line(
secret=match,
line_number=line_number,
is_verified=is_verified,
occurrences=1,
),
)

Expand Down
2 changes: 2 additions & 0 deletions test_data/files/file_with_duplicate_secrets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
tokenA = 'gX69YO4CvBsVjzAwYxdGyDd30t5+9ez31gKATtj4'
tokenB = 'gX69YO4CvBsVjzAwYxdGyDd30t5+9ez31gKATtj4'
2 changes: 2 additions & 0 deletions testing/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def potential_secret_factory(
filename: str = 'filename',
secret: str = 'secret',
line_number: int = 1,
occurrences: int = 1,
**kwargs: Any,
) -> PotentialSecret:
"""This is only marginally better than creating PotentialSecret objects directly,
Expand All @@ -18,5 +19,6 @@ def potential_secret_factory(
filename=filename,
secret=secret,
line_number=line_number,
occurrences=occurrences,
**kwargs
)
2 changes: 1 addition & 1 deletion tests/audit/analytics_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def test_basic_statistics_json(printer):
)
def test_no_divide_by_zero(secret):
secrets = SecretsCollection()
secrets['file'].add(secret)
secrets['file'].append(secret)
with mock_named_temporary_file() as f:
baseline.save_to_file(secrets, f.name)
f.seek(0)
Expand Down
2 changes: 1 addition & 1 deletion tests/audit/audit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_nothing_to_audit(printer):

def test_file_no_longer_exists():
secrets = SecretsCollection()
secrets['non-existent'].add(potential_secret_factory())
secrets['non-existent'].append(potential_secret_factory())

run_logic(secrets)

Expand Down
6 changes: 3 additions & 3 deletions tests/audit/compare_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,10 @@ def parse_ordering(printer) -> str:

def test_file_no_longer_exists(printer, mock_user_decision):
secretsA = SecretsCollection()
secretsA['fileB'].add(potential_secret_factory('a'))
secretsA['fileB'].append(potential_secret_factory('a'))

secretsB = SecretsCollection()
secretsB['fileA'].add(potential_secret_factory('a'))
secretsB['fileA'].append(potential_secret_factory('a'))

run_logic(secretsA, secretsB)
assert not mock_user_decision.called
Expand All @@ -149,7 +149,7 @@ def run_logic(secretsA: SecretsCollection, secretsB: SecretsCollection):
def get_secrets(*secrets) -> SecretsCollection:
output = SecretsCollection()
for secret in secrets:
output[secret.filename].add(secret)
output[secret.filename].append(secret)

return output

Expand Down
8 changes: 4 additions & 4 deletions tests/core/baseline_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import subprocess
import tempfile
from pathlib import Path
from unittest import mock

import pytest
Expand Down Expand Up @@ -40,9 +39,10 @@ class TestCreate:
def test_basic_usage(path):
secrets = baseline.create(path)

assert len(secrets.data.keys()) == 2
assert len(secrets[str(Path('test_data/files/file_with_secrets.py'))]) == 1
assert len(secrets[str(Path('test_data/files/tmp/file_with_secrets.py'))]) == 2
assert len(secrets.data.keys()) == 3
assert len(secrets['test_data/files/file_with_secrets.py']) == 1
assert len(secrets['test_data/files/file_with_duplicate_secrets.py']) == 1
assert len(secrets['test_data/files/tmp/file_with_secrets.py']) == 2

@staticmethod
def test_error_when_getting_git_tracked_files():
Expand Down
3 changes: 3 additions & 0 deletions tests/core/potential_secret_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ def test_json():
'is_secret': True,
'is_verified': False,
},
{
'occurrences': 2,
},
),
)
def test_load_secret_from_dict(kwargs):
Expand Down
13 changes: 11 additions & 2 deletions tests/core/secrets_collection_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,15 @@ def test_file_based_success_unexpected_config_file(filename):

assert bool(secrets)

@staticmethod
def test_duplicate_secrets_occurrences():
secrets = SecretsCollection()
secrets.scan_file('test_data/files/file_with_duplicate_secrets.py')

secret = next(iter(secrets['test_data/files/file_with_duplicate_secrets.py']))
assert len(secrets['test_data/files/file_with_duplicate_secrets.py']) == 1
assert secret.occurrences == 2


class TestScanDiff:
@staticmethod
Expand Down Expand Up @@ -335,11 +344,11 @@ def test_mismatch_files():
def test_strict_equality():
secret = potential_secret_factory()
secretsA = SecretsCollection()
secretsA[secret.filename].add(secret)
secretsA[secret.filename].append(secret)

secret = potential_secret_factory(line_number=2)
secretsB = SecretsCollection()
secretsB[secret.filename].add(secret)
secretsB[secret.filename].append(secret)

assert secretsA == secretsB
assert not secretsA.exactly_equals(secretsB)
Expand Down
37 changes: 37 additions & 0 deletions tests/pre_commit_hook_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import io
import json
import sys
import tempfile
from contextlib import contextmanager
from functools import partial
from typing import List
Expand Down Expand Up @@ -215,6 +216,42 @@ def modified_baseline(self):
yield secrets


class TestOccurrencesChanges:
FILENAME = 'test_data/files/file_with_secrets.py'

def test_modifies_baseline(self, modified_baseline):
with tempfile.NamedTemporaryFile() as f:
baseline.save_to_file(modified_baseline, f.name)

assert_commit_blocked_with_diff_exit_code([
self.FILENAME,
'--baseline',
f.name,
])

def test_does_not_modify_slim_baseline(self, modified_baseline):
with tempfile.NamedTemporaryFile() as f:
baseline.save_to_file(
baseline.format_for_output(modified_baseline, is_slim_mode=True),
f.name,
)

assert_commit_succeeds([
self.FILENAME,
'--baseline',
f.name,
])

@pytest.fixture
def modified_baseline(self):
secrets = SecretsCollection()
secrets.scan_file(self.FILENAME)
for _, secret in secrets:
secret.occurrences += 1

yield secrets


def assert_commit_succeeds(command: List[str]):
assert main(command) == 0

Expand Down