Skip to content

Commit 7d67c71

Browse files
authored
Merge pull request #96 from MobSF/sarif_update
Sarif update
2 parents 0ef8c86 + bf50202 commit 7d67c71

File tree

5 files changed

+104
-112
lines changed

5 files changed

+104
-112
lines changed

action.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Update Pipfile
44
```bash
5-
PIPENV_IGNORE_VIRTUALENVS=1 pipenv lock
5+
PIPENV_IGNORE_VIRTUALENVS=1 pipenv lock
66
PIPENV_IGNORE_VIRTUALENVS=1 pipenv sync
77
PIPENV_IGNORE_VIRTUALENVS=1 pipenv run pip freeze > requirements.txt
88
```

mobsfscan/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
__title__ = 'mobsfscan'
77
__authors__ = 'Ajin Abraham'
88
__copyright__ = f'Copyright {datetime.now().year} Ajin Abraham, OpenSecurity'
9-
__version__ = '0.4.0'
9+
__version__ = '0.4.1'
1010
__version_info__ = tuple(int(i) for i in __version__.split('.'))
1111
__all__ = [
1212
'__title__',

mobsfscan/formatters/sarif.py

Lines changed: 77 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1,138 +1,109 @@
11
# -*- coding: utf_8 -*-
2-
"""Sarif output format.
2+
"""SARIF output formatter for MobSF scan results.
33
4-
Based on https://github.com/microsoft/bandit-sarif-formatter/
5-
blob/master/bandit_sarif_formatter/formatter.py
4+
Based on https://github.com/microsoft/
5+
bandit-sarif-formatter/blob/master/
6+
bandit_sarif_formatter/formatter.py
7+
MIT License, Copyright (c) Microsoft Corporation.
68
7-
Copyright (c) Microsoft. All Rights Reserved.
8-
MIT License
9-
10-
Copyright (c) Microsoft Corporation.
11-
12-
Permission is hereby granted, free of charge, to any person obtaining a copy
13-
of this software and associated documentation files (the "Software"), to deal
14-
in the Software without restriction, including without limitation the rights
15-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16-
copies of the Software, and to permit persons to whom the Software is
17-
furnished to do so, subject to the following conditions:
18-
19-
The above copyright notice and this permission notice shall be included in all
20-
copies or substantial portions of the Software.
21-
22-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28-
SOFTWARE
299
"""
30-
from datetime import datetime
10+
from datetime import datetime, timezone
3111
from pathlib import PurePath
3212
import urllib.parse as urlparse
3313

3414
import sarif_om as om
3515

3616
from jschema_to_python.to_json import to_json
3717

38-
3918
TS_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
4019

4120

4221
def level_from_severity(severity):
43-
if severity == 'ERROR':
44-
return 'error'
45-
elif severity == 'WARNING':
46-
return 'warning'
47-
elif severity == 'INFO':
48-
return 'note'
49-
else:
50-
return 'none'
22+
return {
23+
'ERROR': 'error',
24+
'WARNING': 'warning',
25+
'INFO': 'note',
26+
}.get(severity, 'none')
5127

5228

5329
def to_uri(file_path):
5430
pure_path = PurePath(file_path)
5531
if pure_path.is_absolute():
5632
return pure_path.as_uri()
5733
else:
58-
posix_path = pure_path.as_posix() # Replace backslashes with slashes.
59-
return urlparse.quote(posix_path) # %-encode special characters.
34+
return urlparse.quote(pure_path.as_posix())
6035

6136

62-
def get_rule_name(rule_id):
63-
normalized = []
64-
noms = rule_id.split('_')
65-
for nom in noms:
66-
normalized.append(nom.capitalize())
67-
return ''.join(normalized)
37+
def format_rule_name(rule_id):
38+
return ''.join(word.capitalize() for word in rule_id.split('_'))
6839

6940

7041
def add_results(path, scan_results, run):
7142
if run.results is None:
7243
run.results = []
73-
res = {}
74-
res.update(scan_results.get('results', []))
44+
res = scan_results.get('results', {})
7545
rules = {}
7646
rule_indices = {}
7747

7848
for rule_id, issue_dict in res.items():
79-
result = create_result(path, rule_id, issue_dict, rules, rule_indices)
80-
run.results.append(result)
49+
rule_results = create_rule_results(
50+
path, rule_id, issue_dict, rules, rule_indices)
51+
run.results.extend(rule_results)
8152

82-
if len(rules) > 0:
53+
if rules:
8354
run.tool.driver.rules = list(rules.values())
8455

8556

86-
def create_result(path, rule_id, issue_dict, rules, rule_indices):
87-
if rule_id in rules:
88-
rule = rules[rule_id]
89-
rule_index = rule_indices[rule_id]
90-
else:
91-
doc = issue_dict['metadata'].get('reference')
92-
if not doc:
93-
doc = ('https://mobile-security.gitbook.io/'
94-
'mobile-security-testing-guide/')
57+
def create_rule_results(path, rule_id, issue_dict, rules, rule_indices):
58+
rule_results = []
59+
rule, rule_index = rules.get(rule_id), rule_indices.get(rule_id)
60+
ref_url = ('https://mobile-security.gitbook.io/'
61+
'mobile-security-testing-guide/')
62+
if not rule:
63+
doc = issue_dict['metadata'].get('reference') or ref_url
9564
cwe_id = issue_dict['metadata']['cwe'].split(':')[0].lower()
9665
rule = om.ReportingDescriptor(
9766
id=rule_id,
98-
name=get_rule_name(rule_id),
67+
name=format_rule_name(rule_id),
9968
help_uri=doc,
100-
properties={
101-
'tags': ['security', f'external/cwe/{cwe_id}'],
102-
},
103-
)
69+
properties={'tags': ['security', f'external/cwe/{cwe_id}']})
10470
rule_index = len(rules)
10571
rules[rule_id] = rule
10672
rule_indices[rule_id] = rule_index
10773

108-
locations = []
10974
for item in issue_dict.get('files', []):
110-
physical_location = om.PhysicalLocation(
111-
artifact_location=om.ArtifactLocation(
112-
uri=to_uri(item['file_path'])),
113-
)
114-
physical_location.region = om.Region(
115-
start_line=item['match_lines'][0],
116-
end_line=item['match_lines'][1],
117-
start_column=item['match_position'][0],
118-
end_column=item['match_position'][1],
119-
snippet=om.ArtifactContent(text=item['match_string']),
120-
)
121-
locations.append(om.Location(physical_location=physical_location))
122-
if not locations:
123-
artifact = om.PhysicalLocation(
124-
artifact_location=om.ArtifactLocation(
125-
uri=path[0]),
126-
)
127-
artifact.region = om.Region(
128-
start_line=1,
129-
end_line=1,
130-
start_column=1,
131-
end_column=1,
132-
snippet=om.ArtifactContent(text='Missing Best Practice'),
133-
)
134-
locations.append(om.Location(physical_location=artifact))
135-
75+
location = create_location(item)
76+
rule_results.append(create_result(rule, rule_index, issue_dict, [location]))
77+
78+
if not issue_dict.get('files'):
79+
default_location = om.Location(
80+
physical_location=om.PhysicalLocation(
81+
artifact_location=om.ArtifactLocation(uri=path[0]),
82+
region=om.Region(
83+
start_line=1,
84+
end_line=1,
85+
start_column=1,
86+
end_column=1,
87+
snippet=om.ArtifactContent(text='Missing Best Practice'))))
88+
rule_results.append(create_result(
89+
rule, rule_index, issue_dict, [default_location]))
90+
91+
return rule_results
92+
93+
94+
def create_location(item):
95+
return om.Location(
96+
physical_location=om.PhysicalLocation(
97+
artifact_location=om.ArtifactLocation(uri=to_uri(item['file_path'])),
98+
region=om.Region(
99+
start_line=item['match_lines'][0],
100+
end_line=item['match_lines'][1],
101+
start_column=item['match_position'][0],
102+
end_column=item['match_position'][1],
103+
snippet=om.ArtifactContent(text=item['match_string']))))
104+
105+
106+
def create_result(rule, rule_index, issue_dict, locations):
136107
return om.Result(
137108
rule_id=rule.id,
138109
rule_index=rule_index,
@@ -144,38 +115,34 @@ def create_result(path, rule_id, issue_dict, rules, rule_indices):
144115
'masvs': issue_dict['metadata']['masvs'],
145116
'cwe': issue_dict['metadata']['cwe'],
146117
'reference': issue_dict['metadata']['reference'],
147-
},
148-
)
118+
})
149119

150120

151121
def sarif_output(outfile, scan_results, mobsfscan_version, path):
152122
log = om.SarifLog(
153-
schema_uri=('https://raw.githubusercontent.com/oasis-tcs/'
154-
'sarif-spec/master/Schemata/sarif-schema-2.1.0.json'),
123+
schema_uri=('https://raw.githubusercontent.com/'
124+
'oasis-tcs/sarif-spec/master/Schemata/'
125+
'sarif-schema-2.1.0.json'),
155126
version='2.1.0',
156-
runs=[
157-
om.Run(
158-
tool=om.Tool(driver=om.ToolComponent(
159-
name='mobsfscan',
160-
information_uri='https://github.com/MobSF/mobsfscan',
161-
semantic_version=mobsfscan_version,
162-
version=mobsfscan_version),
163-
),
164-
invocations=[
165-
om.Invocation(
166-
end_time_utc=datetime.utcnow().strftime(TS_FORMAT),
167-
execution_successful=True,
168-
),
169-
],
170-
),
171-
],
172-
)
127+
runs=[om.Run(
128+
tool=om.Tool(driver=om.ToolComponent(
129+
name='mobsfscan',
130+
information_uri='https://github.com/MobSF/mobsfscan',
131+
semantic_version=mobsfscan_version,
132+
version=mobsfscan_version,
133+
)),
134+
invocations=[om.Invocation(
135+
end_time_utc=datetime.now(timezone.utc).strftime(TS_FORMAT),
136+
execution_successful=True,
137+
)])])
173138
run = log.runs[0]
174139
add_results(path, scan_results, run)
175140
json_out = to_json(log)
141+
176142
if outfile:
177143
with open(outfile, 'w') as of:
178144
of.write(json_out)
179145
else:
180146
print(json_out)
147+
181148
return json_out

mobsfscan/mobsfscan.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,28 @@ def format_output(self, results) -> dict:
127127
self.post_ignore_rules()
128128
self.post_ignore_rules_by_severity()
129129
self.post_ignore_files()
130+
self.deduplicate_files()
131+
132+
def deduplicate_files(self):
133+
"""Deduplicate files."""
134+
for _, details in self.result['results'].items():
135+
files = details.get('files')
136+
# some results don't have any files,
137+
# so we need to check before we continue
138+
if files:
139+
# "file" here refers to the dictionary containing
140+
# the file_path, match_lines, etc.
141+
# for each file we create a tuple with its contents
142+
# then using those tuples as keys and
143+
# "file" as values we create a dictionary
144+
# This means that for each unique "file"
145+
# we will get only one entry as we
146+
# can't have duplicate keys
147+
# Once this is done - convert the dictionary
148+
# back to list by grabbing its values and passing it to list()
149+
unique_files = list(
150+
{tuple(sorted(f.items())): f for f in files}.values())
151+
details['files'] = unique_files
130152

131153
def format_semgrep(self, sgrep_output):
132154
"""Format semgrep output."""

tox.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ setenv =
1616
skip_install = true
1717
deps =
1818
pydocstyle
19+
autopep8
1920
flake8
2021
flake8-broken-line
2122
flake8-bugbear
@@ -32,6 +33,7 @@ deps =
3233
pep8-naming
3334
radon
3435
commands =
36+
autopep8 --recursive --in-place setup.py mobsfscan tests
3537
flake8 setup.py mobsfscan tests
3638

3739
[testenv:bandit]
@@ -99,3 +101,4 @@ ignore =
99101
R701,
100102
# Too complex
101103
radon_max_cc = 10
104+
max-line-length = 88

0 commit comments

Comments
 (0)