-
Notifications
You must be signed in to change notification settings - Fork 35
/
audit.py
executable file
·117 lines (91 loc) · 3.58 KB
/
audit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
# Copyright (c) 2015, Paul R. Tagliamonte <[email protected]>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import json
from collections import defaultdict
def audit_tags(licenses):
    """Summarise how often each keyword occurs across ``licenses``.

    Keywords are read with ``.get('keywords', [])``, so a license without
    that key contributes nothing.

    Returns a list with one dict per distinct keyword, in order of first
    appearance, holding ``tag``, ``count`` (number of occurrences),
    ``percent`` (``count / len(licenses) * 100``) and ``fatal`` (always
    ``False``).
    """
    license_count = len(licenses)

    # Plain dict tally; insertion order gives first-seen ordering.
    occurrences = {}
    for entry in licenses:
        for keyword in entry.get('keywords', []):
            occurrences[keyword] = occurrences.get(keyword, 0) + 1

    summary = []
    for keyword, seen in occurrences.items():
        summary.append({
            "tag": keyword,
            "count": seen,
            "percent": seen / license_count * 100,
            "fatal": False,
        })
    return summary
def audit_identifiers(licenses):
    """Summarise how often each identifier scheme occurs across ``licenses``.

    Identifiers are read with ``.get('identifiers', [])``; every identifier
    found must carry a ``'scheme'`` key.

    Returns a list with one dict per distinct scheme, in order of first
    appearance, holding ``scheme``, ``count`` (number of occurrences),
    ``percent`` (``count / len(licenses) * 100``) and ``fatal`` (always
    ``False``).
    """
    license_count = len(licenses)

    per_scheme = defaultdict(int)
    for entry in licenses:
        for ident in entry.get('identifiers', []):
            per_scheme[ident['scheme']] += 1

    return [
        {
            "scheme": scheme_name,
            "count": hits,
            "percent": hits / license_count * 100,
            "fatal": False,
        }
        for scheme_name, hits in per_scheme.items()
    ]
def audit_names(licenses):
    """Report licenses whose ``name`` breaks the naming rules.

    Two rules are checked: the name must not contain lowercase
    ``'version'``, and it must not be equal to its own upper-cased form.

    Returns a list of dicts (``id``, ``name``, ``problems``, ``message``,
    ``fatal``) for the licenses with at least one problem; every returned
    entry has ``fatal`` set to ``True``.
    """
    def name_problems(name):
        # Collect the human-readable description of each violated rule.
        found = []
        if 'version' in name:
            found.append("contains 'version', use 'Version'")
        if name == name.upper():
            found.append("all uppercase")
        # NOTE: a "name starts with 'The'" rule is intentionally not
        # enforced here.
        return found

    flagged = []
    for entry in licenses:
        record = {
            "id": entry['id'],
            "name": entry['name'],
            "problems": name_problems(entry['name']),
            "message": "License was named poorly",
            "fatal": True,
        }
        if record['problems']:
            flagged.append(record)
    return flagged
def audit_full_text(licenses):
    """Report licenses that have no plain-text file on disk.

    For each license the path ``texts/plain/<id>`` (relative to the current
    working directory) is checked with ``os.path.exists``.

    Returns a list of dicts (``id``, ``message``, ``fatal``) for the
    licenses whose file is absent; every entry has ``fatal`` set to
    ``True``.
    """
    findings = []
    for entry in licenses:
        text_path = "texts/plain/{id}".format(**entry)
        if os.path.exists(text_path):
            continue
        findings.append({
            "id": entry['id'],
            "message": "License missing fulltext",
            "fatal": True,
        })
    return findings
def has_error(report):
    """Return ``True`` if any entry in any section of ``report`` is fatal.

    ``report`` maps section names to lists of dicts; each dict must have a
    ``'fatal'`` key.  Returns ``False`` when no entry is fatal, including
    when the report is empty.
    """
    return any(
        entry['fatal']
        for section in report.values()
        for entry in section
    )
def audit(path='licenses.json'):
    """Load the license list stored at ``path`` and run every audit on it.

    Args:
        path: Location of the JSON file holding the list of licenses.

    Returns:
        A dict with the keys ``identifiers``, ``keywords``, ``names`` and
        ``full_text``, each mapped to the list produced by the
        corresponding ``audit_*`` function.

    Raises:
        OSError: if ``path`` cannot be opened.
        ValueError: if the file is not valid JSON (``json.JSONDecodeError``
            is a subclass) or is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 rather than relying on the locale's default
    # encoding, which would mis-read or reject non-ASCII license names on
    # systems whose locale is not UTF-8.
    with open(path, 'r', encoding='utf-8') as fd:
        licenses = json.load(fd)

    return {
        "identifiers": audit_identifiers(licenses),
        "keywords": audit_tags(licenses),
        "names": audit_names(licenses),
        "full_text": audit_full_text(licenses),
    }
def display_report(report):
    """Print ``report`` and abort if it contains any fatal entry.

    Every fatal entry in every section is printed on a ``FATAL:`` line
    first (such entries must carry ``'id'`` and ``'message'`` keys).  If at
    least one was found, an exception is raised after all of them have been
    printed and nothing else is shown.  Otherwise the per-scheme and
    per-tag counts from ``report['identifiers']`` and ``report['keywords']``
    are printed.

    Args:
        report: Mapping of section name to a list of entry dicts, each
            with a ``'fatal'`` key.

    Raises:
        Exception: with the message ``"Fatal error found"`` when any entry
            is fatal.
    """
    fatal = False
    for entries in report.values():
        for entry in entries:
            if entry['fatal']:
                print("FATAL:", entry['id'], entry['message'], entry)
                fatal = True

    # Raise only after the loop so that every fatal entry gets reported.
    if fatal:
        raise Exception("Fatal error found")

    # ``.1f`` (one decimal place) replaces the former ``1f``, which only set
    # a minimum width of 1 and therefore printed six decimals.
    for identifier in report['identifiers']:
        print(" {count:03d} licenses contain scheme {scheme} ({percent:.1f}%)".format(
            **identifier
        ))

    for tag in report['keywords']:
        print(" {count:03d} licenses contain tag {tag} ({percent:.1f}%)".format(
            **tag
        ))
if __name__ == "__main__":
    # Command-line arguments are forwarded positionally to audit(); with
    # none given, audit() falls back to its default path.
    cli_args = sys.argv[1:]
    report = audit(*cli_args)
    display_report(report=report)