Skip to content

Commit c863409

Browse files
committed
Dump compiler_info.json
A new flag is added to "CodeChecker analyze": --dump-compiler-info-file. This flag is given a filename as a parameter, and a file with this name is created in the output folder. The file contains the implicit include paths and some other gcc-specific data that can be used to fine-tune the analysis later. When this flag is used, the analysis itself does not run. The compiler_info.json file is generated in the output directory by default, but due to a bug this file is empty in recent CodeChecker versions. The reason is that compile_commands.json is parsed in parallel with a process pool, so the dict object that collects this data must be managed by multiprocessing.SyncManager() in order to be usable from the processes of the pool.
1 parent ddefe11 commit c863409

File tree

8 files changed

+151
-81
lines changed

8 files changed

+151
-81
lines changed

analyzer/codechecker_analyzer/buildlog/log_parser.py

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# -------------------------------------------------------------------------
88

99

10-
from collections import namedtuple
10+
from dataclasses import dataclass
1111
from shutil import which
1212
from enum import Enum
1313
from functools import lru_cache
@@ -322,9 +322,25 @@ class ImplicitCompilerInfo:
322322
# attributes to the implicit settings. In the future we may find that some
323323
# other attributes are also dependencies of implicit compiler info in which
324324
# case this tuple should be extended.
325-
ImplicitInfoSpecifierKey = namedtuple(
326-
'ImplicitInfoSpecifierKey',
327-
['compiler', 'language', 'compiler_flags'])
325+
@dataclass
326+
class ImplicitInfoSpecifierKey:
327+
compiler: str
328+
language: str
329+
compiler_flags: list[str]
330+
331+
def __str__(self):
332+
return json.dumps([
333+
self.compiler,
334+
self.language,
335+
self.compiler_flags])
336+
337+
def __eq__(self, other):
338+
return (self.compiler, self.language, self.compiler_flags) == \
339+
(other.compiler, other.language, other.compiler_flags)
340+
341+
def __hash__(self):
342+
return hash(
343+
(self.compiler, self.language, tuple(self.compiler_flags)))
328344

329345
compiler_info: Dict[ImplicitInfoSpecifierKey, dict] = {}
330346
compiler_isexecutable = {}
@@ -333,14 +349,6 @@ class ImplicitCompilerInfo:
333349
# should be a clang version information object.
334350
compiler_versions = {}
335351

336-
@staticmethod
337-
def c():
338-
return "c"
339-
340-
@staticmethod
341-
def cpp():
342-
return "c++"
343-
344352
@staticmethod
345353
def is_executable_compiler(compiler):
346354
if compiler not in ImplicitCompilerInfo.compiler_isexecutable:
@@ -378,11 +386,9 @@ def __get_compiler_err(cmd: List[str]) -> Optional[str]:
378386
_, err = proc.communicate("")
379387
return err
380388
except OSError as oerr:
381-
# TODO: shlex.join(cmd) would be more elegant after upgrading to
382-
# Python 3.8.
383389
LOG.error(
384390
"Error during process execution: %s\n%s\n",
385-
' '.join(map(shlex.quote, cmd)), oerr.strerror)
391+
shlex.join(cmd), oerr.strerror)
386392
return None
387393

388394
@staticmethod
@@ -437,11 +443,7 @@ def get_compiler_includes(compiler, language, compiler_flags):
437443
"""
438444
cmd = [compiler, *compiler_flags, '-E', '-x', language, '-', '-v']
439445

440-
# TODO: shlex.join(cmd) would be more elegant after upgrading to
441-
# Python 3.8.
442-
LOG.debug(
443-
"Retrieving default includes via %s",
444-
' '.join(map(shlex.quote, cmd)))
446+
LOG.debug("Retrieving default includes via %s", shlex.join(cmd))
445447
include_dirs = ImplicitCompilerInfo.__parse_compiler_includes(cmd)
446448

447449
return list(map(os.path.normpath, include_dirs))
@@ -551,7 +553,7 @@ def get_compiler_standard(compiler, language):
551553
@staticmethod
552554
def dump_compiler_info(file_path: str):
553555
dumpable = {
554-
json.dumps(k): v for k, v
556+
str(k): v for k, v
555557
in ImplicitCompilerInfo.compiler_info.items()}
556558

557559
with open(file_path, 'w', encoding="utf-8", errors="ignore") as f:
@@ -976,8 +978,7 @@ def parse_options(compilation_db_entry,
976978

977979
if 'arguments' in compilation_db_entry:
978980
gcc_command = compilation_db_entry['arguments']
979-
details['original_command'] = \
980-
' '.join([shlex.quote(x) for x in gcc_command])
981+
details['original_command'] = shlex.join(gcc_command)
981982
elif 'command' in compilation_db_entry:
982983
details['original_command'] = compilation_db_entry['command']
983984
gcc_command = shlex.split(compilation_db_entry['command'])
@@ -1223,7 +1224,6 @@ def _process_entry_worker(args):
12231224

12241225

12251226
def parse_unique_log(compilation_database,
1226-
report_dir,
12271227
compile_uniqueing="none",
12281228
compiler_info_file=None,
12291229
keep_gcc_include_fixed=False,
@@ -1251,8 +1251,6 @@ def parse_unique_log(compilation_database,
12511251
by "arguments" which is a split command. Older
12521252
versions of intercept-build provide the build
12531253
command this way.
1254-
report_dir -- The output report directory. The compiler infos
1255-
will be written to <report_dir>/compiler.info.json.
12561254
compile_uniqueing -- Compilation database uniqueing mode.
12571255
If there are more than one compile commands for a
12581256
target file, only a single one is kept.
@@ -1312,6 +1310,13 @@ def parse_unique_log(compilation_database,
13121310
keep_gcc_intrin)
13131311
for entry in entries)
13141312

1313+
# Here we overwrite ImplicitCompilerInfo.compiler_info with a dict type
1314+
# that can be used in multiprocess environment, since the next section
1315+
# is executed in a process pool.
1316+
manager = multiprocessing.SyncManager()
1317+
manager.start()
1318+
ImplicitCompilerInfo.compiler_info = manager.dict()
1319+
13151320
# Process entries in parallel using imap_unordered with chunk size 1024
13161321
with multiprocessing.Pool(jobs) as pool:
13171322
# Convert generator to list for map function
@@ -1390,9 +1395,6 @@ def parse_unique_log(compilation_database,
13901395
compile_uniqueing)
13911396
sys.exit(1)
13921397

1393-
ImplicitCompilerInfo.dump_compiler_info(
1394-
os.path.join(report_dir, "compiler_info.json"))
1395-
13961398
LOG.debug('Parsing log file done.')
13971399
return list(uniqued_build_actions.values()), skipped_cmp_cmd_count
13981400

analyzer/codechecker_analyzer/cli/analyze.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,17 @@ def add_arguments_to_parser(parser):
229229
"specified file rather than invoke the compiler "
230230
"executable.")
231231

232+
parser.add_argument('--dump-compiler-info-file',
233+
dest="dump_compiler_info_file",
234+
required=False,
235+
action='store_true',
236+
default=False,
237+
help="Dump implicit gcc compiler info to a json file "
238+
"that can be used for fine-tuning analysis later."
239+
"These are information like the implicit include "
240+
"paths of standard headers, the default language "
241+
"version and the default target architecture.")
242+
232243
parser.add_argument('--keep-gcc-include-fixed',
233244
dest="keep_gcc_include_fixed",
234245
required=False,
@@ -1254,7 +1265,8 @@ def main(args):
12541265

12551266
args.output_path = os.path.abspath(args.output_path)
12561267
if os.path.exists(args.output_path) and \
1257-
not os.path.isdir(args.output_path):
1268+
not os.path.isdir(args.output_path) and \
1269+
not args.dump_compiler_info_file:
12581270
LOG.error("The given output path is not a directory: " +
12591271
args.output_path)
12601272
sys.exit(1)
@@ -1342,20 +1354,15 @@ def main(args):
13421354
"overwriting with current result", args.output_path)
13431355
shutil.rmtree(args.output_path)
13441356

1345-
if not os.path.exists(args.output_path):
1357+
if not os.path.exists(args.output_path) and \
1358+
not args.dump_compiler_info_file:
13461359
os.makedirs(args.output_path)
13471360

1348-
# TODO: I'm not sure that this directory should be created here.
1349-
fixit_dir = os.path.join(args.output_path, 'fixit')
1350-
if not os.path.exists(fixit_dir):
1351-
os.makedirs(fixit_dir)
1352-
13531361
LOG.debug("args: %s", str(args))
13541362
LOG.debug("Output will be stored to: '%s'", args.output_path)
13551363

13561364
actions, skipped_cmp_cmd_count = log_parser.parse_unique_log(
13571365
compile_commands,
1358-
args.output_path,
13591366
args.compile_uniqueing,
13601367
compiler_info_file,
13611368
args.keep_gcc_include_fixed,
@@ -1371,6 +1378,19 @@ def main(args):
13711378
"compilation database or all of them were skipped.")
13721379
sys.exit(0)
13731380

1381+
if args.dump_compiler_info_file:
1382+
log_parser.ImplicitCompilerInfo.dump_compiler_info(
1383+
args.output_path)
1384+
sys.exit(0)
1385+
else:
1386+
log_parser.ImplicitCompilerInfo.dump_compiler_info(
1387+
Path(args.output_path) / "compiler_info.json")
1388+
1389+
# TODO: I'm not sure that this directory should be created here.
1390+
fixit_dir = os.path.join(args.output_path, 'fixit')
1391+
if not os.path.exists(fixit_dir):
1392+
os.makedirs(fixit_dir)
1393+
13741394
uniqued_compilation_db_file = os.path.join(
13751395
args.output_path, "unique_compile_commands.json")
13761396
with open(uniqued_compilation_db_file, 'w',

analyzer/codechecker_analyzer/cli/check.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,25 @@ def add_arguments_to_parser(parser):
129129
"analyzers' output will not be printed to the "
130130
"standard output.")
131131

132+
parser.add_argument('--compiler-info-file',
133+
dest="compiler_info_file",
134+
required=False,
135+
default=argparse.SUPPRESS,
136+
help="Read the compiler includes and target from the "
137+
"specified file rather than invoke the compiler "
138+
"executable.")
139+
140+
parser.add_argument('--dump-compiler-info-file',
141+
dest="dump_compiler_info_file",
142+
required=False,
143+
action='store_true',
144+
default=False,
145+
help="Dump implicit gcc compiler info to a json file "
146+
"that can be used for fine-tuning analysis later."
147+
"These are information like the implicit include "
148+
"paths of standard headers, the default language "
149+
"version and the default target architecture.")
150+
132151
parser.add_argument('--keep-gcc-include-fixed',
133152
dest="keep_gcc_include_fixed",
134153
required=False,
@@ -980,6 +999,8 @@ def __update_if_key_exists(source, target, key):
980999
'compile_uniqueing',
9811000
'report_hash',
9821001
'add_gcc_include_dirs_with_isystem',
1002+
'compiler_info_file',
1003+
'dump_compiler_info_file',
9831004
'enable_z3',
9841005
'enable_z3_refutation']
9851006
for key in args_to_update:

analyzer/codechecker_analyzer/pre_analysis_manager.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ def collect_statistics(action, source, clangsa_config, statistics_data):
4343
LOG.debug('Can not collect statistical data.')
4444
return None
4545

46-
# TODO: shlex.join() will be more convenient in Python 3.8.
47-
LOG.debug_analyzer(' '.join(map(shlex.quote, cmd)))
46+
LOG.debug_analyzer(shlex.join(cmd))
4847

4948
ret_code, analyzer_out, analyzer_err = \
5049
analyzer_base.SourceAnalyzer.run_proc(cmd)

analyzer/tests/functional/analyze/test_analyze.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,45 @@ def test_compiler_info_file_is_loaded(self):
266266
self.assertTrue("--target=FAKE_TARGET" in out)
267267
self.assertTrue("-idirafter /FAKE_INCLUDE_DIR" in out)
268268

269+
def test_compiler_info_file_is_dumped(self):
270+
"""
271+
Test if the compiler info file is dumped properly.
272+
"""
273+
build_json = os.path.join(self.test_workspace, "build_simple.json")
274+
source_file = os.path.join(self.test_workspace, "simple.cpp")
275+
276+
build_log = [{
277+
"directory": self.test_workspace,
278+
"command": f"g++ -c {source_file}",
279+
"file": source_file
280+
}]
281+
282+
with open(build_json, 'w',
283+
encoding="utf-8", errors="ignore") as outfile:
284+
json.dump(build_log, outfile)
285+
286+
with tempfile.NamedTemporaryFile() as f:
287+
analyze_cmd = [
288+
self._codechecker_cmd, "analyze", build_json, "-o", f.name,
289+
"--dump-compiler-info-file"]
290+
291+
process = subprocess.Popen(
292+
analyze_cmd,
293+
stdout=subprocess.PIPE,
294+
stderr=subprocess.PIPE,
295+
cwd=self.test_dir,
296+
encoding="utf-8",
297+
errors="ignore")
298+
out, _ = process.communicate()
299+
print(out)
300+
301+
f.flush()
302+
f.seek(0)
303+
304+
compiler_info = json.loads(f.read())
305+
306+
self.assertIn('["g++", "c++", []]', compiler_info)
307+
269308
def test_capture_analysis_output(self):
270309
"""
271310
Test if reports/success/<output_file>.[stdout,stderr].txt

analyzer/tests/unit/test_buildcmd_escaping.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def test_analyzer_exec_double_quote(self):
8888
' -DDEBUG \'-DMYPATH="/this/some/path/"\''
8989

9090
comp_actions, _ = log_parser.\
91-
parse_unique_log(self.__get_cmp_json(compile_cmd), self.tmp_dir)
91+
parse_unique_log(self.__get_cmp_json(compile_cmd))
9292

9393
for comp_action in comp_actions:
9494
cmd = [self.compiler]
@@ -114,7 +114,7 @@ def test_analyzer_ansic_double_quote(self):
114114
"""
115115
compile_cmd = self.compiler + ''' '-DMYPATH=\"/some/other/path\"' '''
116116
comp_actions, _ = log_parser.\
117-
parse_unique_log(self.__get_cmp_json(compile_cmd), self.tmp_dir)
117+
parse_unique_log(self.__get_cmp_json(compile_cmd))
118118

119119
for comp_action in comp_actions:
120120
cmd = [self.compiler]

0 commit comments

Comments
 (0)