Skip to content

Commit 5595f30

Browse files
committed
Dump compiler_info.json
A new flag is added to "CodeChecker analyze": --dump-compiler-info-file. The flag takes a filename as its parameter, and a file with that name is created in the output folder. The file contains implicit include paths and some other gcc-specific data that can be used to fine-tune the analysis later. When this flag is used, the analysis does not run. The compiler_info.json file was generated in the output directory by default, but due to a bug it is empty in recent CodeChecker versions. The reason is that compile_commands.json is parsed in parallel with a process pool, so the dict object that collects this data must be managed by multiprocessing.SyncManager() in order to be usable from the pool's worker processes.
1 parent 29f42aa commit 5595f30

File tree

5 files changed

+78
-25
lines changed

5 files changed

+78
-25
lines changed

analyzer/codechecker_analyzer/buildlog/log_parser.py

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# -------------------------------------------------------------------------
88

99

10-
from collections import namedtuple
10+
from dataclasses import dataclass
1111
from shutil import which
1212
from enum import Enum
1313
from functools import lru_cache
@@ -322,9 +322,25 @@ class ImplicitCompilerInfo:
322322
# attributes to the implicit settings. In the future we may find that some
323323
# other attributes are also dependencies of implicit compiler info in which
324324
# case this tuple should be extended.
325-
ImplicitInfoSpecifierKey = namedtuple(
326-
'ImplicitInfoSpecifierKey',
327-
['compiler', 'language', 'compiler_flags'])
325+
@dataclass
326+
class ImplicitInfoSpecifierKey:
327+
compiler: str
328+
language: str
329+
compiler_flags: list[str]
330+
331+
def __str__(self):
332+
return json.dumps([
333+
self.compiler,
334+
self.language,
335+
self.compiler_flags])
336+
337+
def __eq__(self, other):
338+
return (self.compiler, self.language, self.compiler_flags) == \
339+
(other.compiler, other.language, other.compiler_flags)
340+
341+
def __hash__(self):
342+
return hash(
343+
(self.compiler, self.language, tuple(self.compiler_flags)))
328344

329345
compiler_info: Dict[ImplicitInfoSpecifierKey, dict] = {}
330346
compiler_isexecutable = {}
@@ -333,14 +349,6 @@ class ImplicitCompilerInfo:
333349
# should be a clang version information object.
334350
compiler_versions = {}
335351

336-
@staticmethod
337-
def c():
338-
return "c"
339-
340-
@staticmethod
341-
def cpp():
342-
return "c++"
343-
344352
@staticmethod
345353
def is_executable_compiler(compiler):
346354
if compiler not in ImplicitCompilerInfo.compiler_isexecutable:
@@ -378,11 +386,9 @@ def __get_compiler_err(cmd: List[str]) -> Optional[str]:
378386
_, err = proc.communicate("")
379387
return err
380388
except OSError as oerr:
381-
# TODO: shlex.join(cmd) would be more elegant after upgrading to
382-
# Python 3.8.
383389
LOG.error(
384390
"Error during process execution: %s\n%s\n",
385-
' '.join(map(shlex.quote, cmd)), oerr.strerror)
391+
shlex.join(cmd), oerr.strerror)
386392
return None
387393

388394
@staticmethod
@@ -437,11 +443,7 @@ def get_compiler_includes(compiler, language, compiler_flags):
437443
"""
438444
cmd = [compiler, *compiler_flags, '-E', '-x', language, '-', '-v']
439445

440-
# TODO: shlex.join(cmd) would be more elegant after upgrading to
441-
# Python 3.8.
442-
LOG.debug(
443-
"Retrieving default includes via %s",
444-
' '.join(map(shlex.quote, cmd)))
446+
LOG.debug("Retrieving default includes via %s", shlex.join(cmd))
445447
include_dirs = ImplicitCompilerInfo.__parse_compiler_includes(cmd)
446448

447449
return list(map(os.path.normpath, include_dirs))
@@ -551,7 +553,7 @@ def get_compiler_standard(compiler, language):
551553
@staticmethod
552554
def dump_compiler_info(file_path: str):
553555
dumpable = {
554-
json.dumps(k): v for k, v
556+
str(k): v for k, v
555557
in ImplicitCompilerInfo.compiler_info.items()}
556558

557559
with open(file_path, 'w', encoding="utf-8", errors="ignore") as f:
@@ -976,8 +978,7 @@ def parse_options(compilation_db_entry,
976978

977979
if 'arguments' in compilation_db_entry:
978980
gcc_command = compilation_db_entry['arguments']
979-
details['original_command'] = \
980-
' '.join([shlex.quote(x) for x in gcc_command])
981+
details['original_command'] = shlex.join(gcc_command)
981982
elif 'command' in compilation_db_entry:
982983
details['original_command'] = compilation_db_entry['command']
983984
gcc_command = shlex.split(compilation_db_entry['command'])
@@ -1312,6 +1313,13 @@ def parse_unique_log(compilation_database,
13121313
keep_gcc_intrin)
13131314
for entry in entries)
13141315

1316+
# Here we overwrite ImplicitCompilerInfo.compiler_info with a dict type
1317+
# that can be used in a multiprocess environment, since the next section
1318+
# is executed in a process pool.
1319+
manager = multiprocessing.SyncManager()
1320+
manager.start()
1321+
ImplicitCompilerInfo.compiler_info = manager.dict()
1322+
13151323
# Process entries in parallel using imap_unordered with chunk size 1024
13161324
with multiprocessing.Pool(jobs) as pool:
13171325
# Convert generator to list for map function

analyzer/codechecker_analyzer/cli/analyze.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,16 @@ def add_arguments_to_parser(parser):
229229
"specified file rather than invoke the compiler "
230230
"executable.")
231231

232+
parser.add_argument('--dump-compiler-info-file',
233+
dest="dump_compiler_info_file",
234+
required=False,
235+
default=argparse.SUPPRESS,
236+
help="Dump implicit gcc compiler info to a json file "
237+
"that can be used for fine-tuning analysis later."
238+
"These are information like the implicit include "
239+
"paths of standard headers, the default language "
240+
"version and the default target architecture.")
241+
232242
parser.add_argument('--keep-gcc-include-fixed',
233243
dest="keep_gcc_include_fixed",
234244
required=False,
@@ -1371,6 +1381,17 @@ def main(args):
13711381
"compilation database or all of them were skipped.")
13721382
sys.exit(0)
13731383

1384+
if args.dump_compiler_info_file:
1385+
compiler_info = Path(args.output_path) / 'compiler_info.json'
1386+
try:
1387+
os.rename(
1388+
compiler_info,
1389+
compiler_info.with_name(args.dump_compiler_info_file))
1390+
except ValueError as err:
1391+
LOG.error(err)
1392+
LOG.error("Provide a single filename.")
1393+
sys.exit(0)
1394+
13741395
uniqued_compilation_db_file = os.path.join(
13751396
args.output_path, "unique_compile_commands.json")
13761397
with open(uniqued_compilation_db_file, 'w',

analyzer/codechecker_analyzer/cli/check.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,23 @@ def add_arguments_to_parser(parser):
129129
"analyzers' output will not be printed to the "
130130
"standard output.")
131131

132+
parser.add_argument('--compiler-info-file',
133+
dest="compiler_info_file",
134+
required=False,
135+
default=argparse.SUPPRESS,
136+
help="Read the compiler includes and target from the "
137+
"specified file rather than invoke the compiler "
138+
"executable.")
139+
140+
parser.add_argument('--dump-compiler-info-file',
141+
dest="dump_compiler_info_file",
142+
required=False,
143+
help="Dump implicit gcc compiler info to a json file "
144+
"that can be used for fine-tuning analysis later."
145+
"These are information like the implicit include "
146+
"paths of standard headers, the default language "
147+
"version and the default target architecture.")
148+
132149
parser.add_argument('--keep-gcc-include-fixed',
133150
dest="keep_gcc_include_fixed",
134151
required=False,
@@ -980,6 +997,8 @@ def __update_if_key_exists(source, target, key):
980997
'compile_uniqueing',
981998
'report_hash',
982999
'add_gcc_include_dirs_with_isystem',
1000+
'compiler_info_file',
1001+
'dump_compiler_info_file',
9831002
'enable_z3',
9841003
'enable_z3_refutation']
9851004
for key in args_to_update:

analyzer/codechecker_analyzer/pre_analysis_manager.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ def collect_statistics(action, source, clangsa_config, statistics_data):
4343
LOG.debug('Can not collect statistical data.')
4444
return None
4545

46-
# TODO: shlex.join() will be more convenient in Python 3.8.
47-
LOG.debug_analyzer(' '.join(map(shlex.quote, cmd)))
46+
LOG.debug_analyzer(shlex.join(cmd))
4847

4948
ret_code, analyzer_out, analyzer_err = \
5049
analyzer_base.SourceAnalyzer.run_proc(cmd)

docs/analyzer/user_guide.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,12 @@ optional arguments:
10581058
Read the compiler includes and target from the
10591059
specified file rather than invoke the compiler
10601060
executable.
1061+
--dump-compiler-info-file DUMP_COMPILER_INFO_FILE
1062+
Dump implicit gcc compiler info to a json file that can
1063+
be used for fine-tuning analysis later. These are
1064+
information like the implicit include paths of standard
1065+
headers, the default language version and the default
1066+
target architecture.
10611067
--keep-gcc-include-fixed
10621068
There are some implicit include paths which
10631069
are only used by GCC (include-fixed). This flag

0 commit comments

Comments
 (0)