From 4afef78498dca2f88f5dc42e1fd87a8d614e17ea Mon Sep 17 00:00:00 2001 From: Tyler Veness Date: Fri, 20 Sep 2024 17:44:52 -0700 Subject: [PATCH] Migrate to newer cpplint.py fork (#286) https://github.com/cpplint/cpplint has bugfixes and is actually maintained. --- wpiformat/wpiformat/cpplint.py | 4173 ++++++++++++++++++++++++++++---- wpiformat/wpiformat/lint.py | 46 +- 2 files changed, 3734 insertions(+), 485 deletions(-) diff --git a/wpiformat/wpiformat/cpplint.py b/wpiformat/wpiformat/cpplint.py index 53dee70..a144bb1 100644 --- a/wpiformat/wpiformat/cpplint.py +++ b/wpiformat/wpiformat/cpplint.py @@ -42,45 +42,43 @@ """ import codecs +import collections import copy import getopt import glob import itertools import math # for log import os -import regex +import re import string import sys - -try: - import re._compiler as sre_compile -except ImportError: # Python < 3.11 - import sre_compile - -# if empty, use defaults -_header_regex = regex.compile("a^") +import sysconfig +import unicodedata +import xml.etree.ElementTree # if empty, use defaults -_source_regex = regex.compile("a^") - - -# Files which match the regex are considered to be header -# files (and will undergo different style checks). -# This set can be extended by using the --headers -# option -def IsHeaderFile(filename): - return _header_regex.search(filename) - -def IsSourceFile(filename): - return _source_regex.search(filename) - - -_USAGE = r""" -Syntax: cpplint.py [--repository=path] - [--headers=header_regex] - [--srcs=src_regex] +_valid_extensions = set([]) + +__VERSION__ = '1.7' + +_USAGE = """ +Syntax: cpplint.py [--verbose=#] [--output=emacs|eclipse|vs7|junit|sed|gsed] + [--filter=-x,+y,...] + [--counting=total|toplevel|detailed] [--root=subdir] + [--repository=path] + [--linelength=digits] [--headers=x,y,...] + [--recursive] + [--exclude=path] + [--extensions=hpp,cpp,...] + [--includeorder=default|standardcfirst] + [--config=filename] + [--quiet] + [--version] [file] ... + Style checker for C/C++ source files. + This is a fork of the Google style checker with minor extensions. + The style guidelines this tries to follow are those in https://google.github.io/styleguide/cppguide.html @@ -88,14 +86,71 @@ def IsSourceFile(filename): certain of the problem, and 1 meaning it could be a legitimate construct. This will miss some errors, and is not a substitute for a code review. - To suppress false-positive errors of a certain category, add a - 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*) - suppresses errors of all categories on that line. + To suppress false-positive errors of certain categories, add a + 'NOLINT(category[, category...])' comment to the line. NOLINT or NOLINT(*) + suppresses errors of all categories on that line. To suppress categories + on the next line use NOLINTNEXTLINE instead of NOLINT. To suppress errors in + a block of code 'NOLINTBEGIN(category[, category...])' comment to a line at + the start of the block and to end the block add a comment with 'NOLINTEND'. + NOLINT blocks are inclusive so any statements on the same line as a BEGIN + or END will have the error suppression applied. The files passed in will be linted; at least one file must be provided. + Default linted extensions are %s. + Other file types will be ignored. + Change the extensions with the --extensions flag. Flags: + output=emacs|eclipse|vs7|junit|sed|gsed + By default, the output is formatted to ease emacs parsing. Visual Studio + compatible output (vs7) may also be used. Further support exists for + eclipse (eclipse), and JUnit (junit). XML parsers such as those used + in Jenkins and Bamboo may also be used. + The sed format outputs sed commands that should fix some of the errors. + Note that this requires gnu sed. If that is installed as gsed on your + system (common e.g. on macOS with homebrew) you can use the gsed output + format. Sed commands are written to stdout, not stderr, so you should be + able to pipe output straight to a shell to run the fixes. + + verbose=# + Specify a number 0-5 to restrict errors to certain verbosity levels. + Errors with lower verbosity levels have lower confidence and are more + likely to be false positives. + + quiet + Don't print anything if no errors are found. + + filter=-x,+y,... + Specify a comma-separated list of category-filters to apply: only + error messages whose category names pass the filters will be printed. + (Category names are printed with the message and look like + "[whitespace/indent]".) Filters are evaluated left to right. + "-FOO" means "do not print categories that start with FOO". + "+FOO" means "do print categories that start with FOO". + + Examples: --filter=-whitespace,+whitespace/braces + --filter=-whitespace,-runtime/printf,+runtime/printf_format + --filter=-,+build/include_what_you_use + + To see a list of all the categories used in cpplint, pass no arg: + --filter= + + Filters can directly be limited to files and also line numbers. The + syntax is category:file:line , where line is optional. The filter limitation + works for both + and - and can be combined with ordinary filters: + + Examples: --filter=-whitespace:foo.h,+whitespace/braces:foo.h + --filter=-whitespace,-runtime/printf:foo.h:14,+runtime/printf_format:foo.h + --filter=-,+build/include_what_you_use:foo.h:321 + + counting=total|toplevel|detailed + The total number of errors found is always printed. If + 'toplevel' is provided, then the count of errors in each of + the top-level categories like 'build' and 'whitespace' will + also be printed. If 'detailed' is provided, then a count + is provided for each category like 'legal/copyright'. + repository=path The top level directory of the repository, used to derive the header guard CPP variable. By default, this is determined by searching for a @@ -120,17 +175,117 @@ def IsSourceFile(filename): Alice => SRC_CHROME_BROWSER_UI_BROWSER_H_ Bob => SRC_CHROME_BROWSER_UI_BROWSER_H_ - srcs=src_regex - The regex for source files that cpplint will check + root=subdir + The root directory used for deriving header guard CPP variable. + This directory is relative to the top level directory of the repository + which by default is determined by searching for a directory that contains + .git, .hg, or .svn but can also be controlled with the --repository flag. + If the specified directory does not exist, this flag is ignored. Examples: - --srcs=\.c$|\.cpp$ + Assuming that src is the top level directory of the repository (and + cwd=top/src), the header guard CPP variables for + src/chrome/browser/ui/browser.h are: + + No flag => CHROME_BROWSER_UI_BROWSER_H_ + --root=chrome => BROWSER_UI_BROWSER_H_ + --root=chrome/browser => UI_BROWSER_H_ + --root=.. => SRC_CHROME_BROWSER_UI_BROWSER_H_ + + linelength=digits + This is the allowed line length for the project. The default value is + 80 characters. + + Examples: + --linelength=120 + + recursive + Search for files to lint recursively. Each directory given in the list + of files to be linted is replaced by all files that descend from that + directory. Files with extensions not in the valid extensions list are + excluded. - headers=header_regex - The regex for header files that cpplint will use + exclude=path + Exclude the given path from the list of files to be linted. Relative + paths are evaluated relative to the current directory and shell globbing + is performed. This flag can be provided multiple times to exclude + multiple files. Examples: - --headers=\.h$|\.hpp$|\.inc$ + --exclude=one.cc + --exclude=src/*.cc + --exclude=src/*.cc --exclude=test/*.cc + + extensions=extension,extension,... + The allowed file extensions that cpplint will check + + Examples: + --extensions=%s + + includeorder=default|standardcfirst + For the build/include_order rule, the default is to blindly assume angle + bracket includes with file extension are c-system-headers (default), + even knowing this will have false classifications. + The default is established at google. + standardcfirst means to instead use an allow-list of known c headers and + treat all others as separate group of "other system headers". The C headers + included are those of the C-standard lib and closely related ones. + + config=filename + Search for config files with the specified name instead of CPPLINT.cfg + + headers=x,y,... + The header extensions that cpplint will treat as .h in checks. Values are + automatically added to --extensions list. + (by default, only files with extensions %s will be assumed to be headers) + + Examples: + --headers=%s + --headers=hpp,hxx + --headers=hpp + + cpplint.py supports per-directory configurations specified in CPPLINT.cfg + files. CPPLINT.cfg file can contain a number of key=value pairs. + Currently the following options are supported: + + set noparent + filter=+filter1,-filter2,... + exclude_files=regex + linelength=80 + root=subdir + headers=x,y,... + + "set noparent" option prevents cpplint from traversing directory tree + upwards looking for more .cfg files in parent directories. This option + is usually placed in the top-level project directory. + + The "filter" option is similar in function to --filter flag. It specifies + message filters in addition to the |_DEFAULT_FILTERS| and those specified + through --filter command-line flag. + + "exclude_files" allows to specify a regular expression to be matched against + a file name. If the expression matches, the file is skipped and not run + through the linter. + + "linelength" allows to specify the allowed line length for the project. + + The "root" option is similar in function to the --root flag (see example + above). Paths are relative to the directory of the CPPLINT.cfg. + + The "headers" option is similar in function to the --headers flag + (see example above). + + CPPLINT.cfg has an effect on files in the same directory and all + sub-directories, unless overridden by a nested configuration file. + + Example file: + filter=-build/include_order,+build/include_alpha + exclude_files=.*\\.cc + + The above example disables build/include_order warning and enables + build/include_alpha as well as excludes all .cc from being + processed by linter, in the current directory (where the .cfg + file is located) and all sub-directories. """ # We categorize each error message we print. Here are the categories. @@ -138,15 +293,24 @@ def IsSourceFile(filename): # If you add a new error message with a new category, add it to the list # here! cpplint_unittest.py should tell you if you forget to do this. _ERROR_CATEGORIES = [ - 'build/class', + 'build/c++11', + 'build/c++17', 'build/deprecated', 'build/endif_comment', 'build/explicit_make_pair', + 'build/forward_decl', 'build/header_guard', + 'build/include', + 'build/include_subdir', + 'build/include_alpha', 'build/include_order', 'build/include_what_you_use', + 'build/namespaces_headers', + 'build/namespaces_literals', + 'build/namespaces', 'build/printf_format', 'build/storage_class', + 'legal/copyright', 'readability/alt_tokens', 'readability/braces', 'readability/casting', @@ -160,6 +324,7 @@ def IsSourceFile(filename): 'readability/nolint', 'readability/nul', 'readability/strings', + 'readability/todo', 'readability/utf8', 'runtime/arrays', 'runtime/casting', @@ -172,30 +337,459 @@ def IsSourceFile(filename): 'runtime/operator', 'runtime/printf', 'runtime/printf_format', + 'runtime/references', + 'runtime/string', + 'runtime/threadsafe_fn', + 'runtime/vlog', 'whitespace/blank_line', + 'whitespace/braces', + 'whitespace/comma', 'whitespace/comments', 'whitespace/empty_conditional_body', 'whitespace/empty_if_body', 'whitespace/empty_loop_body', + 'whitespace/end_of_line', + 'whitespace/ending_newline', 'whitespace/forcolon', + 'whitespace/indent', + 'whitespace/indent_namespace', + 'whitespace/line_length', 'whitespace/newline', 'whitespace/operators', 'whitespace/parens', 'whitespace/semicolon', + 'whitespace/tab', 'whitespace/todo', ] +# keywords to use with --outputs which generate stdout for machine processing +_MACHINE_OUTPUTS = [ + 'junit', + 'sed', + 'gsed' +] + +# These error categories are no longer enforced by cpplint, but for backwards- +# compatibility they may still appear in NOLINT comments. +_LEGACY_ERROR_CATEGORIES = [ + 'build/class', + 'readability/streams', + 'readability/function', + ] + +# These prefixes for categories should be ignored since they relate to other +# tools which also use the NOLINT syntax, e.g. clang-tidy. +_OTHER_NOLINT_CATEGORY_PREFIXES = [ + 'clang-analyzer-', + 'abseil-', + 'altera-', + 'android-', + 'boost-', + 'bugprone-', + 'cert-', + 'concurrency-', + 'cppcoreguidelines-', + 'darwin-', + 'fuchsia-', + 'google-', + 'hicpp-', + 'linuxkernel-', + 'llvm-', + 'llvmlibc-', + 'misc-', + 'modernize-', + 'mpi-', + 'objc-', + 'openmp-', + 'performance-', + 'portability-', + 'readability-', + 'zircon-', + ] + +# The default state of the category filter. This is overridden by the --filter= +# flag. By default all errors are on, so only add here categories that should be +# off by default (i.e., categories that must be enabled by the --filter= flags). +# All entries here should start with a '-' or '+', as in the --filter= flag. +_DEFAULT_FILTERS = [ + '-build/include_alpha', + '-readability/fn_size', + ] + # The default list of categories suppressed for C (not C++) files. _DEFAULT_C_SUPPRESSED_CATEGORIES = [ 'readability/casting', ] +# The default list of categories suppressed for Linux Kernel files. +_DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [ + 'whitespace/tab', + ] + # We used to check for high-bit characters, but after much discussion we # decided those were OK, as long as they were in UTF-8 and didn't represent # hard-coded international strings, which belong in a separate i18n file. +# C++ headers +_CPP_HEADERS = frozenset([ + # Legacy + 'algobase.h', + 'algo.h', + 'alloc.h', + 'builtinbuf.h', + 'bvector.h', + # 'complex.h', collides with System C header "complex.h" since C11 + 'defalloc.h', + 'deque.h', + 'editbuf.h', + 'fstream.h', + 'function.h', + 'hash_map', + 'hash_map.h', + 'hash_set', + 'hash_set.h', + 'hashtable.h', + 'heap.h', + 'indstream.h', + 'iomanip.h', + 'iostream.h', + 'istream.h', + 'iterator.h', + 'list.h', + 'map.h', + 'multimap.h', + 'multiset.h', + 'ostream.h', + 'pair.h', + 'parsestream.h', + 'pfstream.h', + 'procbuf.h', + 'pthread_alloc', + 'pthread_alloc.h', + 'rope', + 'rope.h', + 'ropeimpl.h', + 'set.h', + 'slist', + 'slist.h', + 'stack.h', + 'stdiostream.h', + 'stl_alloc.h', + 'stl_relops.h', + 'streambuf.h', + 'stream.h', + 'strfile.h', + 'strstream.h', + 'tempbuf.h', + 'tree.h', + 'type_traits.h', + 'vector.h', + # C++ library headers + 'algorithm', + 'array', + 'atomic', + 'bitset', + 'chrono', + 'codecvt', + 'complex', + 'condition_variable', + 'deque', + 'exception', + 'forward_list', + 'fstream', + 'functional', + 'future', + 'initializer_list', + 'iomanip', + 'ios', + 'iosfwd', + 'iostream', + 'istream', + 'iterator', + 'limits', + 'list', + 'locale', + 'map', + 'memory', + 'mutex', + 'new', + 'numeric', + 'ostream', + 'queue', + 'random', + 'ratio', + 'regex', + 'scoped_allocator', + 'set', + 'sstream', + 'stack', + 'stdexcept', + 'streambuf', + 'string', + 'strstream', + 'system_error', + 'thread', + 'tuple', + 'typeindex', + 'typeinfo', + 'type_traits', + 'unordered_map', + 'unordered_set', + 'utility', + 'valarray', + 'vector', + # C++14 headers + 'shared_mutex', + # C++17 headers + 'any', + 'charconv', + 'codecvt', + 'execution', + 'filesystem', + 'memory_resource', + 'optional', + 'string_view', + 'variant', + # C++20 headers + 'barrier', + 'bit', + 'compare', + 'concepts', + 'coroutine', + 'format', + 'latch' + 'numbers', + 'ranges', + 'semaphore', + 'source_location', + 'span', + 'stop_token', + 'syncstream', + 'version', + # C++23 headers + 'expected', + 'flat_map', + 'flat_set', + 'generator', + 'mdspan', + 'print', + 'spanstream', + 'stacktrace', + 'stdfloat', + # C++ headers for C library facilities + 'cassert', + 'ccomplex', + 'cctype', + 'cerrno', + 'cfenv', + 'cfloat', + 'cinttypes', + 'ciso646', + 'climits', + 'clocale', + 'cmath', + 'csetjmp', + 'csignal', + 'cstdalign', + 'cstdarg', + 'cstdbool', + 'cstddef', + 'cstdint', + 'cstdio', + 'cstdlib', + 'cstring', + 'ctgmath', + 'ctime', + 'cuchar', + 'cwchar', + 'cwctype', + ]) + +# C headers +_C_HEADERS = frozenset([ + # System C headers + 'assert.h', + 'complex.h', + 'ctype.h', + 'errno.h', + 'fenv.h', + 'float.h', + 'inttypes.h', + 'iso646.h', + 'limits.h', + 'locale.h', + 'math.h', + 'setjmp.h', + 'signal.h', + 'stdalign.h', + 'stdarg.h', + 'stdatomic.h', + 'stdbool.h', + 'stddef.h', + 'stdint.h', + 'stdio.h', + 'stdlib.h', + 'stdnoreturn.h', + 'string.h', + 'tgmath.h', + 'threads.h', + 'time.h', + 'uchar.h', + 'wchar.h', + 'wctype.h', + # C23 headers + 'stdbit.h', + 'stdckdint.h', + # additional POSIX C headers + 'aio.h', + 'arpa/inet.h', + 'cpio.h', + 'dirent.h', + 'dlfcn.h', + 'fcntl.h', + 'fmtmsg.h', + 'fnmatch.h', + 'ftw.h', + 'glob.h', + 'grp.h', + 'iconv.h', + 'langinfo.h', + 'libgen.h', + 'monetary.h', + 'mqueue.h', + 'ndbm.h', + 'net/if.h', + 'netdb.h', + 'netinet/in.h', + 'netinet/tcp.h', + 'nl_types.h', + 'poll.h', + 'pthread.h', + 'pwd.h', + 'regex.h', + 'sched.h', + 'search.h', + 'semaphore.h', + 'setjmp.h', + 'signal.h', + 'spawn.h', + 'strings.h', + 'stropts.h', + 'syslog.h', + 'tar.h', + 'termios.h', + 'trace.h', + 'ulimit.h', + 'unistd.h', + 'utime.h', + 'utmpx.h', + 'wordexp.h', + # additional GNUlib headers + 'a.out.h', + 'aliases.h', + 'alloca.h', + 'ar.h', + 'argp.h', + 'argz.h', + 'byteswap.h', + 'crypt.h', + 'endian.h', + 'envz.h', + 'err.h', + 'error.h', + 'execinfo.h', + 'fpu_control.h', + 'fstab.h', + 'fts.h', + 'getopt.h', + 'gshadow.h', + 'ieee754.h', + 'ifaddrs.h', + 'libintl.h', + 'mcheck.h', + 'mntent.h', + 'obstack.h', + 'paths.h', + 'printf.h', + 'pty.h', + 'resolv.h', + 'shadow.h', + 'sysexits.h', + 'ttyent.h', + # Additional linux glibc headers + 'dlfcn.h', + 'elf.h', + 'features.h', + 'gconv.h', + 'gnu-versions.h', + 'lastlog.h', + 'libio.h', + 'link.h', + 'malloc.h', + 'memory.h', + 'netash/ash.h', + 'netatalk/at.h', + 'netax25/ax25.h', + 'neteconet/ec.h', + 'netipx/ipx.h', + 'netiucv/iucv.h', + 'netpacket/packet.h', + 'netrom/netrom.h', + 'netrose/rose.h', + 'nfs/nfs.h', + 'nl_types.h', + 'nss.h', + 're_comp.h', + 'regexp.h', + 'sched.h', + 'sgtty.h', + 'stab.h', + 'stdc-predef.h', + 'stdio_ext.h', + 'syscall.h', + 'termio.h', + 'thread_db.h', + 'ucontext.h', + 'ustat.h', + 'utmp.h', + 'values.h', + 'wait.h', + 'xlocale.h', + # Hardware specific headers + 'arm_neon.h', + 'emmintrin.h', + 'xmmintin.h', + ]) + +# Folders of C libraries so commonly used in C++, +# that they have parity with standard C libraries. +C_STANDARD_HEADER_FOLDERS = frozenset([ + # standard C library + "sys", + # glibc for linux + "arpa", + "asm-generic", + "bits", + "gnu", + "net", + "netinet", + "protocols", + "rpc", + "rpcsvc", + "scsi", + # linux kernel header + "drm", + "linux", + "misc", + "mtd", + "rdma", + "sound", + "video", + "xen", + ]) + # Type names -_TYPES = regex.compile( +_TYPES = re.compile( r'^(?:' # [dcl.type.simple] r'(char(16_t|32_t)?)|wchar_t|' @@ -208,15 +802,45 @@ def IsSourceFile(filename): r')$') -# These headers are excluded from [build/include] checks: +# These headers are excluded from [build/include] and [build/include_order] +# checks: # - Anything not following google file name conventions (containing an # uppercase character, such as Python.h or nsStringAPI.h, for example). # - Lua headers. -_THIRD_PARTY_HEADERS_PATTERN = regex.compile( +_THIRD_PARTY_HEADERS_PATTERN = re.compile( r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$') +# Pattern for matching FileInfo.BaseName() against test file name +_test_suffixes = ['_test', '_regtest', '_unittest'] +_TEST_FILE_SUFFIX = '(' + '|'.join(_test_suffixes) + r')$' + # Pattern that matches only complete whitespace, possibly across multiple lines. -_EMPTY_CONDITIONAL_BODY_PATTERN = regex.compile(r'^\s*$', regex.DOTALL) +_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL) + +# Assertion macros. These are defined in base/logging.h and +# testing/base/public/gunit.h. +_CHECK_MACROS = [ + 'DCHECK', 'CHECK', + 'EXPECT_TRUE', 'ASSERT_TRUE', + 'EXPECT_FALSE', 'ASSERT_FALSE', + ] + +# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE +_CHECK_REPLACEMENT = dict([(macro_var, {}) for macro_var in _CHECK_MACROS]) + +for op, replacement in [('==', 'EQ'), ('!=', 'NE'), + ('>=', 'GE'), ('>', 'GT'), + ('<=', 'LE'), ('<', 'LT')]: + _CHECK_REPLACEMENT['DCHECK'][op] = f'DCHECK_{replacement}' + _CHECK_REPLACEMENT['CHECK'][op] = f'CHECK_{replacement}' + _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = f'EXPECT_{replacement}' + _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = f'ASSERT_{replacement}' + +for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), + ('>=', 'LT'), ('>', 'LE'), + ('<=', 'GT'), ('<', 'GE')]: + _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = f'EXPECT_{inv_replacement}' + _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = f'ASSERT_{inv_replacement}' # Alternative tokens and their replacements. For full list, see section 2.5 # Alternative tokens [lex.digraph] in the C++ standard. @@ -242,10 +866,19 @@ def IsSourceFile(filename): # # False positives include C-style multi-line comments and multi-line strings # but those have always been troublesome for cpplint. -_ALT_TOKEN_REPLACEMENT_PATTERN = regex.compile( - r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)') +_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile( + r'([ =()])(' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')([ (]|$)') +# These constants define types of headers for use with +# _IncludeState.CheckNextIncludeOrder(). +_C_SYS_HEADER = 1 +_CPP_SYS_HEADER = 2 +_OTHER_SYS_HEADER = 3 +_LIKELY_MY_HEADER = 4 +_POSSIBLE_MY_HEADER = 5 +_OTHER_HEADER = 6 + # These constants define the current inline assembly state _NO_ASM = 0 # Outside of inline assembly block _INSIDE_ASM = 1 # Inside inline assembly block @@ -253,41 +886,178 @@ def IsSourceFile(filename): _BLOCK_ASM = 3 # The whole block is an inline assembly block # Match start of assembly blocks -_MATCH_ASM = regex.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' +_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' r'(?:\s+(volatile|__volatile__))?' r'\s*[{(]') # Match strings that indicate we're working on a C (not C++) file. -_SEARCH_C_FILE = regex.compile(r'\b(?:LINT_C_FILE|' +_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|' r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))') -_regexp_compile_cache = {} +# Match string that indicates we're working on a Linux Kernel file. +_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)') + +# Commands for sed to fix the problem +_SED_FIXUPS = { + 'Remove spaces around =': r's/ = /=/', + 'Remove spaces around !=': r's/ != /!=/', + 'Remove space before ( in if (': r's/if (/if(/', + 'Remove space before ( in for (': r's/for (/for(/', + 'Remove space before ( in while (': r's/while (/while(/', + 'Remove space before ( in switch (': r's/switch (/switch(/', + 'Should have a space between // and comment': r's/\/\//\/\/ /', + 'Missing space before {': r's/\([^ ]\){/\1 {/', + 'Tab found, replace by spaces': r's/\t/ /g', + 'Line ends in whitespace. Consider deleting these extra spaces.': r's/\s*$//', + 'You don\'t need a ; after a }': r's/};/}/', + 'Missing space after ,': r's/,\([^ ]\)/, \1/g', +} # {str, set(int)}: a map from error categories to sets of linenumbers # on which those errors are expected and should be suppressed. _error_suppressions = {} -if sys.version_info < (3,): - # -- pylint: disable=no-member - # BINARY_TYPE = str - itervalues = dict.itervalues - iteritems = dict.iteritems -else: - # BINARY_TYPE = bytes - itervalues = dict.values - iteritems = dict.items +# The root directory used for deriving header guard CPP variable. +# This is set by --root flag. +_root = None +_root_debug = False +# The top level repository directory. If set, _root is calculated relative to +# this directory instead of the directory containing version control artifacts. +# This is set by the --repository flag. +_repository = None -def unicode_escape_decode(x): - if sys.version_info < (3,): - return codecs.unicode_escape_decode(x)[0] - else: - return x +# Files to exclude from linting. This is set by the --exclude flag. +_excludes = None + +# Whether to suppress all PrintInfo messages, UNRELATED to --quiet flag +_quiet = False + +# The allowed line length of files. +# This is set by --linelength flag. +_line_length = 80 + +# This allows to use different include order rule than default +_include_order = "default" + +# This allows different config files to be used +_config_filename = "CPPLINT.cfg" + +# Treat all headers starting with 'h' equally: .h, .hpp, .hxx etc. +# This is set by --headers flag. +_hpp_headers = set([]) -# {str, bool}: a map from error categories to booleans which indicate if the -# category should be suppressed for every line. -_global_error_suppressions = {} +class ErrorSuppressions: + """Class to track all error suppressions for cpplint""" + class LineRange: + """Class to represent a range of line numbers for which an error is suppressed""" + def __init__(self, begin, end): + self.begin = begin + self.end = end + + def __str__(self): + return f'[{self.begin}-{self.end}]' + + def __contains__(self, obj): + return self.begin <= obj <= self.end + + def ContainsRange(self, other): + return self.begin <= other.begin and self.end >= other.end + + def __init__(self): + self._suppressions = collections.defaultdict(list) + self._open_block_suppression = None + + def _AddSuppression(self, category, line_range): + suppressed = self._suppressions[category] + if not (suppressed and suppressed[-1].ContainsRange(line_range)): + suppressed.append(line_range) + + def GetOpenBlockStart(self): + """:return: The start of the current open block or `-1` if there is not an open block""" + return self._open_block_suppression.begin if self._open_block_suppression else -1 + + def AddGlobalSuppression(self, category): + """Add a suppression for `category` which is suppressed for the whole file""" + self._AddSuppression(category, self.LineRange(0, math.inf)) + + def AddLineSuppression(self, category, linenum): + """Add a suppression for `category` which is suppressed only on `linenum`""" + self._AddSuppression(category, self.LineRange(linenum, linenum)) + + def StartBlockSuppression(self, category, linenum): + """Start a suppression block for `category` on `linenum`. inclusive""" + if self._open_block_suppression is None: + self._open_block_suppression = self.LineRange(linenum, math.inf) + self._AddSuppression(category, self._open_block_suppression) + + def EndBlockSuppression(self, linenum): + """End the current block suppression on `linenum`. inclusive""" + if self._open_block_suppression: + self._open_block_suppression.end = linenum + self._open_block_suppression = None + + def IsSuppressed(self, category, linenum): + """:return: `True` if `category` is suppressed for `linenum`""" + suppressed = self._suppressions[category] + self._suppressions[None] + return any(linenum in lr for lr in suppressed) + + def HasOpenBlock(self): + """:return: `True` if a block suppression was started but not ended""" + return self._open_block_suppression is not None + + def Clear(self): + """Clear all current error suppressions""" + self._suppressions.clear() + self._open_block_suppression = None + +_error_suppressions = ErrorSuppressions() + +def ProcessHppHeadersOption(val): + global _hpp_headers + try: + _hpp_headers = {ext.strip() for ext in val.split(',')} + except ValueError: + PrintUsage('Header extensions must be comma separated list.') + +def ProcessIncludeOrderOption(val): + if val is None or val == "default": + pass + elif val == "standardcfirst": + global _include_order + _include_order = val + else: + PrintUsage('Invalid includeorder value %s. Expected default|standardcfirst') + +def IsHeaderExtension(file_extension): + return file_extension in GetHeaderExtensions() + +def GetHeaderExtensions(): + if _hpp_headers: + return _hpp_headers + if _valid_extensions: + return {h for h in _valid_extensions if 'h' in h} + return set(['h', 'hh', 'hpp', 'hxx', 'h++', 'cuh']) + +# The allowed extensions for file names +# This is set by --extensions flag +def GetAllExtensions(): + return GetHeaderExtensions().union(_valid_extensions or set( + ['c', 'cc', 'cpp', 'cxx', 'c++', 'cu'])) + +def ProcessExtensionsOption(val): + global _valid_extensions + try: + extensions = [ext.strip() for ext in val.split(',')] + _valid_extensions = set(extensions) + except ValueError: + PrintUsage('Extensions should be a comma-separated list of values;' + 'for example: extensions=hpp,cpp\n' + f'This could not be parsed: "{val}"') + +def GetNonHeaderExtensions(): + return GetAllExtensions().difference(GetHeaderExtensions()) def ParseNolintSuppressions(filename, raw_line, linenum, error): """Updates the global list of line error-suppressions. @@ -302,23 +1072,50 @@ def ParseNolintSuppressions(filename, raw_line, linenum, error): linenum: int, the number of the current line. error: function, an error handler. """ - matched = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line) + matched = re.search(r'\bNOLINT(NEXTLINE|BEGIN|END)?\b(\([^)]+\))?', raw_line) if matched: - if matched.group(1): - suppressed_line = linenum + 1 - else: - suppressed_line = linenum - category = matched.group(2) - if category in (None, '(*)'): # => "suppress all" - _error_suppressions.setdefault(None, set()).add(suppressed_line) + no_lint_type = matched.group(1) + if no_lint_type == 'NEXTLINE': + def ProcessCategory(category): + _error_suppressions.AddLineSuppression(category, linenum + 1) + elif no_lint_type == 'BEGIN': + if _error_suppressions.HasOpenBlock(): + error(filename, linenum, 'readability/nolint', 5, + f'NONLINT block already defined on line {_error_suppressions.GetOpenBlockStart()}') + + def ProcessCategory(category): + _error_suppressions.StartBlockSuppression(category, linenum) + elif no_lint_type == 'END': + if not _error_suppressions.HasOpenBlock(): + error(filename, linenum, 'readability/nolint', 5, 'Not in a NOLINT block') + + def ProcessCategory(category): + if category is not None: + error(filename, linenum, 'readability/nolint', 5, + f'NOLINT categories not supported in block END: {category}') + _error_suppressions.EndBlockSuppression(linenum) else: - if category.startswith('(') and category.endswith(')'): - category = category[1:-1] + def ProcessCategory(category): + _error_suppressions.AddLineSuppression(category, linenum) + categories = matched.group(2) + if categories in (None, '(*)'): # => "suppress all" + ProcessCategory(None) + elif categories.startswith('(') and categories.endswith(')'): + for category in set(map(lambda c: c.strip(), categories[1:-1].split(','))): if category in _ERROR_CATEGORIES: - _error_suppressions.setdefault(category, set()).add(suppressed_line) - + ProcessCategory(category) + elif any(c for c in _OTHER_NOLINT_CATEGORY_PREFIXES if category.startswith(c)): + # Ignore any categories from other tools. + pass + elif category not in _LEGACY_ERROR_CATEGORIES: + error(filename, linenum, 'readability/nolint', 5, + f'Unknown NOLINT error category: {category}') def ProcessGlobalSuppresions(lines): + """Deprecated; use ProcessGlobalSuppressions.""" + ProcessGlobalSuppressions(lines) + +def ProcessGlobalSuppressions(lines): """Updates the list of global error suppressions. Parses any lint directives in the file that have global effect. @@ -330,48 +1127,36 @@ def ProcessGlobalSuppresions(lines): for line in lines: if _SEARCH_C_FILE.search(line): for category in _DEFAULT_C_SUPPRESSED_CATEGORIES: - _global_error_suppressions[category] = True + _error_suppressions.AddGlobalSuppression(category) + if _SEARCH_KERNEL_FILE.search(line): + for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES: + _error_suppressions.AddGlobalSuppression(category) def ResetNolintSuppressions(): """Resets the set of NOLINT suppressions to empty.""" - _error_suppressions.clear() - _global_error_suppressions.clear() + _error_suppressions.Clear() def IsErrorSuppressedByNolint(category, linenum): """Returns true if the specified error category is suppressed on this line. Consults the global error_suppressions map populated by - ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions. + ParseNolintSuppressions/ProcessGlobalSuppressions/ResetNolintSuppressions. Args: category: str, the category of the error. linenum: int, the current line number. Returns: - bool, True iff the error should be suppressed due to a NOLINT comment or - global suppression. + bool, True iff the error should be suppressed due to a NOLINT comment, + block suppression or global suppression. """ - return (_global_error_suppressions.get(category, False) or - linenum in _error_suppressions.get(category, set()) or - linenum in _error_suppressions.get(None, set())) - + return _error_suppressions.IsSuppressed(category, linenum) -def Match(pattern, s): - """Matches the string with the pattern, caching the compiled regexp.""" - # The regexp compilation caching is inlined in both Match and Search for - # performance reasons; factoring it out into a separate function turns out - # to be noticeably expensive. - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) - return _regexp_compile_cache[pattern].match(s) - -def Search(pattern, s): - """Searches the string for the pattern, caching the compiled regexp.""" - if pattern not in _regexp_compile_cache: - _regexp_compile_cache[pattern] = sre_compile.compile(pattern) - return _regexp_compile_cache[pattern].search(s) +def _IsSourceExtension(s): + """File extension (excluding dot) matches a source file extension.""" + return s in GetNonHeaderExtensions() class _IncludeState(object): @@ -386,16 +1171,64 @@ class _IncludeState(object): raise an _IncludeError with an appropriate error message. """ + # self._section will move monotonically through this set. If it ever + # needs to move backwards, CheckNextIncludeOrder will raise an error. + _INITIAL_SECTION = 0 + _MY_H_SECTION = 1 + _C_SECTION = 2 + _CPP_SECTION = 3 + _OTHER_SYS_SECTION = 4 + _OTHER_H_SECTION = 5 + + _TYPE_NAMES = { + _C_SYS_HEADER: 'C system header', + _CPP_SYS_HEADER: 'C++ system header', + _OTHER_SYS_HEADER: 'other system header', + _LIKELY_MY_HEADER: 'header this file implements', + _POSSIBLE_MY_HEADER: 'header this file may implement', + _OTHER_HEADER: 'other header', + } + _SECTION_NAMES = { + _INITIAL_SECTION: "... nothing. (This can't be an error.)", + _MY_H_SECTION: 'a header this file implements', + _C_SECTION: 'C system header', + _CPP_SECTION: 'C++ system header', + _OTHER_SYS_SECTION: 'other system header', + _OTHER_H_SECTION: 'other header', + } + def __init__(self): self.include_list = [[]] + self._section = None + self._last_header = None self.ResetSection('') + def FindHeader(self, header): + """Check if a header has already been included. + + Args: + header: header to check. + Returns: + Line number of previous occurrence, or -1 if the header has not + been seen before. + """ + for section_list in self.include_list: + for f in section_list: + if f[0] == header: + return f[1] + return -1 + def ResetSection(self, directive): """Reset section checking for preprocessor directive. Args: directive: preprocessor directive (e.g. "if", "else"). """ + # The name of the current section. + self._section = self._INITIAL_SECTION + # The path of last found header. + self._last_header = '' + # Update list of includes. Note that we never pop from the # include list. if directive in ('if', 'ifdef', 'ifndef'): @@ -403,13 +1236,188 @@ def ResetSection(self, directive): elif directive in ('else', 'elif'): self.include_list[-1] = [] + def SetLastHeader(self, header_path): + self._last_header = header_path + + def CanonicalizeAlphabeticalOrder(self, header_path): + """Returns a path canonicalized for alphabetical comparison. + + - replaces "-" with "_" so they both cmp the same. + - removes '-inl' since we don't require them to be after the main header. + - lowercase everything, just in case. + + Args: + header_path: Path to be canonicalized. + + Returns: + Canonicalized path. + """ + return header_path.replace('-inl.h', '.h').replace('-', '_').lower() + + def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): + """Check if a header is in alphabetical order with the previous header. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + header_path: Canonicalized header to be checked. + + Returns: + Returns true if the header is in alphabetical order. + """ + # If previous section is different from current section, _last_header will + # be reset to empty string, so it's always less than current header. + # + # If previous line was a blank line, assume that the headers are + # intentionally sorted the way they are. + if (self._last_header > header_path and + re.match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])): + return False + return True + + def CheckNextIncludeOrder(self, header_type): + """Returns a non-empty error message if the next header is out of order. + + This function also updates the internal state to be ready to check + the next include. + + Args: + header_type: One of the _XXX_HEADER constants defined above. + + Returns: + The empty string if the header is in the right order, or an + error message describing what's wrong. + + """ + error_message = (f'Found {self._TYPE_NAMES[header_type]}' + f' after {self._SECTION_NAMES[self._section]}') + + last_section = self._section + + if header_type == _C_SYS_HEADER: + if self._section <= self._C_SECTION: + self._section = self._C_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _CPP_SYS_HEADER: + if self._section <= self._CPP_SECTION: + self._section = self._CPP_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _OTHER_SYS_HEADER: + if self._section <= self._OTHER_SYS_SECTION: + self._section = self._OTHER_SYS_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _LIKELY_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + self._section = self._OTHER_H_SECTION + elif header_type == _POSSIBLE_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + # This will always be the fallback because we're not sure + # enough that the header is associated with this file. + self._section = self._OTHER_H_SECTION + else: + assert header_type == _OTHER_HEADER + self._section = self._OTHER_H_SECTION + + if last_section != self._section: + self._last_header = '' + + return '' + class _CppLintState(object): """Maintains module-wide state..""" def __init__(self): + self.verbose_level = 1 # global setting. self.error_count = 0 # global count of reported errors + # filters to apply when emitting error messages + self.filters = _DEFAULT_FILTERS[:] + # backup of filter list. Used to restore the state after each file. + self._filters_backup = self.filters[:] + self.counting = 'total' # In what way are we counting errors? self.errors_by_category = {} # string to int dict storing error counts + self.quiet = False # Suppress non-error messages? + + # output format: + # "emacs" - format that emacs can parse (default) + # "eclipse" - format that eclipse can parse + # "vs7" - format that Microsoft Visual Studio 7 can parse + # "junit" - format that Jenkins, Bamboo, etc can parse + # "sed" - returns a gnu sed command to fix the problem + # "gsed" - like sed, but names the command gsed, e.g. for macOS homebrew users + self.output_format = 'emacs' + + # For JUnit output, save errors and failures until the end so that they + # can be written into the XML + self._junit_errors = [] + self._junit_failures = [] + + def SetOutputFormat(self, output_format): + """Sets the output format for errors.""" + self.output_format = output_format + + def SetQuiet(self, quiet): + """Sets the module's quiet settings, and returns the previous setting.""" + last_quiet = self.quiet + self.quiet = quiet + return last_quiet + + def SetVerboseLevel(self, level): + """Sets the module's verbosity, and returns the previous setting.""" + last_verbose_level = self.verbose_level + self.verbose_level = level + return last_verbose_level + + def SetCountingStyle(self, counting_style): + """Sets the module's counting options.""" + self.counting = counting_style + + def SetFilters(self, filters): + """Sets the error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "+whitespace/indent"). + Each filter should start with + or -; else we die. + + Raises: + ValueError: The comma-separated filters did not all start with '+' or '-'. + E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" + """ + # Default filters always have less priority than the flag ones. + self.filters = _DEFAULT_FILTERS[:] + self.AddFilters(filters) + + def AddFilters(self, filters): + """ Adds more filters to the existing list of error-message filters. """ + for filt in filters.split(','): + clean_filt = filt.strip() + if clean_filt: + self.filters.append(clean_filt) + for filt in self.filters: + if not (filt.startswith('+') or filt.startswith('-')): + raise ValueError('Every filter in --filters must start with + or -' + f' ({filt} does not)') + + def BackupFilters(self): + """ Saves the current filter list to backup storage.""" + self._filters_backup = self.filters[:] + + def RestoreFilters(self): + """ Restores filters previously backed up.""" + self.filters = self._filters_backup[:] def ResetErrorCounts(self): """Sets the module's error statistic back to zero.""" @@ -419,13 +1427,151 @@ def ResetErrorCounts(self): def IncrementErrorCount(self, category): """Bumps the module's error statistic.""" self.error_count += 1 + if self.counting in ('toplevel', 'detailed'): + if self.counting != 'detailed': + category = category.split('/')[0] + if category not in self.errors_by_category: + self.errors_by_category[category] = 0 + self.errors_by_category[category] += 1 + + def PrintErrorCounts(self): + """Print a summary of errors by category, and the total.""" + for category, count in sorted(dict.items(self.errors_by_category)): + self.PrintInfo(f'Category \'{category}\' errors found: {count}\n') + + def PrintInfo(self, message): + # _quiet does not represent --quiet flag. + # Hide infos from stdout to keep stdout pure for machine consumption + if not _quiet and self.output_format not in _MACHINE_OUTPUTS: + sys.stdout.write(message) def PrintError(self, message): - sys.stderr.write(message) + if self.output_format == 'junit': + self._junit_errors.append(message) + else: + sys.stderr.write(message) + + def AddJUnitFailure(self, filename, linenum, message, category, confidence): + self._junit_failures.append((filename, linenum, message, category, + confidence)) + + def FormatJUnitXML(self): + num_errors = len(self._junit_errors) + num_failures = len(self._junit_failures) + + testsuite = xml.etree.ElementTree.Element('testsuite') + testsuite.attrib['errors'] = str(num_errors) + testsuite.attrib['failures'] = str(num_failures) + testsuite.attrib['name'] = 'cpplint' + + if num_errors == 0 and num_failures == 0: + testsuite.attrib['tests'] = str(1) + xml.etree.ElementTree.SubElement(testsuite, 'testcase', name='passed') + + else: + testsuite.attrib['tests'] = str(num_errors + num_failures) + if num_errors > 0: + testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase') + testcase.attrib['name'] = 'errors' + error = xml.etree.ElementTree.SubElement(testcase, 'error') + error.text = '\n'.join(self._junit_errors) + if num_failures > 0: + # Group failures by file + failed_file_order = [] + failures_by_file = {} + for failure in self._junit_failures: + failed_file = failure[0] + if failed_file not in failed_file_order: + failed_file_order.append(failed_file) + failures_by_file[failed_file] = [] + failures_by_file[failed_file].append(failure) + # Create a testcase for each file + for failed_file in failed_file_order: + failures = failures_by_file[failed_file] + testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase') + testcase.attrib['name'] = failed_file + failure = xml.etree.ElementTree.SubElement(testcase, 'failure') + template = '{0}: {1} [{2}] [{3}]' + texts = [template.format(f[1], f[2], f[3], f[4]) for f in failures] + failure.text = '\n'.join(texts) + + xml_decl = '\n' + return xml_decl + xml.etree.ElementTree.tostring(testsuite, 'utf-8').decode('utf-8') + _cpplint_state = _CppLintState() +def _OutputFormat(): + """Gets the module's output format.""" + return _cpplint_state.output_format + + +def _SetOutputFormat(output_format): + """Sets the module's output format.""" + _cpplint_state.SetOutputFormat(output_format) + +def _Quiet(): + """Return's the module's quiet setting.""" + return _cpplint_state.quiet + +def _SetQuiet(quiet): + """Set the module's quiet status, and return previous setting.""" + return _cpplint_state.SetQuiet(quiet) + + +def _VerboseLevel(): + """Returns the module's verbosity setting.""" + return _cpplint_state.verbose_level + + +def _SetVerboseLevel(level): + """Sets the module's verbosity, and returns the previous setting.""" + return _cpplint_state.SetVerboseLevel(level) + + +def _SetCountingStyle(level): + """Sets the module's counting options.""" + _cpplint_state.SetCountingStyle(level) + + +def _Filters(): + """Returns the module's list of output filters, as a list.""" + return _cpplint_state.filters + + +def _SetFilters(filters): + """Sets the module's error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.SetFilters(filters) + +def _AddFilters(filters): + """Adds more filter overrides. + + Unlike _SetFilters, this function does not reset the current list of filters + available. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.AddFilters(filters) + +def _BackupFilters(): + """ Saves the current filter list to backup storage.""" + _cpplint_state.BackupFilters() + +def _RestoreFilters(): + """ Restores filters previously backed up.""" + _cpplint_state.RestoreFilters() + class _FunctionState(object): """Tracks current function name and the number of lines in its body.""" @@ -463,11 +1609,11 @@ def Check(self, error, filename, linenum): if not self.in_a_function: return - if Match(r'T(EST|est)', self.current_function): + if re.match(r'T(EST|est)', self.current_function): base_trigger = self._TEST_TRIGGER else: base_trigger = self._NORMAL_TRIGGER - trigger = base_trigger * 2 + trigger = base_trigger * 2**_VerboseLevel() if self.lines_in_function > trigger: error_level = int(math.log(self.lines_in_function / base_trigger, 2)) @@ -476,15 +1622,19 @@ def Check(self, error, filename, linenum): error_level = 5 error(filename, linenum, 'readability/fn_size', error_level, 'Small and focused functions are preferred:' - ' %s has %d non-comment lines' - ' (error triggered by exceeding %d lines).' % ( - self.current_function, self.lines_in_function, trigger)) + f' {self.current_function} has {self.lines_in_function} non-comment lines' + f' (error triggered by exceeding {trigger} lines).') def End(self): """Stop analyzing function body.""" self.in_a_function = False +class _IncludeError(Exception): + """Indicates a problem with the include order in a file.""" + pass + + class FileInfo(object): """Provides utility functions for filenames. @@ -499,9 +1649,129 @@ def FullName(self): """Make Windows paths like Unix.""" return os.path.abspath(self._filename).replace('\\', '/') + def RepositoryName(self): + r"""FullName after removing the local path to the repository. + + If we have a real absolute path name here we can try to do something smart: + detecting the root of the checkout and truncating /path/to/checkout from + the name so that we get header guards that don't include things like + "C:\\Documents and Settings\\..." or "/home/username/..." in them and thus + people on different computers who have checked the source out to different + locations won't see bogus errors. + """ + fullname = self.FullName() + + if os.path.exists(fullname): + project_dir = os.path.dirname(fullname) + + # If the user specified a repository path, it exists, and the file is + # contained in it, use the specified repository path + if _repository: + repo = FileInfo(_repository).FullName() + root_dir = project_dir + while os.path.exists(root_dir): + # allow case insensitive compare on Windows + if os.path.normcase(root_dir) == os.path.normcase(repo): + return os.path.relpath(fullname, root_dir).replace('\\', '/') + one_up_dir = os.path.dirname(root_dir) + if one_up_dir == root_dir: + break + root_dir = one_up_dir + + if os.path.exists(os.path.join(project_dir, ".svn")): + # If there's a .svn file in the current directory, we recursively look + # up the directory tree for the top of the SVN checkout + root_dir = project_dir + one_up_dir = os.path.dirname(root_dir) + while os.path.exists(os.path.join(one_up_dir, ".svn")): + root_dir = os.path.dirname(root_dir) + one_up_dir = os.path.dirname(one_up_dir) + + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by + # searching up from the current path. + root_dir = current_dir = os.path.dirname(fullname) + while current_dir != os.path.dirname(current_dir): + if (os.path.exists(os.path.join(current_dir, ".git")) or + os.path.exists(os.path.join(current_dir, ".hg")) or + os.path.exists(os.path.join(current_dir, ".svn"))): + root_dir = current_dir + break + current_dir = os.path.dirname(current_dir) + + if (os.path.exists(os.path.join(root_dir, ".git")) or + os.path.exists(os.path.join(root_dir, ".hg")) or + os.path.exists(os.path.join(root_dir, ".svn"))): + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Don't know what to do; header guard warnings may be wrong... + return fullname + + def Split(self): + """Splits the file into the directory, basename, and extension. + + For 'chrome/browser/browser.cc', Split() would + return ('chrome/browser', 'browser', '.cc') + + Returns: + A tuple of (directory, basename, extension). + """ + + googlename = self.RepositoryName() + project, rest = os.path.split(googlename) + return (project,) + os.path.splitext(rest) + + def BaseName(self): + """File base name - text after the final slash, before the final period.""" + return self.Split()[1] + def Extension(self): """File extension - text following the final period, includes that period.""" - return os.path.splitext(self.FullName()) + return self.Split()[2] + + def NoExtension(self): + """File has no source file extension.""" + return '/'.join(self.Split()[0:2]) + + def IsSource(self): + """File has a source file extension.""" + return _IsSourceExtension(self.Extension()[1:]) + + +def _ShouldPrintError(category, confidence, filename, linenum): + """If confidence >= verbose, category passes filter and is not suppressed.""" + + # There are three ways we might decide not to print an error message: + # a "NOLINT(category)" comment appears in the source, + # the verbosity level isn't high enough, or the filters filter it out. + if IsErrorSuppressedByNolint(category, linenum): + return False + + if confidence < _cpplint_state.verbose_level: + return False + + is_filtered = False + for one_filter in _Filters(): + filter_cat, filter_file, filter_line = _ParseFilterSelector(one_filter[1:]) + category_match = category.startswith(filter_cat) + file_match = filter_file == "" or filter_file == filename + line_match = filter_line == linenum or filter_line == -1 + + if one_filter.startswith('-'): + if category_match and file_match and line_match: + is_filtered = True + elif one_filter.startswith('+'): + if category_match and file_match and line_match: + is_filtered = False + else: + assert False # should have been checked for in SetFilter. + if is_filtered: + return False + + return True def Error(filename, linenum, category, confidence, message): @@ -511,9 +1781,9 @@ def Error(filename, linenum, category, confidence, message): that is, how certain we are this is a legitimate style regression, and not a misidentification or a use that's sometimes justified. - False positives can be suppressed by the use of - "cpplint(category)" comments on the offending line. These are - parsed into _error_suppressions. + False positives can be suppressed by the use of "NOLINT(category)" + comments, NOLINTNEXTLINE or in blocks started by NOLINTBEGIN. These + are parsed into _error_suppressions. Args: filename: The name of the file containing the error. @@ -526,14 +1796,32 @@ def Error(filename, linenum, category, confidence, message): and 1 meaning that it could be a legitimate construct. message: The error message. """ - if not IsErrorSuppressedByNolint(category, linenum): + if _ShouldPrintError(category, confidence, filename, linenum): _cpplint_state.IncrementErrorCount(category) - final_message = '%s:%s: %s [%s] [%d]\n' % ( - filename, linenum, message, category, confidence) - sys.stderr.write(final_message) + if _cpplint_state.output_format == 'vs7': + _cpplint_state.PrintError(f'{filename}({linenum}): error cpplint:' + f' [{category}] {message} [{confidence}]\n') + elif _cpplint_state.output_format == 'eclipse': + sys.stderr.write(f'{filename}:{linenum}: warning:' + f' {message} [{category}] [{confidence}]\n') + elif _cpplint_state.output_format == 'junit': + _cpplint_state.AddJUnitFailure(filename, linenum, message, category, confidence) + elif _cpplint_state.output_format in ['sed', 'gsed']: + if message in _SED_FIXUPS: + sys.stdout.write(f"{_cpplint_state.output_format} -i" + f" '{linenum}{_SED_FIXUPS[message]}' {filename}" + f" # {message} [{category}] [{confidence}]\n") + else: + sys.stderr.write(f'# {filename}:{linenum}: ' + f' "{message}" [{category}] [{confidence}]\n') + else: + final_message = (f'{filename}:{linenum}: ' + f' {message} [{category}] [{confidence}]\n') + sys.stderr.write(final_message) + # Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard. -_RE_PATTERN_CLEANSE_LINE_ESCAPES = regex.compile( +_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') # Match a single C style comment on the same line. _RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/' @@ -545,7 +1833,7 @@ def Error(filename, linenum, category, confidence, message): # end of the line. Otherwise, we try to remove spaces from the right side, # if this doesn't work we try on left side but only if there's a non-character # on the right. -_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = regex.compile( +_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' + _RE_PATTERN_C_COMMENTS + r'\s+|' + r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' + @@ -599,7 +1887,7 @@ def CleanseRawStrings(raw_lines): # Found the end of the string, match leading space for this # line and resume copying the original lines, and also insert # a "" on the last line. - leading_space = Match(r'^(\s*)\S', line) + leading_space = re.match(r'^(\s*)\S', line) line = leading_space.group(1) + '""' + line[end + len(delimiter):] delimiter = None else: @@ -620,9 +1908,9 @@ def CleanseRawStrings(raw_lines): # before removing raw strings. This is because there are some # cpplint checks that requires the comments to be preserved, but # we don't want to check comments that are inside raw strings. - matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) + matched = re.match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) if (matched and - not Match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//', + not re.match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//', matched.group(1))): delimiter = ')' + matched.group(2) + '"' @@ -667,7 +1955,7 @@ def FindNextMultiLineCommentEnd(lines, lineix): def RemoveMultiLineCommentsFromRange(lines, begin, end): """Clears a range of lines for multi-line comments.""" - # Having // dummy comments makes the lines non-empty, so we will not get + # Having // comments makes the lines non-empty, so we will not get # unnecessary blank line warnings later in the code. for i in range(begin, end): lines[i] = '/**/' @@ -705,6 +1993,28 @@ def CleanseComments(line): return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) +def ReplaceAlternateTokens(line): + """Replace any alternate token by its original counterpart. + + In order to comply with the google rule stating that unary operators should + never be followed by a space, an exception is made for the 'not' and 'compl' + alternate tokens. For these, any trailing space is removed during the + conversion. + + Args: + line: The line being processed. + + Returns: + The line with alternate tokens replaced. + """ + for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): + token = _ALT_TOKEN_REPLACEMENT[match.group(2)] + tail = '' if match.group(2) in ['not', 'compl'] and match.group(3) == ' ' \ + else r'\3' + line = re.sub(match.re, rf'\1{token}{tail}', line, count=1) + return line + + class CleansedLines(object): """Holds 4 copies of all lines with different preprocessing applied to them. @@ -717,15 +2027,17 @@ class CleansedLines(object): """ def __init__(self, lines): + if '-readability/alt_tokens' in _cpplint_state.filters: + for i, line in enumerate(lines): + lines[i] = ReplaceAlternateTokens(line) self.elided = [] self.lines = [] self.raw_lines = lines self.num_lines = len(lines) self.lines_without_raw_strings = CleanseRawStrings(lines) - for linenum in range(len(self.lines_without_raw_strings)): - self.lines.append(CleanseComments( - self.lines_without_raw_strings[linenum])) - elided = self._CollapseStrings(self.lines_without_raw_strings[linenum]) + for line in self.lines_without_raw_strings: + self.lines.append(CleanseComments(line)) + elided = self._CollapseStrings(line) self.elided.append(CleanseComments(elided)) def NumLines(self): @@ -758,7 +2070,7 @@ def _CollapseStrings(elided): collapsed = '' while True: # Find the first quote character - match = Match(r'^([^\'"]*)([\'"])(.*)$', elided) + match = re.match(r'^([^\'"]*)([\'"])(.*)$', elided) if not match: collapsed += elided break @@ -783,8 +2095,8 @@ def _CollapseStrings(elided): # correctly as long as there are digits on both sides of the # separator. So we are fine as long as we don't see something # like "0.'3" (gcc 4.9.0 will not allow this literal). - if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head): - match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail) + if re.search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head): + match_literal = re.match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail) collapsed += head + match_literal.group(1).replace("'", '') elided = match_literal.group(2) else: @@ -826,7 +2138,7 @@ def FindEndOfExpressionInLine(line, startpos, stack): stack.pop() if not stack: return (-1, None) - elif i > 0 and Search(r'\boperator\s*$', line[0:i]): + elif i > 0 and re.search(r'\boperator\s*$', line[0:i]): # operator<, don't add to stack continue else: @@ -855,7 +2167,7 @@ def FindEndOfExpressionInLine(line, startpos, stack): # Ignore "->" and operator functions if (i > 0 and - (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))): + (line[i - 1] == '-' or re.search(r'\boperator\s*$', line[0:i - 1]))): continue # Pop the stack if there is a matching '<'. Otherwise, ignore @@ -902,7 +2214,7 @@ def CloseExpression(clean_lines, linenum, pos): """ line = clean_lines.elided[linenum] - if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]): + if (line[pos] not in '({[<') or re.match(r'<[<=]', line[pos:]): return (line, clean_lines.NumLines(), -1) # Check first line @@ -950,8 +2262,8 @@ def FindStartOfExpressionInLine(line, endpos, stack): # Ignore it if it's a "->" or ">=" or "operator>" if (i > 0 and (line[i - 1] == '-' or - Match(r'\s>=\s', line[i - 1:]) or - Search(r'\boperator\s*$', line[0:i]))): + re.match(r'\s>=\s', line[i - 1:]) or + re.search(r'\boperator\s*$', line[0:i]))): i -= 1 else: stack.append('>') @@ -1037,6 +2349,19 @@ def ReverseCloseExpression(clean_lines, linenum, pos): return (line, 0, -1) +def CheckForCopyright(filename, lines, error): + """Logs an error if no Copyright message appears at the top of the file.""" + + # We'll say it should occur by line 10. Don't forget there's a + # placeholder line at the front. + for line in range(1, min(len(lines), 11)): + if re.search(r'Copyright', lines[line], re.I): break + else: # means no copyright line was found + error(filename, 0, 'legal/copyright', 5, + 'No copyright message found. ' + 'You should have a line: "Copyright [year] "') + + def GetIndentLevel(line): """Return the number of leading spaces in line. @@ -1046,12 +2371,249 @@ def GetIndentLevel(line): Returns: An integer count of leading spaces, possibly zero. """ - indent = Match(r'^( *)\S', line) + indent = re.match(r'^( *)\S', line) if indent: return len(indent.group(1)) else: return 0 +def PathSplitToList(path): + """Returns the path split into a list by the separator. + + Args: + path: An absolute or relative path (e.g. '/a/b/c/' or '../a') + + Returns: + A list of path components (e.g. ['a', 'b', 'c]). + """ + lst = [] + while True: + (head, tail) = os.path.split(path) + if head == path: # absolute paths end + lst.append(head) + break + if tail == path: # relative paths end + lst.append(tail) + break + + path = head + lst.append(tail) + + lst.reverse() + return lst + +def GetHeaderGuardCPPVariable(filename): + """Returns the CPP variable that should be used as a header guard. + + Args: + filename: The name of a C++ header file. + + Returns: + The CPP variable that should be used as a header guard in the + named file. + + """ + + # Restores original filename in case that cpplint is invoked from Emacs's + # flymake. + filename = re.sub(r'_flymake\.h$', '.h', filename) + filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename) + # Replace 'c++' with 'cpp'. + filename = filename.replace('C++', 'cpp').replace('c++', 'cpp') + + fileinfo = FileInfo(filename) + file_path_from_root = fileinfo.RepositoryName() + + def FixupPathFromRoot(): + if _root_debug: + sys.stderr.write(f"\n_root fixup, _root = '{_root}'," + f" repository name = '{fileinfo.RepositoryName()}'\n") + + # Process the file path with the --root flag if it was set. + if not _root: + if _root_debug: + sys.stderr.write("_root unspecified\n") + return file_path_from_root + + def StripListPrefix(lst, prefix): + # f(['x', 'y'], ['w, z']) -> None (not a valid prefix) + if lst[:len(prefix)] != prefix: + return None + # f(['a, 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd'] + return lst[(len(prefix)):] + + # root behavior: + # --root=subdir , lstrips subdir from the header guard + maybe_path = StripListPrefix(PathSplitToList(file_path_from_root), + PathSplitToList(_root)) + + if _root_debug: + sys.stderr.write(("_root lstrip (maybe_path=%s, file_path_from_root=%s," + + " _root=%s)\n") % (maybe_path, file_path_from_root, _root)) + + if maybe_path: + return os.path.join(*maybe_path) + + # --root=.. , will prepend the outer directory to the header guard + full_path = fileinfo.FullName() + # adapt slashes for windows + root_abspath = os.path.abspath(_root).replace('\\', '/') + + maybe_path = StripListPrefix(PathSplitToList(full_path), + PathSplitToList(root_abspath)) + + if _root_debug: + sys.stderr.write(("_root prepend (maybe_path=%s, full_path=%s, " + + "root_abspath=%s)\n") % (maybe_path, full_path, root_abspath)) + + if maybe_path: + return os.path.join(*maybe_path) + + if _root_debug: + sys.stderr.write(f"_root ignore, returning {file_path_from_root}\n") + + # --root=FAKE_DIR is ignored + return file_path_from_root + + file_path_from_root = FixupPathFromRoot() + return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_' + + +def CheckForHeaderGuard(filename, clean_lines, error): + """Checks that the file contains a header guard. + + Logs an error if no #ifndef header guard is present. For other + headers, checks that the full pathname is used. + + Args: + filename: The name of the C++ header file. + clean_lines: A CleansedLines instance containing the file. + error: The function to call with any errors found. + """ + + # Don't check for header guards if there are error suppression + # comments somewhere in this file. + # + # Because this is silencing a warning for a nonexistent line, we + # only support the very specific NOLINT(build/header_guard) syntax, + # and not the general NOLINT or NOLINT(*) syntax. + raw_lines = clean_lines.lines_without_raw_strings + for i in raw_lines: + if re.search(r'//\s*NOLINT\(build/header_guard\)', i): + return + + # Allow pragma once instead of header guards + for i in raw_lines: + if re.search(r'^\s*#pragma\s+once', i): + return + + cppvar = GetHeaderGuardCPPVariable(filename) + + ifndef = '' + ifndef_linenum = 0 + define = '' + endif = '' + endif_linenum = 0 + for linenum, line in enumerate(raw_lines): + linesplit = line.split() + if len(linesplit) >= 2: + # find the first occurrence of #ifndef and #define, save arg + if not ifndef and linesplit[0] == '#ifndef': + # set ifndef to the header guard presented on the #ifndef line. + ifndef = linesplit[1] + ifndef_linenum = linenum + if not define and linesplit[0] == '#define': + define = linesplit[1] + # find the last occurrence of #endif, save entire line + if line.startswith('#endif'): + endif = line + endif_linenum = linenum + + if not ifndef or not define or ifndef != define: + error(filename, 0, 'build/header_guard', 5, + f'No #ifndef header guard found, suggested CPP variable is: {cppvar}') + return + + # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ + # for backward compatibility. + if ifndef != cppvar: + error_level = 0 + if ifndef != cppvar + '_': + error_level = 5 + + ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum, + error) + error(filename, ifndef_linenum, 'build/header_guard', error_level, + f'#ifndef header guard has wrong style, please use: {cppvar}') + + # Check for "//" comments on endif line. + ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, + error) + match = re.match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif) + if match: + if match.group(1) == '_': + # Issue low severity warning for deprecated double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + f'#endif line should be "#endif // {cppvar}"') + return + + # Didn't find the corresponding "//" comment. If this file does not + # contain any "//" comments at all, it could be that the compiler + # only wants "/**/" comments, look for those instead. + no_single_line_comments = True + for i in range(1, len(raw_lines) - 1): + line = raw_lines[i] + if re.match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line): + no_single_line_comments = False + break + + if no_single_line_comments: + match = re.match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif) + if match: + if match.group(1) == '_': + # Low severity warning for double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + f'#endif line should be "#endif /* {cppvar} */"') + return + + # Didn't find anything + error(filename, endif_linenum, 'build/header_guard', 5, + f'#endif line should be "#endif // {cppvar}"') + + +def CheckHeaderFileIncluded(filename, include_state, error): + """Logs an error if a source file does not include its header.""" + + # Do not check test files + fileinfo = FileInfo(filename) + if re.search(_TEST_FILE_SUFFIX, fileinfo.BaseName()): + return + + first_include = message = None + basefilename = filename[0:len(filename) - len(fileinfo.Extension())] + for ext in GetHeaderExtensions(): + headerfile = basefilename + '.' + ext + if not os.path.exists(headerfile): + continue + headername = FileInfo(headerfile).RepositoryName() + include_uses_unix_dir_aliases = False + for section_list in include_state.include_list: + for f in section_list: + include_text = f[0] + if "./" in include_text: + include_uses_unix_dir_aliases = True + if headername in include_text or include_text in headername: + return + if not first_include: + first_include = f[1] + + message = f'{fileinfo.RepositoryName()} should include its header file {headername}' + if include_uses_unix_dir_aliases: + message += ". Relative paths like . and .. are not allowed." + + if message: + error(filename, first_include, 'build/include', 5, message) + def CheckForBadCharacters(filename, lines, error): """Logs an error for each line containing bad characters. @@ -1071,13 +2633,31 @@ def CheckForBadCharacters(filename, lines, error): error: The function to call with any errors found. """ for linenum, line in enumerate(lines): - if unicode_escape_decode('\ufffd') in line: + if '\ufffd' in line: error(filename, linenum, 'readability/utf8', 5, 'Line contains invalid UTF-8 (or Unicode replacement character).') if '\0' in line: error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.') +def CheckForNewlineAtEOF(filename, lines, error): + """Logs an error if there is no newline char at the end of the file. + + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + + # The array lines() was created by adding two newlines to the + # original file (go figure), then splitting on \n. + # To verify that the file ends in \n, we just have to make sure the + # last-but-two element of lines() exists and is empty. + if len(lines) < 3 or lines[-2]: + error(filename, len(lines) - 2, 'whitespace/ending_newline', 5, + 'Could not find a newline character at the end of the file.') + + def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): """Logs an error if we see /* ... */ or "..." that extend past one line. @@ -1116,9 +2696,83 @@ def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): 'Use C++11 raw strings or concatenation instead.') +# (non-threadsafe name, thread-safe alternative, validation pattern) +# +# The validation pattern is used to eliminate false positives such as: +# _rand(); // false positive due to substring match. +# ->rand(); // some member function rand(). +# ACMRandom rand(seed); // some variable named rand. +# ISAACRandom rand(); // another variable named rand. +# +# Basically we require the return value of these functions to be used +# in some expression context on the same line by matching on some +# operator before the function name. This eliminates constructors and +# member function calls. +_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)' +_THREADING_LIST = ( + ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'), + ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'), + ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'), + ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'), + ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'), + ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'), + ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'), + ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'), + ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'), + ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'), + ('strtok(', 'strtok_r(', + _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'), + ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'), + ) + + +def CheckPosixThreading(filename, clean_lines, linenum, error): + """Checks for calls to thread-unsafe functions. + + Much code has been originally written without consideration of + multi-threading. Also, engineers are relying on their old experience; + they have learned posix before threading extensions were added. These + tests guide the engineers to use thread-safe functions (when using + posix directly). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: + # Additional pattern matching check to confirm that this is the + # function we are looking for + if re.search(pattern, line): + error(filename, linenum, 'runtime/threadsafe_fn', 2, + 'Consider using ' + multithread_safe_func + + '...) instead of ' + single_thread_func + + '...) for improved thread safety.') + + +def CheckVlogArguments(filename, clean_lines, linenum, error): + """Checks that VLOG() is only used for defining a logging level. + + For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and + VLOG(FATAL) are not. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + if re.search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): + error(filename, linenum, 'runtime/vlog', 5, + 'VLOG() should be used with numeric verbosity level. ' + 'Use LOG() if you want symbolic severity levels.') + # Matches invalid increment: *count++, which moves pointer instead of # incrementing a value. -_RE_PATTERN_INVALID_INCREMENT = regex.compile( +_RE_PATTERN_INVALID_INCREMENT = re.compile( r'^\s*\*\w+(\+\+|--);') @@ -1145,17 +2799,17 @@ def CheckInvalidIncrement(filename, clean_lines, linenum, error): def IsMacroDefinition(clean_lines, linenum): - if Search(r'^#define', clean_lines[linenum]): + if re.search(r'^#define', clean_lines[linenum]): return True - if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]): + if linenum > 0 and re.search(r'\\$', clean_lines[linenum - 1]): return True return False def IsForwardClassDeclaration(clean_lines, linenum): - return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum]) + return re.match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum]) class _BlockInfo(object): @@ -1230,6 +2884,10 @@ def __init__(self, name, class_or_struct, clean_lines, linenum): self.access = 'private' self.is_struct = False + # Remember initial indentation level for this class. Using raw_lines here + # instead of elided to account for leading comments. + self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum]) + # Try to find the end of the class. This will be confused by things like: # class A { # } *x = { ... @@ -1246,7 +2904,7 @@ def __init__(self, name, class_or_struct, clean_lines, linenum): def CheckBegin(self, filename, clean_lines, linenum, error): # Look for a bare ':' - if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]): + if re.search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]): self.is_derived = True def CheckEnd(self, filename, clean_lines, linenum, error): @@ -1254,7 +2912,7 @@ def CheckEnd(self, filename, clean_lines, linenum, error): # the class. seen_last_thing_in_class = False for i in range(linenum - 1, self.starting_linenum, -1): - match = Search( + match = re.search( r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' + self.name + r'\)', clean_lines.elided[i]) @@ -1264,9 +2922,21 @@ def CheckEnd(self, filename, clean_lines, linenum, error): match.group(1) + ' should be the last thing in the class') break - if not Match(r'^\s*$', clean_lines.elided[i]): + if not re.match(r'^\s*$', clean_lines.elided[i]): seen_last_thing_in_class = True + # Check that closing brace is aligned with beginning of the class. + # Only do this if the closing brace is indented by only whitespaces. + # This means we will not check single-line class definitions. + indent = re.match(r'^( *)\}', clean_lines.elided[linenum]) + if indent and len(indent.group(1)) != self.class_indent: + if self.is_struct: + parent = 'struct ' + self.name + else: + parent = 'class ' + self.name + error(filename, linenum, 'whitespace/indent', 3, + f'Closing brace should be aligned with beginning of {parent}') + class _NamespaceInfo(_BlockInfo): """Stores information about a namespace.""" @@ -1292,7 +2962,7 @@ def CheckEnd(self, filename, clean_lines, linenum, error): # deciding what these nontrivial things are, so this check is # triggered by namespace size only, which works most of the time. if (linenum - self.starting_linenum < 10 - and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)): + and not re.match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)): return # Look for matching comment at end of namespace. @@ -1309,18 +2979,17 @@ def CheckEnd(self, filename, clean_lines, linenum, error): # expected namespace. if self.name: # Named namespace - if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' + - regex.escape(self.name) + r'[\*/\.\\\s]*$'), + if not re.match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' + + re.escape(self.name) + r'[\*/\.\\\s]*$'), line): error(filename, linenum, 'readability/namespace', 5, - 'Namespace should be terminated with "// namespace %s"' % - self.name) + f'Namespace should be terminated with "// namespace {self.name}"') else: # Anonymous namespace - if not Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): + if not re.match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): # If "// namespace anonymous" or "// anonymous namespace (more text)", # mention "// anonymous namespace" as an acceptable form - if Match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line): + if re.match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line): error(filename, linenum, 'readability/namespace', 5, 'Anonymous namespace should be terminated with "// namespace"' ' or "// anonymous namespace"') @@ -1423,7 +3092,7 @@ def InTemplateArgumentList(self, clean_lines, linenum, pos): while linenum < clean_lines.NumLines(): # Find the earliest character that might indicate a template argument line = clean_lines.elided[linenum] - match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:]) + match = re.match(r'^[^{};=\[\]\.<>]*(.)', line[pos:]) if not match: linenum += 1 pos = 0 @@ -1483,11 +3152,11 @@ def UpdatePreprocessor(self, line): Args: line: current line to check. """ - if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): + if re.match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): # Beginning of #if block, save the nesting stack here. The saved # stack will allow us to restore the parsing state in the #else case. self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) - elif Match(r'^\s*#\s*(else|elif)\b', line): + elif re.match(r'^\s*#\s*(else|elif)\b', line): # Beginning of #else block if self.pp_stack: if not self.pp_stack[-1].seen_else: @@ -1502,7 +3171,7 @@ def UpdatePreprocessor(self, line): else: # TODO(unknown): unexpected #else, issue warning? pass - elif Match(r'^\s*#\s*endif\b', line): + elif re.match(r'^\s*#\s*endif\b', line): # End of #if or #else blocks. if self.pp_stack: # If we saw an #else, we will need to restore the nesting @@ -1574,7 +3243,7 @@ def Update(self, filename, clean_lines, linenum, error): # declarations even if it weren't followed by a whitespace, this # is so that we don't confuse our namespace checker. The # missing spaces will be flagged by CheckSpacing. - namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line) + namespace_decl_match = re.match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line) if not namespace_decl_match: break @@ -1591,9 +3260,9 @@ def Update(self, filename, clean_lines, linenum, error): # such as in: # class LOCKABLE API Object { # }; - class_decl_match = Match( + class_decl_match = re.match( r'^(\s*(?:template\s*<[\w\s<>,:=]*>\s*)?' - r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))' + r'(class|struct)\s+(?:[a-zA-Z0-9_]+\s+)*(\w+(?:::\w+)*))' r'(.*)$', line) if (class_decl_match and (not self.stack or self.stack[-1].open_parentheses == 0)): @@ -1621,17 +3290,33 @@ def Update(self, filename, clean_lines, linenum, error): # Update access control if we are inside a class/struct if self.stack and isinstance(self.stack[-1], _ClassInfo): classinfo = self.stack[-1] - access_match = Match( + access_match = re.match( r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?' r':(?:[^:]|$)', line) if access_match: classinfo.access = access_match.group(2) + # Check that access keywords are indented +1 space. Skip this + # check if the keywords are not preceded by whitespaces. + indent = access_match.group(1) + if (len(indent) != classinfo.class_indent + 1 and + re.match(r'^\s*$', indent)): + if classinfo.is_struct: + parent = 'struct ' + classinfo.name + else: + parent = 'class ' + classinfo.name + slots = '' + if access_match.group(3): + slots = access_match.group(3) + error(filename, linenum, 'whitespace/indent', 3, + f'{access_match.group(2)}{slots}:' + f' should be indented +1 space inside {parent}') + # Consume braces or semicolons from what's left of the line while True: # Match first brace, semicolon, or closed parenthesis. - matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line) + matched = re.match(r'^[^{;)}]*([{;)}])(.*)$', line) if not matched: break @@ -1642,7 +3327,7 @@ def Update(self, filename, clean_lines, linenum, error): # stack otherwise. if not self.SeenOpenBrace(): self.stack[-1].seen_open_brace = True - elif Match(r'^extern\s*"[^"]*"\s*\{', line): + elif re.match(r'^extern\s*"[^"]*"\s*\{', line): self.stack.append(_ExternCInfo(linenum)) else: self.stack.append(_BlockInfo(linenum, True)) @@ -1679,7 +3364,6 @@ def InnermostClass(self): return classinfo return None - def CheckForNonStandardConstructs(filename, clean_lines, linenum, nesting_state, error): r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2. @@ -1712,34 +3396,47 @@ def CheckForNonStandardConstructs(filename, clean_lines, linenum, # Remove comments from the line, but leave in strings for now. line = clean_lines.lines[linenum] - if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line): + if re.search(r'printf\s*\(.*".*%[-+ ]?\d*q', line): error(filename, linenum, 'runtime/printf_format', 3, '%q in format strings is deprecated. Use %ll instead.') - if Search(r'printf\s*\(.*".*%\d+\$', line): + if re.search(r'printf\s*\(.*".*%\d+\$', line): error(filename, linenum, 'runtime/printf_format', 2, '%N$ formats are unconventional. Try rewriting to avoid them.') # Remove escaped backslashes before looking for undefined escapes. line = line.replace('\\\\', '') - if Search(r'("|\').*\\(%|\[|\(|{)', line): + if re.search(r'("|\').*\\(%|\[|\(|{)', line): error(filename, linenum, 'build/printf_format', 3, '%, [, (, and { are undefined character escapes. Unescape them.') # For the rest, work with both comments and strings removed. line = clean_lines.elided[linenum] - if Search(r'\b(const|volatile|void|char|short|int|long' + if re.search(r'\b(const|volatile|void|char|short|int|long' r'|float|double|signed|unsigned' - r'|schar|u?int8|u?int16|u?int32|u?int64)' + r'|schar|u?int8_t|u?int16_t|u?int32_t|u?int64_t)' r'\s+(register|static|extern|typedef)\b', line): error(filename, linenum, 'build/storage_class', 5, 'Storage-class specifier (static, extern, typedef, etc) should be ' 'at the beginning of the declaration.') - if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line): + if re.match(r'\s*#\s*endif\s*[^/\s]+', line): + error(filename, linenum, 'build/endif_comment', 5, + 'Uncommented text after #endif is non-standard. Use a comment.') + + if re.match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line): + error(filename, linenum, 'build/forward_decl', 5, + 'Inner-style forward declarations are invalid. Remove this line.') + + if re.search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', + line): + error(filename, linenum, 'build/deprecated', 3, + '>? and = 1)) initializer_list_constructor = bool( onearg_constructor and - Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0])) + re.search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0])) copy_constructor = bool( onearg_constructor and - Match(r'(const\s+)?%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&' - % regex.escape(base_classname), constructor_args[0].strip())) + re.match(r'((const\s+(volatile\s+)?)?|(volatile\s+(const\s+)?))?' + rf'{re.escape(base_classname)}(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&', + constructor_args[0].strip()) + ) if (not is_marked_explicit and onearg_constructor and not initializer_list_constructor and not copy_constructor): if defaulted_args or variadic_args: - error(filename, linenum, 'runtime/explicit', 5, + error(filename, linenum, 'runtime/explicit', 4, 'Constructors callable with one argument ' 'should be marked explicit.') else: - error(filename, linenum, 'runtime/explicit', 5, + error(filename, linenum, 'runtime/explicit', 4, 'Single-parameter constructors should be marked explicit.') - elif is_marked_explicit and not onearg_constructor: - if noarg_constructor: - error(filename, linenum, 'runtime/explicit', 5, - 'Zero-parameter constructors should not be marked explicit.') + + +def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): + """Checks for the correctness of various spacing around function calls. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Since function calls often occur inside if/for/while/switch + # expressions - which have their own, more liberal conventions - we + # first see if we should be looking inside such an expression for a + # function call, to which we can apply more strict standards. + fncall = line # if there's no control flow construct, look at whole line + for pattern in (r'\bif\s*\((.*)\)\s*{', + r'\bfor\s*\((.*)\)\s*{', + r'\bwhile\s*\((.*)\)\s*[{;]', + r'\bswitch\s*\((.*)\)\s*{'): + match = re.search(pattern, line) + if match: + fncall = match.group(1) # look inside the parens for function calls + break + + # Except in if/for/while/switch, there should never be space + # immediately inside parens (eg "f( 3, 4 )"). We make an exception + # for nested parens ( (a+b) + c ). Likewise, there should never be + # a space before a ( when it's a function argument. I assume it's a + # function argument when the char before the whitespace is legal in + # a function name (alnum + _) and we're not starting a macro. Also ignore + # pointers and references to arrays and functions coz they're too tricky: + # we use a very simple way to recognize these: + # " (something)(maybe-something)" or + # " (something)(maybe-something," or + # " (something)[something]" + # Note that we assume the contents of [] to be short enough that + # they'll never need to wrap. + if ( # Ignore control structures. + not re.search(r'\b(if|elif|for|while|switch|return|new|delete|catch|sizeof)\b', + fncall) and + # Ignore pointers/references to functions. + not re.search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and + # Ignore pointers/references to arrays. + not re.search(r' \([^)]+\)\[[^\]]+\]', fncall)): + if re.search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call + error(filename, linenum, 'whitespace/parens', 4, + 'Extra space after ( in function call') + elif re.search(r'\(\s+(?!(\s*\\)|\()', fncall): + error(filename, linenum, 'whitespace/parens', 2, + 'Extra space after (') + if (re.search(r'\w\s+\(', fncall) and + not re.search(r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(', fncall) and + not re.search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and + not re.search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and + not re.search(r'\bcase\s+\(', fncall)): + # TODO(unknown): Space after an operator function seem to be a common + # error, silence those for now by restricting them to highest verbosity. + if re.search(r'\boperator_*\b', line): + error(filename, linenum, 'whitespace/parens', 0, + 'Extra space before ( in function call') + else: + error(filename, linenum, 'whitespace/parens', 4, + 'Extra space before ( in function call') + # If the ) is followed only by a newline or a { + newline, assume it's + # part of a control statement (if/while/etc), and don't complain + if re.search(r'[^)]\s+\)\s*[^{\s]', fncall): + # If the closing parenthesis is preceded by only whitespaces, + # try to give a more descriptive error message. + if re.search(r'^\s+\)', fncall): + error(filename, linenum, 'whitespace/parens', 2, + 'Closing ) should be moved to the previous line') + else: + error(filename, linenum, 'whitespace/parens', 2, + 'Extra space before )') def IsBlankLine(line): @@ -1845,6 +3616,20 @@ def IsBlankLine(line): return not line or line.isspace() +def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error): + is_namespace_indent_item = ( + len(nesting_state.stack) >= 1 and + (isinstance(nesting_state.stack[-1], _NamespaceInfo) or + (isinstance(nesting_state.previous_stack_top, _NamespaceInfo))) + ) + + if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + clean_lines.elided, line): + CheckItemIndentationInNamespace(filename, clean_lines.elided, + line, error) + + def CheckForFunctionLengths(filename, clean_lines, linenum, function_state, error): """Reports for long function bodies. @@ -1874,13 +3659,13 @@ def CheckForFunctionLengths(filename, clean_lines, linenum, starting_func = False regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... - match_result = Match(regexp, line) + match_result = re.match(regexp, line) if match_result: # If the name is all caps and underscores, figure it's a macro and # ignore it, unless it's TEST or TEST_F. function_name = match_result.group(1).split()[-1] if function_name == 'TEST' or function_name == 'TEST_F' or ( - not Match(r'[A-Z_]+$', function_name)): + not re.match(r'[A-Z_]+$', function_name)): starting_func = True if starting_func: @@ -1888,14 +3673,14 @@ def CheckForFunctionLengths(filename, clean_lines, linenum, for start_linenum in range(linenum, clean_lines.NumLines()): start_line = lines[start_linenum] joined_line += ' ' + start_line.lstrip() - if Search(r'(;|})', start_line): # Declarations and trivial functions + if re.search(r'(;|})', start_line): # Declarations and trivial functions body_found = True break # ... ignore - elif Search(r'{', start_line): + if re.search(r'{', start_line): body_found = True - function = Search(r'((\w|:)*)\(', line).group(1) - if Match(r'TEST', function): # Handle TEST... macros - parameter_regexp = Search(r'(\(.*\))', joined_line) + function = re.search(r'((\w|:)*)\(', line).group(1) + if re.match(r'TEST', function): # Handle TEST... macros + parameter_regexp = re.search(r'(\(.*\))', joined_line) if parameter_regexp: # Ignore bad syntax function += parameter_regexp.group(1) else: @@ -1906,14 +3691,14 @@ def CheckForFunctionLengths(filename, clean_lines, linenum, # No body for the function (or evidence of a non-function) was found. error(filename, linenum, 'readability/fn_size', 5, 'Lint failed to find start of function body.') - elif Match(r'^\}\s*$', line): # function end + elif re.match(r'^\}\s*$', line): # function end function_state.Check(error, filename, linenum) function_state.End() - elif not Match(r'^\s*$', line): + elif not re.match(r'^\s*$', line): function_state.Count() # Count non-blank/non-comment lines. -_RE_PATTERN_TODO = regex.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') +_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') def CheckComment(line, filename, linenum, next_line_start, error): @@ -1929,7 +3714,16 @@ def CheckComment(line, filename, linenum, next_line_start, error): commentpos = line.find('//') if commentpos != -1: # Check if the // may be in quotes. If so, ignore it - if regex.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0: + if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0: + # Allow one space for new scopes, two spaces otherwise: + if (not (re.match(r'^.*{ *//', line) and next_line_start == commentpos) and + ((commentpos >= 1 and + line[commentpos-1] not in string.whitespace) or + (commentpos >= 2 and + line[commentpos-2] not in string.whitespace))): + error(filename, linenum, 'whitespace/comments', 2, + 'At least two spaces is best between code and comments') + # Checks for common mistakes in TODO comments. comment = line[commentpos:] match = _RE_PATTERN_TODO.match(comment) @@ -1940,11 +3734,24 @@ def CheckComment(line, filename, linenum, next_line_start, error): error(filename, linenum, 'whitespace/todo', 2, 'Too many spaces before TODO') + username = match.group(2) + if not username: + error(filename, linenum, 'readability/todo', 2, + 'Missing username in TODO; it should look like ' + '"// TODO(my_username): Stuff."') + + middle_whitespace = match.group(3) + # Comparisons made explicit for correctness + # -- pylint: disable=g-explicit-bool-comparison + if middle_whitespace != ' ' and middle_whitespace != '': + error(filename, linenum, 'whitespace/todo', 2, + 'TODO(my_username) should be followed by a space') + # If the comment contains an alphanumeric character, there # should be a space somewhere between it and the // unless # it's a /// or //! Doxygen comment. - if (Match(r'//[^ ]*\w', comment) and - not Match(r'(///|//\!)(\s+|$)', comment)): + if (re.match(r'//[^ ]*\w', comment) and + not re.match(r'(///|//\!)(\s+|$)', comment)): error(filename, linenum, 'whitespace/comments', 4, 'Should have a space between // and comment') @@ -2007,12 +3814,12 @@ def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): # the previous line is indented 6 spaces, which may happen when the # initializers of a constructor do not fit into a 80 column line. exception = False - if Match(r' {6}\w', prev_line): # Initializer list? + if re.match(r' {6}\w', prev_line): # Initializer list? # We are looking for the opening column of initializer list, which # should be indented 4 spaces to cause 6 space indentation afterwards. search_position = linenum-2 while (search_position >= 0 - and Match(r' {6}\w', elided[search_position])): + and re.match(r' {6}\w', elided[search_position])): search_position -= 1 exception = (search_position >= 0 and elided[search_position][:5] == ' :') @@ -2023,9 +3830,9 @@ def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): # or colon (for initializer lists) we assume that it is the last line of # a function header. If we have a colon indented 4 spaces, it is an # initializer list. - exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', + exception = (re.match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', prev_line) - or Match(r' {4}:', prev_line)) + or re.match(r' {4}:', prev_line)) if not exception: error(filename, linenum, 'whitespace/blank_line', 2, @@ -2042,16 +3849,16 @@ def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): if linenum + 1 < clean_lines.NumLines(): next_line = raw[linenum + 1] if (next_line - and Match(r'\s*}', next_line) + and re.match(r'\s*}', next_line) and next_line.find('} else ') == -1): error(filename, linenum, 'whitespace/blank_line', 3, 'Redundant blank line at the end of a code block ' 'should be deleted.') - matched = Match(r'\s*(public|protected|private):', prev_line) + matched = re.match(r'\s*(public|protected|private):', prev_line) if matched: error(filename, linenum, 'whitespace/blank_line', 3, - 'Do not leave a blank line after "%s:"' % matched.group(1)) + f'Do not leave a blank line after "{matched.group(1)}:"') # Next, check comments next_line_start = 0 @@ -2063,6 +3870,210 @@ def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): # get rid of comments and strings line = clean_lines.elided[linenum] + # You shouldn't have spaces before your brackets, except for C++11 attributes + # or maybe after 'delete []', 'return []() {};', or 'auto [abc, ...] = ...;'. + if (re.search(r'\w\s+\[(?!\[)', line) and + not re.search(r'(?:auto&?|delete|return)\s+\[', line)): + error(filename, linenum, 'whitespace/braces', 5, + 'Extra space before [') + + # In range-based for, we wanted spaces before and after the colon, but + # not around "::" tokens that might appear. + if (re.search(r'for *\(.*[^:]:[^: ]', line) or + re.search(r'for *\(.*[^: ]:[^:]', line)): + error(filename, linenum, 'whitespace/forcolon', 2, + 'Missing space around colon in range-based for loop') + + +def CheckOperatorSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing around operators. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Don't try to do spacing checks for operator methods. Do this by + # replacing the troublesome characters with something else, + # preserving column position for all other characters. + # + # The replacement is done repeatedly to avoid false positives from + # operators that call operators. + while True: + match = re.match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line) + if match: + line = match.group(1) + ('_' * len(match.group(2))) + match.group(3) + else: + break + + # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". + # Otherwise not. Note we only check for non-spaces on *both* sides; + # sometimes people put non-spaces on one side when aligning ='s among + # many lines (not that this is behavior that I approve of...) + if ((re.search(r'[\w.]=', line) or + re.search(r'=[\w.]', line)) + and not re.search(r'\b(if|while|for) ', line) + # Operators taken from [lex.operators] in C++11 standard. + and not re.search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line) + and not re.search(r'operator=', line)): + error(filename, linenum, 'whitespace/operators', 4, + 'Missing spaces around =') + + # It's ok not to have spaces around binary operators like + - * /, but if + # there's too little whitespace, we get concerned. It's hard to tell, + # though, so we punt on this one for now. TODO. + + # You should always have whitespace around binary operators. + # + # Check <= and >= first to avoid false positives with < and >, then + # check non-include lines for spacing around < and >. + # + # If the operator is followed by a comma, assume it's be used in a + # macro context and don't do any checks. This avoids false + # positives. + # + # Note that && is not included here. This is because there are too + # many false positives due to RValue references. + match = re.search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line) + if match: + # TODO: support alternate operators + error(filename, linenum, 'whitespace/operators', 3, + f'Missing spaces around {match.group(1)}') + elif not re.match(r'#.*include', line): + # Look for < that is not surrounded by spaces. This is only + # triggered if both sides are missing spaces, even though + # technically should should flag if at least one side is missing a + # space. This is done to avoid some false positives with shifts. + match = re.match(r'^(.*[^\s<])<[^\s=<,]', line) + if match: + (_, _, end_pos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + if end_pos <= -1: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around <') + + # Look for > that is not surrounded by spaces. Similar to the + # above, we only trigger if both sides are missing spaces to avoid + # false positives with shifts. + match = re.match(r'^(.*[^-\s>])>[^\s=>,]', line) + if match: + (_, _, start_pos) = ReverseCloseExpression( + clean_lines, linenum, len(match.group(1))) + if start_pos <= -1: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around >') + + # We allow no-spaces around << when used like this: 10<<20, but + # not otherwise (particularly, not when used as streams) + # + # We also allow operators following an opening parenthesis, since + # those tend to be macros that deal with operators. + match = re.search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line) + if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and + not (match.group(1) == 'operator' and match.group(2) == ';')): + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around <<') + + # We allow no-spaces around >> for almost anything. This is because + # C++11 allows ">>" to close nested templates, which accounts for + # most cases when ">>" is not followed by a space. + # + # We still warn on ">>" followed by alpha character, because that is + # likely due to ">>" being used for right shifts, e.g.: + # value >> alpha + # + # When ">>" is used to close templates, the alphanumeric letter that + # follows would be part of an identifier, and there should still be + # a space separating the template type and the identifier. + # type> alpha + match = re.search(r'>>[a-zA-Z_]', line) + if match: + error(filename, linenum, 'whitespace/operators', 3, + 'Missing spaces around >>') + + # There shouldn't be space around unary operators + match = re.search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) + if match: + error(filename, linenum, 'whitespace/operators', 4, + f'Extra space for operator {match.group(1)}') + + +def CheckParenthesisSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing around parentheses. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # No spaces after an if, while, switch, or for + match = re.search(r' (if\(|for\(|while\(|switch\()', line) + if match: + error(filename, linenum, 'whitespace/parens', 5, + f'Missing space before ( in {match.group(1)}') + + # For if/for/while/switch, the left and right parens should be + # consistent about how many spaces are inside the parens, and + # there should either be zero or one spaces inside the parens. + # We don't want: "if ( foo)" or "if ( foo )". + # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. + match = re.search(r'\b(if|for|while|switch)\s*' + r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', + line) + if match: + if len(match.group(2)) != len(match.group(4)): + if not (match.group(3) == ';' and + len(match.group(2)) == 1 + len(match.group(4)) or + not match.group(2) and re.search(r'\bfor\s*\(.*; \)', line)): + error(filename, linenum, 'whitespace/parens', 5, + f'Mismatching spaces inside () in {match.group(1)}') + if len(match.group(2)) not in [0, 1]: + error(filename, linenum, 'whitespace/parens', 5, + f'Should have zero or one spaces inside ( and ) in {match.group(1)}') + + +def CheckCommaSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing near commas and semicolons. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + raw = clean_lines.lines_without_raw_strings + line = clean_lines.elided[linenum] + + # You should always have a space after a comma (either as fn arg or operator) + # + # This does not apply when the non-space character following the + # comma is another comma, since the only time when that happens is + # for empty macro arguments. + # + # We run this check in two passes: first pass on elided lines to + # verify that lines contain missing whitespaces, second pass on raw + # lines to confirm that those missing whitespaces are not due to + # elided comments. + match = re.search(r',[^,\s]', re.sub(r'\b__VA_OPT__\s*\(,\)', '', + re.sub(r'\boperator\s*,\s*\(', 'F(', line))) + if (match and re.search(r',[^,\s]', raw[linenum])): + error(filename, linenum, 'whitespace/comma', 3, + 'Missing space after ,') + + # You should always have a space after a semicolon + # except for few corner cases + # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more + # space after ; + if re.search(r';[^\s};\\)/]', line): + error(filename, linenum, 'whitespace/semicolon', 3, + 'Missing space after ;') + def _IsType(clean_lines, nesting_state, expr): """Check if expression looks like a type name, returns true if so. @@ -2076,7 +4087,7 @@ def _IsType(clean_lines, nesting_state, expr): True, if token looks like a type. """ # Keep only the last token in the expression - last_word = Match(r'^.*(\b\S+)$', expr) + last_word = re.match(r'^.*(\b\S+)$', expr) if last_word: token = last_word.group(1) else: @@ -2089,7 +4100,7 @@ def _IsType(clean_lines, nesting_state, expr): # Try a bit harder to match templated types. Walk up the nesting # stack until we find something that resembles a typename # declaration for what we are looking for. - typename_pattern = (r'\b(?:typename|class|struct)\s+' + regex.escape(token) + + typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) + r'\b') block_index = len(nesting_state.stack) - 1 while block_index >= 0: @@ -2120,7 +4131,7 @@ def _IsType(clean_lines, nesting_state, expr): # Look for typename in the specified range for i in range(first_line, last_line + 1, 1): - if Search(typename_pattern, clean_lines.elided[i]): + if re.search(typename_pattern, clean_lines.elided[i]): return True block_index -= 1 @@ -2140,23 +4151,99 @@ def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error): """ line = clean_lines.elided[linenum] + # Except after an opening paren, or after another opening brace (in case of + # an initializer list, for instance), you should have spaces before your + # braces when they are delimiting blocks, classes, namespaces etc. + # And since you should never have braces at the beginning of a line, + # this is an easy test. Except that braces used for initialization don't + # follow the same rule; we often don't want spaces before those. + match = re.match(r'^(.*[^ ({>]){', line) + + if match: + # Try a bit harder to check for brace initialization. This + # happens in one of the following forms: + # Constructor() : initializer_list_{} { ... } + # Constructor{}.MemberFunction() + # Type variable{}; + # FunctionCall(type{}, ...); + # LastArgument(..., type{}); + # LOG(INFO) << type{} << " ..."; + # map_of_type[{...}] = ...; + # ternary = expr ? new type{} : nullptr; + # OuterTemplate{}> + # + # We check for the character following the closing brace, and + # silence the warning if it's one of those listed above, i.e. + # "{.;,)<>]:". + # + # To account for nested initializer list, we allow any number of + # closing braces up to "{;,)<". We can't simply silence the + # warning on first sight of closing brace, because that would + # cause false negatives for things that are not initializer lists. + # Silence this: But not this: + # Outer{ if (...) { + # Inner{...} if (...){ // Missing space before { + # }; } + # + # There is a false negative with this approach if people inserted + # spurious semicolons, e.g. "if (cond){};", but we will catch the + # spurious semicolon with a separate check. + leading_text = match.group(1) + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + trailing_text = '' + if endpos > -1: + trailing_text = endline[endpos:] + for offset in range(endlinenum + 1, + min(endlinenum + 3, clean_lines.NumLines() - 1)): + trailing_text += clean_lines.elided[offset] + # We also suppress warnings for `uint64_t{expression}` etc., as the style + # guide recommends brace initialization for integral types to avoid + # overflow/truncation. + if (not re.match(r'^[\s}]*[{.;,)<>\]:]', trailing_text) + and not _IsType(clean_lines, nesting_state, leading_text)): + error(filename, linenum, 'whitespace/braces', 5, + 'Missing space before {') + + # Make sure '} else {' has spaces. + if re.search(r'}else', line): + error(filename, linenum, 'whitespace/braces', 5, + 'Missing space before else') + # You shouldn't have a space before a semicolon at the end of the line. # There's a special case for "for" since the style guide allows space before # the semicolon there. - if Search(r':\s*;\s*$', line): + if re.search(r':\s*;\s*$', line): error(filename, linenum, 'whitespace/semicolon', 5, 'Semicolon defining empty statement. Use {} instead.') - elif Search(r'^\s*;\s*$', line): + elif re.search(r'^\s*;\s*$', line): error(filename, linenum, 'whitespace/semicolon', 5, 'Line contains only semicolon. If this should be an empty statement, ' 'use {} instead.') - elif (Search(r'\s+;\s*$', line) and - not Search(r'\bfor\b', line)): + elif (re.search(r'\s+;\s*$', line) and + not re.search(r'\bfor\b', line)): error(filename, linenum, 'whitespace/semicolon', 5, 'Extra space before last semicolon. If this should be an empty ' 'statement, use {} instead.') +def IsDecltype(clean_lines, linenum, column): + """Check if the token ending on (linenum, column) is decltype(). + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: the number of the line to check. + column: end column of the token to check. + Returns: + True if this token is decltype() expression, False otherwise. + """ + (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column) + if start_col < 0: + return False + if re.search(r'\bdecltype\s*$', text[0:start_col]): + return True + return False + def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): """Checks for additional blank line issues related to sections. @@ -2184,7 +4271,7 @@ def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): linenum <= class_info.starting_linenum): return - matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum]) + matched = re.match(r'\s*(public|protected|private):', clean_lines.lines[linenum]) if matched: # Issue warning if the line before public/protected/private was # not a blank line, but don't do this if the previous line contains @@ -2196,20 +4283,20 @@ def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): # common when defining classes in C macros. prev_line = clean_lines.lines[linenum - 1] if (not IsBlankLine(prev_line) and - not Search(r'\b(class|struct)\b', prev_line) and - not Search(r'\\$', prev_line)): + not re.search(r'\b(class|struct)\b', prev_line) and + not re.search(r'\\$', prev_line)): # Try a bit harder to find the beginning of the class. This is to # account for multi-line base-specifier lists, e.g.: # class Derived # : public Base { end_class_head = class_info.starting_linenum for i in range(class_info.starting_linenum, linenum): - if Search(r'\{\s*$', clean_lines.lines[i]): + if re.search(r'\{\s*$', clean_lines.lines[i]): end_class_head = i break if end_class_head < linenum - 1: error(filename, linenum, 'whitespace/blank_line', 3, - '"%s:" should be preceded by a blank line' % matched.group(1)) + f'"{matched.group(1)}:" should be preceded by a blank line') def GetPreviousNonBlankLine(clean_lines, linenum): @@ -2247,10 +4334,36 @@ def CheckBraces(filename, clean_lines, linenum, error): line = clean_lines.elided[linenum] # get rid of comments and strings + if re.match(r'\s*{\s*$', line): + # We allow an open brace to start a line in the case where someone is using + # braces in a block to explicitly create a new scope, which is commonly used + # to control the lifetime of stack-allocated variables. Braces are also + # used for brace initializers inside function calls. We don't detect this + # perfectly: we just don't complain if the last non-whitespace character on + # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the + # previous line starts a preprocessor block. We also allow a brace on the + # following line if it is part of an array initialization and would not fit + # within the 80 character limit of the preceding line. + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if (not re.search(r'[,;:}{(]\s*$', prevline) and + not re.match(r'\s*#', prevline) and + not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)): + error(filename, linenum, 'whitespace/braces', 4, + '{ should almost always be at the end of the previous line') + + # An else clause should be on the same line as the preceding closing brace. + if last_wrong := re.match(r'\s*else\b\s*(?:if\b|\{|$)', line): + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if re.match(r'\s*}\s*$', prevline): + error(filename, linenum, 'whitespace/newline', 4, + 'An else should appear on the same line as the preceding }') + else: + last_wrong = False + # If braces come on one side of an else, they should be on both. # However, we have to worry about "else if" that spans multiple lines! - if Search(r'else if(\s+constexpr)?\s*\(', line): # could be multi-line if - brace_on_left = bool(Search(r'}\s*else if(\s+constexpr)?\s*\(', line)) + if re.search(r'else if(\s+constexpr)?\s*\(', line): # could be multi-line if + brace_on_left = bool(re.search(r'}\s*else if(\s+constexpr)?\s*\(', line)) # find the ( after the if pos = line.find('else if') pos = line.find('(', pos) @@ -2260,19 +4373,29 @@ def CheckBraces(filename, clean_lines, linenum, error): if brace_on_left != brace_on_right: # must be brace after if error(filename, linenum, 'readability/braces', 5, 'If an else has a brace on one side, it should have it on both') - elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line): + # Prevent detection if statement has { and we detected an improper newline after } + elif re.search(r'}\s*else[^{]*$', line) or (re.match(r'[^}]*else\s*{', line) and not last_wrong): error(filename, linenum, 'readability/braces', 5, 'If an else has a brace on one side, it should have it on both') - # Likewise, an else should never have the else clause on the same line - if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line): - error(filename, linenum, 'whitespace/newline', 4, - 'Else clause should never be on same line as else (use 2 lines)') - - # In the same way, a do/while should never be on one line - if Match(r'\s*do [^\s{]', line): - error(filename, linenum, 'whitespace/newline', 4, - 'do/while clauses should not be on a single line') + # No control clauses with braces should have its contents on the same line + # Exclude } which will be covered by empty-block detect + # Exclude ; which may be used by while in a do-while + if keyword := re.search( + r'\b(else if|if|while|for|switch)' # These have parens + r'\s*\(.*\)\s*(?:\[\[(?:un)?likely\]\]\s*)?{\s*[^\s\\};]', line): + error(filename, linenum, 'whitespace/newline', 5, + f'Controlled statements inside brackets of {keyword.group(1)} clause' + ' should be on a separate line') + elif keyword := re.search( + r'\b(else|do|try)' # These don't have parens + r'\s*(?:\[\[(?:un)?likely\]\]\s*)?{\s*[^\s\\}]', line): + error(filename, linenum, 'whitespace/newline', 5, + f'Controlled statements inside brackets of {keyword.group(1)} clause' + ' should be on a separate line') + + # TODO: Err on if...else and do...while statements without braces; + # style guide has changed since the below comment was written # Check single-line if/else bodies. The style guide says 'curly braces are not # required for single-line statements'. We additionally allow multi-line, @@ -2281,21 +4404,21 @@ def CheckBraces(filename, clean_lines, linenum, error): # its line, and the line after that should have an indent level equal to or # lower than the if. We also check for ambiguous if/else nesting without # braces. - if_else_match = Search(r'\b(if(\s+constexpr)?\s*\(|else\b)', line) - if if_else_match and not Match(r'\s*#', line): + if_else_match = re.search(r'\b(if\s*(|constexpr)\s*\(|else\b)', line) + if if_else_match and not re.match(r'\s*#', line): if_indent = GetIndentLevel(line) endline, endlinenum, endpos = line, linenum, if_else_match.end() - if_match = Search(r'\bif(\s+constexpr)?\s*\(', line) + if_match = re.search(r'\bif\s*(|constexpr)\s*\(', line) if if_match: # This could be a multiline if condition, so find the end first. pos = if_match.end() - 1 (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos) # Check for an opening brace, either directly after the if or on the next # line. If found, this isn't a single-statement conditional. - if (not Match(r'\s*{', endline[endpos:]) - and not (Match(r'\s*$', endline[endpos:]) + if (not re.match(r'\s*(?:\[\[(?:un)?likely\]\]\s*)?{', endline[endpos:]) + and not (re.match(r'\s*$', endline[endpos:]) and endlinenum < (len(clean_lines.elided) - 1) - and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))): + and re.match(r'\s*{', clean_lines.elided[endlinenum + 1]))): while (endlinenum < len(clean_lines.elided) and ';' not in clean_lines.elided[endlinenum][endpos:]): endlinenum += 1 @@ -2305,11 +4428,11 @@ def CheckBraces(filename, clean_lines, linenum, error): # We allow a mix of whitespace and closing braces (e.g. for one-liner # methods) and a single \ after the semicolon (for macros) endpos = endline.find(';') - if not Match(r';[\s}]*(\\?)$', endline[endpos:]): + if not re.match(r';[\s}]*(\\?)$', endline[endpos:]): # Semicolon isn't the last character, there's something trailing. # Output a warning if the semicolon is not contained inside # a lambda expression. - if not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$', + if not re.match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$', endline): error(filename, linenum, 'readability/braces', 4, 'If/else bodies with multiple statements require braces') @@ -2320,7 +4443,7 @@ def CheckBraces(filename, clean_lines, linenum, error): # With ambiguous nested if statements, this will error out on the # if that *doesn't* match the else, regardless of whether it's the # inner one or outer one. - if (if_match and Match(r'\s*else\b', next_line) + if (if_match and re.match(r'\s*else\b', next_line) and next_indent != if_indent): error(filename, linenum, 'readability/braces', 4, 'Else clause should be indented at the same level as if. ' @@ -2344,9 +4467,9 @@ def CheckTrailingSemicolon(filename, clean_lines, linenum, error): # Block bodies should not be followed by a semicolon. Due to C++11 # brace initialization, there are more places where semicolons are - # required than not, so we use a whitelist approach to check these - # rather than a blacklist. These are the places where "};" should - # be replaced by just "}": + # required than not, so we explicitly list the allowed rules rather + # than listing the disallowed ones. These are the places where "};" + # should be replaced by just "}": # 1. Some flavor of block following closing parenthesis: # for (;;) {}; # while (...) {}; @@ -2381,15 +4504,12 @@ def CheckTrailingSemicolon(filename, clean_lines, linenum, error): # 7. End of namespaces: # namespace {}; # - # 8. End of requires expression: - # concept name = requires () {}; - # # These semicolons seems far more common than other kinds of # redundant semicolons, possibly due to people converting classes # to namespaces. For now we do not warn for this case. # # Try matching case 1 first. - match = Match(r'^(.*\)\s*)\{', line) + match = re.match(r'^(.*\)\s*)\{', line) if match: # Matched closing parenthesis (case 1). Check the token before the # matching opening parenthesis, and don't warn if it looks like a @@ -2405,11 +4525,11 @@ def CheckTrailingSemicolon(filename, clean_lines, linenum, error): # - INTERFACE_DEF # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: # - # We implement a whitelist of safe macros instead of a blacklist of + # We implement a list of safe macros instead of a list of # unsafe macros, even though the latter appears less frequently in # google code and would have been easier to implement. This is because - # the downside for getting the whitelist wrong means some extra - # semicolons, while the downside for getting the blacklist wrong + # the downside for getting the allowed checks wrong means some extra + # semicolons, while the downside for getting disallowed checks wrong # would result in compile errors. # # In addition to macros, we also don't want to warn on @@ -2417,33 +4537,34 @@ def CheckTrailingSemicolon(filename, clean_lines, linenum, error): # - Lambdas # - alignas specifier with anonymous structs # - decltype + # - concepts (requires expression) closing_brace_pos = match.group(1).rfind(')') opening_parenthesis = ReverseCloseExpression( clean_lines, linenum, closing_brace_pos) if opening_parenthesis[2] > -1: line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]] - macro = Search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix) - func = Match(r'^(.*\])\s*$', line_prefix) + macro = re.search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix) + func = re.match(r'^(.*\])\s*$', line_prefix) if ((macro and macro.group(1) not in ( 'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST', 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED', 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or - (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or - Search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or - Search(r'\bdecltype$', line_prefix) or - Search(r'\s+=\s*$', line_prefix) or - Search(r'\brequires$', line_prefix)): + (func and not re.search(r'\boperator\s*\[\s*\]', func.group(1))) or + re.search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or + re.search(r'\bdecltype$', line_prefix) or + re.search(r'\brequires.*$', line_prefix) or + re.search(r'\s+=\s*$', line_prefix)): match = None if (match and opening_parenthesis[1] > 1 and - Search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])): + re.search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])): # Multi-line lambda-expression match = None else: # Try matching cases 2-3. - match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line) + match = re.match(r'^(.*(?:else|\)\s*const)\s*)\{', line) if not match: # Try matching cases 4-6. These are always matched on separate lines. # @@ -2454,14 +4575,14 @@ def CheckTrailingSemicolon(filename, clean_lines, linenum, error): # // blank line # } prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] - if prevline and Search(r'[;{}]\s*$', prevline): - match = Match(r'^(\s*)\{', line) + if prevline and re.search(r'[;{}]\s*$', prevline): + match = re.match(r'^(\s*)\{', line) # Check matching closing brace if match: (endline, endlinenum, endpos) = CloseExpression( clean_lines, linenum, len(match.group(1))) - if endpos > -1 and Match(r'^\s*;', endline[endpos:]): + if endpos > -1 and re.match(r'^\s*;', endline[endpos:]): # Current {} pair is eligible for semicolon check, and we have found # the redundant semicolon, output warning here. # @@ -2498,7 +4619,7 @@ def CheckEmptyBlockBody(filename, clean_lines, linenum, error): # We also check "if" blocks here, since an empty conditional block # is likely an error. line = clean_lines.elided[linenum] - matched = Match(r'\s*(for|while|if)\s*\(', line) + matched = re.match(r'\s*(for|while|if)\s*\(', line) if matched: # Find the end of the conditional expression. (end_line, end_linenum, end_pos) = CloseExpression( @@ -2507,7 +4628,7 @@ def CheckEmptyBlockBody(filename, clean_lines, linenum, error): # Output warning if what follows the condition expression is a semicolon. # No warning for all other cases, including whitespace or newline, since we # have a separate check for semicolons preceded by whitespace. - if end_pos >= 0 and Match(r';', end_line[end_pos:]): + if end_pos >= 0 and re.match(r';', end_line[end_pos:]): if matched.group(1) == 'if': error(filename, end_linenum, 'whitespace/empty_conditional_body', 5, 'Empty conditional bodies should use {}') @@ -2523,8 +4644,8 @@ def CheckEmptyBlockBody(filename, clean_lines, linenum, error): opening_linenum = end_linenum opening_line_fragment = end_line[end_pos:] # Loop until EOF or find anything that's not whitespace or opening {. - while not Search(r'^\s*\{', opening_line_fragment): - if Search(r'^(?!\s*$)', opening_line_fragment): + while not re.search(r'^\s*\{', opening_line_fragment): + if re.search(r'^(?!\s*$)', opening_line_fragment): # Conditional has no brackets. return opening_linenum += 1 @@ -2567,22 +4688,162 @@ def CheckEmptyBlockBody(filename, clean_lines, linenum, error): # Check if the body is empty if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body): return - # The body is empty. Now make sure there's not an else clause. - current_linenum = closing_linenum - current_line_fragment = closing_line[closing_pos:] - # Loop until EOF or find anything that's not whitespace or else clause. - while Search(r'^\s*$|^(?=\s*else)', current_line_fragment): - if Search(r'^(?=\s*else)', current_line_fragment): - # Found an else clause, so don't log an error. - return - current_linenum += 1 - if current_linenum == len(clean_lines.elided): + # The body is empty. Now make sure there's not an else clause. + current_linenum = closing_linenum + current_line_fragment = closing_line[closing_pos:] + # Loop until EOF or find anything that's not whitespace or else clause. + while re.search(r'^\s*$|^(?=\s*else)', current_line_fragment): + if re.search(r'^(?=\s*else)', current_line_fragment): + # Found an else clause, so don't log an error. + return + current_linenum += 1 + if current_linenum == len(clean_lines.elided): + break + current_line_fragment = clean_lines.elided[current_linenum] + + # The body is empty and there's no else clause until EOF or other code. + error(filename, end_linenum, 'whitespace/empty_if_body', 4, + ('If statement had no body and no else clause')) + + +def FindCheckMacro(line): + """Find a replaceable CHECK-like macro. + + Args: + line: line to search on. + Returns: + (macro name, start position), or (None, -1) if no replaceable + macro is found. + """ + for macro in _CHECK_MACROS: + i = line.find(macro) + if i >= 0: + # Find opening parenthesis. Do a regular expression match here + # to make sure that we are matching the expected CHECK macro, as + # opposed to some other macro that happens to contain the CHECK + # substring. + matched = re.match(r'^(.*\b' + macro + r'\s*)\(', line) + if not matched: + continue + return (macro, len(matched.group(1))) + return (None, -1) + + +def CheckCheck(filename, clean_lines, linenum, error): + """Checks the use of CHECK and EXPECT macros. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Decide the set of replacement macros that should be suggested + lines = clean_lines.elided + (check_macro, start_pos) = FindCheckMacro(lines[linenum]) + if not check_macro: + return + + # Find end of the boolean expression by matching parentheses + (last_line, end_line, end_pos) = CloseExpression( + clean_lines, linenum, start_pos) + if end_pos < 0: + return + + # If the check macro is followed by something other than a + # semicolon, assume users will log their own custom error messages + # and don't suggest any replacements. + if not re.match(r'\s*;', last_line[end_pos:]): + return + + if linenum == end_line: + expression = lines[linenum][start_pos + 1:end_pos - 1] + else: + expression = lines[linenum][start_pos + 1:] + for i in range(linenum + 1, end_line): + expression += lines[i] + expression += last_line[0:end_pos - 1] + + # Parse expression so that we can take parentheses into account. + # This avoids false positives for inputs like "CHECK((a < 4) == b)", + # which is not replaceable by CHECK_LE. + lhs = '' + rhs = '' + operator = None + while expression: + matched = re.match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||' + r'==|!=|>=|>|<=|<|\()(.*)$', expression) + if matched: + token = matched.group(1) + if token == '(': + # Parenthesized operand + expression = matched.group(2) + (end, _) = FindEndOfExpressionInLine(expression, 0, ['(']) + if end < 0: + return # Unmatched parenthesis + lhs += '(' + expression[0:end] + expression = expression[end:] + elif token in ('&&', '||'): + # Logical and/or operators. This means the expression + # contains more than one term, for example: + # CHECK(42 < a && a < b); + # + # These are not replaceable with CHECK_LE, so bail out early. + return + elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'): + # Non-relational operator + lhs += token + expression = matched.group(2) + else: + # Relational operator + operator = token + rhs = matched.group(2) + break + else: + # Unparenthesized operand. Instead of appending to lhs one character + # at a time, we do another regular expression match to consume several + # characters at once if possible. Trivial benchmark shows that this + # is more efficient when the operands are longer than a single + # character, which is generally the case. + matched = re.match(r'^([^-=!<>()&|]+)(.*)$', expression) + if not matched: + matched = re.match(r'^(\s*\S)(.*)$', expression) + if not matched: break - current_line_fragment = clean_lines.elided[current_linenum] + lhs += matched.group(1) + expression = matched.group(2) - # The body is empty and there's no else clause until EOF or other code. - error(filename, end_linenum, 'whitespace/empty_if_body', 4, - ('If statement had no body and no else clause')) + # Only apply checks if we got all parts of the boolean expression + if not (lhs and operator and rhs): + return + + # Check that rhs do not contain logical operators. We already know + # that lhs is fine since the loop above parses out && and ||. + if rhs.find('&&') > -1 or rhs.find('||') > -1: + return + + # At least one of the operands must be a constant literal. This is + # to avoid suggesting replacements for unprintable things like + # CHECK(variable != iterator) + # + # The following pattern matches decimal, hex integers, strings, and + # characters (in that order). + lhs = lhs.strip() + rhs = rhs.strip() + match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' + if re.match(match_constant, lhs) or re.match(match_constant, rhs): + # Note: since we know both lhs and rhs, we can provide a more + # descriptive error message like: + # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) + # Instead of: + # Consider using CHECK_EQ instead of CHECK(a == b) + # + # We are still keeping the less descriptive message because if lhs + # or rhs gets long, the error message might become unreadable. + error(filename, linenum, 'readability/check', 2, + f'Consider using {_CHECK_REPLACEMENT[check_macro][operator]}' + f' instead of {check_macro}(a {operator} b)') def CheckAltTokens(filename, clean_lines, linenum, error): @@ -2597,7 +4858,7 @@ def CheckAltTokens(filename, clean_lines, linenum, error): line = clean_lines.elided[linenum] # Avoid preprocessor lines - if Match(r'^\s*#', line): + if re.match(r'^\s*#', line): return # Last ditch effort to avoid multi-line comments. This will not help @@ -2613,11 +4874,43 @@ def CheckAltTokens(filename, clean_lines, linenum, error): for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): error(filename, linenum, 'readability/alt_tokens', 2, - 'Use operator %s instead of %s' % ( - _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1))) + f'Use operator {_ALT_TOKEN_REPLACEMENT[match.group(2)]}' + f' instead of {match.group(2)}') + + +def GetLineWidth(line): + """Determines the width of the line in column positions. + + Args: + line: A string, which may be a Unicode string. + + Returns: + The width of the line in column positions, accounting for Unicode + combining characters and wide characters. + """ + if isinstance(line, str): + width = 0 + for uc in unicodedata.normalize('NFC', line): + if unicodedata.east_asian_width(uc) in ('W', 'F'): + width += 2 + elif not unicodedata.combining(uc): + # Issue 337 + # https://mail.python.org/pipermail/python-list/2012-August/628809.html + if (sys.version_info.major, sys.version_info.minor) <= (3, 2): + # https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81 + is_wide_build = sysconfig.get_config_var("Py_UNICODE_SIZE") >= 4 + # https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564 + is_low_surrogate = 0xDC00 <= ord(uc) <= 0xDFFF + if not is_wide_build and is_low_surrogate: + width -= 1 + + width += 1 + return width + else: + return len(line) -def CheckStyle(filename, clean_lines, linenum, is_header, nesting_state, +def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, error): """Checks rules from the 'C++ style rules' section of cppguide.html. @@ -2629,7 +4922,7 @@ def CheckStyle(filename, clean_lines, linenum, is_header, nesting_state, filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. - is_header: Whether file is header. + file_extension: The extension (without the dot) of the filename. nesting_state: A NestingState instance which maintains information about the current stack of nested blocks being parsed. error: The function to call with any errors found. @@ -2642,6 +4935,10 @@ def CheckStyle(filename, clean_lines, linenum, is_header, nesting_state, line = raw_lines[linenum] prev = raw_lines[linenum - 1] if linenum > 0 else '' + if line.find('\t') != -1: + error(filename, linenum, 'whitespace/tab', 1, + 'Tab found; better to use spaces') + # One or three blank spaces at the beginning of the line is weird; it's # hard to reconcile that with 2-space indents. # NOTE: here are the conditions rob pike used for his tests. Mine aren't @@ -2654,28 +4951,204 @@ def CheckStyle(filename, clean_lines, linenum, is_header, nesting_state, # if(match($0, " <<")) complain = 0; # if(match(prev, " +for \\(")) complain = 0; # if(prevodd && match(prevprev, " +for \\(")) complain = 0; + scope_or_label_pattern = r'\s*(?:public|private|protected|signals)(?:\s+(?:slots\s*)?)?:\s*\\?$' classinfo = nesting_state.InnermostClass() + initial_spaces = 0 cleansed_line = clean_lines.elided[linenum] + while initial_spaces < len(line) and line[initial_spaces] == ' ': + initial_spaces += 1 + # There are certain situations we allow one space, notably for + # section labels, and also lines containing multi-line raw strings. + # We also don't check for lines that look like continuation lines + # (of lines ending in double quotes, commas, equals, or angle brackets) + # because the rules for how to indent those are non-trivial. + if (not re.search(r'[",=><] *$', prev) and + (initial_spaces == 1 or initial_spaces == 3) and + not re.match(scope_or_label_pattern, cleansed_line) and + not (clean_lines.raw_lines[linenum] != line and + re.match(r'^\s*""', line))): + error(filename, linenum, 'whitespace/indent', 3, + 'Weird number of spaces at line-start. ' + 'Are you using a 2-space indent?') + + if line and line[-1].isspace(): + error(filename, linenum, 'whitespace/end_of_line', 4, + 'Line ends in whitespace. Consider deleting these extra spaces.') + + # Check if the line is a header guard. + is_header_guard = False + if IsHeaderExtension(file_extension): + cppvar = GetHeaderGuardCPPVariable(filename) + if (line.startswith(f'#ifndef {cppvar}') or + line.startswith(f'#define {cppvar}') or + line.startswith(f'#endif // {cppvar}')): + is_header_guard = True + # #include lines and header guards can be long, since there's no clean way to + # split them. + # + # URLs can be long too. It's possible to split these, but it makes them + # harder to cut&paste. + # + # The "$Id:...$" comment may also get very long without it being the + # developers fault. + # + # Doxygen documentation copying can get pretty long when using an overloaded + # function declaration + if (not line.startswith('#include') and not is_header_guard and + not re.match(r'^\s*//.*http(s?)://\S*$', line) and + not re.match(r'^\s*//\s*[^\s]*$', line) and + not re.match(r'^// \$Id:.*#[0-9]+ \$$', line) and + not re.match(r'^\s*/// [@\\](copydoc|copydetails|copybrief) .*$', line)): + line_width = GetLineWidth(line) + if line_width > _line_length: + error(filename, linenum, 'whitespace/line_length', 2, + f'Lines should be <= {_line_length} characters long') + + if (cleansed_line.count(';') > 1 and + # allow simple single line lambdas + not re.match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}\n\r]*\}', + line) and + # for loops are allowed two ;'s (and may run over two lines). + cleansed_line.find('for') == -1 and + (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or + GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and + # It's ok to have many commands in a switch case that fits in 1 line + not ((cleansed_line.find('case ') != -1 or + cleansed_line.find('default:') != -1) and + cleansed_line.find('break;') != -1)): + error(filename, linenum, 'whitespace/newline', 0, + 'More than one command on the same line') # Some more style checks CheckBraces(filename, clean_lines, linenum, error) CheckTrailingSemicolon(filename, clean_lines, linenum, error) CheckEmptyBlockBody(filename, clean_lines, linenum, error) CheckSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckOperatorSpacing(filename, clean_lines, linenum, error) + CheckParenthesisSpacing(filename, clean_lines, linenum, error) + CheckCommaSpacing(filename, clean_lines, linenum, error) CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckSpacingForFunctionCall(filename, clean_lines, linenum, error) + CheckCheck(filename, clean_lines, linenum, error) CheckAltTokens(filename, clean_lines, linenum, error) classinfo = nesting_state.InnermostClass() if classinfo: CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) -_RE_PATTERN_INCLUDE = regex.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') +_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') # Matches the first component of a filename delimited by -s and _s. That is: # _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' # _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo' # _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo' # _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo' -_RE_FIRST_COMPONENT = regex.compile(r'^[^-_.]+') +_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') + + +def _DropCommonSuffixes(filename): + """Drops common suffixes like _test.cc or -inl.h from filename. + + For example: + >>> _DropCommonSuffixes('foo/foo-inl.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/bar/foo.cc') + 'foo/bar/foo' + >>> _DropCommonSuffixes('foo/foo_internal.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') + 'foo/foo_unusualinternal' + + Args: + filename: The input filename. + + Returns: + The filename with the common suffix removed. + """ + for suffix in itertools.chain( + (f"{test_suffix.lstrip('_')}.{ext}" + for test_suffix, ext in itertools.product(_test_suffixes, GetNonHeaderExtensions())), + (f'{suffix}.{ext}' + for suffix, ext in itertools.product(['inl', 'imp', 'internal'], GetHeaderExtensions()))): + if (filename.endswith(suffix) and len(filename) > len(suffix) and + filename[-len(suffix) - 1] in ('-', '_')): + return filename[:-len(suffix) - 1] + return os.path.splitext(filename)[0] + + +def _ClassifyInclude(fileinfo, include, used_angle_brackets, include_order="default"): + """Figures out what kind of header 'include' is. + + Args: + fileinfo: The current file cpplint is running over. A FileInfo instance. + include: The path to a #included file. + used_angle_brackets: True if the #include used <> rather than "". + include_order: "default" or other value allowed in program arguments + + Returns: + One of the _XXX_HEADER constants. + + For example: + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) + _C_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) + _CPP_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', True, "standardcfirst") + _OTHER_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) + _LIKELY_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), + ... 'bar/foo_other_ext.h', False) + _POSSIBLE_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) + _OTHER_HEADER + """ + # This is a list of all standard c++ header files, except + # those already checked for above. + is_cpp_header = include in _CPP_HEADERS + + # Mark include as C header if in list or in a known folder for standard-ish C headers. + is_std_c_header = (include_order == "default") or (include in _C_HEADERS + # additional linux glibc header folders + or re.search(rf'(?:{"|".join(C_STANDARD_HEADER_FOLDERS)})\/.*\.h', include)) + + # Headers with C++ extensions shouldn't be considered C system headers + include_ext = os.path.splitext(include)[1] + is_system = used_angle_brackets and include_ext not in ['.hh', '.hpp', '.hxx', '.h++'] + + if is_system: + if is_cpp_header: + return _CPP_SYS_HEADER + if is_std_c_header: + return _C_SYS_HEADER + else: + return _OTHER_SYS_HEADER + + # If the target file and the include we're checking share a + # basename when we drop common extensions, and the include + # lives in . , then it's likely to be owned by the target file. + target_dir, target_base = ( + os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName()))) + include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) + target_dir_pub = os.path.normpath(target_dir + '/../public') + target_dir_pub = target_dir_pub.replace('\\', '/') + if target_base == include_base and ( + include_dir == target_dir or + include_dir == target_dir_pub): + return _LIKELY_MY_HEADER + + # If the target and include share some initial basename + # component, it's possible the target is implementing the + # include, so it's allowed to be first, but we'll never + # complain if it's not there. + target_first_component = _RE_FIRST_COMPONENT.match(target_base) + include_first_component = _RE_FIRST_COMPONENT.match(include_base) + if (target_first_component and include_first_component and + target_first_component.group(0) == + include_first_component.group(0)): + return _POSSIBLE_MY_HEADER + + return _OTHER_HEADER + def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): @@ -2695,17 +5168,80 @@ def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): fileinfo = FileInfo(filename) line = clean_lines.lines[linenum] + # "include" should use the new style "foo/bar.h" instead of just "bar.h" + # Only do this check if the included header follows google naming + # conventions. If not, assume that it's a 3rd party API that + # requires special include conventions. + # + # We also make an exception for Lua headers, which follow google + # naming convention but not the include convention. + match = re.match(r'#include\s*"([^/]+\.(.*))"', line) + if match: + if (IsHeaderExtension(match.group(2)) and + not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1))): + error(filename, linenum, 'build/include_subdir', 4, + 'Include the directory when naming header files') + # we shouldn't include a file more than once. actually, there are a # handful of instances where doing so is okay, but in general it's # not. match = _RE_PATTERN_INCLUDE.search(line) if match: include = match.group(2) - is_system = (match.group(1) == '<') + used_angle_brackets = match.group(1) == '<' + duplicate_line = include_state.FindHeader(include) + if duplicate_line >= 0: + error(filename, linenum, 'build/include', 4, + f'"{include}" already included at {filename}:{duplicate_line}') + return + + for extension in GetNonHeaderExtensions(): + if (include.endswith('.' + extension) and + os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)): + error(filename, linenum, 'build/include', 4, + 'Do not include .' + extension + ' files from other packages') + return + + # We DO want to include a 3rd party looking header if it matches the + # filename. Otherwise we get an erroneous error "...should include its + # header" error later. + third_src_header = False + for ext in GetHeaderExtensions(): + basefilename = filename[0:len(filename) - len(fileinfo.Extension())] + headerfile = basefilename + '.' + ext + headername = FileInfo(headerfile).RepositoryName() + if headername in include or include in headername: + third_src_header = True + break - if not _THIRD_PARTY_HEADERS_PATTERN.match(include): + if third_src_header or not _THIRD_PARTY_HEADERS_PATTERN.match(include): include_state.include_list[-1].append((include, linenum)) + # We want to ensure that headers appear in the right order: + # 1) for foo.cc, foo.h (preferred location) + # 2) c system files + # 3) cpp system files + # 4) for foo.cc, foo.h (deprecated location) + # 5) other google headers + # + # We classify each include statement as one of those 5 types + # using a number of techniques. The include_state object keeps + # track of the highest type seen, and complains if we see a + # lower type after that. + error_message = include_state.CheckNextIncludeOrder( + _ClassifyInclude(fileinfo, include, used_angle_brackets, _include_order)) + if error_message: + error(filename, linenum, 'build/include_order', 4, + f'{error_message}. Should be: {fileinfo.BaseName()}.h, c system,' + ' c++ system, other.') + canonical_include = include_state.CanonicalizeAlphabeticalOrder(include) + if not include_state.IsInAlphabeticalOrder( + clean_lines, linenum, canonical_include): + error(filename, linenum, 'build/include_alpha', 4, + f'Include "{include}" not in alphabetical order') + include_state.SetLastHeader(canonical_include) + + def _GetTextInside(text, start_pattern): r"""Retrieves all the text between matching open and close parentheses. @@ -2732,10 +5268,10 @@ def _GetTextInside(text, start_pattern): # Give opening punctuations to get the matching close-punctuations. matching_punctuation = {'(': ')', '{': '}', '[': ']'} - closing_punctuation = set(itervalues(matching_punctuation)) + closing_punctuation = set(dict.values(matching_punctuation)) # Find the position to start extracting text. - match = regex.search(start_pattern, text, regex.M) + match = re.search(start_pattern, text, re.M) if not match: # start_pattern not found in text. return None start_position = match.end(0) @@ -2779,7 +5315,7 @@ def _GetTextInside(text, start_pattern): r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|' r'::)+') # A call-by-reference parameter ends with '& identifier'. -_RE_PATTERN_REF_PARAM = regex.compile( +_RE_PATTERN_REF_PARAM = re.compile( r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*' r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]') # A call-by-const-reference parameter either ends with 'const& identifier' @@ -2792,18 +5328,18 @@ def _GetTextInside(text, start_pattern): r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')') -def CheckLanguage(filename, clean_lines, linenum, is_header, +def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state, nesting_state, error): """Checks rules from the 'C++ language rules' section of cppguide.html. Some of these rules are hard to test (function overloading, using - uint32 inappropriately), but we do the best we can. + uint32_t inappropriately), but we do the best we can. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. - is_header: Whether file is header. + file_extension: The extension (without the dot) of the filename. include_state: An _IncludeState instance in which the headers are inserted. nesting_state: A NestingState instance which maintains information about the current stack of nested blocks being parsed. @@ -2822,7 +5358,7 @@ def CheckLanguage(filename, clean_lines, linenum, is_header, # Reset include state across preprocessor directives. This is meant # to silence warnings for conditional includes. - match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line) + match = re.match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line) if match: include_state.ResetSection(match.group(1)) @@ -2832,7 +5368,7 @@ def CheckLanguage(filename, clean_lines, linenum, is_header, CheckGlobalStatic(filename, clean_lines, linenum, error) CheckPrintf(filename, clean_lines, linenum, error) - if is_header: + if IsHeaderExtension(file_extension): # TODO(unknown): check that 1-arg constructors are explicit. # How to tell it's a constructor? # (handled in CheckForNonStandardConstructs for now) @@ -2842,15 +5378,15 @@ def CheckLanguage(filename, clean_lines, linenum, is_header, # Check if people are using the verboten C basic types. The only exception # we regularly allow is "unsigned short port" for port. - if Search(r'\bshort port\b', line): - if not Search(r'\bunsigned short port\b', line): + if re.search(r'\bshort port\b', line): + if not re.search(r'\bunsigned short port\b', line): error(filename, linenum, 'runtime/int', 4, 'Use "unsigned short" for ports, not "short"') else: - match = Search(r'\b(short|long(?! +double)|long long)\b', line) + match = re.search(r'\b(short|long(?! +double)|long long)\b', line) if match: error(filename, linenum, 'runtime/int', 4, - 'Use int16/int64/etc, rather than the C type %s' % match.group(1)) + f'Use int16_t/int64_t/etc, rather than the C type {match.group(1)}') # Check if some verboten operator overloading is going on # TODO(unknown): catch out-of-line unary operator&: @@ -2858,10 +5394,16 @@ def CheckLanguage(filename, clean_lines, linenum, is_header, # int operator&(const X& x) { return 42; } // unary operator& # The trick is it's hard to tell apart from binary operator&: # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& - if Search(r'\boperator\s*&\s*\(\s*\)', line): + if re.search(r'\boperator\s*&\s*\(\s*\)', line): error(filename, linenum, 'runtime/operator', 4, 'Unary operator& is dangerous. Do not use it.') + # Check for suspicious usage of "if" like + # } if (a == b) { + if re.search(r'\}\s*if\s*\(', line): + error(filename, linenum, 'readability/braces', 4, + 'Did you mean "else if"? If not, start a new line for "if".') + # Check for potential format string bugs like printf(foo). # We constrain the pattern not to pick things like DocidForPrintf(foo). # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) @@ -2871,29 +5413,38 @@ def CheckLanguage(filename, clean_lines, linenum, is_header, # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(') if printf_args: - match = Match(r'([\w.\->()]+)$', printf_args) + match = re.match(r'([\w.\->()]+)$', printf_args) if match and match.group(1) != '__VA_ARGS__': - function_name = regex.search(r'\b((?:string)?printf)\s*\(', - line, regex.I).group(1) + function_name = re.search(r'\b((?:string)?printf)\s*\(', + line, re.I).group(1) error(filename, linenum, 'runtime/printf', 4, - 'Potential format string bug. Do %s("%%s", %s) instead.' - % (function_name, match.group(1))) + 'Potential format string bug. Do' + f' {function_name}("%s", {match.group(1)}) instead.') # Check for potential memset bugs like memset(buf, sizeof(buf), 0). - match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) - if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): + match = re.search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) + if match and not re.match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): error(filename, linenum, 'runtime/memset', 4, - 'Did you mean "memset(%s, 0, %s)"?' - % (match.group(1), match.group(2))) + f'Did you mean "memset({match.group(1)}, 0, {match.group(2)})"?') + + if re.search(r'\busing namespace\b', line): + if re.search(r'\bliterals\b', line): + error(filename, linenum, 'build/namespaces_literals', 5, + 'Do not use namespace using-directives. ' + 'Use using-declarations instead.') + else: + error(filename, linenum, 'build/namespaces', 5, + 'Do not use namespace using-directives. ' + 'Use using-declarations instead.') # Detect variable-length arrays. - match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) + match = re.match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) if (match and match.group(2) != 'return' and match.group(2) != 'delete' and match.group(3).find(']') == -1): # Split the size using space and arithmetic operators as delimiters. # If any of the resulting tokens are not compile time constants then # report the error. - tokens = regex.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3)) + tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3)) is_const = True skip_next = False for tok in tokens: @@ -2901,17 +5452,17 @@ def CheckLanguage(filename, clean_lines, linenum, is_header, skip_next = False continue - if Search(r'sizeof\(.+\)', tok): continue - if Search(r'arraysize\(\w+\)', tok): continue + if re.search(r'sizeof\(.+\)', tok): continue + if re.search(r'arraysize\(\w+\)', tok): continue tok = tok.lstrip('(') tok = tok.rstrip(')') if not tok: continue - if Match(r'\d+', tok): continue - if Match(r'0[xX][0-9a-fA-F]+', tok): continue - if Match(r'k[A-Z0-9]\w*', tok): continue - if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue - if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue + if re.match(r'\d+', tok): continue + if re.match(r'0[xX][0-9a-fA-F]+', tok): continue + if re.match(r'k[A-Z0-9]\w*', tok): continue + if re.match(r'(.+::)?k[A-Z0-9]\w*', tok): continue + if re.match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue # A catch all for tricky sizeof cases, including 'sizeof expression', # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' # requires skipping the next token because we split on ' ' and '*'. @@ -2925,6 +5476,17 @@ def CheckLanguage(filename, clean_lines, linenum, is_header, 'Do not use variable-length arrays. Use an appropriately named ' "('k' followed by CamelCase) compile-time constant for the size.") + # Check for use of unnamed namespaces in header files. Registration + # macros are typically OK, so we allow use of "namespace {" on lines + # that end with backslashes. + if (IsHeaderExtension(file_extension) + and re.search(r'\bnamespace\s*{', line) + and line[-1] != '\\'): + error(filename, linenum, 'build/namespaces_headers', 4, + 'Do not use unnamed namespaces in header files. See ' + 'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' + ' for more information.') + def CheckGlobalStatic(filename, clean_lines, linenum, error): """Check for unsafe global or static objects. @@ -2938,7 +5500,7 @@ def CheckGlobalStatic(filename, clean_lines, linenum, error): line = clean_lines.elided[linenum] # Match two lines at a time to support multiline declarations - if linenum + 1 < clean_lines.NumLines() and not Search(r'[;({]', line): + if linenum + 1 < clean_lines.NumLines() and not re.search(r'[;({]', line): line += clean_lines.elided[linenum + 1].strip() # Check for people declaring static/global STL strings at the top level. @@ -2947,13 +5509,41 @@ def CheckGlobalStatic(filename, clean_lines, linenum, error): # also because globals can be destroyed when some threads are still running. # TODO(unknown): Generalize this to also find static unique_ptr instances. # TODO(unknown): File bugs for clang-tidy to find these. - match = Match( + match = re.match( r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +' r'([a-zA-Z0-9_:]+)\b(.*)', line) - if (Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or - Search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)): + # Remove false positives: + # - String pointers (as opposed to values). + # string *pointer + # const string *pointer + # string const *pointer + # string *const pointer + # + # - Functions and template specializations. + # string Function(... + # string Class::Method(... + # + # - Operators. These are matched separately because operator names + # cross non-word boundaries, and trying to match both operators + # and functions at the same time would decrease accuracy of + # matching identifiers. + # string Class::operator*() + if (match and + not re.search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', line) and + not re.search(r'\boperator\W', line) and + not re.match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4))): + if re.search(r'\bconst\b', line): + error(filename, linenum, 'runtime/string', 4, + 'For a static/global string constant, use a C style string instead:' + f' "{match.group(1)}char{match.group(2) or ""} {match.group(3)}[]".') + else: + error(filename, linenum, 'runtime/string', 4, + 'Static/global string variables are not permitted.') + + if (re.search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or + re.search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)): error(filename, linenum, 'runtime/init', 4, 'You seem to be initializing a member variable with itself.') @@ -2970,21 +5560,240 @@ def CheckPrintf(filename, clean_lines, linenum, error): line = clean_lines.elided[linenum] # When snprintf is used, the second argument shouldn't be a literal. - match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) + match = re.search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) if match and match.group(2) != '0': # If 2nd arg is zero, snprintf is used to calculate size. - error(filename, linenum, 'runtime/printf', 3, - 'If you can, use sizeof(%s) instead of %s as the 2nd arg ' - 'to snprintf.' % (match.group(1), match.group(2))) + error(filename, linenum, 'runtime/printf', 3, 'If you can, use' + f' sizeof({match.group(1)}) instead of {match.group(2)}' + ' as the 2nd arg to snprintf.') # Check if some verboten C functions are being used. - if Search(r'\bsprintf\s*\(', line): + if re.search(r'\bsprintf\s*\(', line): error(filename, linenum, 'runtime/printf', 5, 'Never use sprintf. Use snprintf instead.') - match = Search(r'\b(strcpy|strcat)\s*\(', line) + match = re.search(r'\b(strcpy|strcat)\s*\(', line) if match: error(filename, linenum, 'runtime/printf', 4, - 'Almost always, snprintf is better than %s' % match.group(1)) + f'Almost always, snprintf is better than {match.group(1)}') + + +def IsDerivedFunction(clean_lines, linenum): + """Check if current line contains an inherited function. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains a function with "override" + virt-specifier. + """ + # Scan back a few lines for start of current function + for i in range(linenum, max(-1, linenum - 10), -1): + match = re.match(r'^([^()]*\w+)\(', clean_lines.elided[i]) + if match: + # Look for "override" after the matching closing parenthesis + line, _, closing_paren = CloseExpression( + clean_lines, i, len(match.group(1))) + return (closing_paren >= 0 and + re.search(r'\boverride\b', line[closing_paren:])) + return False + + +def IsOutOfLineMethodDefinition(clean_lines, linenum): + """Check if current line contains an out-of-line method definition. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains an out-of-line method definition. + """ + # Scan back a few lines for start of current function + for i in range(linenum, max(-1, linenum - 10), -1): + if re.match(r'^([^()]*\w+)\(', clean_lines.elided[i]): + return re.match(r'^[^()]*\w+::\w+\(', clean_lines.elided[i]) is not None + return False + + +def IsInitializerList(clean_lines, linenum): + """Check if current line is inside constructor initializer list. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line appears to be inside constructor initializer + list, False otherwise. + """ + for i in range(linenum, 1, -1): + line = clean_lines.elided[i] + if i == linenum: + remove_function_body = re.match(r'^(.*)\{\s*$', line) + if remove_function_body: + line = remove_function_body.group(1) + + if re.search(r'\s:\s*\w+[({]', line): + # A lone colon tend to indicate the start of a constructor + # initializer list. It could also be a ternary operator, which + # also tend to appear in constructor initializer lists as + # opposed to parameter lists. + return True + if re.search(r'\}\s*,\s*$', line): + # A closing brace followed by a comma is probably the end of a + # brace-initialized member in constructor initializer list. + return True + if re.search(r'[{};]\s*$', line): + # Found one of the following: + # - A closing brace or semicolon, probably the end of the previous + # function. + # - An opening brace, probably the start of current class or namespace. + # + # Current line is probably not inside an initializer list since + # we saw one of those things without seeing the starting colon. + return False + + # Got to the beginning of the file without seeing the start of + # constructor initializer list. + return False + + +def CheckForNonConstReference(filename, clean_lines, linenum, + nesting_state, error): + """Check for non-const references. + + Separate from CheckLanguage since it scans backwards from current + line, instead of scanning forward. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + # Do nothing if there is no '&' on current line. + line = clean_lines.elided[linenum] + if '&' not in line: + return + + # If a function is inherited, current function doesn't have much of + # a choice, so any non-const references should not be blamed on + # derived function. + if IsDerivedFunction(clean_lines, linenum): + return + + # Don't warn on out-of-line method definitions, as we would warn on the + # in-line declaration, if it isn't marked with 'override'. + if IsOutOfLineMethodDefinition(clean_lines, linenum): + return + + # Long type names may be broken across multiple lines, usually in one + # of these forms: + # LongType + # ::LongTypeContinued &identifier + # LongType:: + # LongTypeContinued &identifier + # LongType< + # ...>::LongTypeContinued &identifier + # + # If we detected a type split across two lines, join the previous + # line to current line so that we can match const references + # accordingly. + # + # Note that this only scans back one line, since scanning back + # arbitrary number of lines would be expensive. If you have a type + # that spans more than 2 lines, please use a typedef. + if linenum > 1: + previous = None + if re.match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line): + # previous_line\n + ::current_line + previous = re.search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$', + clean_lines.elided[linenum - 1]) + elif re.match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line): + # previous_line::\n + current_line + previous = re.search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$', + clean_lines.elided[linenum - 1]) + if previous: + line = previous.group(1) + line.lstrip() + else: + # Check for templated parameter that is split across multiple lines + endpos = line.rfind('>') + if endpos > -1: + (_, startline, startpos) = ReverseCloseExpression( + clean_lines, linenum, endpos) + if startpos > -1 and startline < linenum: + # Found the matching < on an earlier line, collect all + # pieces up to current line. + line = '' + for i in range(startline, linenum + 1): + line += clean_lines.elided[i].strip() + + # Check for non-const references in function parameters. A single '&' may + # found in the following places: + # inside expression: binary & for bitwise AND + # inside expression: unary & for taking the address of something + # inside declarators: reference parameter + # We will exclude the first two cases by checking that we are not inside a + # function body, including one that was just introduced by a trailing '{'. + # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. + if (nesting_state.previous_stack_top and + not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or + isinstance(nesting_state.previous_stack_top, _NamespaceInfo))): + # Not at toplevel, not within a class, and not within a namespace + return + + # Avoid initializer lists. We only need to scan back from the + # current line for something that starts with ':'. + # + # We don't need to check the current line, since the '&' would + # appear inside the second set of parentheses on the current line as + # opposed to the first set. + if linenum > 0: + for i in range(linenum - 1, max(0, linenum - 10), -1): + previous_line = clean_lines.elided[i] + if not re.search(r'[),]\s*$', previous_line): + break + if re.match(r'^\s*:\s+\S', previous_line): + return + + # Avoid preprocessors + if re.search(r'\\\s*$', line): + return + + # Avoid constructor initializer lists + if IsInitializerList(clean_lines, linenum): + return + + # We allow non-const references in a few standard places, like functions + # called "swap()" or iostream operators like "<<" or ">>". Do not check + # those function parameters. + # + # We also accept & in static_assert, which looks like a function but + # it's actually a declaration expression. + allowed_functions = (r'(?:[sS]wap(?:<\w:+>)?|' + r'operator\s*[<>][<>]|' + r'static_assert|COMPILE_ASSERT' + r')\s*\(') + if re.search(allowed_functions, line): + return + elif not re.search(r'\S+\([^)]*$', line): + # Don't see an allowed function on this line. Actually we + # didn't see any function name on this line, so this is likely a + # multi-line parameter list. Try a bit harder to catch this case. + for i in range(2): + if (linenum > i and + re.search(allowed_functions, clean_lines.elided[linenum - i - 1])): + return + + decls = re.sub(r'{[^}]*}', ' ', line) # exclude function body + for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls): + if (not re.match(_RE_PATTERN_CONST_REF_PARAM, parameter) and + not re.match(_RE_PATTERN_REF_STREAM_PARAM, parameter)): + error(filename, linenum, 'runtime/references', 2, + 'Is this a non-const reference? ' + 'If so, make const or use a pointer: ' + + re.sub(' *<', '<', parameter)) def CheckCasts(filename, clean_lines, linenum, error): @@ -3002,9 +5811,9 @@ def CheckCasts(filename, clean_lines, linenum, error): # I just try to capture the most common basic types, though there are more. # Parameterless conversion functions, such as bool(), are allowed as they are # probably a member operator declaration or default constructor. - match = Search( + match = re.search( r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b' - r'(int|float|double|bool|char|int32|uint32|int64|uint64)' + r'(int|float|double|bool|char|int16_t|uint16_t|int32_t|uint32_t|int64_t|uint64_t)' r'(\([^)].*)', line) expecting_function = ExpectingFunctionArgs(clean_lines, linenum) if match and not expecting_function: @@ -3026,7 +5835,7 @@ def CheckCasts(filename, clean_lines, linenum, error): # Avoid arrays by looking for brackets that come after the closing # parenthesis. - if Match(r'\([^()]+\)\s*\[', match.group(3)): + if re.match(r'\([^()]+\)\s*\[', match.group(3)): return # Other things to ignore: @@ -3037,22 +5846,18 @@ def CheckCasts(filename, clean_lines, linenum, error): matched_funcptr = match.group(3) if (matched_new_or_template is None and not (matched_funcptr and - (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', + (re.match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', matched_funcptr) or matched_funcptr.startswith('(*)'))) and - not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and - not Search(r'new\(\S+\)\s*' + matched_type, line)): + not re.match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and + not re.search(r'new\(\S+\)\s*' + matched_type, line)): error(filename, linenum, 'readability/casting', 4, 'Using deprecated casting style. ' - 'Use static_cast<%s>(...) instead' % - matched_type) + f'Use static_cast<{matched_type}>(...) instead') if not expecting_function: - # This doesn't check for short, long, or long long integer casts because - # they are disallowed by other lint rules. CheckCStyleCast(filename, clean_lines, linenum, 'static_cast', - r'\((unsigned|(unsigned )?(char|int)|float|double|bool|' - r'u?int(8|16|32|64)(_t)?)\)', error) + r'\((int|float|double|bool|char|u?int(16|32|64)_t|size_t)\)', error) # This doesn't catch all cases. Consider (const char * const)"hello". # @@ -3064,7 +5869,7 @@ def CheckCasts(filename, clean_lines, linenum, error): else: # Check pointer casts for other than string constants CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast', - r'\(((const )?\w+\s?\*+\s?(const)?)\)', error) + r'\((\w+\s?\*+\s?)\)', error) # In addition, we look for people taking the address of a cast. This # is dangerous -- casts can assign to temporaries, so the pointer doesn't @@ -3077,7 +5882,7 @@ def CheckCasts(filename, clean_lines, linenum, error): # # This is not a cast: # reference_type&(int* function_param); - match = Search( + match = re.search( r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|' r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line) if match: @@ -3085,7 +5890,7 @@ def CheckCasts(filename, clean_lines, linenum, error): # dereferenced by the casted pointer, as opposed to the casted # pointer itself. parenthesis_error = False - match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line) + match = re.match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line) if match: _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1))) if x1 >= 0 and clean_lines.elided[y1][x1] == '(': @@ -3094,7 +5899,7 @@ def CheckCasts(filename, clean_lines, linenum, error): extended_line = clean_lines.elided[y2][x2:] if y2 < clean_lines.NumLines() - 1: extended_line += clean_lines.elided[y2 + 1] - if Match(r'\s*(?:->|\[)', extended_line): + if re.match(r'\s*(?:->|\[)', extended_line): parenthesis_error = True if parenthesis_error: @@ -3126,13 +5931,13 @@ def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error): False otherwise. """ line = clean_lines.elided[linenum] - match = Search(pattern, line) + match = re.search(pattern, line) if not match: return False # Exclude lines with keywords that tend to look like casts context = line[0:match.start(1) - 1] - if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context): + if re.match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context): return False # Try expanding current context to see if we one level of @@ -3140,24 +5945,24 @@ def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error): if linenum > 0: for i in range(linenum - 1, max(0, linenum - 5), -1): context = clean_lines.elided[i] + context - if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context): + if re.match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context): return False # operator++(int) and operator--(int) - if context.endswith(' operator++') or context.endswith(' operator--'): + if (context.endswith(' operator++') or context.endswith(' operator--') or + context.endswith('::operator++') or context.endswith('::operator--')): return False # A single unnamed argument for a function tends to look like old style cast. # If we see those, don't issue warnings for deprecated casts. remainder = line[match.end(0):] - if Match(r'^\s*(?:;|const\b|throw\b|noexcept\b|final\b|override\b|[=>{),]|->)', + if re.match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)', remainder): return False # At this point, all that should be left is actual casts. error(filename, linenum, 'readability/casting', 4, - 'Using C-style cast. Use %s<%s>(...) instead' % - (cast_type, match.group(1))) + f'Using C-style cast. Use {cast_type}<{match.group(1)}>(...) instead') return True @@ -3174,19 +5979,19 @@ def ExpectingFunctionArgs(clean_lines, linenum): of function types. """ line = clean_lines.elided[linenum] - return (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or + return (re.match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or (linenum >= 2 and - (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$', + (re.match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$', clean_lines.elided[linenum - 1]) or - Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$', + re.match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$', clean_lines.elided[linenum - 2]) or - Search(r'\bstd::m?function\s*\<\s*$', + re.search(r'\bstd::m?function\s*\<\s*$', clean_lines.elided[linenum - 1])))) _HEADERS_CONTAINING_TEMPLATES = ( ('', ('deque',)), - ('', ('function', 'unary_function', 'binary_function', + ('', ('unary_function', 'binary_function', 'plus', 'minus', 'multiplies', 'divides', 'modulus', 'negate', 'equal_to', 'not_equal_to', 'greater', 'less', @@ -3205,7 +6010,7 @@ def ExpectingFunctionArgs(clean_lines, linenum): )), ('', ('numeric_limits',)), ('', ('list',)), - ('', ('map', 'multimap',)), + ('', ('multimap',)), ('', ('allocator', 'make_shared', 'make_unique', 'shared_ptr', 'unique_ptr', 'weak_ptr')), ('', ('queue', 'priority_queue',)), @@ -3232,27 +6037,70 @@ def ExpectingFunctionArgs(clean_lines, linenum): ('', ('forward', 'make_pair', 'move', 'swap')), ) -_RE_PATTERN_STRING = regex.compile(r'\bstring\b') +# Non templated types or global objects +_HEADERS_TYPES_OR_OBJS = ( + # String and others are special -- it is a non-templatized type in STL. + ('', ('string',)), + ('', ('cin', 'cout', 'cerr', 'clog', 'wcin', 'wcout', + 'wcerr', 'wclog')), + ('', ('FILE', 'fpos_t'))) + +# Non templated functions +_HEADERS_FUNCTIONS = ( + ('', ('fopen', 'freopen', + 'fclose', 'fflush', 'setbuf', 'setvbuf', 'fread', + 'fwrite', 'fgetc', 'getc', 'fgets', 'fputc', 'putc', + 'fputs', 'getchar', 'gets', 'putchar', 'puts', 'ungetc', + 'scanf', 'fscanf', 'sscanf', 'vscanf', 'vfscanf', + 'vsscanf', 'printf', 'fprintf', 'sprintf', 'snprintf', + 'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf', + 'ftell', 'fgetpos', 'fseek', 'fsetpos', + 'clearerr', 'feof', 'ferror', 'perror', + 'tmpfile', 'tmpnam'),),) _re_pattern_headers_maybe_templates = [] for _header, _templates in _HEADERS_MAYBE_TEMPLATES: for _template in _templates: # Match max(..., ...), max(..., ...), but not foo->max, foo.max or - # type::max(). + # 'type::max()'. _re_pattern_headers_maybe_templates.append( - (regex.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'), + (re.compile(r'((\bstd::)|[^>.:])\b' + _template + r'(<.*?>)?\([^\)]'), _template, _header)) +# Map is often overloaded. Only check, if it is fully qualified. +# Match 'std::map(...)', but not 'map(...)'' +_re_pattern_headers_maybe_templates.append( + (re.compile(r'(std\b::\bmap\s*\<)|(^(std\b::\b)map\b\(\s*\<)'), + 'map<>', + '')) + # Other scripts may reach in and modify this pattern. _re_pattern_templates = [] for _header, _templates in _HEADERS_CONTAINING_TEMPLATES: for _template in _templates: _re_pattern_templates.append( - (regex.compile(r'(\<|\b)' + _template + r'\s*\<'), + (re.compile(r'((^|(^|\s|((^|\W)::))std::)|[^>.:]\b)' + _template + r'\s*\<'), _template + '<>', _header)) +_re_pattern_types_or_objs = [] +for _header, _types_or_objs in _HEADERS_TYPES_OR_OBJS: + for _type_or_obj in _types_or_objs: + _re_pattern_types_or_objs.append( + (re.compile(r'\b' + _type_or_obj + r'\b'), + _type_or_obj, + _header)) + +_re_pattern_functions = [] +for _header, _functions in _HEADERS_FUNCTIONS: + for _function in _functions: + # Match printf(..., ...), but not foo->printf, foo.printf or + # 'type::printf()'. + _re_pattern_functions.append( + (re.compile(r'([^>.]|^)\b' + _function + r'\([^\)]'), + _function, + _header)) def FilesBelongToSameModule(filename_cc, filename_h): """Check if these two filenames belong to the same module. @@ -3284,14 +6132,17 @@ def FilesBelongToSameModule(filename_cc, filename_h): string: the additional prefix needed to open the header file. """ fileinfo_cc = FileInfo(filename_cc) - if not IsSourceFile(filename_cc): + if fileinfo_cc.Extension().lstrip('.') not in GetNonHeaderExtensions(): return (False, '') fileinfo_h = FileInfo(filename_h) - if not IsHeaderFile(filename_h): + if not IsHeaderExtension(fileinfo_h.Extension().lstrip('.')): return (False, '') filename_cc = filename_cc[:-(len(fileinfo_cc.Extension()))] + matched_test_suffix = re.search(_TEST_FILE_SUFFIX, fileinfo_cc.BaseName()) + if matched_test_suffix: + filename_cc = filename_cc[:-len(matched_test_suffix.group(1))] filename_cc = filename_cc.replace('/public/', '/') filename_cc = filename_cc.replace('/internal/', '/') @@ -3309,33 +6160,6 @@ def FilesBelongToSameModule(filename_cc, filename_h): return files_belong_to_same_module, common_path -def UpdateIncludeState(filename, include_dict, io=codecs): - """Fill up the include_dict with new includes found from the file. - - Args: - filename: the name of the header to read. - include_dict: a dictionary in which the headers are inserted. - io: The io factory to use to read the file. Provided for testability. - - Returns: - True if a header was successfully added. False otherwise. - """ - headerfile = None - try: - headerfile = io.open(filename, 'r', 'utf8', 'replace') - except IOError: - return False - linenum = 0 - for line in headerfile: - linenum += 1 - clean_line = CleanseComments(line) - match = _RE_PATTERN_INCLUDE.search(clean_line) - if match: - include = match.group(2) - include_dict.setdefault(include, linenum) - return True - - def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, io=codecs): """Reports for missing stl includes. @@ -3362,21 +6186,24 @@ def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, if not line or line[0] == '#': continue - # String is special -- it is a non-templatized type in STL. - matched = _RE_PATTERN_STRING.search(line) - if matched: - # Don't warn about strings in non-STL namespaces: - # (We check only the first match per line; good enough.) - prefix = line[:matched.start()] - if prefix.endswith('std::') or not prefix.endswith('::'): - required[''] = (linenum, 'string') + _re_patterns = [] + _re_patterns.extend(_re_pattern_types_or_objs) + _re_patterns.extend(_re_pattern_functions) + for pattern, item, header in _re_patterns: + matched = pattern.search(line) + if matched: + # Don't warn about strings in non-STL namespaces: + # (We check only the first match per line; good enough.) + prefix = line[:matched.start()] + if prefix.endswith('std::') or not prefix.endswith('::'): + required[header] = (linenum, item) for pattern, template, header in _re_pattern_headers_maybe_templates: if pattern.search(line): required[header] = (linenum, template) # The following function is just a speed up, no semantics are changed. - if not '<' in line: # Reduces the cpu time usage by skipping lines. + if '<' not in line: # Reduces the cpu time usage by skipping lines. continue for pattern, template, header in _re_pattern_templates: @@ -3388,36 +6215,10 @@ def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, if prefix.endswith('std::') or not prefix.endswith('::'): required[header] = (linenum, template) - # The policy is that if you #include something in foo.h you don't need to - # include it again in foo.cc. Here, we will look at possible includes. # Let's flatten the include_state include_list and copy it into a dictionary. include_dict = dict([item for sublist in include_state.include_list for item in sublist]) - # Did we find the header for this file (if any) and successfully load it? - header_found = False - - # Use the absolute path so that matching works properly. - abs_filename = FileInfo(filename).FullName() - - # include_dict is modified during iteration, so we iterate over a copy of - # the keys. - header_keys = list(include_dict.keys()) - for header in header_keys: - (same_module, common_path) = FilesBelongToSameModule(abs_filename, header) - fullpath = common_path + header - if same_module and UpdateIncludeState(fullpath, include_dict, io): - header_found = True - - # If we can't find the header file for a .cc, assume it's because we don't - # know where to look. In that case we'll give up as we're not sure they - # didn't include it in the .h file. - # TODO(unknown): Do a better job of finding .h files so we are confident that - # not having the .h file means there isn't one. - if not header_found: - if IsSourceFile(filename): - return - # All the lines have been processed, report the errors found. for required_header_unstripped in sorted(required, key=required.__getitem__): template = required[required_header_unstripped][1] @@ -3427,7 +6228,7 @@ def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, 'Add #include ' + required_header_unstripped + ' for ' + template) -_RE_PATTERN_EXPLICIT_MAKEPAIR = regex.compile(r'\bmake_pair\s*<') +_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<') def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): @@ -3462,20 +6263,20 @@ def CheckRedundantVirtual(filename, clean_lines, linenum, error): """ # Look for "virtual" on current line. line = clean_lines.elided[linenum] - virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line) + virtual = re.match(r'^(.*)(\bvirtual\b)(.*)$', line) if not virtual: return # Ignore "virtual" keywords that are near access-specifiers. These # are only used in class base-specifier and do not apply to member # functions. - if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or - Match(r'^\s+(public|protected|private)\b', virtual.group(3))): + if (re.search(r'\b(public|protected|private)\s+$', virtual.group(1)) or + re.match(r'^\s+(public|protected|private)\b', virtual.group(3))): return # Ignore the "virtual" keyword from virtual base classes. Usually # there is a column on the same line in these cases (virtual base # classes are rare in google3 because multiple inheritance is rare). - if Match(r'^.*[^:]:[^:].*$', line): return + if re.match(r'^.*[^:]:[^:].*$', line): return # Look for the next opening parenthesis. This is the start of the # parameter list (possibly on the next line shortly after virtual). @@ -3487,7 +6288,7 @@ def CheckRedundantVirtual(filename, clean_lines, linenum, error): start_col = len(virtual.group(2)) for start_line in range(linenum, min(linenum + 3, clean_lines.NumLines())): line = clean_lines.elided[start_line][start_col:] - parameter_list = Match(r'^([^(]*)\(', line) + parameter_list = re.match(r'^([^(]*)\(', line) if parameter_list: # Match parentheses to find the end of the parameter list (_, end_line, end_col) = CloseExpression( @@ -3502,16 +6303,16 @@ def CheckRedundantVirtual(filename, clean_lines, linenum, error): # (possibly on the next few lines). for i in range(end_line, min(end_line + 3, clean_lines.NumLines())): line = clean_lines.elided[i][end_col:] - match = Search(r'\b(override|final)\b', line) + match = re.search(r'\b(override|final)\b', line) if match: error(filename, linenum, 'readability/inheritance', 4, ('"virtual" is redundant since function is ' - 'already declared as "%s"' % match.group(1))) + f'already declared as "{match.group(1)}"')) # Set end_col to check whole lines after we are done with the # first line. end_col = 0 - if Search(r'[^\w]\s*$', line): + if re.search(r'[^\w]\s*$', line): break @@ -3538,12 +6339,14 @@ def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error): return # Check that at most one of "override" or "final" is present, not both - if Search(r'\boverride\b', fragment) and Search(r'\bfinal\b', fragment): + if re.search(r'\boverride\b', fragment) and re.search(r'\bfinal\b', fragment): error(filename, linenum, 'readability/inheritance', 4, ('"override" is redundant since function is ' 'already declared as "final"')) + + # Returns true if we are at a new block, and it is directly # inside of a namespace. def IsBlockInNameSpace(nesting_state, is_forward_declaration): @@ -3559,19 +6362,65 @@ def IsBlockInNameSpace(nesting_state, is_forward_declaration): return len(nesting_state.stack) >= 1 and ( isinstance(nesting_state.stack[-1], _NamespaceInfo)) + if len(nesting_state.stack) >= 1: + if isinstance(nesting_state.stack[-1], _NamespaceInfo): + return True + elif (len(nesting_state.stack) > 1 and + isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and + isinstance(nesting_state.stack[-2], _NamespaceInfo)): + return True + return False + + +def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + raw_lines_no_comments, linenum): + """This method determines if we should apply our namespace indentation check. + + Args: + nesting_state: The current nesting state. + is_namespace_indent_item: If we just put a new class on the stack, True. + If the top of the stack is not a class, or we did not recently + add the class, False. + raw_lines_no_comments: The lines without the comments. + linenum: The current line number we are processing. + + Returns: + True if we should apply our namespace indentation check. Currently, it + only works for classes and namespaces inside of a namespace. + """ + + is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, + linenum) - return (len(nesting_state.stack) > 1 and - nesting_state.stack[-1].check_namespace_indentation and - isinstance(nesting_state.stack[-2], _NamespaceInfo)) + if not (is_namespace_indent_item or is_forward_declaration): + return False + + # If we are in a macro, we do not want to check the namespace indentation. + if IsMacroDefinition(raw_lines_no_comments, linenum): + return False + + return IsBlockInNameSpace(nesting_state, is_forward_declaration) -def ProcessLine(filename, is_header, clean_lines, line, - include_state, function_state, nesting_state, error): +# Call this method if the line is directly inside of a namespace. +# If the line above is blank (excluding comments) or the start of +# an inner namespace, it cannot be indented. +def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, + error): + line = raw_lines_no_comments[linenum] + if re.match(r'^\s+', line): + error(filename, linenum, 'whitespace/indent_namespace', 4, + 'Do not indent within a namespace.') + + +def ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions=None): """Processes a single line in the file. Args: filename: Filename of the file that is being processed. - is_header: Whether file is header. + file_extension: The extension (dot not included) of the file. clean_lines: An array of strings, each representing a line of the file, with comments stripped. line: Number of line being processed. @@ -3581,34 +6430,76 @@ def ProcessLine(filename, is_header, clean_lines, line, the current stack of nested blocks being parsed. error: A callable to which errors are reported, which takes 4 arguments: filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error """ raw_lines = clean_lines.raw_lines ParseNolintSuppressions(filename, raw_lines[line], line, error) nesting_state.Update(filename, clean_lines, line, error) + CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error) if nesting_state.InAsmBlock(): return CheckForFunctionLengths(filename, clean_lines, line, function_state, error) CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) - CheckStyle(filename, clean_lines, line, is_header, nesting_state, error) - CheckLanguage(filename, clean_lines, line, is_header, include_state, + CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) + CheckLanguage(filename, clean_lines, line, file_extension, include_state, nesting_state, error) + CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) CheckForNonStandardConstructs(filename, clean_lines, line, nesting_state, error) + CheckVlogArguments(filename, clean_lines, line, error) + CheckPosixThreading(filename, clean_lines, line, error) CheckInvalidIncrement(filename, clean_lines, line, error) CheckMakePairUsesDeduction(filename, clean_lines, line, error) CheckRedundantVirtual(filename, clean_lines, line, error) CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) + if extra_check_functions: + for check_fn in extra_check_functions: + check_fn(filename, clean_lines, line, error) + + +def FlagCxxHeaders(filename, clean_lines, linenum, error): + """Flag C++ headers that the styleguide restricts. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + include = re.match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) -def ProcessFileData(filename, is_header, lines, error): + # Flag unapproved C++11 headers. + if include and include.group(1) in ('cfenv', + 'fenv.h', + 'ratio', + ): + error(filename, linenum, 'build/c++11', 5, + f"<{include.group(1)}> is an unapproved C++11 header.") + + # filesystem is the only unapproved C++17 header + if include and include.group(1) == 'filesystem': + error(filename, linenum, 'build/c++17', 5, + " is an unapproved C++17 header.") + + +def ProcessFileData(filename, file_extension, lines, error, + extra_check_functions=None): """Performs lint checks and reports any errors to the given error function. Args: filename: Filename of the file that is being processed. - is_header: Whether file is header. + file_extension: The extension (dot not included) of the file. lines: An array of strings, each representing a line of the file, with the last element being empty if the file is terminated with a newline. error: A callable to which errors are reported, which takes 4 arguments: filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error """ lines = (['// marker so line numbers and indices both start at 1'] + lines + ['// marker so line numbers end in a known way']) @@ -3619,46 +6510,208 @@ def ProcessFileData(filename, is_header, lines, error): ResetNolintSuppressions() - ProcessGlobalSuppresions(lines) + CheckForCopyright(filename, lines, error) + ProcessGlobalSuppressions(lines) RemoveMultiLineComments(filename, lines, error) clean_lines = CleansedLines(lines) + if IsHeaderExtension(file_extension): + CheckForHeaderGuard(filename, clean_lines, error) + for line in range(clean_lines.NumLines()): - ProcessLine(filename, is_header, clean_lines, line, - include_state, function_state, nesting_state, error) + ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions) + FlagCxxHeaders(filename, clean_lines, line, error) + if _error_suppressions.HasOpenBlock(): + error(filename, _error_suppressions.GetOpenBlockStart(), 'readability/nolint', 5, + 'NONLINT block never ended') CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) + # Check that the .cc file has included its header if it exists. + if _IsSourceExtension(file_extension): + CheckHeaderFileIncluded(filename, include_state, error) + # We check here rather than inside ProcessLine so that we see raw # lines rather than "cleaned" lines. CheckForBadCharacters(filename, lines, error) + CheckForNewlineAtEOF(filename, lines, error) + +def ProcessConfigOverrides(filename): + """ Loads the configuration files and processes the config overrides. + + Args: + filename: The name of the file being processed by the linter. + + Returns: + False if the current |filename| should not be processed further. + """ + + abs_filename = os.path.abspath(filename) + cfg_filters = [] + keep_looking = True + while keep_looking: + abs_path, base_name = os.path.split(abs_filename) + if not base_name: + break # Reached the root directory. + + cfg_file = os.path.join(abs_path, _config_filename) + abs_filename = abs_path + if not os.path.isfile(cfg_file): + continue + + try: + with codecs.open(cfg_file, 'r', 'utf8', 'replace') as file_handle: + for line in file_handle: + line, _, _ = line.partition('#') # Remove comments. + if not line.strip(): + continue + + name, _, val = line.partition('=') + name = name.strip() + val = val.strip() + if name == 'set noparent': + keep_looking = False + elif name == 'filter': + cfg_filters.append(val) + elif name == 'exclude_files': + # When matching exclude_files pattern, use the base_name of + # the current file name or the directory name we are processing. + # For example, if we are checking for lint errors in /foo/bar/baz.cc + # and we found the .cfg file at /foo/CPPLINT.cfg, then the config + # file's "exclude_files" filter is meant to be checked against "bar" + # and not "baz" nor "bar/baz.cc". + if base_name: + pattern = re.compile(val) + if pattern.match(base_name): + if _cpplint_state.quiet: + # Suppress "Ignoring file" warning when using --quiet. + return False + _cpplint_state.PrintInfo(f'Ignoring "{filename}": file excluded by "{cfg_file}". ' + 'File path component "%s" matches ' + 'pattern "%s"\n' % + (base_name, val)) + return False + elif name == 'linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + _cpplint_state.PrintError('Line length must be numeric.') + elif name == 'extensions': + ProcessExtensionsOption(val) + elif name == 'root': + global _root + # root directories are specified relative to CPPLINT.cfg dir. + _root = os.path.join(os.path.dirname(cfg_file), val) + elif name == 'headers': + ProcessHppHeadersOption(val) + elif name == 'includeorder': + ProcessIncludeOrderOption(val) + else: + _cpplint_state.PrintError( + f'Invalid configuration option ({name}) in file {cfg_file}\n') + + except IOError: + _cpplint_state.PrintError( + f"Skipping config file '{cfg_file}': Can't open for reading\n") + keep_looking = False -def ProcessFile(filename): + # Apply all the accumulated filters in reverse order (top-level directory + # config options having the least priority). + for cfg_filter in reversed(cfg_filters): + _AddFilters(cfg_filter) + + return True + + +def ProcessFile(filename, vlevel, extra_check_functions=None): """Does google-lint on a single file. Args: filename: The name of the file to parse. + + vlevel: The level of errors to report. Every error of confidence + >= verbose_level will be reported. 0 is a good default. + + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error """ - # Note that - # we are not opening the file with universal newline support - # (which codecs doesn't support anyway), so the resulting lines do - # contain trailing '\r' characters if we are reading a file that - # has CRLF endings. - # If after the split a trailing '\r' is present, it is removed - # below. - lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n') + _SetVerboseLevel(vlevel) + _BackupFilters() + old_errors = _cpplint_state.error_count + + if not ProcessConfigOverrides(filename): + _RestoreFilters() + return + + lf_lines = [] + crlf_lines = [] + try: + # Support the UNIX convention of using "-" for stdin. Note that + # we are not opening the file with universal newline support + # (which codecs doesn't support anyway), so the resulting lines do + # contain trailing '\r' characters if we are reading a file that + # has CRLF endings. + # If after the split a trailing '\r' is present, it is removed + # below. + if filename == '-': + lines = codecs.StreamReaderWriter(sys.stdin, + codecs.getreader('utf8'), + codecs.getwriter('utf8'), + 'replace').read().split('\n') + else: + with codecs.open(filename, 'r', 'utf8', 'replace') as target_file: + lines = target_file.read().split('\n') + + # Remove trailing '\r'. + # The -1 accounts for the extra trailing blank line we get from split() + for linenum in range(len(lines) - 1): + if lines[linenum].endswith('\r'): + lines[linenum] = lines[linenum].rstrip('\r') + crlf_lines.append(linenum + 1) + else: + lf_lines.append(linenum + 1) + + except IOError: + # TODO: Maybe make this have an exit code of 2 after all is done + _cpplint_state.PrintError( + f"Skipping input '{filename}': Can't open for reading\n") + _RestoreFilters() + return + + # Note, if no dot is found, this will give the entire filename as the ext. + basename = os.path.basename(filename) + file_extension = basename[basename.rfind('.') + 1:] - # Remove trailing '\r'. - # The -1 accounts for the extra trailing blank line we get from split() - for linenum in range(len(lines) - 1): - if lines[linenum].endswith('\r'): - lines[linenum] = lines[linenum].rstrip('\r') + # When reading from stdin, the extension is unknown, so no cpplint tests + # should rely on the extension. + ProcessFileData(filename, file_extension, lines, Error, + extra_check_functions) - is_header = IsHeaderFile(filename) + # If end-of-line sequences are a mix of LF and CR-LF, issue + # warnings on the lines with CR. + # + # Don't issue any warnings if all lines are uniformly LF or CR-LF, + # since critique can handle these just fine, and the style guide + # doesn't dictate a particular end of line sequence. + # + # We can't depend on os.linesep to determine what the desired + # end-of-line sequence should be, since that will return the + # server-side end-of-line sequence. + if lf_lines and crlf_lines: + # Warn on every line with CR. An alternative approach might be to + # check whether the file is mostly CRLF or just LF, and warn on the + # minority, we bias toward LF here since most tools prefer LF. + for linenum in crlf_lines: + Error(filename, linenum, 'whitespace/newline', 1, + 'Unexpected \\r (^M) found; better to use only \\n') - ProcessFileData(filename, is_header, lines, Error) + _RestoreFilters() def PrintUsage(message): @@ -3667,13 +6720,30 @@ def PrintUsage(message): Args: message: The optional error message. """ - sys.stderr.write(_USAGE) + sys.stderr.write(_USAGE % (sorted(list(GetAllExtensions())), + ','.join(sorted(list(GetAllExtensions()))), + sorted(GetHeaderExtensions()), + ','.join(sorted(GetHeaderExtensions())))) if message: sys.exit('\nFATAL ERROR: ' + message) else: sys.exit(0) +def PrintVersion(): + sys.stdout.write('Cpplint fork (https://github.com/cpplint/cpplint)\n') + sys.stdout.write('cpplint ' + __VERSION__ + '\n') + sys.stdout.write('Python ' + sys.version + '\n') + sys.exit(0) + +def PrintCategories(): + """Prints a list of all the error-categories used by error messages. + + These are the categories used to filter messages via --filter. + """ + sys.stderr.write(''.join(f' {cat}\n' for cat in _ERROR_CATEGORIES)) + sys.exit(0) + def ParseArguments(args): """Parses the command line arguments. @@ -3687,33 +6757,186 @@ def ParseArguments(args): The list of filenames to lint. """ try: - (opts, filenames) = getopt.getopt(args, '', ['help', - 'srcs=', - 'headers=']) + (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', + 'v=', + 'version', + 'counting=', + 'filter=', + 'root=', + 'repository=', + 'linelength=', + 'extensions=', + 'exclude=', + 'recursive', + 'headers=', + 'includeorder=', + 'config=', + 'quiet']) except getopt.GetoptError: PrintUsage('Invalid arguments.') + verbosity = _VerboseLevel() + output_format = _OutputFormat() + filters = '' + quiet = _Quiet() + counting_style = '' + recursive = False + for (opt, val) in opts: if opt == '--help': PrintUsage(None) - elif opt == '--srcs': - global _source_regex + if opt == '--version': + PrintVersion() + elif opt == '--output': + if val not in ('emacs', 'vs7', 'eclipse', 'junit', 'sed', 'gsed'): + PrintUsage('The only allowed output formats are emacs, vs7, eclipse ' + 'sed, gsed and junit.') + output_format = val + elif opt == '--quiet': + quiet = True + elif opt == '--verbose' or opt == '--v': + verbosity = int(val) + elif opt == '--filter': + filters = val + if not filters: + PrintCategories() + elif opt == '--counting': + if val not in ('total', 'toplevel', 'detailed'): + PrintUsage('Valid counting options are total, toplevel, and detailed') + counting_style = val + elif opt == '--root': + global _root + _root = val + elif opt == '--repository': + global _repository + _repository = val + elif opt == '--linelength': + global _line_length try: - _source_regex = regex.compile(val) + _line_length = int(val) except ValueError: - PrintUsage('Extensions must be comma seperated list.') + PrintUsage('Line length must be digits.') + elif opt == '--exclude': + global _excludes + if not _excludes: + _excludes = set() + _excludes.update(glob.glob(val)) + elif opt == '--extensions': + ProcessExtensionsOption(val) elif opt == '--headers': - global _header_regex - try: - _header_regex = regex.compile(val) - except ValueError: - PrintUsage('Extensions must be comma seperated list.') + ProcessHppHeadersOption(val) + elif opt == '--recursive': + recursive = True + elif opt == '--includeorder': + ProcessIncludeOrderOption(val) + elif opt == '--config': + global _config_filename + _config_filename = val + if os.path.basename(_config_filename) != _config_filename: + PrintUsage('Config file name must not include directory components.') if not filenames: PrintUsage('No files were specified.') + if recursive: + filenames = _ExpandDirectories(filenames) + + if _excludes: + filenames = _FilterExcludedFiles(filenames) + + _SetOutputFormat(output_format) + _SetQuiet(quiet) + _SetVerboseLevel(verbosity) + _SetFilters(filters) + _SetCountingStyle(counting_style) + + filenames.sort() return filenames +def _ParseFilterSelector(parameter): + """Parses the given command line parameter for file- and line-specific + exclusions. + readability/casting:file.cpp + readability/casting:file.cpp:43 + + Args: + parameter: The parameter value of --filter + + Returns: + [category, filename, line]. + Category is always given. + Filename is either a filename or empty if all files are meant. + Line is either a line in filename or -1 if all lines are meant. + """ + colon_pos = parameter.find(":") + if colon_pos == -1: + return parameter, "", -1 + category = parameter[:colon_pos] + second_colon_pos = parameter.find(":", colon_pos + 1) + if second_colon_pos == -1: + return category, parameter[colon_pos + 1:], -1 + else: + return category, parameter[colon_pos + 1: second_colon_pos], \ + int(parameter[second_colon_pos + 1:]) + +def _ExpandDirectories(filenames): + """Searches a list of filenames and replaces directories in the list with + all files descending from those directories. Files with extensions not in + the valid extensions list are excluded. + + Args: + filenames: A list of files or directories + + Returns: + A list of all files that are members of filenames or descended from a + directory in filenames + """ + expanded = set() + for filename in filenames: + if not os.path.isdir(filename): + expanded.add(filename) + continue + + for root, _, files in os.walk(filename): + for loopfile in files: + fullname = os.path.join(root, loopfile) + if fullname.startswith('.' + os.path.sep): + fullname = fullname[len('.' + os.path.sep):] + expanded.add(fullname) + + filtered = [] + for filename in expanded: + if os.path.splitext(filename)[1][1:] in GetAllExtensions(): + filtered.append(filename) + return filtered + +def _FilterExcludedFiles(fnames): + """Filters out files listed in the --exclude command line switch. File paths + in the switch are evaluated relative to the current working directory + """ + exclude_paths = [os.path.abspath(f) for f in _excludes] + # because globbing does not work recursively, exclude all subpath of all excluded entries + return [f for f in fnames + if not any(e for e in exclude_paths + if _IsParentOrSame(e, os.path.abspath(f)))] + +def _IsParentOrSame(parent, child): + """Return true if child is subdirectory of parent. + Assumes both paths are absolute and don't contain symlinks. + """ + parent = os.path.normpath(parent) + child = os.path.normpath(child) + if parent == child: + return True + + prefix = os.path.commonprefix([parent, child]) + if prefix != parent: + return False + # Note: os.path.commonprefix operates on character basis, so + # take extra care of situations like '/foo/ba' and '/foo/bar/baz' + child_suffix = child[len(prefix):] + child_suffix = child_suffix.lstrip(os.sep) + return child == os.path.join(prefix, child_suffix) def main(): filenames = ParseArguments(sys.argv[1:]) @@ -3725,7 +6948,13 @@ def main(): _cpplint_state.ResetErrorCounts() for filename in filenames: - ProcessFile(filename) + ProcessFile(filename, _cpplint_state.verbose_level) + # If --quiet is passed, suppress printing error count unless there are errors. + if not _cpplint_state.quiet or _cpplint_state.error_count > 0: + _cpplint_state.PrintErrorCounts() + + if _cpplint_state.output_format == 'junit': + sys.stderr.write(_cpplint_state.FormatJUnitXML()) finally: sys.stderr = backup_err diff --git a/wpiformat/wpiformat/lint.py b/wpiformat/wpiformat/lint.py index 691b6c8..0174bd3 100644 --- a/wpiformat/wpiformat/lint.py +++ b/wpiformat/wpiformat/lint.py @@ -8,6 +8,7 @@ required it to be used as a module. """ +import os import sys from wpiformat import cpplint @@ -24,21 +25,40 @@ def run_batch(config_file, names): # Prepare arguments to cpplint.py saved_argv = sys.argv - # Prepare source file and header file regex strings - srcs = config_file.group("cppSrcFileInclude") - if srcs: - srcs = "|".join(srcs) - else: - srcs = "a^" - headers = config_file.group("cHeaderFileInclude") + config_file.group( + exclusion_filters = [ + "build/c++11", + "build/c++17", + "build/header_guard", + "build/include_order", + "build/include_subdir", + "build/namespaces", + "legal/copyright", + "readability/check", + "readability/todo", + "runtime/references", + "runtime/string", + "whitespace/braces", + "whitespace/indent", + "whitespace/indent_namespace", + "whitespace/line_length", + "whitespace/newline", + "whitespace/parens", + ] + + # Prepare header file extensions + header_exts = [] + for pattern in config_file.group("cHeaderFileInclude") + config_file.group( "cppHeaderFileInclude" - ) - if headers: - headers = "|".join(headers) - else: - headers = "a^" + ): + basename = os.path.basename(pattern) + header_exts.append(basename[basename.rfind(".") + 1 :].rstrip("$")) - sys.argv = ["cpplint.py", "--srcs=" + srcs, "--headers=" + headers] + names + sys.argv = [ + "cpplint.py", + "--filter=-" + ",-".join(exclusion_filters), + "--headers=" + ",".join(header_exts) if header_exts else "", + "--quiet", + ] + names # Run cpplint.py try: