From 23844382f7f59238ab52cba3f5dd4145971fc8d6 Mon Sep 17 00:00:00 2001
From: csagan5 <32685696+csagan5@users.noreply.github.com>
Date: Tue, 5 May 2020 08:59:30 +0200
Subject: [PATCH] Do not perform domain substitutions within comments

Covers C/C++, Java, JavaScript and Python file extensions
---
 utils/domain_substitution.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/utils/domain_substitution.py b/utils/domain_substitution.py
index f00d7b2d1b..2ffe594073 100755
--- a/utils/domain_substitution.py
+++ b/utils/domain_substitution.py
@@ -102,11 +102,34 @@ def _substitute_path(path, regex_iter):
                 continue
         if not content:
             raise UnicodeDecodeError('Unable to decode with any encoding: %s' % path)
+
+        # Swap comments for numbered placeholders so the domain regexes below
+        # cannot touch them; they are restored just before the file is written.
+        comments = []
+        comment_regex = None
+        if path.suffix in ('.java', '.cc', '.h', '.js', '.cpp', '.c'):
+            # /* block */ comments ending a line, or runs of // line comments
+            # that start the file or follow whitespace ("http://" is skipped)
+            comment_regex = r'(/\*.*?\*/|(?:(?:^|\s)//.*?)+)(?:\n|$)'
+        elif path.suffix == '.py':
+            comment_regex = r'#.*?(?:\n|$)'
+        if comment_regex:
+            content = re.sub(
+                comment_regex,
+                lambda match: _replace_comments(comments, match.group(0)),
+                content,
+                flags=re.DOTALL)
+
         file_subs = 0
         for regex_pair in regex_iter:
             content, sub_count = regex_pair.pattern.subn(regex_pair.replacement, content)
             file_subs += sub_count
         if file_subs > 0:
+            if comments:
+                # restore comments
+                content = re.sub(PLACE_HOLDER + r'(\d+):',
+                                 lambda match: _restore_comments(comments, match.group(1)), content)
+
             substituted_content = content.encode(encoding)
             input_file.seek(0)
             input_file.write(content.encode(encoding))
@@ -115,6 +138,28 @@ def _substitute_path(path, regex_iter):
         return (None, None)
 
 
+# Fixed marker text that stands in for a comment while substitutions run;
+# chosen to be unlikely to appear in real source files.
+PLACE_HOLDER = ':C7yae7ozv:'
+
+
+def _replace_comments(comments, comment):
+    """
+    Appends comment to the comments list and returns the placeholder token
+    (PLACE_HOLDER, the comment's 1-based position, then ':') that replaces it.
+    """
+    comments.append(comment)
+    return PLACE_HOLDER + str(len(comments)) + ':'
+
+
+def _restore_comments(comments, index):
+    """
+    Returns the comment stored at the 1-based position index, which is the
+    digit string captured from a placeholder token.
+    """
+    return comments[int(index) - 1]
+
+
 def _validate_file_index(index_file, resolved_tree, cache_index_files):
     """
     Validation of file index and hashes against the source tree.