Skip to content

Commit 84d04d2

Browse files
committed
Do not perform domain substitutions within comments
Covers C/C++, Java, JavaScript and Python file extensions
1 parent 1c519fc commit 84d04d2

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed

utils/domain_substitution.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import io
1616
import os
1717
import re
18+
import string
1819
import tarfile
1920
import tempfile
2021
import zlib
@@ -102,19 +103,48 @@ def _substitute_path(path, regex_iter):
102103
continue
103104
if not content:
104105
raise UnicodeDecodeError('Unable to decode with any encoding: %s' % path)
106+
107+
replace_comments = (path.suffix == '.java' or path.suffix == '.cc' or path.suffix == '.h'
108+
or path.suffix == '.js' or path.suffix == '.cpp' or path.suffix == '.c')
109+
if replace_comments:
110+
content = re.sub('(\/\*.*?\*/\n|\/\/.*?\n)', replaceComments, content, flags=re.DOTALL)
111+
if path.suffix == '.py':
112+
content = re.sub('(#.*?\n)', replaceComments, content, flags=re.DOTALL)
113+
replace_comments = True
114+
105115
file_subs = 0
106116
for regex_pair in regex_iter:
107117
content, sub_count = regex_pair.pattern.subn(regex_pair.replacement, content)
108118
file_subs += sub_count
109119
if file_subs > 0:
120+
if replace_comments:
121+
## restore comments
122+
content = re.sub(place_holder + '(\d+):', restoreComments, content)
123+
comments = []
124+
110125
substituted_content = content.encode(encoding)
111126
input_file.seek(0)
112127
input_file.write(content.encode(encoding))
113128
input_file.truncate()
114129
return (zlib.crc32(substituted_content), original_content)
130+
comments = []
115131
return (None, None)
116132

117133

134+
# Placeholder scheme that shields source comments from domain substitution.
# Before the substitution regexes run, each comment is swapped for
# ``place_holder + <1-based index> + ':'``; afterwards the placeholders are
# swapped back for the original comment text.
#
# The marker is a fixed literal (it is not generated at runtime); it only has
# to be unlikely to appear in real source files.
place_holder = ':C7yae7ozv:'

# Comments captured by replaceComments(), in order of capture. The entry at
# list position i belongs to the placeholder numbered i + 1.
# NOTE(review): the ``comments = []`` statements in _substitute_path appear to
# rebind a function-local name rather than empty this list -- confirm; clearing
# in place (``comments.clear()``) would be needed to actually reset it.
comments = []


def replaceComments(m):
    """Stash the matched comment and return the placeholder that replaces it.

    Intended as the replacement callback for ``re.sub``.

    Args:
        m: regex match object whose full match (``m.group(0)``) is the
            comment text to protect.

    Returns:
        The placeholder string ``place_holder + N + ':'`` where N is the
        1-based position of the comment in the module-level ``comments`` list.
    """
    comments.append(m.group(0))
    # Numbering is 1-based: the index is the list length *after* appending.
    return place_holder + str(len(comments)) + ':'


def restoreComments(m):
    """Return the stored comment for a placeholder match.

    Intended as the replacement callback for ``re.sub`` with a pattern of the
    form ``place_holder + r'(\\d+):'``.

    Args:
        m: regex match object whose group 1 is the decimal placeholder index.

    Returns:
        The comment text saved under that index. If the index does not refer
        to a stored comment, the matched text is returned unchanged.
    """
    index = int(m.group(1))
    # Guard the lookup: index 0 would otherwise wrap around to the last stored
    # comment, and an index past the end would raise IndexError. Text that
    # merely looks like a placeholder is left as it was.
    if not 1 <= index <= len(comments):
        return m.group(0)
    return comments[index - 1]
146+
147+
118148
def _validate_file_index(index_file, resolved_tree, cache_index_files):
119149
"""
120150
Validation of file index and hashes against the source tree.

0 commit comments

Comments
 (0)