|
15 | 15 | import io
|
16 | 16 | import os
|
17 | 17 | import re
|
| 18 | +import string |
18 | 19 | import tarfile
|
19 | 20 | import tempfile
|
20 | 21 | import zlib
|
@@ -102,19 +103,48 @@ def _substitute_path(path, regex_iter):
|
102 | 103 | continue
|
103 | 104 | if not content:
|
104 | 105 | raise UnicodeDecodeError('Unable to decode with any encoding: %s' % path)
|
| 106 | + |
| 107 | + replace_comments = (path.suffix == '.java' or path.suffix == '.cc' or path.suffix == '.h' |
| 108 | + or path.suffix == '.js' or path.suffix == '.cpp' or path.suffix == '.c') |
| 109 | + if replace_comments: |
| 110 | + content = re.sub('(\/\*.*?\*/\n|\/\/.*?\n)', replaceComments, content, flags=re.DOTALL) |
| 111 | + if path.suffix == '.py': |
| 112 | + content = re.sub('(#.*?\n)', replaceComments, content, flags=re.DOTALL) |
| 113 | + replace_comments = True |
| 114 | + |
105 | 115 | file_subs = 0
|
106 | 116 | for regex_pair in regex_iter:
|
107 | 117 | content, sub_count = regex_pair.pattern.subn(regex_pair.replacement, content)
|
108 | 118 | file_subs += sub_count
|
109 | 119 | if file_subs > 0:
|
| 120 | + if replace_comments: |
| 121 | + ## restore comments |
| 122 | + content = re.sub(place_holder + '(\d+):', restoreComments, content) |
| 123 | + comments = [] |
| 124 | + |
110 | 125 | substituted_content = content.encode(encoding)
|
111 | 126 | input_file.seek(0)
|
112 | 127 | input_file.write(content.encode(encoding))
|
113 | 128 | input_file.truncate()
|
114 | 129 | return (zlib.crc32(substituted_content), original_content)
|
| 130 | + comments = [] |
115 | 131 | return (None, None)
|
116 | 132 |
|
117 | 133 |
|
# Comment stash: comments are swapped out of a file's text for numbered
# placeholder tokens, and swapped back in afterwards.
#
# The token prefix is a fixed literal chosen to look arbitrary; it is not
# generated at run time.
place_holder = ':C7yae7ozv:'

# Stashed comment texts, in the order they were seen.  Token number N refers
# to comments[N - 1] (token numbers start at 1).
# NOTE(review): callers that want to empty this list should mutate it in
# place (comments.clear()); a plain `comments = []` inside a function without
# a `global` declaration presumably only binds a local name -- confirm
# against the call sites.
comments = []


def replaceComments(m):
    """Stash the matched text and return the placeholder token for it.

    Intended as the replacement callback for ``re.sub``.

    :param m: match object; its whole match (``m.group(0)``) is appended to
        the module-level ``comments`` list.
    :returns: ``place_holder`` followed by the 1-based position of the stashed
        text in ``comments`` and a trailing ``':'``.
    """
    comments.append(m.group(0))
    token_number = len(comments)
    return '{}{}:'.format(place_holder, token_number)


def restoreComments(m):
    """Return the stashed text that a placeholder token stands for.

    Intended as the replacement callback for ``re.sub``.

    :param m: match object whose group 1 holds the decimal token number
        produced by ``replaceComments``.
    :returns: the entry of ``comments`` at that 1-based position.
    :raises IndexError: if the number is larger than the number of stashed
        entries.
    """
    token_number = int(m.group(1))
    return comments[token_number - 1]
| 146 | + |
| 147 | + |
118 | 148 | def _validate_file_index(index_file, resolved_tree, cache_index_files):
|
119 | 149 | """
|
120 | 150 | Validation of file index and hashes against the source tree.
|
|
0 commit comments