Commit b1ef10f

Improve performance of linker symbol cache under high contention. NFC (#24029)
This drastically improves the performance of running the test suite, even though most users probably won't see any effect.

Prior to this change, the symbol generation process itself always happened with a global symbol-list lock held. Now we use a per-symbol-list lock, which means that only linker processes with identical linker flags contend for the lock.

before:
- hot: 0m25.259s
- cold: 1m36.121s

after:
- hot: 0m21.371s
- cold: 0m20.350s

This also happens to fix #18622, since it avoids using `cache.get()`, which does logging.
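For illustration, here is a minimal standalone sketch of the per-symbol-list locking pattern described above, built on the same filelock package the linker uses. The helper name get_or_create and the compute callback are hypothetical, not part of this commit:

import json
import os

import filelock


def get_or_create(cache_dir, key, compute):
  # Hypothetical helper sketching the commit's idea: one lock file per
  # cache entry, so processes working on different keys (here, different
  # linker-flag hashes) never contend, unlike a single global cache lock.
  path = os.path.join(cache_dir, key + '.json')
  os.makedirs(cache_dir, exist_ok=True)
  with filelock.FileLock(path + '.lock'):
    if os.path.exists(path):
      # Cache hit: some other process already generated this entry.
      with open(path) as f:
        return json.load(f)
    # Cache miss: generate and write while holding only this entry's lock.
    result = compute()
    with open(path, 'w') as f:
      json.dump(result, f, separators=(',', ':'), indent=2)
    return result

Because each entry has its own lock, a cold run in which many processes link with the same flags serializes only on that one entry, while links with different flags proceed in parallel.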
1 parent e014359 · commit b1ef10f

File tree


tools/link.py

Lines changed: 25 additions & 23 deletions
@@ -220,35 +220,37 @@ def get_js_sym_info():
     input_files.append(read_file(jslib))
   content = '\n'.join(input_files)
   content_hash = hashlib.sha1(content.encode('utf-8')).hexdigest()
+  library_syms = None

-  def build_symbol_list(filename):
-    """Only called when there is no existing symbol list for a given content hash.
-    """
-    library_syms = generate_js_sym_info()
-
-    write_file(filename, json.dumps(library_syms, separators=(',', ':'), indent=2))
-
-  # We need to use a separate lock here for symbol lists because, unlike with system libraries,
-  # it's normal for these files to get pruned as part of normal operation. This means that they
-  # can be deleted between the `cache.get()` and the `read_file`.
-  with filelock.FileLock(cache.get_path('symbol_lists.lock')):
-    filename = cache.get(f'symbol_lists/{content_hash}.json', build_symbol_list)
-    library_syms = json.loads(read_file(filename))
-
-    # Limit of the overall size of the cache.
-    # This code will get test coverage since a full test run of `other` or `core`
-    # generates ~1000 unique symbol lists.
-    cache_limit = 500
-    root = cache.get_path('symbol_lists')
-    if len(os.listdir(root)) > cache_limit:
+  cache_file = str(cache.get_path(f'symbol_lists/{content_hash}.json'))
+
+  utils.safe_ensure_dirs(cache.get_path('symbol_lists'))
+  with filelock.FileLock(cache_file + '.lock'):
+    if os.path.exists(cache_file):
+      # Cache hit, read the file
+      library_syms = json.loads(read_file(cache_file))
+    else:
+      # Cache miss, generate the symbol list and write the file
+      library_syms = generate_js_sym_info()
+      write_file(cache_file, json.dumps(library_syms, separators=(',', ':'), indent=2))
+
+  # Limit of the overall size of the cache.
+  # This code will get test coverage since a full test run of `other` or `core`
+  # generates ~1000 unique symbol lists.
+  cache_limit = 500
+  root = cache.get_path('symbol_lists')
+  if len([f for f in os.listdir(root) if not f.endswith('.lock')]) > cache_limit:
+    with filelock.FileLock(cache.get_path('symbol_lists.lock')):
       files = []
       for f in os.listdir(root):
-        f = os.path.join(root, f)
-        files.append((f, os.path.getmtime(f)))
+        if not f.endswith('.lock'):
+          f = os.path.join(root, f)
+          files.append((f, os.path.getmtime(f)))
       files.sort(key=lambda x: x[1])
       # Delete all but the newest N files
       for f, _ in files[:-cache_limit]:
-        delete_file(f)
+        with filelock.FileLock(f + '.lock'):
+          delete_file(f)

   return library_syms

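The pruning branch is the subtle part of the new code, since each entry's companion .lock file now lives in the same directory and must not be counted or deleted as if it were a cache entry. Here is a minimal standalone sketch of the same mtime-based pruning, under the layout assumed above; the name prune_symbol_lists and the limit parameter are illustrative, not part of the commit:

import os

import filelock


def prune_symbol_lists(root, limit):
  # Hypothetical sketch of the pruning logic in the diff above.
  # Collect (path, mtime) for real cache entries only; skip the
  # per-entry .lock files that share the directory.
  entries = []
  for name in os.listdir(root):
    if name.endswith('.lock'):
      continue
    path = os.path.join(root, name)
    entries.append((path, os.path.getmtime(path)))
  entries.sort(key=lambda x: x[1])  # oldest first
  # Delete all but the newest `limit` entries, taking each entry's own
  # lock first so no reader loses the file out from under it mid-read.
  for path, _ in entries[:-limit]:
    with filelock.FileLock(path + '.lock'):
      os.remove(path)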