Skip to content

Commit ddea28d

Browse files
authored
Bug 1907169: fix performance regression in in_tree_actions r?#releng-reviewers! (#17)
Switching from `asyncio.gather` to individual `awaits` added a ton of overhead. Restoring `gather` usage required some refactoring, so I've split it out for easier reviewing. Differential Revision: https://phabricator.services.mozilla.com/D216207
1 parent 9d24fc6 commit ddea28d

File tree

1 file changed

+50
-54
lines changed

1 file changed

+50
-54
lines changed

src/ciadmin/generate/in_tree_actions.py

Lines changed: 50 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
55
# obtain one at http://mozilla.org/MPL/2.0/.
66

7+
import asyncio
78
import datetime
9+
import functools
810
import hashlib
911
import textwrap
1012

@@ -48,69 +50,59 @@ def should_hash(project):
4850
else:
4951
return False
5052

53+
# hash the value of this .taskcluster.yml. Note that this must match the
54+
# hashing in taskgraph/actions/registry.py
55+
def hash(val):
56+
return hashlib.sha256(val).hexdigest()[:10]
57+
5158
tcyml_projects = list(filter(should_hash, projects))
5259
futures = []
60+
rv = {}
5361
for p in tcyml_projects:
54-
branch_futures = {}
55-
for b in p.branches:
62+
rv[p.alias] = {}
63+
64+
for b in set([b.name for b in p.branches] + [p.default_branch]):
5665
# Can't fetch a .taskcluster.yml for a globbed branch
5766
# TODO: perhaps we should do this for partly globbed branches,
5867
# eg: release* ?
5968
# we'd have to fetch that list from the server to do that
60-
if "*" not in b.name:
61-
branch_futures[b.name] = tcyml.get(
62-
p.repo, repo_type=p.repo_type, default_branch=b.name
69+
if "*" not in b:
70+
71+
def process(project, branch_name, task):
72+
tcy = task.result()
73+
74+
# some ancient projects have no .taskcluster.yml
75+
if not tcy:
76+
return
77+
78+
# some old projects have .taskcluster.yml's that are not valid YAML
79+
# (back in the day, mozilla-taskcluster used mustache to templatize
80+
# the text before parsing it..). Ignore those projects.
81+
try:
82+
parsed = yaml.safe_load(tcy)
83+
except Exception:
84+
return
85+
86+
# some slightly less old projects have
87+
# {tasks: $let: .., in: [..]} instead of the expected
88+
# {tasks: [{$let: .., in: ..}]}. Those can be ignored too.
89+
if not isinstance(parsed["tasks"], list):
90+
return
91+
92+
rv[project.alias][branch_name] = {
93+
"parsed": parsed,
94+
"hash": hash(tcy),
95+
"level": project.get_level(branch_name),
96+
"alias": project.alias,
97+
}
98+
99+
future = asyncio.ensure_future(
100+
tcyml.get(p.repo, repo_type=p.repo_type, default_branch=b)
63101
)
102+
future.add_done_callback(functools.partial(process, p, b))
103+
futures.append(future)
64104

65-
if p.default_branch not in branch_futures:
66-
branch_futures[p.default_branch] = tcyml.get(
67-
p.repo, repo_type=p.repo_type, default_branch=p.default_branch
68-
)
69-
70-
futures.append(branch_futures)
71-
72-
tcymls = []
73-
for branches in futures:
74-
branch_tcymls = {}
75-
for b in branches:
76-
branch_tcymls[b] = await branches[b]
77-
78-
tcymls.append(branch_tcymls)
79-
80-
# hash the value of this .taskcluster.yml. Note that this must match the
81-
# hashing in taskgraph/actions/registry.py
82-
def hash(val):
83-
return hashlib.sha256(val).hexdigest()[:10]
84-
85-
rv = {}
86-
for project, branch_tcymls in zip(tcyml_projects, tcymls):
87-
for branch_name, tcy in branch_tcymls.items():
88-
# some ancient projects have no .taskcluster.yml
89-
if not tcy:
90-
continue
91-
92-
# some old projects have .taskcluster.yml's that are not valid YAML
93-
# (back in the day, mozilla-taskcluster used mustache to templatize
94-
# the text before parsing it..). Ignore those projects.
95-
try:
96-
parsed = yaml.safe_load(tcy)
97-
except Exception:
98-
continue
99-
100-
# some slightly less old projects have {tasks: $let: .., in: [..]} instead
101-
# of the expected {tasks: [{$let: .., in: ..}]}. Those can be ignored too.
102-
if not isinstance(parsed["tasks"], list):
103-
continue
104-
105-
if project.alias not in rv:
106-
rv[project.alias] = {}
107-
108-
rv[project.alias][branch_name] = {
109-
"parsed": parsed,
110-
"hash": hash(tcy),
111-
"level": project.get_level(branch_name),
112-
"alias": project.alias,
113-
}
105+
await asyncio.gather(*futures)
114106
return rv
115107

116108

@@ -352,6 +344,10 @@ async def update_resources(resources):
352344
continue
353345
if project.trust_domain != action.trust_domain:
354346
continue
347+
if branch_name not in hashed_tcymls[project.alias]:
348+
# Branch didn't exist, or doesn't have a parseable tcyml
349+
continue
350+
355351
content, hash = (
356352
hashed_tcymls[project.alias][branch_name]["parsed"],
357353
hashed_tcymls[project.alias][branch_name]["hash"],

0 commit comments

Comments
 (0)