Skip to content

Commit 42639fe

Browse files
committed
Use the config.ci.yml for the training defaults
1 parent 9d355d8 commit 42639fe

File tree

6 files changed

+52
-138
lines changed

6 files changed

+52
-138
lines changed

taskcluster/configs/config.ci.yml

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,13 @@ experiment:
99

1010
teacher-ensemble: 1
1111

12-
mono-max-sentences-src: 10000
13-
mono-max-sentences-trg: 10000
14-
spm-sample-size: 10000
12+
mono-max-sentences-src:
13+
total: 10000
14+
per-dataset: 10000
15+
mono-max-sentences-trg:
16+
total: 10000
17+
per-dataset: 10000
18+
spm-sample-size: 1000
1519
spm-vocab-size: 1000
1620

1721
best-model: chrf
@@ -20,50 +24,57 @@ experiment:
2024
opuscleaner-mode: "custom"
2125
teacher-mode: "two-stage"
2226

23-
2427
bicleaner:
2528
default-threshold: 0.5
2629
dataset-thresholds:
2730
opus_ada83/v1: 0.0
2831
opus_ELRC-3075-wikipedia_health/v1: 0.6
2932

33+
min-fluency-threshold:
34+
mono-src: 0.8
35+
mono-trg: 0.9
36+
3037
marian-args:
3138
training-backward:
32-
disp-freq: "1"
33-
save-freq: "5"
34-
valid-freq: "10"
35-
after: 10u
39+
# Run training for 50 updates, displaying progress every 2 updates. Validate the
40+
# model only once, and save it at updates 25 and 50.
41+
disp-freq: "2"
42+
save-freq: "25"
43+
valid-freq: "50"
44+
after: 50u
3645
dim-vocabs: "1000 1000"
3746
training-teacher:
3847
disp-freq: "1"
39-
save-freq: "5"
40-
valid-freq: "10"
41-
after: 10u
48+
save-freq: "25"
49+
valid-freq: "50"
50+
after: 50u
4251
dim-vocabs: "1000 1000"
4352
task: transformer-base
4453
training-student:
4554
disp-freq: "1"
46-
save-freq: "5"
47-
valid-freq: "10"
48-
after: 10u
55+
save-freq: "25"
56+
valid-freq: "50"
57+
after: 50u
4958
dim-vocabs: "1000 1000"
5059
training-student-finetuned:
5160
disp-freq: "1"
52-
save-freq: "5"
53-
valid-freq: "10"
54-
after: 10u
55-
dim-vocabs: "1000 1000"
61+
save-freq: "25"
62+
valid-freq: "50"
63+
after: 50u
64+
dim-vocabs: "1000 1000"
5665
decoding-backward:
5766
mini-batch-words: "2000"
5867
decoding-teacher:
5968
mini-batch-words: "1000"
6069
precision: float16
6170

71+
# Ensure that we have adequate coverage for dataset types in CI.
6272
datasets:
6373
train:
6474
- opus_ada83/v1
6575
- opus_ELRC-3075-wikipedia_health/v1
6676
- url_https://storage.googleapis.com/releng-translations-dev/data/en-ru/pytest-dataset.[LANG].zst
77+
- mtdata_ELRC-web_acquired_data_related_to_scientific_research-1-eng-rus
6778
devtest:
6879
- flores_dev
6980
- sacrebleu_aug-upper_wmt19
@@ -76,7 +87,8 @@ datasets:
7687
- news-crawl_news.2007
7788
- opus_tldr-pages/v2023-08-29 # 39,646 sentences
7889

79-
wandb-publication: false
90+
# Publishes to the "ci" project.
91+
wandb-publication: true
8092
target-stage: all
8193
taskcluster:
8294
split-chunks: 2

taskcluster/test/test_default_params.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
from taskgraph.taskgraph import TaskGraph
44

5-
from translations_taskgraph.parameters import get_defaults
5+
from translations_taskgraph.parameters import get_ci_training_config
66

7-
PARAMS = deepcopy(get_defaults(None))
7+
PARAMS = deepcopy(get_ci_training_config())
88
PARAMS["target_tasks_method"] = "train-target-tasks"
99

1010
MOCK_REQUESTS = [

taskcluster/test/test_target_stage.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
from taskgraph.taskgraph import TaskGraph
44

5-
from translations_taskgraph.parameters import get_defaults
5+
from translations_taskgraph.parameters import get_ci_training_config
66

7-
PARAMS = deepcopy(get_defaults(None))
7+
PARAMS = deepcopy(get_ci_training_config())
88
PARAMS["target_tasks_method"] = "train-target-tasks"
99
PARAMS["training_config"]["target-stage"] = "train-teacher"
1010

taskcluster/test/test_training_continuation_backwards.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
from taskgraph.taskgraph import TaskGraph
44

5-
from translations_taskgraph.parameters import get_defaults
5+
from translations_taskgraph.parameters import get_ci_training_config
66

7-
PARAMS = deepcopy(get_defaults(None))
7+
PARAMS = deepcopy(get_ci_training_config())
88
PARAMS["target_tasks_method"] = "train-target-tasks"
99
PARAMS["training_config"]["experiment"]["pretrained-models"] = {
1010
"train-backwards": {

taskcluster/translations_taskgraph/actions/train.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from taskgraph.taskgraph import TaskGraph
1111
from taskgraph.util.taskcluster import get_ancestors, get_artifact
1212

13-
from translations_taskgraph.parameters import get_defaults
13+
from translations_taskgraph.parameters import get_ci_training_config
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -34,7 +34,7 @@ def can_train(parameters):
3434
)
3535

3636

37-
defaults = get_defaults("")["training_config"]
37+
defaults = get_ci_training_config()["training_config"]
3838

3939

4040
def validate_pretrained_models(params):

taskcluster/translations_taskgraph/parameters.py

Lines changed: 13 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -2,119 +2,21 @@
22
# License, v. 2.0. If a copy of the MPL was not distributed with this
33
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
44

5+
from pathlib import Path
56
from taskgraph.parameters import extend_parameters_schema
67
from voluptuous import Extra, Optional, Required
8+
import yaml
79

810

9-
# These defaults line up with the `config.ci.yml` pipeline as much as possible.
10-
# Their purpose is to provide a minimal config with a few datasets that can run
11-
# the entire pipeline reasonably quickly to validate changes to the pipeline
12-
# itself. Any real training should be overriding most, if not all, of these
13-
# via the input to the `train` action.
14-
def get_defaults(_) -> dict:
15-
return {
16-
"training_config": {
17-
"target-stage": "all",
18-
"experiment": {
19-
"name": "ci",
20-
"src": "ru",
21-
"trg": "en",
22-
"teacher-ensemble": 1,
23-
"teacher-mode": "two-stage",
24-
"mono-max-sentences-trg": {"total": 10000, "per-dataset": 10000},
25-
"mono-max-sentences-src": {"total": 10000, "per-dataset": 10000},
26-
"spm-sample-size": 10000,
27-
"spm-vocab-size": 1000,
28-
"best-model": "chrf",
29-
"use-opuscleaner": "true",
30-
"opuscleaner-mode": "custom",
31-
"bicleaner": {
32-
"default-threshold": 0.5,
33-
"dataset-thresholds": {
34-
"opus_ada83/v1": 0.0,
35-
"opus_ELRC-3075-wikipedia_health/v1": 0.6,
36-
},
37-
},
38-
"min-fluency-threshold": {
39-
"mono-src": 0.8,
40-
"mono-trg": 0.9,
41-
},
42-
},
43-
"marian-args": {
44-
"training-backward": {
45-
"disp-freq": "2",
46-
"save-freq": "25",
47-
"valid-freq": "50",
48-
"after": "50u",
49-
"dim-vocabs": "1000 1000",
50-
},
51-
"training-teacher": {
52-
"disp-freq": "1",
53-
"save-freq": "25",
54-
"valid-freq": "50",
55-
"after": "50u",
56-
"dim-vocabs": "1000 1000",
57-
"task": "transformer-base",
58-
},
59-
"training-student": {
60-
"disp-freq": "1",
61-
"save-freq": "25",
62-
"valid-freq": "50",
63-
"after": "50u",
64-
"dim-vocabs": "1000 1000",
65-
},
66-
"training-student-finetuned": {
67-
"disp-freq": "1",
68-
"save-freq": "25",
69-
"valid-freq": "50",
70-
"after": "50u",
71-
"dim-vocabs": "1000 1000",
72-
},
73-
"decoding-backward": {
74-
"mini-batch-words": "2000",
75-
},
76-
"decoding-teacher": {
77-
"mini-batch-words": "1000",
78-
"precision": "float16",
79-
},
80-
},
81-
# These will never be used in practice, but specifying them ensures
82-
# that we always generate at least one task for each kind, which helps
83-
# to avoid bustage that doesn't show up until we run the training action.
84-
"datasets": {
85-
"train": [
86-
"opus_ada83/v1",
87-
"opus_ELRC-3075-wikipedia_health/v1",
88-
"url_https://storage.googleapis.com/releng-translations-dev/data/en-ru/pytest-dataset.[LANG].zst",
89-
"mtdata_ELRC-web_acquired_data_related_to_scientific_research-1-eng-rus",
90-
],
91-
"devtest": [
92-
"flores_dev",
93-
"sacrebleu_aug-upper_wmt19",
94-
],
95-
"test": [
96-
"flores_devtest",
97-
],
98-
"mono-src": [
99-
"news-crawl_news.2008",
100-
"opus_tldr-pages/v2023-08-29",
101-
],
102-
"mono-trg": [
103-
"news-crawl_news.2007",
104-
"opus_tldr-pages/v2023-08-29",
105-
],
106-
},
107-
# Taskcluster-specific configuration
108-
"taskcluster": {
109-
"split-chunks": 2,
110-
"worker-classes": {
111-
"default": "gcp-spot",
112-
},
113-
},
114-
# Disable Weight & Biases publication on CI
115-
"wandb-publication": True,
116-
},
117-
}
11+
# By default, provide a very minimal config for CI that runs very quickly. This allows
12+
# the pipeline to be validated in CI. The production training configs should override
13+
# all of these values.
14+
def get_ci_training_config(_=None) -> dict:
15+
vcs_path = (Path(__file__).parent / "../..").resolve()
16+
config_path = vcs_path / "taskcluster/configs/config.ci.yml"
17+
18+
with config_path.open() as file:
19+
return {"training_config": yaml.safe_load(file)}
11820

11921

12022
extend_parameters_schema(
@@ -184,7 +86,7 @@ def get_defaults(_) -> dict:
18486
Optional("wandb-publication"): bool,
18587
},
18688
},
187-
defaults_fn=get_defaults,
89+
defaults_fn=get_ci_training_config,
18890
)
18991

19092

@@ -198,4 +100,4 @@ def deep_setdefault(dict_, defaults):
198100

199101
def get_decision_parameters(graph_config, parameters):
200102
parameters.setdefault("training_config", {})
201-
deep_setdefault(parameters, get_defaults(""))
103+
deep_setdefault(parameters, get_ci_training_config())

0 commit comments

Comments
 (0)