Commit 5691840

rebase on dev and cherry-pick
1 parent 4ce703c commit 5691840

6 files changed (+495, -30 lines)

composer/datasets/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@
 """
 from composer.datasets.ade20k import ADE20kDatasetHparams as ADE20kDatasetHparams
 from composer.datasets.brats import BratsDatasetHparams as BratsDatasetHparams
+from composer.datasets.c4 import C4DatasetHparams as C4DatasetHparams
 from composer.datasets.cifar10 import CIFAR10DatasetHparams as CIFAR10DatasetHparams
 from composer.datasets.dataloader import DataloaderHparams as DataloaderHparams
 from composer.datasets.dataloader import WrappedDataLoader as WrappedDataLoader

composer/datasets/c4.py

Lines changed: 360 additions & 0 deletions
Large diffs are not rendered by default.
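The new module itself isn't rendered, but the YAML config and tests below exercise its hparams fields. A hedged sketch of that surface (the dataclass layout is assumed; only the field names are taken from this diff, and the real class presumably subclasses DatasetHparams with yahp field metadata):

from dataclasses import dataclass

# Hedged sketch only: field names come from the YAML config and tests below;
# the actual C4DatasetHparams in composer/datasets/c4.py is not shown here.
@dataclass
class C4DatasetHparams:
    split: str            # "train" or "validation"
    max_samples: int      # cap on the number of samples drawn
    max_seq_len: int      # tokenized sequence length
    tokenizer_name: str   # Hugging Face tokenizer name, e.g. "gpt2"
    group_method: str     # how text is grouped into sequences, e.g. "concat"
    seed: int             # shuffle seed
    shuffle: bool
    drop_last: bool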

composer/datasets/dataset_registry.py

Lines changed: 3 additions & 1 deletion
@@ -2,6 +2,7 @@

 from composer.datasets.ade20k import ADE20kDatasetHparams
 from composer.datasets.brats import BratsDatasetHparams
+from composer.datasets.c4 import C4DatasetHparams
 from composer.datasets.cifar10 import CIFAR10DatasetHparams
 from composer.datasets.glue import GLUEHparams
 from composer.datasets.imagenet import ImagenetDatasetHparams
@@ -15,7 +16,8 @@
     "cifar10": CIFAR10DatasetHparams,
     "mnist": MNISTDatasetHparams,
     "lm": LMDatasetHparams,
-    "glue": GLUEHparams
+    "glue": GLUEHparams,
+    "c4": C4DatasetHparams,
 }

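With this entry, a `c4:` dataset block in YAML resolves to C4DatasetHparams like any other registered dataset. A minimal illustrative lookup; the constructor kwargs mirror the ones used in tests/test_dataset_registry.py below:

# Illustrative sketch: the registry maps YAML dataset keys to hparams classes.
from composer.trainer.trainer_hparams import dataset_registry

hparams_cls = dataset_registry["c4"]  # C4DatasetHparams
c4_hparams = hparams_cls(
    split="train",
    max_samples=1000,
    max_seq_len=100,
    tokenizer_name="gpt2",
    group_method="concat",
)
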
composer/trainer/trainer_hparams.py

Lines changed: 3 additions & 2 deletions
@@ -420,9 +420,10 @@ def validate(self):
         super().validate()

         if self.deepspeed is not None:
-            zero_stage = cast(int, self.deepspeed.get("zero_stage", 0))
+            self.deepspeed["zero_stage"] = cast(int, self.deepspeed.get("zero_stage", 0))
+            self.deepspeed["steps_per_print"] = cast(int, self.deepspeed.get("steps_per_print", 1e20))

-            if self.deterministic_mode and zero_stage > 0:
+            if self.deterministic_mode and self.deepspeed["zero_stage"] > 0:
                 raise ValueError("Deepspeed with zero stage > 0 is not compatible with deterministic mode")

             if isinstance(self.device, CPUDeviceHparams):
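The change writes the resolved defaults back into the dict so that downstream consumers of self.deepspeed see them, rather than leaving them in a local variable. Note that typing.cast is a no-op at runtime, so the 1e20 default for steps_per_print stays a float. A standalone sketch of the pattern (illustrative, not Composer code):

from typing import Any, Dict, cast

def normalize_deepspeed_config(config: Dict[str, Any]) -> Dict[str, Any]:
    # Write defaults back into the dict so downstream consumers see them.
    # typing.cast only informs type checkers and performs no runtime
    # conversion, so the 1e20 default remains a float.
    config["zero_stage"] = cast(int, config.get("zero_stage", 0))
    config["steps_per_print"] = cast(int, config.get("steps_per_print", 1e20))
    return config

cfg = normalize_deepspeed_config({"zero_stage": 2})
assert cfg["zero_stage"] == 2 and cfg["steps_per_print"] == 1e20
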
Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
+train_dataset:
+  c4:
+    split: train
+    max_samples: 3584000 # Compute-optimal 7.3e9 tok ~= 256[bs] * 14000[ba] * 2048[msl] = 3584000[sa] * 2048[msl]
+    max_seq_len: 2048
+    tokenizer_name: gpt2
+    group_method: concat
+    seed: 17
+    shuffle: true
+    drop_last: true
+val_dataset:
+  c4:
+    split: validation
+    max_samples: 102400 # Approx 100k samples
+    max_seq_len: 2048
+    tokenizer_name: gpt2
+    group_method: concat
+    seed: 17
+    shuffle: false
+    drop_last: false
+model:
+  gpt2:
+    use_pretrained: false
+    tokenizer_name: gpt2
+    model_config:
+      activation_function: gelu_new
+      architectures:
+      - GPT2LMHeadModel
+      attn_pdrop: 0.0
+      bos_token_id: 50256
+      embd_pdrop: 0.0
+      eos_token_id: 50256
+      initializer_range: 0.02
+      layer_norm_epsilon: 1.0e-05
+      model_type: gpt2
+      n_embd: 768
+      n_head: 12
+      n_inner: 3072
+      n_layer: 12
+      n_positions: 2048
+      resid_pdrop: 0.0
+      scale_attn_weights: true
+      summary_activation: null
+      summary_first_dropout: 0.0
+      summary_proj_to_labels: true
+      summary_type: cls_index
+      summary_use_proj: true
+      task_specific_params:
+        text-generation:
+          do_sample: true
+          max_length: 50
+      transformers_version: 4.16.2
+      use_cache: true
+      vocab_size: 50257
+optimizer:
+  decoupled_adamw:
+    lr: 6.0e-4
+    betas:
+    - 0.9
+    - 0.95
+    eps: 1.0e-08
+    weight_decay: 0.0
+schedulers:
+- cosine_decay_with_warmup:
+    warmup_time: 0.01dur
+loggers:
+- tqdm: {}
+max_duration: 1ep
+train_batch_size: 256 # 0.5e6 tok ~= 256[bs] * 2048[msl]
+grad_accum: 2 # 256[bs] / 8[devices] / 16[per_gpu_microbatch_size] = 2[ga], assuming 8xA100-80GB
+eval_batch_size: 128 # 128[bs] / 8[devices] = 16[per_gpu_microbatch_size], assuming 8xA100-80GB
+seed: 17
+device:
+  gpu: {}
+dataloader:
+  pin_memory: true
+  persistent_workers: true
+  num_workers: 1
+  timeout: 0
+  prefetch_factor: 2
+deepspeed:
+  zero_stage: 0
+precision: fp16
+grad_clip_norm: 1.0
+validate_every_n_batches: 1000
+validate_every_n_epochs: 1
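The inline comments encode the sizing arithmetic. A standalone check of those numbers (num_batches and the per-GPU microbatch size are taken from the comments, not from config keys):

# Standalone check of the sizing arithmetic in the YAML comments above.
batch_size = 256        # train_batch_size [bs]
max_seq_len = 2048      # [msl]
num_batches = 14000     # [ba], from the compute-optimal comment
devices = 8             # assuming 8x A100-80GB, per the comments
microbatch_size = 16    # per-GPU microbatch size, from the grad_accum comment

max_samples = batch_size * num_batches                 # 3,584,000 samples [sa]
total_tokens = max_samples * max_seq_len               # 7,340,032,000 ~ 7.3e9 tokens
tokens_per_step = batch_size * max_seq_len             # 524,288 ~ 0.5e6 tokens per step
grad_accum = batch_size // devices // microbatch_size  # 2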

tests/test_dataset_registry.py

Lines changed: 42 additions & 27 deletions
@@ -4,40 +4,55 @@

 import pytest

-from composer.datasets import (ADE20kDatasetHparams, BratsDatasetHparams, CIFAR10DatasetHparams, DataloaderHparams,
-                               DatasetHparams, GLUEHparams, ImagenetDatasetHparams, LMDatasetHparams,
+from composer.datasets import (ADE20kDatasetHparams, BratsDatasetHparams, C4DatasetHparams, CIFAR10DatasetHparams,
+                               DataloaderHparams, DatasetHparams, GLUEHparams, ImagenetDatasetHparams, LMDatasetHparams,
                                MNISTDatasetHparams, SyntheticHparamsMixin)
 from composer.trainer.trainer_hparams import dataset_registry

 # for testing, we provide values for required hparams fields
 # to initialize test hparams objects
 default_required_fields: Dict[Type[DatasetHparams], Callable[[], DatasetHparams]] = {
     # hparams with empty dicts have no required fields
-    CIFAR10DatasetHparams: lambda: CIFAR10DatasetHparams(
-        is_train=False,
-        download=False,
-    ),
-    ADE20kDatasetHparams: lambda: ADE20kDatasetHparams(is_train=False),
-    BratsDatasetHparams: lambda: BratsDatasetHparams(is_train=False,),
-    ImagenetDatasetHparams: lambda: ImagenetDatasetHparams(
-        is_train=False,
-        crop_size=224,
-        resize_size=-1,
-    ),
-    MNISTDatasetHparams: lambda: MNISTDatasetHparams(
-        is_train=False,
-        download=False,
-    ),
-    LMDatasetHparams: lambda: LMDatasetHparams(
-        datadir=["hello"],
-        split='train',
-        tokenizer_name='gpt2',
-    ),
-    GLUEHparams: lambda: GLUEHparams(
-        task="rte",
-        tokenizer_name="bert-base-uncased",
-        split="train",
-    ),
+    CIFAR10DatasetHparams:
+        lambda: CIFAR10DatasetHparams(
+            is_train=False,
+            download=False,
+        ),
+    ADE20kDatasetHparams:
+        lambda: ADE20kDatasetHparams(is_train=False),
+    BratsDatasetHparams:
+        lambda: BratsDatasetHparams(is_train=False,),
+    ImagenetDatasetHparams:
+        lambda: ImagenetDatasetHparams(
+            is_train=False,
+            crop_size=224,
+            resize_size=-1,
+        ),
+    MNISTDatasetHparams:
+        lambda: MNISTDatasetHparams(
+            is_train=False,
+            download=False,
+        ),
+    LMDatasetHparams:
+        lambda: LMDatasetHparams(
+            datadir=["hello"],
+            split='train',
+            tokenizer_name='gpt2',
+        ),
+    GLUEHparams:
+        lambda: GLUEHparams(
+            task="rte",
+            tokenizer_name="bert-base-uncased",
+            split="train",
+        ),
+    C4DatasetHparams:
+        lambda: C4DatasetHparams(
+            split="train",
+            max_samples=1000,
+            max_seq_len=100,
+            tokenizer_name="gpt2",
+            group_method="concat",
+        ),
 }
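The test body that consumes this mapping falls outside the hunk. A hedged sketch of how such a registry-coverage test might look (the test name and shape are assumptions, not taken from the diff):

import pytest

@pytest.mark.parametrize("name,hparams_cls", list(dataset_registry.items()))
def test_registry_entry_constructible(name, hparams_cls):
    # Build each registered hparams class via its required-fields factory
    # and confirm the factory produces an instance of the right class.
    factory = default_required_fields[hparams_cls]
    assert isinstance(factory(), hparams_cls)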
