Commit f1862b8
AutoYAHP Part 1: Cleanup the Algorithms for AutoYAHP (#1056)

This PR refactors the algorithms and tests as will be required by AutoYAHP. It does not depend on AutoYAHP itself (a future PR will remove the underlying hparam classes).

- Refactored the algorithm tests to not depend on hparams.
- Reformatted the factorize and selective backprop docstrings so they are parsed correctly by auto-yahp.
- Refactored algorithm_settings.py to not depend on hparams and to return a list of pytest.param objects for a pytest.mark.parametrize. This makes it more reusable, since it now includes the markers required for each algorithm.
- Moved TestTrainerAlgorithms into tests/algorithms/test_algorithms_train.py, since it tests the individual algorithms, not the trainer, and thus should live in tests/algorithms.
- Added helper methods for scanning a module to discover subclass implementations, checking that the registry contains an entry, and testing that a class is constructible from yaml. (A sketch of the module-scanning helper follows this list.)
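The module-scanning helper from the last bullet appears throughout the diffs below as tests.common.get_module_subclasses, but its implementation is not part of this diff. The following is a minimal sketch, assuming it walks a package's submodules and collects concrete subclasses; the real helper in tests/common may filter differently.

# A hedged sketch, not the repository's actual helper: import a package's
# submodules, then collect every concrete subclass of ``base`` that the
# package exports.
import importlib
import inspect
import pkgutil
from types import ModuleType
from typing import List, Type


def get_module_subclasses(module: ModuleType, base: Type) -> List[Type]:
    # Import all submodules so their classes appear in the package namespace.
    if hasattr(module, '__path__'):
        for info in pkgutil.walk_packages(module.__path__, prefix=module.__name__ + '.'):
            importlib.import_module(info.name)
    subclasses = {
        obj for _, obj in inspect.getmembers(module, inspect.isclass)
        if issubclass(obj, base) and not inspect.isabstract(obj) and obj is not base
    }
    # Sort for deterministic test parametrization.
    return sorted(subclasses, key=lambda cls: cls.__name__)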

29 files changed (+421, -373 lines)

.pre-commit-config.yaml
Lines changed: 0 additions & 1 deletion

@@ -51,7 +51,6 @@ repos:
   hooks:
   - id: check-added-large-files
   - id: check-ast
-  - id: check-byte-order-marker
   - id: check-builtin-literals
     args:
     - --no-allow-dict-kwargs

composer/algorithms/factorize/factorize.py
Lines changed: 3 additions & 3 deletions

@@ -117,9 +117,9 @@ class Factorize(Algorithm):
     See :func:`~composer.algorithms.factorize.factorize_matrix` and
     :func:`~composer.algorithms.factorize.factorize_conv2d` for more
     information about the factorization process. See
-    :class:`~composer.algorithms.factorize.FactorizedConv2d` and
-    :class:`~composer.algorithms.factorize.FactorizedLinear`
-    for more information about the factorized modules used to replace the
+    :class:`~composer.algorithms.factorize.FactorizedConv2d`
+    and :class:`~composer.algorithms.factorize.FactorizedLinear` for
+    more information about the factorized modules used to replace the
     original modules.

     Runs on :attr:`~composer.core.event.Event.INIT`.

composer/algorithms/selective_backprop/selective_backprop.py
Lines changed: 12 additions & 12 deletions

@@ -179,18 +179,6 @@ class SelectiveBackprop(Algorithm):

     .. _Jiang et al, 2019: https://arxiv.org/abs/1910.00762

-    Args:
-        start (float, optional): SB interval start as fraction of training duration
-            Default: ``0.5``.
-        end (float, optional): SB interval end as fraction of training duration
-            Default: ``0.9``.
-        keep (float, optional): fraction of minibatch to select and keep for gradient computation
-            Default: ``0.5``.
-        scale_factor (float, optional): scale for downsampling input for selection forward pass
-            Default: ``1.``.
-        interrupt (int, optional): interrupt SB with a vanilla minibatch step every
-            ``interrupt`` batches. Default: ``2``.
-
     Example:
         .. testcode::

@@ -204,6 +192,18 @@ class SelectiveBackprop(Algorithm):
                 algorithms=[algorithm],
                 optimizers=[optimizer]
             )
+
+    Args:
+        start (float, optional): SB interval start as fraction of training duration
+            Default: ``0.5``.
+        end (float, optional): SB interval end as fraction of training duration
+            Default: ``0.9``.
+        keep (float, optional): fraction of minibatch to select and keep for gradient computation
+            Default: ``0.5``.
+        scale_factor (float, optional): scale for downsampling input for selection forward pass
+            Default: ``1.``.
+        interrupt (int, optional): interrupt SB with a vanilla minibatch step every
+            ``interrupt`` batches. Default: ``2``.
     """

     def __init__(self,

composer/core/algorithm.py
Lines changed: 4 additions & 0 deletions

@@ -36,6 +36,10 @@ class Algorithm(Serializable, ABC):
     +----------------+-------------------------------------------------------------------------------+
     """

+    def __init__(self, *args, **kwargs):  # Stub signature for PyRight
+        del args, kwargs  # unused
+        pass
+
     @property
     def find_unused_parameters(self) -> bool:
         """Return True to indicate that the effect of this algorithm may cause some model parameters to be unused.

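Why the stub matters: without an explicit __init__ on the abstract base, a static type checker such as PyRight infers object.__init__ and flags constructing an algorithm from a Type[Algorithm] with keyword arguments, which is exactly the pattern the refactored tests use. A hedged illustration of the call the stub unblocks (not code from this PR):

from typing import Any, Dict, Type

from composer.core import Algorithm


def construct_alg(alg_cls: Type[Algorithm], kwargs: Dict[str, Any]) -> Algorithm:
    # With the permissive base __init__, this call type-checks for any subclass.
    return alg_cls(**kwargs)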
docs/source/templates/dataset_task_card.rst
Lines changed: 10 additions & 10 deletions

@@ -1,19 +1,19 @@
 :orphan:

-##################
+#################
 Dataset/Task Name
-##################
+#################

 .. Based, in part, on [Datasheets for Datasets](https://arxiv.org/pdf/1803.09010.pdf) by Gebru et al.

 TLDR
-=======
+====
 ..
   TLDR about the dataset name, kind of data, tasks associated with it, and number of examples.

 *E.g., ImageNet is a dataset of natural images for 1000-way classification consisting of 1.2M training examples and 50K validation examples at about resolution 224x224.*

 Attribution
-============
+===========
 **Created By:** _______

 .. Who created the dataset (which team, research group) and on behalf of which entity (company, institution, organization)? Provide any links or citations as appropriate.

@@ -23,34 +23,34 @@ Attribution
 .. Under what license is the dataset available?*

 Using the Dataset
-==================
+=================

 Obtaining the Dataset
-----------------------
+---------------------

 The dataset can be obtained from _________.

 .. URL, library, or other description of where to get it.

 Expected Format
-----------------
+---------------

 The Composer library expects this dataset to be stored as ________.

 .. What format does the Composer library expect this data to be stored in? E.g. JPEGs in folders for each class, TFRecords with a particular schema, etc.

 Steps to Obtain and Prepare Dataset
------------------------------------
+-----------------------------------

 .. A list of step-by-step instructions necessary to obtain this dataset and place it in the right format or a link to such a guide that is available elsewhere.

 Performance Considerations:
-----------------------------
+---------------------------

 .. A list of performance considerations in order to use this dataset efficiently. E.g., the cost of loading the data for the first time, recommended storage medium, sensitivity of performance to memory size and disk throughput, costs associated with data augmentation, recommended batching strategies, etc.

 Technical Specifications
-=========================
+========================

 **Kind of Data: _____**

tests/algorithms/__init__.py
Lines changed: 0 additions & 2 deletions

@@ -1,4 +1,2 @@
 # Copyright 2022 MosaicML Composer authors
 # SPDX-License-Identifier: Apache-2.0
-
-from tests.algorithms.algorithm_settings import get_settings as get_settings

tests/algorithms/algorithm_settings.py
Lines changed: 111 additions & 55 deletions

@@ -8,8 +8,19 @@
 Each algorithm is keyed based on its name in the algorithm registry.
 """

-from composer.algorithms import algorithm_registry
+from typing import Any, Dict, Optional, Type
+
+import pytest
+from torch.utils.data import Dataset
+
+import composer
+from composer import Algorithm
+from composer.algorithms import (AGC, EMA, SAM, SWA, Alibi, AugMix, BlurPool, ChannelsLast, ColOut, CutMix, CutOut,
+                                 Factorize, GhostBatchNorm, LabelSmoothing, LayerFreezing, MixUp, NoOpModel,
+                                 ProgressiveResizing, RandAugment, SelectiveBackprop, SeqLengthWarmup, SqueezeExcite,
+                                 StochasticDepth)
 from composer.models import ComposerResNet
+from composer.models.base import ComposerModel
 from tests import common

 simple_vision_settings = {

@@ -23,7 +34,7 @@
     'dataset': (common.RandomImageDataset, {
         'is_PIL': True
     }),
-    'kwargs': {}
+    'kwargs': {},
 }

 simple_resnet_settings = {

@@ -34,32 +45,39 @@
     'dataset': (common.RandomImageDataset, {
         'shape': (3, 224, 224),
     }),
+    'kwargs': {},
 }

-_settings = {
-    'agc': simple_vision_settings,
-    'alibi': None,  # NLP settings needed
-    'augmix': None,  # requires PIL dataset to test
-    'blurpool': {
+_settings: Dict[Type[Algorithm], Optional[Dict[str, Any]]] = {
+    AGC: simple_vision_settings,
+    Alibi: None,  # NLP settings needed
+    AugMix: simple_vision_settings,
+    BlurPool: {
         'model': common.SimpleConvModel,
         'dataset': common.RandomImageDataset,
         'kwargs': {
             'min_channels': 0,
         },
     },
-    'channels_last': simple_vision_settings,
-    'colout': simple_vision_settings,
-    'cutmix': {
+    ChannelsLast: simple_vision_settings,
+    ColOut: simple_vision_settings,
+    CutMix: {
         'model': common.SimpleConvModel,
         'dataset': common.RandomImageDataset,
         'kwargs': {
             'num_classes': 2
         }
     },
-    'cutout': simple_vision_settings,
-    'ema': simple_vision_settings,
-    'factorize': simple_resnet_settings,
-    'ghost_batchnorm': {
+    CutOut: simple_vision_settings,
+    EMA: {
+        'model': common.SimpleConvModel,
+        'dataset': common.RandomImageDataset,
+        'kwargs': {
+            'half_life': "1ba",
+        },
+    },
+    Factorize: simple_resnet_settings,
+    GhostBatchNorm: {
         'model': (ComposerResNet, {
             'model_name': 'resnet18',
             'num_classes': 2

@@ -71,16 +89,17 @@
             'ghost_batch_size': 2,
         }
     },
-    'label_smoothing': simple_vision_settings,
-    'layer_freezing': simple_vision_settings,
-    'mixup': simple_vision_settings,
-    'progressive_resizing': simple_vision_settings,
-    'randaugment': None,  # requires PIL dataset to test
-    'sam': simple_vision_settings,
-    'selective_backprop': simple_vision_settings,
-    'seq_length_warmup': None,  # NLP settings needed
-    'squeeze_excite': simple_resnet_settings,
-    'stochastic_depth': {
+    LabelSmoothing: simple_vision_settings,
+    LayerFreezing: simple_vision_settings,
+    MixUp: simple_vision_settings,
+    ProgressiveResizing: simple_vision_settings,
+    RandAugment: simple_vision_settings,
+    NoOpModel: simple_vision_settings,
+    SAM: simple_vision_settings,
+    SelectiveBackprop: simple_vision_settings,
+    SeqLengthWarmup: None,  # NLP settings needed
+    SqueezeExcite: simple_resnet_settings,
+    StochasticDepth: {
         'model': (ComposerResNet, {
             'model_name': 'resnet50',
             'num_classes': 2

@@ -93,10 +112,11 @@
             'target_layer_name': 'ResNetBottleneck',
             'drop_rate': 0.2,
             'drop_distribution': 'linear',
-            'use_same_gpu_seed': False
+            'drop_warmup': "0.0dur",
+            'use_same_gpu_seed': False,
         }
     },
-    'swa': {
+    SWA: {
         'model': common.SimpleConvModel,
         'dataset': common.RandomImageDataset,
         'kwargs': {

@@ -105,37 +125,73 @@
             'update_interval': '1ep',
             'schedule_swa_lr': True,
         }
-    }
+    },
 }


-def get_settings(name: str):
-    """For a given algorithm name, creates the canonical setting
-    (algorithm, model, dataset) for testing.
+def _get_alg_settings(alg_cls: Type[Algorithm]):
+    if alg_cls not in _settings or _settings[alg_cls] is None:
+        raise ValueError(f"Algorithm {alg_cls.__name__} not in the settings dictionary.")
+    settings = _settings[alg_cls]
+    assert settings is not None
+    return settings
+
+
+def get_alg_kwargs(alg_cls: Type[Algorithm]) -> Dict[str, Any]:
+    """Return the kwargs for an algorithm."""
+    return _get_alg_settings(alg_cls)['kwargs']

-    Returns ``None`` if no settings provided.
+
+def get_alg_model(alg_cls: Type[Algorithm]) -> ComposerModel:
+    """Return an instance of the model for an algorithm."""
+    settings = _get_alg_settings(alg_cls)['model']
+    if isinstance(settings, tuple):
+        (cls, kwargs) = settings
+    else:
+        (cls, kwargs) = (settings, {})
+    return cls(**kwargs)
+
+
+def get_alg_dataset(alg_cls: Type[Algorithm]) -> Dataset:
+    """Return an instance of the dataset for an algorithm."""
+    settings = _get_alg_settings(alg_cls)['dataset']
+    if isinstance(settings, tuple):
+        (cls, kwargs) = settings
+    else:
+        (cls, kwargs) = (settings, {})
+    return cls(**kwargs)
+
+
+def get_algs_with_marks():
+    """Return a list of algorithm classes, with appropriate markers, for a subsequent pytest.mark.parametrize.
+
+    It reads from the algorithm registry and applies markers as appropriate (e.g. xfail for
+    algorithms that are missing settings).
+
+    E.g. ``@pytest.mark.parametrize("alg_cls", get_algs_with_marks())``
     """
-    if name not in _settings:
-        raise ValueError(f'No settings for {name} found, please add.')
-
-    setting = _settings[name]
-    if setting is None:
-        return None
-
-    result = {}
-    for key in ('model', 'dataset'):
-        if isinstance(setting[key], tuple):
-            (obj, kwargs) = setting[key]
-        else:
-            (obj, kwargs) = (setting[key], {})
-
-        # create the object
-        result[key] = obj(**kwargs)
-
-    # create algorithm
-    kwargs = setting.get('kwargs', {})
-    hparams = algorithm_registry.get_algorithm_registry()[name]
-    result['algorithm'] = hparams(**kwargs).initialize_object()
-    result['algorithm_kwargs'] = kwargs
-
-    return result
+    ans = []
+    for alg_cls in common.get_module_subclasses(composer.algorithms, Algorithm):
+        marks = []
+        settings = _settings[alg_cls]
+
+        if alg_cls in (CutMix, MixUp, LabelSmoothing):
+            # see: https://github.com/mosaicml/composer/issues/362
+            pytest.importorskip("torch", minversion="1.10", reason="Pytorch 1.10 required.")
+
+        if alg_cls == SWA:
+            # TODO(matthew): Fix
+            marks.append(
+                pytest.mark.filterwarnings(
+                    r'ignore:Detected call of `lr_scheduler.step\(\)` before `optimizer.step\(\)`:UserWarning'))
+
+        if alg_cls == MixUp:
+            # TODO(Landen): Fix
+            marks.append(
+                pytest.mark.filterwarnings(r"ignore:Some targets have less than 1 total probability:UserWarning"))
+
+        if settings is None:
+            marks.append(pytest.mark.xfail(reason=f"Algorithm {alg_cls.__name__} is missing settings."))
+
+        ans.append(pytest.param(alg_cls, marks=marks, id=alg_cls.__name__))
+
+    return ans
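To show how these helpers compose, here is a hedged sketch of a consumer in the spirit of the relocated tests/algorithms/test_algorithms_train.py; the test body and the fitting step are assumptions, not code from this diff.

import pytest

from composer import Algorithm
from tests.algorithms.algorithm_settings import (get_alg_dataset, get_alg_kwargs, get_alg_model,
                                                 get_algs_with_marks)


@pytest.mark.parametrize('alg_cls', get_algs_with_marks())
def test_algorithm_builds(alg_cls):
    # Each pytest.param already carries its markers (xfail, filterwarnings, ...)
    # and a readable id, so no per-test bookkeeping is needed here.
    algorithm = alg_cls(**get_alg_kwargs(alg_cls))
    model = get_alg_model(alg_cls)
    dataset = get_alg_dataset(alg_cls)
    assert isinstance(algorithm, Algorithm)
    # A real test would build a Trainer from (model, dataset, algorithm) and fit a few batches.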
Lines changed: 28 additions & 0 deletions

@@ -0,0 +1,28 @@
+# Copyright 2022 MosaicML Composer authors
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Type
+
+import pytest
+
+import composer.algorithms
+from composer.algorithms import AlgorithmHparams
+from composer.algorithms.algorithm_registry import registry as algorithm_registry
+from composer.core import Algorithm
+from tests.algorithms.algorithm_settings import get_alg_kwargs
+from tests.common import get_module_subclasses
+from tests.common.hparams import assert_in_registry, assert_yaml_loads
+
+
+@pytest.mark.parametrize("alg_hparams_cls", get_module_subclasses(composer.algorithms, AlgorithmHparams))
+def test_all_algs_in_registry(alg_hparams_cls: Type[AlgorithmHparams]):
+    assert_in_registry(alg_hparams_cls, algorithm_registry)
+
+
+@pytest.mark.xfail(reason="This test depends on AutoYAHP")
+@pytest.mark.parametrize("alg_cls", get_module_subclasses(composer.algorithms, Algorithm))
+def test_algs_load_from_yaml(alg_cls: Type[Algorithm]):
+    kwargs = get_alg_kwargs(alg_cls)
+    if kwargs is None:
+        pytest.xfail(f"Missing settings for algorithm {alg_cls.__name__}")
+    assert_yaml_loads(alg_cls, kwargs, expected=alg_cls)
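The two assertions imported from tests.common.hparams are the registry and yaml helpers that the commit message describes. Their bodies are not shown in this diff; below is a minimal sketch, assuming the registry maps names to classes and that round-tripping kwargs through yaml should reconstruct the class.

from typing import Any, Dict, Type

import yaml  # PyYAML


def assert_in_registry(constructor: Type, registry: Dict[str, Type]):
    # Pass if the class is registered under any key.
    assert constructor in registry.values(), f'{constructor.__name__} is missing from the registry.'


def assert_yaml_loads(constructor: Type, kwargs: Dict[str, Any], expected: Type):
    # Round-trip the kwargs through yaml, then construct and type-check the result.
    loaded_kwargs = yaml.safe_load(yaml.safe_dump(kwargs))
    instance = constructor(**loaded_kwargs)
    assert isinstance(instance, expected)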
