11 changes: 6 additions & 5 deletions composer/datasets/ade20k_hparams.py

@@ -23,7 +23,7 @@
 from composer.datasets.synthetic import SyntheticBatchPairDataset
 from composer.datasets.synthetic_hparams import SyntheticHparamsMixin
 from composer.datasets.utils import NormalizationFn, pil_image_collate
-from composer.utils import dist
+from composer.utils import dist, warn_streaming_dataset_deprecation
 from composer.utils.import_helpers import MissingConditionalImportError
 
 __all__ = ['ADE20kDatasetHparams', 'StreamingADE20kHparams']
@@ -152,9 +152,9 @@ class StreamingADE20kHparams(DatasetHparams):
     """DatasetHparams for creating an instance of StreamingADE20k.
 
     Args:
-        version (int): Which version of streaming to use. Default: ``2``.
+        version (int): Which version of streaming to use. Default: ``1``.
         remote (str): Remote directory (S3 or local filesystem) where dataset is stored.
-            Default: ``'s3://mosaicml-internal-dataset-ade20k/mds/2/```
+            Default: ``'s3://mosaicml-internal-dataset-ade20k/mds/1/```
         local (str): Local filesystem directory where dataset is cached during operation.
             Default: ``'/tmp/mds-cache/mds-ade20k/```
         split (str): The dataset split to use, either 'train' or 'val'. Default: ``'train```.
@@ -166,9 +166,9 @@ class StreamingADE20kHparams(DatasetHparams):
             Default: ``true``.
     """
 
-    version: int = hp.optional('Version of streaming (1 or 2)', default=2)
+    version: int = hp.optional('Version of streaming (1 or 2)', default=1)
     remote: str = hp.optional('Remote directory (S3 or local filesystem) where dataset is stored',
-                              default='s3://mosaicml-internal-dataset-ade20k/mds/2/')
+                              default='s3://mosaicml-internal-dataset-ade20k/mds/1/')
     local: str = hp.optional('Local filesystem directory where dataset is cached during operation',
                              default='/tmp/mds-cache/mds-ade20k/')
     split: str = hp.optional("Which split of the dataset to use. Either ['train', 'val']", default='train')
@@ -180,6 +180,7 @@ class StreamingADE20kHparams(DatasetHparams):
 
     def initialize_object(self, batch_size: int, dataloader_hparams: DataLoaderHparams) -> DataSpec:
         if self.version == 1:
+            warn_streaming_dataset_deprecation(old_version=self.version, new_version=2)
             dataset = StreamingADE20k(remote=self.remote,
                                       local=self.local,
                                       split=self.split,

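The same pattern repeats in every streaming dataset hparams file below: the default `version` drops back to ``1`` and `initialize_object()` now warns before building the version-1 dataset. As an illustrative sketch only (not part of the diff; keyword construction assumes the usual yahp dataclass behavior), this is how the changed default plays out for a caller:

```python
# Illustrative sketch, not part of the diff: with this change the default
# streaming version is 1 again, and initialize_object() calls
# warn_streaming_dataset_deprecation(old_version=1, new_version=2) before
# constructing the version-1 StreamingADE20k dataset.
from composer.datasets.ade20k_hparams import StreamingADE20kHparams

default_hparams = StreamingADE20kHparams()
assert default_hparams.version == 1  # new default; warns at initialize_object()

# Opting into streaming v2 explicitly (remote path taken from the old default):
v2_hparams = StreamingADE20kHparams(version=2, remote='s3://mosaicml-internal-dataset-ade20k/mds/2/')
assert v2_hparams.version == 2  # no deprecation warning on this path
```
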
10 changes: 6 additions & 4 deletions composer/datasets/c4_hparams.py

@@ -12,6 +12,7 @@
 from composer.core.data_spec import DataSpec
 from composer.datasets.c4 import C4Dataset, StreamingC4
 from composer.datasets.dataset_hparams import DataLoaderHparams, DatasetHparams
+from composer.utils import warn_streaming_dataset_deprecation
 from composer.utils.import_helpers import MissingConditionalImportError
 
 log = logging.getLogger(__name__)
@@ -24,9 +25,9 @@ class StreamingC4Hparams(DatasetHparams):
     """Builds a :class:`.DataSpec` for the StreamingC4 (Colossal Cleaned Common Crawl) dataset.
 
     Args:
-        version (int): Which version of streaming to use. Default: ``2``.
+        version (int): Which version of streaming to use. Default: ``1``.
         remote (str): Remote directory (S3 or local filesystem) where dataset is stored.
-            Default: ``'s3://mosaicml-internal-dataset-c4/mds/2/'``
+            Default: ``'s3://mosaicml-internal-dataset-c4/mds/1/'``
         local (str): Local filesystem directory where dataset is cached during operation.
             Default: ``'/tmp/mds-cache/mds-c4/'``
         split (str): What split of the dataset to use. Either ``'train'`` or ``'val'``. Default: ``'train'``.
@@ -40,9 +41,9 @@ class StreamingC4Hparams(DatasetHparams):
         timeout (float): How long to wait for shard to download before raising an exception. Default: 120 sec.
     """
 
-    version: int = hp.optional('Version of streaming (1 or 2)', default=2)
+    version: int = hp.optional('Version of streaming (1 or 2)', default=1)
     remote: str = hp.optional('Remote directory (S3 or local filesystem) where dataset is stored',
-                              default='s3://mosaicml-internal-dataset-c4/mds/2/')
+                              default='s3://mosaicml-internal-dataset-c4/mds/1/')
     local: str = hp.optional('Local filesystem directory where dataset is cached during operation',
                              default='/tmp/mds-cache/mds-c4/')
     split: str = hp.optional('What split of the dataset to use. Either `train` or `val`.', default='train')
@@ -76,6 +77,7 @@ def initialize_object(self, batch_size: int, dataloader_hparams: DataLoaderHpara
 
         # Get StreamingC4 dataset
         if self.version == 1:
+            warn_streaming_dataset_deprecation(old_version=self.version, new_version=2)
            dataset = StreamingC4(remote=self.remote,
                                  local=self.local,
                                  split=self.split,

11 changes: 6 additions & 5 deletions composer/datasets/cifar_hparams.py

@@ -24,7 +24,7 @@
 from composer.datasets.ffcv_utils import write_ffcv_dataset
 from composer.datasets.synthetic import SyntheticBatchPairDataset
 from composer.datasets.synthetic_hparams import SyntheticHparamsMixin
-from composer.utils import dist
+from composer.utils import dist, warn_streaming_dataset_deprecation
 from composer.utils.import_helpers import MissingConditionalImportError
 
 __all__ = ['CIFAR10DatasetHparams', 'StreamingCIFAR10Hparams']
@@ -185,23 +185,24 @@ class StreamingCIFAR10Hparams(DatasetHparams):
     """Streaming CIFAR10 hyperparameters.
 
     Args:
-        version (int): Which version of streaming to use. Default: ``2``.
+        version (int): Which version of streaming to use. Default: ``1``.
         remote (str): Remote directory (S3 or local filesystem) where dataset is stored.
-            Default: ``'s3://mosaicml-internal-dataset-cifar10/mds/2/'``
+            Default: ``'s3://mosaicml-internal-dataset-cifar10/mds/1/'``
         local (str): Local filesystem directory where dataset is cached during operation.
            Default: ``'/tmp/mds-cache/mds-cifar10/'``
         split (str): The dataset split to use, either 'train' or 'val'. Default: ``'train'``.
     """
 
-    version: int = hp.optional('Version of streaming (1 or 2)', default=2)
+    version: int = hp.optional('Version of streaming (1 or 2)', default=1)
     remote: str = hp.optional('Remote directory (S3 or local filesystem) where dataset is stored',
-                              default='s3://mosaicml-internal-dataset-cifar10/mds/2/')
+                              default='s3://mosaicml-internal-dataset-cifar10/mds/1/')
     local: str = hp.optional('Local filesystem directory where dataset is cached during operation',
                              default='/tmp/mds-cache/mds-cifar10/')
     split: str = hp.optional("Which split of the dataset to use. Either ['train', 'val']", default='train')
 
     def initialize_object(self, batch_size: int, dataloader_hparams: DataLoaderHparams) -> DataLoader:
         if self.version == 1:
+            warn_streaming_dataset_deprecation(old_version=self.version, new_version=2)
             dataset = StreamingCIFAR10(remote=self.remote,
                                        local=self.local,
                                        split=self.split,

11 changes: 6 additions & 5 deletions composer/datasets/coco_hparams.py

@@ -12,7 +12,7 @@
 from composer.datasets.coco import COCODetection, StreamingCOCO, split_dict_fn
 from composer.datasets.dataset_hparams import DataLoaderHparams, DatasetHparams
 from composer.models.ssd.utils import SSDTransformer, dboxes300_coco
-from composer.utils import dist
+from composer.utils import dist, warn_streaming_dataset_deprecation
 from composer.utils.import_helpers import MissingConditionalImportError
 
 __all__ = ['COCODatasetHparams', 'StreamingCOCOHparams']
@@ -75,23 +75,24 @@ class StreamingCOCOHparams(DatasetHparams):
     """DatasetHparams for creating an instance of StreamingCOCO.
 
     Args:
-        version (int): Which version of streaming to use. Default: ``2``.
+        version (int): Which version of streaming to use. Default: ``1``.
         remote (str): Remote directory (S3 or local filesystem) where dataset is stored.
-            Default: ``'s3://mosaicml-internal-dataset-coco/mds/2/```
+            Default: ``'s3://mosaicml-internal-dataset-coco/mds/1/```
         local (str): Local filesystem directory where dataset is cached during operation.
            Default: ``'/tmp/mds-cache/mds-coco/```
         split (str): The dataset split to use, either 'train' or 'val'. Default: ``'train```.
     """
 
-    version: int = hp.optional('Version of streaming (1 or 2)', default=2)
+    version: int = hp.optional('Version of streaming (1 or 2)', default=1)
     remote: str = hp.optional('Remote directory (S3 or local filesystem) where dataset is stored',
-                              default='s3://mosaicml-internal-dataset-coco/mds/2/')
+                              default='s3://mosaicml-internal-dataset-coco/mds/1/')
     local: str = hp.optional('Local filesystem directory where dataset is cached during operation',
                              default='/tmp/mds-cache/mds-coco/')
     split: str = hp.optional("Which split of the dataset to use. Either ['train', 'val']", default='train')
 
     def initialize_object(self, batch_size: int, dataloader_hparams: DataLoaderHparams):
         if self.version == 1:
+            warn_streaming_dataset_deprecation(old_version=self.version, new_version=2)
             dataset = StreamingCOCO(remote=self.remote,
                                     local=self.local,
                                     split=self.split,

11 changes: 6 additions & 5 deletions composer/datasets/imagenet_hparams.py

@@ -26,7 +26,7 @@
 from composer.datasets.synthetic import SyntheticBatchPairDataset
 from composer.datasets.synthetic_hparams import SyntheticHparamsMixin
 from composer.datasets.utils import NormalizationFn, pil_image_collate
-from composer.utils import dist
+from composer.utils import dist, warn_streaming_dataset_deprecation
 from composer.utils.import_helpers import MissingConditionalImportError
 
 # ImageNet normalization values from torchvision: https://pytorch.org/vision/stable/models.html
@@ -212,19 +212,19 @@ class StreamingImageNet1kHparams(DatasetHparams):
     """DatasetHparams for creating an instance of StreamingImageNet1k.
 
     Args:
-        version (int): Which version of streaming to use. Default: ``2``.
+        version (int): Which version of streaming to use. Default: ``1``.
         remote (str): Remote directory (S3 or local filesystem) where dataset is stored.
-            Default: ``'s3://mosaicml-internal-dataset-imagenet1k/mds/2/```
+            Default: ``'s3://mosaicml-internal-dataset-imagenet1k/mds/1/```
         local (str): Local filesystem directory where dataset is cached during operation.
            Default: ``'/tmp/mds-cache/mds-imagenet1k/```
         split (str): The dataset split to use, either 'train' or 'val'. Default: ``'train```.
         resize_size (int, optional): The resize size to use. Use -1 to not resize. Default: ``-1``.
         crop size (int): The crop size to use. Default: ``224``.
     """
 
-    version: int = hp.optional('Version of streaming (1 or 2)', default=2)
+    version: int = hp.optional('Version of streaming (1 or 2)', default=1)
     remote: str = hp.optional('Remote directory (S3 or local filesystem) where dataset is stored',
-                              default='s3://mosaicml-internal-dataset-imagenet1k/mds/2/')
+                              default='s3://mosaicml-internal-dataset-imagenet1k/mds/1/')
     local: str = hp.optional('Local filesystem directory where dataset is cached during operation',
                              default='/tmp/mds-cache/mds-imagenet1k/')
     split: str = hp.optional("Which split of the dataset to use. Either ['train', 'val']", default='train')
@@ -233,6 +233,7 @@ class StreamingImageNet1kHparams(DatasetHparams):
 
     def initialize_object(self, batch_size: int, dataloader_hparams: DataLoaderHparams) -> DataSpec:
         if self.version == 1:
+            warn_streaming_dataset_deprecation(old_version=self.version, new_version=2)
             dataset = StreamingImageNet1k(remote=self.remote,
                                           local=self.local,
                                           split=self.split,

14 changes: 12 additions & 2 deletions composer/utils/__init__.py

@@ -20,11 +20,21 @@
 from composer.utils.string_enum import StringEnum
 
 
-def warn_yahp_deprecation():
+def warn_yahp_deprecation() -> None:
     warnings.warn(
         'yahp-based workflows are deprecated and will be removed in a future release. Please'
         'migrate to using other configuration managers and create the Trainer objects directly.'
-        'v0.10 will be the last release to support yahp.', DeprecationWarning)
+        'v0.10 will be the last release to support yahp.',
+        DeprecationWarning,
+        stacklevel=2)
+
+
+def warn_streaming_dataset_deprecation(old_version: int, new_version: int) -> None:
+    warnings.warn(
+        f'streaming dataset version {old_version} is deprecated and will be removed in the future. '
+        f'Please migrate to using streaming dataset version {new_version}',
+        DeprecationWarning,
+        stacklevel=2)
 
 
 __all__ = [

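A quick, self-contained check of the new helper (illustrative only; `warn_streaming_dataset_deprecation` is defined in `composer/utils/__init__.py`, so it is importable from `composer.utils` as the dataset hparams modules above expect):

```python
import warnings

from composer.utils import warn_streaming_dataset_deprecation

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    warn_streaming_dataset_deprecation(old_version=1, new_version=2)

assert len(caught) == 1
assert issubclass(caught[0].category, DeprecationWarning)
print(caught[0].message)
# stacklevel=2 attributes the warning to the calling frame
# (e.g. an hparams class's initialize_object), not to composer/utils itself.
```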