26
26
from composer .datasets .synthetic import SyntheticBatchPairDataset
27
27
from composer .datasets .synthetic_hparams import SyntheticHparamsMixin
28
28
from composer .datasets .utils import NormalizationFn , pil_image_collate
29
- from composer .utils import dist
29
+ from composer .utils import dist , warn_streaming_dataset_deprecation
30
30
from composer .utils .import_helpers import MissingConditionalImportError
31
31
32
32
# ImageNet normalization values from torchvision: https://pytorch.org/vision/stable/models.html
@@ -212,19 +212,19 @@ class StreamingImageNet1kHparams(DatasetHparams):
212
212
"""DatasetHparams for creating an instance of StreamingImageNet1k.
213
213
214
214
Args:
215
- version (int): Which version of streaming to use. Default: ``2 ``.
215
+ version (int): Which version of streaming to use. Default: ``1 ``.
216
216
remote (str): Remote directory (S3 or local filesystem) where dataset is stored.
217
- Default: ``'s3://mosaicml-internal-dataset-imagenet1k/mds/2 /```
217
+ Default: ``'s3://mosaicml-internal-dataset-imagenet1k/mds/1 /'``
218
218
local (str): Local filesystem directory where dataset is cached during operation.
219
219
Default: ``'/tmp/mds-cache/mds-imagenet1k/'``
220
220
split (str): The dataset split to use, either 'train' or 'val'. Default: ``'train'``.
221
221
resize_size (int, optional): The resize size to use. Use -1 to not resize. Default: ``-1``.
222
222
crop size (int): The crop size to use. Default: ``224``.
223
223
"""
224
224
225
- version : int = hp .optional ('Version of streaming (1 or 2)' , default = 2 )
225
+ version : int = hp .optional ('Version of streaming (1 or 2)' , default = 1 )
226
226
remote : str = hp .optional ('Remote directory (S3 or local filesystem) where dataset is stored' ,
227
- default = 's3://mosaicml-internal-dataset-imagenet1k/mds/2 /' )
227
+ default = 's3://mosaicml-internal-dataset-imagenet1k/mds/1 /' )
228
228
local : str = hp .optional ('Local filesystem directory where dataset is cached during operation' ,
229
229
default = '/tmp/mds-cache/mds-imagenet1k/' )
230
230
split : str = hp .optional ("Which split of the dataset to use. Either ['train', 'val']" , default = 'train' )
@@ -233,6 +233,7 @@ class StreamingImageNet1kHparams(DatasetHparams):
233
233
234
234
def initialize_object (self , batch_size : int , dataloader_hparams : DataLoaderHparams ) -> DataSpec :
235
235
if self .version == 1 :
236
+ warn_streaming_dataset_deprecation (old_version = self .version , new_version = 2 )
236
237
dataset = StreamingImageNet1k (remote = self .remote ,
237
238
local = self .local ,
238
239
split = self .split ,
0 commit comments