20
20
from composer .core .logging import Logger
21
21
from composer .core .logging .logger import LogLevel
22
22
from composer .core .state import State
23
- from composer .utils import ddp
23
+ from composer .utils import dist
24
24
from composer .utils .run_directory import get_modified_files , get_run_directory
25
25
26
26
log = logging .getLogger (__name__ )
@@ -148,13 +148,13 @@ def __init__(
148
148
self ._finished : Union [None , multiprocessing ._EventType , threading .Event ] = None
149
149
self ._workers = []
150
150
151
- if ddp .get_local_rank () == 0 :
151
+ if dist .get_local_rank () == 0 :
152
152
_validate_credentials (provider , container , self ._object_name_prefix , provider_init_kwargs )
153
153
154
154
def init (self , state : State , logger : Logger ) -> None :
155
155
if get_run_directory () is None :
156
156
return
157
- if not ddp .get_local_rank () == 0 :
157
+ if not dist .get_local_rank () == 0 :
158
158
return
159
159
del state , logger # unused
160
160
self ._finished = self ._finished_cls ()
@@ -194,7 +194,7 @@ def training_end(self, state: State, logger: Logger) -> None:
194
194
def post_close (self ):
195
195
# Cleaning up on post_close to ensure that all artifacts are uploaded
196
196
self ._trigger_upload (logger = None , log_level = None )
197
- if not ddp .get_local_rank () == 0 :
197
+ if not dist .get_local_rank () == 0 :
198
198
return
199
199
if self ._finished is not None :
200
200
self ._finished .set ()
@@ -207,8 +207,8 @@ def _trigger_upload(self, logger: Optional[Logger], log_level: Optional[LogLevel
207
207
# Ensure that every rank is at this point
208
208
# Assuming only the main thread on each rank writes to the run directory, then the barrier here will ensure
209
209
# that the run directory is not being modified after we pass this barrier
210
- ddp .barrier ()
211
- if ddp .get_local_rank () == 0 :
210
+ dist .barrier ()
211
+ if dist .get_local_rank () == 0 :
212
212
run_directory = get_run_directory ()
213
213
assert run_directory is not None , "invariant error"
214
214
# The disk time can differ from the system time, so touch a file and read its timestamp back to get the real (on-disk) time
@@ -244,7 +244,7 @@ def _trigger_upload(self, logger: Optional[Logger], log_level: Optional[LogLevel
244
244
logger .metric (log_level , {"run_directory/uploaded_files" : files_to_be_uploaded })
245
245
246
246
# Ensure that other callbacks do not start writing to the run directory until we have finished copying everything
247
- ddp .barrier ()
247
+ dist .barrier ()
248
248
249
249
250
250
def _validate_credentials (
0 commit comments