mosaicml · abhi-mosaic · Jul 12, 2022 · Jul 12, 2022 · Jul 12, 2022
diff --git a/composer/trainer/devices/device_gpu.py b/composer/trainer/devices/device_gpu.py
@@ -8,6 +8,8 @@
 from typing import Any, Dict, Optional, TypeVar
 
 import torch
+import torch.backends.cuda
+import torch.backends.cudnn
 import torch.cuda.amp
 import torch.utils.data
 
@@ -24,18 +26,29 @@ class DeviceGPU(Device):
 
     Args:
         device_id (int, optional): Integer ID of a GPU device to train with. If not specified, the local rank
-        of the current process is used. Default: None.
+            of the current process is used. Default: None.
+        allow_tf32 (bool, optional): Whether to allow TF32 for CUDA matrix multiplications and cuDNN convolutions.
+            Defaults to True. For more information, see :ref:`torch:tf32_on_ampere`.
     """
     dist_backend = 'nccl'
 
-    def __init__(self, device_id: Optional[int] = None):
+    def __init__(
+        self,
+        device_id: Optional[int] = None,
+        *,
+        allow_tf32: bool = True,
+    ):
         if not torch.cuda.is_available():
             raise ValueError('DeviceGPU cannot be created as torch.cuda is not available.')
         if not device_id:
             device_id = dist.get_local_rank()
         self._device = torch.device(f'cuda:{device_id}')
         torch.cuda.set_device(self._device)
         assert torch.cuda.current_device() == device_id
+        torch.backends.cuda.matmul.allow_tf32 = allow_tf32
+        # pyright reports that "allow_tf32" is not a known member of module torch.backends.cudnn;
+        # however, this flag exists in PyTorch 1.9+: https://pytorch.org/docs/1.9.0/backends.html
+        torch.backends.cudnn.allow_tf32 = allow_tf32  # type: ignore
 
     def module_to_device(self, module: T_nnModule) -> T_nnModule:
         return module.to(self._device)