Skip to content

Commit 9d17116

Browse files
authored
print warning if actual triton cache dir is on NFS, not just for default (#6487)
Move the logic that prints a warning when the Triton cache dir is on NFS so that it acts on the actual calculated `cache_dir` rather than on the default. This means that:
- When the default directory (in the user's home directory) is on NFS but `TRITON_CACHE_DIR` is set to a non-NFS directory, no warning will be printed, whereas prior to this change a spurious and confusing warning was printed.
- When the user's home directory is not on NFS but `TRITON_CACHE_DIR` is set to an NFS directory, a warning will be printed, whereas prior to this change no warning would be printed.
Fixes #6486.
1 parent 5df12a4 commit 9d17116

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

deepspeed/ops/transformer/inference/triton/matmul_ext.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,17 @@ class TritonCacheDir:
4040
_warning_printed = False
4141

4242
@staticmethod
43-
def default_cache_dir():
44-
tmp_path = os.path.join(Path.home(), ".triton", "autotune")
45-
if is_nfs_path(tmp_path) and not TritonCacheDir._warning_printed:
43+
def warn_if_nfs(cache_dir):
44+
if is_nfs_path(cache_dir) and not TritonCacheDir._warning_printed:
4645
print(
47-
f"Warning: The default cache directory for DeepSpeed Triton autotune, {tmp_path}, appears to be on an NFS system. While this is generally acceptable, if you experience slowdowns or hanging when DeepSpeed exits, it is recommended to set the TRITON_CACHE_DIR environment variable to a non-NFS path."
46+
f"Warning: The cache directory for DeepSpeed Triton autotune, {cache_dir}, appears to be on an NFS system. While this is generally acceptable, if you experience slowdowns or hanging when DeepSpeed exits, it is recommended to set the TRITON_CACHE_DIR environment variable to a non-NFS path."
4847
)
4948
TritonCacheDir._warning_printed = True
49+
return
50+
51+
@staticmethod
52+
def default_cache_dir():
53+
tmp_path = os.path.join(Path.home(), ".triton", "autotune")
5054
return tmp_path
5155

5256

@@ -80,9 +84,9 @@ def __init__(self, key):
8084
self.lock_path = None
8185
# if caching is enabled, get the lock and bin path
8286
self.cache_dir = os.environ.get('TRITON_CACHE_DIR', TritonCacheDir.default_cache_dir())
87+
TritonCacheDir.warn_if_nfs(self.cache_dir)
8388
if self.cache_dir:
8489
os.makedirs(self.cache_dir, exist_ok=True)
85-
if self.cache_dir:
8690
self.file_path = os.path.join(self.cache_dir, self.key + ".pickle")
8791
self.lock_path = self.file_path + ".lock"
8892

0 commit comments

Comments
 (0)