formatting fixes + additional comments; return all parameter FQN groups (drop the tied-only filter)

rithwik-db · rithwik-db · commit 02e818ef4746 · 2025-04-28T14:35:11.000-07:00
diff --git a/composer/distributed/fsdp2.py b/composer/distributed/fsdp2.py
@@ -11,8 +11,8 @@
 from torch.distributed.fsdp.wrap import CustomPolicy
 
 from composer.distributed.fsdp2_utils import (
-    generate_default_policy,
     check_param_tying,
+    generate_default_policy,
     get_standalone_and_tied_modules,
     legalize_param_sharing_between_modules,
     update_optimizer_modules,
diff --git a/composer/distributed/fsdp2_utils.py b/composer/distributed/fsdp2_utils.py
@@ -15,7 +15,8 @@
 
 # FSDP2 Weight Tying Functions
 # TODO: These functions are all relatively similar to each other, we should consider
-# refactoring them in the future to be simpler.
+# refactoring them in the future to be simpler. We also might benefit from moving these
+# weight tying functions to a new file (in a potential `fsdp2_utils` directory).
 
 
 def legalize_param_sharing_between_modules(model: nn.Module, modules_to_shard: list[nn.Module]) -> None:
@@ -139,9 +140,8 @@ def _recursive_get_params(module: nn.Module, prefix: str = '') -> None:
 
     _recursive_get_params(model)
 
-    # Filter to keep only groups where the same parameter object has multiple FQNs
-    tying_groups = [fqns for fqns in param_object_to_fqns.values() if len(fqns) > 1]
-    return tying_groups
+    # Return a list of sets, one per parameter object; sets with more than one FQN are tied groups
+    return list(param_object_to_fqns.values())
 
 
 @contextlib.contextmanager
@@ -245,7 +245,12 @@ def update_optimizer_modules(
 
 
 def generate_default_policy(parent_model: nn.Module) -> CustomPolicy:
-    # The same policy as FSDP1 with some caveats around the parent_model (root_module)
+    """Generates the default fsdp wrap policy for FSDP2.
+
+    This policy is the same as the default policy in FSDP1 with some caveats around
+    how the root_module (parent_model) is handled to best support FSDP2.
+    """
+
     def lambda_fn(current_module: nn.Module) -> Union[bool, dict[str, Any]]:
         ret = False
         if hasattr(current_module, '_fsdp_wrap'):