Update group offloading to create the top-level module group when block modules are configured

huggingface · DN6 · Dec 5, 2025 · Nov 21, 2025 · Nov 21, 2025 · Nov 24, 2025
commit f305934de636df8d4220a87fe2234876df2122f2
diff --git a/src/diffusers/hooks/group_offloading.py b/src/diffusers/hooks/group_offloading.py
@@ -322,6 +322,7 @@ def pre_forward(self, module: torch.nn.Module, *args, **kwargs):
 
         args = send_to_device(args, self.group.onload_device, non_blocking=self.group.non_blocking)
         kwargs = send_to_device(kwargs, self.group.onload_device, non_blocking=self.group.non_blocking)
+
         return args, kwargs
 
     def post_forward(self, module: torch.nn.Module, output):
@@ -608,6 +609,7 @@ def _apply_group_offloading_block_level(module: torch.nn.Module, config: GroupOf
             # Apply block offloading to the specified submodule
             _apply_group_offloading_block_level(submodule, config)
             modules_with_group_offloading.add(name)
+
         elif isinstance(submodule, (torch.nn.ModuleList, torch.nn.Sequential)):
             # Handle ModuleList and Sequential blocks as before
             for i in range(0, len(submodule), config.num_blocks_per_group):
@@ -653,7 +655,9 @@ def _apply_group_offloading_block_level(module: torch.nn.Module, config: GroupOf
     # Create a group for the remaining unmatched submodules of the top-level
     # module so that they are on the correct device when the forward pass is called.
     unmatched_modules = [unmatched_module for _, unmatched_module in unmatched_modules]
-    if len(unmatched_modules) > 0 or len(parameters) > 0 or len(buffers) > 0:
+    has_unmatched = len(unmatched_modules) > 0 or len(parameters) > 0 or len(buffers) > 0
+
+    if has_unmatched or len(block_modules) > 0:
         unmatched_group = ModuleGroup(
             modules=unmatched_modules,
             offload_device=config.offload_device,

diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_wan.py b/src/diffusers/models/autoencoders/autoencoder_kl_wan.py
@@ -1409,6 +1409,7 @@ def forward(
         """
         x = sample
         posterior = self.encode(x).latent_dist
+
         if sample_posterior:
             z = posterior.sample(generator=generator)
         else:

diff --git a/tests/models/test_modeling_common.py b/tests/models/test_modeling_common.py
@@ -1851,7 +1851,9 @@ def _run_forward(model, inputs_dict):
                     offload_to_disk_path=tmpdir,
                     offload_type=offload_type,
                     num_blocks_per_group=num_blocks_per_group,
-                    block_modules=model._group_offload_block_modules if hasattr(model, "_group_offload_block_modules") else None
+                    block_modules=model._group_offload_block_modules
+                    if hasattr(model, "_group_offload_block_modules")
+                    else None,
                 )
                 if not is_correct:
                     if extra_files:

diff --git a/tests/testing_utils.py b/tests/testing_utils.py
@@ -1424,7 +1424,7 @@ def _get_expected_safetensors_files(
         offload_to_disk_path: str,
         offload_type: str,
         num_blocks_per_group: Optional[int] = None,
-        block_modules: Optional[List[str]] = None
+        block_modules: Optional[List[str]] = None,
     ) -> Set[str]:
         expected_files = set()