LPX55 committed
Commit fca4fe6 · verified · Parent(s): e36a1cb

Update app.py

Files changed (1): app.py (+12, -0)
app.py CHANGED
@@ -15,11 +15,14 @@ import subprocess
 from transformers import T5EncoderModel
 from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
 from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
+from diffusers.hooks import apply_group_offloading
 
 subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
 
 
 HF_TOKEN = os.getenv("HF_TOKEN")
+onload_device = torch.device("cuda")
+offload_device = torch.device("cpu")
 # Ensure that the minimal version of diffusers is installed
 check_min_version("0.30.2")
 quant_config = TransformersBitsAndBytesConfig(
@@ -89,12 +92,21 @@ pipe.text_encoder_2 = text_encoder_2_4bit
 # pipe.fuse_lora(lora_scale=0.8)
 
 pipe.transformer.to(torch.bfloat16)
+# We can utilize the enable_group_offload method for Diffusers model implementations
+pipe.transformer.enable_group_offload(onload_device=onload_device, offload_device=offload_device, offload_type="leaf_level", use_stream=True)
+# For any other model implementations, the apply_group_offloading function can be used
+apply_group_offloading(pipe.text_encoder, onload_device=onload_device, offload_type="block_level", num_blocks_per_group=2)
+apply_group_offloading(pipe.vae, onload_device=onload_device, offload_type="leaf_level")
 pipe.controlnet.to(torch.bfloat16)
 # pipe.push_to_hub("FLUX.1-Inpainting-8step_uncensored", private=True, token=HF_TOKEN)
 
 # pipe.unload_lora_weights()
 
 pipe.to("cuda")
+
+
+
+
 # pipe.enable_vae_tiling()
 
 # pipe.enable_model_cpu_offload()
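For reference, a minimal standalone sketch of the group-offloading pattern this commit adopts. The FLUX.1-dev checkpoint and the FluxPipeline class below are illustrative assumptions standing in for the app's actual pipeline; any diffusers pipeline exposing transformer, text_encoder, and vae components follows the same pattern.

# Minimal sketch of diffusers group offloading (assumptions: the checkpoint
# and pipeline class are placeholders, not taken from this commit).
import torch
from diffusers import FluxPipeline
from diffusers.hooks import apply_group_offloading

onload_device = torch.device("cuda")   # where weights live during a module's forward pass
offload_device = torch.device("cpu")   # where idle weights are parked

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
)

# Diffusers-native models (ModelMixin subclasses) expose enable_group_offload()
# directly. "leaf_level" offloads at the finest module granularity, and
# use_stream=True overlaps CPU<->GPU transfers with compute on a CUDA stream.
pipe.transformer.enable_group_offload(
    onload_device=onload_device,
    offload_device=offload_device,
    offload_type="leaf_level",
    use_stream=True,
)

# Arbitrary torch.nn.Module components (e.g. the transformers text encoder)
# go through the functional apply_group_offloading() instead. "block_level"
# moves num_blocks_per_group consecutive blocks between devices at a time.
apply_group_offloading(
    pipe.text_encoder,
    onload_device=onload_device,
    offload_type="block_level",
    num_blocks_per_group=2,
)
apply_group_offloading(pipe.vae, onload_device=onload_device, offload_type="leaf_level")

The trade-off is lower peak VRAM in exchange for transfer latency; use_stream=True hides much of that latency by prefetching the next group of weights while the current one computes.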