LPX55 committed
Commit fca4fe6 · verified · Parent(s): e36a1cb

Update app.py

Files changed (1): app.py (+12, -0)
app.py CHANGED
@@ -15,11 +15,14 @@ import subprocess
 from transformers import T5EncoderModel
 from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
 from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
+from diffusers.hooks import apply_group_offloading
 
 subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
 
 
 HF_TOKEN = os.getenv("HF_TOKEN")
+onload_device = torch.device("cuda")
+offload_device = torch.device("cpu")
 # Ensure that the minimal version of diffusers is installed
 check_min_version("0.30.2")
 quant_config = TransformersBitsAndBytesConfig(
@@ -89,12 +92,21 @@ pipe.text_encoder_2 = text_encoder_2_4bit
 # pipe.fuse_lora(lora_scale=0.8)
 
 pipe.transformer.to(torch.bfloat16)
+# We can utilize the enable_group_offload method for Diffusers model implementations
+pipe.transformer.enable_group_offload(onload_device=onload_device, offload_device=offload_device, offload_type="leaf_level", use_stream=True)
+# For any other model implementations, the apply_group_offloading function can be used
+apply_group_offloading(pipe.text_encoder, onload_device=onload_device, offload_type="block_level", num_blocks_per_group=2)
+apply_group_offloading(pipe.vae, onload_device=onload_device, offload_type="leaf_level")
 pipe.controlnet.to(torch.bfloat16)
 # pipe.push_to_hub("FLUX.1-Inpainting-8step_uncensored", private=True, token=HF_TOKEN)
 
 # pipe.unload_lora_weights()
 
 pipe.to("cuda")
+
+
+
+
 # pipe.enable_vae_tiling()
 
 # pipe.enable_model_cpu_offload()
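For reference, a minimal standalone sketch of the group-offloading pattern this commit adopts. The FLUX.1-dev checkpoint and the FluxPipeline class below are illustrative assumptions standing in for the app's actual pipeline; any diffusers pipeline exposing transformer, text_encoder, and vae components follows the same pattern.

# Minimal sketch of diffusers group offloading (assumptions: the checkpoint
# and pipeline class are placeholders, not taken from this commit).
import torch
from diffusers import FluxPipeline
from diffusers.hooks import apply_group_offloading

onload_device = torch.device("cuda")   # where weights live during a module's forward pass
offload_device = torch.device("cpu")   # where idle weights are parked

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
)

# Diffusers-native models (ModelMixin subclasses) expose enable_group_offload()
# directly. "leaf_level" offloads at the finest module granularity, and
# use_stream=True overlaps CPU<->GPU transfers with compute on a CUDA stream.
pipe.transformer.enable_group_offload(
    onload_device=onload_device,
    offload_device=offload_device,
    offload_type="leaf_level",
    use_stream=True,
)

# Arbitrary torch.nn.Module components (e.g. the transformers text encoder)
# go through the functional apply_group_offloading() instead. "block_level"
# moves num_blocks_per_group consecutive blocks between devices at a time.
apply_group_offloading(
    pipe.text_encoder,
    onload_device=onload_device,
    offload_type="block_level",
    num_blocks_per_group=2,
)
apply_group_offloading(pipe.vae, onload_device=onload_device, offload_type="leaf_level")

The trade-off is lower peak VRAM in exchange for transfer latency; use_stream=True hides much of that latency by prefetching the next group of weights while the current one computes.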