import torch
from diffsynth import ModelManager, FluxImagePipeline

# Weight files for FLUX.1-dev: text encoders, DiT, and VAE.
MODEL_FILES = [
    "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors",
    "models/FLUX/FLUX.1-dev/text_encoder_2",
    "models/FLUX/FLUX.1-dev/flux1-dev.safetensors",
    "models/FLUX/FLUX.1-dev/ae.safetensors",
]

# Load weights on CPU in FP8 to keep the resting memory footprint small;
# computation is upcast later by the pipeline.
model_manager = ModelManager(
    torch_dtype=torch.float8_e4m3fn,
    device="cpu",
    file_path_list=MODEL_FILES,
)

# Build the pipeline: inference runs in bfloat16 on the GPU.
pipe = FluxImagePipeline.from_model_manager(
    model_manager, torch_dtype=torch.bfloat16, device="cuda"
)

# Turn on VRAM management.
# `num_persistent_param_in_dit` controls how many DiT parameters stay
# resident in VRAM:
#   * None    -> every parameter stays in VRAM (fastest, most memory);
#   * 7*10**9 -> roughly 7 billion parameters stay resident;
#   * 0       -> nothing stays resident; layers are streamed in one by
#                one during inference (slowest, least memory).
pipe.enable_vram_management(num_persistent_param_in_dit=None)

# Generate and save a sample image.
image = pipe(prompt="a beautiful orange cat", seed=0)
image.save("image.jpg")