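"""Gradio Space: fast image-to-video generation with Wan 2.2 14B.

Loads the Wan2.2-I2V-A14B diffusers pipeline with Lightx2v step-distillation
LoRAs fused in, quantizes the large components with torchao, and serves a
Gradio UI that turns a single input image into a short MP4 clip.
"""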
import spaces
import torch
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
from diffusers.utils.export_utils import export_to_video
import gradio as gr
import tempfile
import numpy as np
from PIL import Image
import random
import gc
from torchao.quantization import (
    quantize_,
    Float8DynamicActivationFloat8WeightConfig,
    Int8WeightOnlyConfig,
)
import aoti  # local helper that loads ahead-of-time (AoT) compiled transformer blocks
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

# Output-resolution constraints: each side is clamped to [MIN_DIM, MAX_DIM]
# and snapped to a multiple of 16; square inputs are resized to 640x640.
MAX_DIM = 832
MIN_DIM = 480
SQUARE_DIM = 640
MULTIPLE_OF = 16

MAX_SEED = np.iinfo(np.int32).max

FIXED_FPS = 16  # output frame rate
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 80
MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)  # 0.5 s
MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)  # 5.0 s
# Load the pipeline with both Wan 2.2 denoising experts (the high-noise
# `transformer` and low-noise `transformer_2`) from a bf16 checkpoint,
# placing everything directly on the GPU.
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer=WanTransformer3DModel.from_pretrained(
        'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
        subfolder='transformer',
        torch_dtype=torch.bfloat16,
        device_map='cuda',
    ),
    transformer_2=WanTransformer3DModel.from_pretrained(
        'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
        subfolder='transformer_2',
        torch_dtype=torch.bfloat16,
        device_map='cuda',
    ),
    torch_dtype=torch.bfloat16,
).to('cuda')
# Fuse the Lightx2v CFG-step-distillation LoRA into both transformers so the
# pipeline produces usable results in only a few inference steps. The
# high-noise expert is fused at a stronger scale (3.0) than the low-noise one (1.0).
pipe.load_lora_weights(
    "Kijai/WanVideo_comfy",
    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
    adapter_name="lightx2v",
)
pipe.load_lora_weights(
    "Kijai/WanVideo_comfy",
    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
    adapter_name="lightx2v_2",
    load_into_transformer_2=True,
)
pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
pipe.unload_lora_weights()  # weights are fused, so the adapters are no longer needed
# Quantize the heavy components with torchao (int8 weight-only for the text
# encoder, dynamic fp8 for both transformers), then load the matching
# ahead-of-time compiled transformer blocks.
quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())
aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
default_prompt_i2v = "Bring this image to life with cinematic motion and smooth animation."
default_negative_prompt = "vivid tones, overexposed, static, blurred details, subtitles, style, artwork, painting, picture, still, overall gray, worst quality, low quality, JPEG compression artifacts, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, malformed limbs, fused fingers, still frame, cluttered background, three legs, crowd of people in the background, walking backwards"
def resize_image(image: Image.Image) -> Image.Image:
    """Resize (and center-crop if needed) an image to model-friendly dimensions.

    Square images become SQUARE_DIM x SQUARE_DIM. Otherwise the image is
    fitted into the [MIN_DIM, MAX_DIM] range: overly wide or tall images are
    first center-cropped to the most extreme allowed aspect ratio, and the
    final sides are snapped to multiples of MULTIPLE_OF.
    """
    width, height = image.size
    if width == height:
        return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)
    aspect_ratio = width / height
    MAX_ASPECT_RATIO = MAX_DIM / MIN_DIM
    MIN_ASPECT_RATIO = MIN_DIM / MAX_DIM
    image_to_resize = image
    if aspect_ratio > MAX_ASPECT_RATIO:
        # Too wide: center-crop horizontally to the widest allowed ratio.
        target_w, target_h = MAX_DIM, MIN_DIM
        crop_width = int(round(height * MAX_ASPECT_RATIO))
        left = (width - crop_width) // 2
        image_to_resize = image.crop((left, 0, left + crop_width, height))
    elif aspect_ratio < MIN_ASPECT_RATIO:
        # Too tall: center-crop vertically to the tallest allowed ratio.
        target_w, target_h = MIN_DIM, MAX_DIM
        crop_height = int(round(width / MIN_ASPECT_RATIO))
        top = (height - crop_height) // 2
        image_to_resize = image.crop((0, top, width, top + crop_height))
    else:
        # Aspect ratio is acceptable: scale the longer side up to MAX_DIM.
        if width > height:
            target_w = MAX_DIM
            target_h = int(round(target_w / aspect_ratio))
        else:
            target_h = MAX_DIM
            target_w = int(round(target_h * aspect_ratio))
    # Snap both sides to multiples of MULTIPLE_OF and clamp to the valid range.
    final_w = round(target_w / MULTIPLE_OF) * MULTIPLE_OF
    final_h = round(target_h / MULTIPLE_OF) * MULTIPLE_OF
    final_w = max(MIN_DIM, min(MAX_DIM, final_w))
    final_h = max(MIN_DIM, min(MAX_DIM, final_h))
    return image_to_resize.resize((final_w, final_h), Image.LANCZOS)
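# Worked examples of the sizing rules above (the input sizes are arbitrary
# illustrations; the outputs follow directly from the constants):
#   resize_image(Image.new("RGB", (1000, 1000))).size == (640, 640)  # square
#   resize_image(Image.new("RGB", (1280, 960))).size  == (832, 624)  # 4:3 fits, scaled + snapped
#   resize_image(Image.new("RGB", (1920, 1080))).size == (832, 480)  # 16:9 exceeds 832/480, cropped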
def get_num_frames(duration_seconds: float):
    """Convert a duration in seconds to a frame count within the model limits."""
    return 1 + int(np.clip(
        int(round(duration_seconds * FIXED_FPS)),
        MIN_FRAMES_MODEL,
        MAX_FRAMES_MODEL,
    ))
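# For example, the 3.5 s default yields 1 + clip(56, 8, 80) = 57 frames; any
# request over 5 s is capped at 1 + 80 = 81 frames, and very short ones floor
# at 1 + 8 = 9 frames.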
def get_duration(
    input_image,
    prompt,
    steps,
    negative_prompt,
    duration_seconds,
    guidance_scale,
    guidance_scale_2,
    seed,
    randomize_seed,
    progress,
):
    """Estimate the GPU time (in seconds) to request for one generation.

    Used by the @spaces.GPU decorator below: the per-step cost is scaled from
    a reference workload (81 frames at 832x624, ~15 s per step) by
    workload ** 1.5, plus a 10 s fixed overhead.
    """
    BASE_FRAMES_HEIGHT_WIDTH = 81 * 832 * 624
    BASE_STEP_DURATION = 15
    width, height = resize_image(input_image).size
    frames = get_num_frames(duration_seconds)
    factor = frames * width * height / BASE_FRAMES_HEIGHT_WIDTH
    step_duration = BASE_STEP_DURATION * factor ** 1.5
    return 10 + int(steps) * step_duration
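# Rough numbers from this formula: a 3.5 s clip on an image that resizes to
# 832x624, run for 6 steps, gives factor = 57/81 ≈ 0.70, so roughly
# 15 * 0.70**1.5 ≈ 8.9 s per step and a total request of about 10 + 6 * 8.9 ≈ 63 s.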
@spaces.GPU(duration=get_duration)
def generate_video(
    input_image,
    prompt,
    steps=4,
    negative_prompt=default_negative_prompt,
    duration_seconds=MAX_DURATION,
    guidance_scale=1,
    guidance_scale_2=1,
    seed=42,
    randomize_seed=False,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate a video from an input image and prompt; returns (video_path, seed)."""
    if input_image is None:
        raise gr.Error("Please upload an image.")
    num_frames = get_num_frames(duration_seconds)
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    resized_image = resize_image(input_image)
    output_frames_list = pipe(
        image=resized_image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=resized_image.height,
        width=resized_image.width,
        num_frames=num_frames,
        guidance_scale=float(guidance_scale),
        guidance_scale_2=float(guidance_scale_2),
        num_inference_steps=int(steps),
        generator=torch.Generator(device="cuda").manual_seed(current_seed),
    ).frames[0]
    # Write the frames to a temporary MP4 file and return its path.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
    return video_path, current_seed
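# Direct-call sketch (an assumption, not part of the original app: it presumes
# a CUDA GPU and that the example image exists in the working directory):
#   video_path, used_seed = generate_video(
#       Image.open("wan_i2v_input.JPG"), default_prompt_i2v, steps=6)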
# μ„Έλ ¨λœ ν•œκΈ€ UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🎬 WAN 기반 μ΄ˆκ³ μ† 이미지 to λΉ„λ””μ˜€ 무료 생성 μ˜€ν”ˆμ†ŒμŠ€")
gr.Markdown("** WAN 2.2 14B + FAST + ν•œκΈ€ν™” + νŠœλ‹ ** - 4~8λ‹¨κ³„λ‘œ λΉ λ₯Έ μ˜μƒ 생성")
gr.Markdown("** νŠΈλž˜ν”½ μ œν•œμ‹œ λ‹€μŒ 4개의 미러링 μ„œλ²„λ“€μ„ μ΄μš©ν•˜μ—¬ λΆ„μ‚° μ‚¬μš© ꢌ고")
gr.HTML("""
<div style="display: flex; gap: 10px; flex-wrap: wrap; justify-content: center; margin: 20px 0;">
<a href="https://huggingface.co/spaces/Heartsync/wan2_2-I2V-14B-FAST" target="_blank">
<img src="https://img.shields.io/static/v1?label=WAN%202.2%2014B%20FAST%2B&message=Image%20to%20Video&color=%230000ff&labelColor=%23800080&logo=huggingface&logoColor=white&style=for-the-badge" alt="badge">
</a>
<a href="https://huggingface.co/spaces/ginipick/wan2_2-I2V-14B-FAST" target="_blank">
<img src="https://img.shields.io/static/v1?label=WAN%202.2%2014B%20FAST%2B&message=Image%20to%20Video&color=%230000ff&labelColor=%23800080&logo=huggingface&logoColor=white&style=for-the-badge" alt="badge">
</a>
<a href="https://huggingface.co/spaces/ginigen/wan2_2-I2V-14B-FAST" target="_blank">
<img src="https://img.shields.io/static/v1?label=WAN%202.2%2014B%20FAST%2B&message=Image%20to%20Video&color=%230000ff&labelColor=%23800080&logo=huggingface&logoColor=white&style=for-the-badge" alt="badge">
</a>
<a href="https://huggingface.co/spaces/VIDraft/wan2_2-I2V-14B-FAST" target="_blank">
<img src="https://img.shields.io/static/v1?label=WAN%202.2%2014B%20FAST%2B&message=Image%20to%20Video&color=%230000ff&labelColor=%23800080&logo=huggingface&logoColor=white&style=for-the-badge" alt="badge">
</a>
<a href="https://discord.gg/openfreeai" target="_blank">
<img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="badge"></a>
</div>
""")
    with gr.Row():
        with gr.Column(scale=1):
            input_image_component = gr.Image(type="pil", label="Input Image")
            prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v, lines=2)
            duration_seconds_input = gr.Slider(
                minimum=MIN_DURATION,
                maximum=MAX_DURATION,
                step=0.1,
                value=3.5,
                label="Video Duration (seconds)"
            )
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=2)
                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 1")
                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 2")
                seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
                randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
            generate_button = gr.Button("πŸŽ₯ Generate Video", variant="primary", size="lg")
        with gr.Column(scale=1):
            video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
ui_inputs = [
input_image_component, prompt_input, steps_slider,
negative_prompt_input, duration_seconds_input,
guidance_scale_input, guidance_scale_2_input, seed_input, randomize_seed_checkbox
]
generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
    gr.Examples(
        examples=[
            [
                "wan_i2v_input.JPG",
                "POV selfie video: a white cat wearing sunglasses stands on a surfboard with a relaxed smile. Tropical beach in the background (clear water, green hills, blue sky with clouds). The surfboard tips over and the cat falls into the sea as the camera plunges underwater amid bubbles and sunlight. The cat's face appears briefly underwater, then it resurfaces and keeps filming the selfie, with a cheerful summer-vacation mood.",
                4,
            ],
            [
                "wan22_input_2.jpg",
                "A sleek lunar rover glides from left to right, kicking up moon dust. Astronauts in white spacesuits climb aboard with the Moon's characteristic bouncing gait. In the far background, a VTOL craft descends vertically and touches down quietly on the surface. Throughout the scene, a surreal aurora dances across the star-filled sky, with curtains of green, blue, and purple light bathing the lunar landscape in a mysterious, magical glow.",
                4,
            ],
            [
                "kill_bill.jpeg",
                "Uma Thurman's character Beatrix Kiddo holds her razor-sharp katana steadily under cinematic lighting. Suddenly the gleaming steel begins to soften and warp, losing structural integrity like heated metal. The blade's perfect tip slowly bends and droops, molten steel running down in silver rivulets. The transformation starts subtly, then accelerates as the metal grows ever more fluid. The camera stays fixed on her face as her sharp gaze gradually narrows, not in lethal focus but in confusion and alarm as she watches her weapon melt before her eyes. Her breathing quickens slightly as she witnesses this impossible transformation. The melting intensifies, the katana's perfect form growing ever more abstract as it drips from her hands like mercury. Molten droplets strike the floor with soft metallic impacts. Her expression shifts from calm readiness to bewilderment and concern as the legendary instrument of vengeance literally liquefies in her grip, leaving her defenseless.",
                6,
            ],
        ],
        inputs=[input_image_component, prompt_input, steps_slider],
        outputs=[video_output, seed_input],
        fn=generate_video,
        cache_examples="lazy"
    )
if __name__ == "__main__":
demo.queue().launch(mcp_server=True)