Update app.py

app.py CHANGED
@@ -630,8 +630,41 @@ def generate_image(
     if seed < 0:
         seed = random.randint(0, MAX_SEED)
 
+    # Load and process face image
+    print("Step 2/8: Loading and processing face image...")
+    face_image = load_image(face_image_path)
+    face_image = resize_img(face_image, max_side=1024)
+    face_image_cv2 = convert_from_image_to_cv2(face_image)
+    height, width, _ = face_image_cv2.shape
+    print(f" ✓ Image loaded: {width}x{height}")
+
+    # Detect face
+    print("Step 3/8: Detecting face...")
+    face_info_list = app.get(face_image_cv2)
+    if len(face_info_list) == 0:
+        raise gr.Error(
+            "Unable to detect a face in the image. Please upload a different photo with a clear face."
+        )
+    print(f" ✓ Face detected")
+
+    # Use largest detected face
+    print("Step 4/8: Processing face features...")
+    face_info = sorted(
+        face_info_list,
+        key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1]),
+    )[-1]
+
+    # Detect gender from face info if available (for identity preservation)
+    detected_gender = None
+    if "gender" in face_info:
+        detected_gender = face_info["gender"]
+        print(f" Detected gender: {'Female' if detected_gender == 0 else 'Male' if detected_gender == 1 else 'Unknown'}")
+    elif hasattr(face_info, "get") and face_info.get("gender") is not None:
+        detected_gender = face_info.get("gender")
+        print(f" Detected gender: {'Female' if detected_gender == 0 else 'Male' if detected_gender == 1 else 'Unknown'}")
+
     # Configure scheduler
-    print("Step
+    print("Step 5/8: Configuring scheduler...")
     scheduler_class_name = scheduler.split("-")[0]
     add_kwargs = {}
     if len(scheduler.split("-")) > 1:
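Note on the face-detection block above: it leans on InsightFace's FaceAnalysis API. A minimal sketch of the assumptions baked into the diff (buffalo_l model pack as a plausible default; results are dict-like Face objects, and the genderage head reports 0 = female, 1 = male, matching the diff's mapping):

    import cv2
    from insightface.app import FaceAnalysis

    app = FaceAnalysis(name="buffalo_l")         # detection + recognition + gender/age heads
    app.prepare(ctx_id=0, det_size=(640, 640))

    faces = app.get(cv2.imread("face.jpg"))      # list of dict-like Face objects
    if faces:
        # Same "largest bounding box wins" rule as the diff, via max() instead of sorted()[-1]
        largest = max(
            faces,
            key=lambda f: (f["bbox"][2] - f["bbox"][0]) * (f["bbox"][3] - f["bbox"][1]),
        )
        print(largest["gender"], largest["embedding"].shape)  # e.g. 1 (512,)

Because Face is a dict subclass, the elif hasattr(face_info, "get") branch can never fire once the "gender" in face_info check fails; it is harmless but redundant.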
@@ -642,7 +675,7 @@ def generate_image(
     pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config, **add_kwargs)
     print(f" ✓ Scheduler: {scheduler_class_name}")
 
-    # Apply style
+    # Apply style and process prompts
     if not prompt:
         prompt = "a person"
 
@@ -654,32 +687,22 @@ def generate_image(
     print(" ⚠ Warning: Prompt contains physical feature descriptions. These may override face identity.")
     print(" 💡 Tip: Focus on style/setting only (e.g., 'professional headshot, studio lighting') for better identity preservation.")
 
+    # Add gender preservation to negative prompt if gender was detected
+    gender_negative_terms = "wrong gender, gender swap, different person, different face, face swap, identity change, different identity"
+    if detected_gender is not None:
+        # Add opposite gender terms to negative prompt
+        if detected_gender == 0:  # Female
+            gender_negative_terms += ", man, male, masculine"
+        elif detected_gender == 1:  # Male
+            gender_negative_terms += ", woman, female, feminine"
+        print(f" ✓ Gender preservation enabled in negative prompt")
+
+    # Add gender preservation terms to negative prompt
+    if gender_negative_terms not in negative_prompt:
+        negative_prompt = f"{negative_prompt}, {gender_negative_terms}" if negative_prompt else gender_negative_terms
+
     prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
     print(f" ✓ Style applied: {style_name}")
-
-    # Load and process face image
-    print("Step 2/8: Loading and processing face image...")
-    face_image = load_image(face_image_path)
-    face_image = resize_img(face_image, max_side=1024)
-    face_image_cv2 = convert_from_image_to_cv2(face_image)
-    height, width, _ = face_image_cv2.shape
-    print(f" ✓ Image loaded: {width}x{height}")
-
-    # Detect face
-    print("Step 3/8: Detecting face...")
-    face_info = app.get(face_image_cv2)
-    if len(face_info) == 0:
-        raise gr.Error(
-            "Unable to detect a face in the image. Please upload a different photo with a clear face."
-        )
-    print(f" ✓ Face detected")
-
-    # Use largest detected face
-    print("Step 4/8: Processing face features...")
-    face_info = sorted(
-        face_info,
-        key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1]),
-    )[-1]
 
     # Extract face embedding and ensure it's a proper tensor
     face_emb_raw = face_info["embedding"]
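The substring check in the merge above makes the operation idempotent: re-running generation with the same negative_prompt cannot stack the gender terms twice. A standalone sketch of the same logic (merge_negative_terms is a hypothetical helper, not a name in app.py):

    def merge_negative_terms(negative_prompt: str, extra_terms: str) -> str:
        # Append extra terms unless the exact substring is already present
        if extra_terms in negative_prompt:
            return negative_prompt
        return f"{negative_prompt}, {extra_terms}" if negative_prompt else extra_terms

    base = "(lowres, low quality:1.2), watermark"
    terms = "wrong gender, gender swap"
    once = merge_negative_terms(base, terms)
    print(once)                                       # base plus ", wrong gender, gender swap"
    print(merge_negative_terms(once, terms) == once)  # True: applying twice is a no-op

The merge runs before apply_style, so any style-specific negatives are layered on after the gender terms.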
@@ -694,11 +717,15 @@ def generate_image(
         # Fallback: try to convert
         face_emb = torch.tensor(face_emb_raw, device=device, dtype=dtype)
 
-    #
+    # Normalize face embedding (L2 normalization for better identity preservation)
     if len(face_emb.shape) == 1:
         face_emb = face_emb.unsqueeze(0)  # Add batch dimension: [1, 512]
 
+    # L2 normalize the embedding to unit length (standard for face embeddings)
+    face_emb = torch.nn.functional.normalize(face_emb, p=2, dim=1)
+
     print(f" Face embedding final shape: {face_emb.shape}, dtype: {face_emb.dtype}, device: {face_emb.device}")
+    print(f" Face embedding norm: {torch.norm(face_emb, p=2, dim=1).item():.4f} (should be ~1.0)")
 
     face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info["kps"])
     print(f" Face keypoints image size: {face_kps.size}")
@@ -872,6 +899,7 @@ def generate_image(
     print(" Starting pipeline inference NOW...")
     sys.stdout.flush()
 
+    # Pass IP-Adapter scale explicitly to ensure it's used
     images = pipe(
         prompt=prompt,
         negative_prompt=negative_prompt,
@@ -886,6 +914,7 @@ def generate_image(
         generator=generator,
         callback=progress_callback,
         callback_steps=1,  # Update every step
+        ip_adapter_scale=adapter_strength_ratio,  # Explicitly pass IP-Adapter scale
     ).images
 
     print(f" ✓ Pipeline completed, generated {len(images)} image(s)")
@@ -1257,7 +1286,7 @@ with gr.Blocks() as demo:
 
     # Hidden advanced settings
     negative_prompt = gr.Textbox(
-        value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
+        value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green, wrong gender, gender swap, different person, different face, face swap, identity change, different identity",
         visible=False,
     )
     num_steps = gr.Slider(