Beepeen24 committed on
Commit
dc51fb0
·
verified ·
1 Parent(s): f325bb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -28
app.py CHANGED
@@ -630,8 +630,41 @@ def generate_image(
630
  if seed < 0:
631
  seed = random.randint(0, MAX_SEED)
632
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
633
  # Configure scheduler
634
- print("Step 4/8: Configuring scheduler...")
635
  scheduler_class_name = scheduler.split("-")[0]
636
  add_kwargs = {}
637
  if len(scheduler.split("-")) > 1:
@@ -642,7 +675,7 @@ def generate_image(
642
  pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config, **add_kwargs)
643
  print(f" ✓ Scheduler: {scheduler_class_name}")
644
 
645
- # Apply style
646
  if not prompt:
647
  prompt = "a person"
648
 
@@ -654,32 +687,22 @@ def generate_image(
654
  print(" ⚠ Warning: Prompt contains physical feature descriptions. These may override face identity.")
655
  print(" 💡 Tip: Focus on style/setting only (e.g., 'professional headshot, studio lighting') for better identity preservation.")
656
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
657
  prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
658
  print(f" ✓ Style applied: {style_name}")
659
-
660
- # Load and process face image
661
- print("Step 2/8: Loading and processing face image...")
662
- face_image = load_image(face_image_path)
663
- face_image = resize_img(face_image, max_side=1024)
664
- face_image_cv2 = convert_from_image_to_cv2(face_image)
665
- height, width, _ = face_image_cv2.shape
666
- print(f" ✓ Image loaded: {width}x{height}")
667
-
668
- # Detect face
669
- print("Step 3/8: Detecting face...")
670
- face_info = app.get(face_image_cv2)
671
- if len(face_info) == 0:
672
- raise gr.Error(
673
- "Unable to detect a face in the image. Please upload a different photo with a clear face."
674
- )
675
- print(f" ✓ Face detected")
676
-
677
- # Use largest detected face
678
- print("Step 4/8: Processing face features...")
679
- face_info = sorted(
680
- face_info,
681
- key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1]),
682
- )[-1]
683
 
684
  # Extract face embedding and ensure it's a proper tensor
685
  face_emb_raw = face_info["embedding"]
@@ -694,11 +717,15 @@ def generate_image(
694
  # Fallback: try to convert
695
  face_emb = torch.tensor(face_emb_raw, device=device, dtype=dtype)
696
 
697
- # Ensure it has the right shape (should be [512] for InstantID)
698
  if len(face_emb.shape) == 1:
699
  face_emb = face_emb.unsqueeze(0) # Add batch dimension: [1, 512]
700
 
 
 
 
701
  print(f" Face embedding final shape: {face_emb.shape}, dtype: {face_emb.dtype}, device: {face_emb.device}")
 
702
 
703
  face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info["kps"])
704
  print(f" Face keypoints image size: {face_kps.size}")
@@ -872,6 +899,7 @@ def generate_image(
872
  print(" Starting pipeline inference NOW...")
873
  sys.stdout.flush()
874
 
 
875
  images = pipe(
876
  prompt=prompt,
877
  negative_prompt=negative_prompt,
@@ -886,6 +914,7 @@ def generate_image(
886
  generator=generator,
887
  callback=progress_callback,
888
  callback_steps=1, # Update every step
 
889
  ).images
890
 
891
  print(f" ✓ Pipeline completed, generated {len(images)} image(s)")
@@ -1257,7 +1286,7 @@ with gr.Blocks() as demo:
1257
 
1258
  # Hidden advanced settings
1259
  negative_prompt = gr.Textbox(
1260
- value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
1261
  visible=False,
1262
  )
1263
  num_steps = gr.Slider(
 
630
  if seed < 0:
631
  seed = random.randint(0, MAX_SEED)
632
 
633
+ # Load and process face image
634
+ print("Step 2/8: Loading and processing face image...")
635
+ face_image = load_image(face_image_path)
636
+ face_image = resize_img(face_image, max_side=1024)
637
+ face_image_cv2 = convert_from_image_to_cv2(face_image)
638
+ height, width, _ = face_image_cv2.shape
639
+ print(f" ✓ Image loaded: {width}x{height}")
640
+
641
+ # Detect face
642
+ print("Step 3/8: Detecting face...")
643
+ face_info_list = app.get(face_image_cv2)
644
+ if len(face_info_list) == 0:
645
+ raise gr.Error(
646
+ "Unable to detect a face in the image. Please upload a different photo with a clear face."
647
+ )
648
+ print(f" ✓ Face detected")
649
+
650
+ # Use largest detected face
651
+ print("Step 4/8: Processing face features...")
652
+ face_info = sorted(
653
+ face_info_list,
654
+ key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1]),
655
+ )[-1]
656
+
657
+ # Detect gender from face info if available (for identity preservation)
658
+ detected_gender = None
659
+ if "gender" in face_info:
660
+ detected_gender = face_info["gender"]
661
+ print(f" Detected gender: {'Female' if detected_gender == 0 else 'Male' if detected_gender == 1 else 'Unknown'}")
662
+ elif hasattr(face_info, "get") and face_info.get("gender") is not None:
663
+ detected_gender = face_info.get("gender")
664
+ print(f" Detected gender: {'Female' if detected_gender == 0 else 'Male' if detected_gender == 1 else 'Unknown'}")
665
+
666
  # Configure scheduler
667
+ print("Step 5/8: Configuring scheduler...")
668
  scheduler_class_name = scheduler.split("-")[0]
669
  add_kwargs = {}
670
  if len(scheduler.split("-")) > 1:
 
675
  pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config, **add_kwargs)
676
  print(f" ✓ Scheduler: {scheduler_class_name}")
677
 
678
+ # Apply style and process prompts
679
  if not prompt:
680
  prompt = "a person"
681
 
 
687
  print(" ⚠ Warning: Prompt contains physical feature descriptions. These may override face identity.")
688
  print(" 💡 Tip: Focus on style/setting only (e.g., 'professional headshot, studio lighting') for better identity preservation.")
689
 
690
+ # Add gender preservation to negative prompt if gender was detected
691
+ gender_negative_terms = "wrong gender, gender swap, different person, different face, face swap, identity change, different identity"
692
+ if detected_gender is not None:
693
+ # Add opposite gender terms to negative prompt
694
+ if detected_gender == 0: # Female
695
+ gender_negative_terms += ", man, male, masculine"
696
+ elif detected_gender == 1: # Male
697
+ gender_negative_terms += ", woman, female, feminine"
698
+ print(f" ✓ Gender preservation enabled in negative prompt")
699
+
700
+ # Add gender preservation terms to negative prompt
701
+ if gender_negative_terms not in negative_prompt:
702
+ negative_prompt = f"{negative_prompt}, {gender_negative_terms}" if negative_prompt else gender_negative_terms
703
+
704
  prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
705
  print(f" ✓ Style applied: {style_name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
706
 
707
  # Extract face embedding and ensure it's a proper tensor
708
  face_emb_raw = face_info["embedding"]
 
717
  # Fallback: try to convert
718
  face_emb = torch.tensor(face_emb_raw, device=device, dtype=dtype)
719
 
720
+ # Normalize face embedding (L2 normalization for better identity preservation)
721
  if len(face_emb.shape) == 1:
722
  face_emb = face_emb.unsqueeze(0) # Add batch dimension: [1, 512]
723
 
724
+ # L2 normalize the embedding to unit length (standard for face embeddings)
725
+ face_emb = torch.nn.functional.normalize(face_emb, p=2, dim=1)
726
+
727
  print(f" Face embedding final shape: {face_emb.shape}, dtype: {face_emb.dtype}, device: {face_emb.device}")
728
+ print(f" Face embedding norm: {torch.norm(face_emb, p=2, dim=1).item():.4f} (should be ~1.0)")
729
 
730
  face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info["kps"])
731
  print(f" Face keypoints image size: {face_kps.size}")
 
899
  print(" Starting pipeline inference NOW...")
900
  sys.stdout.flush()
901
 
902
+ # Pass IP-Adapter scale explicitly to ensure it's used
903
  images = pipe(
904
  prompt=prompt,
905
  negative_prompt=negative_prompt,
 
914
  generator=generator,
915
  callback=progress_callback,
916
  callback_steps=1, # Update every step
917
+ ip_adapter_scale=adapter_strength_ratio, # Explicitly pass IP-Adapter scale
918
  ).images
919
 
920
  print(f" ✓ Pipeline completed, generated {len(images)} image(s)")
 
1286
 
1287
  # Hidden advanced settings
1288
  negative_prompt = gr.Textbox(
1289
+ value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green, wrong gender, gender swap, different person, different face, face swap, identity change, different identity",
1290
  visible=False,
1291
  )
1292
  num_steps = gr.Slider(