# -*- coding: utf-8 -*-
"""
Streamlit app: Prompt Generator from Image (NSFW-ready, self-hosted on Hugging Face Spaces)
- Backends: Gemini API (optional) + local open-source (Qwen2-VL 2B/7B)
- Detail modes: soft / artistic / raw
- JSONL export with policy fields (adult-only, consent)
- Simple keyword tag extractor (can be swapped for WD14/DeepDanbooru later)

NOTE: To use the local backend, select a Qwen2-VL model that fits your Space hardware.
Suggested default for T4 / low VRAM: "Qwen/Qwen2-VL-2B-Instruct"
(loads in 4-bit if bitsandbytes is available).

Requirements (put these lines into requirements.txt):
----- requirements.txt -----
streamlit==1.37.1
Pillow
torch
transformers>=4.45.0
accelerate>=0.33.0
sentencepiece
safetensors
huggingface_hub
bitsandbytes; platform_system != 'Darwin'
google-generativeai==0.7.2  # only if you keep the Gemini option
---------------------------
"""

import importlib.util
import json
import os
from datetime import datetime, timezone

import streamlit as st
from PIL import Image

# ===== Gemini (optional) =====
USE_GEMINI = True
try:
    import google.generativeai as genai  # type: ignore
except Exception:
    USE_GEMINI = False


def get_gemini_api_key() -> str:
    """Return the Gemini API key from SECRET_KEY or GOOGLE_API_KEY, or '' if unset."""
    return os.getenv("SECRET_KEY") or os.getenv("GOOGLE_API_KEY") or ""


# ===== Transformers (open-source backend) =====
# Qwen2-VL is supported natively in transformers >= 4.45, so we use the
# dedicated class from the official model card instead of trust_remote_code.
import torch
from transformers import AutoProcessor, BitsAndBytesConfig, Qwen2VLForConditionalGeneration

# ---------------- UI CONFIG ----------------
st.set_page_config(page_title="🖼️ Prompt Generator from Image (NSFW-ready)", layout="wide")
st.title("🖼️ Prompt Generator from Image")
st.markdown("> Please try my other tool at: https://imgkey.lovable.app")

with st.sidebar:
    st.header("⚙️ Settings")

    # Gemini availability message
    gem_key = get_gemini_api_key() if USE_GEMINI else ""
    gem_ready = bool(gem_key)

    backend_opts = ["Local Qwen2-VL (Open-Source)"]
    if USE_GEMINI and gem_ready:
        backend_opts.append("Gemini API")
    elif USE_GEMINI and not gem_ready:
        backend_opts.append("Gemini API (key missing)")
    else:
        backend_opts.append("Gemini API (unavailable)")

    backend = st.selectbox("Backend", backend_opts, index=0)
    mode = st.selectbox("Detail level", ["soft", "artistic", "raw"], index=2)
    model_id = st.text_input(
        "HF Model (local backend)",
        value="Qwen/Qwen2-VL-2B-Instruct",
        help="Pick a Qwen2-VL Instruct model that fits your GPU (e.g., 2B/7B).",
    )
    max_tokens = st.slider("Max new tokens", 64, 512, 220, 8)
    temperature = st.slider("Temperature", 0.0, 1.2, 0.6, 0.05)

    # Gemini status badge
    if USE_GEMINI:
        if gem_ready:
            st.success("Gemini key detected (SECRET_KEY / GOOGLE_API_KEY)")
        else:
            st.warning("Gemini key not found. Add SECRET_KEY or GOOGLE_API_KEY in Space Secrets.")

    st.divider()
    st.subheader("🔐 Policy")
    st.caption(
        "This app only describes consenting adults. "
        "It refuses illegal/underage/forced content."
    )
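
# --- Optional helper (illustrative sketch, not wired into the UI by default) ---
# Qwen2-VL's vision-token count grows with image resolution, so very large
# uploads can exhaust VRAM on small GPUs. A minimal pre-shrink, assuming a
# hypothetical MAX_SIDE budget of 1280 px; adjust to your hardware, or call it
# on `image` before captioning if you hit OOM errors.
MAX_SIDE = 1280  # assumed budget, not part of the original app


def downscale_image(img: Image.Image, max_side: int = MAX_SIDE) -> Image.Image:
    """Return a copy no larger than max_side on its longest edge (aspect kept)."""
    if max(img.size) <= max_side:
        return img
    img = img.copy()
    img.thumbnail((max_side, max_side), Image.Resampling.LANCZOS)
    return img
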
# ---------------- Helpers ----------------
@st.cache_resource(show_spinner=False)
def load_qwen(model_id: str):
    """Load the Qwen2-VL model + processor with best effort for the available hardware."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    load_kwargs = {
        "torch_dtype": torch.float16 if device == "cuda" else torch.float32,
        "low_cpu_mem_usage": True,
    }
    if device == "cuda":
        load_kwargs["device_map"] = "auto"
        # Quantize to 4-bit only when bitsandbytes is actually installed.
        if importlib.util.find_spec("bitsandbytes") is not None:
            load_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
    processor = AutoProcessor.from_pretrained(model_id)
    model = Qwen2VLForConditionalGeneration.from_pretrained(model_id, **load_kwargs)
    model.eval()
    return processor, model


def _system_prompt():
    return (
        "You are an expert adult-image describer. Describe only consenting adults. "
        "Never mention or imply minors, coercion, violence, incest, or illegal content."
    )


def _style_prompt(mode: str):
    if mode == "soft":
        return (
            "Write an elegant, suggestive description focusing on mood, wardrobe, and lighting. "
            "Avoid explicit anatomy terms."
        )
    if mode == "artistic":
        return (
            "Describe composition, pose, camera angle, lens, lighting, colors, materials, and setting."
        )
    # raw
    return (
        "Use precise, clinical adult anatomy/act terms when visible. "
        "Mention positions, garments, accessories, fluids, and interactions explicitly."
    )


def caption_with_qwen(image: Image.Image, mode: str, model_id: str, max_new_tokens=220, temperature=0.6):
    processor, model = load_qwen(model_id)
    messages = [
        {"role": "system", "content": _system_prompt()},
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {
                    "type": "text",
                    "text": (
                        f"Describe the image in the '{mode}' style. "
                        + _style_prompt(mode)
                        + " Return a single coherent paragraph."
                    ),
                },
            ],
        },
    ]

    # Build the chat template, then tokenize text + image together.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[text], images=[image], return_tensors="pt")
    device = next(model.parameters()).device
    inputs = {k: v.to(device) if hasattr(v, "to") else v for k, v in inputs.items()}

    # Greedy decode when temperature is 0; sampled decode otherwise
    # (transformers rejects temperature=0.0 when do_sample=True).
    gen_kwargs = {"max_new_tokens": int(max_new_tokens), "repetition_penalty": 1.05}
    if float(temperature) > 0:
        gen_kwargs.update({"do_sample": True, "temperature": float(temperature), "top_p": 0.9})
    with torch.no_grad():
        out = model.generate(**inputs, **gen_kwargs)

    # Decode only the newly generated tokens, dropping the prompt prefix.
    gen_only = out[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(gen_only, skip_special_tokens=True)[0].strip()


# Minimal keyword tagger over the caption text
KEYWORDS = {
    "acts": [
        "oral", "blowjob", "doggy", "missionary", "cowgirl", "anal", "handjob",
        "cumshot", "facial", "masturbation", "kissing", "threesome", "spanking",
    ],
    "nudity": ["topless", "nude", "full", "see-through", "lingerie", "panties", "stockings"],
    "style": ["photo", "studio", "candid", "mirror", "selfie", "soft lighting", "hard lighting", "bokeh"],
}


def extract_tags(text: str):
    tags = set()
    low = text.lower()
    for group in KEYWORDS.values():
        for w in group:
            if w in low:
                tags.add(w)
    return sorted(tags)
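
# Quick sanity check for the tagger (illustrative; matches the KEYWORDS above):
#   extract_tags("topless mirror selfie, soft lighting")
#   -> ['mirror', 'selfie', 'soft lighting', 'topless']
#
# Note that plain substring matching can false-positive (e.g. "anal" matches
# "analysis"). A word-boundary variant, as a minimal sketch you could swap in:
import re


def extract_tags_strict(text: str):
    """Like extract_tags, but only matches whole words/phrases."""
    tags = set()
    low = text.lower()
    for group in KEYWORDS.values():
        for w in group:
            if re.search(r"\b" + re.escape(w) + r"\b", low):
                tags.add(w)
    return sorted(tags)
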
# Gemini helper
def get_gemini_response(image: Image.Image):
    if not USE_GEMINI:
        raise RuntimeError("Gemini library not available in this Space.")
    secret_key = get_gemini_api_key()
    if not secret_key:
        raise RuntimeError(
            "Gemini API key not found. Set SECRET_KEY or GOOGLE_API_KEY in Space Secrets."
        )
    genai.configure(api_key=secret_key)
    model = genai.GenerativeModel("gemini-2.0-flash")
    sys = (
        "You are a prompt generator for text-to-image models. "
        "When content is adult, describe it clinically without euphemism."
    )
    resp = model.generate_content([sys, image])
    return getattr(resp, "text", "")


# ---------------- LAYOUT ----------------
col1, col2 = st.columns(2)

with col1:
    st.markdown("### 📤 Upload Your Image")
    uploaded_file = st.file_uploader(
        "Drag and drop or click to upload an image...",
        type=["jpg", "jpeg", "png", "webp"],
        label_visibility="collapsed",
    )
    image = None
    if uploaded_file is not None:
        try:
            image = Image.open(uploaded_file).convert("RGB")
            st.image(image, caption="Uploaded Image", use_column_width=True)
        except Exception as e:
            st.error(f"Failed to open image: {e}")

with col2:
    st.markdown("### 🎯 Generated Prompt")
    if image is None:
        st.info("Please upload an image to generate a prompt.")
    else:
        if st.button("✨ Generate Prompt", use_container_width=True):
            with st.spinner("Generating prompt..."):
                try:
                    if backend.startswith("Local Qwen2-VL"):
                        prompt = caption_with_qwen(
                            image,
                            mode=mode,
                            model_id=model_id,
                            max_new_tokens=max_tokens,
                            temperature=temperature,
                        )
                    else:
                        prompt = get_gemini_response(image)

                    if not prompt:
                        st.warning("No text generated.")
                    else:
                        st.code(prompt, language="markdown")
                        # Build the JSON record for export.
                        record = {
                            "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
                            "image": uploaded_file.name,
                            "mode": mode if backend.startswith("Local") else "gemini_default",
                            "prompt": prompt,
                            "tags": extract_tags(prompt),
                            "policy": {"age": "adult_only", "consent": True},
                            "backend": "qwen2-vl" if backend.startswith("Local") else "gemini",
                            "model": model_id if backend.startswith("Local") else "gemini-2.0-flash",
                        }
                        st.json(record)

                        # Append one record per line to the JSONL export.
                        out_path = "captions.jsonl"
                        with open(out_path, "a", encoding="utf-8") as f:
                            f.write(json.dumps(record, ensure_ascii=False) + "\n")
                        st.success(f"Appended to {out_path}")
                except torch.cuda.OutOfMemoryError:
                    st.error("CUDA OOM. Try a smaller model (e.g., Qwen2-VL-2B) or reduce max tokens.")
                except Exception as e:
                    st.error(f"Generation failed: {e}")

# Footer
st.markdown("---")
st.caption(
    "This Space is intended for lawful, adult-only NSFW dataset preparation. "
    "You are responsible for compliance with local laws and platform policies."
)
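
# --- Reading the export back (illustrative sketch) ---
# Each line of captions.jsonl is one JSON record as written above. This helper
# is an assumption about downstream dataset prep, not part of the original app.
def load_captions(path: str = "captions.jsonl"):
    """Rebuild the exported records, one dict per JSONL line."""
    with open(path, encoding="utf-8") as f:
        return [json.loads(line) for line in f]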