# -*- coding: utf-8 -*-
"""
Streamlit app: Prompt Generator from Image (NSFW-ready, self-hosted on Hugging Face Spaces)
- Backends: Gemini API (optional) + local open-source (Qwen2-VL 2B/7B)
- Detail modes: soft / artistic / raw
- JSONL export with policy fields (adult-only, consent)
- Simple keyword tag extractor (can be swapped for WD14/DeepDanbooru later)

NOTE: To use the local backend, select a Qwen2-VL model that fits your Space hardware.
Suggested default for T4 / low VRAM: "Qwen/Qwen2-VL-2B-Instruct"
(loads in 4-bit if bitsandbytes is available).

Requirements (put these lines into requirements.txt):
----- requirements.txt -----
streamlit==1.37.1
Pillow
torch
transformers>=4.45.0
accelerate>=0.33.0
sentencepiece
safetensors
huggingface_hub
bitsandbytes; platform_system != 'Darwin'
google-generativeai==0.7.2  # only if you keep the Gemini option
---------------------------
"""

import importlib.util
import json
import os
from datetime import datetime, timezone

import streamlit as st
from PIL import Image

# ===== Gemini (optional) =====
USE_GEMINI = True
try:
    import google.generativeai as genai  # type: ignore
except Exception:
    USE_GEMINI = False


def get_gemini_api_key() -> str:
    """Return the Gemini API key from SECRET_KEY or GOOGLE_API_KEY, or '' if unset."""
    return os.getenv("SECRET_KEY") or os.getenv("GOOGLE_API_KEY") or ""


# ===== Transformers (open-source backend) =====
# Qwen2-VL is supported natively in transformers >= 4.45, so we use the
# dedicated class from the official model card instead of trust_remote_code.
import torch
from transformers import AutoProcessor, BitsAndBytesConfig, Qwen2VLForConditionalGeneration

# ---------------- UI CONFIG ----------------
st.set_page_config(page_title="🖼️ Prompt Generator from Image (NSFW-ready)", layout="wide")
st.title("🖼️ Prompt Generator from Image")
st.markdown("> Please try my other tool at: https://imgkey.lovable.app")

with st.sidebar:
    st.header("⚙️ Settings")

    # Gemini availability message
    gem_key = get_gemini_api_key() if USE_GEMINI else ""
    gem_ready = bool(gem_key)

    backend_opts = ["Local Qwen2-VL (Open-Source)"]
    if USE_GEMINI and gem_ready:
        backend_opts.append("Gemini API")
    elif USE_GEMINI and not gem_ready:
        backend_opts.append("Gemini API (key missing)")
    else:
        backend_opts.append("Gemini API (unavailable)")

    backend = st.selectbox("Backend", backend_opts, index=0)
    mode = st.selectbox("Detail level", ["soft", "artistic", "raw"], index=2)
    model_id = st.text_input(
        "HF Model (local backend)",
        value="Qwen/Qwen2-VL-2B-Instruct",
        help="Pick a Qwen2-VL Instruct model that fits your GPU (e.g., 2B/7B).",
    )
    max_tokens = st.slider("Max new tokens", 64, 512, 220, 8)
    temperature = st.slider("Temperature", 0.0, 1.2, 0.6, 0.05)

    # Gemini status badge
    if USE_GEMINI:
        if gem_ready:
            st.success("Gemini key detected (SECRET_KEY / GOOGLE_API_KEY)")
        else:
            st.warning("Gemini key not found. Add SECRET_KEY or GOOGLE_API_KEY in Space Secrets.")

    st.divider()
    st.subheader("🔐 Policy")
    st.caption(
        "This app only describes consenting adults. "
        "It refuses illegal/underage/forced content."
    )
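
# --- Optional helper (illustrative sketch, not wired into the UI by default) ---
# Qwen2-VL's vision-token count grows with image resolution, so very large
# uploads can exhaust VRAM on small GPUs. A minimal pre-shrink, assuming a
# hypothetical MAX_SIDE budget of 1280 px; adjust to your hardware, or call it
# on `image` before captioning if you hit OOM errors.
MAX_SIDE = 1280  # assumed budget, not part of the original app


def downscale_image(img: Image.Image, max_side: int = MAX_SIDE) -> Image.Image:
    """Return a copy no larger than max_side on its longest edge (aspect kept)."""
    if max(img.size) <= max_side:
        return img
    img = img.copy()
    img.thumbnail((max_side, max_side), Image.Resampling.LANCZOS)
    return img
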
# ---------------- Helpers ----------------
@st.cache_resource(show_spinner=False)
def load_qwen(model_id: str):
    """Load the Qwen2-VL model + processor with best effort for the available hardware."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    load_kwargs = {
        "torch_dtype": torch.float16 if device == "cuda" else torch.float32,
        "low_cpu_mem_usage": True,
    }
    if device == "cuda":
        load_kwargs["device_map"] = "auto"
        # Quantize to 4-bit only when bitsandbytes is actually installed.
        if importlib.util.find_spec("bitsandbytes") is not None:
            load_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
    processor = AutoProcessor.from_pretrained(model_id)
    model = Qwen2VLForConditionalGeneration.from_pretrained(model_id, **load_kwargs)
    model.eval()
    return processor, model


def _system_prompt():
    return (
        "You are an expert adult-image describer. Describe only consenting adults. "
        "Never mention or imply minors, coercion, violence, incest, or illegal content."
    )


def _style_prompt(mode: str):
    if mode == "soft":
        return (
            "Write an elegant, suggestive description focusing on mood, wardrobe, and lighting. "
            "Avoid explicit anatomy terms."
        )
    if mode == "artistic":
        return (
            "Describe composition, pose, camera angle, lens, lighting, colors, materials, and setting."
        )
    # raw
    return (
        "Use precise, clinical adult anatomy/act terms when visible. "
        "Mention positions, garments, accessories, fluids, and interactions explicitly."
    )


def caption_with_qwen(image: Image.Image, mode: str, model_id: str, max_new_tokens=220, temperature=0.6):
    processor, model = load_qwen(model_id)
    messages = [
        {"role": "system", "content": _system_prompt()},
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {
                    "type": "text",
                    "text": (
                        f"Describe the image in the '{mode}' style. "
                        + _style_prompt(mode)
                        + " Return a single coherent paragraph."
                    ),
                },
            ],
        },
    ]

    # Build the chat template, then tokenize text + image together.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[text], images=[image], return_tensors="pt")
    device = next(model.parameters()).device
    inputs = {k: v.to(device) if hasattr(v, "to") else v for k, v in inputs.items()}

    # Greedy decode when temperature is 0; sampled decode otherwise
    # (transformers rejects temperature=0.0 when do_sample=True).
    gen_kwargs = {"max_new_tokens": int(max_new_tokens), "repetition_penalty": 1.05}
    if float(temperature) > 0:
        gen_kwargs.update({"do_sample": True, "temperature": float(temperature), "top_p": 0.9})
    with torch.no_grad():
        out = model.generate(**inputs, **gen_kwargs)

    # Decode only the newly generated tokens, dropping the prompt prefix.
    gen_only = out[:, inputs["input_ids"].shape[1]:]
    return processor.batch_decode(gen_only, skip_special_tokens=True)[0].strip()


# Minimal keyword tagger over the caption text
KEYWORDS = {
    "acts": [
        "oral", "blowjob", "doggy", "missionary", "cowgirl", "anal", "handjob",
        "cumshot", "facial", "masturbation", "kissing", "threesome", "spanking",
    ],
    "nudity": ["topless", "nude", "full", "see-through", "lingerie", "panties", "stockings"],
    "style": ["photo", "studio", "candid", "mirror", "selfie", "soft lighting", "hard lighting", "bokeh"],
}


def extract_tags(text: str):
    tags = set()
    low = text.lower()
    for group in KEYWORDS.values():
        for w in group:
            if w in low:
                tags.add(w)
    return sorted(tags)
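
# Quick sanity check for the tagger (illustrative; matches the KEYWORDS above):
#   extract_tags("topless mirror selfie, soft lighting")
#   -> ['mirror', 'selfie', 'soft lighting', 'topless']
#
# Note that plain substring matching can false-positive (e.g. "anal" matches
# "analysis"). A word-boundary variant, as a minimal sketch you could swap in:
import re


def extract_tags_strict(text: str):
    """Like extract_tags, but only matches whole words/phrases."""
    tags = set()
    low = text.lower()
    for group in KEYWORDS.values():
        for w in group:
            if re.search(r"\b" + re.escape(w) + r"\b", low):
                tags.add(w)
    return sorted(tags)
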
# Gemini helper
def get_gemini_response(image: Image.Image):
    if not USE_GEMINI:
        raise RuntimeError("Gemini library not available in this Space.")
    secret_key = get_gemini_api_key()
    if not secret_key:
        raise RuntimeError(
            "Gemini API key not found. Set SECRET_KEY or GOOGLE_API_KEY in Space Secrets."
        )
    genai.configure(api_key=secret_key)
    model = genai.GenerativeModel("gemini-2.0-flash")
    sys = (
        "You are a prompt generator for text-to-image models. "
        "When content is adult, describe it clinically without euphemism."
    )
    resp = model.generate_content([sys, image])
    return getattr(resp, "text", "")


# ---------------- LAYOUT ----------------
col1, col2 = st.columns(2)

with col1:
    st.markdown("### 📤 Upload Your Image")
    uploaded_file = st.file_uploader(
        "Drag and drop or click to upload an image...",
        type=["jpg", "jpeg", "png", "webp"],
        label_visibility="collapsed",
    )
    image = None
    if uploaded_file is not None:
        try:
            image = Image.open(uploaded_file).convert("RGB")
            st.image(image, caption="Uploaded Image", use_column_width=True)
        except Exception as e:
            st.error(f"Failed to open image: {e}")

with col2:
    st.markdown("### 🎯 Generated Prompt")
    if image is None:
        st.info("Please upload an image to generate a prompt.")
    else:
        if st.button("✨ Generate Prompt", use_container_width=True):
            with st.spinner("Generating prompt..."):
                try:
                    if backend.startswith("Local Qwen2-VL"):
                        prompt = caption_with_qwen(
                            image,
                            mode=mode,
                            model_id=model_id,
                            max_new_tokens=max_tokens,
                            temperature=temperature,
                        )
                    else:
                        prompt = get_gemini_response(image)

                    if not prompt:
                        st.warning("No text generated.")
                    else:
                        st.code(prompt, language="markdown")
                        # Build the JSON record for export.
                        record = {
                            "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
                            "image": uploaded_file.name,
                            "mode": mode if backend.startswith("Local") else "gemini_default",
                            "prompt": prompt,
                            "tags": extract_tags(prompt),
                            "policy": {"age": "adult_only", "consent": True},
                            "backend": "qwen2-vl" if backend.startswith("Local") else "gemini",
                            "model": model_id if backend.startswith("Local") else "gemini-2.0-flash",
                        }
                        st.json(record)

                        # Append one record per line to the JSONL export.
                        out_path = "captions.jsonl"
                        with open(out_path, "a", encoding="utf-8") as f:
                            f.write(json.dumps(record, ensure_ascii=False) + "\n")
                        st.success(f"Appended to {out_path}")
                except torch.cuda.OutOfMemoryError:
                    st.error("CUDA OOM. Try a smaller model (e.g., Qwen2-VL-2B) or reduce max tokens.")
                except Exception as e:
                    st.error(f"Generation failed: {e}")

# Footer
st.markdown("---")
st.caption(
    "This Space is intended for lawful, adult-only NSFW dataset preparation. "
    "You are responsible for compliance with local laws and platform policies."
)
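
# --- Reading the export back (illustrative sketch) ---
# Each line of captions.jsonl is one JSON record as written above. This helper
# is an assumption about downstream dataset prep, not part of the original app.
def load_captions(path: str = "captions.jsonl"):
    """Rebuild the exported records, one dict per JSONL line."""
    with open(path, encoding="utf-8") as f:
        return [json.loads(line) for line in f]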