from flask import Flask, request, jsonify
from flask_cors import CORS
from PIL import Image
import torch
from transformers import AutoProcessor, BlipForConditionalGeneration

app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "*"}})

# Run inference on GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the BLIP captioning model once at startup so requests don't pay the loading cost.
vision_processor, vision_model = None, None
try:
    vision_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
    vision_model = BlipForConditionalGeneration.from_pretrained(
        "Salesforce/blip-image-captioning-large"
    ).to(device)
    print("--- VISION SERVICE --- BLIP Vision model loaded successfully.")
except Exception as e:
    print(f"--- VISION SERVICE --- CRITICAL ERROR loading Vision model: {e}")


@app.route("/describe_image", methods=["POST"])
def describe_image():
    if vision_model is None:
        return jsonify({"error": "Vision model not available."}), 500

    user_prompt = request.form.get("prompt", "")
    image_file = request.files.get("image")
    if not image_file:
        return jsonify({"error": "No image file found."}), 400

    try:
        image_obj = Image.open(image_file.stream).convert("RGB")
        # Conditional captioning when a prompt is supplied, unconditional otherwise.
        if user_prompt:
            inputs = vision_processor(images=image_obj, text=user_prompt, return_tensors="pt").to(device)
        else:
            inputs = vision_processor(images=image_obj, return_tensors="pt").to(device)
        output = vision_model.generate(**inputs, max_new_tokens=50)
        caption = vision_processor.decode(output[0], skip_special_tokens=True).strip()
        return jsonify({"content": caption})
    except Exception as e:
        print(f"Error processing image: {e}")
        return jsonify({"error": "Sorry, I had trouble processing that image."}), 500


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8081)  # Use a different port for local testing
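
# --- Example client (a minimal sketch, kept commented out so it never runs on import).
# --- Assumes the service is reachable at http://localhost:8081; "photo.jpg" and the
# --- prompt text are hypothetical stand-ins for a real image path and prompt.
#
# import requests
#
# with open("photo.jpg", "rb") as f:
#     resp = requests.post(
#         "http://localhost:8081/describe_image",
#         files={"image": f},
#         data={"prompt": "a photo of"},  # optional; omit for unconditional captioning
#     )
# print(resp.json())  # expected shape: {"content": "<generated caption>"}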