from flask import Flask, request, jsonify
from flask_cors import CORS
from PIL import Image
import torch
from transformers import AutoProcessor, BlipForConditionalGeneration

app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "*"}})

# Run on GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

vision_processor, vision_model = None, None
try:
    vision_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
    vision_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
    print("--- VISION SERVICE --- BLIP Vision model loaded successfully.")
except Exception as e:
    print(f"--- VISION SERVICE --- CRITICAL ERROR loading Vision model: {e}")


@app.route("/describe", methods=["POST"])  # Route decorator restored so the handler is reachable; the path "/describe" is an assumption.
def describe_image():
    if vision_model is None:
        return jsonify({"error": "Vision model not available."}), 500

    user_prompt = request.form.get("prompt", "")
    image_file = request.files.get("image")
    if not image_file:
        return jsonify({"error": "No image file found."}), 400

    try:
        image_obj = Image.open(image_file.stream).convert("RGB")
        # Conditional captioning when a prompt is supplied, unconditional otherwise.
        if user_prompt:
            inputs = vision_processor(images=image_obj, text=user_prompt, return_tensors="pt").to(device)
        else:
            inputs = vision_processor(images=image_obj, return_tensors="pt").to(device)
        with torch.no_grad():
            output = vision_model.generate(**inputs, max_new_tokens=50)
        caption = vision_processor.decode(output[0], skip_special_tokens=True).strip()
        return jsonify({"content": caption})
    except Exception as e:
        print(f"Error processing image: {e}")
        return jsonify({"error": "Sorry, I had trouble processing that image."}), 500


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8081)  # Use a different port for local testing
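
For local testing, the endpoint can be exercised from a separate client script. The following is a minimal sketch, assuming the service is running on http://localhost:8081 and exposes POST /describe (the route name restored above); the file name "cat.jpg" and the prompt text are placeholder values.

# client_test.py (hypothetical name) -- sends one image to the vision service.
# Assumptions: server at http://localhost:8081, route /describe, form fields
# "image" (required file) and "prompt" (optional text), as in the service code above.
import requests

with open("cat.jpg", "rb") as f:
    response = requests.post(
        "http://localhost:8081/describe",
        files={"image": ("cat.jpg", f, "image/jpeg")},
        data={"prompt": "a photo of"},  # omit for unconditional captioning
    )

print(response.status_code, response.json())

A successful call returns a JSON body of the form {"content": "<caption>"}; missing files return a 400 and model-loading failures a 500, matching the error paths in the handler.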