fix build issues
app.py (CHANGED)
@@ -152,24 +152,61 @@ def describe_image_hf(image_path: str) -> str:
 
 
 def describe_image_openai(image_path: str) -> str:
-    """
+    """Describe an image using OpenAI Vision (modern SDK compatible)."""
     if not OPENAI_AVAILABLE:
         return "OpenAI not available for image captioning"
+
     try:
+        # Read image bytes
         with open(image_path, "rb") as f:
-
-
-
-
-
+            image_bytes = f.read()
+
+        # Convert to base64 for safe transport in older SDKs
+        b64_image = base64.b64encode(image_bytes).decode("utf-8")
+
+        # Modern prompt content
+        prompt = (
+            "You are an accessibility assistant that describes images for visually impaired users. "
+            "Provide a clear, helpful, vivid, human-friendly description of the image.\n"
+        )
+
+        # Some OpenAI SDK versions require: client = openai.OpenAI()
+        try:
+            client = openai.OpenAI()
+            response = client.chat.completions.create(
+                model="gpt-4o-mini",
+                messages=[
+                    {"role": "system", "content": "You describe images for visually impaired users."},
+                    {"role": "user", "content": [
+                        {"type": "text", "text": prompt},
+                        {
+                            "type": "image_url",
+                            "image_url": f"data:image/jpeg;base64,{b64_image}"
+                        }
+                    ]}
+                ],
+                max_tokens=300,
+            )
+            return response.choices[0].message.content.strip()
+
+        except Exception:
+            # Fallback for legacy SDKs
+            legacy_prompt = (
+                "You are an assistant that describes images for visually impaired users.\n"
+                "Provide a concise, vivid, accessible description.\n"
+                "Image(base64): " + b64_image
             )
             resp = openai.ChatCompletion.create(
+                model="gpt-4o-mini",
+                messages=[{"role": "user", "content": legacy_prompt}],
+                max_tokens=300,
             )
             return resp.choices[0].message.content.strip()
+
     except Exception as e:
         return f"OpenAI image describe error: {e}"
 
+
 # -----------------------------
 # MCP Tools
 # -----------------------------
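For reference, the following is a minimal standalone sketch of the modern-SDK path this patch targets. It is illustrative, not part of the commit: it assumes openai>=1.0 is installed, OPENAI_API_KEY is set in the environment, and the input is a JPEG; the function name and prompt text are placeholders. The main difference from the patch is the image part of the message: the Chat Completions vision format documented for the 1.x SDK wraps the data URL in an object under a "url" key rather than passing the bare string used above.

# Illustrative sketch only (not from the commit). Assumes openai>=1.0,
# OPENAI_API_KEY in the environment, and a JPEG input; names are placeholders.
import base64

from openai import OpenAI

def describe_image_sketch(image_path: str) -> str:
    # Read and base64-encode the image so it can be sent as an inline data URL
    with open(image_path, "rb") as f:
        b64_image = base64.b64encode(f.read()).decode("utf-8")

    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You describe images for visually impaired users."},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image clearly and vividly."},
                    {
                        # Documented vision format: the data URL goes inside an
                        # object with a "url" key, not as a bare string.
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{b64_image}"},
                    },
                ],
            },
        ],
        max_tokens=300,
    )
    return (response.choices[0].message.content or "").strip()

One caveat on the legacy fallback in the patch: it can only pass the base64 string as plain text, so a pre-1.0 SDK will not actually "see" the image, and on 1.x SDKs openai.ChatCompletion.create raises an error directing callers to the new client interface. The modern client path is therefore the one that produces a real visual description.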