Spaces:

shndap
/

multimodal-persian-QA

Sleeping

App Files Files Community

shndap committed on Sep 15

Commit

ab1ae1b

1 Parent(s): f77aa91

Refactor query and prompt generation in EndToEndRAG class to support multiple-choice questions and improve context formatting. Simplify user input handling in app.py by passing raw user text to the retrieval function.

Browse files

Files changed (2) hide show

app.py +2 -18
end_to_end_class.py +44 -40

app.py CHANGED Viewed

@@ -35,25 +35,9 @@ def respond(
     user_text = message if isinstance(message, str) else str(message)
     img_input = image_url if isinstance(image_url, str) and image_url.strip() else None
-    # Build a Persian prompt aligned with the notebook style
-    sys_prefix = system_message if isinstance(system_message, str) and system_message.strip() else "تو یک دستیار پاسخ‌گوی دقیق به زبان فارسی هستی."
-    user_desc_parts = []
-    if user_text and user_text.strip():
-        user_desc_parts.append(f"پرسش متنی: {user_text.strip()}")
-    if img_input:
-        user_desc_parts.append(f"لینک تصویر: {img_input}")
-    prompt = (
-        f"{sys_prefix} "
-        "از زمینهٔ زیر برای پاسخ استفاده کن و اگر کافی نبود، صراحتاً اعلام کن. "
-        "از حدس‌زدن بپرهیز و در صورت امکان به منبع اشاره کن.\n\n"
-        f"جزئیات ورودی کاربر:\n- {' | '.join(user_desc_parts) if user_desc_parts else 'نامشخص'}\n\n"
-        "پاسخ نهایی فارسی، موجز و مستدل:"
-    )
     try:
-        answer = rag_instance.query(text=prompt, image_url=img_input)
         yield answer
     except Exception as e:
         yield f"Error while generating answer: {e}"

     user_text = message if isinstance(message, str) else str(message)
     img_input = image_url if isinstance(image_url, str) and image_url.strip() else None
     try:
+        # Pass only the raw user text to retrieval so CLIP stays within its 77-token limit
+        answer = rag_instance.query(text=user_text, image_url=img_input)
         yield answer
     except Exception as e:
         yield f"Error while generating answer: {e}"

end_to_end_class.py CHANGED Viewed

@@ -125,13 +125,13 @@ class EndToEndRAG:
         return instance
-    def query(self, text: Optional[str], image_url: Optional[str]) -> str:
         if (text is None or text.strip() == "") and (image_url is None or image_url.strip() == ""):
             return "ورودی معتبری ارائه نشده است. لطفاً متن پرسش یا تصویر را ارسال کنید."
         retrieved = self._retrieve(text=text, image_url=image_url, top_k=self.top_k)
-        prompt = self._build_prompt(text=text, image_url=image_url, retrieved=retrieved)
-        answer = self._generate(prompt)
         return answer
     def _load_index(
@@ -288,56 +288,60 @@ class EndToEndRAG:
         text: Optional[str],
         image_url: Optional[str],
         retrieved: List[Dict[str, Any]],
     ) -> str:
-        context_blocks: List[str] = []
-        for item in retrieved:
-            parts = []
-            if item.get("title"):
-                parts.append(f"عنوان: {item['title']}")
-            if item.get("biography"):
-                parts.append(f"متن: {item['biography']}")
-            elif item.get("text"):
-                parts.append(f"متن: {item['text']}")
-            if item.get("image_urls"):
-                parts.append(f"تصاویر: {', '.join(item['image_urls'])}")
-            if item.get("image_path"):
-                parts.append(f"تصویر: {item['image_path']}")
-            if item.get("combined_similarity") is not None:
-                parts.append(f"امتیاز شباهت: {item['combined_similarity']:.3f}")
-            context_blocks.append("\n".join(parts))
-        context_str = "\n\n".join(context_blocks) if context_blocks else "(بدون محتوای بازیابی‌شده)"
-        user_query_desc = []
-        if text and text.strip():
-            user_query_desc.append(f"پرسش متنی: {text.strip()}")
-        if image_url and image_url.strip():
-            user_query_desc.append(f"لینک تصویر: {image_url.strip()}")
         prompt = (
-            "تو یک دستیار پاسخ‌گوی دقیق به زبان فارسی هستی. "
-            "از زمینهٔ زیر برای پاسخ استفاده کن و اگر کافی نبود، صراحتاً اعلام کن. "
-            "از حدس‌زدن بپرهیز و به منابع اشاره کن.\n\n"
-            f"اطلاعات بازیابی‌شده:\n{context_str}\n\n"
-            f"جزئیات ورودی کاربر:\n- {' | '.join(user_query_desc) if user_query_desc else 'نامشخص'}\n\n"
-            "پاسخ نهایی فارسی، موجز و مستدل:"
         )
         return prompt
-    def _generate(self, prompt: str) -> str:
         if self.inference_client is None:
             return (
                 "سرویس تولید متن تنظیم نشده است. لطفاً یک مدل از طریق Inference API تنظیم کنید یا تولید محلی را فعال کنید."
             )
         try:
-            # Prefer chat completion when available
             chat = self.inference_client.chat_completion(
                 messages=[
                     {"role": "system", "content": "You are a helpful assistant."},
                     {"role": "user", "content": prompt},
                 ],
-                max_tokens=self.max_new_tokens,
-                temperature=self.temperature,
                 stream=False,
             )
             if chat and getattr(chat, "choices", None):
@@ -350,9 +354,9 @@ class EndToEndRAG:
         try:
             out = self.inference_client.text_generation(
                 prompt,
-                max_new_tokens=self.max_new_tokens,
-                temperature=self.temperature,
-                do_sample=self.temperature > 0,
                 return_full_text=False,
                 details=False,
                 stream=False,

         return instance
+    def query(self, text: Optional[str], image_url: Optional[str], options: Optional[List[str]] = None) -> str:
         if (text is None or text.strip() == "") and (image_url is None or image_url.strip() == ""):
             return "ورودی معتبری ارائه نشده است. لطفاً متن پرسش یا تصویر را ارسال کنید."
         retrieved = self._retrieve(text=text, image_url=image_url, top_k=self.top_k)
+        prompt = self._build_prompt(text=text, image_url=image_url, retrieved=retrieved, options=options)
+        answer = self._generate(prompt, is_mcq=bool(options), options=options)
         return answer
     def _load_index(
         text: Optional[str],
         image_url: Optional[str],
         retrieved: List[Dict[str, Any]],
+        options: Optional[List[str]] = None,
     ) -> str:
+        # Notebook-style context formatting
+        parts: List[str] = []
+        for i, item in enumerate(retrieved, start=1):
+            parts.append(f"Person {i}:")
+            bio = item.get("biography") or item.get("text") or ""
+            parts.append(f"Biography: {bio}")
+            imgs = item.get("image_urls") or []
+            if imgs:
+                parts.append(f"Image URLs: {', '.join(imgs)}")
+            score = item.get("combined_similarity")
+            if score is not None:
+                parts.append(f"Relevance Score: {float(score):.3f}")
+            parts.append("---")
+        context = "\n".join(parts) if parts else "(no retrieved content)"
+        user_q = text.strip() if text else ""
+        if options:
+            options_text = "\n".join([f"{i}: {opt}" for i, opt in enumerate(options)])
+            prompt = (
+                f"Retrieved Information:\n{context}\n\n"
+                f"Question: {user_q}\n\n"
+                f"Options:\n{options_text}\n\n"
+                "Output ONLY the chosen option number in the format \"Choice: [number]\". Do not include any other text.\n"
+                "Choice:"
+            )
+            return prompt
+        # Free-form answer
         prompt = (
+            f"Retrieved Information:\n{context}\n\n"
+            f"Question: {user_q}\n\n"
+            "Answer in concise Persian:"
         )
         return prompt
+    def _generate(self, prompt: str, is_mcq: bool, options: Optional[List[str]]) -> str:
         if self.inference_client is None:
             return (
                 "سرویس تولید متن تنظیم نشده است. لطفاً یک مدل از طریق Inference API تنظیم کنید یا تولید محلی را فعال کنید."
             )
+        max_new = 10 if is_mcq else self.max_new_tokens
+        temp = 0.1 if is_mcq else self.temperature
+        # Prefer chat
         try:
             chat = self.inference_client.chat_completion(
                 messages=[
                     {"role": "system", "content": "You are a helpful assistant."},
                     {"role": "user", "content": prompt},
                 ],
+                max_tokens=max_new,
+                temperature=temp,
                 stream=False,
             )
             if chat and getattr(chat, "choices", None):
         try:
             out = self.inference_client.text_generation(
                 prompt,
+                max_new_tokens=max_new,
+                temperature=temp,
+                do_sample=temp > 0,
                 return_full_text=False,
                 details=False,
                 stream=False,