| import os | |
| import pickle | |
| import faiss | |
| import json | |
| import numpy as np | |
| import pandas as pd | |
| import requests | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from sentence_transformers import SentenceTransformer | |
| # ------------------ | |
| # Environment Setup | |
| # ------------------ | |
# Point the Hugging Face libraries at a cache directory inside the container.
# NOTE(review): these variables are assigned after `sentence_transformers`
# has already been imported at the top of the file -- confirm the libraries
# still pick them up at this point, otherwise the assignment has to move
# above the imports.
os.environ.update(
    {
        "HF_HOME": "/app/hf_cache",
        "TRANSFORMERS_CACHE": "/app/hf_cache",
    }
)

# Credentials are read from the process environment; each is None when unset.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
GOOGLE_API_KEY = os.environ.get("GOOGLE_SEARCH_API_KEY")
GOOGLE_CX = os.environ.get("GOOGLE_SEARCH_CX")

# The generateContent endpoint is only built when a key is configured.
# NOTE(review): the key is embedded in the query string, so it shows up
# wherever this URL is logged or echoed back.
if GEMINI_API_KEY:
    GEMINI_URL = (
        "https://generativelanguage.googleapis.com/v1beta/models/"
        f"gemma-3n-e4b-it:generateContent?key={GEMINI_API_KEY}"
    )
else:
    GEMINI_URL = None
| # ------------------ | |
| # FastAPI App Config | |
| # ------------------ | |
# Application instance used by the routes in this module.
app = FastAPI()

# CORS is fully open: any origin, method and header is accepted.
# NOTE(review): FastAPI's CORS documentation says wildcard values should not
# be combined with allow_credentials=True -- confirm whether credentialed
# browser requests are actually needed, and if not, list explicit origins or
# drop the credentials flag.
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
| # ------------------ | |
| # Load Components | |
| # ------------------ | |
# Every component below is loaded on a best-effort basis: a failure is
# printed and the corresponding global is left as None so the process still
# starts.

# Sentence-embedding model, read from a local directory.
try:
    model = SentenceTransformer("/app/model")
except Exception as load_error:
    print(f"Error loading model: {load_error}")
    model = None

# Kural records; the rest of the module uses `.shape`, `.iloc` and `len()`
# on this object.
# NOTE(review): pickle.load can execute code embedded in the file; this is
# only safe as long as the .pkl is a trusted build artifact.
try:
    with open("thirukkural_data.pkl", "rb") as f:
        kural_data = pickle.load(f)
    print(f"✅ Kural data loaded: {kural_data.shape}")
except Exception as load_error:
    print(f"❌ Error loading data: {load_error}")
    kural_data = None

# Vector indexes, one per language. If either read fails, both are reset so
# callers never see a half-loaded pair.
try:
    english_index = faiss.read_index("thirukkural_english_index.faiss")
    tamil_index = faiss.read_index("thirukkural_tamil_index.faiss")
    print("✅ FAISS indexes loaded")
except Exception as load_error:
    print(f"❌ Error loading FAISS indexes: {load_error}")
    english_index = None
    tamil_index = None
| # ------------------ | |
| # Request Schema | |
| # ------------------ | |
# Request body for the search endpoint. Documented with comments rather than
# a docstring so the generated schema is not altered.
class QueryRequest(BaseModel):
    # Free-text message or question from the user.
    query: str
    # Language code: "ta" selects Tamil; the rest of this module treats any
    # other value as English.
    lang: str
    # Number of couplets to retrieve; 3 when the client omits it.
    top_k: int = 3
| # ------------------ | |
| # Intent Classifier | |
| # ------------------ | |
def classify_intent(query: str) -> str:
    """Map a user query to a coarse intent label by keyword lookup.

    Matching is a case-insensitive *substring* test. Keyword groups are tried
    in a fixed order and the first group with a hit decides the label.

    NOTE: this module defines ``classify_intent`` a second time further down;
    that later definition replaces this one when the module is imported.

    Args:
        query: Raw user text.

    Returns:
        One of "greeting", "thanks", "life_advice", "web_search" or
        "fallback".
    """
    lowered = query.lower()
    keyword_groups = (
        ("greeting", ("hello", "hi", "vanakkam", "hey")),
        ("thanks", ("thanks", "thank you", "nandri")),
        (
            "life_advice",
            ("sad", "happy", "life", "love", "philosophy", "purpose", "feel", "emotion"),
        ),
        (
            "web_search",
            ("what", "why", "how", "when", "where", "who", "search", "find"),
        ),
    )
    for label, keywords in keyword_groups:
        for keyword in keywords:
            if keyword in lowered:
                return label
    return "fallback"
| # ------------------ | |
| # Utility Functions | |
| # ------------------ | |
def safe_get_value(row, keys, default=""):
    """Return the first usable value stored in *row* under any of *keys*.

    Args:
        row: Mapping-like container supporting ``in`` and ``[]`` lookup.
        keys: Candidate key names, tried in the given order.
        default: Value returned when no candidate yields a usable value.

    Returns:
        The first value that is present and is neither ``None`` nor a float
        NaN; otherwise *default*. Other falsy values such as ``""`` or ``0``
        are returned as-is.
    """
    for key in keys:
        if key not in row:
            continue
        value = row[key]
        # Rows converted from tabular data may carry NaN/None for empty
        # cells. Treat those as absent so the next candidate key gets a
        # chance and NaN does not end up in a JSON response.
        if value is None or (isinstance(value, float) and value != value):
            continue
        return value
    return default
def format_kural_for_response(kural_dict):
    """Build the API representation of a single kural record.

    Each output field is taken from the first matching source key, looked up
    through ``safe_get_value``.

    Args:
        kural_dict: Mapping holding one record's fields.

    Returns:
        A dict with the keys "Number", "Tamil", "English", "Category" and
        "Explanation".
    """
    number = safe_get_value(kural_dict, ["ID", "Index"], "Unknown")
    tamil = safe_get_value(kural_dict, ["Kural", "combined_text_tamil"])
    english = safe_get_value(kural_dict, ["Couplet", "combined_text_english"])
    category = safe_get_value(
        kural_dict, ["Adhigaram", "Section", "Adhigaram_ID"], "General"
    )
    commentary = safe_get_value(
        kural_dict, ["Vilakam", "Kalaingar_Urai", "Solomon_Pappaiya"], ""
    )
    return {
        "Number": number,
        "Tamil": tamil,
        "English": english,
        "Category": category,
        "Explanation": commentary,
    }
def format_kural_for_prompt(kural_dict):
    """Render one kural record as a three-line text snippet for an LLM prompt.

    Args:
        kural_dict: Mapping holding one record's fields.

    Returns:
        A string with the number, the Tamil text and the English text on
        separate lines; a missing field is rendered as an empty string.
    """
    number = safe_get_value(kural_dict, ["ID", "Index"])
    tamil = safe_get_value(kural_dict, ["Kural"])
    english = safe_get_value(kural_dict, ["Couplet"])
    return "\n".join((f"#{number}", f"Tamil: {tamil}", f"English: {english}"))
| # ------------------ | |
| # Web Search Agent | |
| # ------------------ | |
def web_search_agent(query: str, lang: str = "en") -> dict:
    """Run a Google Custom Search for *query* and return the top hits.

    Args:
        query: Search text, forwarded unchanged as the ``q`` parameter.
        lang: Language code forwarded as the ``hl`` parameter.

    Returns:
        A dict with "query", "results" (a list of title/snippet/link dicts)
        and "explanation". On missing credentials or any failure the result
        list is empty and "explanation" carries a short message. This
        function does not raise.
    """
    if not GOOGLE_API_KEY or not GOOGLE_CX:
        return {
            "query": query,
            "results": [],
            "explanation": "Google Search API credentials are missing.",
        }

    params = {
        "key": GOOGLE_API_KEY,
        "cx": GOOGLE_CX,
        "q": query,
        "hl": lang,
        "num": 5,
    }
    try:
        response = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=params,
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
        items = data.get("items", []) if isinstance(data, dict) else []
        results = [
            {
                "title": item.get("title"),
                "snippet": item.get("snippet"),
                "link": item.get("link"),
            }
            for item in items or []
        ]
    except Exception as exc:
        # The API key travels in the request URL's query string, and the
        # text of requests' exceptions contains that URL. Report only the
        # exception type and HTTP status; never echo the message itself to
        # the client or the log.
        status = getattr(getattr(exc, "response", None), "status_code", None)
        print(f"❌ Web search failed: {type(exc).__name__} (HTTP status: {status})")
        return {
            "query": query,
            "results": [],
            "explanation": "Search failed. Please try again later.",
        }

    return {
        "query": query,
        "results": results,
        "explanation": f"Here are relevant results for: '{query}'",
    }
| # === Intent Classifier === | |
def classify_intent(query: str) -> str:
    """Classify a user query into a routing intent using keyword rules.

    Rules are evaluated in priority order:

    1. greeting, farewell or thanks      -> "greeting"
    2. emotional / philosophical wording -> "life_advice"
    3. query starts with a question word -> "factual_question"
    4. mentions searching or finding     -> "web_search"
    5. anything else                     -> "fallback"

    Keywords are matched on word boundaries instead of raw substrings, so
    "hi" no longer fires inside "this", "which" or "philosophy".
    Conversational phrases must match as whole words; topic keywords match as
    word prefixes so inflected forms ("sadness", "searching") still count.

    Args:
        query: Raw user text.

    Returns:
        One of the five intent labels listed above.
    """
    query_lower = query.lower().strip()

    # Turn punctuation into spaces and pad both ends with a space. A keyword
    # can then be located with a plain substring test that is anchored on
    # word boundaries.
    words = "".join(ch if ch.isalnum() else " " for ch in query_lower).split()
    padded = f" {' '.join(words)} "

    def has_whole_phrase(phrases):
        return any(f" {phrase} " in padded for phrase in phrases)

    def has_word_prefix(stems):
        return any(f" {stem}" in padded for stem in stems)

    greetings = ["hi", "hello", "vanakkam", "hey", "good morning", "good evening"]
    gratitude = ["thanks", "thank you"]
    farewell = ["bye", "goodbye"]
    emotional_keywords = [
        "sad", "happy", "lonely", "depressed", "worried", "peace", "struggle",
        "confidence", "purpose", "meaning", "life", "love", "failure", "success",
    ]
    philosophical_keywords = [
        "karma", "virtue", "truth", "justice", "ethics", "philosophy", "soul",
    ]

    # Conversational openers/closers win over everything else.
    if has_whole_phrase(greetings + farewell + gratitude):
        return "greeting"

    # Emotional/philosophical wording takes priority over question words, so
    # "what is the purpose of life" is answered from the kurals.
    if has_word_prefix(emotional_keywords + philosophical_keywords):
        return "life_advice"

    # Only then look for factual / web queries.
    question_starters = ("who", "what", "when", "where", "why", "how")
    if query_lower.startswith(question_starters):
        return "factual_question"

    if has_word_prefix(["search", "find"]):
        return "web_search"

    return "fallback"
| # === Thirukkural RAG Agent === | |
def thirukkural_rag_agent(query: str, lang: str, top_k: int):
    """Retrieve the couplets closest to *query* and ask Gemini to explain them.

    Args:
        query: User text; it is embedded and used for the vector search.
        lang: "ta" searches the Tamil index; any other value searches the
            English index.
        top_k: Maximum number of couplets to retrieve.

    Returns:
        ``{"error": ...}`` when a required component failed to load.
        Otherwise a dict with "query", "language", "matched_kurals",
        "web_results" (always empty here), "explanation" and
        "total_results". When nothing matches, the dict additionally keeps
        the legacy empty "results" list.
    """
    # Compare against None explicitly. kural_data is used as a pandas object
    # (.iloc / .shape), and truth-testing a DataFrame raises ValueError, so
    # the former all([...]) check failed as soon as the data was loaded.
    components = (model, kural_data, tamil_index, english_index)
    if any(component is None for component in components):
        return {"error": "RAG components not loaded properly."}

    # Embed and vector search. A non-positive top_k means "retrieve nothing"
    # instead of handing an invalid k to the index.
    search_k = min(top_k, len(kural_data))
    results = []
    if search_k > 0:
        query_embedding = model.encode([query])
        index = tamil_index if lang == "ta" else english_index
        _, neighbour_ids = index.search(
            np.array(query_embedding).astype("float32"), search_k
        )
        for i in neighbour_ids[0]:
            # Skip labels outside the table (FAISS reports -1 when it has no
            # neighbour to return for a slot).
            if 0 <= i < len(kural_data):
                row_dict = kural_data.iloc[i].to_dict()
                results.append(format_kural_for_response(row_dict))

    if not results:
        return {
            "query": query,
            "language": lang,
            "results": [],
            "matched_kurals": [],
            "explanation": "No relevant Thirukkurals found.",
            "total_results": 0,
        }

    # Gemini prompt
    kural_texts = "\n\n".join(
        f'#{k["Number"]}\nTamil: {k["Tamil"]}\nEnglish: {k["English"]}'
        for k in results
    )
    prompt = f"""
You are a compassionate, wise, and culturally sensitive literary guide.
Your role is to explain how specific Thirukkural couplets relate meaningfully to a user's question. Your tone must be emotionally supportive, philosophically grounded, and respectful of the Tamil literary tradition.
The user has asked:
➡️ "{query}" (Language: {lang})
📜 Matching Thirukkurals:
{kural_texts}
🎯 Guidelines:
- Reply fully in Tamil if lang='ta'; in English if lang='en'.
- Don't rephrase Kural poems; show clearly and preserve original form.
- Avoid AI/tech mentions. Write like a human mentor.
- Use paragraph style, not list.
"""

    # Gemini API call (best effort: the kurals are returned even if it fails)
    explanation = "Explanation unavailable."
    if GEMINI_API_KEY and GEMINI_URL:
        try:
            headers = {"Content-Type": "application/json"}
            payload = {"contents": [{"parts": [{"text": prompt}]}]}
            response = requests.post(
                GEMINI_URL, headers=headers, data=json.dumps(payload), timeout=30
            )
            response.raise_for_status()
            response_data = response.json()
            # `or [{}]` also covers an empty list, which would otherwise
            # raise IndexError on [0].
            candidates = response_data.get("candidates") or [{}]
            parts = candidates[0].get("content", {}).get("parts") or [{}]
            text = (parts[0].get("text") or "").strip()
            if text:
                explanation = text
        except Exception as exc:
            # GEMINI_URL carries the API key in its query string and the
            # text of requests' exceptions contains that URL. Log only the
            # exception type and HTTP status and keep the neutral message
            # for the client.
            status = getattr(getattr(exc, "response", None), "status_code", None)
            print(f"❌ Gemini request failed: {type(exc).__name__} (HTTP status: {status})")

    # Return the retrieved couplets together with the generated explanation.
    # (Previously the couplets were returned under "web_results" with a
    # canned explanation, so the Gemini answer was thrown away.)
    # "web_results" is kept as an empty list so the response keeps the same
    # set of keys.
    return {
        "query": query,
        "language": lang,
        "matched_kurals": results,
        "web_results": [],
        "explanation": explanation,
        "total_results": len(results),
    }
| # === Supervisor Agent === | |
def supervisor_agent(query: str, lang: str, top_k: int = 3):
    """Route a query to the agent that matches its classified intent.

    Args:
        query: Raw user text.
        lang: "ta" for Tamil replies; any other value yields English.
        top_k: Number of couplets requested from the RAG agent.

    Returns:
        The dict produced by the selected agent, or a canned reply dict for
        greetings and for queries that could not be classified.
    """
    intent = classify_intent(query)

    if intent == "life_advice":
        return thirukkural_rag_agent(query, lang, top_k)

    # classify_intent labels question-style queries "factual_question".
    # Without this branch they fell through to the "please rephrase" reply;
    # they are factual lookups, so they go to web search as well.
    if intent in ("web_search", "factual_question"):
        return web_search_agent(query, lang)

    if intent == "greeting":
        if lang == "ta":
            explanation = "வணக்கம்! என்னைப் பற்றி கேளுங்கள். உங்கள் கேள்விக்கு உதவ தயாராக உள்ளேன்."
        else:
            explanation = "Hello! I'm here to help. Ask me anything."
    else:
        explanation = "I couldn't determine how to respond. Could you please rephrase?"

    return {
        "query": query,
        "language": lang,
        "matched_kurals": [],
        "explanation": explanation,
        "total_results": 0,
    }
| # === /search/ Endpoint === | |
# Register the handler: without the decorator the function is never exposed
# as a route, so the service would have no /search/ endpoint.
@app.post("/search/")
def search_router(req: QueryRequest):
    """Handle POST /search/ by delegating to the supervisor agent.

    Args:
        req: Validated request body (query, lang, top_k).

    Returns:
        The response dict produced by ``supervisor_agent``.

    Raises:
        HTTPException: status 500 for any unexpected failure; an
            HTTPException raised further down is passed through unchanged.
    """
    try:
        return supervisor_agent(req.query, req.lang, req.top_k)
    except HTTPException:
        # Keep deliberate HTTP errors (status/detail) intact.
        raise
    except Exception as e:
        print(f"Error in search_router: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
| # import os | |
| # from fastapi import FastAPI, HTTPException | |
| # from fastapi.middleware.cors import CORSMiddleware | |
| # from pydantic import BaseModel | |
| # import faiss | |
| # import pickle | |
| # import numpy as np | |
| # from sentence_transformers import SentenceTransformer | |
| # import requests | |
| # import json | |
| # import pandas as pd | |
| # # Set cache dirs (optional, for HF spaces) | |
| # os.environ["HF_HOME"] = "/app/hf_cache" | |
| # os.environ["TRANSFORMERS_CACHE"] = "/app/hf_cache" | |
| # # Load Gemini API key (add this in your HF Space secrets) | |
| # GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") | |
| # if GEMINI_API_KEY: | |
| # GEMINI_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemma-3n-e4b-it:generateContent?key={GEMINI_API_KEY}" | |
| # else: | |
| # GEMINI_URL = None | |
| # # Initialize FastAPI | |
| # app = FastAPI() | |
| # # Add CORS middleware | |
| # app.add_middleware( | |
| # CORSMiddleware, | |
| # allow_origins=["*"], | |
| # allow_credentials=True, | |
| # allow_methods=["*"], | |
| # allow_headers=["*"], | |
| # ) | |
| # # Load local SentenceTransformer model | |
| # try: | |
| # model = SentenceTransformer("/app/model") | |
| # except Exception as e: | |
| # print(f"Error loading model: {e}") | |
| # model = None | |
| # # Load Thirukkural data | |
| # try: | |
| # with open("thirukkural_data.pkl", "rb") as f: | |
| # kural_data = pickle.load(f) | |
| # print(f"Data loaded successfully. Shape: {kural_data.shape}") | |
| # print(f"Columns: {list(kural_data.columns)}") | |
| # except Exception as e: | |
| # print(f"Error loading data: {e}") | |
| # kural_data = None | |
| # # Load FAISS indexes | |
| # try: | |
| # english_index = faiss.read_index("thirukkural_english_index.faiss") | |
| # tamil_index = faiss.read_index("thirukkural_tamil_index.faiss") | |
| # print("FAISS indexes loaded successfully") | |
| # except Exception as e: | |
| # print(f"Error loading FAISS indexes: {e}") | |
| # english_index = None | |
| # tamil_index = None | |
| # # Request schema | |
| # class QueryRequest(BaseModel): | |
| # query: str | |
| # lang: str # "en" or "ta" | |
| # top_k: int = 3 | |
| # def safe_get_value(row, possible_keys, default=""): | |
| # """Safely get value from row using possible key names""" | |
| # for key in possible_keys: | |
| # if key in row: | |
| # return row[key] | |
| # return default | |
| # def format_kural_for_gemini(kural_dict, lang): | |
| # number = safe_get_value(kural_dict, ['ID', 'Index'], "Unknown") | |
| # if lang == 'ta': | |
| # text = safe_get_value(kural_dict, ['combined_text_tamil', 'Kural'], "Tamil not available") | |
| # else: | |
| # text = safe_get_value(kural_dict, ['combined_text_english', 'Couplet'], "English not available") | |
| # return f"{number}. {text}" | |
| # def format_kural_for_response(kural_dict): | |
| # return { | |
| # "Number": safe_get_value(kural_dict, ['ID', 'Index'], "Unknown"), | |
| # "Tamil": safe_get_value(kural_dict, ['Kural', 'combined_text_tamil']), | |
| # "English": safe_get_value(kural_dict, ['Couplet', 'combined_text_english']), | |
| # "Category": safe_get_value(kural_dict, ['Adhigaram', 'Section', 'Adhigaram_ID'], "General"), | |
| # "Explanation": safe_get_value(kural_dict, ['Vilakam', 'Kalaingar_Urai', 'Solomon_Pappaiya'], "") | |
| # } | |
| # # Search + Explain Endpoint | |
| # @app.post("/search/") | |
| # def search_and_explain(req: QueryRequest): | |
| # try: | |
| # # Check if required components are loaded | |
| # if model is None: | |
| # raise HTTPException(status_code=500, detail="Model not loaded") | |
| # if kural_data is None: | |
| # raise HTTPException(status_code=500, detail="Kural data not loaded") | |
| # if english_index is None or tamil_index is None: | |
| # raise HTTPException(status_code=500, detail="FAISS indexes not loaded") | |
| # # Step 1: Embed and search | |
| # query_embedding = model.encode([req.query]) | |
| # index = tamil_index if req.lang == "ta" else english_index | |
| # # Ensure we don't search for more results than available | |
| # search_k = min(req.top_k, len(kural_data)) | |
| # D, I = index.search(np.array(query_embedding).astype("float32"), search_k) | |
| # # Step 2: Get results and handle potential index errors | |
| # results = [] | |
| # for i in I[0]: | |
| # if i >= 0 and i < len(kural_data): # Valid index | |
| # row_dict = kural_data.iloc[i].to_dict() | |
| # formatted_kural = format_kural_for_response(row_dict) | |
| # results.append(formatted_kural) | |
| # if not results: | |
| # raise HTTPException(status_code=404, detail="No matching kurals found") | |
| # # Step 3: Format for Gemini explanation | |
| # # ✅ New better-structured Gemini prompt | |
| # kural_texts = '\n\n'.join([ | |
| # f'#{k["Number"]}\nTamil: {k["Tamil"]}\nEnglish: {k["English"]}' for k in results | |
| # ]) | |
| # prompt = f""" | |
| # You are a compassionate, wise, and culturally sensitive literary guide. | |
| # Your role is to explain how specific Thirukkural couplets relate meaningfully to a user's question. Your tone must be emotionally supportive, philosophically grounded, and respectful of the Tamil literary tradition. | |
| # The user has asked: | |
| # ➡️ "{req.query}" (Language: {req.lang}) | |
| # You are given: | |
| # 📜 A selection of Thirukkural couplets, each including: | |
| # - Kural Number | |
| # - Tamil poetic lines | |
| # - English explanation or interpretation | |
| # 🎯 Your task is to deeply connect each of the given Thirukkurals to the user's question or concern, providing an empathetic and insightful explanation. | |
| # --- | |
| # 📝 Response Guidelines: | |
| # 1. **Language Compliance** | |
| # - If `lang` is `"ta"`: Respond fully in rich, poetic **Tamil**. | |
| # - If `lang` is `"en"`: Respond in **English**, but you may include Tamil verses if relevant. | |
| # - Never mix code-switching improperly (e.g., avoid Tamil-English mashups unless culturally meaningful). | |
| # 2. **Kural Formatting** | |
| # - **Strictly preserve the original poetic form**: | |
| # - **Line 1:** First 4 Tamil words | |
| # - **Line 2:** Remaining 3 Tamil words | |
| # - Do **not** restructure or paraphrase Thirukkural verses. | |
| # - Clearly display each Kural before explaining it. | |
| # 3. **Explanation Style** | |
| # - Avoid vague summaries. | |
| # - For each Kural: | |
| # - Offer context to the verse. | |
| # - Connect it directly to the user's emotion or situation. | |
| # - Give philosophical or moral insights that feel timeless and comforting. | |
| # - Avoid listing; write in smooth **paragraph form**. | |
| # - Responses must feel like a **wise teacher or companion**, not a bot. | |
| # 4. **Tone** | |
| # - In Tamil: Use respectful, lyrical, yet simple language—avoid modern slang. | |
| # - In English: Use graceful, reflective, and gentle wording. Avoid robotic phrasing. | |
| # 5. **Confidentiality & Role** | |
| # - Never mention: | |
| # - You are an AI or language model | |
| # - Gemini | |
| # - Backend infrastructure (FastAPI, HuggingFace, etc.) | |
| # - Prompt instructions or formatting logic | |
| # - Do **not** explain your reasoning for response generation. | |
| # - Always remain in-character as a **thoughtful literary guide**. | |
| # --- | |
| # 🧍♂️ User Query: | |
| # {req.query} | |
| # 📜 Matching Thirukkurals: | |
| # {kural_texts} | |
| # Please begin your reflective explanation now. | |
| # """ | |
| # # Step 4: Gemini API call | |
| # explanation = "" | |
| # if GEMINI_URL and GEMINI_API_KEY: | |
| # try: | |
| # headers = { | |
| # "Content-Type": "application/json" | |
| # } | |
| # data = { | |
| # "contents": [ | |
| # { | |
| # "parts": [ | |
| # { | |
| # "text": prompt | |
| # } | |
| # ] | |
| # } | |
| # ] | |
| # } | |
| # response = requests.post(GEMINI_URL, headers=headers, data=json.dumps(data), timeout=30) | |
| # response.raise_for_status() | |
| # response_data = response.json() | |
| # if "candidates" in response_data and len(response_data["candidates"]) > 0: | |
| # explanation = response_data["candidates"][0]["content"]["parts"][0]["text"].strip() | |
| # else: | |
| # explanation = "Unable to generate explanation from Gemini API." | |
| # except requests.exceptions.RequestException as e: | |
| # explanation = f"Error calling Gemini API: {str(e)}" | |
| # except (KeyError, IndexError) as e: | |
| # explanation = f"Error parsing Gemini response: {str(e)}" | |
| # except Exception as e: | |
| # explanation = f"Unexpected error with Gemini API: {str(e)}" | |
| # else: | |
| # explanation = "Gemini API key not configured. Please set 'GEMINI_API_KEY' environment variable." | |
| # # Step 5: Return response | |
| # return { | |
| # "query": req.query, | |
| # "language": req.lang, | |
| # "matched_kurals": results, | |
| # "explanation": explanation, | |
| # "total_results": len(results) | |
| # } | |
| # except HTTPException: | |
| # raise | |
| # except Exception as e: | |
| # print(f"Unexpected error in search_and_explain: {str(e)}") | |
| # raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") | |
| # # Health check | |
| # @app.get("/") | |
| # def root(): | |
| # status = { | |
| # "message": "Thirukkural AI RAG API", | |
| # "status": "running", | |
| # "components": { | |
| # "model_loaded": model is not None, | |
| # "data_loaded": kural_data is not None, | |
| # "english_index_loaded": english_index is not None, | |
| # "tamil_index_loaded": tamil_index is not None, | |
| # "gemini_configured": GEMINI_API_KEY is not None | |
| # } | |
| # } | |
| # if kural_data is not None: | |
| # status["data_info"] = { | |
| # "total_kurals": len(kural_data), | |
| # "columns": list(kural_data.columns) | |
| # } | |
| # return status | |
| # # Debug endpoint to check data structure | |
| # @app.get("/debug/data") | |
| # def debug_data(): | |
| # if kural_data is None: | |
| # return {"error": "Data not loaded"} | |
| # return { | |
| # "shape": kural_data.shape, | |
| # "columns": list(kural_data.columns), | |
| # "sample_row": kural_data.iloc[0].to_dict() if len(kural_data) > 0 else None, | |
| # "dtypes": kural_data.dtypes.to_dict() | |
| # } | |
| # # Test endpoint | |
| # @app.get("/test") | |
| # def test_endpoint(): | |
| # return {"message": "API is working!", "timestamp": pd.Timestamp.now().isoformat()} |