Spaces:

viswadarshan06
/

Thirukkural_AI_Backend

Running

App Files Community

viswadarshan06 committed on Jul 13

Commit

edcd131

verified ·

1 Parent(s): 243baad

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -22

app.py CHANGED Viewed

@@ -7,16 +7,27 @@ import numpy as np
 from transformers import AutoTokenizer, AutoModel
 import torch
-# Set Hugging Face cache dir to avoid /.cache error
 os.environ["HF_HOME"] = "/app/hf_cache"
 os.environ["TRANSFORMERS_CACHE"] = "/app/hf_cache"
-# Initialize FastAPI app
 app = FastAPI()
-# Load multilingual model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2", cache_dir="/app/hf_cache")
-model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2", cache_dir="/app/hf_cache")
 # Load Thirukkural data
 with open("thirukkural_data.pkl", "rb") as f:
@@ -26,41 +37,35 @@ with open("thirukkural_data.pkl", "rb") as f:
 english_index = faiss.read_index("thirukkural_english_index.faiss")
 tamil_index = faiss.read_index("thirukkural_tamil_index.faiss")
-# Input model for API
 class QueryRequest(BaseModel):
-    query: str        # user input
-    lang: str         # "en" or "ta"
-    top_k: int = 3    # number of kurals to return
-# Mean pooling function
 def mean_pooling(model_output, attention_mask):
-    token_embeddings = model_output[0]  # first element: token embeddings
     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
     return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
-# POST endpoint for retrieving relevant Thirukkural(s)
 @app.post("/search/")
 def search_kural(req: QueryRequest):
-    # Tokenize input
     encoded_input = tokenizer(req.query, padding=True, truncation=True, return_tensors="pt")
-    # Compute embeddings
     with torch.no_grad():
         model_output = model(**encoded_input)
         query_embedding = mean_pooling(model_output, encoded_input["attention_mask"])
-    # Convert to numpy
     query_embedding = query_embedding.detach().cpu().numpy()
-    # Choose FAISS index
     index = tamil_index if req.lang == "ta" else english_index
     D, I = index.search(query_embedding.astype("float32"), req.top_k)
-    # Return top-k matching kurals
     results = [kural_data[i] for i in I[0]]
     return {"results": results}
-# Health check endpoint
 @app.get("/")
 def root():
-    return {"message": "Thirukkural FastAPI RAG is running."}

 from transformers import AutoTokenizer, AutoModel
 import torch
+# Set cache to custom dir to avoid /.cache issues
 os.environ["HF_HOME"] = "/app/hf_cache"
 os.environ["TRANSFORMERS_CACHE"] = "/app/hf_cache"
+# Get Hugging Face token from environment (must be set in HF Space secrets)
+hf_token = os.getenv("HF_TOKEN")
+# Initialize FastAPI
 app = FastAPI()
+# ✅ Load your private model
+tokenizer = AutoTokenizer.from_pretrained(
+    "viswadarshan06/paraphrase-multilingual-MiniLM-L12-v2-local",
+    cache_dir="/app/hf_cache",
+    token=hf_token
+)
+model = AutoModel.from_pretrained(
+    "viswadarshan06/paraphrase-multilingual-MiniLM-L12-v2-local",
+    cache_dir="/app/hf_cache",
+    token=hf_token
+)
 # Load Thirukkural data
 with open("thirukkural_data.pkl", "rb") as f:
 english_index = faiss.read_index("thirukkural_english_index.faiss")
 tamil_index = faiss.read_index("thirukkural_tamil_index.faiss")
+# Request schema
 class QueryRequest(BaseModel):
+    query: str
+    lang: str  # "en" or "ta"
+    top_k: int = 3
+# Mean pooling (same as SentenceTransformer style)
 def mean_pooling(model_output, attention_mask):
+    token_embeddings = model_output[0]
     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
     return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+# POST endpoint
 @app.post("/search/")
 def search_kural(req: QueryRequest):
     encoded_input = tokenizer(req.query, padding=True, truncation=True, return_tensors="pt")
     with torch.no_grad():
         model_output = model(**encoded_input)
         query_embedding = mean_pooling(model_output, encoded_input["attention_mask"])
     query_embedding = query_embedding.detach().cpu().numpy()
     index = tamil_index if req.lang == "ta" else english_index
     D, I = index.search(query_embedding.astype("float32"), req.top_k)
     results = [kural_data[i] for i in I[0]]
     return {"results": results}
+# Health check
 @app.get("/")
 def root():
+    return {"message": "Thirukkural FastAPI RAG is running with private model."}