Spaces:

moseleydev
/

Medical-Report-Extractive-Summarizer

Sleeping

App Files Files Community

moseleydev committed on Mar 20

Commit

237e309

verified ·

1 Parent(s): a30a7a5

loaded custom model

Browse files

Files changed (1) hide show

main.py +55 -32

main.py CHANGED Viewed

@@ -2,16 +2,16 @@ from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModel
-from sklearn.cluster import KMeans
 import torch
-import numpy as np
 import spacy
 import spacy.cli
 import time
 app = FastAPI(
     title="Clinical Extractive Summarization",
-    description="SciBERT + KMeans NLP Engine for Medical Reports"
 )
 app.add_middleware(
@@ -21,9 +21,25 @@ app.add_middleware(
     allow_headers=["*"],
 )
 tokenizer = None
 model = None
 nlp = None
 class ReportRequest(BaseModel):
     text: str
@@ -41,14 +57,28 @@ def health_check():
 def summarize_medical_report(request: ReportRequest):
     start_time = time.time()
-    global tokenizer, model, nlp
     if model is None:
-        print("Initializing SciBERT and SpaCy... This takes a moment.")
-        # Load SciBERT
         model_name = "allenai/scibert_scivocab_uncased"
         tokenizer = AutoTokenizer.from_pretrained(model_name)
-        model = AutoModel.from_pretrained(model_name)
         try:
             nlp = spacy.load("en_core_web_sm")
@@ -61,36 +91,29 @@ def summarize_medical_report(request: ReportRequest):
     # 1. Safely split text into sentences using SpaCy NLP
     doc = nlp(request.text)
-    sentences = [sent.text.strip() for sent in doc.sents if len(sent.text.strip()) > 5]
     # Edge case: Report is too short to summarize
     if len(sentences) <= request.num_sentences:
         return {"summary": request.text, "metadata": {"status": "too_short"}}
-    # 2. Get embeddings for each sentence using SciBERT
-    embeddings = []
-    for sent in sentences:
-        inputs = tokenizer(sent, return_tensors="pt", truncation=True, padding=True, max_length=512)
-        with torch.no_grad():
-            output = model(**inputs)
-        # Extract the [CLS] token representation
-        cls_embedding = output.last_hidden_state[0][0].numpy()
-        embeddings.append(cls_embedding)
-    # 3. Use KMeans to cluster the embeddings and find the most central sentences
-    # n_init='auto' suppresses sklearn warnings
-    kmeans = KMeans(n_clusters=request.num_sentences, n_init='auto', random_state=42).fit(embeddings)
-    avg = []
-    for i in range(request.num_sentences):
-        # Find the sentence closest to the cluster centroid
-        idx = np.argmin(np.linalg.norm(embeddings - kmeans.cluster_centers_[i], axis=1))
-        avg.append(idx)
-    # 4. Sort indices chronologically to maintain original report flow
-    avg = sorted(list(set(avg)))
-    final_summary = " ".join([sentences[i] for i in avg])
     process_time = round((time.time() - start_time) * 1000, 2)
@@ -99,7 +122,7 @@ def summarize_medical_report(request: ReportRequest):
         "metadata": {
             "processing_time_ms": process_time,
             "original_length": len(sentences),
-            "summary_length": len(avg),
-            "engine": "SciBERT + KMeans"
         }
     }

 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModel
 import torch
+import torch.nn as nn
 import spacy
 import spacy.cli
 import time
+import os
 app = FastAPI(
     title="Clinical Extractive Summarization",
+    description="SciBERT + BERTsum Fine-Tuned Engine for Medical Reports"
 )
 app.add_middleware(
     allow_headers=["*"],
 )
+# --- ARCHITECTURE DEFINITION ---
+class BioExtractor(nn.Module):
+    def __init__(self, model_name):
+        super(BioExtractor, self).__init__()
+        self.bert = AutoModel.from_pretrained(model_name)
+        # The classification layer that predicts sentence salience
+        self.classifier = nn.Linear(768, 1)
+        self.sigmoid = nn.Sigmoid()
+    def forward(self, input_ids, attention_mask):
+        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
+        cls_output = outputs.last_hidden_state[:, 0, :]
+        return self.sigmoid(self.classifier(cls_output))
+# Global variables to cache models in memory
 tokenizer = None
 model = None
 nlp = None
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 class ReportRequest(BaseModel):
     text: str
 def summarize_medical_report(request: ReportRequest):
     start_time = time.time()
+    global tokenizer, model, nlp, device
     if model is None:
+        print("Initializing Fine-Tuned SciBERT and SpaCy...")
+        # Load the base tokenizer
         model_name = "allenai/scibert_scivocab_uncased"
         tokenizer = AutoTokenizer.from_pretrained(model_name)
+        # Instantiate your custom architecture
+        model = BioExtractor(model_name)
+        # Load the trained weights from the uploaded .pt file
+        model_path = "med_summarizer_trained.pt"
+        if os.path.exists(model_path):
+            print(f"Loading fine-tuned weights from {model_path}...")
+            # map_location ensures it works even if Hugging Face runs on a CPU space
+            model.load_state_dict(torch.load(model_path, map_location=device))
+        else:
+            print(f"WARNING: {model_path} not found! Upload it to your Space.")
+        model.to(device)
+        model.eval() # Lock the model for inference
         try:
             nlp = spacy.load("en_core_web_sm")
     # 1. Safely split text into sentences using SpaCy NLP
     doc = nlp(request.text)
+    # Filter out extremely short strings just like your Colab script
+    sentences = [sent.text.strip() for sent in doc.sents if len(sent.text.strip()) > 10]
     # Edge case: Report is too short to summarize
     if len(sentences) <= request.num_sentences:
         return {"summary": request.text, "metadata": {"status": "too_short"}}
+    # 2. Get probability scores for each sentence using the fine-tuned model
+    scores = []
+    with torch.no_grad():
+        for sent in sentences:
+            inputs = tokenizer(sent, return_tensors="pt", truncation=True, padding='max_length', max_length=128).to(device)
+            output = model(inputs['input_ids'], inputs['attention_mask'])
+            scores.append(output.item())
+    # 3. Rank and select the top N sentences
+    # Enumerate keeps track of the original sentence index (e.g., (index, score))
+    scored_sentences = sorted(enumerate(scores), key=lambda x: x[1], reverse=True)
+    top_indices = [idx for idx, score in scored_sentences[:request.num_sentences]]
+    # 4. Sort indices chronologically to maintain original report flow
+    top_indices_sorted = sorted(top_indices)
+    final_summary = " ".join([sentences[i] for i in top_indices_sorted])
     process_time = round((time.time() - start_time) * 1000, 2)
         "metadata": {
             "processing_time_ms": process_time,
             "original_length": len(sentences),
+            "summary_length": len(top_indices_sorted),
+            "engine": "SciBERT + BERTsum Fine-Tuned"
         }
     }