shree256 committed on
Commit
dbe4b72
·
1 Parent(s): 0e8e166

updates with biobert

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +96 -4
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
app.py CHANGED
@@ -1,8 +1,100 @@
1
  import gradio as gr
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
3
+ from transformers import pipeline
4
+ import torch
5
 
 
 
6
 
7
# Checkpoint used to build the NER pipeline below.
# You can replace this with a specific PHI detection model if available,
# e.g. a medical NER model such as "alvaroalon2/biobert_diseases_ner".
# NOTE(review): "dmis-lab/biobert-v1.1" appears to be a base BioBERT checkpoint,
# not one fine-tuned for PHI detection — confirm before trusting the labels.
MODEL_NAME = "dmis-lab/biobert-v1.1"

# Build the token-classification pipeline once, at import time. Any failure is
# reported on stdout and leaves ``ner_pipeline`` set to None so callers can
# detect that the model is unavailable.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    ner_pipeline = pipeline(
        "token-classification",
        model=MODEL_NAME,
        # Reuse the tokenizer loaded above; passing the checkpoint name here
        # would make the pipeline load the same tokenizer a second time.
        tokenizer=tokenizer,
        aggregation_strategy="simple",
    )
except Exception as e:
    print(f"Error loading model: {e}")
    print("Falling back to a simpler approach...")
    ner_pipeline = None
28
+
29
+
30
def detect_phi(text: str) -> str:
    """Run the NER pipeline on *text* and report the detected entities.

    Args:
        text: Input text containing potential PHI (Protected Health
            Information).

    Returns:
        A Markdown string listing each detected entity with its label and
        confidence, followed by the original text. A short plain message is
        returned instead when the input is empty or whitespace-only, the
        pipeline is not loaded, no entities are found, or processing raises.
    """
    if not text or not text.strip():
        return "Please enter some text to analyze."

    if ner_pipeline is None:
        return "Model not loaded. Please check the model configuration."

    # Aggregated pipeline results carry the label under "entity_group";
    # non-aggregated results use "entity". "label" is kept as a last resort
    # for backward compatibility with the previous lookup.
    label_keys = ("entity_group", "entity", "label")

    try:
        entities = ner_pipeline(text)

        if not entities:
            return "No entities detected in the text."

        # Collect the pieces and join once instead of repeated string +=.
        parts = ["**Detected PHI Entities:**\n\n"]
        for entity in entities:
            entity_text = entity.get("word", "")
            entity_label = next(
                (entity[key] for key in label_keys if key in entity),
                "UNKNOWN",
            )
            confidence = entity.get("score", 0.0)
            parts.append(
                f"- **{entity_text}** ({entity_label}) - Confidence: {confidence:.2%}\n"
            )

        # Append the input verbatim below the entity list (no highlighting).
        parts.append("\n---\n\n**Original Text:**\n")
        parts.append(text)
        return "".join(parts)

    except Exception as e:
        # Best-effort UI handler: surface the failure to the user as text
        # rather than letting the exception propagate.
        return f"Error processing text: {e}"
73
+
74
+
75
# Gradio UI: one multi-line textbox in, a Markdown report from detect_phi out.
demo = gr.Interface(
    fn=detect_phi,
    inputs=gr.Textbox(
        label="PHI Text Input",
        placeholder="Enter text containing potential PHI (e.g., 'Patient John Doe, age 45, was admitted on 2024-01-15. SSN: 123-45-6789')",
        lines=5,
    ),
    outputs=gr.Markdown(label="PHI Detection Results"),
    title="BioBERT PHI Detection",
    description="Enter text containing Protected Health Information (PHI) to detect entities using BioBERT model.",
    examples=[
        [
            # Uses a reserved example.com address so the sample actually
            # contains an email-shaped piece of PHI.
            "Patient John Smith, age 52, was admitted to Memorial Hospital on January 15, 2024. Contact: john.smith@example.com"
        ],
        [
            "Dr. Sarah Johnson treated patient ID 12345 at the clinic located at 123 Main St, Boston, MA 02101."
        ],
        [
            "The patient's date of birth is 03/15/1975 and their medical record number is MRN-987654."
        ],
    ],
)

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()