"""Gradio app that detects and redacts PHI with Stanford's de-identifier model."""

import re

import gradio as gr
from transformers import pipeline

# Load Stanford PHI detector model once at import time so that every request
# served by the Gradio app reuses the same pipeline instance.
print("Loading Stanford PHI detector model...")
phi_detector = pipeline(
    "token-classification",
    model="StanfordAIMI/stanford-deidentifier-base",
    aggregation_strategy="simple",
    device=-1,  # CPU mode
)
print("Model loaded successfully!")


def _fenced(text):
    """Wrap *text* in a Markdown code fence that the text itself cannot close.

    A fence is only terminated by a backtick run at least as long as the
    opening one, so the fence is made one backtick longer than the longest
    run found in *text* (and never shorter than the usual three).
    """
    longest_run = max((len(run) for run in re.findall(r"`+", text)), default=0)
    fence = "`" * max(3, longest_run + 1)
    return f"{fence}\n{text}\n{fence}"


def _redact(text, entities):
    """Return *text* with every entity span replaced by ``[ENTITY_GROUP]``.

    *entities* must be sorted by ascending ``start``. The text is rebuilt in
    one left-to-right pass, so no index shifting can occur. A span that
    overlaps the previous one is folded into the previous tag instead of
    being sliced against already-replaced text.
    """
    pieces = []
    cursor = 0  # Index in ``text`` up to which output has been produced.
    for entity in entities:
        start, end = entity["start"], entity["end"]
        if start >= cursor:
            pieces.append(text[cursor:start])
            pieces.append(f"[{entity['entity_group']}]")
        cursor = max(cursor, end)
    pieces.append(text[cursor:])
    return "".join(pieces)


def detect_and_redact_phi(text):
    """
    Detect and redact PHI in text using Stanford's PHI detector

    Args:
        text: Input text to analyze

    Returns:
        Markdown-formatted string listing each detected entity (text, type,
        confidence) followed by the original and the redacted text. Returns a
        warning message for empty/whitespace-only input and an error message
        if detection or formatting raises.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text to analyze."

    try:
        # Get PHI predictions
        results = phi_detector(text)

        if not results:
            return "".join(
                [
                    "## ✅ No PHI Detected\n\n",
                    "**Original Text:**\n",
                    _fenced(text),
                    "\n\n",
                    "**Redacted Text:**\n",
                    _fenced(text),
                    "\n",
                ]
            )

        # Work in reading order: the same ordering drives both the entity
        # listing and the single-pass redaction.
        entities = sorted(results, key=lambda item: item["start"])
        redacted_text = _redact(text, entities)

        # Format output
        parts = [
            "## 🔍 PHI Detection & Redaction Results\n\n",
            f"**Found {len(entities)} PHI entity(ies):**\n\n",
        ]
        parts.extend(
            f"{idx}. **{text[entity['start']:entity['end']]}** "
            f"→ `{entity['entity_group']}` "
            f"(Confidence: {entity['score']:.2%})\n"
            for idx, entity in enumerate(entities, 1)
        )
        parts.extend(
            [
                "\n---\n\n",
                "### 📄 Original Text\n",
                _fenced(text),
                "\n\n",
                "### 🔒 Redacted Text\n",
                _fenced(redacted_text),
                "\n",
            ]
        )
        return "".join(parts)

    except Exception as e:
        # UI boundary: surface the failure in the output panel rather than
        # letting the request crash.
        return f"❌ **Error:** {e!s}"


# Create Gradio interface
demo = gr.Interface(
    fn=detect_and_redact_phi,
    inputs=gr.Textbox(
        label="Enter Text to Analyze",
        placeholder="Patient John Doe, SSN: 123-45-6789, visited on 01/15/2024.",
        lines=8,
    ),
    outputs=gr.Markdown(label="PHI Detection & Redaction Results"),
    title="🏥 Stanford PHI Detector & Redactor",
    description="Detect and redact Protected Health Information (PHI) using Stanford's de-identification model.",
    examples=[
        ["Patient John Doe, SSN: 123-45-6789, visited on 01/15/2024."],
        [
            "Jane Smith, DOB: 03/22/1980, Phone: (555) 123-4567, Address: 123 Main St, Boston, MA"
        ],
        [
            "MRN: 98765432. Dr. Anderson saw the patient at Massachusetts General Hospital on December 15, 2024."
        ],
    ],
    theme="soft",
)

if __name__ == "__main__":
    # NOTE(review): "0.0.0.0" listens on every network interface, and this app
    # echoes unredacted PHI back to the caller. Confirm the deployment sits
    # behind a container/proxy boundary or authentication; otherwise bind to
    # "127.0.0.1".
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,  # Set to True for public link
    )