Spaces:

samarth09healthPM
/

my-streamlit-app

Running

App Files Files Community

samarth09healthPM committed on Oct 11

Commit

1c2a87b

1 Parent(s): 445c1de

Fix duplicate key error with session state

Browse files

Files changed (1) hide show

main.py +331 -126

main.py CHANGED Viewed

@@ -7,28 +7,33 @@ from pathlib import Path
 import subprocess
 import torch
-# Fix torch.classes path error
 torch.classes.__path__ = []
-# HF Spaces env vars
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 os.environ["HF_HUB_CACHE"] = "/tmp/hf_cache"
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 warnings.filterwarnings("ignore", category=UserWarning)
-st.set_page_config(page_title="Clinical AI Summarizer", layout="wide", initial_sidebar_state="expanded")
 st.title("🏥 HIPAA-Compliant RAG Clinical Summarizer")
-st.markdown("De-identification → Clinical Summarization → Quality Assessment")
 # Global configuration
 secure_dir = "./secure_store"
-model_name = "google/flan-t5-base"  # Changed to flan-t5 for better summarization
 # Ensure directories exist
 Path(secure_dir).mkdir(exist_ok=True)
-# Sidebar
 with st.sidebar:
     st.header("System Status")
@@ -36,7 +41,7 @@ with st.sidebar:
         from deid_pipeline import DeidPipeline
         st.success("✓ De-identification module")
         HAS_DEID = True
-    except ImportError as e:
         st.warning("⚠ De-ID fallback mode")
         HAS_DEID = False
@@ -44,17 +49,19 @@ with st.sidebar:
         import transformers
         st.success("✓ Transformers loaded")
     except ImportError:
-        st.error("✗ Transformers missing")
         st.stop()
-    st.info("Mode: Direct Summarization")
-    st.caption(f"Model: {model_name}")
-# Fallback De-ID
 def fallback_deid(text: str) -> str:
     patterns = [
         (r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', '[NAME]'),
-        (r'\b[A-Z][a-z]{2,}\b(?! (mg|mmHg|bpm|CT|MRI|TIA|BP|HR|RR|NIH|EF|BID|QID|PCP|PMH|HPI))', '[NAME]'),
         (r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b', '[DATE]'),
         (r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b', '[PHONE]'),
         (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]'),
@@ -66,9 +73,10 @@ def fallback_deid(text: str) -> str:
         result = re.sub(pat, rep, result, flags=re.IGNORECASE)
     return result
-# Load model with proper caching
 @st.cache_resource
 def load_model(model_name):
     from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
     tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="/tmp/hf_cache")
@@ -91,66 +99,151 @@ def load_model(model_name):
 tokenizer, model, device = load_model(model_name)
-def extract_sections_from_note(text: str) -> dict:
-    """Extract clinical sections using keywords"""
-    sections = {
-        "Chief Complaint": "",
-        "HPI": "",
-        "Assessment": "",
-        "Vitals": "",
-        "Medications": "",
-        "Plan": "",
-        "Discharge Summary": ""
     }
     lines = text.split('\n')
     current_section = None
     for line in lines:
-        line_lower = line.lower().strip()
-        # Detect section headers
-        if any(kw in line_lower for kw in ['chief complaint', 'cc:']):
-            current_section = "Chief Complaint"
-        elif any(kw in line_lower for kw in ['history of present illness', 'hpi:', 'history:']):
-            current_section = "HPI"
-        elif any(kw in line_lower for kw in ['assessment', 'impression', 'diagnosis']):
-            current_section = "Assessment"
-        elif any(kw in line_lower for kw in ['vital signs', 'vitals:', 'bp:', 'temp:']):
-            current_section = "Vitals"
-        elif any(kw in line_lower for kw in ['medications', 'meds:', 'current medications']):
-            current_section = "Medications"
-        elif any(kw in line_lower for kw in ['plan:', 'treatment plan', 'recommendations']):
-            current_section = "Plan"
-        elif any(kw in line_lower for kw in ['discharge', 'discharge summary', 'disposition']):
-            current_section = "Discharge Summary"
-        # Append content to current section
-        if current_section and line.strip():
-            sections[current_section] += line + " "
     return sections
-def summarize_clinical_note(text: str, tokenizer, model, device) -> str:
-    """Generate structured clinical summary using T5"""
-    # First extract any existing structure
-    sections = extract_sections_from_note(text)
-    # Truncate if too long
     max_input_length = 1024
-    if len(text) > max_input_length * 4:  # Rough character estimate
         text = text[:max_input_length * 4]
-    # Create focused prompt for T5
-    prompt = f"""Summarize this clinical note into these sections:
-Chief Complaint: [patient's main concern]
-HPI: [history of present illness]
-Assessment: [clinical findings and diagnosis]
-Vitals: [vital signs if present]
-Medications: [current medications]
-Plan: [treatment plan]
-Discharge Summary: [discharge plan if applicable]
 Clinical Note:
 {text}
@@ -167,80 +260,159 @@ Structured Summary:"""
     inputs = {k: v.to(device) for k, v in inputs.items()}
     with torch.no_grad():
         outputs = model.generate(
             inputs['input_ids'],
-            max_new_tokens=512,
-            min_length=100,
             num_beams=4,
-            temperature=0.7,
             do_sample=False,
             early_stopping=True,
-            no_repeat_ngram_size=3,  # Prevent repetition
-            repetition_penalty=2.0,   # Strong penalty for repetition
             pad_token_id=tokenizer.pad_token_id,
             eos_token_id=tokenizer.eos_token_id
         )
-    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Format output with sections
-    formatted_summary = ""
-    for section_name in sections.keys():
-        formatted_summary += f"**{section_name}:**\n"
-        # Check if AI generated content for this section
-        if section_name.lower() in summary.lower():
-            # Extract relevant part from summary
-            relevant_content = "Generated summary content"
-        elif sections[section_name].strip():
-            # Use extracted content
-            formatted_summary += f"{sections[section_name].strip()[:200]}\n\n"
-        else:
-            formatted_summary += "Not documented\n\n"
-    # If summary is too short, add the full AI output
-    if len(formatted_summary) < 200:
-        formatted_summary = summary
-    return formatted_summary
 def validate_summary(summary: str, original_text: str) -> dict:
-    """Assess summary quality"""
     score = 100
     warnings = []
-    required_sections = ["Chief Complaint", "HPI", "Assessment", "Vitals", "Medications", "Plan"]
-    present = sum(1 for sec in required_sections if sec.lower() in summary.lower())
-    missing_count = len(required_sections) - present
-    if missing_count > 0:
-        score -= missing_count * 10
-        warnings.append(f"{missing_count} sections missing or incomplete")
-    # Check for medical content
-    if re.search(r'\d+\s*(mg|mmHg|bpm|%)', summary, re.I):
         score += 10
-    # Check for repetition (like "windshield windshield")
     words = summary.lower().split()
-    if len(words) > 10:
         unique_ratio = len(set(words)) / len(words)
-        if unique_ratio < 0.3:
-            score -= 40
-            warnings.append("High repetition detected - review summary quality")
-    # Check length
-    if len(summary) < 100:
-        score -= 20
-        warnings.append("Summary too short")
     score = max(0, min(100, score))
-    status = "EXCELLENT" if score >= 85 else "GOOD" if score >= 70 else "FAIR" if score >= 50 else "POOR"
-    return {"quality_score": score, "status": status, "warnings": warnings}
-# Session state
 if 'deid_text' not in st.session_state:
     st.session_state.deid_text = ""
 if 'original_text' not in st.session_state:
@@ -250,15 +422,19 @@ if 'summary' not in st.session_state:
 if 'validation' not in st.session_state:
     st.session_state.validation = None
-# UI Tabs
 tab1, tab2 = st.tabs(["📝 De-Identify Note", "✨ Generate Summary"])
 with tab1:
     st.header("Step 1: De-identify Clinical Note")
     uploaded = st.file_uploader("Upload clinical note (.txt)", type=["txt"])
-    input_text = st.text_area("Or paste clinical note here:", height=300,
-                              placeholder="Paste clinical documentation here...")
     note_text = ""
     if uploaded:
@@ -281,33 +457,34 @@ with tab1:
                             with open(f"{secure_dir}/session_note.spanmap.enc", "wb") as f:
                                 f.write(result["encrypted_span_map"])
-                        st.success("✓ De-identified with encrypted audit trail")
                     except Exception as e:
-                        st.warning(f"Using regex-based de-identification")
                         deid_text = fallback_deid(note_text)
                 else:
                     deid_text = fallback_deid(note_text)
-                    st.info("Using regex-based de-identification")
                 st.session_state.deid_text = deid_text
-                st.success(f"✓ Processed {len(deid_text)} characters (PHI redacted)")
         else:
             st.warning("⚠ Please enter or upload a clinical note")
     if st.session_state.deid_text:
-        with st.expander("📄 Preview De-identified Text"):
             st.text_area("", st.session_state.deid_text, height=250, disabled=True, key="preview_deid")
 with tab2:
     st.header("Step 2: Generate Clinical Summary")
     if not st.session_state.deid_text:
-        st.warning("⚠ Please de-identify a note first (Tab 1)")
     else:
-        st.info(f"✓ Ready to summarize: {len(st.session_state.deid_text)} characters")
         if st.button("🚀 Generate Summary", type="primary"):
-            with st.spinner("Generating structured summary (30-60 seconds)..."):
                 try:
                     summary = summarize_clinical_note(
                         st.session_state.deid_text,
@@ -317,51 +494,79 @@ with tab2:
                     )
                     st.session_state.summary = summary
-                    st.session_state.validation = validate_summary(summary, st.session_state.deid_text)
-                    st.success("✓ Summary generated successfully!")
                 except Exception as e:
-                    st.error(f"Summarization failed: {str(e)}")
                     st.session_state.summary = None
         if st.session_state.summary:
-            col1, col2 = st.columns([3, 1])
             with col1:
                 st.subheader("📋 Structured Clinical Summary")
                 st.markdown(st.session_state.summary)
             with col2:
-                st.subheader("📊 Quality")
                 val = st.session_state.validation
-                color_map = {"EXCELLENT": "🟢", "GOOD": "🔵", "FAIR": "🟡", "POOR": "🔴"}
                 status_color = color_map.get(val.get("status", ""), "⚪")
                 st.markdown(f"### {status_color} {val.get('status', 'N/A')}")
                 st.metric("Quality Score", f"{val.get('quality_score', 0)}/100")
                 if val.get("warnings"):
-                    st.warning("**Issues:**")
-                    for w in val["warnings"]:
-                        st.write(f"• {w}")
-            # Download buttons
-            col_a, col_b = st.columns(2)
             with col_a:
                 st.download_button(
                     "💾 Download Summary",
                     st.session_state.summary,
                     "clinical_summary.txt",
                     type="secondary"
                 )
             with col_b:
-                if st.button("🔄 Reset & Start Over"):
                     st.session_state.deid_text = ""
                     st.session_state.original_text = ""
                     st.session_state.summary = None
                     st.session_state.validation = None
                     st.rerun()
 st.markdown("---")
-st.caption("🏥 HIPAA-Compliant Clinical Summarizer | Portfolio Demo | Powered by Flan-T5")

 import subprocess
 import torch
+# Fix torch.classes path error for Streamlit compatibility
 torch.classes.__path__ = []
+# HF Spaces environment variables
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 os.environ["HF_HUB_CACHE"] = "/tmp/hf_cache"
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 warnings.filterwarnings("ignore", category=UserWarning)
+st.set_page_config(
+    page_title="Clinical AI Summarizer",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
 st.title("🏥 HIPAA-Compliant RAG Clinical Summarizer")
+st.markdown("**De-identification → Clinical Summarization → Quality Assessment**")
 # Global configuration
 secure_dir = "./secure_store"
+model_name = "google/flan-t5-base"
 # Ensure directories exist
 Path(secure_dir).mkdir(exist_ok=True)
+# ==================== SIDEBAR ====================
 with st.sidebar:
     st.header("System Status")
         from deid_pipeline import DeidPipeline
         st.success("✓ De-identification module")
         HAS_DEID = True
+    except ImportError:
         st.warning("⚠ De-ID fallback mode")
         HAS_DEID = False
         import transformers
         st.success("✓ Transformers loaded")
     except ImportError:
+        st.error("✗ Transformers missing - rebuild Space")
         st.stop()
+    st.info("**Mode:** Direct Summarization")
+    st.caption(f"**Model:** {model_name}")
+    st.caption(f"**Secure Dir:** {secure_dir}")
+# ==================== FALLBACK DE-ID ====================
 def fallback_deid(text: str) -> str:
+    """Regex-based PHI removal fallback"""
     patterns = [
         (r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', '[NAME]'),
+        (r'\b[A-Z][a-z]{2,}\b(?! (mg|mmHg|bpm|CT|MRI|TIA|BP|HR|RR|NIH|EF|BID|QID|PCP|PMH|HPI|ROS))', '[NAME]'),
         (r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b', '[DATE]'),
         (r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b', '[PHONE]'),
         (r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]'),
         result = re.sub(pat, rep, result, flags=re.IGNORECASE)
     return result
+# ==================== MODEL LOADING ====================
 @st.cache_resource
 def load_model(model_name):
+    """Load T5 model with proper caching"""
     from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
     tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="/tmp/hf_cache")
 tokenizer, model, device = load_model(model_name)
+# ==================== SECTION EXTRACTION FUNCTIONS ====================
def extract_vitals(text: str) -> str:
    """Extract vital signs from free text using pattern matching.

    Each vital is looked up with a case-insensitive regex and only the first
    occurrence is reported. Labels are anchored on a word boundary so that a
    label embedded in a longer token is not mistaken for a vital sign
    (for example ``CO2: 24`` must not be reported as ``O2: 24``).

    Args:
        text: Clinical note text to scan. Empty or ``None`` yields ``""``.

    Returns:
        A comma-separated string such as ``"BP: 140/90, HR: 88"`` in the fixed
        order BP, HR, Temp, RR, O2, Weight, or ``""`` when nothing matched.
        Only the numeric value is captured; units are not included.
    """
    if not text:
        return ""

    # The leading \b keeps a label from matching inside another word
    # ("CO2" -> O2, "ERR" -> RR). "SpO2" still matches through its own
    # alternative because the boundary sits in front of the "S".
    patterns = {
        'BP': r'\b(?:BP|Blood Pressure)[:\s]+(\d{2,3}/\d{2,3})',
        'HR': r'\b(?:HR|Heart Rate|Pulse)[:\s]+(\d{2,3})(?:\s*bpm)?',
        'Temp': r'\b(?:Temp|Temperature)[:\s]+(\d{2,3}\.?\d*)(?:\s*[FC°])?',
        'RR': r'\b(?:RR|Respiratory Rate|Resp)[:\s]+(\d{1,2})',
        'O2': r'\b(?:O2|Oxygen|SpO2)[:\s]+(\d{2,3})%?',
        'Weight': r'\b(?:Weight|Wt)[:\s]+(\d{2,3}\.?\d*)\s*(?:kg|lbs)?',
    }

    vitals_found = []
    for vital_name, pattern in patterns.items():
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            vitals_found.append(f"{vital_name}: {match.group(1)}")
    return ', '.join(vitals_found)
def extract_all_sections(text: str) -> dict:
    """Split a clinical note into named sections using keyword matching.

    A line is treated as a section header when it contains any keyword of a
    section (substring match on the lower-cased line); sections are tried in
    the order they are declared below and the first one that matches wins.
    Lines without a keyword are appended to the section currently open.

    Differences from a naive keyword scan, all of which prevent lost or
    duplicated text:
      * The text after the header is captured once, using the keyword that
        starts earliest on the line (longest keyword on a tie), instead of
        once per matching keyword.
      * A line that matches the section that is already open is kept whole as
        content, so labelled values such as ``HR: 88`` are not dropped.
      * When a section is opened a second time its new content is appended to
        what was collected before instead of replacing it.

    Args:
        text: Clinical note text. Empty or ``None`` is treated as no lines.

    Returns:
        Dict mapping section name to its collected text. Sections with no
        content are omitted. If no "Vitals" content was collected,
        ``extract_vitals`` is run over the whole text as a fallback.
    """
    section_keywords = {
        "Chief Complaint": ['chief complaint', 'cc:', 'presenting complaint', 'reason for visit', 'presenting concern'],
        "HPI": ['history of present illness', 'hpi:', 'present illness', 'history:', 'clinical history'],
        "Assessment": ['assessment:', 'impression:', 'diagnosis:', 'clinical impression', 'diagnoses:'],
        "Vitals": ['vital signs', 'vitals:', 'bp:', 'blood pressure', 'temperature', 'pulse', 'hr:', 'physical exam'],
        "Medications": ['medications:', 'meds:', 'current medications', 'home medications', 'prescriptions', 'drug list'],
        "Plan": ['plan:', 'treatment plan', 'recommendations:', 'disposition:', 'instructions', 'management plan'],
        "Discharge Summary": ['discharge', 'discharge summary', 'discharge plan', 'follow-up', 'disposition', 'discharge instructions']
    }

    sections = {}

    def _flush(name, parts):
        """Append the buffered lines to ``sections[name]`` (no-op when empty)."""
        if not name or not parts:
            return
        chunk = ' '.join(parts).strip()
        if chunk:
            sections[name] = f"{sections.get(name, '')} {chunk}".strip()

    current_section = None
    buffer = []

    for line in (text or "").split('\n'):
        line_clean = line.strip()
        if not line_clean:
            continue
        line_lower = line_clean.lower()

        # First section (in declaration order) with any keyword on this line.
        matched_section = None
        hits = []
        for section_name, keywords in section_keywords.items():
            hits = [kw for kw in keywords if kw in line_lower]
            if hits:
                matched_section = section_name
                break

        if matched_section is None:
            # Plain content; dropped when no section has been opened yet.
            if current_section:
                buffer.append(line_clean)
            continue

        if matched_section == current_section:
            # Same section mentioned again: keep the full line as content.
            buffer.append(line_clean)
            continue

        # A different section starts here.
        _flush(current_section, buffer)
        current_section = matched_section
        buffer = []

        # Use exactly one keyword to locate the text that follows the header.
        header = min(hits, key=lambda kw: (line_lower.index(kw), -len(kw)))
        remainder = line_clean[line_lower.index(header) + len(header):]
        # Remove leading colon/dash/space left over after the header label.
        remainder = re.sub(r'^[:\-\s]+', '', remainder).strip()
        if len(remainder) > 2:
            buffer.append(remainder)

    _flush(current_section, buffer)

    # Regex fallback when no Vitals text was collected from headers.
    if not sections.get("Vitals"):
        vitals = extract_vitals(text)
        if vitals:
            sections["Vitals"] = vitals

    return sections
def parse_ai_summary(ai_text: str) -> dict:
    """Parse section-labelled model output into a ``{section: text}`` dict.

    Recognised labels are ``Chief Complaint:``, ``HPI:``, ``Assessment:``,
    ``Vitals:``, ``Medications:``, ``Plan:`` and ``Discharge Summary:``
    (case-sensitive, colon directly after the name). A label is recognised at
    the start of a line and also in the middle of a line, as long as it is not
    glued to a preceding word character. Generated text may arrive as one long
    line with several labels in it (presumably the usual case for the seq2seq
    model used by this app — confirm), and a line-start-only parser would put
    all of it under the first label.

    Args:
        ai_text: Decoded model output. Empty or ``None`` yields ``{}``.

    Returns:
        Dict mapping the label (without the colon) to its text. Text before
        the first label is ignored, labels with no text are omitted, and a
        label that occurs more than once keeps its last non-empty text.
    """
    if not ai_text:
        return {}

    section_names = ('Chief Complaint', 'HPI', 'Assessment', 'Vitals',
                     'Medications', 'Plan', 'Discharge Summary')
    # (?<!\w) stops a label from matching inside a longer word.
    header_re = re.compile(
        r'(?<!\w)(' + '|'.join(re.escape(name) for name in section_names) + r'):'
    )

    sections = {}
    headers = list(header_re.finditer(ai_text))
    for pos, header in enumerate(headers):
        # A section runs from the end of its label to the next label (or EOF).
        stop = headers[pos + 1].start() if pos + 1 < len(headers) else len(ai_text)
        pieces = [part.strip() for part in ai_text[header.end():stop].split('\n')]
        content = ' '.join(piece for piece in pieces if piece).strip()
        if content:
            sections[header.group(1)] = content
    return sections
+# ==================== MAIN SUMMARIZATION FUNCTION ====================
+def summarize_clinical_note(text: str, tokenizer, model, device) -> str:
+    """Generate structured clinical summary using T5 with proper section extraction"""
+    # Truncate if too long (T5 has token limits)
     max_input_length = 1024
+    if len(text) > max_input_length * 4:
         text = text[:max_input_length * 4]
+    # Create detailed prompt for T5
+    prompt = f"""Summarize this clinical documentation into a structured format with these exact sections:
+Chief Complaint: State the patient's main presenting concern or reason for visit
+HPI: Summarize the history of present illness including onset, duration, and progression
+Assessment: List clinical findings, diagnoses, and impressions
+Vitals: Extract all vital signs including BP, HR, Temperature, RR, O2 saturation
+Medications: List all current medications with dosages and frequencies
+Plan: Describe the treatment plan, recommendations, and next steps
+Discharge Summary: Provide discharge status, instructions, and follow-up plans
 Clinical Note:
 {text}
     inputs = {k: v.to(device) for k, v in inputs.items()}
+    # Generate with optimal parameters to prevent repetition
     with torch.no_grad():
         outputs = model.generate(
             inputs['input_ids'],
+            max_new_tokens=650,
+            min_length=200,
             num_beams=4,
+            temperature=0.8,
             do_sample=False,
             early_stopping=True,
+            no_repeat_ngram_size=3,
+            repetition_penalty=2.5,
+            length_penalty=1.0,
             pad_token_id=tokenizer.pad_token_id,
             eos_token_id=tokenizer.eos_token_id
         )
+    ai_summary = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
+    # Extract sections from original text using keyword matching
+    sections_content = extract_all_sections(text)
+    # Parse AI output for any additional structured content
+    ai_sections = parse_ai_summary(ai_summary)
+    # Merge: prioritize extracted content, fallback to AI, then "Not documented"
+    final_sections = {}
+    section_names = ["Chief Complaint", "HPI", "Assessment", "Vitals", "Medications", "Plan", "Discharge Summary"]
+    for section in section_names:
+        # Try extracted content first
+        content = sections_content.get(section, "").strip()
+        # If no content or too short, try AI summary
+        if not content or len(content) < 15:
+            content = ai_sections.get(section, "").strip()
+        # If still no content and AI generated something generic, use it
+        if not content or len(content) < 10:
+            # Check if AI summary contains relevant info in unstructured format
+            if section.lower() in ai_summary.lower():
+                # Extract sentences mentioning this section
+                sentences = ai_summary.split('.')
+                relevant = [s.strip() for s in sentences if section.lower() in s.lower()]
+                if relevant:
+                    content = '. '.join(relevant) + '.'
+        # Final fallback
+        if not content or len(content) < 10:
+            content = "Not documented"
+        # Clean up content
+        content = content.replace('  ', ' ').strip()
+        final_sections[section] = content
+    # Format output with proper markdown
+    formatted_output = ""
+    for section in section_names:
+        formatted_output += f"**{section}:**\n{final_sections[section]}\n\n"
+    return formatted_output
# ==================== QUALITY VALIDATION ====================
def validate_summary(summary: str, original_text: str) -> dict:
    """Score a formatted clinical summary and list quality warnings.

    The summary is expected in the ``**Section:**`` / content-lines layout.
    Scoring starts at 100 and is adjusted as follows, then clamped to 0..100:
      * -12 for every required section that is missing, shorter than 11
        characters, or contains "not documented";
      * +10 when a quantitative clinical value (dose, blood pressure, heart
        rate, temperature, percentage) is present;
      * -30 when fewer than 35% of the words are unique (more than 20 words);
      * -15 when shorter than 150 characters, -5 when longer than 2000;
      * -10 when fewer than 3 key clinical terms occur in the content. Header
        lines (those starting with ``**``) are excluded from this check,
        because the section titles alone contain three of the terms and would
        make the check pass for every formatted summary.

    Args:
        summary: Formatted summary text. ``None`` is treated as empty.
        original_text: Source note; accepted for interface compatibility and
            not used by the current checks.

    Returns:
        Dict with ``quality_score`` (int), ``status`` (EXCELLENT / GOOD /
        FAIR / POOR), ``warnings`` (list of str), ``sections_present`` and
        ``sections_total``.
    """
    required_sections = ["Chief Complaint", "HPI", "Assessment", "Vitals", "Medications", "Plan", "Discharge Summary"]
    summary = summary or ""
    lines = summary.split('\n')

    def _section_body(sec):
        """Return the lines under the first ``**sec:**`` header, up to the next ``**`` line."""
        marker = f"**{sec}:**"
        for pos, line in enumerate(lines):
            if line.startswith(marker):
                body = []
                for following in lines[pos + 1:]:
                    if following.startswith("**"):
                        break
                    body.append(following)
                return ' '.join(body)
        return ""

    score = 100
    # Named "issues" locally so the imported ``warnings`` module is not shadowed.
    issues = []

    # Count sections that carry real content
    present_count = 0
    for sec in required_sections:
        body = _section_body(sec).strip()
        if len(body) > 10 and "not documented" not in body.lower():
            present_count += 1

    missing_count = len(required_sections) - present_count
    if missing_count > 0:
        score -= missing_count * 12
        issues.append(f"{missing_count} of {len(required_sections)} sections incomplete")

    # Check for medical content indicators
    medical_patterns = [
        r'\d+\s*mg',
        r'\d+/\d+\s*mmHg',
        r'\d+\s*bpm',
        # Temperature: a degree sign, or a standalone F/C unit letter. The \b
        # prevents matches such as "72 female" or "3 cm".
        r'\d+(?:\.\d+)?\s*(?:°\s*[FC]?|[FC]\b)',
        r'\d+%',
    ]
    if any(re.search(pattern, summary, re.I) for pattern in medical_patterns):
        score += 10
    else:
        issues.append("Limited quantitative clinical data")

    # Check for repetition issues
    words = summary.lower().split()
    if len(words) > 20:
        unique_ratio = len(set(words)) / len(words)
        if unique_ratio < 0.35:
            score -= 30
            issues.append("High repetition detected - summary quality poor")

    # Check overall length
    if len(summary) < 150:
        score -= 15
        issues.append("Summary too brief")
    elif len(summary) > 2000:
        score -= 5
        issues.append("Summary may be overly verbose")

    # Check for key clinical terms in the content only (not in the headers)
    content_text = ' '.join(line for line in lines if not line.startswith("**")).lower()
    clinical_terms = ['patient', 'diagnosis', 'treatment', 'plan', 'medication', 'assessment']
    terms_found = sum(1 for term in clinical_terms if term in content_text)
    if terms_found < 3:
        score -= 10
        issues.append("Limited clinical terminology")

    score = max(0, min(100, score))
    if score >= 90:
        status = "EXCELLENT"
    elif score >= 75:
        status = "GOOD"
    elif score >= 60:
        status = "FAIR"
    else:
        status = "POOR"

    return {
        "quality_score": score,
        "status": status,
        "warnings": issues,
        "sections_present": present_count,
        "sections_total": len(required_sections)
    }
+# ==================== SESSION STATE ====================
 if 'deid_text' not in st.session_state:
     st.session_state.deid_text = ""
 if 'original_text' not in st.session_state:
 if 'validation' not in st.session_state:
     st.session_state.validation = None
+# ==================== UI TABS ====================
 tab1, tab2 = st.tabs(["📝 De-Identify Note", "✨ Generate Summary"])
 with tab1:
     st.header("Step 1: De-identify Clinical Note")
+    st.markdown("Upload or paste a clinical note to remove PHI (Protected Health Information)")
     uploaded = st.file_uploader("Upload clinical note (.txt)", type=["txt"])
+    input_text = st.text_area(
+        "Or paste clinical note here:",
+        height=300,
+        placeholder="Paste clinical documentation here...\n\nExample:\nChief Complaint: Chest pain\nHPI: 72-year-old male presents with...\nVitals: BP 140/90, HR 88..."
+    )
     note_text = ""
     if uploaded:
                             with open(f"{secure_dir}/session_note.spanmap.enc", "wb") as f:
                                 f.write(result["encrypted_span_map"])
+                        st.success("✅ De-identified with encrypted audit trail saved")
                     except Exception as e:
+                        st.warning(f"⚠ Using regex-based de-identification: {str(e)[:100]}")
                         deid_text = fallback_deid(note_text)
                 else:
                     deid_text = fallback_deid(note_text)
+                    st.info("ℹ Using regex-based de-identification")
                 st.session_state.deid_text = deid_text
+                st.success(f"✅ Processed **{len(deid_text)}** characters (PHI redacted)")
         else:
             st.warning("⚠ Please enter or upload a clinical note")
     if st.session_state.deid_text:
+        with st.expander("📄 Preview De-identified Text", expanded=False):
             st.text_area("", st.session_state.deid_text, height=250, disabled=True, key="preview_deid")
 with tab2:
     st.header("Step 2: Generate Clinical Summary")
+    st.markdown("AI-powered structured summarization with quality assessment")
     if not st.session_state.deid_text:
+        st.warning("⚠ Please de-identify a note first in **Tab 1**")
     else:
+        st.info(f"✅ Ready to summarize: **{len(st.session_state.deid_text)}** characters")
         if st.button("🚀 Generate Summary", type="primary"):
+            with st.spinner("⏳ Generating structured summary (30-60 seconds)..."):
                 try:
                     summary = summarize_clinical_note(
                         st.session_state.deid_text,
                     )
                     st.session_state.summary = summary
+                    st.session_state.validation = validate_summary(
+                        summary,
+                        st.session_state.deid_text
+                    )
+                    st.success("✅ Summary generated successfully!")
                 except Exception as e:
+                    st.error(f"❌ Summarization failed: {str(e)}")
+                    st.exception(e)
                     st.session_state.summary = None
         if st.session_state.summary:
+            st.markdown("---")
+            col1, col2 = st.columns([2.5, 1])
             with col1:
                 st.subheader("📋 Structured Clinical Summary")
                 st.markdown(st.session_state.summary)
             with col2:
+                st.subheader("📊 Quality Assessment")
                 val = st.session_state.validation
+                color_map = {
+                    "EXCELLENT": "🟢",
+                    "GOOD": "🔵",
+                    "FAIR": "🟡",
+                    "POOR": "🔴"
+                }
                 status_color = color_map.get(val.get("status", ""), "⚪")
                 st.markdown(f"### {status_color} {val.get('status', 'N/A')}")
                 st.metric("Quality Score", f"{val.get('quality_score', 0)}/100")
+                st.metric(
+                    "Sections Complete",
+                    f"{val.get('sections_present', 0)}/{val.get('sections_total', 7)}"
+                )
                 if val.get("warnings"):
+                    with st.expander("⚠ Quality Warnings", expanded=True):
+                        for w in val["warnings"]:
+                            st.warning(f"• {w}")
+            st.markdown("---")
+            # Download and reset buttons
+            col_a, col_b, col_c = st.columns([2, 2, 1])
             with col_a:
                 st.download_button(
                     "💾 Download Summary",
                     st.session_state.summary,
                     "clinical_summary.txt",
+                    mime="text/plain",
                     type="secondary"
                 )
             with col_b:
+                st.download_button(
+                    "💾 Download De-identified Note",
+                    st.session_state.deid_text,
+                    "deidentified_note.txt",
+                    mime="text/plain",
+                    type="secondary"
+                )
+            with col_c:
+                if st.button("🔄 Reset"):
                     st.session_state.deid_text = ""
                     st.session_state.original_text = ""
                     st.session_state.summary = None
                     st.session_state.validation = None
                     st.rerun()
+# ==================== FOOTER ====================
 st.markdown("---")
+st.caption("🏥 **HIPAA-Compliant Clinical Summarizer** | Portfolio Demo | Powered by Flan-T5 & Presidio")
+st.caption("⚠ For demonstration purposes only - not for clinical use")