Spaces:

SreekarB
/

SLPAnalysis

Sleeping

App Files Community

SreekarB committed on Nov 7

Commit

ec1ed75

verified ·

1 Parent(s): e935875

Update annotated_casl_app.py

Browse files

Files changed (1) hide show

annotated_casl_app.py +66 -35

annotated_casl_app.py CHANGED Viewed

@@ -27,12 +27,13 @@ def segment_response_by_sections(response_text):
         "4. FIGURATIVE LANGUAGE ANALYSIS",
         "5. PRAGMATIC LANGUAGE ASSESSMENT",
         "6. VOCABULARY AND SEMANTIC ANALYSIS",
-        "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
-        "8. COGNITIVE-LINGUISTIC FACTORS",
-        "9. FLUENCY AND RHYTHM ANALYSIS",
-        "10. QUANTITATIVE METRICS",
-        "11. CLINICAL IMPLICATIONS",
-        "12. PROGNOSIS AND SUMMARY"
     ]
     sections = {}
@@ -75,12 +76,13 @@ def combine_sections_smartly(sections_dict):
         "4. FIGURATIVE LANGUAGE ANALYSIS",
         "5. PRAGMATIC LANGUAGE ASSESSMENT",
         "6. VOCABULARY AND SEMANTIC ANALYSIS",
-        "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
-        "8. COGNITIVE-LINGUISTIC FACTORS",
-        "9. FLUENCY AND RHYTHM ANALYSIS",
-        "10. QUANTITATIVE METRICS",
-        "11. CLINICAL IMPLICATIONS",
-        "12. PROGNOSIS AND SUMMARY"
     ]
     combined_parts = []
@@ -568,7 +570,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
     ANNOTATED TRANSCRIPT:
     {annotated_transcript}{notes_section}
-    INSTRUCTIONS: Complete ALL 12 sections below. Use simple formatting (no excessive markdown, headers, or bullets). Focus on clinical utility and completeness. Count all markers precisely and provide specific examples.
     COMPREHENSIVE SPEECH SAMPLE ANALYSIS
@@ -712,7 +714,34 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
     - Academic vs. conversational vocabulary ratio
     - Age-appropriate vocabulary development
-    7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS
     A. Morphological Patterns:
     - Derivational morphology: Prefixes and suffixes
@@ -725,7 +754,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
     - Syllable structure complexity
     - Phonological awareness indicators
-    8. COGNITIVE-LINGUISTIC FACTORS
     A. Working Memory Indicators:
     - Sentence length and complexity management
@@ -742,7 +771,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
     - Planning and organization in discourse
     - Cognitive flexibility in topic management
-    9. FLUENCY AND RHYTHM ANALYSIS
     A. Disfluency Patterns:
     - Total disfluency count and rate per 100 words
@@ -755,7 +784,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
     - Rhythm and prosodic patterns (if evident)
     - Overall fluency profile and age-appropriateness
-    10. QUANTITATIVE METRICS
     A. Basic Measures:
     - Total words: [exact count]
@@ -771,7 +800,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
     - Vocabulary sophistication ratio
     - Sentence complexity distribution percentages
-    11. CLINICAL IMPLICATIONS
     A. Strengths (ranked by prominence):
     - Primary strengths with supporting evidence
@@ -789,7 +818,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
     - Frequency and duration recommendations
     - Progress monitoring strategies
-    12. PROGNOSIS AND SUMMARY
     A. Overall Communication Profile:
     - Comprehensive summary of findings
@@ -803,13 +832,14 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
     - Family/educational recommendations
     CRITICAL REQUIREMENTS:
-    1. Complete ALL 12 sections - do not stop early
     2. Provide exact counts for all markers with specific examples
     3. Calculate all percentages and rates with formulas shown
     4. Include direct quotes from transcript for examples
     5. Analyze patterns and provide clinical interpretations
     6. Focus on actionable, clinically relevant insights
     7. If response is incomplete, end with <CONTINUE>
     """
     return call_claude_api_with_continuation(analysis_prompt)
@@ -1290,7 +1320,7 @@ def call_claude_api_with_continuation(prompt):
     if not ANTHROPIC_API_KEY:
         return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
-    print("🔄 Starting comprehensive 12-section analysis...")
     print("⏱️ This may take 3-5 minutes for complex analyses...")
     # Define all required sections
@@ -1301,12 +1331,13 @@ def call_claude_api_with_continuation(prompt):
         "4. FIGURATIVE LANGUAGE ANALYSIS",
         "5. PRAGMATIC LANGUAGE ASSESSMENT",
         "6. VOCABULARY AND SEMANTIC ANALYSIS",
-        "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
-        "8. COGNITIVE-LINGUISTIC FACTORS",
-        "9. FLUENCY AND RHYTHM ANALYSIS",
-        "10. QUANTITATIVE METRICS",
-        "11. CLINICAL IMPLICATIONS",
-        "12. PROGNOSIS AND SUMMARY"
     ]
     # Safety limits to prevent infinite loops
@@ -1321,7 +1352,7 @@ def call_claude_api_with_continuation(prompt):
         last_section_count = 0  # Track progress between calls
         # Add continuation instruction to original prompt
-        initial_prompt = prompt + "\n\nCRITICAL INSTRUCTIONS: You MUST complete ALL 12 sections of the analysis. If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Do not skip any sections. Use the checklist to ensure all sections are completed."
         while True:  # Unlimited continuations until complete
             if continuation_count == 0:
@@ -1464,7 +1495,7 @@ def call_claude_api_with_continuation(prompt):
     # Log final results
     print(f"\n=== FINAL SMART VALIDATION ===")
     print(f"Total sections found: {len(all_sections)}")
-    print(f"All sections present: {len(all_sections) == 12}")
     print(f"Missing sections: {[s for s in required_sections if s not in all_sections]}")
     print(f"Total time: {(time.time() - start_time) / 60:.1f} minutes")
     print(f"Total API calls: {continuation_count + 1}")
@@ -1475,7 +1506,7 @@ def call_claude_api_with_continuation(prompt):
         final_response += f"\n\n[Analysis completed in {continuation_count + 1} parts over {(time.time() - start_time) / 60:.1f} minutes]"
     # Add warning if incomplete due to safety limits
-    if len(all_sections) < 12:
         missing_sections = [s for s in required_sections if s not in all_sections]
         final_response += f"\n\n⚠️ WARNING: Analysis incomplete due to safety limits. Missing sections: {', '.join(missing_sections)}"
         final_response += f"\n\n💡 TIP: Try running the analysis again, or use the 'Targeted Analysis' tab to focus on specific areas."
@@ -1536,7 +1567,7 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
     {metrics_text}
     ANALYSIS INSTRUCTIONS:
-    Using the detailed linguistic markers in the annotated transcript and the calculated metrics above, provide a comprehensive analysis with EXACT counts, percentages, and specific examples. Complete ALL 12 sections below:
     COMPREHENSIVE SPEECH SAMPLE ANALYSIS:
@@ -1689,7 +1720,7 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
     - Calculate precise percentages and show your work
     - Give specific examples from the transcript
     - If annotation is incomplete, supplement with analysis of the original transcript
-    - Complete ALL 12 sections - use <CONTINUE> if needed
     """
     return call_claude_api_with_continuation(analysis_prompt)
@@ -2166,7 +2197,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
         # Create comprehensive analysis prompt
         final_prompt = f"""
-        You are a speech-language pathologist conducting a comprehensive speech analysis. Use the verified statistical data provided and complete ALL 12 sections with detailed structure.
         Patient: {age}-year-old {gender}
@@ -2177,7 +2208,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
         INSTRUCTIONS:
         1. Use ONLY the verified statistical values above - do not recount anything
-        2. Complete ALL 12 sections without stopping
         3. Provide specific examples and quotes from the transcript
         4. Calculate rates and percentages using verified counts
         5. Focus on clinical interpretation and actionable insights
@@ -2306,7 +2337,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
         - Treatment planning and expected outcomes
         - Follow-up recommendations
-        CRITICAL: Complete ALL 12 sections using verified data and specific transcript examples.
         """
         # Get comprehensive analysis

         "4. FIGURATIVE LANGUAGE ANALYSIS",
         "5. PRAGMATIC LANGUAGE ASSESSMENT",
         "6. VOCABULARY AND SEMANTIC ANALYSIS",
+        "7. NLP-DERIVED LINGUISTIC FEATURES",
+        "8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
+        "9. COGNITIVE-LINGUISTIC FACTORS",
+        "10. FLUENCY AND RHYTHM ANALYSIS",
+        "11. QUANTITATIVE METRICS",
+        "12. CLINICAL IMPLICATIONS",
+        "13. PROGNOSIS AND SUMMARY"
     ]
     sections = {}
         "4. FIGURATIVE LANGUAGE ANALYSIS",
         "5. PRAGMATIC LANGUAGE ASSESSMENT",
         "6. VOCABULARY AND SEMANTIC ANALYSIS",
+        "7. NLP-DERIVED LINGUISTIC FEATURES",
+        "8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
+        "9. COGNITIVE-LINGUISTIC FACTORS",
+        "10. FLUENCY AND RHYTHM ANALYSIS",
+        "11. QUANTITATIVE METRICS",
+        "12. CLINICAL IMPLICATIONS",
+        "13. PROGNOSIS AND SUMMARY"
     ]
     combined_parts = []
     ANNOTATED TRANSCRIPT:
     {annotated_transcript}{notes_section}
+    INSTRUCTIONS: Complete ALL 13 sections below. Use simple formatting with NO BOLDING (no ** or asterisks), NO hashtags (###), and minimal markdown. Focus on clinical utility and completeness. Count all markers precisely and provide specific examples. Write section headers as plain text followed by a colon.
     COMPREHENSIVE SPEECH SAMPLE ANALYSIS
     - Academic vs. conversational vocabulary ratio
     - Age-appropriate vocabulary development
+    7. NLP-DERIVED LINGUISTIC FEATURES
+    A. Lexical Diversity Measures (provide exact calculations):
+    - Type-Token Ratio (TTR): Unique words divided by total words
+      * Calculate: [unique words] / [total words] = [ratio]
+      * Interpretation: Higher ratios indicate greater lexical diversity
+    - Moving Average Type-Token Ratio (MATTR): Average TTR across text segments
+      * Calculate and interpret stability of lexical diversity
+    - Measure of Textual Lexical Diversity (MTLD): Length of text segments maintaining TTR threshold
+      * Higher values indicate sustained lexical diversity
+      * Provide exact MTLD score and interpretation
+    - Hypergeometric Distribution D (HDD): Probability-based diversity measure
+      * Controls for text length effects
+      * Provide HDD score and clinical significance
+    B. Word Frequency Analysis:
+    - Most frequent words used (top 10 with counts)
+    - High-frequency vs. low-frequency word distribution
+    - Function words vs. content words ratio
+    - Repetitive word patterns and their clinical significance
+    C. Linguistic Complexity Indicators:
+    - Average word length in syllables
+    - Syllable complexity patterns
+    - Morphological complexity index
+    - Syntactic complexity derived from automated parsing
+    8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS
     A. Morphological Patterns:
     - Derivational morphology: Prefixes and suffixes
     - Syllable structure complexity
     - Phonological awareness indicators
+    9. COGNITIVE-LINGUISTIC FACTORS
     A. Working Memory Indicators:
     - Sentence length and complexity management
     - Planning and organization in discourse
     - Cognitive flexibility in topic management
+    10. FLUENCY AND RHYTHM ANALYSIS
     A. Disfluency Patterns:
     - Total disfluency count and rate per 100 words
     - Rhythm and prosodic patterns (if evident)
     - Overall fluency profile and age-appropriateness
+    11. QUANTITATIVE METRICS
     A. Basic Measures:
     - Total words: [exact count]
     - Vocabulary sophistication ratio
     - Sentence complexity distribution percentages
+    12. CLINICAL IMPLICATIONS
     A. Strengths (ranked by prominence):
     - Primary strengths with supporting evidence
     - Frequency and duration recommendations
     - Progress monitoring strategies
+    13. PROGNOSIS AND SUMMARY
     A. Overall Communication Profile:
     - Comprehensive summary of findings
     - Family/educational recommendations
     CRITICAL REQUIREMENTS:
+    1. Complete ALL 13 sections - do not stop early
     2. Provide exact counts for all markers with specific examples
     3. Calculate all percentages and rates with formulas shown
     4. Include direct quotes from transcript for examples
     5. Analyze patterns and provide clinical interpretations
     6. Focus on actionable, clinically relevant insights
     7. If response is incomplete, end with <CONTINUE>
+    8. FORMATTING: Use NO asterisks (**), NO hashtags (###), NO bolding - plain text only
     """
     return call_claude_api_with_continuation(analysis_prompt)
     if not ANTHROPIC_API_KEY:
         return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
+    print("🔄 Starting comprehensive 13-section analysis...")
     print("⏱️ This may take 3-5 minutes for complex analyses...")
     # Define all required sections
         "4. FIGURATIVE LANGUAGE ANALYSIS",
         "5. PRAGMATIC LANGUAGE ASSESSMENT",
         "6. VOCABULARY AND SEMANTIC ANALYSIS",
+        "7. NLP-DERIVED LINGUISTIC FEATURES",
+        "8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
+        "9. COGNITIVE-LINGUISTIC FACTORS",
+        "10. FLUENCY AND RHYTHM ANALYSIS",
+        "11. QUANTITATIVE METRICS",
+        "12. CLINICAL IMPLICATIONS",
+        "13. PROGNOSIS AND SUMMARY"
     ]
     # Safety limits to prevent infinite loops
         last_section_count = 0  # Track progress between calls
         # Add continuation instruction to original prompt
+        initial_prompt = prompt + "\n\nCRITICAL INSTRUCTIONS: You MUST complete ALL 13 sections of the analysis. If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Do not skip any sections. Use the checklist to ensure all sections are completed."
         while True:  # Unlimited continuations until complete
             if continuation_count == 0:
     # Log final results
     print(f"\n=== FINAL SMART VALIDATION ===")
     print(f"Total sections found: {len(all_sections)}")
+    print(f"All sections present: {len(all_sections) == 13}")
     print(f"Missing sections: {[s for s in required_sections if s not in all_sections]}")
     print(f"Total time: {(time.time() - start_time) / 60:.1f} minutes")
     print(f"Total API calls: {continuation_count + 1}")
         final_response += f"\n\n[Analysis completed in {continuation_count + 1} parts over {(time.time() - start_time) / 60:.1f} minutes]"
     # Add warning if incomplete due to safety limits
+    if len(all_sections) < 13:
         missing_sections = [s for s in required_sections if s not in all_sections]
         final_response += f"\n\n⚠️ WARNING: Analysis incomplete due to safety limits. Missing sections: {', '.join(missing_sections)}"
         final_response += f"\n\n💡 TIP: Try running the analysis again, or use the 'Targeted Analysis' tab to focus on specific areas."
     {metrics_text}
     ANALYSIS INSTRUCTIONS:
+    Using the detailed linguistic markers in the annotated transcript and the calculated metrics above, provide a comprehensive analysis with EXACT counts, percentages, and specific examples. Complete ALL 13 sections below:
     COMPREHENSIVE SPEECH SAMPLE ANALYSIS:
     - Calculate precise percentages and show your work
     - Give specific examples from the transcript
     - If annotation is incomplete, supplement with analysis of the original transcript
+    - Complete ALL 13 sections - use <CONTINUE> if needed
     """
     return call_claude_api_with_continuation(analysis_prompt)
         # Create comprehensive analysis prompt
         final_prompt = f"""
+        You are a speech-language pathologist conducting a comprehensive speech analysis. Use the verified statistical data provided and complete ALL 13 sections with detailed structure.
         Patient: {age}-year-old {gender}
         INSTRUCTIONS:
         1. Use ONLY the verified statistical values above - do not recount anything
+        2. Complete ALL 13 sections without stopping
         3. Provide specific examples and quotes from the transcript
         4. Calculate rates and percentages using verified counts
         5. Focus on clinical interpretation and actionable insights
         - Treatment planning and expected outcomes
         - Follow-up recommendations
+        CRITICAL: Complete ALL 13 sections using verified data and specific transcript examples.
         """
         # Get comprehensive analysis