SreekarB committed on
Commit
ec1ed75
·
verified ·
1 Parent(s): e935875

Update annotated_casl_app.py

Browse files
Files changed (1) hide show
  1. annotated_casl_app.py +66 -35
annotated_casl_app.py CHANGED
@@ -27,12 +27,13 @@ def segment_response_by_sections(response_text):
27
  "4. FIGURATIVE LANGUAGE ANALYSIS",
28
  "5. PRAGMATIC LANGUAGE ASSESSMENT",
29
  "6. VOCABULARY AND SEMANTIC ANALYSIS",
30
- "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
31
- "8. COGNITIVE-LINGUISTIC FACTORS",
32
- "9. FLUENCY AND RHYTHM ANALYSIS",
33
- "10. QUANTITATIVE METRICS",
34
- "11. CLINICAL IMPLICATIONS",
35
- "12. PROGNOSIS AND SUMMARY"
 
36
  ]
37
 
38
  sections = {}
@@ -75,12 +76,13 @@ def combine_sections_smartly(sections_dict):
75
  "4. FIGURATIVE LANGUAGE ANALYSIS",
76
  "5. PRAGMATIC LANGUAGE ASSESSMENT",
77
  "6. VOCABULARY AND SEMANTIC ANALYSIS",
78
- "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
79
- "8. COGNITIVE-LINGUISTIC FACTORS",
80
- "9. FLUENCY AND RHYTHM ANALYSIS",
81
- "10. QUANTITATIVE METRICS",
82
- "11. CLINICAL IMPLICATIONS",
83
- "12. PROGNOSIS AND SUMMARY"
 
84
  ]
85
 
86
  combined_parts = []
@@ -568,7 +570,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
568
  ANNOTATED TRANSCRIPT:
569
  {annotated_transcript}{notes_section}
570
 
571
- INSTRUCTIONS: Complete ALL 12 sections below. Use simple formatting (no excessive markdown, headers, or bullets). Focus on clinical utility and completeness. Count all markers precisely and provide specific examples.
572
 
573
  COMPREHENSIVE SPEECH SAMPLE ANALYSIS
574
 
@@ -712,7 +714,34 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
712
  - Academic vs. conversational vocabulary ratio
713
  - Age-appropriate vocabulary development
714
 
715
- 7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
716
 
717
  A. Morphological Patterns:
718
  - Derivational morphology: Prefixes and suffixes
@@ -725,7 +754,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
725
  - Syllable structure complexity
726
  - Phonological awareness indicators
727
 
728
- 8. COGNITIVE-LINGUISTIC FACTORS
729
 
730
  A. Working Memory Indicators:
731
  - Sentence length and complexity management
@@ -742,7 +771,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
742
  - Planning and organization in discourse
743
  - Cognitive flexibility in topic management
744
 
745
- 9. FLUENCY AND RHYTHM ANALYSIS
746
 
747
  A. Disfluency Patterns:
748
  - Total disfluency count and rate per 100 words
@@ -755,7 +784,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
755
  - Rhythm and prosodic patterns (if evident)
756
  - Overall fluency profile and age-appropriateness
757
 
758
- 10. QUANTITATIVE METRICS
759
 
760
  A. Basic Measures:
761
  - Total words: [exact count]
@@ -771,7 +800,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
771
  - Vocabulary sophistication ratio
772
  - Sentence complexity distribution percentages
773
 
774
- 11. CLINICAL IMPLICATIONS
775
 
776
  A. Strengths (ranked by prominence):
777
  - Primary strengths with supporting evidence
@@ -789,7 +818,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
789
  - Frequency and duration recommendations
790
  - Progress monitoring strategies
791
 
792
- 12. PROGNOSIS AND SUMMARY
793
 
794
  A. Overall Communication Profile:
795
  - Comprehensive summary of findings
@@ -803,13 +832,14 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
803
  - Family/educational recommendations
804
 
805
  CRITICAL REQUIREMENTS:
806
- 1. Complete ALL 12 sections - do not stop early
807
  2. Provide exact counts for all markers with specific examples
808
  3. Calculate all percentages and rates with formulas shown
809
  4. Include direct quotes from transcript for examples
810
  5. Analyze patterns and provide clinical interpretations
811
  6. Focus on actionable, clinically relevant insights
812
  7. If response is incomplete, end with <CONTINUE>
 
813
  """
814
 
815
  return call_claude_api_with_continuation(analysis_prompt)
@@ -1290,7 +1320,7 @@ def call_claude_api_with_continuation(prompt):
1290
  if not ANTHROPIC_API_KEY:
1291
  return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
1292
 
1293
- print("🔄 Starting comprehensive 12-section analysis...")
1294
  print("⏱️ This may take 3-5 minutes for complex analyses...")
1295
 
1296
  # Define all required sections
@@ -1301,12 +1331,13 @@ def call_claude_api_with_continuation(prompt):
1301
  "4. FIGURATIVE LANGUAGE ANALYSIS",
1302
  "5. PRAGMATIC LANGUAGE ASSESSMENT",
1303
  "6. VOCABULARY AND SEMANTIC ANALYSIS",
1304
- "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
1305
- "8. COGNITIVE-LINGUISTIC FACTORS",
1306
- "9. FLUENCY AND RHYTHM ANALYSIS",
1307
- "10. QUANTITATIVE METRICS",
1308
- "11. CLINICAL IMPLICATIONS",
1309
- "12. PROGNOSIS AND SUMMARY"
 
1310
  ]
1311
 
1312
  # Safety limits to prevent infinite loops
@@ -1321,7 +1352,7 @@ def call_claude_api_with_continuation(prompt):
1321
  last_section_count = 0 # Track progress between calls
1322
 
1323
  # Add continuation instruction to original prompt
1324
- initial_prompt = prompt + "\n\nCRITICAL INSTRUCTIONS: You MUST complete ALL 12 sections of the analysis. If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Do not skip any sections. Use the checklist to ensure all sections are completed."
1325
 
1326
  while True: # Unlimited continuations until complete
1327
  if continuation_count == 0:
@@ -1464,7 +1495,7 @@ def call_claude_api_with_continuation(prompt):
1464
  # Log final results
1465
  print(f"\n=== FINAL SMART VALIDATION ===")
1466
  print(f"Total sections found: {len(all_sections)}")
1467
- print(f"All sections present: {len(all_sections) == 12}")
1468
  print(f"Missing sections: {[s for s in required_sections if s not in all_sections]}")
1469
  print(f"Total time: {(time.time() - start_time) / 60:.1f} minutes")
1470
  print(f"Total API calls: {continuation_count + 1}")
@@ -1475,7 +1506,7 @@ def call_claude_api_with_continuation(prompt):
1475
  final_response += f"\n\n[Analysis completed in {continuation_count + 1} parts over {(time.time() - start_time) / 60:.1f} minutes]"
1476
 
1477
  # Add warning if incomplete due to safety limits
1478
- if len(all_sections) < 12:
1479
  missing_sections = [s for s in required_sections if s not in all_sections]
1480
  final_response += f"\n\n⚠️ WARNING: Analysis incomplete due to safety limits. Missing sections: {', '.join(missing_sections)}"
1481
  final_response += f"\n\n💡 TIP: Try running the analysis again, or use the 'Targeted Analysis' tab to focus on specific areas."
@@ -1536,7 +1567,7 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
1536
  {metrics_text}
1537
 
1538
  ANALYSIS INSTRUCTIONS:
1539
- Using the detailed linguistic markers in the annotated transcript and the calculated metrics above, provide a comprehensive analysis with EXACT counts, percentages, and specific examples. Complete ALL 12 sections below:
1540
 
1541
  COMPREHENSIVE SPEECH SAMPLE ANALYSIS:
1542
 
@@ -1689,7 +1720,7 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
1689
  - Calculate precise percentages and show your work
1690
  - Give specific examples from the transcript
1691
  - If annotation is incomplete, supplement with analysis of the original transcript
1692
- - Complete ALL 12 sections - use <CONTINUE> if needed
1693
  """
1694
 
1695
  return call_claude_api_with_continuation(analysis_prompt)
@@ -2166,7 +2197,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2166
 
2167
  # Create comprehensive analysis prompt
2168
  final_prompt = f"""
2169
- You are a speech-language pathologist conducting a comprehensive speech analysis. Use the verified statistical data provided and complete ALL 12 sections with detailed structure.
2170
 
2171
  Patient: {age}-year-old {gender}
2172
 
@@ -2177,7 +2208,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2177
 
2178
  INSTRUCTIONS:
2179
  1. Use ONLY the verified statistical values above - do not recount anything
2180
- 2. Complete ALL 12 sections without stopping
2181
  3. Provide specific examples and quotes from the transcript
2182
  4. Calculate rates and percentages using verified counts
2183
  5. Focus on clinical interpretation and actionable insights
@@ -2306,7 +2337,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2306
  - Treatment planning and expected outcomes
2307
  - Follow-up recommendations
2308
 
2309
- CRITICAL: Complete ALL 12 sections using verified data and specific transcript examples.
2310
  """
2311
 
2312
  # Get comprehensive analysis
 
27
  "4. FIGURATIVE LANGUAGE ANALYSIS",
28
  "5. PRAGMATIC LANGUAGE ASSESSMENT",
29
  "6. VOCABULARY AND SEMANTIC ANALYSIS",
30
+ "7. NLP-DERIVED LINGUISTIC FEATURES",
31
+ "8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
32
+ "9. COGNITIVE-LINGUISTIC FACTORS",
33
+ "10. FLUENCY AND RHYTHM ANALYSIS",
34
+ "11. QUANTITATIVE METRICS",
35
+ "12. CLINICAL IMPLICATIONS",
36
+ "13. PROGNOSIS AND SUMMARY"
37
  ]
38
 
39
  sections = {}
 
76
  "4. FIGURATIVE LANGUAGE ANALYSIS",
77
  "5. PRAGMATIC LANGUAGE ASSESSMENT",
78
  "6. VOCABULARY AND SEMANTIC ANALYSIS",
79
+ "7. NLP-DERIVED LINGUISTIC FEATURES",
80
+ "8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
81
+ "9. COGNITIVE-LINGUISTIC FACTORS",
82
+ "10. FLUENCY AND RHYTHM ANALYSIS",
83
+ "11. QUANTITATIVE METRICS",
84
+ "12. CLINICAL IMPLICATIONS",
85
+ "13. PROGNOSIS AND SUMMARY"
86
  ]
87
 
88
  combined_parts = []
 
570
  ANNOTATED TRANSCRIPT:
571
  {annotated_transcript}{notes_section}
572
 
573
+ INSTRUCTIONS: Complete ALL 13 sections below. Use simple formatting with NO BOLDING (no ** or asterisks), NO hashtags (###), and minimal markdown. Focus on clinical utility and completeness. Count all markers precisely and provide specific examples. Write section headers as plain text followed by a colon.
574
 
575
  COMPREHENSIVE SPEECH SAMPLE ANALYSIS
576
 
 
714
  - Academic vs. conversational vocabulary ratio
715
  - Age-appropriate vocabulary development
716
 
717
+ 7. NLP-DERIVED LINGUISTIC FEATURES
718
+
719
+ A. Lexical Diversity Measures (provide exact calculations):
720
+ - Type-Token Ratio (TTR): Unique words divided by total words
721
+ * Calculate: [unique words] / [total words] = [ratio]
722
+ * Interpretation: Higher ratios indicate greater lexical diversity
723
+ - Moving Average Type-Token Ratio (MATTR): Average TTR across text segments
724
+ * Calculate and interpret stability of lexical diversity
725
+ - Measure of Textual Lexical Diversity (MTLD): Length of text segments maintaining TTR threshold
726
+ * Higher values indicate sustained lexical diversity
727
+ * Provide exact MTLD score and interpretation
728
+ - Hypergeometric Distribution D (HDD): Probability-based diversity measure
729
+ * Controls for text length effects
730
+ * Provide HDD score and clinical significance
731
+
732
+ B. Word Frequency Analysis:
733
+ - Most frequent words used (top 10 with counts)
734
+ - High-frequency vs. low-frequency word distribution
735
+ - Function words vs. content words ratio
736
+ - Repetitive word patterns and their clinical significance
737
+
738
+ C. Linguistic Complexity Indicators:
739
+ - Average word length in syllables
740
+ - Syllable complexity patterns
741
+ - Morphological complexity index
742
+ - Syntactic complexity derived from automated parsing
743
+
744
+ 8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS
745
 
746
  A. Morphological Patterns:
747
  - Derivational morphology: Prefixes and suffixes
 
754
  - Syllable structure complexity
755
  - Phonological awareness indicators
756
 
757
+ 9. COGNITIVE-LINGUISTIC FACTORS
758
 
759
  A. Working Memory Indicators:
760
  - Sentence length and complexity management
 
771
  - Planning and organization in discourse
772
  - Cognitive flexibility in topic management
773
 
774
+ 10. FLUENCY AND RHYTHM ANALYSIS
775
 
776
  A. Disfluency Patterns:
777
  - Total disfluency count and rate per 100 words
 
784
  - Rhythm and prosodic patterns (if evident)
785
  - Overall fluency profile and age-appropriateness
786
 
787
+ 11. QUANTITATIVE METRICS
788
 
789
  A. Basic Measures:
790
  - Total words: [exact count]
 
800
  - Vocabulary sophistication ratio
801
  - Sentence complexity distribution percentages
802
 
803
+ 12. CLINICAL IMPLICATIONS
804
 
805
  A. Strengths (ranked by prominence):
806
  - Primary strengths with supporting evidence
 
818
  - Frequency and duration recommendations
819
  - Progress monitoring strategies
820
 
821
+ 13. PROGNOSIS AND SUMMARY
822
 
823
  A. Overall Communication Profile:
824
  - Comprehensive summary of findings
 
832
  - Family/educational recommendations
833
 
834
  CRITICAL REQUIREMENTS:
835
+ 1. Complete ALL 13 sections - do not stop early
836
  2. Provide exact counts for all markers with specific examples
837
  3. Calculate all percentages and rates with formulas shown
838
  4. Include direct quotes from transcript for examples
839
  5. Analyze patterns and provide clinical interpretations
840
  6. Focus on actionable, clinically relevant insights
841
  7. If response is incomplete, end with <CONTINUE>
842
+ 8. FORMATTING: Use NO asterisks (**), NO hashtags (###), NO bolding - plain text only
843
  """
844
 
845
  return call_claude_api_with_continuation(analysis_prompt)
 
1320
  if not ANTHROPIC_API_KEY:
1321
  return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
1322
 
1323
+ print("🔄 Starting comprehensive 13-section analysis...")
1324
  print("⏱️ This may take 3-5 minutes for complex analyses...")
1325
 
1326
  # Define all required sections
 
1331
  "4. FIGURATIVE LANGUAGE ANALYSIS",
1332
  "5. PRAGMATIC LANGUAGE ASSESSMENT",
1333
  "6. VOCABULARY AND SEMANTIC ANALYSIS",
1334
+ "7. NLP-DERIVED LINGUISTIC FEATURES",
1335
+ "8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
1336
+ "9. COGNITIVE-LINGUISTIC FACTORS",
1337
+ "10. FLUENCY AND RHYTHM ANALYSIS",
1338
+ "11. QUANTITATIVE METRICS",
1339
+ "12. CLINICAL IMPLICATIONS",
1340
+ "13. PROGNOSIS AND SUMMARY"
1341
  ]
1342
 
1343
  # Safety limits to prevent infinite loops
 
1352
  last_section_count = 0 # Track progress between calls
1353
 
1354
  # Add continuation instruction to original prompt
1355
+ initial_prompt = prompt + "\n\nCRITICAL INSTRUCTIONS: You MUST complete ALL 13 sections of the analysis. If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Do not skip any sections. Use the checklist to ensure all sections are completed."
1356
 
1357
  while True: # Unlimited continuations until complete
1358
  if continuation_count == 0:
 
1495
  # Log final results
1496
  print(f"\n=== FINAL SMART VALIDATION ===")
1497
  print(f"Total sections found: {len(all_sections)}")
1498
+ print(f"All sections present: {len(all_sections) == 13}")
1499
  print(f"Missing sections: {[s for s in required_sections if s not in all_sections]}")
1500
  print(f"Total time: {(time.time() - start_time) / 60:.1f} minutes")
1501
  print(f"Total API calls: {continuation_count + 1}")
 
1506
  final_response += f"\n\n[Analysis completed in {continuation_count + 1} parts over {(time.time() - start_time) / 60:.1f} minutes]"
1507
 
1508
  # Add warning if incomplete due to safety limits
1509
+ if len(all_sections) < 13:
1510
  missing_sections = [s for s in required_sections if s not in all_sections]
1511
  final_response += f"\n\n⚠️ WARNING: Analysis incomplete due to safety limits. Missing sections: {', '.join(missing_sections)}"
1512
  final_response += f"\n\n💡 TIP: Try running the analysis again, or use the 'Targeted Analysis' tab to focus on specific areas."
 
1567
  {metrics_text}
1568
 
1569
  ANALYSIS INSTRUCTIONS:
1570
+ Using the detailed linguistic markers in the annotated transcript and the calculated metrics above, provide a comprehensive analysis with EXACT counts, percentages, and specific examples. Complete ALL 13 sections below:
1571
 
1572
  COMPREHENSIVE SPEECH SAMPLE ANALYSIS:
1573
 
 
1720
  - Calculate precise percentages and show your work
1721
  - Give specific examples from the transcript
1722
  - If annotation is incomplete, supplement with analysis of the original transcript
1723
+ - Complete ALL 13 sections - use <CONTINUE> if needed
1724
  """
1725
 
1726
  return call_claude_api_with_continuation(analysis_prompt)
 
2197
 
2198
  # Create comprehensive analysis prompt
2199
  final_prompt = f"""
2200
+ You are a speech-language pathologist conducting a comprehensive speech analysis. Use the verified statistical data provided and complete ALL 13 sections with detailed structure.
2201
 
2202
  Patient: {age}-year-old {gender}
2203
 
 
2208
 
2209
  INSTRUCTIONS:
2210
  1. Use ONLY the verified statistical values above - do not recount anything
2211
+ 2. Complete ALL 13 sections without stopping
2212
  3. Provide specific examples and quotes from the transcript
2213
  4. Calculate rates and percentages using verified counts
2214
  5. Focus on clinical interpretation and actionable insights
 
2337
  - Treatment planning and expected outcomes
2338
  - Follow-up recommendations
2339
 
2340
+ CRITICAL: Complete ALL 13 sections using verified data and specific transcript examples.
2341
  """
2342
 
2343
  # Get comprehensive analysis