Spaces:
Sleeping
Sleeping
Update annotated_casl_app.py
Browse files
- annotated_casl_app.py +66 -35
annotated_casl_app.py
CHANGED
|
@@ -27,12 +27,13 @@ def segment_response_by_sections(response_text):
|
|
| 27 |
"4. FIGURATIVE LANGUAGE ANALYSIS",
|
| 28 |
"5. PRAGMATIC LANGUAGE ASSESSMENT",
|
| 29 |
"6. VOCABULARY AND SEMANTIC ANALYSIS",
|
| 30 |
-
"7.
|
| 31 |
-
"8.
|
| 32 |
-
"9.
|
| 33 |
-
"10.
|
| 34 |
-
"11.
|
| 35 |
-
"12.
|
|
|
|
| 36 |
]
|
| 37 |
|
| 38 |
sections = {}
|
|
@@ -75,12 +76,13 @@ def combine_sections_smartly(sections_dict):
|
|
| 75 |
"4. FIGURATIVE LANGUAGE ANALYSIS",
|
| 76 |
"5. PRAGMATIC LANGUAGE ASSESSMENT",
|
| 77 |
"6. VOCABULARY AND SEMANTIC ANALYSIS",
|
| 78 |
-
"7.
|
| 79 |
-
"8.
|
| 80 |
-
"9.
|
| 81 |
-
"10.
|
| 82 |
-
"11.
|
| 83 |
-
"12.
|
|
|
|
| 84 |
]
|
| 85 |
|
| 86 |
combined_parts = []
|
|
@@ -568,7 +570,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 568 |
ANNOTATED TRANSCRIPT:
|
| 569 |
{annotated_transcript}{notes_section}
|
| 570 |
|
| 571 |
-
INSTRUCTIONS: Complete ALL
|
| 572 |
|
| 573 |
COMPREHENSIVE SPEECH SAMPLE ANALYSIS
|
| 574 |
|
|
@@ -712,7 +714,34 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 712 |
- Academic vs. conversational vocabulary ratio
|
| 713 |
- Age-appropriate vocabulary development
|
| 714 |
|
| 715 |
-
7.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 716 |
|
| 717 |
A. Morphological Patterns:
|
| 718 |
- Derivational morphology: Prefixes and suffixes
|
|
@@ -725,7 +754,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 725 |
- Syllable structure complexity
|
| 726 |
- Phonological awareness indicators
|
| 727 |
|
| 728 |
-
|
| 729 |
|
| 730 |
A. Working Memory Indicators:
|
| 731 |
- Sentence length and complexity management
|
|
@@ -742,7 +771,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 742 |
- Planning and organization in discourse
|
| 743 |
- Cognitive flexibility in topic management
|
| 744 |
|
| 745 |
-
|
| 746 |
|
| 747 |
A. Disfluency Patterns:
|
| 748 |
- Total disfluency count and rate per 100 words
|
|
@@ -755,7 +784,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 755 |
- Rhythm and prosodic patterns (if evident)
|
| 756 |
- Overall fluency profile and age-appropriateness
|
| 757 |
|
| 758 |
-
|
| 759 |
|
| 760 |
A. Basic Measures:
|
| 761 |
- Total words: [exact count]
|
|
@@ -771,7 +800,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 771 |
- Vocabulary sophistication ratio
|
| 772 |
- Sentence complexity distribution percentages
|
| 773 |
|
| 774 |
-
|
| 775 |
|
| 776 |
A. Strengths (ranked by prominence):
|
| 777 |
- Primary strengths with supporting evidence
|
|
@@ -789,7 +818,7 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 789 |
- Frequency and duration recommendations
|
| 790 |
- Progress monitoring strategies
|
| 791 |
|
| 792 |
-
|
| 793 |
|
| 794 |
A. Overall Communication Profile:
|
| 795 |
- Comprehensive summary of findings
|
|
@@ -803,13 +832,14 @@ def analyze_annotated_transcript(annotated_transcript, age, gender, slp_notes):
|
|
| 803 |
- Family/educational recommendations
|
| 804 |
|
| 805 |
CRITICAL REQUIREMENTS:
|
| 806 |
-
1. Complete ALL
|
| 807 |
2. Provide exact counts for all markers with specific examples
|
| 808 |
3. Calculate all percentages and rates with formulas shown
|
| 809 |
4. Include direct quotes from transcript for examples
|
| 810 |
5. Analyze patterns and provide clinical interpretations
|
| 811 |
6. Focus on actionable, clinically relevant insights
|
| 812 |
7. If response is incomplete, end with <CONTINUE>
|
|
|
|
| 813 |
"""
|
| 814 |
|
| 815 |
return call_claude_api_with_continuation(analysis_prompt)
|
|
@@ -1290,7 +1320,7 @@ def call_claude_api_with_continuation(prompt):
|
|
| 1290 |
if not ANTHROPIC_API_KEY:
|
| 1291 |
return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
|
| 1292 |
|
| 1293 |
-
print("🔄 Starting comprehensive
|
| 1294 |
print("⏱️ This may take 3-5 minutes for complex analyses...")
|
| 1295 |
|
| 1296 |
# Define all required sections
|
|
@@ -1301,12 +1331,13 @@ def call_claude_api_with_continuation(prompt):
|
|
| 1301 |
"4. FIGURATIVE LANGUAGE ANALYSIS",
|
| 1302 |
"5. PRAGMATIC LANGUAGE ASSESSMENT",
|
| 1303 |
"6. VOCABULARY AND SEMANTIC ANALYSIS",
|
| 1304 |
-
"7.
|
| 1305 |
-
"8.
|
| 1306 |
-
"9.
|
| 1307 |
-
"10.
|
| 1308 |
-
"11.
|
| 1309 |
-
"12.
|
|
|
|
| 1310 |
]
|
| 1311 |
|
| 1312 |
# Safety limits to prevent infinite loops
|
|
@@ -1321,7 +1352,7 @@ def call_claude_api_with_continuation(prompt):
|
|
| 1321 |
last_section_count = 0 # Track progress between calls
|
| 1322 |
|
| 1323 |
# Add continuation instruction to original prompt
|
| 1324 |
-
initial_prompt = prompt + "\n\nCRITICAL INSTRUCTIONS: You MUST complete ALL
|
| 1325 |
|
| 1326 |
while True: # Unlimited continuations until complete
|
| 1327 |
if continuation_count == 0:
|
|
@@ -1464,7 +1495,7 @@ def call_claude_api_with_continuation(prompt):
|
|
| 1464 |
# Log final results
|
| 1465 |
print(f"\n=== FINAL SMART VALIDATION ===")
|
| 1466 |
print(f"Total sections found: {len(all_sections)}")
|
| 1467 |
-
print(f"All sections present: {len(all_sections) ==
|
| 1468 |
print(f"Missing sections: {[s for s in required_sections if s not in all_sections]}")
|
| 1469 |
print(f"Total time: {(time.time() - start_time) / 60:.1f} minutes")
|
| 1470 |
print(f"Total API calls: {continuation_count + 1}")
|
|
@@ -1475,7 +1506,7 @@ def call_claude_api_with_continuation(prompt):
|
|
| 1475 |
final_response += f"\n\n[Analysis completed in {continuation_count + 1} parts over {(time.time() - start_time) / 60:.1f} minutes]"
|
| 1476 |
|
| 1477 |
# Add warning if incomplete due to safety limits
|
| 1478 |
-
if len(all_sections) <
|
| 1479 |
missing_sections = [s for s in required_sections if s not in all_sections]
|
| 1480 |
final_response += f"\n\n⚠️ WARNING: Analysis incomplete due to safety limits. Missing sections: {', '.join(missing_sections)}"
|
| 1481 |
final_response += f"\n\n💡 TIP: Try running the analysis again, or use the 'Targeted Analysis' tab to focus on specific areas."
|
|
@@ -1536,7 +1567,7 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
|
|
| 1536 |
{metrics_text}
|
| 1537 |
|
| 1538 |
ANALYSIS INSTRUCTIONS:
|
| 1539 |
-
Using the detailed linguistic markers in the annotated transcript and the calculated metrics above, provide a comprehensive analysis with EXACT counts, percentages, and specific examples. Complete ALL
|
| 1540 |
|
| 1541 |
COMPREHENSIVE SPEECH SAMPLE ANALYSIS:
|
| 1542 |
|
|
@@ -1689,7 +1720,7 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
|
|
| 1689 |
- Calculate precise percentages and show your work
|
| 1690 |
- Give specific examples from the transcript
|
| 1691 |
- If annotation is incomplete, supplement with analysis of the original transcript
|
| 1692 |
-
- Complete ALL
|
| 1693 |
"""
|
| 1694 |
|
| 1695 |
return call_claude_api_with_continuation(analysis_prompt)
|
|
@@ -2166,7 +2197,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2166 |
|
| 2167 |
# Create comprehensive analysis prompt
|
| 2168 |
final_prompt = f"""
|
| 2169 |
-
You are a speech-language pathologist conducting a comprehensive speech analysis. Use the verified statistical data provided and complete ALL
|
| 2170 |
|
| 2171 |
Patient: {age}-year-old {gender}
|
| 2172 |
|
|
@@ -2177,7 +2208,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2177 |
|
| 2178 |
INSTRUCTIONS:
|
| 2179 |
1. Use ONLY the verified statistical values above - do not recount anything
|
| 2180 |
-
2. Complete ALL
|
| 2181 |
3. Provide specific examples and quotes from the transcript
|
| 2182 |
4. Calculate rates and percentages using verified counts
|
| 2183 |
5. Focus on clinical interpretation and actionable insights
|
|
@@ -2306,7 +2337,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2306 |
- Treatment planning and expected outcomes
|
| 2307 |
- Follow-up recommendations
|
| 2308 |
|
| 2309 |
-
CRITICAL: Complete ALL
|
| 2310 |
"""
|
| 2311 |
|
| 2312 |
# Get comprehensive analysis
|
|
|
|
| 27 |
"4. FIGURATIVE LANGUAGE ANALYSIS",
|
| 28 |
"5. PRAGMATIC LANGUAGE ASSESSMENT",
|
| 29 |
"6. VOCABULARY AND SEMANTIC ANALYSIS",
|
| 30 |
+
"7. NLP-DERIVED LINGUISTIC FEATURES",
|
| 31 |
+
"8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
|
| 32 |
+
"9. COGNITIVE-LINGUISTIC FACTORS",
|
| 33 |
+
"10. FLUENCY AND RHYTHM ANALYSIS",
|
| 34 |
+
"11. QUANTITATIVE METRICS",
|
| 35 |
+
"12. CLINICAL IMPLICATIONS",
|
| 36 |
+
"13. PROGNOSIS AND SUMMARY"
|
| 37 |
]
|
| 38 |
|
| 39 |
sections = {}
|
|
|
|
| 76 |
"4. FIGURATIVE LANGUAGE ANALYSIS",
|
| 77 |
"5. PRAGMATIC LANGUAGE ASSESSMENT",
|
| 78 |
"6. VOCABULARY AND SEMANTIC ANALYSIS",
|
| 79 |
+
"7. NLP-DERIVED LINGUISTIC FEATURES",
|
| 80 |
+
"8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
|
| 81 |
+
"9. COGNITIVE-LINGUISTIC FACTORS",
|
| 82 |
+
"10. FLUENCY AND RHYTHM ANALYSIS",
|
| 83 |
+
"11. QUANTITATIVE METRICS",
|
| 84 |
+
"12. CLINICAL IMPLICATIONS",
|
| 85 |
+
"13. PROGNOSIS AND SUMMARY"
|
| 86 |
]
|
| 87 |
|
| 88 |
combined_parts = []
|
|
|
|
| 570 |
ANNOTATED TRANSCRIPT:
|
| 571 |
{annotated_transcript}{notes_section}
|
| 572 |
|
| 573 |
+
INSTRUCTIONS: Complete ALL 13 sections below. Use simple formatting with NO BOLDING (no ** or asterisks), NO hashtags (###), and minimal markdown. Focus on clinical utility and completeness. Count all markers precisely and provide specific examples. Write section headers as plain text followed by a colon.
|
| 574 |
|
| 575 |
COMPREHENSIVE SPEECH SAMPLE ANALYSIS
|
| 576 |
|
|
|
|
| 714 |
- Academic vs. conversational vocabulary ratio
|
| 715 |
- Age-appropriate vocabulary development
|
| 716 |
|
| 717 |
+
7. NLP-DERIVED LINGUISTIC FEATURES
|
| 718 |
+
|
| 719 |
+
A. Lexical Diversity Measures (provide exact calculations):
|
| 720 |
+
- Type-Token Ratio (TTR): Unique words divided by total words
|
| 721 |
+
* Calculate: [unique words] / [total words] = [ratio]
|
| 722 |
+
* Interpretation: Higher ratios indicate greater lexical diversity
|
| 723 |
+
- Moving Average Type-Token Ratio (MATTR): Average TTR across text segments
|
| 724 |
+
* Calculate and interpret stability of lexical diversity
|
| 725 |
+
- Measure of Textual Lexical Diversity (MTLD): Length of text segments maintaining TTR threshold
|
| 726 |
+
* Higher values indicate sustained lexical diversity
|
| 727 |
+
* Provide exact MTLD score and interpretation
|
| 728 |
+
- Hypergeometric Distribution D (HDD): Probability-based diversity measure
|
| 729 |
+
* Controls for text length effects
|
| 730 |
+
* Provide HDD score and clinical significance
|
| 731 |
+
|
| 732 |
+
B. Word Frequency Analysis:
|
| 733 |
+
- Most frequent words used (top 10 with counts)
|
| 734 |
+
- High-frequency vs. low-frequency word distribution
|
| 735 |
+
- Function words vs. content words ratio
|
| 736 |
+
- Repetitive word patterns and their clinical significance
|
| 737 |
+
|
| 738 |
+
C. Linguistic Complexity Indicators:
|
| 739 |
+
- Average word length in syllables
|
| 740 |
+
- Syllable complexity patterns
|
| 741 |
+
- Morphological complexity index
|
| 742 |
+
- Syntactic complexity derived from automated parsing
|
| 743 |
+
|
| 744 |
+
8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS
|
| 745 |
|
| 746 |
A. Morphological Patterns:
|
| 747 |
- Derivational morphology: Prefixes and suffixes
|
|
|
|
| 754 |
- Syllable structure complexity
|
| 755 |
- Phonological awareness indicators
|
| 756 |
|
| 757 |
+
9. COGNITIVE-LINGUISTIC FACTORS
|
| 758 |
|
| 759 |
A. Working Memory Indicators:
|
| 760 |
- Sentence length and complexity management
|
|
|
|
| 771 |
- Planning and organization in discourse
|
| 772 |
- Cognitive flexibility in topic management
|
| 773 |
|
| 774 |
+
10. FLUENCY AND RHYTHM ANALYSIS
|
| 775 |
|
| 776 |
A. Disfluency Patterns:
|
| 777 |
- Total disfluency count and rate per 100 words
|
|
|
|
| 784 |
- Rhythm and prosodic patterns (if evident)
|
| 785 |
- Overall fluency profile and age-appropriateness
|
| 786 |
|
| 787 |
+
11. QUANTITATIVE METRICS
|
| 788 |
|
| 789 |
A. Basic Measures:
|
| 790 |
- Total words: [exact count]
|
|
|
|
| 800 |
- Vocabulary sophistication ratio
|
| 801 |
- Sentence complexity distribution percentages
|
| 802 |
|
| 803 |
+
12. CLINICAL IMPLICATIONS
|
| 804 |
|
| 805 |
A. Strengths (ranked by prominence):
|
| 806 |
- Primary strengths with supporting evidence
|
|
|
|
| 818 |
- Frequency and duration recommendations
|
| 819 |
- Progress monitoring strategies
|
| 820 |
|
| 821 |
+
13. PROGNOSIS AND SUMMARY
|
| 822 |
|
| 823 |
A. Overall Communication Profile:
|
| 824 |
- Comprehensive summary of findings
|
|
|
|
| 832 |
- Family/educational recommendations
|
| 833 |
|
| 834 |
CRITICAL REQUIREMENTS:
|
| 835 |
+
1. Complete ALL 13 sections - do not stop early
|
| 836 |
2. Provide exact counts for all markers with specific examples
|
| 837 |
3. Calculate all percentages and rates with formulas shown
|
| 838 |
4. Include direct quotes from transcript for examples
|
| 839 |
5. Analyze patterns and provide clinical interpretations
|
| 840 |
6. Focus on actionable, clinically relevant insights
|
| 841 |
7. If response is incomplete, end with <CONTINUE>
|
| 842 |
+
8. FORMATTING: Use NO asterisks (**), NO hashtags (###), NO bolding - plain text only
|
| 843 |
"""
|
| 844 |
|
| 845 |
return call_claude_api_with_continuation(analysis_prompt)
|
|
|
|
| 1320 |
if not ANTHROPIC_API_KEY:
|
| 1321 |
return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
|
| 1322 |
|
| 1323 |
+
print("🔄 Starting comprehensive 13-section analysis...")
|
| 1324 |
print("⏱️ This may take 3-5 minutes for complex analyses...")
|
| 1325 |
|
| 1326 |
# Define all required sections
|
|
|
|
| 1331 |
"4. FIGURATIVE LANGUAGE ANALYSIS",
|
| 1332 |
"5. PRAGMATIC LANGUAGE ASSESSMENT",
|
| 1333 |
"6. VOCABULARY AND SEMANTIC ANALYSIS",
|
| 1334 |
+
"7. NLP-DERIVED LINGUISTIC FEATURES",
|
| 1335 |
+
"8. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
|
| 1336 |
+
"9. COGNITIVE-LINGUISTIC FACTORS",
|
| 1337 |
+
"10. FLUENCY AND RHYTHM ANALYSIS",
|
| 1338 |
+
"11. QUANTITATIVE METRICS",
|
| 1339 |
+
"12. CLINICAL IMPLICATIONS",
|
| 1340 |
+
"13. PROGNOSIS AND SUMMARY"
|
| 1341 |
]
|
| 1342 |
|
| 1343 |
# Safety limits to prevent infinite loops
|
|
|
|
| 1352 |
last_section_count = 0 # Track progress between calls
|
| 1353 |
|
| 1354 |
# Add continuation instruction to original prompt
|
| 1355 |
+
initial_prompt = prompt + "\n\nCRITICAL INSTRUCTIONS: You MUST complete ALL 13 sections of the analysis. If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Do not skip any sections. Use the checklist to ensure all sections are completed."
|
| 1356 |
|
| 1357 |
while True: # Unlimited continuations until complete
|
| 1358 |
if continuation_count == 0:
|
|
|
|
| 1495 |
# Log final results
|
| 1496 |
print(f"\n=== FINAL SMART VALIDATION ===")
|
| 1497 |
print(f"Total sections found: {len(all_sections)}")
|
| 1498 |
+
print(f"All sections present: {len(all_sections) == 13}")
|
| 1499 |
print(f"Missing sections: {[s for s in required_sections if s not in all_sections]}")
|
| 1500 |
print(f"Total time: {(time.time() - start_time) / 60:.1f} minutes")
|
| 1501 |
print(f"Total API calls: {continuation_count + 1}")
|
|
|
|
| 1506 |
final_response += f"\n\n[Analysis completed in {continuation_count + 1} parts over {(time.time() - start_time) / 60:.1f} minutes]"
|
| 1507 |
|
| 1508 |
# Add warning if incomplete due to safety limits
|
| 1509 |
+
if len(all_sections) < 13:
|
| 1510 |
missing_sections = [s for s in required_sections if s not in all_sections]
|
| 1511 |
final_response += f"\n\n⚠️ WARNING: Analysis incomplete due to safety limits. Missing sections: {', '.join(missing_sections)}"
|
| 1512 |
final_response += f"\n\n💡 TIP: Try running the analysis again, or use the 'Targeted Analysis' tab to focus on specific areas."
|
|
|
|
| 1567 |
{metrics_text}
|
| 1568 |
|
| 1569 |
ANALYSIS INSTRUCTIONS:
|
| 1570 |
+
Using the detailed linguistic markers in the annotated transcript and the calculated metrics above, provide a comprehensive analysis with EXACT counts, percentages, and specific examples. Complete ALL 13 sections below:
|
| 1571 |
|
| 1572 |
COMPREHENSIVE SPEECH SAMPLE ANALYSIS:
|
| 1573 |
|
|
|
|
| 1720 |
- Calculate precise percentages and show your work
|
| 1721 |
- Give specific examples from the transcript
|
| 1722 |
- If annotation is incomplete, supplement with analysis of the original transcript
|
| 1723 |
+
- Complete ALL 13 sections - use <CONTINUE> if needed
|
| 1724 |
"""
|
| 1725 |
|
| 1726 |
return call_claude_api_with_continuation(analysis_prompt)
|
|
|
|
| 2197 |
|
| 2198 |
# Create comprehensive analysis prompt
|
| 2199 |
final_prompt = f"""
|
| 2200 |
+
You are a speech-language pathologist conducting a comprehensive speech analysis. Use the verified statistical data provided and complete ALL 13 sections with detailed structure.
|
| 2201 |
|
| 2202 |
Patient: {age}-year-old {gender}
|
| 2203 |
|
|
|
|
| 2208 |
|
| 2209 |
INSTRUCTIONS:
|
| 2210 |
1. Use ONLY the verified statistical values above - do not recount anything
|
| 2211 |
+
2. Complete ALL 13 sections without stopping
|
| 2212 |
3. Provide specific examples and quotes from the transcript
|
| 2213 |
4. Calculate rates and percentages using verified counts
|
| 2214 |
5. Focus on clinical interpretation and actionable insights
|
|
|
|
| 2337 |
- Treatment planning and expected outcomes
|
| 2338 |
- Follow-up recommendations
|
| 2339 |
|
| 2340 |
+
CRITICAL: Complete ALL 13 sections using verified data and specific transcript examples.
|
| 2341 |
"""
|
| 2342 |
|
| 2343 |
# Get comprehensive analysis
|