Spaces:

SreekarB
/

SLPAnalysis

Sleeping

App Files Files Community

SreekarB committed on Apr 18

Commit

d66b701

verified ·

1 Parent(s): 88dce03

Upload casl_analysis.py

Browse files

Files changed (1) hide show

casl_analysis.py +279 -106

casl_analysis.py CHANGED Viewed

@@ -730,62 +730,168 @@ def create_casl_radar_chart(speech_factors):
 def analyze_transcript(transcript, age, gender):
     """Analyze a speech transcript using the CASL framework"""
     # Instructions for the LLM analysis
     instructions = """
-    You're a professional Speech-Language Pathologist analyzing this transcription sample.
-    For your analysis, count occurrences of:
-    1. Difficulty producing fluent, grammatical speech - Speech that is slow, halting, with pauses while searching for words
-    2. Word retrieval issues - Trouble finding specific words, using fillers like "um", circumlocution, or semantically similar substitutions
-    3. Grammatical errors - Missing/incorrect function words, verb tense problems, simplified sentences
-    4. Repetitions and revisions - Repeating or restating due to word-finding or sentence construction difficulties
-    5. Neologisms - Creating nonexistent "new" words
-    6. Perseveration - Unintentionally repeating words or phrases
-    7. Comprehension issues - Difficulty understanding complex sentences or fast speech
-    Analyze using the CASL-2 (Comprehensive Assessment of Spoken Language) framework:
-    Lexical/Semantic Skills:
-    - Evaluate vocabulary diversity, word retrieval difficulties, and semantic precision
-    - Estimate Standard Score (mean=100, SD=15), percentile rank, and performance level
-    - Quote specific examples from the transcript to support your assessment
-    Syntactic Skills:
-    - Assess sentence structure, grammatical accuracy, and syntactic complexity
-    - Estimate Standard Score, percentile rank, and performance level
-    - Quote specific examples from the transcript to support your assessment
-    Supralinguistic Skills:
-    - Evaluate figurative language use, inferencing, and contextual understanding
-    - Estimate Standard Score, percentile rank, and performance level
-    - Quote specific examples from the transcript to support your assessment
-    Format your analysis with:
-    1. Speech factor counts with severity percentiles - Include direct quotes for examples of each factor you identify
-    2. CASL-2 domain scores with performance levels - Include direct quotes for examples in each domain
-    3. Treatment recommendations based on findings
-    4. Brief explanation of your rationale
-    5. Any additional insights
-    IMPORTANT: For each factor and domain you analyze, provide direct quotes from the transcript as evidence. For example:
-    Word retrieval issues: 7, 65% - Example: "today I would &-um like to talk about &-um a fun trip"
-    Grammatical errors: 3, 45% - Example: "after swimming we [//] I eat [: ate] [*] &-um ice cream"
-    This specificity is critical for diagnostic accuracy and treatment planning.
     """
-    # Prepare prompt for Claude
     prompt = f"""
-    You are an experienced Speech-Language Pathologist analyzing this transcript for a patient who is {age} years old and {gender}.
     TRANSCRIPT:
     {transcript}
     {instructions}
-    Be precise, professional, and empathetic in your analysis. Focus on the linguistic patterns present in the sample.
     """
     # Call the appropriate API or fallback to demo mode
@@ -1179,34 +1285,14 @@ def create_interface():
                             with gr.Tabs() as results_tabs:
                                 # Summary tab
                                 with gr.TabItem("Summary", id=0, elem_classes="tab-content"):
-                                    with gr.Row():
-                                        output_image = gr.Image(
-                                            label="Speech Factors & CASL-2 Scores",
-                                            show_label=True,
-                                            elem_classes="viz-container"
-                                        )
-                                    with gr.Row():
-                                        radar_chart = gr.Image(
-                                            label="Severity Profile",
-                                            show_label=True,
-                                            elem_classes="viz-container"
-                                        )
                                     with gr.Group():
                                         gr.Markdown("### Key Findings", elem_classes="markdown-text")
-                                        speech_factors_table = gr.DataFrame(
-                                            label="Speech Factors Analysis",
-                                            headers=["Factor", "Occurrences", "Severity (Percentile)", "Example Errors"],
-                                            interactive=False
-                                        )
-                                        casl_table = gr.DataFrame(
-                                            label="CASL-2 Assessment",
-                                            headers=["Domain", "Standard Score", "Percentile", "Performance Level", "Example"],
-                                            interactive=False
-                                        )
-                                    with gr.Accordion("Specific Error Examples", open=False):
                                         specific_errors_md = gr.Markdown(elem_classes="markdown-text")
                                 # Treatment tab
@@ -1529,36 +1615,133 @@ def create_interface():
         def on_analyze_click(transcript_text, age_val, gender_val, patient_name_val, record_id_val, clinician_val, assessment_date_val):
             if not transcript_text or len(transcript_text.strip()) < 50:
                 return (
-                    pd.DataFrame(),
-                    pd.DataFrame(),
-                    None,
-                    None,
                     "Error: Please provide a longer transcript for analysis.",
                     "The transcript is too short for meaningful analysis.",
                     "Please provide a speech sample with at least 50 characters.",
                     "",
                     "",
                     ""
                 )
             try:
-                results, plot_img, radar_img, full_text = analyze_transcript(transcript_text, age_val, gender_val)
-                # Format treatment suggestions as markdown
                 treatment_text = ""
-                for i, suggestion in enumerate(results['treatment_suggestions']):
-                    treatment_text += f"- {suggestion}\n"
-                # Format specific error examples
-                specific_errors_text = "## Speech Error Examples\n\n"
-                if 'specific_errors' in results:
-                    for factor, example in results['specific_errors'].items():
-                        if example:
-                            specific_errors_text += f"**{factor}:** \"{example}\"\n\n"
-                        else:
-                            specific_errors_text += f"**{factor}:** No specific example found\n\n"
                 else:
-                    specific_errors_text += "No specific error examples were identified."
                 # Save the record to storage
                 patient_info = {
@@ -1608,20 +1791,14 @@ def create_interface():
                 else:
                     full_report = f"## Complete Analysis Report\n\n{full_text}"
-                # Get the raw LLM response
-                raw_output = results.get('raw_response', full_text)
-                # Convert image buffers to PIL images
-                plot_img_pil = Image.open(plot_img)
-                radar_img_pil = Image.open(radar_img)
                 return (
-                    results['speech_factors'],
-                    results['casl_data'],
-                    plot_img_pil,
-                    radar_img_pil,
                     treatment_text,
-                    results['explanation'],
                     full_report,
                     save_message,
                     specific_errors_text,
@@ -1631,11 +1808,9 @@ def create_interface():
                 logger.exception("Error during analysis")
                 error_message = f"Error during analysis: {str(e)}"
                 return (
-                    pd.DataFrame(),
-                    pd.DataFrame(),
-                    None,
-                    None,
-                    f"Error during analysis: {str(e)}",
                     "An error occurred while processing the transcript.",
                     f"Error details: {str(e)}",
                     "",
@@ -1650,10 +1825,8 @@ def create_interface():
                 patient_name, record_id, clinician_name, assessment_date
             ],
             outputs=[
-                speech_factors_table,
-                casl_table,
-                output_image,
-                radar_chart,
                 treatment_md,
                 explanation_md,
                 full_analysis,

 def analyze_transcript(transcript, age, gender):
     """Analyze a speech transcript using the CASL framework"""
+    # CHAT transcription symbol cheat sheet
+    cheat_sheet = """
+    CHAT TRANSCRIPTION SYMBOL SUMMARY -- Abridged for AphasiaBank
+    Basic Utterance Terminators
+    .    period
+    ?    question
+    !    exclamation
+    Special Utterance Terminators
+    +...    trailing off
+    +..?    trailing off of a question
+    +/.    interruption by another speaker
+    +/?    interruption of a question by another speaker
+    +//.    self-interruption
+    +//?    self-interruption of a question
+    +"/.    quotation follows on next line
+    +"    quoted utterance occurs on this line (use at beginning of utterance
+        as link, not a terminator)
+    +<    lazy overlap marking (at beginning of utterance that overlapped the
+        previous utterance)
+    @n    neologism (e.g., sakov@n)
+    exclamations    common ones:  ah, aw, haha, ow, oy, sh, ugh, uhoh
+    interjections    common ones:  mhm, uhhuh, hm, uhuh
+    fillers    common ones:  &-um, &-uh
+    letters    s@l
+    letter sequence    abcdefg@k
+    xxx    unintelligible speech, not treated as a word
+    www        untranscribed material (e.g., looking through pictures, talking with
+            spouse), must be followed by %exp tier (see below)
+    &+sounds    phonological fragment (&+sh &+w  we came home)
+    Scoped Symbols
+    [: text]    target/intended word for errors (e.g., tried [: cried])
+    [*]    error (e.g., paraphasia -- wɛk@u [: wet] [*])
+    [/]    retracing without correction (e.g., simple repetition)
+        put repeated items between <> unless only one word was repeated
+    [//]    retracing with correction (e.g., simple word or grammar change)
+        put changed items between <> unless only one word was changed
+    """
     # Instructions for the LLM analysis
     instructions = """
+    You are a speech pathologist analyzing this transcription sample. Provide a detailed analysis focused on specific quotes from the transcript.
+    The factors of speech that you need to count are:
+    1. Difficulty producing fluent, grammatical speech - speech that is slow, halting, with pauses while searching for words
+    2. Word retrieval issues - trouble thinking of specific words, use of filler words like um, circumlocution, semantically similar word substitutions
+    3. Grammatical errors - missing/incorrect function words, problems with verb tenses, conjugation, agreement, simplified sentences
+    4. Repetitions and revisions - repeating or restating words, phrases or sentences due to trouble finding the right words
+    5. Neologisms - creating nonexistent "new" words
+    6. Perseveration - unintentionally repeating words or phrases over and over
+    7. Comprehension issues - trouble understanding complex sentences, fast speech, relying more on context and cues
+    For each factor, provide:
+    - Number of occurrences
+    - Severity percentile (estimate based on your clinical judgment)
+    - At least 2-3 specific quotes from the transcript as examples
+    Then evaluate using the CASL-2 Speech and Language Analysis Framework across these domains:
+    1. Lexical/Semantic Skills:
+       - Assess vocabulary diversity, word-finding abilities, semantic precision
+       - Provide Standard Score (mean=100, SD=15), percentile rank, and performance level
+       - Include SPECIFIC QUOTES as evidence
+    2. Syntactic Skills:
+       - Evaluate grammatical accuracy, sentence complexity, morphological skills
+       - Provide Standard Score, percentile rank, and performance level
+       - Include SPECIFIC QUOTES as evidence
+    3. Supralinguistic Skills:
+       - Assess figurative language use, inferencing, and abstract reasoning
+       - Provide Standard Score, percentile rank, and performance level
+       - Include SPECIFIC QUOTES as evidence
+    YOUR RESPONSE MUST USE THESE EXACT SECTION MARKERS FOR PARSING:
+    <SPEECH_FACTORS_START>
+    Difficulty producing fluent, grammatical speech: (occurrences), (percentile)
+    Examples:
+    - "(direct quote from transcript)"
+    - "(direct quote from transcript)"
+    Word retrieval issues: (occurrences), (percentile)
+    Examples:
+    - "(direct quote from transcript)"
+    - "(direct quote from transcript)"
+    (And so on for each factor)
+    <SPEECH_FACTORS_END>
+    <CASL_SKILLS_START>
+    Lexical/Semantic Skills: Standard Score (X), Percentile Rank (X%), Performance Level
+    Examples:
+    - "(direct quote showing strength or weakness)"
+    - "(direct quote showing strength or weakness)"
+    Syntactic Skills: Standard Score (X), Percentile Rank (X%), Performance Level
+    Examples:
+    - "(direct quote showing strength or weakness)"
+    - "(direct quote showing strength or weakness)"
+    Supralinguistic Skills: Standard Score (X), Percentile Rank (X%), Performance Level
+    Examples:
+    - "(direct quote showing strength or weakness)"
+    - "(direct quote showing strength or weakness)"
+    <CASL_SKILLS_END>
+    <TREATMENT_RECOMMENDATIONS_START>
+    - (treatment recommendation)
+    - (treatment recommendation)
+    - (treatment recommendation)
+    <TREATMENT_RECOMMENDATIONS_END>
+    <EXPLANATION_START>
+    (brief diagnostic rationale based on findings)
+    <EXPLANATION_END>
+    <ADDITIONAL_ANALYSIS_START>
+    (specific insights that would be helpful for treatment planning)
+    <ADDITIONAL_ANALYSIS_END>
+    <DIAGNOSTIC_IMPRESSIONS_START>
+    (summarize findings across domains using specific examples and clear explanations)
+    <DIAGNOSTIC_IMPRESSIONS_END>
+    <ERROR_EXAMPLES_START>
+    (Copy all the specific quote examples here again, organized by error type or skill domain)
+    <ERROR_EXAMPLES_END>
+    MOST IMPORTANT:
+    1. Use EXACTLY the section markers provided (like <SPEECH_FACTORS_START>) to make parsing reliable
+    2. For EVERY factor and domain you analyze, you MUST provide direct quotes from the transcript as evidence
+    3. Be very specific and cite the exact text
+    4. Do not omit any of the required sections
+    """
+    # Prepare prompt for Claude with the user's role context
+    role_context = """
+    You are a speech pathologist, a healthcare professional who specializes in evaluating, diagnosing, and treating communication disorders, including speech, language, cognitive-communication, voice, swallowing, and fluency disorders. Your role is to help patients improve their speech and communication skills through various therapeutic techniques and exercises.
+    You are working with a student with speech impediments.
+    The most important thing is that you stay kind to the child. Be constructive and helpful rather than critical.
     """
     prompt = f"""
+    {role_context}
+    You are analyzing a transcript for a patient who is {age} years old and {gender}.
     TRANSCRIPT:
     {transcript}
+    {cheat_sheet}
     {instructions}
+    Remember to be precise but compassionate in your analysis. Use direct quotes from the transcript for every factor and domain you analyze.
     """
     # Call the appropriate API or fallback to demo mode
                             with gr.Tabs() as results_tabs:
                                 # Summary tab
                                 with gr.TabItem("Summary", id=0, elem_classes="tab-content"):
                                     with gr.Group():
                                         gr.Markdown("### Key Findings", elem_classes="markdown-text")
+                                        speech_factors_md = gr.Markdown(elem_classes="markdown-text")
+                                    with gr.Accordion("CASL Assessment Results", open=True):
+                                        casl_results_md = gr.Markdown(elem_classes="markdown-text")
+                                    with gr.Accordion("Detailed Error Examples", open=True):
                                         specific_errors_md = gr.Markdown(elem_classes="markdown-text")
                                 # Treatment tab
         def on_analyze_click(transcript_text, age_val, gender_val, patient_name_val, record_id_val, clinician_val, assessment_date_val):
             if not transcript_text or len(transcript_text.strip()) < 50:
                 return (
                     "Error: Please provide a longer transcript for analysis.",
                     "The transcript is too short for meaningful analysis.",
                     "Please provide a speech sample with at least 50 characters.",
+                    "Error: Insufficient data",
+                    "Please provide a speech sample with at least 50 characters.",
                     "",
                     "",
                     ""
                 )
             try:
+                # Get the raw analysis response
+                results, _, _, full_text = analyze_transcript(transcript_text, age_val, gender_val)
+                # Extract speech factors section using section markers
+                speech_factors_section = ""
+                factors_pattern = re.compile(r"<SPEECH_FACTORS_START>(.*?)<SPEECH_FACTORS_END>", re.DOTALL)
+                factors_match = factors_pattern.search(full_text)
+                if factors_match:
+                    speech_factors_section = factors_match.group(1).strip()
+                else:
+                    # Fallback to old pattern if markers aren't found
+                    old_factors_pattern = re.compile(r"(Difficulty producing fluent.*?)(?:Evaluation of CASL Skills|<CASL_SKILLS_START>)", re.DOTALL)
+                    old_factors_match = old_factors_pattern.search(full_text)
+                    if old_factors_match:
+                        speech_factors_section = old_factors_match.group(1).strip()
+                    else:
+                        speech_factors_section = "Error extracting speech factors from analysis."
+                # Extract CASL skills section
+                casl_section = ""
+                casl_pattern = re.compile(r"<CASL_SKILLS_START>(.*?)<CASL_SKILLS_END>", re.DOTALL)
+                casl_match = casl_pattern.search(full_text)
+                if casl_match:
+                    casl_section = casl_match.group(1).strip()
+                else:
+                    # Fallback pattern
+                    old_casl_pattern = re.compile(r"(?:Evaluation of CASL Skills:|Lexical/Semantic Skills:)(.*?)(?:Other analysis/Best plans of action:|<TREATMENT_RECOMMENDATIONS_START>)", re.DOTALL)
+                    old_casl_match = old_casl_pattern.search(full_text)
+                    if old_casl_match:
+                        casl_section = old_casl_match.group(1).strip()
+                        # Add a header if it's missing
+                        if not casl_section.startswith("Lexical"):
+                            casl_section = "Evaluation of CASL Skills:\n\n" + casl_section
+                    else:
+                        casl_section = "Error extracting CASL skills from analysis."
+                # Extract treatment recommendations
                 treatment_text = ""
+                treatment_pattern = re.compile(r"<TREATMENT_RECOMMENDATIONS_START>(.*?)<TREATMENT_RECOMMENDATIONS_END>", re.DOTALL)
+                treatment_match = treatment_pattern.search(full_text)
+                if treatment_match:
+                    treatment_text = "### Treatment Recommendations\n\n" + treatment_match.group(1).strip()
+                else:
+                    # Fallback pattern
+                    old_treatment_pattern = re.compile(r"(?:Other analysis/Best plans of action:)(.*?)(?:Explanation:|<EXPLANATION_START>)", re.DOTALL)
+                    old_treatment_match = old_treatment_pattern.search(full_text)
+                    if old_treatment_match:
+                        treatment_text = "### Treatment Recommendations\n\n" + old_treatment_match.group(1).strip()
+                    elif 'treatment_suggestions' in results:
+                        treatment_text = "### Treatment Recommendations\n\n"
+                        for suggestion in results['treatment_suggestions']:
+                            treatment_text += f"- {suggestion}\n"
+                # Extract explanation section
+                explanation_text = "### Clinical Rationale\n\n"
+                explanation_pattern = re.compile(r"<EXPLANATION_START>(.*?)<EXPLANATION_END>", re.DOTALL)
+                explanation_match = explanation_pattern.search(full_text)
+                if explanation_match:
+                    explanation_text += explanation_match.group(1).strip()
+                else:
+                    # Fallback pattern
+                    old_explanation_pattern = re.compile(r"(?:Explanation:)(.*?)(?:Additional Analysis:|<ADDITIONAL_ANALYSIS_START>)", re.DOTALL)
+                    old_explanation_match = old_explanation_pattern.search(full_text)
+                    if old_explanation_match:
+                        explanation_text += old_explanation_match.group(1).strip()
+                    else:
+                        explanation_text += results.get('explanation', "No explanation provided.")
+                # Extract additional analysis
+                additional_analysis = ""
+                additional_pattern = re.compile(r"<ADDITIONAL_ANALYSIS_START>(.*?)<ADDITIONAL_ANALYSIS_END>", re.DOTALL)
+                additional_match = additional_pattern.search(full_text)
+                if additional_match:
+                    additional_analysis = additional_match.group(1).strip()
+                    explanation_text += "\n\n### Additional Analysis\n\n" + additional_analysis
+                else:
+                    # Fallback pattern
+                    old_additional_pattern = re.compile(r"(?:Additional Analysis:)(.*?)(?:Diagnostic Impressions:|<DIAGNOSTIC_IMPRESSIONS_START>)", re.DOTALL)
+                    old_additional_match = old_additional_pattern.search(full_text)
+                    if old_additional_match:
+                        explanation_text += "\n\n### Additional Analysis\n\n" + old_additional_match.group(1).strip()
+                    elif 'additional_analysis' in results:
+                        explanation_text += "\n\n### Additional Analysis\n\n" + results.get('additional_analysis', "")
+                # Extract diagnostic impressions
+                diagnostic_impressions = ""
+                diagnostic_pattern = re.compile(r"<DIAGNOSTIC_IMPRESSIONS_START>(.*?)<DIAGNOSTIC_IMPRESSIONS_END>", re.DOTALL)
+                diagnostic_match = diagnostic_pattern.search(full_text)
+                if diagnostic_match:
+                    diagnostic_impressions = diagnostic_match.group(1).strip()
+                    # Add to the explanation section
+                    explanation_text += "\n\n### Diagnostic Impressions\n\n" + diagnostic_impressions
+                # Extract specific error examples
+                specific_errors_text = "## Detailed Error Examples\n\n"
+                # First try the dedicated section
+                errors_pattern = re.compile(r"<ERROR_EXAMPLES_START>(.*?)<ERROR_EXAMPLES_END>", re.DOTALL)
+                errors_match = errors_pattern.search(full_text)
+                if errors_match:
+                    specific_errors_text += errors_match.group(1).strip()
                 else:
+                    # Fallback to extracting examples from the text
+                    example_sections = re.findall(r"Examples:\s*\n((?:- \".*\"\s*\n)+)", full_text)
+                    for section in example_sections:
+                        specific_errors_text += section + "\n"
+                    if not example_sections:
+                        specific_errors_text += "No specific error examples were found in the analysis."
                 # Save the record to storage
                 patient_info = {
                 else:
                     full_report = f"## Complete Analysis Report\n\n{full_text}"
+                # Get the raw LLM response for debugging
+                raw_output = full_text
                 return (
+                    speech_factors_section,
+                    casl_section,
                     treatment_text,
+                    explanation_text,
                     full_report,
                     save_message,
                     specific_errors_text,
                 logger.exception("Error during analysis")
                 error_message = f"Error during analysis: {str(e)}"
                 return (
+                    f"Error: {str(e)}",
+                    "Error: Analysis failed. Please check input data.",
+                    "Error: Treatment analysis not available.",
                     "An error occurred while processing the transcript.",
                     f"Error details: {str(e)}",
                     "",
                 patient_name, record_id, clinician_name, assessment_date
             ],
             outputs=[
+                speech_factors_md,
+                casl_results_md,
                 treatment_md,
                 explanation_md,
                 full_analysis,