import html
import time
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from utils.helpers import df_to_html_table, fig_to_html
from utils.model_loader import load_translator

# Maximum number of characters handed to the translation model per request.
_MAX_TEXT_LENGTH = 500

# Language codes mapped to the names shown to the user.
_LANGUAGE_NAMES = {
    "auto": "Auto-detect",
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "de": "German",
    "ru": "Russian",
    "zh": "Chinese",
    "ar": "Arabic",
    "hi": "Hindi",
    "ja": "Japanese",
    "pt": "Portuguese",
    "it": "Italian",
}

# Static content of the "Common Translation Challenges" table.
_TRANSLATION_CHALLENGES = {
    'Challenge': [
        'Ambiguity',
        'Idioms & Expressions',
        'Cultural References',
        'Technical Terminology',
        'Grammatical Differences',
    ],
    'Description': [
        'Words with multiple meanings may be incorrectly translated without proper context',
        'Expressions that are unique to a culture often lose meaning when translated literally',
        'References to culture-specific concepts may not have direct equivalents',
        'Specialized terminology may not translate accurately without domain-specific models',
        'Different languages have different grammatical structures that can affect translation',
    ],
    'Example': [
        '"Bank" could mean financial institution or river edge',
        '"It\'s raining cats and dogs" translated literally loses its meaning',
        'References to local holidays or customs may be confusing when translated',
        'Medical or legal terms often need specialized translation knowledge',
        'Languages differ in word order, gender agreement, verb tenses, etc.',
    ],
}


def _length_chart_html(source_words, source_chars, target_words, target_chars):
    """Render the grouped source-vs-translation length bar chart via fig_to_html."""
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        x = np.arange(2)
        width = 0.35
        source_counts = [source_words, source_chars]
        target_counts = [target_words, target_chars]

        ax.bar(x - width / 2, source_counts, width,
               label='Source Text', color='#1976D2')
        ax.bar(x + width / 2, target_counts, width,
               label='Translated Text', color='#4CAF50')
        ax.set_xticks(x)
        ax.set_xticklabels(['Word Count', 'Character Count'])
        ax.legend()

        # Value labels just above each bar.
        for offset, counts in ((-width / 2, source_counts), (width / 2, target_counts)):
            for i, v in enumerate(counts):
                ax.text(i + offset, v + 0.5, str(v), ha='center')

        ax.set_title('Source vs. Translation Length Comparison')
        fig.tight_layout()
        return fig_to_html(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this each
        # request would leave one behind. Closing an already-closed figure is
        # harmless, so this is safe whatever fig_to_html does internally.
        plt.close(fig)


def _language_table(source_label, target_label,
                    source_words, source_chars, target_words, target_chars):
    """Build the per-language word/character metrics DataFrame."""
    def column(words, chars):
        avg_word_len = chars / words if words > 0 else 0
        return [
            f"{avg_word_len:.2f} chars",
            f"{words / chars:.3f}" if chars > 0 else "N/A",
            f"{chars / words:.2f}" if words > 0 else "N/A",
        ]

    return pd.DataFrame({
        'Metric': ['Average Word Length', 'Words per Character', 'Characters per Word'],
        f'Source ({source_label})': column(source_words, source_chars),
        f'Target ({target_label})': column(target_words, target_chars),
    })


def translation_handler(text_input, source_lang="auto", target_lang="en"):
    """Show machine translation capabilities.

    Translates ``text_input`` with the model returned by ``load_translator``
    and assembles a report: source text, translation, word/character
    statistics, a length-comparison chart, a language-characteristics table
    and general background on machine translation.

    Args:
        text_input: Text to translate. ``None``, empty or whitespace-only
            input produces a "No Text Provided" notice instead of a
            translation. Only the first ``_MAX_TEXT_LENGTH`` characters are
            translated; longer input triggers a truncation warning.
        source_lang: Source language code, or ``"auto"``.
        target_lang: Target language code.

    Returns:
        The report fragments joined with newlines, as a single string.
        Failures while loading the model or translating are reported inside
        the returned string rather than raised.
    """
    output_html = []

    # NOTE(review): the literals below contain no HTML tags in this view of the
    # file, although the comments refer to a "result-area div". They are
    # reproduced as-is -- confirm against the intended templates.

    # Add result area container
    output_html.append('\n')
    output_html.append('\n\nMachine Translation\n\n')
    output_html.append(
        "\nMachine translation converts text from one language to another "
        "while preserving meaning and context as accurately as possible.\n"
    )

    # Model info
    output_html.append("\n\nModel Used:\n\n")

    try:
        # Treat None like empty input instead of failing on .strip().
        if not text_input or not text_input.strip():
            output_html.append(
                "\n\nNo Text Provided\n\nPlease enter some text to translate.\n\n"
            )
            output_html.append('\n')  # Close result-area div
            return '\n'.join(output_html)

        source_lang_display = _LANGUAGE_NAMES.get(source_lang, source_lang)
        target_lang_display = _LANGUAGE_NAMES.get(target_lang, target_lang)

        # Display source text. Caller-supplied values are escaped before being
        # embedded so they cannot inject markup into the page.
        output_html.append('\n\nSource Text\n\n')
        output_html.append(f"\n{html.escape(str(source_lang_display))}\n")
        output_html.append(f'\n{html.escape(text_input)}\n')

        # Load translation model
        translator = load_translator(source_lang, target_lang)

        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by wall-clock adjustments.
        start_time = time.perf_counter()

        # Apply the model's length limit.
        truncated = len(text_input) > _MAX_TEXT_LENGTH
        text_to_translate = text_input[:_MAX_TEXT_LENGTH] if truncated else text_input

        translation = translator(text_to_translate)
        translated_text = translation[0]['translation_text']
        translation_time = time.perf_counter() - start_time

        # Display translation results (model output is escaped as well).
        output_html.append('\n\nTranslation\n\n')
        output_html.append(f"\n{html.escape(str(target_lang_display))}\n")
        output_html.append(f'\n{html.escape(translated_text)}\n')

        if truncated:
            output_html.append(
                f"\n\n⚠️ Note: Your text was truncated to {_MAX_TEXT_LENGTH} "
                "characters due to model limitations. "
                "Only the first part was translated.\n\n"
            )

        # Translation statistics. Measured on the text that was actually
        # translated, so the comparison below stays meaningful when the input
        # was truncated.
        output_html.append('\n\nTranslation Analysis\n\n')
        source_chars = len(text_to_translate)
        source_words = len(text_to_translate.split())
        target_chars = len(translated_text)
        target_words = len(translated_text.split())

        output_html.append(
            f"\n{source_words}\nSource Words"
            f"\n{target_words}\nTranslated Words"
            f"\n{translation_time:.2f}s\nProcessing Time\n"
        )

        # Length comparison
        output_html.append('\n\nLength Comparison\n\n')
        output_html.append(
            _length_chart_html(source_words, source_chars, target_words, target_chars)
        )

        # Expansion/contraction verdict, with a +/-10% band counted as similar.
        word_ratio = target_words / source_words if source_words > 0 else 0
        if word_ratio > 1.1:
            expansion_type = "expansion"
        elif word_ratio < 0.9:
            expansion_type = "contraction"
        else:
            expansion_type = "similar length"

        output_html.append(
            "\n\nTranslation Length Analysis\n\n"
            f"The translation shows {expansion_type} compared to the source text.\n\n"
            "Note: Different languages naturally have different word and character "
            "counts when expressing the same meaning.\n\n"
        )

        # Language characteristics comparison
        output_html.append('\n\nLanguage Characteristics\n\n')
        lang_df = _language_table(
            source_lang_display, target_lang_display,
            source_words, source_chars, target_words, target_chars,
        )
        # NOTE(review): column labels embed the language names unescaped;
        # presumably df_to_html_table escapes its cells -- confirm.
        output_html.append(df_to_html_table(lang_df))

        # Alternative translations section
        output_html.append('\n\nAlternative Translation Options\n\n')
        output_html.append(
            '\n\nMachine translation models often have different ways of '
            'translating the same text. Here are some general tips for better '
            'translations:\n\n'
        )
        output_html.append("\n\nTips for Better Machine Translation\n\n")

        # Common translation challenges
        output_html.append('\n\nCommon Translation Challenges\n\n')
        challenge_df = pd.DataFrame(_TRANSLATION_CHALLENGES)
        output_html.append(df_to_html_table(challenge_df))

    except Exception as e:
        # UI boundary: report the failure inside the page instead of raising.
        # The message is escaped because it can echo caller-supplied values.
        output_html.append(
            "\n\nTranslation Error\n\n"
            f"{html.escape(str(e))}\n\n"
            "This could be due to an unsupported language pair or an issue "
            "loading the translation model.\n\n"
        )

    # About Machine Translation section
    output_html.append(
        "\n\nAbout Machine Translation\n\n"
        "What is Machine Translation?\n\n"
        "Machine translation is the automated translation of text from one "
        "language to another using computer software. Modern machine translation "
        "systems use neural networks to understand and generate text, leading to "
        "significant improvements in fluency and accuracy compared to older "
        "rule-based or statistical systems.\n\n"
        "Types of Machine Translation:\n"
        "Applications:\n"
    )

    output_html.append('')  # Close result-area div
    return '\n'.join(output_html)