# Hosting-page header (not code): author "aradhyapavan", project "nlp ultimate tutor", revision ca2c89c (verified).
import html
import time
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from utils.helpers import fig_to_html, df_to_html_table
from utils.model_loader import load_translator
# Maximum number of characters handed to the translator in one call.
_MAX_TEXT_LENGTH = 500

# Language codes mapped to the names shown in the UI badges and table headers.
_LANGUAGE_NAMES = {
    "auto": "Auto-detect",
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "de": "German",
    "ru": "Russian",
    "zh": "Chinese",
    "ar": "Arabic",
    "hi": "Hindi",
    "ja": "Japanese",
    "pt": "Portuguese",
    "it": "Italian",
}

_INTRO_HTML = """
<div class="alert alert-info">
<i class="fas fa-language"></i>
Machine translation converts text from one language to another while preserving meaning and context as accurately as possible.
</div>
"""

_MODEL_INFO_HTML = """
<div class="alert alert-info">
<h4><i class="fas fa-tools"></i> Model Used:</h4>
<ul>
<li><b>Helsinki-NLP/opus-mt</b> - A collection of pre-trained neural machine translation models</li>
<li><b>Capabilities</b> - Translates between various language pairs with good accuracy</li>
<li><b>Architecture</b> - Transformer-based sequence-to-sequence model</li>
</ul>
</div>
"""

_EMPTY_INPUT_HTML = """
<div class="alert alert-warning">
<h3>No Text Provided</h3>
<p>Please enter some text to translate.</p>
</div>
"""

_TIPS_HTML = """
<div class="alert alert-info">
<h4>Tips for Better Machine Translation</h4>
<ul class="mb-0">
<li><b>Use clear, simple language</b> in your source text</li>
<li><b>Avoid idioms and slang</b> that may not translate well across cultures</li>
<li><b>Break up long, complex sentences</b> into simpler ones</li>
<li><b>Provide context</b> when dealing with ambiguous terms</li>
<li><b>Review and post-edit</b> machine translations for important documents</li>
</ul>
</div>
"""

_ABOUT_HTML = """
<div class="card mt-4">
<div class="card-header">
<h4 class="mb-0">
<i class="fas fa-info-circle"></i>
About Machine Translation
</h4>
</div>
<div class="card-body">
<h5>What is Machine Translation?</h5>
<p>Machine translation is the automated translation of text from one language to another using computer software.
Modern machine translation systems use neural networks to understand and generate text, leading to significant
improvements in fluency and accuracy compared to older rule-based or statistical systems.</p>
<h5>Types of Machine Translation:</h5>
<ul>
<li><b>Rule-based MT</b> - Uses linguistic rules crafted by human experts</li>
<li><b>Statistical MT</b> - Uses statistical models trained on parallel texts</li>
<li><b>Neural MT</b> - Uses deep learning and neural networks (current state-of-the-art)</li>
<li><b>Hybrid MT</b> - Combines multiple approaches for better results</li>
</ul>
<h5>Applications:</h5>
<ul>
<li><b>Website localization</b> - Translating web content for international audiences</li>
<li><b>Document translation</b> - Quickly obtaining translations of documents</li>
<li><b>Real-time communication</b> - Enabling conversations across language barriers</li>
<li><b>E-commerce</b> - Making product listings available in multiple languages</li>
<li><b>Content accessibility</b> - Making information available to speakers of different languages</li>
</ul>
</div>
</div>
"""

# Static reference table rendered under "Common Translation Challenges".
_CHALLENGE_DATA = {
    'Challenge': [
        'Ambiguity',
        'Idioms & Expressions',
        'Cultural References',
        'Technical Terminology',
        'Grammatical Differences',
    ],
    'Description': [
        'Words with multiple meanings may be incorrectly translated without proper context',
        'Expressions that are unique to a culture often lose meaning when translated literally',
        'References to culture-specific concepts may not have direct equivalents',
        'Specialized terminology may not translate accurately without domain-specific models',
        'Different languages have different grammatical structures that can affect translation',
    ],
    'Example': [
        '"Bank" could mean financial institution or river edge',
        '"It\'s raining cats and dogs" translated literally loses its meaning',
        'References to local holidays or customs may be confusing when translated',
        'Medical or legal terms often need specialized translation knowledge',
        'Languages differ in word order, gender agreement, verb tenses, etc.',
    ],
}


def _language_badge(label, css_class):
    """Return a badge block showing *label*, HTML-escaped, styled with *css_class*."""
    # The label falls back to the raw language code supplied by the caller,
    # so it must be escaped before being placed in markup.
    return f"""
<div class="mb-2">
<span class="badge {css_class}">
{html.escape(str(label))}
</span>
</div>
"""


def _stats_cards_html(source_words, target_words, seconds):
    """Return the three-card summary row: source words, translated words, time."""
    return f"""
<div class="row text-center mb-4">
<div class="col-md-4">
<div class="card">
<div class="card-body">
<div class="display-4 text-primary">{source_words}</div>
<div>Source Words</div>
</div>
</div>
</div>
<div class="col-md-4">
<div class="card">
<div class="card-body">
<div class="display-4 text-success">{target_words}</div>
<div>Translated Words</div>
</div>
</div>
</div>
<div class="col-md-4">
<div class="card">
<div class="card-body">
<div class="display-4 text-warning">{seconds:.2f}s</div>
<div>Processing Time</div>
</div>
</div>
</div>
</div>
"""


def _length_chart_html(source_counts, target_counts):
    """Return HTML for a grouped bar chart of [word count, character count] pairs."""
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        x = np.arange(2)
        width = 0.35
        ax.bar(x - width / 2, source_counts, width, label='Source Text', color='#1976D2')
        ax.bar(x + width / 2, target_counts, width, label='Translated Text', color='#4CAF50')
        ax.set_xticks(x)
        ax.set_xticklabels(['Word Count', 'Character Count'])
        ax.legend()
        # Value labels just above each bar.
        for offset, counts in ((-width / 2, source_counts), (width / 2, target_counts)):
            for i, value in enumerate(counts):
                ax.text(i + offset, value + 0.5, str(value), ha='center')
        # Use the Axes/Figure API rather than pyplot's implicit "current
        # figure" so concurrent requests cannot draw on each other's plot.
        ax.set_title('Source vs. Translation Length Comparison')
        fig.tight_layout()
        return fig_to_html(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; release it even
        # when rendering fails. Closing an already-closed figure is harmless.
        plt.close(fig)


def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0


def translation_handler(text_input, source_lang="auto", target_lang="en"):
    """Translate *text_input* and return an HTML report of the result.

    The report contains the source text, the translation, word/character
    statistics, a length-comparison chart, general translation tips and an
    "About" section. Failures while loading the model or translating are
    reported inside the returned HTML instead of being raised.

    Args:
        text_input: Text to translate. ``None``, empty or whitespace-only
            input yields a "No Text Provided" notice.
        source_lang: Source language code passed to ``load_translator``.
        target_lang: Target language code passed to ``load_translator``.

    Returns:
        A single string of HTML fragments joined with newlines.
    """
    output_html = [
        '<div class="result-area">',
        '<h2 class="task-header">Machine Translation</h2>',
        _INTRO_HTML,
        _MODEL_INFO_HTML,
    ]

    try:
        # Treat None like empty input instead of surfacing an AttributeError.
        if not text_input or not text_input.strip():
            output_html.append(_EMPTY_INPUT_HTML)
            output_html.append('</div>')  # Close result-area div
            return '\n'.join(output_html)

        source_lang_display = _LANGUAGE_NAMES.get(source_lang, source_lang)
        target_lang_display = _LANGUAGE_NAMES.get(target_lang, target_lang)

        # Source text. It is user-supplied, so escape it before embedding.
        output_html.append('<h3 class="task-subheader">Source Text</h3>')
        output_html.append(_language_badge(source_lang_display, "bg-primary"))
        output_html.append(
            f'<div class="card"><div class="card-body">{html.escape(text_input)}</div></div>'
        )

        translator = load_translator(source_lang, target_lang)

        # Only the first _MAX_TEXT_LENGTH characters are sent to the model.
        truncated = len(text_input) > _MAX_TEXT_LENGTH
        truncated_text = text_input[:_MAX_TEXT_LENGTH]

        # perf_counter is monotonic, so the interval cannot be skewed by
        # wall-clock adjustments.
        start_time = time.perf_counter()
        translation = translator(truncated_text)
        translated_text = translation[0]['translation_text']
        translation_time = time.perf_counter() - start_time

        # Translation. Model output is escaped as well; it can echo markup
        # that was present in the input.
        output_html.append('<h3 class="task-subheader">Translation</h3>')
        output_html.append(_language_badge(target_lang_display, "bg-success"))
        output_html.append(
            f'<div class="card"><div class="card-body bg-light">{html.escape(translated_text)}</div></div>'
        )

        if truncated:
            output_html.append(f"""
<div class="alert alert-warning">
<p class="mb-0"><b>⚠️ Note:</b> Your text was truncated to {_MAX_TEXT_LENGTH} characters due to model limitations. Only the first part was translated.</p>
</div>
""")

        # Statistics compare the translation with the text that was actually
        # translated; using the untruncated input would distort every ratio.
        output_html.append('<h3 class="task-subheader">Translation Analysis</h3>')
        source_chars = len(truncated_text)
        source_words = len(truncated_text.split())
        target_chars = len(translated_text)
        target_words = len(translated_text.split())

        output_html.append(_stats_cards_html(source_words, target_words, translation_time))

        output_html.append('<h4>Length Comparison</h4>')
        output_html.append(
            _length_chart_html([source_words, source_chars], [target_words, target_chars])
        )

        # Expansion/contraction ratio
        word_ratio = _ratio(target_words, source_words)
        char_ratio = _ratio(target_chars, source_chars)
        if word_ratio > 1.1:
            expansion_type = "expansion"
        elif word_ratio < 0.9:
            expansion_type = "contraction"
        else:
            expansion_type = "similar length"

        output_html.append(f"""
<div class="alert alert-info">
<h4>Translation Length Analysis</h4>
<p>The translation shows <b>{expansion_type}</b> compared to the source text.</p>
<ul>
<li>Word ratio: {word_ratio:.2f} (target/source)</li>
<li>Character ratio: {char_ratio:.2f} (target/source)</li>
</ul>
<p><small>Note: Different languages naturally have different word and character counts when expressing the same meaning.</small></p>
</div>
""")

        # Language characteristics comparison
        source_avg_word_len = _ratio(source_chars, source_words)
        target_avg_word_len = _ratio(target_chars, target_words)
        output_html.append('<h4>Language Characteristics</h4>')
        # NOTE(review): header text is passed unescaped to df_to_html_table to
        # avoid double-escaping; confirm that helper escapes headers/cells.
        lang_data = {
            'Metric': ['Average Word Length', 'Words per Character', 'Characters per Word'],
            f'Source ({source_lang_display})': [
                f"{source_avg_word_len:.2f} chars",
                f"{source_words / source_chars:.3f}" if source_chars > 0 else "N/A",
                f"{source_chars / source_words:.2f}" if source_words > 0 else "N/A",
            ],
            f'Target ({target_lang_display})': [
                f"{target_avg_word_len:.2f} chars",
                f"{target_words / target_chars:.3f}" if target_chars > 0 else "N/A",
                f"{target_chars / target_words:.2f}" if target_words > 0 else "N/A",
            ],
        }
        output_html.append(df_to_html_table(pd.DataFrame(lang_data)))

        # General guidance
        output_html.append('<h3 class="task-subheader">Alternative Translation Options</h3>')
        output_html.append('<p>Machine translation models often have different ways of translating the same text. Here are some general tips for better translations:</p>')
        output_html.append(_TIPS_HTML)

        output_html.append('<h4>Common Translation Challenges</h4>')
        output_html.append(df_to_html_table(pd.DataFrame(_CHALLENGE_DATA)))

    except Exception as e:
        # Top-level boundary: show the failure in the page rather than
        # propagating it. The message may contain user text, so escape it.
        output_html.append(f"""
<div class="alert alert-danger">
<h3>Translation Error</h3>
<p>{html.escape(str(e))}</p>
<p>This could be due to an unsupported language pair or an issue loading the translation model.</p>
</div>
""")

    output_html.append(_ABOUT_HTML)
    output_html.append('</div>')  # Close result-area div
    return '\n'.join(output_html)