import html
import time

import matplotlib.pyplot as plt
import nltk
import pandas as pd

from utils.helpers import fig_to_html, df_to_html_table
from utils.model_loader import load_text_generator
# NOTE(review): several fragments below are empty or whitespace-only in the
# reviewed source; presumably their markup was lost before review. They are
# reproduced as seen -- confirm against the deployed template.

# Approximate cap on prompt size, counted in whitespace-separated words.
_MAX_PROMPT_LENGTH = 100

_INTRO_HTML = """
Text generation models can continue or expand on a given text prompt, creating new content that follows the style and context of the input.
"""

_MODEL_INFO_HTML = """
Model Used:
- GPT-2 - 124M parameter language model trained on a diverse corpus of internet text
- Capabilities - Can generate coherent text continuations and completions
- Limitations - May occasionally produce repetitive or nonsensical content
"""

_TRUNCATION_WARNING_HTML = """
⚠️ Text truncated to approximately 100 tokens for better generation results.
"""

_TIPS_HTML = """
Tips for Better Generation Results
- Be specific - More detailed prompts give the model better context
- Format matters - If you want a list, start with a list item; if you want dialogue, include dialogue format
- Play with temperature - Lower values (0.3-0.5) for focused, consistent text; higher values (0.7-1.0) for creative, varied output
- Try multiple generations - Generate several options to pick the best result
"""

_ABOUT_HTML = """
What is Text Generation?
Text generation is the task of creating human-like text using machine learning models. Modern text generation
systems use large neural networks trained on vast amounts of text data to predict the next tokens in a sequence.
How It Works:
- Training - Models learn patterns in language by predicting the next word in billions of text examples
- Prompting - You provide a starting text that gives context and direction
- Generation - The model repeatedly predicts the most likely next token based on previous context
- Sampling - Various techniques (temperature, top-p) control the randomness and creativity of output
Applications:
- Content creation - Drafting articles, stories, and marketing copy
- Assistive writing - Helping with email drafting, summarization, and editing
- Conversational AI - Powering chatbots and digital assistants
- Code generation - Assisting developers with coding tasks
- Creative writing - Generating stories, poetry, and other creative content
"""


def _format_generation_stats(prompt_tokens, new_text, temperature, top_p, generation_time):
    """Return the per-sequence "Generation Statistics" fragment."""
    new_words = new_text.split()
    gen_tokens = len(new_words)
    # Average word length serves as a crude complexity metric; max(1, ...)
    # avoids dividing by zero when nothing was generated.
    avg_word_len = sum(len(word) for word in new_words) / max(1, gen_tokens)
    lines = [
        "",
        "Generation Statistics",
        f"Prompt length: {prompt_tokens} tokens",
        f"Generated length: {gen_tokens} tokens",
        f"Total length: {prompt_tokens + gen_tokens} tokens",
        f"Temperature: {temperature}",
        f"Top-p: {top_p}",
        f"Avg word length: {avg_word_len:.2f} characters",
        f"Generation time: {generation_time:.2f} seconds",
        "",
    ]
    return "\n".join(lines)


def _describe_words(words):
    """Return one table column: count, average length, unique count, lexical diversity."""
    lengths = [len(word) for word in words]
    avg_len = sum(lengths) / len(lengths) if lengths else 0
    unique_count = len({word.lower() for word in words})
    diversity = f"{unique_count / len(words):.2f}" if words else "0"
    return [len(words), f"{avg_len:.2f}", unique_count, diversity]


def _render_text_analysis(prompt_text, full_text):
    """Return fragments (histogram + stats table) comparing prompt and generated words."""
    prompt_words = prompt_text.split()
    # Everything after the prompt's words is treated as generated text.
    generated_words = full_text.split()[len(prompt_words):]

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        # Bin edges 1..15; words longer than 15 characters fall outside the
        # bins and are not plotted.
        bins = range(1, 16)
        ax.hist([len(word) for word in prompt_words], bins=bins, alpha=0.7,
                label='Prompt', color='#1976D2')
        ax.hist([len(word) for word in generated_words], bins=bins, alpha=0.7,
                label='Generated', color='#4CAF50')
        ax.set_xlabel('Word Length (characters)')
        ax.set_ylabel('Frequency')
        ax.set_title('Word Length Distribution: Prompt vs Generated')
        ax.legend()
        ax.grid(alpha=0.3)
        figure_html = fig_to_html(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated requests do not accumulate figures in memory.
        plt.close(fig)

    stats_df = pd.DataFrame({
        'Metric': ['Word count', 'Average word length', 'Unique words', 'Lexical diversity*'],
        'Prompt': _describe_words(prompt_words),
        'Generated': _describe_words(generated_words),
    })
    return [
        figure_html,
        '\n',
        df_to_html_table(stats_df),
        '\n*Lexical diversity = unique words / total words\n',
        '\n',
    ]


def text_generation_handler(text_input, max_length=100, temperature=0.7, top_p=0.9, num_sequences=1):
    """Show text generation capabilities.

    Runs the generator returned by ``load_text_generator()`` on ``text_input``
    and assembles an HTML report: intro, model info, per-sequence statistics,
    a word-length analysis of the first sequence, usage tips and a general
    explanation of text generation.

    Args:
        text_input: Prompt text. Prompts longer than 100 whitespace-separated
            words are truncated to the first 100 words.
        max_length: Budget added to the (truncated) prompt's word count to
            form the ``max_length`` passed to the generator.
        temperature: Sampling temperature forwarded to the generator.
        top_p: Nucleus-sampling threshold forwarded to the generator.
        num_sequences: Number of sequences requested from the generator.

    Returns:
        The report fragments joined with newlines, as a single string.

    Any ``Exception`` raised while preparing, generating or analysing is
    caught and rendered as an "Error" section; the function still returns.
    """
    output_html = [
        '\n',  # Result area container
        '',
        _INTRO_HTML,
        _MODEL_INFO_HTML,
    ]

    try:
        # Word count is only a rough approximation of the token count.
        prompt_words = text_input.split()
        if len(prompt_words) > _MAX_PROMPT_LENGTH:
            prompt_words = prompt_words[:_MAX_PROMPT_LENGTH]
            prompt_text = " ".join(prompt_words)
            output_html.append(_TRUNCATION_WARNING_HTML)
        else:
            prompt_text = text_input
        prompt_tokens = len(prompt_words)

        # Display prompt
        output_html.append('')
        output_html.append('\n')

        text_generator = load_text_generator()

        generation_kwargs = {
            # Budget from the prompt actually sent to the model; using the
            # pre-truncation count would inflate the limit for long inputs.
            "max_length": prompt_tokens + max_length,
            "num_return_sequences": num_sequences,
            "temperature": temperature,
            "top_p": top_p,
            "do_sample": True,
            "no_repeat_ngram_size": 2,
            "pad_token_id": 50256,  # GPT-2's pad token ID
        }

        start_time = time.time()
        result = text_generator(prompt_text, **generation_kwargs)
        generation_time = time.time() - start_time

        # Display results
        output_html.append('')
        for i, sequence in enumerate(result):
            # The generated text starts with the prompt; slice it off.
            new_text = sequence['generated_text'][len(prompt_text):]

            if num_sequences > 1:
                output_html.append(f'\nVersion {i+1}\n')
            output_html.append("\n")
            output_html.append(
                _format_generation_stats(prompt_tokens, new_text, temperature, top_p, generation_time)
            )
            # Option to see full text
            output_html.append("\n")

        # Text complexity analysis, based on the first generated sequence.
        if result:
            output_html.append('')
            output_html.extend(_render_text_analysis(prompt_text, result[0]['generated_text']))

        output_html.append(_TIPS_HTML)
    except Exception as e:
        # Top-level boundary: report the failure in the page instead of
        # raising. The message is escaped because it is embedded in HTML.
        output_html.append(f"""
Error
Failed to generate text: {html.escape(str(e))}
""")

    output_html.append(_ABOUT_HTML)
    output_html.append('\n')  # Close result-area div
    return '\n'.join(output_html)