import time

import matplotlib.pyplot as plt
import nltk
import pandas as pd

from utils.model_loader import load_text_generator
from utils.helpers import fig_to_html, df_to_html_table

# Rough prompt cap, in whitespace-delimited tokens. Whitespace splitting only
# approximates the model tokenizer, but is close enough for a UI-level limit.
MAX_PROMPT_LENGTH = 100

# GPT-2 has no dedicated pad token; convention is to reuse its EOS id (50256).
GPT2_PAD_TOKEN_ID = 50256


def _generation_stats_html(prompt_text, new_text, temperature, top_p,
                           generation_time):
    """Return an HTML fragment of per-sequence generation statistics."""
    prompt_tokens = len(prompt_text.split())
    gen_words = new_text.split()
    gen_tokens = len(gen_words)
    # Average word length is a crude proxy for lexical complexity.
    avg_word_len = sum(len(w) for w in gen_words) / max(1, gen_tokens)
    return (
        '<h4>Generation Statistics</h4>'
        '<ul>'
        f'<li>Prompt length: {prompt_tokens} tokens</li>'
        f'<li>Generated length: {gen_tokens} tokens</li>'
        f'<li>Total length: {prompt_tokens + gen_tokens} tokens</li>'
        f'<li>Temperature: {temperature}</li>'
        f'<li>Top-p: {top_p}</li>'
        f'<li>Avg word length: {avg_word_len:.2f} characters</li>'
        f'<li>Generation time: {generation_time:.2f} seconds</li>'
        '</ul>'
    )


def _text_analysis_html(prompt_text, full_text):
    """Return HTML comparing word-length distribution and lexical statistics
    between the prompt and the generated continuation.

    Includes a histogram chart (via ``fig_to_html``) and a stats table
    (via ``df_to_html_table``).
    """
    parts = ['<h3>Text Analysis</h3>']

    prompt_words = prompt_text.split()
    full_words = full_text.split()
    # Approximate the continuation by dropping the prompt's word count from
    # the front of the full text.
    generated_words = full_words[len(prompt_words):]

    prompt_word_lengths = [len(w) for w in prompt_words]
    generated_word_lengths = [len(w) for w in generated_words]

    # Word-length distribution: prompt vs generated.
    fig, ax = plt.subplots(figsize=(10, 5))
    bins = range(1, 16)  # word lengths from 1 to 15 characters
    ax.hist(prompt_word_lengths, bins=bins, alpha=0.7,
            label='Prompt', color='#1976D2')
    ax.hist(generated_word_lengths, bins=bins, alpha=0.7,
            label='Generated', color='#4CAF50')
    ax.set_xlabel('Word Length (characters)')
    ax.set_ylabel('Frequency')
    ax.set_title('Word Length Distribution: Prompt vs Generated')
    ax.legend()
    ax.grid(alpha=0.3)
    parts.append(fig_to_html(fig))
    # Release the figure explicitly; pyplot otherwise keeps a reference and
    # figures accumulate across requests.
    plt.close(fig)

    def _column(words):
        """Word count, avg length, unique count, and lexical diversity."""
        unique = len({w.lower() for w in words})
        avg_len = sum(len(w) for w in words) / len(words) if words else 0
        diversity = f"{unique / len(words):.2f}" if words else "0"
        return [len(words), f"{avg_len:.2f}", unique, diversity]

    stats_df = pd.DataFrame({
        'Metric': ['Word count', 'Average word length',
                   'Unique words', 'Lexical diversity*'],
        'Prompt': _column(prompt_words),
        'Generated': _column(generated_words),
    })
    parts.append(df_to_html_table(stats_df))
    parts.append('<p><em>*Lexical diversity = unique words / total words</em></p>')
    return '\n'.join(parts)


def _about_text_generation_html():
    """Return the static 'About Text Generation' background section."""
    return """<h3>About Text Generation</h3>
<h4>What is Text Generation?</h4>
<p>Text generation is the task of creating human-like text using machine learning models. Modern text generation systems use large neural networks trained on vast amounts of text data to predict the next tokens in a sequence.</p>
<h4>How It Works:</h4>
<ol>
  <li><strong>Training</strong> - Models learn patterns in language by predicting the next word in billions of text examples</li>
  <li><strong>Prompting</strong> - You provide a starting text that gives context and direction</li>
  <li><strong>Generation</strong> - The model repeatedly predicts the most likely next token based on previous context</li>
  <li><strong>Sampling</strong> - Various techniques (temperature, top-p) control the randomness and creativity of output</li>
</ol>
<h4>Applications:</h4>
<ul>
  <li><strong>Content creation</strong> - Drafting articles, stories, and marketing copy</li>
  <li><strong>Assistive writing</strong> - Helping with email drafting, summarization, and editing</li>
  <li><strong>Conversational AI</strong> - Powering chatbots and digital assistants</li>
  <li><strong>Code generation</strong> - Assisting developers with coding tasks</li>
  <li><strong>Creative writing</strong> - Generating stories, poetry, and other creative content</li>
</ul>"""


def text_generation_handler(text_input, max_length=100, temperature=0.7,
                            top_p=0.9, num_sequences=1):
    """Render an HTML page demonstrating text-generation capabilities.

    Args:
        text_input: Prompt text supplied by the user; truncated to roughly
            ``MAX_PROMPT_LENGTH`` whitespace tokens before generation.
        max_length: Approximate number of new tokens to generate beyond
            the (possibly truncated) prompt.
        temperature: Sampling temperature; higher values increase randomness.
        top_p: Nucleus-sampling probability mass.
        num_sequences: Number of alternative continuations to generate.

    Returns:
        A single HTML string (fragments joined by newlines) with the prompt,
        each generated continuation plus statistics, a word-length analysis,
        and background information. Generation failures are rendered as an
        error section rather than raised.
    """
    output_html = ['<div class="result-area">']
    output_html.append('<h2>Text Generation</h2>')
    output_html.append(
        '<p>Text generation models can continue or expand on a given text '
        'prompt, creating new content that follows the style and context '
        'of the input.</p>'
    )
    # NOTE(review): the original markup here was garbled; presumably this
    # names the underlying model (GPT-2, per the pad-token id) — confirm.
    output_html.append('<p><strong>Model Used:</strong> GPT-2</p>')

    try:
        # Truncate over-long prompts so generation stays fast and coherent.
        words = text_input.split()
        if len(words) > MAX_PROMPT_LENGTH:
            prompt_text = " ".join(words[:MAX_PROMPT_LENGTH])
            output_html.append(
                '<p>⚠️ Text truncated to approximately 100 tokens for '
                'better generation results.</p>'
            )
        else:
            prompt_text = text_input
        # Count tokens AFTER truncation; the original measured the untruncated
        # prompt, inflating max_length for long inputs.
        token_count = len(prompt_text.split())

        output_html.append('<h3>Prompt</h3>')
        output_html.append(f'<blockquote>{prompt_text}</blockquote>')

        text_generator = load_text_generator()

        generation_kwargs = {
            # max_length counts prompt + continuation, hence the sum.
            "max_length": token_count + max_length,
            "num_return_sequences": num_sequences,
            "temperature": temperature,
            "top_p": top_p,
            "do_sample": True,
            "no_repeat_ngram_size": 2,  # suppress verbatim bigram loops
            "pad_token_id": GPT2_PAD_TOKEN_ID,
        }

        start_time = time.time()
        result = text_generator(prompt_text, **generation_kwargs)
        generation_time = time.time() - start_time

        output_html.append('<h3>Generated Text</h3>')
        for i, sequence in enumerate(result):
            generated_text = sequence['generated_text']
            # The pipeline returns prompt + continuation; strip the prompt.
            new_text = generated_text[len(prompt_text):]

            if num_sequences > 1:
                output_html.append(f'<h4>Version {i + 1}</h4>')
            # Show prompt and continuation together, visually distinguished.
            output_html.append(
                f'<p><span class="prompt">{prompt_text}</span>'
                f'<span class="generated">{new_text}</span></p>'
            )
            output_html.append(_generation_stats_html(
                prompt_text, new_text, temperature, top_p, generation_time))
            # Full raw text, collapsed by default.
            output_html.append(
                f'<details><summary>Full text</summary>'
                f'<pre>{generated_text}</pre></details>'
            )

        # Analyze only the first sequence; the chart and table would be
        # repetitive per-version.
        if len(result) > 0:
            output_html.append(_text_analysis_html(
                prompt_text, result[0]['generated_text']))

        output_html.append('<h3>Tips for Better Generation Results</h3>')
    except Exception as e:
        # Surface any failure (model load, generation, plotting) on the page
        # instead of crashing the UI.
        output_html.append(
            f'<h3>Error</h3><p>Failed to generate text: {e}</p>')

    output_html.append(_about_text_generation_html())
    output_html.append('</div>')  # close result-area div
    return '\n'.join(output_html)