"""Tests for ExecutorAgent stub implementation.
This module provides comprehensive testing for the StubExecutorAgent and McpExecutorAgent classes.
The tests focus on validating mock execution simulation, tool-specific output generation,
error handling mechanisms, and system integration scenarios for MVP 3.
Key Testing Areas:
- Mock execution simulation with realistic tool outputs
- Error handling and recovery mechanisms
- Tool-specific behavior validation (sentiment, summarization, code analysis, image processing)
- Input validation and edge case handling
- System integration and MCP protocol compliance
Educational Notes:
- StubExecutorAgent provides mock execution for testing without external dependencies
- McpExecutorAgent handles real MCP server communication with retry logic
- Tests use fixtures to create reusable test data and maintain consistency
- Parameterized tests validate behavior across multiple input scenarios
"""
import json
import time
from unittest.mock import Mock, patch
import pytest
import requests
from agents.executor import McpExecutorAgent, StubExecutorAgent
from kg_services.ontology import MCPPrompt, MCPTool, PlannedStep
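# Illustrative usage sketch (not collected by pytest): a minimal end-to-end call
# mirroring what the fixtures below assemble. Field values are placeholders; the
# constructor and method signatures are taken from the fixtures in this module,
# not read from agents/executor.py itself.
def _example_stub_execution() -> dict:
    """Build a tool/prompt pair, plan a step, and run the stub executor once."""
    tool = MCPTool(
        tool_id="example-tool",
        name="Example Tool",
        description="Placeholder tool for this usage sketch",
        tags=["example"],
        invocation_command_stub="example_run --input {text}",
    )
    prompt = MCPPrompt(
        prompt_id="example-prompt",
        name="Example Prompt",
        description="Placeholder prompt for this usage sketch",
        target_tool_id="example-tool",
        template_string="Process: {{text_content}}",
        input_variables=["text_content"],
        difficulty_level="beginner",
    )
    step = PlannedStep(tool=tool, prompt=prompt, relevance_score=0.9)
    # The stub returns a structured dict; the full schema is asserted in
    # TestStubExecutorAgent.test_simulate_execution_comprehensive_structure.
    return StubExecutorAgent().simulate_execution(step, {"text_content": "Great product!"})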
class TestStubExecutorAgent:
"""Comprehensive test suite for StubExecutorAgent class.
This test class validates the mock execution capabilities of StubExecutorAgent,
which simulates tool execution without requiring actual external services.
Test Categories:
- Basic execution simulation and response structure
- Tool-specific output generation and validation
- Input handling and validation scenarios
- Error simulation and handling
- Performance and consistency testing
Educational Purpose:
These tests demonstrate how to properly mock complex system interactions
while maintaining realistic behavior patterns for system validation.
"""
@pytest.fixture
def executor_agent(self) -> StubExecutorAgent:
"""Create a StubExecutorAgent instance for testing.
Returns:
StubExecutorAgent: Fresh instance configured for testing scenarios.
Educational Note:
Fixtures provide isolated test instances, ensuring tests don't interfere
with each other and maintaining predictable test conditions.
"""
return StubExecutorAgent()
@pytest.fixture
def sample_tool(self) -> MCPTool:
"""Create a sample sentiment analysis MCPTool for testing.
Returns:
MCPTool: Pre-configured sentiment analyzer tool with realistic metadata.
Educational Note:
This fixture represents a typical NLP tool configuration that would
be used in production systems for text sentiment analysis.
"""
return MCPTool(
tool_id="sentiment-analyzer-v1",
name="Advanced Sentiment Analyzer",
description="Analyze sentiment and emotional tone of text",
tags=["sentiment", "analysis", "nlp"],
invocation_command_stub="sentiment_analyze --input {text} --format json",
)
@pytest.fixture
def sentiment_prompt(self) -> MCPPrompt:
"""Create a sentiment analysis prompt for testing.
Returns:
MCPPrompt: Template for sentiment analysis with input variable mapping.
Educational Note:
Prompts define how tools should be invoked with specific inputs,
acting as a bridge between user intent and tool execution.
"""
return MCPPrompt(
prompt_id="sentiment-basic-001",
name="Basic Sentiment Analysis",
description="Analyze sentiment of provided text",
target_tool_id="sentiment-analyzer-v1",
template_string="Analyze the sentiment of this text: {{text_content}}",
input_variables=["text_content"],
difficulty_level="beginner",
)
@pytest.fixture
def summarizer_tool(self) -> MCPTool:
"""Create a text summarizer tool for testing.
Returns:
MCPTool: Pre-configured text summarization tool for document processing.
Educational Note:
Summarization tools represent complex NLP operations that require
multiple parameters and produce structured outputs.
"""
return MCPTool(
tool_id="text-summarizer-v2",
name="Intelligent Text Summarizer",
description="Generate concise summaries of long text documents",
tags=["summarization", "text", "nlp"],
invocation_command_stub="summarize --input {text} --length {length}",
)
@pytest.fixture
def summary_prompt(self) -> MCPPrompt:
"""Create a text summarization prompt for testing.
Returns:
MCPPrompt: Multi-input template for advanced document summarization.
Educational Note:
This prompt demonstrates how complex tools can accept multiple inputs
to customize their behavior (document + focus area).
"""
return MCPPrompt(
prompt_id="summary-advanced-001",
name="Advanced Document Summary",
description="Create comprehensive summary with key points",
target_tool_id="text-summarizer-v2",
template_string="Summarize this document: {{document_text}} with focus on {{focus_area}}",
input_variables=["document_text", "focus_area"],
difficulty_level="intermediate",
)
@pytest.fixture
def code_quality_tool(self) -> MCPTool:
"""Create a code quality analysis tool for testing.
Returns:
MCPTool: Pre-configured code analysis tool for quality assessment.
Educational Note:
Code quality tools represent static analysis capabilities that
examine source code for security, performance, and style issues.
"""
return MCPTool(
tool_id="code-quality-linter",
name="Code Quality Analyzer",
description="Analyze code quality, security, and best practices",
tags=["code", "quality", "security"],
invocation_command_stub="lint_code --file {code_file} --rules {ruleset}",
)
@pytest.fixture
def image_caption_tool(self) -> MCPTool:
"""Create an image captioning tool for testing.
Returns:
MCPTool: Pre-configured AI-powered image analysis tool.
Educational Note:
Image processing tools demonstrate multimodal AI capabilities,
processing visual inputs to generate textual descriptions.
"""
return MCPTool(
tool_id="image-captioner-ai",
name="AI Image Caption Generator",
description="Generate descriptive captions for images using AI",
tags=["image", "caption", "ai", "vision"],
invocation_command_stub="caption_image --image {image_path} --style {caption_style}",
)
@pytest.fixture
def sample_planned_step(
self, sample_tool: MCPTool, sentiment_prompt: MCPPrompt
) -> PlannedStep:
"""Create a sample PlannedStep for testing execution workflows.
Args:
sample_tool: MCPTool fixture for sentiment analysis
sentiment_prompt: MCPPrompt fixture for sentiment analysis
Returns:
PlannedStep: Complete execution plan combining tool, prompt, and relevance.
Educational Note:
PlannedStep represents a complete execution plan that binds a tool
with a prompt and includes a relevance score indicating how well
this combination matches the user's intent.
"""
return PlannedStep(
tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.92
)
def test_executor_initialization(self, executor_agent: StubExecutorAgent) -> None:
"""Test that StubExecutorAgent initializes correctly.
Args:
executor_agent: StubExecutorAgent fixture
Educational Purpose:
Validates basic object instantiation and type checking.
This is a fundamental sanity check that the class can be created.
"""
assert isinstance(executor_agent, StubExecutorAgent)
@patch("agents.executor.logger")
def test_executor_initialization_logging(self, mock_logger: Mock) -> None:
"""Test that initialization logs correctly.
Args:
mock_logger: Mocked logger to capture log messages
Educational Purpose:
Demonstrates how to test logging behavior using mocks.
Proper logging is crucial for debugging and monitoring system behavior.
"""
# Act: Create executor instance to trigger logging
StubExecutorAgent()
# Assert: Verify the expected log message was emitted
mock_logger.info.assert_called_once_with(
"StubExecutorAgent initialized for MVP 3"
)
@patch("agents.executor.random.random")
def test_simulate_execution_basic_success(
self,
mock_random: Mock,
executor_agent: StubExecutorAgent,
sample_planned_step: PlannedStep
) -> None:
"""Test basic successful execution simulation with realistic inputs.
Args:
mock_random: Mocked random function to control error simulation
executor_agent: StubExecutorAgent fixture
sample_planned_step: PlannedStep fixture for testing
Educational Purpose:
Demonstrates the happy path for execution simulation, showing how
the system processes valid inputs and generates structured responses.
Test Strategy:
- Mock random to prevent error simulation (>10% threshold)
- Provide realistic sentiment analysis input
- Validate complete response structure and content
"""
# Arrange: Set up test conditions to prevent random errors
mock_random.return_value = 0.9 # Above 10% threshold - no random errors
test_inputs: dict[str, str] = {"text_content": "This product is amazing and I love it!"}
# Act: Simulate execution with the planned step and inputs
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert: Validate response structure and content
assert isinstance(result, dict), "Response should be a dictionary"
assert result["status"] == "simulated_success", "Should indicate successful simulation"
assert "execution_id" in result, "Should include unique execution identifier"
assert result["tool_information"]["tool_name"] == "Advanced Sentiment Analyzer"
assert result["prompt_information"]["prompt_name"] == "Basic Sentiment Analysis"
assert result["execution_details"]["inputs_received"] == test_inputs
assert "mock_output" in result["results"], "Should include simulated tool output"
@patch("agents.executor.random.random")
def test_simulate_execution_comprehensive_structure(
self,
mock_random: Mock,
executor_agent: StubExecutorAgent,
sample_planned_step: PlannedStep
) -> None:
"""Test that execution response has complete and consistent structure.
Args:
mock_random: Mocked random function to control behavior
executor_agent: StubExecutorAgent fixture
sample_planned_step: PlannedStep fixture for testing
Educational Purpose:
Validates the complete response schema that external systems can rely on.
This ensures API contract compliance and prevents integration issues.
Test Strategy:
- Verify all required top-level response keys are present
- Validate nested object structures for completeness
- Ensure data types match expected schema
"""
# Arrange: Configure test to avoid random errors
mock_random.return_value = 0.9 # Disable random error simulation
test_inputs: dict[str, str] = {"text_content": "Test content"}
# Act: Execute simulation and capture full response
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert: Validate complete response structure
# Top-level keys must be present for API contract compliance
expected_keys: list[str] = [
"status", # Execution outcome indicator
"execution_id", # Unique identifier for tracking
"tool_information", # Tool metadata and configuration
"prompt_information",# Prompt template and variables
"execution_details",# Runtime information and metrics
"results", # Actual tool output and analysis
"metadata", # Additional context and debugging info
]
for key in expected_keys:
assert key in result, f"Required key '{key}' missing from response"
# Assert: Validate tool_information nested structure
tool_info = result["tool_information"]
assert "tool_id" in tool_info, "Tool identifier required"
assert "tool_name" in tool_info, "Tool name required for UI display"
assert "tool_description" in tool_info, "Tool description required for context"
# Assert: Validate execution_details structure
exec_details = result["execution_details"]
assert "inputs_received" in exec_details, "Input tracking required"
assert "inputs_count" in exec_details, "Input metrics required"
assert "execution_time_ms" in exec_details, "Performance metrics required"
assert "complexity_level" in exec_details, "Complexity assessment required"
# Assert: Validate results structure contains actual outputs
results = result["results"]
assert "message" in results, "Human-readable message required"
assert "mock_output" in results, "Simulated tool output required"
assert "confidence_score" in results, "Quality assessment required"
@patch("agents.executor.random.random")
def test_simulate_execution_sentiment_tool_output(
self,
mock_random: Mock,
executor_agent: StubExecutorAgent,
sample_tool: MCPTool,
sentiment_prompt: MCPPrompt
) -> None:
"""Test sentiment analysis tool produces realistic mock output format.
Args:
mock_random: Mocked random function to control error simulation
executor_agent: StubExecutorAgent fixture
sample_tool: MCPTool fixture for sentiment analysis
sentiment_prompt: MCPPrompt fixture for sentiment analysis
Educational Purpose:
Validates that the simulator generates realistic sentiment analysis output
that matches what real sentiment analysis tools would produce.
Test Strategy:
- Create planned step combining sentiment tool and prompt
- Provide positive sentiment text input
- Verify output contains expected sentiment analysis elements
- Check for realistic formatting and content structure
"""
# Arrange: Disable random errors and create realistic test scenario
mock_random.return_value = 0.5 # Above 10% threshold - no random errors
planned_step = PlannedStep(
tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
)
# Use clearly positive text to test sentiment detection
test_inputs: dict[str, str] = {"text_content": "I really enjoy this product!"}
# Act: Execute sentiment analysis simulation
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert: Validate sentiment analysis specific output format
mock_output = result["results"]["mock_output"]
# Check for standard sentiment analysis output components
assert "Sentiment Analysis Results" in mock_output, "Should contain results header"
assert "Overall Sentiment Classification" in mock_output, "Should classify sentiment"
assert "Confidence Metrics" in mock_output, "Should provide confidence scores"
assert "Positive" in mock_output, "Should detect positive sentiment in test text"
assert "Generated by Sentiment Analyzer Tool" in mock_output, "Should indicate tool source"
@patch("agents.executor.random.random")
def test_simulate_execution_summarizer_tool_output(
self,
mock_random: Mock,
executor_agent: StubExecutorAgent,
summarizer_tool: MCPTool,
summary_prompt: MCPPrompt
) -> None:
"""Test text summarizer tool produces realistic mock summary output.
Args:
mock_random: Mocked random function to control error simulation
executor_agent: StubExecutorAgent fixture
summarizer_tool: MCPTool fixture for text summarization
summary_prompt: MCPPrompt fixture for multi-input summarization
Educational Purpose:
Validates that the simulator generates realistic text summarization output
with structured sections that would be expected from real NLP tools.
Test Strategy:
- Test multi-input prompt handling (document + focus area)
- Verify output contains expected summarization sections
- Check for realistic summary structure and formatting
"""
# Arrange: Configure test to avoid random errors
mock_random.return_value = 0.5 # Above 10% threshold - no random errors
planned_step = PlannedStep(
tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.90
)
# Test with multi-input scenario (document text + focus area)
test_inputs: dict[str, str] = {
"document_text": "Long document content...",
"focus_area": "key insights",
}
# Act: Execute summarization simulation
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert: Validate summarization specific output format
mock_output = result["results"]["mock_output"]
# Check for standard summarization output components
assert "Text Summarization Complete" in mock_output, "Should contain completion indicator"
assert "Executive Summary" in mock_output, "Should provide executive summary section"
assert "Key Points" in mock_output, "Should extract key points from content"
assert "focus_area" in str(test_inputs), "Should handle multi-input processing"
# Validate that the tool properly processes multiple inputs
exec_details = result["execution_details"]
assert exec_details["inputs_count"] == 2, "Should recognize two input parameters"
@patch("agents.executor.random.random")
def test_simulate_execution_code_quality_tool_output(
self, mock_random, executor_agent, code_quality_tool
):
"""Test code quality analysis specific mock output."""
# Arrange - Disable random errors
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
code_prompt = MCPPrompt(
prompt_id="code-quality-001",
name="Comprehensive Code Review",
description="Analyze code for quality and security",
target_tool_id="code-quality-linter",
template_string="Review this code: {{source_code}}",
input_variables=["source_code"],
difficulty_level="advanced",
)
planned_step = PlannedStep(
tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87
)
test_inputs = {"source_code": "def hello_world():\n print('Hello!')"}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "Code Quality Analysis Complete" in mock_output
assert "Overall Quality Score" in mock_output
assert "Analysis Summary" in mock_output
assert "Quality Metrics" in mock_output
assert "Recommendations" in mock_output
assert "Generated by Code Quality Linter Tool" in mock_output
@patch("agents.executor.random.random")
def test_simulate_execution_image_caption_tool_output(
self, mock_random, executor_agent, image_caption_tool
):
"""Test image captioning specific mock output."""
# Arrange
mock_random.return_value = 0.9 # Disable random errors (10% threshold)
caption_prompt = MCPPrompt(
prompt_id="image-caption-001",
name="Descriptive Image Caption",
description="Generate detailed image captions",
target_tool_id="image-captioner-ai",
template_string="Caption this image: {{image_url}}",
input_variables=["image_url"],
difficulty_level="intermediate",
)
planned_step = PlannedStep(
tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91
)
test_inputs = {"image_url": "https://example.com/office.jpg"}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "Image Caption Generation Results" in mock_output
assert "Primary Caption" in mock_output
assert "Technical Analysis" in mock_output
assert "Confidence Level" in mock_output
assert "Alternative Descriptions" in mock_output
assert "Generated by Image Caption Generator Tool" in mock_output
@patch("agents.executor.random.random")
def test_simulate_execution_generic_tool_output(self, mock_random, executor_agent):
"""Test generic mock output for unknown tool types."""
# Arrange - Disable random errors
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
generic_tool = MCPTool(
tool_id="unknown-tool",
name="Unknown Analysis Tool",
description="A tool for unknown analysis",
tags=["unknown"],
invocation_command_stub="unknown_analyze {input}",
)
generic_prompt = MCPPrompt(
prompt_id="generic-prompt",
name="Generic Processing",
description="Generic prompt for unknown tool",
target_tool_id="unknown-tool",
template_string="Process: {{data}}",
input_variables=["data"],
difficulty_level="beginner",
)
planned_step = PlannedStep(
tool=generic_tool, prompt=generic_prompt, relevance_score=0.75
)
test_inputs = {"data": "sample data"}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "Execution Results for Unknown Analysis Tool" in mock_output
assert "Successfully processed" in mock_output
assert "Generic Processing" in mock_output
assert "Input Analysis" in mock_output
assert "data**: sample data" in mock_output
@patch("agents.executor.random.random")
def test_simulate_execution_empty_inputs(self, mock_random, executor_agent, sample_planned_step):
"""Test execution with empty inputs dictionary."""
# Arrange - Disable random errors
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
empty_inputs: dict[str, str] = {}
# Act
result = executor_agent.simulate_execution(sample_planned_step, empty_inputs)
# Assert
assert result["status"] == "simulated_success"
assert result["execution_details"]["inputs_received"] == empty_inputs
assert result["execution_details"]["inputs_count"] == 0
@staticmethod
def test_simulate_execution_multiple_inputs(
executor_agent, summarizer_tool, summary_prompt
):
"""Test execution with multiple input variables."""
# Arrange
planned_step = PlannedStep(
tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.85
)
test_inputs = {
"document_text": "Very long document with lots of content...",
"focus_area": "business insights and recommendations",
}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
assert result["execution_details"]["inputs_count"] == 2
assert result["execution_details"]["inputs_received"] == test_inputs
@staticmethod
def test_simulate_execution_invalid_plan_type(executor_agent):
"""Test error handling with invalid plan type."""
# Arrange
invalid_plan = {"not": "a planned step"}
test_inputs = {"input": "test"}
# Act & Assert
with pytest.raises(ValueError, match="Plan must be a PlannedStep instance"):
executor_agent.simulate_execution(invalid_plan, test_inputs)
@staticmethod
def test_simulate_execution_invalid_inputs_type(
executor_agent, sample_planned_step
):
"""Test error handling with invalid inputs type."""
# Arrange
invalid_inputs = "not a dictionary"
# Act & Assert
with pytest.raises(ValueError, match="Inputs must be a dictionary"):
executor_agent.simulate_execution(sample_planned_step, invalid_inputs)
@patch("agents.executor.random.random")
@patch("agents.executor.logger")
def test_simulate_execution_logging(
self, mock_logger, mock_random, executor_agent, sample_planned_step
):
"""Test that execution logs appropriately."""
# Arrange - Disable random errors to ensure consistent logging
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
test_inputs = {"text_content": "test content"}
# Act
executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert
assert mock_logger.info.call_count >= 3 # 3 execution logs (init not captured by fixture)
# Check specific log messages
log_calls = [call[0][0] for call in mock_logger.info.call_args_list]
assert any("Simulating execution for tool" in log for log in log_calls)
assert any("Received inputs" in log for log in log_calls)
assert any("Generated mock response" in log for log in log_calls)
@patch("agents.executor.random.random")
def test_execution_id_generation(
self, mock_random, executor_agent, sample_planned_step
):
"""Test that unique execution IDs are generated."""
# Arrange - Disable random errors
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
test_inputs_1 = {"text_content": "first input"}
test_inputs_2 = {"text_content": "second input"}
# Act
result_1 = executor_agent.simulate_execution(sample_planned_step, test_inputs_1)
result_2 = executor_agent.simulate_execution(sample_planned_step, test_inputs_2)
# Assert
assert result_1["execution_id"] != result_2["execution_id"]
assert result_1["execution_id"].startswith("exec_sentiment-analyzer-v1_")
assert result_2["execution_id"].startswith("exec_sentiment-analyzer-v1_")
@staticmethod
def test_confidence_score_consistency(executor_agent, sample_planned_step):
"""Test that confidence scores are consistent."""
# Arrange
test_inputs = {"text_content": "test content"}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert
confidence = result["results"]["confidence_score"]
assert isinstance(confidence, int | float)
assert 0.0 <= confidence <= 1.0
@staticmethod
def test_metadata_structure(executor_agent, sample_planned_step):
"""Test that metadata has expected structure."""
# Arrange
test_inputs = {"text_content": "test content"}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert
metadata = result["metadata"]
assert "simulation_version" in metadata
assert "timestamp" in metadata
assert "notes" in metadata
assert metadata["simulation_version"] == "MVP3_Sprint4"
# Enhanced Input-Aware Mock Tests
@patch("agents.executor.random.random")
def test_text_summarizer_empty_input_handling(
self, mock_random, executor_agent, summarizer_tool, summary_prompt
):
"""Test text summarizer with empty input returns appropriate error message."""
# Arrange - Disable random errors
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
planned_step = PlannedStep(
tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.90
)
test_inputs = {"document_text": ""}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "No text content provided for summarization" in mock_output
assert "⚠️ **Input Analysis:**" in mock_output
assert "Recommendation: Please provide text content" in mock_output
@patch("agents.executor.random.random")
def test_text_summarizer_content_type_detection(
self, mock_random, executor_agent, summarizer_tool, summary_prompt
):
"""Test text summarizer detects content type and generates appropriate response."""
# Arrange - Disable random errors
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
planned_step = PlannedStep(
tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.90
)
# Test technical content
tech_inputs = {
"text": "This function implements a class variable to store programming code patterns."
}
# Act
result = executor_agent.simulate_execution(planned_step, tech_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "technical content" in mock_output
assert "Technical classification" in mock_output
assert "code structure, functionality patterns" in mock_output
@patch("agents.executor.random.random")
def test_text_summarizer_business_content_detection(
self, mock_random, executor_agent, summarizer_tool, summary_prompt
):
"""Test text summarizer detects business content correctly."""
# Arrange - Disable random errors
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
planned_step = PlannedStep(
tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.90
)
business_inputs = {
"content": "Our company's market analysis shows excellent customer retention and product sales growth in business sectors."
}
# Act
result = executor_agent.simulate_execution(planned_step, business_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "business content" in mock_output
assert "Business classification" in mock_output
assert "market dynamics, customer insights" in mock_output
@patch("agents.executor.random.random")
def test_sentiment_analyzer_empty_input_handling(
self, mock_random, executor_agent, sample_tool, sentiment_prompt
):
"""Test sentiment analyzer with empty input returns appropriate error message."""
# Arrange - Disable random errors
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
planned_step = PlannedStep(
tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
)
test_inputs = {"text": ""}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "No text content provided for sentiment analysis" in mock_output
assert "⚠️ **Input Analysis:**" in mock_output
assert "Supported input fields:" in mock_output
@patch("agents.executor.random.random")
def test_sentiment_analyzer_positive_content_detection(
self, mock_random, executor_agent, sample_tool, sentiment_prompt
):
"""Test sentiment analyzer correctly detects positive sentiment."""
# Arrange
mock_random.return_value = 0.9 # Disable random errors (10% threshold)
planned_step = PlannedStep(
tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
)
positive_inputs = {
"text": "This product is absolutely amazing and fantastic! I love it and highly recommend it."
}
# Act
result = executor_agent.simulate_execution(planned_step, positive_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "Primary**: Positive" in mock_output
assert "Joy/Satisfaction:" in mock_output
assert "**Positive Indicators**: 4 detected" in mock_output
@patch("agents.executor.random.random")
def test_sentiment_analyzer_negative_content_detection(
self, mock_random, executor_agent, sample_tool, sentiment_prompt
):
"""Test sentiment analyzer correctly detects negative sentiment."""
# Arrange - Disable random errors
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
planned_step = PlannedStep(
tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
)
negative_inputs = {
"feedback": "This service was terrible and awful. I hate it and it's the worst experience ever."
}
# Act
result = executor_agent.simulate_execution(planned_step, negative_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "Primary**: Negative" in mock_output
assert "Frustration:" in mock_output
assert "**Negative Indicators**:" in mock_output
assert "feedback" in mock_output # Source field detection
@patch("agents.executor.random.random")
def test_sentiment_analyzer_neutral_content_detection(
self, mock_random, executor_agent, sample_tool, sentiment_prompt
):
"""Test sentiment analyzer correctly detects neutral sentiment."""
# Arrange - Disable random errors
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
planned_step = PlannedStep(
tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
)
neutral_inputs = {
"message": "The weather today is okay and normal. It's fine and adequate for our needs."
}
# Act
result = executor_agent.simulate_execution(planned_step, neutral_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "Primary**: Neutral" in mock_output
assert "Neutral:" in mock_output
assert "**Neutral Indicators**:" in mock_output
@patch("agents.executor.random.random")
def test_image_caption_empty_input_handling(
self, mock_random, executor_agent, image_caption_tool
):
"""Test image caption generator with empty input returns appropriate error message."""
# Arrange
mock_random.return_value = 0.9 # Disable random errors (10% threshold)
caption_prompt = MCPPrompt(
prompt_id="image-caption-001",
name="Descriptive Image Caption",
description="Generate detailed image captions",
target_tool_id="image-captioner-ai",
template_string="Caption this image: {{image_url}}",
input_variables=["image_url"],
difficulty_level="intermediate",
)
planned_step = PlannedStep(
tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91
)
test_inputs = {"image_url": ""}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "No image source provided for caption generation" in mock_output
assert "⚠️ **Input Analysis:**" in mock_output
assert "Supported input fields:" in mock_output
@patch("agents.executor.random.random")
def test_image_caption_workspace_detection(
self, mock_random, executor_agent, image_caption_tool
):
"""Test image caption generator detects workspace images correctly."""
# Arrange
mock_random.return_value = 0.9 # Disable random errors (10% threshold)
caption_prompt = MCPPrompt(
prompt_id="image-caption-001",
name="Descriptive Image Caption",
description="Generate detailed image captions",
target_tool_id="image-captioner-ai",
template_string="Caption this image: {{image_path}}",
input_variables=["image_path"],
difficulty_level="intermediate",
)
planned_step = PlannedStep(
tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91
)
test_inputs = {"image_path": "/uploads/office_workspace_desk.jpg"}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "professional workspace" in mock_output
assert "workspace" in mock_output.lower()
assert "Indoor workspace/office environment" in mock_output
assert "desk" in mock_output
@patch("agents.executor.random.random")
def test_image_caption_with_context(self, mock_random, executor_agent, image_caption_tool):
"""Test image caption generator incorporates additional context."""
# Arrange
mock_random.return_value = 0.9 # Disable random errors (10% threshold)
caption_prompt = MCPPrompt(
prompt_id="image-caption-001",
name="Descriptive Image Caption",
description="Generate detailed image captions",
target_tool_id="image-captioner-ai",
template_string="Caption this image: {{image}} with context: {{context}}",
input_variables=["image", "context"],
difficulty_level="intermediate",
)
planned_step = PlannedStep(
tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91
)
test_inputs = {
"image": "nature_photo.jpg",
"context": "Taken during sunrise in the mountains",
}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "additional context: Taken during sunrise" in mock_output
assert "Context Provided**: Yes" in mock_output
assert "sunrise in the mountains" in mock_output
@patch("agents.executor.random.random")
def test_code_linter_empty_input_handling(self, mock_random, executor_agent, code_quality_tool):
"""Test code linter with empty input returns appropriate error message."""
# Arrange - Disable random errors to ensure consistent behavior
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
code_prompt = MCPPrompt(
prompt_id="code-quality-001",
name="Comprehensive Code Review",
description="Analyze code for quality and security",
target_tool_id="code-quality-linter",
template_string="Review this code: {{source_code}}",
input_variables=["source_code"],
difficulty_level="advanced",
)
planned_step = PlannedStep(
tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87
)
test_inputs = {"source_code": ""}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "No code content provided for analysis" in mock_output
assert "⚠️ **Input Analysis:**" in mock_output
assert "Supported input fields:" in mock_output
@patch("agents.executor.random.random")
def test_code_linter_python_detection(self, mock_random, executor_agent, code_quality_tool):
"""Test code linter correctly detects Python code."""
# Arrange - Disable random errors to ensure consistent behavior
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
code_prompt = MCPPrompt(
prompt_id="code-quality-001",
name="Comprehensive Code Review",
description="Analyze code for quality and security",
target_tool_id="code-quality-linter",
template_string="Review this code: {{code}}",
input_variables=["code"],
difficulty_level="advanced",
)
planned_step = PlannedStep(
tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87
)
python_code = """
def hello_world():
\"\"\"Print hello world message.\"\"\"
print("Hello, World!")
return True
class MyClass:
def __init__(self):
pass
"""
test_inputs = {"code": python_code}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "Language**: Python" in mock_output
assert "Lines Analyzed**: " in mock_output
assert "def hello_world():" in mock_output # Code preview
@patch("agents.executor.random.random")
def test_code_linter_issue_detection(self, mock_random, executor_agent, code_quality_tool):
"""Test code linter detects common code issues."""
# Arrange - Disable random errors
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
code_prompt = MCPPrompt(
prompt_id="code-quality-001",
name="Comprehensive Code Review",
description="Analyze code for quality and security",
target_tool_id="code-quality-linter",
template_string="Review this code: {{script}}",
input_variables=["script"],
difficulty_level="advanced",
)
planned_step = PlannedStep(
tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87
)
problematic_code = """
# TODO: Fix this function
def bad_function():
x = 1
y = 2
z = very_long_variable_name_that_exceeds_normal_line_length_and_should_be_flagged_as_an_issue_by_linter = 3
return x + y + z
"""
test_inputs = {"script": problematic_code}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "Todo/Fixme comments found" in mock_output
assert "Long lines detected" in mock_output
assert "Issues Found**: " in mock_output
@patch("agents.executor.random.random")
def test_generic_tool_input_analysis(self, mock_random, executor_agent):
"""Test generic tool provides detailed input analysis."""
# Arrange - Disable random errors to ensure consistent behavior
mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors
custom_tool = MCPTool(
tool_id="custom-analyzer",
name="Custom Analysis Tool",
description="Performs custom data analysis",
tags=["analysis"],
invocation_command_stub="analyze {input}",
)
custom_prompt = MCPPrompt(
prompt_id="custom-prompt",
name="Custom Analysis",
description="Custom analysis prompt",
target_tool_id="custom-analyzer",
template_string="Analyze: {{data}} with {{method}}",
input_variables=["data", "method"],
difficulty_level="intermediate",
)
planned_step = PlannedStep(
tool=custom_tool, prompt=custom_prompt, relevance_score=0.82
)
test_inputs = {
"data": "Large dataset with complex information patterns and detailed analysis requirements",
"method": "statistical",
}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert (
"Processing Complexity**: Simple" in mock_output
) # Based on total input length
assert "Inputs Received**: 2 parameter(s)" in mock_output
assert "Total Content Length**:" in mock_output
assert (
"data**: Large dataset with complex" in mock_output
) # Check partial match without ellipsis
assert "method**: statistical" in mock_output
@patch("agents.executor.random.random")
def test_long_input_processing(self, mock_random, executor_agent, sample_tool, sentiment_prompt):
"""Test processing of very long input content."""
# Arrange
mock_random.return_value = 0.9 # Disable random errors (10% threshold)
planned_step = PlannedStep(
tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
)
# Create long input content
long_text = "This is an excellent service. " * 50 # 1500+ characters
test_inputs = {"text_content": long_text}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
mock_output = result["results"]["mock_output"]
assert "Primary**: Positive" in mock_output # Should detect positive sentiment
assert "Text Length**: " in mock_output
assert "Analysis Confidence" in mock_output
class TestErrorSimulation:
"""Test suite for error simulation functionality."""
@pytest.fixture
def executor_agent(self) -> StubExecutorAgent:
"""Create a StubExecutorAgent instance for testing."""
return StubExecutorAgent()
@pytest.fixture
def sample_tool(self) -> MCPTool:
"""Create a sample MCPTool for testing."""
return MCPTool(
tool_id="test-tool-error",
name="Test Error Tool",
description="A tool for testing error scenarios",
tags=["test", "error"],
invocation_command_stub="test_error {input}",
)
@pytest.fixture
def sample_prompt(self) -> MCPPrompt:
"""Create a sample MCPPrompt for testing."""
return MCPPrompt(
prompt_id="test-prompt-error",
name="Test Error Prompt",
description="A prompt for testing error scenarios",
target_tool_id="test-tool-error",
template_string="Process: {{text_input}}",
input_variables=["text_input"],
difficulty_level="beginner",
)
@pytest.fixture
def sample_planned_step(
self, sample_tool: MCPTool, sample_prompt: MCPPrompt
) -> PlannedStep:
"""Create a sample PlannedStep for testing."""
return PlannedStep(tool=sample_tool, prompt=sample_prompt, relevance_score=0.95)
@staticmethod
def test_user_requested_error_simulation(executor_agent, sample_planned_step):
"""Test error simulation triggered by user input containing error keywords."""
# Arrange
test_inputs = {"text_input": "This should fail and show an error"}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert
assert result["status"] == "simulated_error"
error_info = result["error_information"]
assert error_info["error_type"] == "user_requested"
assert error_info["error_code"] == "USR_REQ_001"
assert error_info["retry_recommended"] is True
assert (
"User explicitly requested error simulation" in error_info["error_message"]
)
# Check that the output contains user-friendly error message
mock_output = result["results"]["mock_output"]
assert "Error Simulation Activated" in mock_output
assert "User-Requested Error" in mock_output
@staticmethod
def test_test_scenario_error_simulation(executor_agent, sample_planned_step):
"""Test error simulation triggered by test scenario keywords."""
# Arrange
test_inputs = {"text_input": "test error scenario for validation"}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert
assert result["status"] == "simulated_error"
error_info = result["error_information"]
assert error_info["error_type"] == "test_scenario"
assert error_info["error_code"] == "TST_ERR_001"
assert error_info["retry_recommended"] is True
mock_output = result["results"]["mock_output"]
assert "Test Error Scenario" in mock_output
assert "Error Simulation Active" in mock_output
@staticmethod
def test_input_too_large_error_simulation(
executor_agent, sample_planned_step
):
"""Test error simulation for input size limits."""
# Arrange - Create input larger than 10,000 characters
large_input = "x" * 10001
test_inputs = {"text_input": large_input}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert
assert result["status"] == "simulated_error"
error_info = result["error_information"]
assert error_info["error_type"] == "input_too_large"
assert error_info["error_code"] == "VAL_001"
assert error_info["retry_recommended"] is True
assert "10001 characters" in error_info["error_details"]
mock_output = result["results"]["mock_output"]
assert "Input Size Error" in mock_output
assert "Input Too Large" in mock_output
@staticmethod
def test_security_violation_error_simulation(
executor_agent, sample_planned_step
):
"""Test error simulation for security violations."""
# Arrange
test_inputs = {
"text_input": "Process this <script>alert('hack')</script> content"
}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert
assert result["status"] == "simulated_error"
error_info = result["error_information"]
assert error_info["error_type"] == "security_violation"
assert error_info["error_code"] == "SEC_001"
assert error_info["retry_recommended"] is False
mock_output = result["results"]["mock_output"]
assert "Security Error" in mock_output
assert "Security Violation Detected" in mock_output
@staticmethod
def test_corrupted_file_error_for_image_tool(executor_agent):
"""Test error simulation for corrupted files in image tools."""
# Arrange
image_tool = MCPTool(
tool_id="image_caption_003",
name="Image Caption Generator",
description="Generate captions for images",
tags=["image", "ai"],
invocation_command_stub="caption_image {image}",
)
image_prompt = MCPPrompt(
prompt_id="caption-prompt",
name="Image Captioning",
description="Caption an image",
target_tool_id="image_caption_003",
template_string="Caption: {{image_file}}",
input_variables=["image_file"],
difficulty_level="beginner",
)
planned_step = PlannedStep(
tool=image_tool, prompt=image_prompt, relevance_score=0.85
)
test_inputs = {"image_file": "broken_image.jpg"}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
assert result["status"] == "simulated_error"
error_info = result["error_information"]
assert error_info["error_type"] == "corrupted_file"
assert error_info["error_code"] == "FILE_001"
assert error_info["retry_recommended"] is True
mock_output = result["results"]["mock_output"]
assert "File Processing Error" in mock_output
assert "Corrupted File Detected" in mock_output
@staticmethod
def test_wrong_file_type_error_for_image_tool(executor_agent):
"""Test error simulation for wrong file types in image tools."""
# Arrange
image_tool = MCPTool(
tool_id="image_caption_003",
name="Image Caption Generator",
description="Generate captions for images",
tags=["image", "ai"],
invocation_command_stub="caption_image {image}",
)
image_prompt = MCPPrompt(
prompt_id="caption-prompt",
name="Image Captioning",
description="Caption an image",
target_tool_id="image_caption_003",
template_string="Caption: {{image_file}}",
input_variables=["image_file"],
difficulty_level="beginner",
)
planned_step = PlannedStep(
tool=image_tool, prompt=image_prompt, relevance_score=0.85
)
test_inputs = {"image_file": "document.txt"}
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert
assert result["status"] == "simulated_error"
error_info = result["error_information"]
assert error_info["error_type"] == "wrong_file_type"
assert error_info["error_code"] == "FILE_002"
assert error_info["retry_recommended"] is True
mock_output = result["results"]["mock_output"]
assert "File Type Error" in mock_output
assert "Unsupported File Type" in mock_output
@patch("agents.executor.random.random")
def test_random_error_simulation(
self, mock_random, executor_agent, sample_planned_step
):
"""Test random error simulation."""
# Arrange - Force random error (10% chance normally)
mock_random.return_value = 0.05 # Less than 0.1, should trigger random error
test_inputs = {"text_input": "normal input"}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert - Should get a random error
assert result["status"] == "simulated_error"
error_info = result["error_information"]
assert error_info["error_type"] in [
"network_timeout",
"service_unavailable",
"rate_limit_exceeded",
"temporary_overload",
]
assert error_info["retry_recommended"] is True
@patch("agents.executor.random.random")
def test_no_random_error_simulation(
self, mock_random, executor_agent, sample_planned_step
):
"""Test that random errors don't trigger when probability is too high."""
# Arrange - Prevent random error
mock_random.return_value = (
0.15 # Greater than 0.1, should not trigger random error
)
test_inputs = {"text_input": "normal input"}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert - Should get normal success
assert result["status"] == "simulated_success"
assert "error_information" not in result
@staticmethod
def test_error_response_structure(executor_agent, sample_planned_step):
"""Test that error responses have the correct structure."""
# Arrange
test_inputs = {"text_input": "trigger error simulation"}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert - Check error response structure
assert result["status"] == "simulated_error"
# Check required top-level keys
expected_keys = [
"status",
"execution_id",
"tool_information",
"prompt_information",
"execution_details",
"error_information",
"results",
"metadata",
]
for key in expected_keys:
assert key in result, f"Missing key: {key}"
# Check error_information structure
error_info = result["error_information"]
error_keys = [
"error_type",
"error_severity",
"error_code",
"error_message",
"error_details",
"suggested_fixes",
"retry_recommended",
]
for key in error_keys:
assert key in error_info, f"Missing error info key: {key}"
# Check execution details for errors
exec_details = result["execution_details"]
assert "error_occurred_at" in exec_details
assert isinstance(exec_details["error_occurred_at"], int)
assert 10 <= exec_details["error_occurred_at"] <= 80
@staticmethod
def test_error_execution_time_shorter(executor_agent, sample_planned_step):
"""Test that error scenarios have shorter execution times."""
# Arrange
test_inputs = {"text_input": "simulate error"}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert
assert result["status"] == "simulated_error"
execution_time = result["execution_details"]["execution_time_ms"]
# Error execution times should be between 100-1000ms (shorter than success 800-2500ms)
assert 100 <= execution_time <= 1000
@staticmethod
def test_error_confidence_score_zero(executor_agent, sample_planned_step):
"""Test that error responses have zero confidence score."""
# Arrange
test_inputs = {"text_input": "error test"}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert
assert result["status"] == "simulated_error"
assert result["results"]["confidence_score"] == 0.0
@staticmethod
def test_error_logging(executor_agent, sample_planned_step):
"""Test that error scenarios are properly logged."""
# Arrange
test_inputs = {"text_input": "error logging test"}
# Act
with patch("agents.executor.logger") as mock_logger:
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert
assert result["status"] == "simulated_error"
# Check that warning was logged for the error
mock_logger.warning.assert_called_once()
warning_call = mock_logger.warning.call_args[0][0]
assert "Simulated" in warning_call
assert "error" in warning_call
@staticmethod
def test_high_severity_error_priority(executor_agent):
"""Test that high severity errors are prioritized over lower severity ones."""
# Arrange - Create inputs that would trigger both high and medium severity errors
test_inputs = {
"text_input": "error with <script>alert('test')</script> content"
}
tool = MCPTool(
tool_id="test-tool",
name="Test Tool",
description="Test tool",
tags=["test"],
invocation_command_stub="test {input}",
)
prompt = MCPPrompt(
prompt_id="test-prompt",
name="Test Prompt",
description="Test prompt",
target_tool_id="test-tool",
template_string="Process: {{text_input}}",
input_variables=["text_input"],
difficulty_level="beginner",
)
planned_step = PlannedStep(tool=tool, prompt=prompt, relevance_score=0.95)
# Act
result = executor_agent.simulate_execution(planned_step, test_inputs)
# Assert - Should get the high severity security violation, not the medium severity user_requested error
assert result["status"] == "simulated_error"
error_info = result["error_information"]
assert error_info["error_type"] == "security_violation" # High severity
assert error_info["error_severity"] == "high"
@staticmethod
def test_error_metadata_tracking(executor_agent, sample_planned_step):
"""Test that error metadata is properly tracked."""
# Arrange
test_inputs = {"text_input": "error metadata test"}
# Act
result = executor_agent.simulate_execution(sample_planned_step, test_inputs)
# Assert
assert result["status"] == "simulated_error"
metadata = result["metadata"]
assert metadata["error_simulation"] == "user_requested"
assert metadata["simulation_version"] == "MVP3_Sprint4"
assert "trigger_info" in metadata
assert len(metadata["trigger_info"]) <= 100 # Should be truncated to 100 chars
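# Reference sketch (assumption): the retry policy and error categorization the
# McpExecutorAgent tests below exercise. The values (3 total attempts; retry only
# on timeouts, connection failures, and 5xx responses; the status-code category
# names) are inferred from the assertions, not copied from agents/executor.py, and
# the real backoff delays are unknown here, which is why every retry test patches
# agents.executor.time.sleep.
_ASSUMED_ERROR_CATEGORIES: dict[int, str] = {
    400: "input_validation",
    401: "authentication",
    404: "not_found",
    429: "rate_limit",
    503: "server_error",
}
def _assumed_should_retry(attempt: int, status_code: int | None, max_attempts: int = 3) -> bool:
    """Return True when another MCP call should be attempted."""
    if attempt >= max_attempts:
        return False               # test_max_retries_exhausted: 1 initial try + 2 retries
    if status_code is None:
        return True                # Timeout / ConnectionError cases are retried
    return status_code >= 500      # 4xx fails fast (test_no_retry_for_client_errors)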
class TestMcpExecutorAgentEnhancedErrorHandling:
"""Test enhanced error handling in McpExecutorAgent for MVP4 Sprint 2."""
@pytest.fixture
def executor_agent(self) -> McpExecutorAgent:
"""Create a McpExecutorAgent instance for testing."""
return McpExecutorAgent()
@pytest.fixture
def mcp_tool(self) -> MCPTool:
"""Create an MCP tool for testing."""
return MCPTool(
tool_id="test-mcp-tool",
name="Test MCP Tool",
description="A tool for testing MCP integration",
execution_type="remote_mcp_gradio",
mcp_endpoint_url="https://test-mcp-server.hf.space/mcp",
timeout_seconds=30,
)
@pytest.fixture
def sample_prompt(self) -> MCPPrompt:
"""Create a sample MCPPrompt for testing."""
return MCPPrompt(
prompt_id="test-prompt",
name="Test Prompt",
description="A prompt for testing",
target_tool_id="test-mcp-tool",
template_string="Process: {{text_input}}",
input_variables=["text_input"],
difficulty_level="beginner",
)
@pytest.fixture
def planned_step(self, mcp_tool: MCPTool, sample_prompt: MCPPrompt) -> PlannedStep:
"""Create a PlannedStep for testing."""
return PlannedStep(tool=mcp_tool, prompt=sample_prompt, relevance_score=0.95)
@staticmethod
def test_retry_mechanism_for_server_errors(executor_agent, planned_step):
"""Test that server errors (5xx) trigger retry logic."""
inputs = {"text_input": "test input"}
# Mock responses: first two fail with 503, third succeeds
mock_responses = [
Mock(status_code=503, text="Service Unavailable"),
Mock(status_code=503, text="Service Unavailable"),
Mock(status_code=200)
]
# Configure the first two to raise HTTPError, third to succeed
mock_responses[0].raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_responses[0])
mock_responses[1].raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_responses[1])
mock_responses[2].raise_for_status.return_value = None
mock_responses[2].json.return_value = {"data": ["Success after retry!"]}
with patch("agents.executor.time.sleep"), \
patch.object(executor_agent.http_session, "post", side_effect=mock_responses):
result = executor_agent.execute_plan_step(planned_step, inputs)
# Should succeed after retries
assert result["status"] == "success_live_mcp"
assert result["attempts_made"] == 3
assert "Success after retry!" in result["tool_specific_output"]
@staticmethod
def test_retry_mechanism_for_timeouts(executor_agent, planned_step):
"""Test that timeouts trigger retry logic."""
inputs = {"text_input": "test input"}
# Mock timeout on first two attempts, success on third
side_effects = [
requests.exceptions.Timeout(),
requests.exceptions.Timeout(),
Mock(status_code=200)
]
# Configure successful response
success_response = side_effects[2]
success_response.raise_for_status.return_value = None
success_response.json.return_value = {"data": ["Success after timeout retries!"]}
with patch("agents.executor.time.sleep"), \
patch.object(executor_agent.http_session, "post", side_effect=side_effects):
result = executor_agent.execute_plan_step(planned_step, inputs)
# Should succeed after retries
assert result["status"] == "success_live_mcp"
assert result["attempts_made"] == 3
assert "Success after timeout retries!" in result["tool_specific_output"]
@staticmethod
def test_no_retry_for_client_errors(executor_agent, planned_step):
"""Test that client errors (4xx) don't trigger retries."""
inputs = {"text_input": "test input"}
mock_response = Mock(status_code=400, text="Bad Request")
mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response)
with patch.object(executor_agent.http_session, "post", return_value=mock_response):
result = executor_agent.execute_plan_step(planned_step, inputs)
# Should fail immediately without retries
assert result["status"] == "error_live_mcp_http"
assert result["error_details"]["attempts_made"] == 1
assert result["error_information"]["error_category"] == "input_validation"
@staticmethod
def test_enhanced_error_categorization(executor_agent, planned_step):
"""Test that errors are properly categorized."""
inputs = {"text_input": "test input"}
# Test different HTTP status codes
test_cases = [
(429, "rate_limit"),
(503, "server_error"),
(401, "authentication"),
(400, "input_validation"),
(404, "not_found"),
]
for status_code, expected_category in test_cases:
mock_response = Mock(status_code=status_code, text="Error")
mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response)
with patch.object(executor_agent.http_session, "post", return_value=mock_response):
result = executor_agent.execute_plan_step(planned_step, inputs)
assert result["error_information"]["error_category"] == expected_category
@staticmethod
def test_recovery_suggestions_for_different_errors(executor_agent, planned_step):
"""Test that appropriate recovery suggestions are provided."""
inputs = {"text_input": "test input"}
# Test rate limit error
mock_response = Mock(status_code=429, text="Rate Limited")
mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response)
with patch.object(executor_agent.http_session, "post", return_value=mock_response):
result = executor_agent.execute_plan_step(planned_step, inputs)
suggestions = result["error_information"]["recovery_suggestions"]
assert any("wait" in suggestion.lower() for suggestion in suggestions)
assert any("rate limit" in suggestion.lower() for suggestion in suggestions)
@staticmethod
def test_connection_error_handling(executor_agent, planned_step):
"""Test handling of connection errors with retry logic."""
inputs = {"text_input": "test input"}
# Mock connection errors on first two attempts, success on third
side_effects = [
requests.exceptions.ConnectionError("Connection failed"),
requests.exceptions.ConnectionError("Connection failed"),
Mock(status_code=200)
]
# Configure successful response
success_response = side_effects[2]
success_response.raise_for_status.return_value = None
success_response.json.return_value = {"data": ["Success after connection retries!"]}
with patch("agents.executor.time.sleep"), \
patch.object(executor_agent.http_session, "post", side_effect=side_effects):
result = executor_agent.execute_plan_step(planned_step, inputs)
# Should succeed after retries
assert result["status"] == "success_live_mcp"
assert result["attempts_made"] == 3
@staticmethod
def test_max_retries_exhausted(executor_agent, planned_step):
"""Test behavior when max retries are exhausted."""
inputs = {"text_input": "test input"}
# Mock persistent timeout
with patch.object(executor_agent.http_session, "post", side_effect=requests.exceptions.Timeout()):
result = executor_agent.execute_plan_step(planned_step, inputs)
# Should fail after max retries
assert result["status"] == "error_live_mcp_timeout"
assert result["error_details"]["attempts_made"] == 3 # 1 + 2 retries
assert result["error_information"]["retry_recommended"] is True
@staticmethod
def test_json_parsing_error_handling(executor_agent, planned_step):
"""Test handling of JSON parsing errors."""
inputs = {"text_input": "test input"}
mock_response = Mock(status_code=200, text="Invalid JSON Response")
mock_response.raise_for_status.return_value = None
mock_response.json.side_effect = json.JSONDecodeError("Invalid JSON", "doc", 0)
with patch.object(executor_agent.http_session, "post", return_value=mock_response):
result = executor_agent.execute_plan_step(planned_step, inputs)
assert result["status"] == "error_mcp_response_parsing"
assert result["error_information"]["error_category"] == "data"
assert "Invalid JSON Response" in result["error_details"]["response_preview"]
@staticmethod
def test_invalid_response_format_handling(executor_agent, planned_step):
"""Test handling of invalid MCP response format."""
inputs = {"text_input": "test input"}
mock_response = Mock(status_code=200)
mock_response.raise_for_status.return_value = None
mock_response.json.return_value = {"error": "No data field"} # Missing 'data' field
with patch.object(executor_agent.http_session, "post", return_value=mock_response):
result = executor_agent.execute_plan_step(planned_step, inputs)
assert result["status"] == "error_mcp_response_parsing"
assert "No 'data' field" in result["error_details"]["parse_error"]
@staticmethod
def test_empty_data_array_handling(executor_agent, planned_step):
"""Test handling of empty data array in response."""
inputs = {"text_input": "test input"}
mock_response = Mock(status_code=200)
mock_response.raise_for_status.return_value = None
mock_response.json.return_value = {"data": []} # Empty data array
with patch.object(executor_agent.http_session, "post", return_value=mock_response):
result = executor_agent.execute_plan_step(planned_step, inputs)
assert result["status"] == "error_mcp_response_parsing"
assert "Empty 'data' array" in result["error_details"]["parse_error"]
@staticmethod
def test_enhanced_error_response_format(executor_agent, planned_step):
"""Test that enhanced error responses contain all required fields."""
inputs = {"text_input": "test input"}
mock_response = Mock(status_code=503, text="Service Unavailable")
mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response)
with patch.object(executor_agent.http_session, "post", return_value=mock_response):
result = executor_agent.execute_plan_step(planned_step, inputs)
# Check required fields in error response
assert "error_information" in result
error_info = result["error_information"]
required_fields = [
"error_category", "error_type", "error_message",
"recovery_suggestions", "retry_recommended",
"user_action_required", "timestamp"
]
for field in required_fields:
assert field in error_info, f"Missing required field: {field}"
# Check error details
assert "error_details" in result
error_details = result["error_details"]
assert "status_code" in error_details
assert "endpoint" in error_details
assert "attempts_made" in error_details
@staticmethod
def test_successful_mcp_call_with_enhanced_response(executor_agent, planned_step):
"""Test successful MCP call returns enhanced response format."""
inputs = {"text_input": "test input"}
mock_response = Mock(status_code=200)
mock_response.raise_for_status.return_value = None
mock_response.json.return_value = {"data": ["Successful response!"]}
with patch.object(executor_agent.http_session, "post", return_value=mock_response):
result = executor_agent.execute_plan_step(planned_step, inputs)
# Check enhanced success response
assert result["status"] == "success_live_mcp"
assert result["execution_mode"] == "live_mcp"
assert result["attempts_made"] == 1
assert result["mcp_endpoint"] == planned_step.tool.mcp_endpoint_url
assert "βœ… Successfully executed" in result["message"]
@staticmethod
def test_unknown_execution_type_error(executor_agent, sample_prompt):
"""Test handling of unknown execution type falls back to simulation."""
# Create tool with invalid execution type by bypassing validation
with patch.object(MCPTool, "__post_init__", return_value=None):
invalid_tool = MCPTool(
tool_id="invalid-tool",
name="Invalid Tool",
description="Tool with invalid execution type",
execution_type="invalid_type",
)
# Create planned step with patched tool
with patch.object(PlannedStep, "__post_init__", return_value=None):
invalid_planned_step = PlannedStep(
tool=invalid_tool,
prompt=sample_prompt,
relevance_score=0.5
)
inputs = {"text_input": "test input"}
# Mock random functions to prevent error simulation
with patch("agents.executor.random.random", return_value=0.5), \
patch("agents.executor.random.choice", return_value="timeout"), \
patch("agents.executor.random.randint", return_value=50), \
patch("agents.executor.random.uniform", return_value=0.8):
result = executor_agent.execute_plan_step(invalid_planned_step, inputs)
# With improved fallback logic, unknown execution types should fall back to simulation
assert result["status"] == "simulated_success"
assert result["execution_mode"] == "simulated"
# Should include fallback information in metadata
assert "fallback_reason" in result["metadata"] or "execution_type" in result["metadata"]
@staticmethod
def test_retry_delay_timing(executor_agent, planned_step):
"""Test that retry delays are properly implemented."""
inputs = {"text_input": "test input"}
with patch("agents.executor.time.sleep") as mock_sleep, \
patch.object(executor_agent.http_session, "post", side_effect=requests.exceptions.Timeout()):
start_time = time.time()
result = executor_agent.execute_plan_step(planned_step, inputs)
# Should have called sleep twice (for 2 retries)
assert mock_sleep.call_count == 2
# Should have called with correct delay
mock_sleep.assert_called_with(executor_agent.retry_delay)
@staticmethod
def test_error_message_user_friendliness(executor_agent, planned_step):
"""Test that error messages are user-friendly and informative."""
inputs = {"text_input": "test input"}
test_cases = [
(429, "Rate limit exceeded"),
(503, "Service temporarily unavailable"),
(500, "Server error"),
(400, "Client error"),
]
for status_code, expected_message_part in test_cases:
mock_response = Mock(status_code=status_code, text="Error details")
mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response)
with patch.object(executor_agent.http_session, "post", return_value=mock_response):
result = executor_agent.execute_plan_step(planned_step, inputs)
assert expected_message_part.lower() in result["message"].lower()
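# The HTTP-level tests above repeat the same mock-response wiring. A small helper along the
# lines of the hypothetical sketch below (not used by the existing tests) could centralize
# that setup; it only assumes unittest.mock.Mock, which is already imported at module level.
@staticmethod
def _make_mock_json_response(status_code=200, payload=None):
"""Build a Mock that mimics a requests.Response carrying a JSON body."""
response = Mock(status_code=status_code)
response.raise_for_status.return_value = None
response.json.return_value = payload if payload is not None else {"data": ["ok"]}
return response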