"""Tests for ExecutorAgent stub implementation. This module provides comprehensive testing for the StubExecutorAgent and McpExecutorAgent classes. The tests focus on validating mock execution simulation, tool-specific output generation, error handling mechanisms, and system integration scenarios for MVP 3. Key Testing Areas: - Mock execution simulation with realistic tool outputs - Error handling and recovery mechanisms - Tool-specific behavior validation (sentiment, summarization, code analysis, image processing) - Input validation and edge case handling - System integration and MCP protocol compliance Educational Notes: - StubExecutorAgent provides mock execution for testing without external dependencies - McpExecutorAgent handles real MCP server communication with retry logic - Tests use fixtures to create reusable test data and maintain consistency - Parameterized tests validate behavior across multiple input scenarios """ import json import time from unittest.mock import Mock, patch import pytest import requests from agents.executor import McpExecutorAgent, StubExecutorAgent from kg_services.ontology import MCPPrompt, MCPTool, PlannedStep class TestStubExecutorAgent: """Comprehensive test suite for StubExecutorAgent class. This test class validates the mock execution capabilities of StubExecutorAgent, which simulates tool execution without requiring actual external services. Test Categories: - Basic execution simulation and response structure - Tool-specific output generation and validation - Input handling and validation scenarios - Error simulation and handling - Performance and consistency testing Educational Purpose: These tests demonstrate how to properly mock complex system interactions while maintaining realistic behavior patterns for system validation. """ @pytest.fixture def executor_agent(self) -> StubExecutorAgent: """Create a StubExecutorAgent instance for testing. Returns: StubExecutorAgent: Fresh instance configured for testing scenarios. Educational Note: Fixtures provide isolated test instances, ensuring tests don't interfere with each other and maintaining predictable test conditions. """ return StubExecutorAgent() @pytest.fixture def sample_tool(self) -> MCPTool: """Create a sample sentiment analysis MCPTool for testing. Returns: MCPTool: Pre-configured sentiment analyzer tool with realistic metadata. Educational Note: This fixture represents a typical NLP tool configuration that would be used in production systems for text sentiment analysis. """ return MCPTool( tool_id="sentiment-analyzer-v1", name="Advanced Sentiment Analyzer", description="Analyze sentiment and emotional tone of text", tags=["sentiment", "analysis", "nlp"], invocation_command_stub="sentiment_analyze --input {text} --format json", ) @pytest.fixture def sentiment_prompt(self) -> MCPPrompt: """Create a sentiment analysis prompt for testing. Returns: MCPPrompt: Template for sentiment analysis with input variable mapping. Educational Note: Prompts define how tools should be invoked with specific inputs, acting as a bridge between user intent and tool execution. """ return MCPPrompt( prompt_id="sentiment-basic-001", name="Basic Sentiment Analysis", description="Analyze sentiment of provided text", target_tool_id="sentiment-analyzer-v1", template_string="Analyze the sentiment of this text: {{text_content}}", input_variables=["text_content"], difficulty_level="beginner", ) @pytest.fixture def summarizer_tool(self) -> MCPTool: """Create a text summarizer tool for testing. 
Returns: MCPTool: Pre-configured text summarization tool for document processing. Educational Note: Summarization tools represent complex NLP operations that require multiple parameters and produce structured outputs. """ return MCPTool( tool_id="text-summarizer-v2", name="Intelligent Text Summarizer", description="Generate concise summaries of long text documents", tags=["summarization", "text", "nlp"], invocation_command_stub="summarize --input {text} --length {length}", ) @pytest.fixture def summary_prompt(self) -> MCPPrompt: """Create a text summarization prompt for testing. Returns: MCPPrompt: Multi-input template for advanced document summarization. Educational Note: This prompt demonstrates how complex tools can accept multiple inputs to customize their behavior (document + focus area). """ return MCPPrompt( prompt_id="summary-advanced-001", name="Advanced Document Summary", description="Create comprehensive summary with key points", target_tool_id="text-summarizer-v2", template_string="Summarize this document: {{document_text}} with focus on {{focus_area}}", input_variables=["document_text", "focus_area"], difficulty_level="intermediate", ) @pytest.fixture def code_quality_tool(self) -> MCPTool: """Create a code quality analysis tool for testing. Returns: MCPTool: Pre-configured code analysis tool for quality assessment. Educational Note: Code quality tools represent static analysis capabilities that examine source code for security, performance, and style issues. """ return MCPTool( tool_id="code-quality-linter", name="Code Quality Analyzer", description="Analyze code quality, security, and best practices", tags=["code", "quality", "security"], invocation_command_stub="lint_code --file {code_file} --rules {ruleset}", ) @pytest.fixture def image_caption_tool(self) -> MCPTool: """Create an image captioning tool for testing. Returns: MCPTool: Pre-configured AI-powered image analysis tool. Educational Note: Image processing tools demonstrate multimodal AI capabilities, processing visual inputs to generate textual descriptions. """ return MCPTool( tool_id="image-captioner-ai", name="AI Image Caption Generator", description="Generate descriptive captions for images using AI", tags=["image", "caption", "ai", "vision"], invocation_command_stub="caption_image --image {image_path} --style {caption_style}", ) @pytest.fixture def sample_planned_step( self, sample_tool: MCPTool, sentiment_prompt: MCPPrompt ) -> PlannedStep: """Create a sample PlannedStep for testing execution workflows. Args: sample_tool: MCPTool fixture for sentiment analysis sentiment_prompt: MCPPrompt fixture for sentiment analysis Returns: PlannedStep: Complete execution plan combining tool, prompt, and relevance. Educational Note: PlannedStep represents a complete execution plan that binds a tool with a prompt and includes a relevance score indicating how well this combination matches the user's intent. """ return PlannedStep( tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.92 ) def test_executor_initialization(self, executor_agent: StubExecutorAgent) -> None: """Test that StubExecutorAgent initializes correctly. Args: executor_agent: StubExecutorAgent fixture Educational Purpose: Validates basic object instantiation and type checking. This is a fundamental sanity check that the class can be created. """ assert isinstance(executor_agent, StubExecutorAgent) @patch("agents.executor.logger") def test_executor_initialization_logging(self, mock_logger: Mock) -> None: """Test that initialization logs correctly. 
Args: mock_logger: Mocked logger to capture log messages Educational Purpose: Demonstrates how to test logging behavior using mocks. Proper logging is crucial for debugging and monitoring system behavior. """ # Act: Create executor instance to trigger logging StubExecutorAgent() # Assert: Verify the expected log message was emitted mock_logger.info.assert_called_once_with( "StubExecutorAgent initialized for MVP 3" ) @patch("agents.executor.random.random") def test_simulate_execution_basic_success( self, mock_random: Mock, executor_agent: StubExecutorAgent, sample_planned_step: PlannedStep ) -> None: """Test basic successful execution simulation with realistic inputs. Args: mock_random: Mocked random function to control error simulation executor_agent: StubExecutorAgent fixture sample_planned_step: PlannedStep fixture for testing Educational Purpose: Demonstrates the happy path for execution simulation, showing how the system processes valid inputs and generates structured responses. Test Strategy: - Mock random to prevent error simulation (>15% threshold) - Provide realistic sentiment analysis input - Validate complete response structure and content """ # Arrange: Set up test conditions to prevent random errors mock_random.return_value = 0.9 # Above 15% threshold - no random errors test_inputs: dict[str, str] = {"text_content": "This product is amazing and I love it!"} # Act: Simulate execution with the planned step and inputs result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert: Validate response structure and content assert isinstance(result, dict), "Response should be a dictionary" assert result["status"] == "simulated_success", "Should indicate successful simulation" assert "execution_id" in result, "Should include unique execution identifier" assert result["tool_information"]["tool_name"] == "Advanced Sentiment Analyzer" assert result["prompt_information"]["prompt_name"] == "Basic Sentiment Analysis" assert result["execution_details"]["inputs_received"] == test_inputs assert "mock_output" in result["results"], "Should include simulated tool output" @patch("agents.executor.random.random") def test_simulate_execution_comprehensive_structure( self, mock_random: Mock, executor_agent: StubExecutorAgent, sample_planned_step: PlannedStep ) -> None: """Test that execution response has complete and consistent structure. Args: mock_random: Mocked random function to control behavior executor_agent: StubExecutorAgent fixture sample_planned_step: PlannedStep fixture for testing Educational Purpose: Validates the complete response schema that external systems can rely on. This ensures API contract compliance and prevents integration issues. 
Test Strategy: - Verify all required top-level response keys are present - Validate nested object structures for completeness - Ensure data types match expected schema """ # Arrange: Configure test to avoid random errors mock_random.return_value = 0.9 # Disable random error simulation test_inputs: dict[str, str] = {"text_content": "Test content"} # Act: Execute simulation and capture full response result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert: Validate complete response structure # Top-level keys must be present for API contract compliance expected_keys: list[str] = [ "status", # Execution outcome indicator "execution_id", # Unique identifier for tracking "tool_information", # Tool metadata and configuration "prompt_information",# Prompt template and variables "execution_details",# Runtime information and metrics "results", # Actual tool output and analysis "metadata", # Additional context and debugging info ] for key in expected_keys: assert key in result, f"Required key '{key}' missing from response" # Assert: Validate tool_information nested structure tool_info = result["tool_information"] assert "tool_id" in tool_info, "Tool identifier required" assert "tool_name" in tool_info, "Tool name required for UI display" assert "tool_description" in tool_info, "Tool description required for context" # Assert: Validate execution_details structure exec_details = result["execution_details"] assert "inputs_received" in exec_details, "Input tracking required" assert "inputs_count" in exec_details, "Input metrics required" assert "execution_time_ms" in exec_details, "Performance metrics required" assert "complexity_level" in exec_details, "Complexity assessment required" # Assert: Validate results structure contains actual outputs results = result["results"] assert "message" in results, "Human-readable message required" assert "mock_output" in results, "Simulated tool output required" assert "confidence_score" in results, "Quality assessment required" @patch("agents.executor.random.random") def test_simulate_execution_sentiment_tool_output( self, mock_random: Mock, executor_agent: StubExecutorAgent, sample_tool: MCPTool, sentiment_prompt: MCPPrompt ) -> None: """Test sentiment analysis tool produces realistic mock output format. Args: mock_random: Mocked random function to control error simulation executor_agent: StubExecutorAgent fixture sample_tool: MCPTool fixture for sentiment analysis sentiment_prompt: MCPPrompt fixture for sentiment analysis Educational Purpose: Validates that the simulator generates realistic sentiment analysis output that matches what real sentiment analysis tools would produce. 
Test Strategy: - Create planned step combining sentiment tool and prompt - Provide positive sentiment text input - Verify output contains expected sentiment analysis elements - Check for realistic formatting and content structure """ # Arrange: Disable random errors and create realistic test scenario mock_random.return_value = 0.5 # Above 15% threshold - no random errors planned_step = PlannedStep( tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88 ) # Use clearly positive text to test sentiment detection test_inputs: dict[str, str] = {"text_content": "I really enjoy this product!"} # Act: Execute sentiment analysis simulation result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert: Validate sentiment analysis specific output format mock_output = result["results"]["mock_output"] # Check for standard sentiment analysis output components assert "Sentiment Analysis Results" in mock_output, "Should contain results header" assert "Overall Sentiment Classification" in mock_output, "Should classify sentiment" assert "Confidence Metrics" in mock_output, "Should provide confidence scores" assert "Positive" in mock_output, "Should detect positive sentiment in test text" assert "Generated by Sentiment Analyzer Tool" in mock_output, "Should indicate tool source" @patch("agents.executor.random.random") def test_simulate_execution_summarizer_tool_output( self, mock_random: Mock, executor_agent: StubExecutorAgent, summarizer_tool: MCPTool, summary_prompt: MCPPrompt ) -> None: """Test text summarizer tool produces realistic mock summary output. Args: mock_random: Mocked random function to control error simulation executor_agent: StubExecutorAgent fixture summarizer_tool: MCPTool fixture for text summarization summary_prompt: MCPPrompt fixture for multi-input summarization Educational Purpose: Validates that the simulator generates realistic text summarization output with structured sections that would be expected from real NLP tools. 
Test Strategy: - Test multi-input prompt handling (document + focus area) - Verify output contains expected summarization sections - Check for realistic summary structure and formatting """ # Arrange: Configure test to avoid random errors mock_random.return_value = 0.5 # Above 15% threshold - no random errors planned_step = PlannedStep( tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.90 ) # Test with multi-input scenario (document text + focus area) test_inputs: dict[str, str] = { "document_text": "Long document content...", "focus_area": "key insights", } # Act: Execute summarization simulation result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert: Validate summarization specific output format mock_output = result["results"]["mock_output"] # Check for standard summarization output components assert "Text Summarization Complete" in mock_output, "Should contain completion indicator" assert "Executive Summary" in mock_output, "Should provide executive summary section" assert "Key Points" in mock_output, "Should extract key points from content" assert "focus_area" in str(test_inputs), "Should handle multi-input processing" # Validate that the tool properly processes multiple inputs exec_details = result["execution_details"] assert exec_details["inputs_count"] == 2, "Should recognize two input parameters" @patch("agents.executor.random.random") def test_simulate_execution_code_quality_tool_output( self, mock_random, executor_agent, code_quality_tool ): """Test code quality analysis specific mock output.""" # Arrange - Disable random errors mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors code_prompt = MCPPrompt( prompt_id="code-quality-001", name="Comprehensive Code Review", description="Analyze code for quality and security", target_tool_id="code-quality-linter", template_string="Review this code: {{source_code}}", input_variables=["source_code"], difficulty_level="advanced", ) planned_step = PlannedStep( tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87 ) test_inputs = {"source_code": "def hello_world():\n print('Hello!')"} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "Code Quality Analysis Complete" in mock_output assert "Overall Quality Score" in mock_output assert "Analysis Summary" in mock_output assert "Quality Metrics" in mock_output assert "Recommendations" in mock_output assert "Generated by Code Quality Linter Tool" in mock_output @patch("agents.executor.random.random") def test_simulate_execution_image_caption_tool_output( self, mock_random, executor_agent, image_caption_tool ): """Test image captioning specific mock output.""" # Arrange mock_random.return_value = 0.9 # Disable random errors (15% threshold) caption_prompt = MCPPrompt( prompt_id="image-caption-001", name="Descriptive Image Caption", description="Generate detailed image captions", target_tool_id="image-captioner-ai", template_string="Caption this image: {{image_url}}", input_variables=["image_url"], difficulty_level="intermediate", ) planned_step = PlannedStep( tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91 ) test_inputs = {"image_url": "https://example.com/office.jpg"} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "Image Caption Generation Results" in mock_output assert "Primary Caption" in mock_output assert "Technical Analysis" in 
mock_output assert "Confidence Level" in mock_output assert "Alternative Descriptions" in mock_output assert "Generated by Image Caption Generator Tool" in mock_output @patch("agents.executor.random.random") def test_simulate_execution_generic_tool_output(self, mock_random, executor_agent): """Test generic mock output for unknown tool types.""" # Arrange - Disable random errors mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors generic_tool = MCPTool( tool_id="unknown-tool", name="Unknown Analysis Tool", description="A tool for unknown analysis", tags=["unknown"], invocation_command_stub="unknown_analyze {input}", ) generic_prompt = MCPPrompt( prompt_id="generic-prompt", name="Generic Processing", description="Generic prompt for unknown tool", target_tool_id="unknown-tool", template_string="Process: {{data}}", input_variables=["data"], difficulty_level="beginner", ) planned_step = PlannedStep( tool=generic_tool, prompt=generic_prompt, relevance_score=0.75 ) test_inputs = {"data": "sample data"} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "Execution Results for Unknown Analysis Tool" in mock_output assert "Successfully processed" in mock_output assert "Generic Processing" in mock_output assert "Input Analysis" in mock_output assert "data**: sample data" in mock_output @patch("agents.executor.random.random") def test_simulate_execution_empty_inputs(self, mock_random, executor_agent, sample_planned_step): """Test execution with empty inputs dictionary.""" # Arrange - Disable random errors mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors empty_inputs: dict[str, str] = {} # Act result = executor_agent.simulate_execution(sample_planned_step, empty_inputs) # Assert assert result["status"] == "simulated_success" assert result["execution_details"]["inputs_received"] == empty_inputs assert result["execution_details"]["inputs_count"] == 0 @staticmethod def test_simulate_execution_multiple_inputs( executor_agent, summarizer_tool, summary_prompt ): """Test execution with multiple input variables.""" # Arrange planned_step = PlannedStep( tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.85 ) test_inputs = { "document_text": "Very long document with lots of content...", "focus_area": "business insights and recommendations", } # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert assert result["execution_details"]["inputs_count"] == 2 assert result["execution_details"]["inputs_received"] == test_inputs @staticmethod def test_simulate_execution_invalid_plan_type(executor_agent): """Test error handling with invalid plan type.""" # Arrange invalid_plan = {"not": "a planned step"} test_inputs = {"input": "test"} # Act & Assert with pytest.raises(ValueError, match="Plan must be a PlannedStep instance"): executor_agent.simulate_execution(invalid_plan, test_inputs) @staticmethod def test_simulate_execution_invalid_inputs_type( executor_agent, sample_planned_step ): """Test error handling with invalid inputs type.""" # Arrange invalid_inputs = "not a dictionary" # Act & Assert with pytest.raises(ValueError, match="Inputs must be a dictionary"): executor_agent.simulate_execution(sample_planned_step, invalid_inputs) @patch("agents.executor.random.random") @patch("agents.executor.logger") def test_simulate_execution_logging( self, mock_logger, mock_random, executor_agent, sample_planned_step ): """Test that execution logs 
appropriately.""" # Arrange - Disable random errors to ensure consistent logging mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors test_inputs = {"text_content": "test content"} # Act executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert assert mock_logger.info.call_count >= 3 # 3 execution logs (init not captured by fixture) # Check specific log messages log_calls = [call[0][0] for call in mock_logger.info.call_args_list] assert any("Simulating execution for tool" in log for log in log_calls) assert any("Received inputs" in log for log in log_calls) assert any("Generated mock response" in log for log in log_calls) @patch("agents.executor.random.random") def test_execution_id_generation( self, mock_random, executor_agent, sample_planned_step ): """Test that unique execution IDs are generated.""" # Arrange - Disable random errors mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors test_inputs_1 = {"text_content": "first input"} test_inputs_2 = {"text_content": "second input"} # Act result_1 = executor_agent.simulate_execution(sample_planned_step, test_inputs_1) result_2 = executor_agent.simulate_execution(sample_planned_step, test_inputs_2) # Assert assert result_1["execution_id"] != result_2["execution_id"] assert result_1["execution_id"].startswith("exec_sentiment-analyzer-v1_") assert result_2["execution_id"].startswith("exec_sentiment-analyzer-v1_") @staticmethod def test_confidence_score_consistency(executor_agent, sample_planned_step): """Test that confidence scores are consistent.""" # Arrange test_inputs = {"text_content": "test content"} # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert confidence = result["results"]["confidence_score"] assert isinstance(confidence, int | float) assert 0.0 <= confidence <= 1.0 @staticmethod def test_metadata_structure(executor_agent, sample_planned_step): """Test that metadata has expected structure.""" # Arrange test_inputs = {"text_content": "test content"} # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert metadata = result["metadata"] assert "simulation_version" in metadata assert "timestamp" in metadata assert "notes" in metadata assert metadata["simulation_version"] == "MVP3_Sprint4" # Enhanced Input-Aware Mock Tests @patch("agents.executor.random.random") def test_text_summarizer_empty_input_handling( self, mock_random, executor_agent, summarizer_tool, summary_prompt ): """Test text summarizer with empty input returns appropriate error message.""" # Arrange - Disable random errors mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors planned_step = PlannedStep( tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.90 ) test_inputs = {"document_text": ""} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "No text content provided for summarization" in mock_output assert "⚠️ **Input Analysis:**" in mock_output assert "Recommendation: Please provide text content" in mock_output @patch("agents.executor.random.random") def test_text_summarizer_content_type_detection( self, mock_random, executor_agent, summarizer_tool, summary_prompt ): """Test text summarizer detects content type and generates appropriate response.""" # Arrange - Disable random errors mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors planned_step = PlannedStep( tool=summarizer_tool, 
prompt=summary_prompt, relevance_score=0.90 ) # Test technical content tech_inputs = { "text": "This function implements a class variable to store programming code patterns." } # Act result = executor_agent.simulate_execution(planned_step, tech_inputs) # Assert mock_output = result["results"]["mock_output"] assert "technical content" in mock_output assert "Technical classification" in mock_output assert "code structure, functionality patterns" in mock_output @patch("agents.executor.random.random") def test_text_summarizer_business_content_detection( self, mock_random, executor_agent, summarizer_tool, summary_prompt ): """Test text summarizer detects business content correctly.""" # Arrange - Disable random errors mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors planned_step = PlannedStep( tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.90 ) business_inputs = { "content": "Our company's market analysis shows excellent customer retention and product sales growth in business sectors." } # Act result = executor_agent.simulate_execution(planned_step, business_inputs) # Assert mock_output = result["results"]["mock_output"] assert "business content" in mock_output assert "Business classification" in mock_output assert "market dynamics, customer insights" in mock_output @patch("agents.executor.random.random") def test_sentiment_analyzer_empty_input_handling( self, mock_random, executor_agent, sample_tool, sentiment_prompt ): """Test sentiment analyzer with empty input returns appropriate error message.""" # Arrange - Disable random errors mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors planned_step = PlannedStep( tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88 ) test_inputs = {"text": ""} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "No text content provided for sentiment analysis" in mock_output assert "⚠️ **Input Analysis:**" in mock_output assert "Supported input fields:" in mock_output @patch("agents.executor.random.random") def test_sentiment_analyzer_positive_content_detection( self, mock_random, executor_agent, sample_tool, sentiment_prompt ): """Test sentiment analyzer correctly detects positive sentiment.""" # Arrange mock_random.return_value = 0.9 # Disable random errors (15% threshold) planned_step = PlannedStep( tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88 ) positive_inputs = { "text": "This product is absolutely amazing and fantastic! I love it and highly recommend it." } # Act result = executor_agent.simulate_execution(planned_step, positive_inputs) # Assert mock_output = result["results"]["mock_output"] assert "Primary**: Positive" in mock_output assert "Joy/Satisfaction:" in mock_output assert "**Positive Indicators**: 4 detected" in mock_output @patch("agents.executor.random.random") def test_sentiment_analyzer_negative_content_detection( self, mock_random, executor_agent, sample_tool, sentiment_prompt ): """Test sentiment analyzer correctly detects negative sentiment.""" # Arrange - Disable random errors mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors planned_step = PlannedStep( tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88 ) negative_inputs = { "feedback": "This service was terrible and awful. I hate it and it's the worst experience ever." 
} # Act result = executor_agent.simulate_execution(planned_step, negative_inputs) # Assert mock_output = result["results"]["mock_output"] assert "Primary**: Negative" in mock_output assert "Frustration:" in mock_output assert "**Negative Indicators**:" in mock_output assert "feedback" in mock_output # Source field detection @patch("agents.executor.random.random") def test_sentiment_analyzer_neutral_content_detection( self, mock_random, executor_agent, sample_tool, sentiment_prompt ): """Test sentiment analyzer correctly detects neutral sentiment.""" # Arrange - Disable random errors mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors planned_step = PlannedStep( tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88 ) neutral_inputs = { "message": "The weather today is okay and normal. It's fine and adequate for our needs." } # Act result = executor_agent.simulate_execution(planned_step, neutral_inputs) # Assert mock_output = result["results"]["mock_output"] assert "Primary**: Neutral" in mock_output assert "Neutral:" in mock_output assert "**Neutral Indicators**:" in mock_output @patch("agents.executor.random.random") def test_image_caption_empty_input_handling( self, mock_random, executor_agent, image_caption_tool ): """Test image caption generator with empty input returns appropriate error message.""" # Arrange mock_random.return_value = 0.9 # Disable random errors (15% threshold) caption_prompt = MCPPrompt( prompt_id="image-caption-001", name="Descriptive Image Caption", description="Generate detailed image captions", target_tool_id="image-captioner-ai", template_string="Caption this image: {{image_url}}", input_variables=["image_url"], difficulty_level="intermediate", ) planned_step = PlannedStep( tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91 ) test_inputs = {"image_url": ""} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "No image source provided for caption generation" in mock_output assert "⚠️ **Input Analysis:**" in mock_output assert "Supported input fields:" in mock_output @patch("agents.executor.random.random") def test_image_caption_workspace_detection( self, mock_random, executor_agent, image_caption_tool ): """Test image caption generator detects workspace images correctly.""" # Arrange mock_random.return_value = 0.9 # Disable random errors (15% threshold) caption_prompt = MCPPrompt( prompt_id="image-caption-001", name="Descriptive Image Caption", description="Generate detailed image captions", target_tool_id="image-captioner-ai", template_string="Caption this image: {{image_path}}", input_variables=["image_path"], difficulty_level="intermediate", ) planned_step = PlannedStep( tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91 ) test_inputs = {"image_path": "/uploads/office_workspace_desk.jpg"} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "professional workspace" in mock_output assert "workspace" in mock_output.lower() assert "Indoor workspace/office environment" in mock_output assert "desk" in mock_output @patch("agents.executor.random.random") def test_image_caption_with_context(self, mock_random, executor_agent, image_caption_tool): """Test image caption generator incorporates additional context.""" # Arrange mock_random.return_value = 0.9 # Disable random errors (15% threshold) caption_prompt = MCPPrompt( 
prompt_id="image-caption-001", name="Descriptive Image Caption", description="Generate detailed image captions", target_tool_id="image-captioner-ai", template_string="Caption this image: {{image}} with context: {{context}}", input_variables=["image", "context"], difficulty_level="intermediate", ) planned_step = PlannedStep( tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91 ) test_inputs = { "image": "nature_photo.jpg", "context": "Taken during sunrise in the mountains", } # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "additional context: Taken during sunrise" in mock_output assert "Context Provided**: Yes" in mock_output assert "sunrise in the mountains" in mock_output @patch("agents.executor.random.random") def test_code_linter_empty_input_handling(self, mock_random, executor_agent, code_quality_tool): """Test code linter with empty input returns appropriate error message.""" # Arrange - Disable random errors to ensure consistent behavior mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors code_prompt = MCPPrompt( prompt_id="code-quality-001", name="Comprehensive Code Review", description="Analyze code for quality and security", target_tool_id="code-quality-linter", template_string="Review this code: {{source_code}}", input_variables=["source_code"], difficulty_level="advanced", ) planned_step = PlannedStep( tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87 ) test_inputs = {"source_code": ""} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "No code content provided for analysis" in mock_output assert "⚠️ **Input Analysis:**" in mock_output assert "Supported input fields:" in mock_output @patch("agents.executor.random.random") def test_code_linter_python_detection(self, mock_random, executor_agent, code_quality_tool): """Test code linter correctly detects Python code.""" # Arrange - Disable random errors to ensure consistent behavior mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors code_prompt = MCPPrompt( prompt_id="code-quality-001", name="Comprehensive Code Review", description="Analyze code for quality and security", target_tool_id="code-quality-linter", template_string="Review this code: {{code}}", input_variables=["code"], difficulty_level="advanced", ) planned_step = PlannedStep( tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87 ) python_code = """ def hello_world(): \"\"\"Print hello world message.\"\"\" print("Hello, World!") return True class MyClass: def __init__(self): pass """ test_inputs = {"code": python_code} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "Language**: Python" in mock_output assert "Lines Analyzed**: " in mock_output assert "def hello_world():" in mock_output # Code preview @patch("agents.executor.random.random") def test_code_linter_issue_detection(self, mock_random, executor_agent, code_quality_tool): """Test code linter detects common code issues.""" # Arrange - Disable random errors mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors code_prompt = MCPPrompt( prompt_id="code-quality-001", name="Comprehensive Code Review", description="Analyze code for quality and security", target_tool_id="code-quality-linter", template_string="Review this code: {{script}}", 
input_variables=["script"], difficulty_level="advanced", ) planned_step = PlannedStep( tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87 ) problematic_code = """ # TODO: Fix this function def bad_function(): x = 1 y = 2 z = very_long_variable_name_that_exceeds_normal_line_length_and_should_be_flagged_as_an_issue_by_linter = 3 return x + y + z """ test_inputs = {"script": problematic_code} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "Todo/Fixme comments found" in mock_output assert "Long lines detected" in mock_output assert "Issues Found**: " in mock_output @patch("agents.executor.random.random") def test_generic_tool_input_analysis(self, mock_random, executor_agent): """Test generic tool provides detailed input analysis.""" # Arrange - Disable random errors to ensure consistent behavior mock_random.return_value = 0.5 # Above 0.1 threshold, no random errors custom_tool = MCPTool( tool_id="custom-analyzer", name="Custom Analysis Tool", description="Performs custom data analysis", tags=["analysis"], invocation_command_stub="analyze {input}", ) custom_prompt = MCPPrompt( prompt_id="custom-prompt", name="Custom Analysis", description="Custom analysis prompt", target_tool_id="custom-analyzer", template_string="Analyze: {{data}} with {{method}}", input_variables=["data", "method"], difficulty_level="intermediate", ) planned_step = PlannedStep( tool=custom_tool, prompt=custom_prompt, relevance_score=0.82 ) test_inputs = { "data": "Large dataset with complex information patterns and detailed analysis requirements", "method": "statistical", } # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert ( "Processing Complexity**: Simple" in mock_output ) # Based on total input length assert "Inputs Received**: 2 parameter(s)" in mock_output assert "Total Content Length**:" in mock_output assert ( "data**: Large dataset with complex" in mock_output ) # Check partial match without ellipsis assert "method**: statistical" in mock_output @patch("agents.executor.random.random") def test_long_input_processing(self, mock_random, executor_agent, sample_tool, sentiment_prompt): """Test processing of very long input content.""" # Arrange mock_random.return_value = 0.9 # Disable random errors (15% threshold) planned_step = PlannedStep( tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88 ) # Create long input content long_text = "This is an excellent service. 
" * 50 # 1500+ characters test_inputs = {"text_content": long_text} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert mock_output = result["results"]["mock_output"] assert "Primary**: Positive" in mock_output # Should detect positive sentiment assert "Text Length**: " in mock_output assert "Analysis Confidence" in mock_output class TestErrorSimulation: """Test suite for error simulation functionality.""" @pytest.fixture def executor_agent(self) -> StubExecutorAgent: """Create a StubExecutorAgent instance for testing.""" return StubExecutorAgent() @pytest.fixture def sample_tool(self) -> MCPTool: """Create a sample MCPTool for testing.""" return MCPTool( tool_id="test-tool-error", name="Test Error Tool", description="A tool for testing error scenarios", tags=["test", "error"], invocation_command_stub="test_error {input}", ) @pytest.fixture def sample_prompt(self) -> MCPPrompt: """Create a sample MCPPrompt for testing.""" return MCPPrompt( prompt_id="test-prompt-error", name="Test Error Prompt", description="A prompt for testing error scenarios", target_tool_id="test-tool-error", template_string="Process: {{text_input}}", input_variables=["text_input"], difficulty_level="beginner", ) @pytest.fixture def sample_planned_step( self, sample_tool: MCPTool, sample_prompt: MCPPrompt ) -> PlannedStep: """Create a sample PlannedStep for testing.""" return PlannedStep(tool=sample_tool, prompt=sample_prompt, relevance_score=0.95) @staticmethod def test_user_requested_error_simulation(executor_agent, sample_planned_step): """Test error simulation triggered by user input containing error keywords.""" # Arrange test_inputs = {"text_input": "This should fail and show an error"} # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert assert result["status"] == "simulated_error" error_info = result["error_information"] assert error_info["error_type"] == "user_requested" assert error_info["error_code"] == "USR_REQ_001" assert error_info["retry_recommended"] is True assert ( "User explicitly requested error simulation" in error_info["error_message"] ) # Check that the output contains user-friendly error message mock_output = result["results"]["mock_output"] assert "Error Simulation Activated" in mock_output assert "User-Requested Error" in mock_output @staticmethod def test_test_scenario_error_simulation(executor_agent, sample_planned_step): """Test error simulation triggered by test scenario keywords.""" # Arrange test_inputs = {"text_input": "test error scenario for validation"} # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert assert result["status"] == "simulated_error" error_info = result["error_information"] assert error_info["error_type"] == "test_scenario" assert error_info["error_code"] == "TST_ERR_001" assert error_info["retry_recommended"] is True mock_output = result["results"]["mock_output"] assert "Test Error Scenario" in mock_output assert "Error Simulation Active" in mock_output @staticmethod def test_input_too_large_error_simulation( executor_agent, sample_planned_step ): """Test error simulation for input size limits.""" # Arrange - Create input larger than 10,000 characters large_input = "x" * 10001 test_inputs = {"text_input": large_input} # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert assert result["status"] == "simulated_error" error_info = result["error_information"] assert error_info["error_type"] == "input_too_large" assert 
error_info["error_code"] == "VAL_001" assert error_info["retry_recommended"] is True assert "10001 characters" in error_info["error_details"] mock_output = result["results"]["mock_output"] assert "Input Size Error" in mock_output assert "Input Too Large" in mock_output @staticmethod def test_security_violation_error_simulation( executor_agent, sample_planned_step ): """Test error simulation for security violations.""" # Arrange test_inputs = { "text_input": "Process this content" } # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert assert result["status"] == "simulated_error" error_info = result["error_information"] assert error_info["error_type"] == "security_violation" assert error_info["error_code"] == "SEC_001" assert error_info["retry_recommended"] is False mock_output = result["results"]["mock_output"] assert "Security Error" in mock_output assert "Security Violation Detected" in mock_output @staticmethod def test_corrupted_file_error_for_image_tool(executor_agent): """Test error simulation for corrupted files in image tools.""" # Arrange image_tool = MCPTool( tool_id="image_caption_003", name="Image Caption Generator", description="Generate captions for images", tags=["image", "ai"], invocation_command_stub="caption_image {image}", ) image_prompt = MCPPrompt( prompt_id="caption-prompt", name="Image Captioning", description="Caption an image", target_tool_id="image_caption_003", template_string="Caption: {{image_file}}", input_variables=["image_file"], difficulty_level="beginner", ) planned_step = PlannedStep( tool=image_tool, prompt=image_prompt, relevance_score=0.85 ) test_inputs = {"image_file": "broken_image.jpg"} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert assert result["status"] == "simulated_error" error_info = result["error_information"] assert error_info["error_type"] == "corrupted_file" assert error_info["error_code"] == "FILE_001" assert error_info["retry_recommended"] is True mock_output = result["results"]["mock_output"] assert "File Processing Error" in mock_output assert "Corrupted File Detected" in mock_output @staticmethod def test_wrong_file_type_error_for_image_tool(executor_agent): """Test error simulation for wrong file types in image tools.""" # Arrange image_tool = MCPTool( tool_id="image_caption_003", name="Image Caption Generator", description="Generate captions for images", tags=["image", "ai"], invocation_command_stub="caption_image {image}", ) image_prompt = MCPPrompt( prompt_id="caption-prompt", name="Image Captioning", description="Caption an image", target_tool_id="image_caption_003", template_string="Caption: {{image_file}}", input_variables=["image_file"], difficulty_level="beginner", ) planned_step = PlannedStep( tool=image_tool, prompt=image_prompt, relevance_score=0.85 ) test_inputs = {"image_file": "document.txt"} # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert assert result["status"] == "simulated_error" error_info = result["error_information"] assert error_info["error_type"] == "wrong_file_type" assert error_info["error_code"] == "FILE_002" assert error_info["retry_recommended"] is True mock_output = result["results"]["mock_output"] assert "File Type Error" in mock_output assert "Unsupported File Type" in mock_output @patch("agents.executor.random.random") def test_random_error_simulation( self, mock_random, executor_agent, sample_planned_step ): """Test random error simulation.""" # Arrange - Force random error (10% chance 
normally) mock_random.return_value = 0.05 # Less than 0.1, should trigger random error test_inputs = {"text_input": "normal input"} # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert - Should get a random error assert result["status"] == "simulated_error" error_info = result["error_information"] assert error_info["error_type"] in [ "network_timeout", "service_unavailable", "rate_limit_exceeded", "temporary_overload", ] assert error_info["retry_recommended"] is True @patch("agents.executor.random.random") def test_no_random_error_simulation( self, mock_random, executor_agent, sample_planned_step ): """Test that random errors don't trigger when probability is too high.""" # Arrange - Prevent random error mock_random.return_value = ( 0.15 # Greater than 0.1, should not trigger random error ) test_inputs = {"text_input": "normal input"} # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert - Should get normal success assert result["status"] == "simulated_success" assert "error_information" not in result @staticmethod def test_error_response_structure(executor_agent, sample_planned_step): """Test that error responses have the correct structure.""" # Arrange test_inputs = {"text_input": "trigger error simulation"} # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert - Check error response structure assert result["status"] == "simulated_error" # Check required top-level keys expected_keys = [ "status", "execution_id", "tool_information", "prompt_information", "execution_details", "error_information", "results", "metadata", ] for key in expected_keys: assert key in result, f"Missing key: {key}" # Check error_information structure error_info = result["error_information"] error_keys = [ "error_type", "error_severity", "error_code", "error_message", "error_details", "suggested_fixes", "retry_recommended", ] for key in error_keys: assert key in error_info, f"Missing error info key: {key}" # Check execution details for errors exec_details = result["execution_details"] assert "error_occurred_at" in exec_details assert isinstance(exec_details["error_occurred_at"], int) assert 10 <= exec_details["error_occurred_at"] <= 80 @staticmethod def test_error_execution_time_shorter(executor_agent, sample_planned_step): """Test that error scenarios have shorter execution times.""" # Arrange test_inputs = {"text_input": "simulate error"} # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert assert result["status"] == "simulated_error" execution_time = result["execution_details"]["execution_time_ms"] # Error execution times should be between 100-1000ms (shorter than success 800-2500ms) assert 100 <= execution_time <= 1000 @staticmethod def test_error_confidence_score_zero(executor_agent, sample_planned_step): """Test that error responses have zero confidence score.""" # Arrange test_inputs = {"text_input": "error test"} # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert assert result["status"] == "simulated_error" assert result["results"]["confidence_score"] == 0.0 @staticmethod def test_error_logging(executor_agent, sample_planned_step): """Test that error scenarios are properly logged.""" # Arrange test_inputs = {"text_input": "error logging test"} # Act with patch("agents.executor.logger") as mock_logger: result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert assert result["status"] == 
"simulated_error" # Check that warning was logged for the error mock_logger.warning.assert_called_once() warning_call = mock_logger.warning.call_args[0][0] assert "Simulated" in warning_call assert "error" in warning_call @staticmethod def test_high_severity_error_priority(executor_agent): """Test that high severity errors are prioritized over lower severity ones.""" # Arrange - Create inputs that would trigger both high and medium severity errors test_inputs = { "text_input": "error with content" } tool = MCPTool( tool_id="test-tool", name="Test Tool", description="Test tool", tags=["test"], invocation_command_stub="test {input}", ) prompt = MCPPrompt( prompt_id="test-prompt", name="Test Prompt", description="Test prompt", target_tool_id="test-tool", template_string="Process: {{text_input}}", input_variables=["text_input"], difficulty_level="beginner", ) planned_step = PlannedStep(tool=tool, prompt=prompt, relevance_score=0.95) # Act result = executor_agent.simulate_execution(planned_step, test_inputs) # Assert - Should get the high severity security violation, not the medium severity user_requested error assert result["status"] == "simulated_error" error_info = result["error_information"] assert error_info["error_type"] == "security_violation" # High severity assert error_info["error_severity"] == "high" @staticmethod def test_error_metadata_tracking(executor_agent, sample_planned_step): """Test that error metadata is properly tracked.""" # Arrange test_inputs = {"text_input": "error metadata test"} # Act result = executor_agent.simulate_execution(sample_planned_step, test_inputs) # Assert assert result["status"] == "simulated_error" metadata = result["metadata"] assert metadata["error_simulation"] == "user_requested" assert metadata["simulation_version"] == "MVP3_Sprint4" assert "trigger_info" in metadata assert len(metadata["trigger_info"]) <= 100 # Should be truncated to 100 chars class TestMcpExecutorAgentEnhancedErrorHandling: """Test enhanced error handling in McpExecutorAgent for MVP4 Sprint 2.""" @pytest.fixture def executor_agent(self) -> McpExecutorAgent: """Create a McpExecutorAgent instance for testing.""" return McpExecutorAgent() @pytest.fixture def mcp_tool(self) -> MCPTool: """Create an MCP tool for testing.""" return MCPTool( tool_id="test-mcp-tool", name="Test MCP Tool", description="A tool for testing MCP integration", execution_type="remote_mcp_gradio", mcp_endpoint_url="https://test-mcp-server.hf.space/mcp", timeout_seconds=30, ) @pytest.fixture def sample_prompt(self) -> MCPPrompt: """Create a sample MCPPrompt for testing.""" return MCPPrompt( prompt_id="test-prompt", name="Test Prompt", description="A prompt for testing", target_tool_id="test-mcp-tool", template_string="Process: {{text_input}}", input_variables=["text_input"], difficulty_level="beginner", ) @pytest.fixture def planned_step(self, mcp_tool: MCPTool, sample_prompt: MCPPrompt) -> PlannedStep: """Create a PlannedStep for testing.""" return PlannedStep(tool=mcp_tool, prompt=sample_prompt, relevance_score=0.95) @staticmethod def test_retry_mechanism_for_server_errors(executor_agent, planned_step): """Test that server errors (5xx) trigger retry logic.""" inputs = {"text_input": "test input"} # Mock responses: first two fail with 503, third succeeds mock_responses = [ Mock(status_code=503, text="Service Unavailable"), Mock(status_code=503, text="Service Unavailable"), Mock(status_code=200) ] # Configure the first two to raise HTTPError, third to succeed mock_responses[0].raise_for_status.side_effect = 
requests.exceptions.HTTPError(response=mock_responses[0]) mock_responses[1].raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_responses[1]) mock_responses[2].raise_for_status.return_value = None mock_responses[2].json.return_value = {"data": ["Success after retry!"]} with patch("agents.executor.time.sleep"), \ patch.object(executor_agent.http_session, "post", side_effect=mock_responses): result = executor_agent.execute_plan_step(planned_step, inputs) # Should succeed after retries assert result["status"] == "success_live_mcp" assert result["attempts_made"] == 3 assert "Success after retry!" in result["tool_specific_output"] @staticmethod def test_retry_mechanism_for_timeouts(executor_agent, planned_step): """Test that timeouts trigger retry logic.""" inputs = {"text_input": "test input"} # Mock timeout on first two attempts, success on third side_effects = [ requests.exceptions.Timeout(), requests.exceptions.Timeout(), Mock(status_code=200) ] # Configure successful response success_response = side_effects[2] success_response.raise_for_status.return_value = None success_response.json.return_value = {"data": ["Success after timeout retries!"]} with patch("agents.executor.time.sleep"), \ patch.object(executor_agent.http_session, "post", side_effect=side_effects): result = executor_agent.execute_plan_step(planned_step, inputs) # Should succeed after retries assert result["status"] == "success_live_mcp" assert result["attempts_made"] == 3 assert "Success after timeout retries!" in result["tool_specific_output"] @staticmethod def test_no_retry_for_client_errors(executor_agent, planned_step): """Test that client errors (4xx) don't trigger retries.""" inputs = {"text_input": "test input"} mock_response = Mock(status_code=400, text="Bad Request") mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response) with patch.object(executor_agent.http_session, "post", return_value=mock_response): result = executor_agent.execute_plan_step(planned_step, inputs) # Should fail immediately without retries assert result["status"] == "error_live_mcp_http" assert result["error_details"]["attempts_made"] == 1 assert result["error_information"]["error_category"] == "input_validation" @staticmethod def test_enhanced_error_categorization(executor_agent, planned_step): """Test that errors are properly categorized.""" inputs = {"text_input": "test input"} # Test different HTTP status codes test_cases = [ (429, "rate_limit"), (503, "server_error"), (401, "authentication"), (400, "input_validation"), (404, "not_found"), ] for status_code, expected_category in test_cases: mock_response = Mock(status_code=status_code, text="Error") mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response) with patch.object(executor_agent.http_session, "post", return_value=mock_response): result = executor_agent.execute_plan_step(planned_step, inputs) assert result["error_information"]["error_category"] == expected_category @staticmethod def test_recovery_suggestions_for_different_errors(executor_agent, planned_step): """Test that appropriate recovery suggestions are provided.""" inputs = {"text_input": "test input"} # Test rate limit error mock_response = Mock(status_code=429, text="Rate Limited") mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response) with patch.object(executor_agent.http_session, "post", return_value=mock_response): result = executor_agent.execute_plan_step(planned_step, 
inputs) suggestions = result["error_information"]["recovery_suggestions"] assert any("wait" in suggestion.lower() for suggestion in suggestions) assert any("rate limit" in suggestion.lower() for suggestion in suggestions) @staticmethod def test_connection_error_handling(executor_agent, planned_step): """Test handling of connection errors with retry logic.""" inputs = {"text_input": "test input"} # Mock connection errors on first two attempts, success on third side_effects = [ requests.exceptions.ConnectionError("Connection failed"), requests.exceptions.ConnectionError("Connection failed"), Mock(status_code=200) ] # Configure successful response success_response = side_effects[2] success_response.raise_for_status.return_value = None success_response.json.return_value = {"data": ["Success after connection retries!"]} with patch("agents.executor.time.sleep"), \ patch.object(executor_agent.http_session, "post", side_effect=side_effects): result = executor_agent.execute_plan_step(planned_step, inputs) # Should succeed after retries assert result["status"] == "success_live_mcp" assert result["attempts_made"] == 3 @staticmethod def test_max_retries_exhausted(executor_agent, planned_step): """Test behavior when max retries are exhausted.""" inputs = {"text_input": "test input"} # Mock persistent timeout with patch.object(executor_agent.http_session, "post", side_effect=requests.exceptions.Timeout()): result = executor_agent.execute_plan_step(planned_step, inputs) # Should fail after max retries assert result["status"] == "error_live_mcp_timeout" assert result["error_details"]["attempts_made"] == 3 # 1 + 2 retries assert result["error_information"]["retry_recommended"] is True @staticmethod def test_json_parsing_error_handling(executor_agent, planned_step): """Test handling of JSON parsing errors.""" inputs = {"text_input": "test input"} mock_response = Mock(status_code=200, text="Invalid JSON Response") mock_response.raise_for_status.return_value = None mock_response.json.side_effect = json.JSONDecodeError("Invalid JSON", "doc", 0) with patch.object(executor_agent.http_session, "post", return_value=mock_response): result = executor_agent.execute_plan_step(planned_step, inputs) assert result["status"] == "error_mcp_response_parsing" assert result["error_information"]["error_category"] == "data" assert "Invalid JSON Response" in result["error_details"]["response_preview"] @staticmethod def test_invalid_response_format_handling(executor_agent, planned_step): """Test handling of invalid MCP response format.""" inputs = {"text_input": "test input"} mock_response = Mock(status_code=200) mock_response.raise_for_status.return_value = None mock_response.json.return_value = {"error": "No data field"} # Missing 'data' field with patch.object(executor_agent.http_session, "post", return_value=mock_response): result = executor_agent.execute_plan_step(planned_step, inputs) assert result["status"] == "error_mcp_response_parsing" assert "No 'data' field" in result["error_details"]["parse_error"] @staticmethod def test_empty_data_array_handling(executor_agent, planned_step): """Test handling of empty data array in response.""" inputs = {"text_input": "test input"} mock_response = Mock(status_code=200) mock_response.raise_for_status.return_value = None mock_response.json.return_value = {"data": []} # Empty data array with patch.object(executor_agent.http_session, "post", return_value=mock_response): result = executor_agent.execute_plan_step(planned_step, inputs) assert result["status"] == 
"error_mcp_response_parsing" assert "Empty 'data' array" in result["error_details"]["parse_error"] @staticmethod def test_enhanced_error_response_format(executor_agent, planned_step): """Test that enhanced error responses contain all required fields.""" inputs = {"text_input": "test input"} mock_response = Mock(status_code=503, text="Service Unavailable") mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response) with patch.object(executor_agent.http_session, "post", return_value=mock_response): result = executor_agent.execute_plan_step(planned_step, inputs) # Check required fields in error response assert "error_information" in result error_info = result["error_information"] required_fields = [ "error_category", "error_type", "error_message", "recovery_suggestions", "retry_recommended", "user_action_required", "timestamp" ] for field in required_fields: assert field in error_info, f"Missing required field: {field}" # Check error details assert "error_details" in result error_details = result["error_details"] assert "status_code" in error_details assert "endpoint" in error_details assert "attempts_made" in error_details @staticmethod def test_successful_mcp_call_with_enhanced_response(executor_agent, planned_step): """Test successful MCP call returns enhanced response format.""" inputs = {"text_input": "test input"} mock_response = Mock(status_code=200) mock_response.raise_for_status.return_value = None mock_response.json.return_value = {"data": ["Successful response!"]} with patch.object(executor_agent.http_session, "post", return_value=mock_response): result = executor_agent.execute_plan_step(planned_step, inputs) # Check enhanced success response assert result["status"] == "success_live_mcp" assert result["execution_mode"] == "live_mcp" assert result["attempts_made"] == 1 assert result["mcp_endpoint"] == planned_step.tool.mcp_endpoint_url assert "✅ Successfully executed" in result["message"] @staticmethod def test_unknown_execution_type_error(executor_agent, sample_prompt): """Test handling of unknown execution type falls back to simulation.""" # Create tool with invalid execution type by bypassing validation with patch.object(MCPTool, "__post_init__", return_value=None): invalid_tool = MCPTool( tool_id="invalid-tool", name="Invalid Tool", description="Tool with invalid execution type", execution_type="invalid_type", ) # Create planned step with patched tool with patch.object(PlannedStep, "__post_init__", return_value=None): invalid_planned_step = PlannedStep( tool=invalid_tool, prompt=sample_prompt, relevance_score=0.5 ) inputs = {"text_input": "test input"} # Mock random functions to prevent error simulation with patch('agents.executor.random.random', return_value=0.5), \ patch('agents.executor.random.choice', return_value="timeout"), \ patch('agents.executor.random.randint', return_value=50), \ patch('agents.executor.random.uniform', return_value=0.8): result = executor_agent.execute_plan_step(invalid_planned_step, inputs) # With improved fallback logic, unknown execution types should fall back to simulation assert result["status"] == "simulated_success" assert result["execution_mode"] == "simulated" # Should include fallback information in metadata assert "fallback_reason" in result["metadata"] or "execution_type" in result["metadata"] @staticmethod def test_retry_delay_timing(executor_agent, planned_step): """Test that retry delays are properly implemented.""" inputs = {"text_input": "test input"} with patch("agents.executor.time.sleep") as 
    @staticmethod
    def test_retry_delay_timing(executor_agent, planned_step):
        """Test that retry delays are properly implemented."""
        inputs = {"text_input": "test input"}

        with patch("agents.executor.time.sleep") as mock_sleep, \
             patch.object(executor_agent.http_session, "post",
                          side_effect=requests.exceptions.Timeout()):
            executor_agent.execute_plan_step(planned_step, inputs)

        # Should have called sleep twice (for 2 retries)
        assert mock_sleep.call_count == 2
        # Should have been called with the configured delay
        mock_sleep.assert_called_with(executor_agent.retry_delay)

    @staticmethod
    def test_error_message_user_friendliness(executor_agent, planned_step):
        """Test that error messages are user-friendly and informative."""
        inputs = {"text_input": "test input"}

        test_cases = [
            (429, "Rate limit exceeded"),
            (503, "Service temporarily unavailable"),
            (500, "Server error"),
            (400, "Client error"),
        ]

        for status_code, expected_message_part in test_cases:
            mock_response = Mock(status_code=status_code, text="Error details")
            mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
                response=mock_response
            )

            with patch.object(executor_agent.http_session, "post", return_value=mock_response):
                result = executor_agent.execute_plan_step(planned_step, inputs)

            assert expected_message_part.lower() in result["message"].lower()
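    # Summary of the behaviour pinned down by the McpExecutorAgent tests above
    # (reconstructed from the assertions only; "-" means not asserted here, and
    # the mapping inside agents.executor may well cover additional cases):
    #
    #   condition             retried?   error_category     message contains
    #   --------------------  ---------  -----------------  -------------------------------
    #   Timeout / ConnError   yes (x2)   -                  -
    #   HTTP 400              no         input_validation   "Client error"
    #   HTTP 401              -          authentication     -
    #   HTTP 404              -          not_found          -
    #   HTTP 429              -          rate_limit         "Rate limit exceeded"
    #   HTTP 500              -          -                  "Server error"
    #   HTTP 503              -          server_error       "Service temporarily unavailable"
    #   invalid JSON body     -          data               - (status: error_mcp_response_parsing)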