Spaces:

BasalGanglia
/

kgraph-mcp-agent-platform

Sleeping

File size: 77,895 Bytes

"""Tests for ExecutorAgent stub implementation.

This module provides comprehensive testing for the StubExecutorAgent and McpExecutorAgent classes.
The tests focus on validating mock execution simulation, tool-specific output generation,
error handling mechanisms, and system integration scenarios for MVP 3.

Key Testing Areas:
- Mock execution simulation with realistic tool outputs
- Error handling and recovery mechanisms  
- Tool-specific behavior validation (sentiment, summarization, code analysis, image processing)
- Input validation and edge case handling
- System integration and MCP protocol compliance

Educational Notes:
- StubExecutorAgent provides mock execution for testing without external dependencies
- McpExecutorAgent handles real MCP server communication with retry logic
- Tests use fixtures to create reusable test data and maintain consistency
- Parameterized tests validate behavior across multiple input scenarios
"""

import json
import time
from unittest.mock import Mock, patch

import pytest
import requests

from agents.executor import McpExecutorAgent, StubExecutorAgent
from kg_services.ontology import MCPPrompt, MCPTool, PlannedStep


class TestStubExecutorAgent:
    """Comprehensive test suite for StubExecutorAgent class.
    
    This test class validates the mock execution capabilities of StubExecutorAgent,
    which simulates tool execution without requiring actual external services.
    
    Test Categories:
    - Basic execution simulation and response structure
    - Tool-specific output generation and validation
    - Input handling and validation scenarios
    - Error simulation and handling
    - Performance and consistency testing
    
    Educational Purpose:
    These tests demonstrate how to properly mock complex system interactions
    while maintaining realistic behavior patterns for system validation.
    """

    @pytest.fixture
    def executor_agent(self) -> StubExecutorAgent:
        """Create a StubExecutorAgent instance for testing.
        
        Returns:
            StubExecutorAgent: Fresh instance configured for testing scenarios.
            
        Educational Note:
            Fixtures provide isolated test instances, ensuring tests don't interfere
            with each other and maintaining predictable test conditions.
        """
        return StubExecutorAgent()

    @pytest.fixture
    def sample_tool(self) -> MCPTool:
        """Create a sample sentiment analysis MCPTool for testing.
        
        Returns:
            MCPTool: Pre-configured sentiment analyzer tool with realistic metadata.
            
        Educational Note:
            This fixture represents a typical NLP tool configuration that would
            be used in production systems for text sentiment analysis.
        """
        return MCPTool(
            tool_id="sentiment-analyzer-v1",
            name="Advanced Sentiment Analyzer",
            description="Analyze sentiment and emotional tone of text",
            tags=["sentiment", "analysis", "nlp"],
            invocation_command_stub="sentiment_analyze --input {text} --format json",
        )

    @pytest.fixture
    def sentiment_prompt(self) -> MCPPrompt:
        """Create a sentiment analysis prompt for testing.
        
        Returns:
            MCPPrompt: Template for sentiment analysis with input variable mapping.
            
        Educational Note:
            Prompts define how tools should be invoked with specific inputs,
            acting as a bridge between user intent and tool execution.
        """
        return MCPPrompt(
            prompt_id="sentiment-basic-001",
            name="Basic Sentiment Analysis",
            description="Analyze sentiment of provided text",
            target_tool_id="sentiment-analyzer-v1",
            template_string="Analyze the sentiment of this text: {{text_content}}",
            input_variables=["text_content"],
            difficulty_level="beginner",
        )

    @pytest.fixture
    def summarizer_tool(self) -> MCPTool:
        """Create a text summarizer tool for testing.
        
        Returns:
            MCPTool: Pre-configured text summarization tool for document processing.
            
        Educational Note:
            Summarization tools represent complex NLP operations that require
            multiple parameters and produce structured outputs.
        """
        return MCPTool(
            tool_id="text-summarizer-v2",
            name="Intelligent Text Summarizer",
            description="Generate concise summaries of long text documents",
            tags=["summarization", "text", "nlp"],
            invocation_command_stub="summarize --input {text} --length {length}",
        )

    @pytest.fixture
    def summary_prompt(self) -> MCPPrompt:
        """Create a text summarization prompt for testing.
        
        Returns:
            MCPPrompt: Multi-input template for advanced document summarization.
            
        Educational Note:
            This prompt demonstrates how complex tools can accept multiple inputs
            to customize their behavior (document + focus area).
        """
        return MCPPrompt(
            prompt_id="summary-advanced-001",
            name="Advanced Document Summary",
            description="Create comprehensive summary with key points",
            target_tool_id="text-summarizer-v2",
            template_string="Summarize this document: {{document_text}} with focus on {{focus_area}}",
            input_variables=["document_text", "focus_area"],
            difficulty_level="intermediate",
        )

    @pytest.fixture
    def code_quality_tool(self) -> MCPTool:
        """Create a code quality analysis tool for testing.
        
        Returns:
            MCPTool: Pre-configured code analysis tool for quality assessment.
            
        Educational Note:
            Code quality tools represent static analysis capabilities that
            examine source code for security, performance, and style issues.
        """
        return MCPTool(
            tool_id="code-quality-linter",
            name="Code Quality Analyzer",
            description="Analyze code quality, security, and best practices",
            tags=["code", "quality", "security"],
            invocation_command_stub="lint_code --file {code_file} --rules {ruleset}",
        )

    @pytest.fixture
    def image_caption_tool(self) -> MCPTool:
        """Create an image captioning tool for testing.
        
        Returns:
            MCPTool: Pre-configured AI-powered image analysis tool.
            
        Educational Note:
            Image processing tools demonstrate multimodal AI capabilities,
            processing visual inputs to generate textual descriptions.
        """
        return MCPTool(
            tool_id="image-captioner-ai",
            name="AI Image Caption Generator",
            description="Generate descriptive captions for images using AI",
            tags=["image", "caption", "ai", "vision"],
            invocation_command_stub="caption_image --image {image_path} --style {caption_style}",
        )

    @pytest.fixture
    def sample_planned_step(
        self, sample_tool: MCPTool, sentiment_prompt: MCPPrompt
    ) -> PlannedStep:
        """Create a sample PlannedStep for testing execution workflows.
        
        Args:
            sample_tool: MCPTool fixture for sentiment analysis
            sentiment_prompt: MCPPrompt fixture for sentiment analysis
            
        Returns:
            PlannedStep: Complete execution plan combining tool, prompt, and relevance.
            
        Educational Note:
            PlannedStep represents a complete execution plan that binds a tool
            with a prompt and includes a relevance score indicating how well
            this combination matches the user's intent.
        """
        return PlannedStep(
            tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.92
        )

    def test_executor_initialization(self, executor_agent: StubExecutorAgent) -> None:
        """Test that StubExecutorAgent initializes correctly.
        
        Args:
            executor_agent: StubExecutorAgent fixture
            
        Educational Purpose:
            Validates basic object instantiation and type checking.
            This is a fundamental sanity check that the class can be created.
        """
        assert isinstance(executor_agent, StubExecutorAgent)

    @patch("agents.executor.logger")
    def test_executor_initialization_logging(self, mock_logger: Mock) -> None:
        """Test that initialization logs correctly.
        
        Args:
            mock_logger: Mocked logger to capture log messages
            
        Educational Purpose:
            Demonstrates how to test logging behavior using mocks.
            Proper logging is crucial for debugging and monitoring system behavior.
        """
        # Act: Create executor instance to trigger logging
        StubExecutorAgent()

        # Assert: Verify the expected log message was emitted
        mock_logger.info.assert_called_once_with(
            "StubExecutorAgent initialized for MVP 3"
        )

    @patch("agents.executor.random.random")
    def test_simulate_execution_basic_success(
        self,
        mock_random: Mock,
        executor_agent: StubExecutorAgent,
        sample_planned_step: PlannedStep
    ) -> None:
        """Test basic successful execution simulation with realistic inputs.
        
        Args:
            mock_random: Mocked random function to control error simulation
            executor_agent: StubExecutorAgent fixture
            sample_planned_step: PlannedStep fixture for testing
            
        Educational Purpose:
            Demonstrates the happy path for execution simulation, showing how
            the system processes valid inputs and generates structured responses.
            
        Test Strategy:
            - Mock random to prevent error simulation (>15% threshold)
            - Provide realistic sentiment analysis input
            - Validate complete response structure and content
        """
        # Arrange: Set up test conditions to prevent random errors
        mock_random.return_value = 0.9  # Above 15% threshold - no random errors
        test_inputs: dict[str, str] = {"text_content": "This product is amazing and I love it!"}

        # Act: Simulate execution with the planned step and inputs
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert: Validate response structure and content
        assert isinstance(result, dict), "Response should be a dictionary"
        assert result["status"] == "simulated_success", "Should indicate successful simulation"
        assert "execution_id" in result, "Should include unique execution identifier"
        assert result["tool_information"]["tool_name"] == "Advanced Sentiment Analyzer"
        assert result["prompt_information"]["prompt_name"] == "Basic Sentiment Analysis"
        assert result["execution_details"]["inputs_received"] == test_inputs
        assert "mock_output" in result["results"], "Should include simulated tool output"

    @patch("agents.executor.random.random")
    def test_simulate_execution_comprehensive_structure(
        self,
        mock_random: Mock,
        executor_agent: StubExecutorAgent,
        sample_planned_step: PlannedStep
    ) -> None:
        """Test that execution response has complete and consistent structure.
        
        Args:
            mock_random: Mocked random function to control behavior
            executor_agent: StubExecutorAgent fixture
            sample_planned_step: PlannedStep fixture for testing
            
        Educational Purpose:
            Validates the complete response schema that external systems can rely on.
            This ensures API contract compliance and prevents integration issues.
            
        Test Strategy:
            - Verify all required top-level response keys are present
            - Validate nested object structures for completeness
            - Ensure data types match expected schema
        """
        # Arrange: Configure test to avoid random errors
        mock_random.return_value = 0.9  # Disable random error simulation
        test_inputs: dict[str, str] = {"text_content": "Test content"}

        # Act: Execute simulation and capture full response
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert: Validate complete response structure
        # Top-level keys must be present for API contract compliance
        expected_keys: list[str] = [
            "status",           # Execution outcome indicator
            "execution_id",     # Unique identifier for tracking
            "tool_information", # Tool metadata and configuration
            "prompt_information",# Prompt template and variables
            "execution_details",# Runtime information and metrics
            "results",          # Actual tool output and analysis
            "metadata",         # Additional context and debugging info
        ]
        for key in expected_keys:
            assert key in result, f"Required key '{key}' missing from response"

        # Assert: Validate tool_information nested structure
        tool_info = result["tool_information"]
        assert "tool_id" in tool_info, "Tool identifier required"
        assert "tool_name" in tool_info, "Tool name required for UI display"
        assert "tool_description" in tool_info, "Tool description required for context"

        # Assert: Validate execution_details structure
        exec_details = result["execution_details"]
        assert "inputs_received" in exec_details, "Input tracking required"
        assert "inputs_count" in exec_details, "Input metrics required"
        assert "execution_time_ms" in exec_details, "Performance metrics required"
        assert "complexity_level" in exec_details, "Complexity assessment required"

        # Assert: Validate results structure contains actual outputs
        results = result["results"]
        assert "message" in results, "Human-readable message required"
        assert "mock_output" in results, "Simulated tool output required"
        assert "confidence_score" in results, "Quality assessment required"

    @patch("agents.executor.random.random")
    def test_simulate_execution_sentiment_tool_output(
        self,
        mock_random: Mock,
        executor_agent: StubExecutorAgent,
        sample_tool: MCPTool,
        sentiment_prompt: MCPPrompt
    ) -> None:
        """Test sentiment analysis tool produces realistic mock output format.
        
        Args:
            mock_random: Mocked random function to control error simulation
            executor_agent: StubExecutorAgent fixture
            sample_tool: MCPTool fixture for sentiment analysis
            sentiment_prompt: MCPPrompt fixture for sentiment analysis
            
        Educational Purpose:
            Validates that the simulator generates realistic sentiment analysis output
            that matches what real sentiment analysis tools would produce.
            
        Test Strategy:
            - Create planned step combining sentiment tool and prompt
            - Provide positive sentiment text input
            - Verify output contains expected sentiment analysis elements
            - Check for realistic formatting and content structure
        """
        # Arrange: Disable random errors and create realistic test scenario
        mock_random.return_value = 0.5  # Above 15% threshold - no random errors

        planned_step = PlannedStep(
            tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
        )
        # Use clearly positive text to test sentiment detection
        test_inputs: dict[str, str] = {"text_content": "I really enjoy this product!"}

        # Act: Execute sentiment analysis simulation
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert: Validate sentiment analysis specific output format
        mock_output = result["results"]["mock_output"]
        # Check for standard sentiment analysis output components
        assert "Sentiment Analysis Results" in mock_output, "Should contain results header"
        assert "Overall Sentiment Classification" in mock_output, "Should classify sentiment"
        assert "Confidence Metrics" in mock_output, "Should provide confidence scores"
        assert "Positive" in mock_output, "Should detect positive sentiment in test text"
        assert "Generated by Sentiment Analyzer Tool" in mock_output, "Should indicate tool source"

    @patch("agents.executor.random.random")
    def test_simulate_execution_summarizer_tool_output(
        self,
        mock_random: Mock,
        executor_agent: StubExecutorAgent,
        summarizer_tool: MCPTool,
        summary_prompt: MCPPrompt
    ) -> None:
        """Test text summarizer tool produces realistic mock summary output.
        
        Args:
            mock_random: Mocked random function to control error simulation
            executor_agent: StubExecutorAgent fixture  
            summarizer_tool: MCPTool fixture for text summarization
            summary_prompt: MCPPrompt fixture for multi-input summarization
            
        Educational Purpose:
            Validates that the simulator generates realistic text summarization output
            with structured sections that would be expected from real NLP tools.
            
        Test Strategy:
            - Test multi-input prompt handling (document + focus area)
            - Verify output contains expected summarization sections
            - Check for realistic summary structure and formatting
        """
        # Arrange: Configure test to avoid random errors
        mock_random.return_value = 0.5  # Above 15% threshold - no random errors

        planned_step = PlannedStep(
            tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.90
        )
        # Test with multi-input scenario (document text + focus area)
        test_inputs: dict[str, str] = {
            "document_text": "Long document content...",
            "focus_area": "key insights",
        }

        # Act: Execute summarization simulation
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert: Validate summarization specific output format
        mock_output = result["results"]["mock_output"]
        # Check for standard summarization output components
        assert "Text Summarization Complete" in mock_output, "Should contain completion indicator"
        assert "Executive Summary" in mock_output, "Should provide executive summary section"
        assert "Key Points" in mock_output, "Should extract key points from content"
        assert "focus_area" in str(test_inputs), "Should handle multi-input processing"

        # Validate that the tool properly processes multiple inputs
        exec_details = result["execution_details"]
        assert exec_details["inputs_count"] == 2, "Should recognize two input parameters"

    @patch("agents.executor.random.random")
    def test_simulate_execution_code_quality_tool_output(
        self, mock_random, executor_agent, code_quality_tool
    ):
        """Test code quality analysis specific mock output."""
        # Arrange - Disable random errors
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors

        code_prompt = MCPPrompt(
            prompt_id="code-quality-001",
            name="Comprehensive Code Review",
            description="Analyze code for quality and security",
            target_tool_id="code-quality-linter",
            template_string="Review this code: {{source_code}}",
            input_variables=["source_code"],
            difficulty_level="advanced",
        )
        planned_step = PlannedStep(
            tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87
        )
        test_inputs = {"source_code": "def hello_world():\n    print('Hello!')"}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "Code Quality Analysis Complete" in mock_output
        assert "Overall Quality Score" in mock_output
        assert "Analysis Summary" in mock_output
        assert "Quality Metrics" in mock_output
        assert "Recommendations" in mock_output
        assert "Generated by Code Quality Linter Tool" in mock_output

    @patch("agents.executor.random.random")
    def test_simulate_execution_image_caption_tool_output(
        self, mock_random, executor_agent, image_caption_tool
    ):
        """Test image captioning specific mock output."""
        # Arrange
        mock_random.return_value = 0.9  # Disable random errors (15% threshold)
        caption_prompt = MCPPrompt(
            prompt_id="image-caption-001",
            name="Descriptive Image Caption",
            description="Generate detailed image captions",
            target_tool_id="image-captioner-ai",
            template_string="Caption this image: {{image_url}}",
            input_variables=["image_url"],
            difficulty_level="intermediate",
        )
        planned_step = PlannedStep(
            tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91
        )
        test_inputs = {"image_url": "https://example.com/office.jpg"}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "Image Caption Generation Results" in mock_output
        assert "Primary Caption" in mock_output
        assert "Technical Analysis" in mock_output
        assert "Confidence Level" in mock_output
        assert "Alternative Descriptions" in mock_output
        assert "Generated by Image Caption Generator Tool" in mock_output

    @patch("agents.executor.random.random")
    def test_simulate_execution_generic_tool_output(self, mock_random, executor_agent):
        """Test generic mock output for unknown tool types."""
        # Arrange - Disable random errors
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors

        generic_tool = MCPTool(
            tool_id="unknown-tool",
            name="Unknown Analysis Tool",
            description="A tool for unknown analysis",
            tags=["unknown"],
            invocation_command_stub="unknown_analyze {input}",
        )
        generic_prompt = MCPPrompt(
            prompt_id="generic-prompt",
            name="Generic Processing",
            description="Generic prompt for unknown tool",
            target_tool_id="unknown-tool",
            template_string="Process: {{data}}",
            input_variables=["data"],
            difficulty_level="beginner",
        )
        planned_step = PlannedStep(
            tool=generic_tool, prompt=generic_prompt, relevance_score=0.75
        )
        test_inputs = {"data": "sample data"}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "Execution Results for Unknown Analysis Tool" in mock_output
        assert "Successfully processed" in mock_output
        assert "Generic Processing" in mock_output
        assert "Input Analysis" in mock_output
        assert "data**: sample data" in mock_output

    @patch("agents.executor.random.random")
    def test_simulate_execution_empty_inputs(self, mock_random, executor_agent, sample_planned_step):
        """Test execution with empty inputs dictionary."""
        # Arrange - Disable random errors
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors
        empty_inputs: dict[str, str] = {}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, empty_inputs)

        # Assert
        assert result["status"] == "simulated_success"
        assert result["execution_details"]["inputs_received"] == empty_inputs
        assert result["execution_details"]["inputs_count"] == 0

    @staticmethod
    def test_simulate_execution_multiple_inputs(
        executor_agent, summarizer_tool, summary_prompt
    ):
        """Test execution with multiple input variables."""
        # Arrange
        planned_step = PlannedStep(
            tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.85
        )
        test_inputs = {
            "document_text": "Very long document with lots of content...",
            "focus_area": "business insights and recommendations",
        }

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        assert result["execution_details"]["inputs_count"] == 2
        assert result["execution_details"]["inputs_received"] == test_inputs

    @staticmethod
    def test_simulate_execution_invalid_plan_type(executor_agent):
        """Test error handling with invalid plan type."""
        # Arrange
        invalid_plan = {"not": "a planned step"}
        test_inputs = {"input": "test"}

        # Act & Assert
        with pytest.raises(ValueError, match="Plan must be a PlannedStep instance"):
            executor_agent.simulate_execution(invalid_plan, test_inputs)

    @staticmethod
    def test_simulate_execution_invalid_inputs_type(
        executor_agent, sample_planned_step
    ):
        """Test error handling with invalid inputs type."""
        # Arrange
        invalid_inputs = "not a dictionary"

        # Act & Assert
        with pytest.raises(ValueError, match="Inputs must be a dictionary"):
            executor_agent.simulate_execution(sample_planned_step, invalid_inputs)

    @patch("agents.executor.random.random")
    @patch("agents.executor.logger")
    def test_simulate_execution_logging(
        self, mock_logger, mock_random, executor_agent, sample_planned_step
    ):
        """Test that execution logs appropriately."""
        # Arrange - Disable random errors to ensure consistent logging
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors
        test_inputs = {"text_content": "test content"}

        # Act
        executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert
        assert mock_logger.info.call_count >= 3  # 3 execution logs (init not captured by fixture)

        # Check specific log messages
        log_calls = [call[0][0] for call in mock_logger.info.call_args_list]
        assert any("Simulating execution for tool" in log for log in log_calls)
        assert any("Received inputs" in log for log in log_calls)
        assert any("Generated mock response" in log for log in log_calls)

    @patch("agents.executor.random.random")
    def test_execution_id_generation(
        self, mock_random, executor_agent, sample_planned_step
    ):
        """Test that unique execution IDs are generated."""
        # Arrange - Disable random errors
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors

        test_inputs_1 = {"text_content": "first input"}
        test_inputs_2 = {"text_content": "second input"}

        # Act
        result_1 = executor_agent.simulate_execution(sample_planned_step, test_inputs_1)
        result_2 = executor_agent.simulate_execution(sample_planned_step, test_inputs_2)

        # Assert
        assert result_1["execution_id"] != result_2["execution_id"]
        assert result_1["execution_id"].startswith("exec_sentiment-analyzer-v1_")
        assert result_2["execution_id"].startswith("exec_sentiment-analyzer-v1_")

    @staticmethod
    def test_confidence_score_consistency(executor_agent, sample_planned_step):
        """Test that confidence scores are consistent."""
        # Arrange
        test_inputs = {"text_content": "test content"}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert
        confidence = result["results"]["confidence_score"]
        assert isinstance(confidence, int | float)
        assert 0.0 <= confidence <= 1.0

    @staticmethod
    def test_metadata_structure(executor_agent, sample_planned_step):
        """Test that metadata has expected structure."""
        # Arrange
        test_inputs = {"text_content": "test content"}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert
        metadata = result["metadata"]
        assert "simulation_version" in metadata
        assert "timestamp" in metadata
        assert "notes" in metadata
        assert metadata["simulation_version"] == "MVP3_Sprint4"

    # Enhanced Input-Aware Mock Tests
    @patch("agents.executor.random.random")
    def test_text_summarizer_empty_input_handling(
        self, mock_random, executor_agent, summarizer_tool, summary_prompt
    ):
        """Test text summarizer with empty input returns appropriate error message."""
        # Arrange - Disable random errors
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors

        planned_step = PlannedStep(
            tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.90
        )
        test_inputs = {"document_text": ""}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "No text content provided for summarization" in mock_output
        assert "⚠️ **Input Analysis:**" in mock_output
        assert "Recommendation: Please provide text content" in mock_output

    @patch("agents.executor.random.random")
    def test_text_summarizer_content_type_detection(
        self, mock_random, executor_agent, summarizer_tool, summary_prompt
    ):
        """Test text summarizer detects content type and generates appropriate response."""
        # Arrange - Disable random errors
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors

        planned_step = PlannedStep(
            tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.90
        )

        # Test technical content
        tech_inputs = {
            "text": "This function implements a class variable to store programming code patterns."
        }

        # Act
        result = executor_agent.simulate_execution(planned_step, tech_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "technical content" in mock_output
        assert "Technical classification" in mock_output
        assert "code structure, functionality patterns" in mock_output

    @patch("agents.executor.random.random")
    def test_text_summarizer_business_content_detection(
        self, mock_random, executor_agent, summarizer_tool, summary_prompt
    ):
        """Test text summarizer detects business content correctly."""
        # Arrange - Disable random errors
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors

        planned_step = PlannedStep(
            tool=summarizer_tool, prompt=summary_prompt, relevance_score=0.90
        )

        business_inputs = {
            "content": "Our company's market analysis shows excellent customer retention and product sales growth in business sectors."
        }

        # Act
        result = executor_agent.simulate_execution(planned_step, business_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "business content" in mock_output
        assert "Business classification" in mock_output
        assert "market dynamics, customer insights" in mock_output

    @patch("agents.executor.random.random")
    def test_sentiment_analyzer_empty_input_handling(
        self, mock_random, executor_agent, sample_tool, sentiment_prompt
    ):
        """Test sentiment analyzer with empty input returns appropriate error message."""
        # Arrange - Disable random errors
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors

        planned_step = PlannedStep(
            tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
        )
        test_inputs = {"text": ""}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "No text content provided for sentiment analysis" in mock_output
        assert "⚠️ **Input Analysis:**" in mock_output
        assert "Supported input fields:" in mock_output

    @patch("agents.executor.random.random")
    def test_sentiment_analyzer_positive_content_detection(
        self, mock_random, executor_agent, sample_tool, sentiment_prompt
    ):
        """Test sentiment analyzer correctly detects positive sentiment."""
        # Arrange
        mock_random.return_value = 0.9  # Disable random errors (15% threshold)
        planned_step = PlannedStep(
            tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
        )
        positive_inputs = {
            "text": "This product is absolutely amazing and fantastic! I love it and highly recommend it."
        }

        # Act
        result = executor_agent.simulate_execution(planned_step, positive_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "Primary**: Positive" in mock_output
        assert "Joy/Satisfaction:" in mock_output
        assert "**Positive Indicators**: 4 detected" in mock_output

    @patch("agents.executor.random.random")
    def test_sentiment_analyzer_negative_content_detection(
        self, mock_random, executor_agent, sample_tool, sentiment_prompt
    ):
        """Test sentiment analyzer correctly detects negative sentiment."""
        # Arrange - Disable random errors
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors

        planned_step = PlannedStep(
            tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
        )
        negative_inputs = {
            "feedback": "This service was terrible and awful. I hate it and it's the worst experience ever."
        }

        # Act
        result = executor_agent.simulate_execution(planned_step, negative_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "Primary**: Negative" in mock_output
        assert "Frustration:" in mock_output
        assert "**Negative Indicators**:" in mock_output
        assert "feedback" in mock_output  # Source field detection

    @patch("agents.executor.random.random")
    def test_sentiment_analyzer_neutral_content_detection(
        self, mock_random, executor_agent, sample_tool, sentiment_prompt
    ):
        """Test sentiment analyzer correctly detects neutral sentiment."""
        # Arrange - Disable random errors
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors

        planned_step = PlannedStep(
            tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
        )
        neutral_inputs = {
            "message": "The weather today is okay and normal. It's fine and adequate for our needs."
        }

        # Act
        result = executor_agent.simulate_execution(planned_step, neutral_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "Primary**: Neutral" in mock_output
        assert "Neutral:" in mock_output
        assert "**Neutral Indicators**:" in mock_output

    @patch("agents.executor.random.random")
    def test_image_caption_empty_input_handling(
        self, mock_random, executor_agent, image_caption_tool
    ):
        """Test image caption generator with empty input returns appropriate error message."""
        # Arrange
        mock_random.return_value = 0.9  # Disable random errors (15% threshold)
        caption_prompt = MCPPrompt(
            prompt_id="image-caption-001",
            name="Descriptive Image Caption",
            description="Generate detailed image captions",
            target_tool_id="image-captioner-ai",
            template_string="Caption this image: {{image_url}}",
            input_variables=["image_url"],
            difficulty_level="intermediate",
        )
        planned_step = PlannedStep(
            tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91
        )
        test_inputs = {"image_url": ""}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "No image source provided for caption generation" in mock_output
        assert "⚠️ **Input Analysis:**" in mock_output
        assert "Supported input fields:" in mock_output

    @patch("agents.executor.random.random")
    def test_image_caption_workspace_detection(
        self, mock_random, executor_agent, image_caption_tool
    ):
        """Test image caption generator detects workspace images correctly."""
        # Arrange
        mock_random.return_value = 0.9  # Disable random errors (15% threshold)
        caption_prompt = MCPPrompt(
            prompt_id="image-caption-001",
            name="Descriptive Image Caption",
            description="Generate detailed image captions",
            target_tool_id="image-captioner-ai",
            template_string="Caption this image: {{image_path}}",
            input_variables=["image_path"],
            difficulty_level="intermediate",
        )
        planned_step = PlannedStep(
            tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91
        )
        test_inputs = {"image_path": "/uploads/office_workspace_desk.jpg"}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "professional workspace" in mock_output
        assert "workspace" in mock_output.lower()
        assert "Indoor workspace/office environment" in mock_output
        assert "desk" in mock_output

    @patch("agents.executor.random.random")
    def test_image_caption_with_context(self, mock_random, executor_agent, image_caption_tool):
        """Test image caption generator incorporates additional context."""
        # Arrange
        mock_random.return_value = 0.9  # Disable random errors (15% threshold)
        caption_prompt = MCPPrompt(
            prompt_id="image-caption-001",
            name="Descriptive Image Caption",
            description="Generate detailed image captions",
            target_tool_id="image-captioner-ai",
            template_string="Caption this image: {{image}} with context: {{context}}",
            input_variables=["image", "context"],
            difficulty_level="intermediate",
        )
        planned_step = PlannedStep(
            tool=image_caption_tool, prompt=caption_prompt, relevance_score=0.91
        )
        test_inputs = {
            "image": "nature_photo.jpg",
            "context": "Taken during sunrise in the mountains",
        }

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "additional context: Taken during sunrise" in mock_output
        assert "Context Provided**: Yes" in mock_output
        assert "sunrise in the mountains" in mock_output

    @patch("agents.executor.random.random")
    def test_code_linter_empty_input_handling(self, mock_random, executor_agent, code_quality_tool):
        """Test code linter with empty input returns appropriate error message."""
        # Arrange - Disable random errors to ensure consistent behavior
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors
        code_prompt = MCPPrompt(
            prompt_id="code-quality-001",
            name="Comprehensive Code Review",
            description="Analyze code for quality and security",
            target_tool_id="code-quality-linter",
            template_string="Review this code: {{source_code}}",
            input_variables=["source_code"],
            difficulty_level="advanced",
        )
        planned_step = PlannedStep(
            tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87
        )
        test_inputs = {"source_code": ""}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "No code content provided for analysis" in mock_output
        assert "⚠️ **Input Analysis:**" in mock_output
        assert "Supported input fields:" in mock_output

    @patch("agents.executor.random.random")
    def test_code_linter_python_detection(self, mock_random, executor_agent, code_quality_tool):
        """Test code linter correctly detects Python code."""
        # Arrange - Disable random errors to ensure consistent behavior
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors
        code_prompt = MCPPrompt(
            prompt_id="code-quality-001",
            name="Comprehensive Code Review",
            description="Analyze code for quality and security",
            target_tool_id="code-quality-linter",
            template_string="Review this code: {{code}}",
            input_variables=["code"],
            difficulty_level="advanced",
        )
        planned_step = PlannedStep(
            tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87
        )
        python_code = """
def hello_world():
    \"\"\"Print hello world message.\"\"\"
    print("Hello, World!")
    return True

class MyClass:
    def __init__(self):
        pass
"""
        test_inputs = {"code": python_code}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "Language**: Python" in mock_output
        assert "Lines Analyzed**: " in mock_output
        assert "def hello_world():" in mock_output  # Code preview

    @patch("agents.executor.random.random")
    def test_code_linter_issue_detection(self, mock_random, executor_agent, code_quality_tool):
        """Test code linter detects common code issues."""
        # Arrange - Disable random errors
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors
        code_prompt = MCPPrompt(
            prompt_id="code-quality-001",
            name="Comprehensive Code Review",
            description="Analyze code for quality and security",
            target_tool_id="code-quality-linter",
            template_string="Review this code: {{script}}",
            input_variables=["script"],
            difficulty_level="advanced",
        )
        planned_step = PlannedStep(
            tool=code_quality_tool, prompt=code_prompt, relevance_score=0.87
        )
        problematic_code = """
# TODO: Fix this function
def bad_function():
    x = 1


    y = 2
    z = very_long_variable_name_that_exceeds_normal_line_length_and_should_be_flagged_as_an_issue_by_linter = 3
    return x + y + z
"""
        test_inputs = {"script": problematic_code}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "Todo/Fixme comments found" in mock_output
        assert "Long lines detected" in mock_output
        assert "Issues Found**: " in mock_output

    @patch("agents.executor.random.random")
    def test_generic_tool_input_analysis(self, mock_random, executor_agent):
        """Test generic tool provides detailed input analysis."""
        # Arrange - Disable random errors to ensure consistent behavior
        mock_random.return_value = 0.5  # Above 0.1 threshold, no random errors
        custom_tool = MCPTool(
            tool_id="custom-analyzer",
            name="Custom Analysis Tool",
            description="Performs custom data analysis",
            tags=["analysis"],
            invocation_command_stub="analyze {input}",
        )
        custom_prompt = MCPPrompt(
            prompt_id="custom-prompt",
            name="Custom Analysis",
            description="Custom analysis prompt",
            target_tool_id="custom-analyzer",
            template_string="Analyze: {{data}} with {{method}}",
            input_variables=["data", "method"],
            difficulty_level="intermediate",
        )
        planned_step = PlannedStep(
            tool=custom_tool, prompt=custom_prompt, relevance_score=0.82
        )

        test_inputs = {
            "data": "Large dataset with complex information patterns and detailed analysis requirements",
            "method": "statistical",
        }

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert (
            "Processing Complexity**: Simple" in mock_output
        )  # Based on total input length
        assert "Inputs Received**: 2 parameter(s)" in mock_output
        assert "Total Content Length**:" in mock_output
        assert (
            "data**: Large dataset with complex" in mock_output
        )  # Check partial match without ellipsis
        assert "method**: statistical" in mock_output

    @patch("agents.executor.random.random")
    def test_long_input_processing(self, mock_random, executor_agent, sample_tool, sentiment_prompt):
        """Test processing of very long input content."""
        # Arrange
        mock_random.return_value = 0.9  # Disable random errors (15% threshold)
        planned_step = PlannedStep(
            tool=sample_tool, prompt=sentiment_prompt, relevance_score=0.88
        )

        # Create long input content
        long_text = "This is an excellent service. " * 50  # 1500+ characters
        test_inputs = {"text_content": long_text}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        mock_output = result["results"]["mock_output"]
        assert "Primary**: Positive" in mock_output  # Should detect positive sentiment
        assert "Text Length**: " in mock_output
        assert "Analysis Confidence" in mock_output


class TestErrorSimulation:
    """Test suite for error simulation functionality."""

    @pytest.fixture
    def executor_agent(self) -> StubExecutorAgent:
        """Create a StubExecutorAgent instance for testing."""
        return StubExecutorAgent()

    @pytest.fixture
    def sample_tool(self) -> MCPTool:
        """Create a sample MCPTool for testing."""
        return MCPTool(
            tool_id="test-tool-error",
            name="Test Error Tool",
            description="A tool for testing error scenarios",
            tags=["test", "error"],
            invocation_command_stub="test_error {input}",
        )

    @pytest.fixture
    def sample_prompt(self) -> MCPPrompt:
        """Create a sample MCPPrompt for testing."""
        return MCPPrompt(
            prompt_id="test-prompt-error",
            name="Test Error Prompt",
            description="A prompt for testing error scenarios",
            target_tool_id="test-tool-error",
            template_string="Process: {{text_input}}",
            input_variables=["text_input"],
            difficulty_level="beginner",
        )

    @pytest.fixture
    def sample_planned_step(
        self, sample_tool: MCPTool, sample_prompt: MCPPrompt
    ) -> PlannedStep:
        """Create a sample PlannedStep for testing."""
        return PlannedStep(tool=sample_tool, prompt=sample_prompt, relevance_score=0.95)

    @staticmethod
    def test_user_requested_error_simulation(executor_agent, sample_planned_step):
        """Test error simulation triggered by user input containing error keywords."""
        # Arrange
        test_inputs = {"text_input": "This should fail and show an error"}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert
        assert result["status"] == "simulated_error"

        error_info = result["error_information"]
        assert error_info["error_type"] == "user_requested"
        assert error_info["error_code"] == "USR_REQ_001"
        assert error_info["retry_recommended"] is True
        assert (
            "User explicitly requested error simulation" in error_info["error_message"]
        )

        # Check that the output contains user-friendly error message
        mock_output = result["results"]["mock_output"]
        assert "Error Simulation Activated" in mock_output
        assert "User-Requested Error" in mock_output

    @staticmethod
    def test_test_scenario_error_simulation(executor_agent, sample_planned_step):
        """Test error simulation triggered by test scenario keywords."""
        # Arrange
        test_inputs = {"text_input": "test error scenario for validation"}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert
        assert result["status"] == "simulated_error"

        error_info = result["error_information"]
        assert error_info["error_type"] == "test_scenario"
        assert error_info["error_code"] == "TST_ERR_001"
        assert error_info["retry_recommended"] is True

        mock_output = result["results"]["mock_output"]
        assert "Test Error Scenario" in mock_output
        assert "Error Simulation Active" in mock_output

    @staticmethod
    def test_input_too_large_error_simulation(
        executor_agent, sample_planned_step
    ):
        """Test error simulation for input size limits."""
        # Arrange - Create input larger than 10,000 characters
        large_input = "x" * 10001
        test_inputs = {"text_input": large_input}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert
        assert result["status"] == "simulated_error"

        error_info = result["error_information"]
        assert error_info["error_type"] == "input_too_large"
        assert error_info["error_code"] == "VAL_001"
        assert error_info["retry_recommended"] is True
        assert "10001 characters" in error_info["error_details"]

        mock_output = result["results"]["mock_output"]
        assert "Input Size Error" in mock_output
        assert "Input Too Large" in mock_output

    @staticmethod
    def test_security_violation_error_simulation(
        executor_agent, sample_planned_step
    ):
        """Test error simulation for security violations."""
        # Arrange
        test_inputs = {
            "text_input": "Process this <script>alert('hack')</script> content"
        }

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert
        assert result["status"] == "simulated_error"

        error_info = result["error_information"]
        assert error_info["error_type"] == "security_violation"
        assert error_info["error_code"] == "SEC_001"
        assert error_info["retry_recommended"] is False

        mock_output = result["results"]["mock_output"]
        assert "Security Error" in mock_output
        assert "Security Violation Detected" in mock_output

    @staticmethod
    def test_corrupted_file_error_for_image_tool(executor_agent):
        """Test error simulation for corrupted files in image tools."""
        # Arrange
        image_tool = MCPTool(
            tool_id="image_caption_003",
            name="Image Caption Generator",
            description="Generate captions for images",
            tags=["image", "ai"],
            invocation_command_stub="caption_image {image}",
        )
        image_prompt = MCPPrompt(
            prompt_id="caption-prompt",
            name="Image Captioning",
            description="Caption an image",
            target_tool_id="image_caption_003",
            template_string="Caption: {{image_file}}",
            input_variables=["image_file"],
            difficulty_level="beginner",
        )
        planned_step = PlannedStep(
            tool=image_tool, prompt=image_prompt, relevance_score=0.85
        )

        test_inputs = {"image_file": "broken_image.jpg"}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        assert result["status"] == "simulated_error"

        error_info = result["error_information"]
        assert error_info["error_type"] == "corrupted_file"
        assert error_info["error_code"] == "FILE_001"
        assert error_info["retry_recommended"] is True

        mock_output = result["results"]["mock_output"]
        assert "File Processing Error" in mock_output
        assert "Corrupted File Detected" in mock_output

    @staticmethod
    def test_wrong_file_type_error_for_image_tool(executor_agent):
        """Test error simulation for wrong file types in image tools."""
        # Arrange
        image_tool = MCPTool(
            tool_id="image_caption_003",
            name="Image Caption Generator",
            description="Generate captions for images",
            tags=["image", "ai"],
            invocation_command_stub="caption_image {image}",
        )
        image_prompt = MCPPrompt(
            prompt_id="caption-prompt",
            name="Image Captioning",
            description="Caption an image",
            target_tool_id="image_caption_003",
            template_string="Caption: {{image_file}}",
            input_variables=["image_file"],
            difficulty_level="beginner",
        )
        planned_step = PlannedStep(
            tool=image_tool, prompt=image_prompt, relevance_score=0.85
        )

        test_inputs = {"image_file": "document.txt"}

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert
        assert result["status"] == "simulated_error"

        error_info = result["error_information"]
        assert error_info["error_type"] == "wrong_file_type"
        assert error_info["error_code"] == "FILE_002"
        assert error_info["retry_recommended"] is True

        mock_output = result["results"]["mock_output"]
        assert "File Type Error" in mock_output
        assert "Unsupported File Type" in mock_output

    @patch("agents.executor.random.random")
    def test_random_error_simulation(
        self, mock_random, executor_agent, sample_planned_step
    ):
        """Test random error simulation."""
        # Arrange - Force random error (10% chance normally)
        mock_random.return_value = 0.05  # Less than 0.1, should trigger random error

        test_inputs = {"text_input": "normal input"}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert - Should get a random error
        assert result["status"] == "simulated_error"

        error_info = result["error_information"]
        assert error_info["error_type"] in [
            "network_timeout",
            "service_unavailable",
            "rate_limit_exceeded",
            "temporary_overload",
        ]
        assert error_info["retry_recommended"] is True

    @patch("agents.executor.random.random")
    def test_no_random_error_simulation(
        self, mock_random, executor_agent, sample_planned_step
    ):
        """Test that random errors don't trigger when probability is too high."""
        # Arrange - Prevent random error
        mock_random.return_value = (
            0.15  # Greater than 0.1, should not trigger random error
        )

        test_inputs = {"text_input": "normal input"}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert - Should get normal success
        assert result["status"] == "simulated_success"
        assert "error_information" not in result

    @staticmethod
    def test_error_response_structure(executor_agent, sample_planned_step):
        """Test that error responses have the correct structure."""
        # Arrange
        test_inputs = {"text_input": "trigger error simulation"}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert - Check error response structure
        assert result["status"] == "simulated_error"

        # Check required top-level keys
        expected_keys = [
            "status",
            "execution_id",
            "tool_information",
            "prompt_information",
            "execution_details",
            "error_information",
            "results",
            "metadata",
        ]
        for key in expected_keys:
            assert key in result, f"Missing key: {key}"

        # Check error_information structure
        error_info = result["error_information"]
        error_keys = [
            "error_type",
            "error_severity",
            "error_code",
            "error_message",
            "error_details",
            "suggested_fixes",
            "retry_recommended",
        ]
        for key in error_keys:
            assert key in error_info, f"Missing error info key: {key}"

        # Check execution details for errors
        exec_details = result["execution_details"]
        assert "error_occurred_at" in exec_details
        assert isinstance(exec_details["error_occurred_at"], int)
        assert 10 <= exec_details["error_occurred_at"] <= 80

    @staticmethod
    def test_error_execution_time_shorter(executor_agent, sample_planned_step):
        """Test that error scenarios have shorter execution times."""
        # Arrange
        test_inputs = {"text_input": "simulate error"}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert
        assert result["status"] == "simulated_error"
        execution_time = result["execution_details"]["execution_time_ms"]

        # Error execution times should be between 100-1000ms (shorter than success 800-2500ms)
        assert 100 <= execution_time <= 1000

    @staticmethod
    def test_error_confidence_score_zero(executor_agent, sample_planned_step):
        """Test that error responses have zero confidence score."""
        # Arrange
        test_inputs = {"text_input": "error test"}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert
        assert result["status"] == "simulated_error"
        assert result["results"]["confidence_score"] == 0.0

    @staticmethod
    def test_error_logging(executor_agent, sample_planned_step):
        """Test that error scenarios are properly logged."""
        # Arrange
        test_inputs = {"text_input": "error logging test"}

        # Act
        with patch("agents.executor.logger") as mock_logger:
            result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert
        assert result["status"] == "simulated_error"

        # Check that warning was logged for the error
        mock_logger.warning.assert_called_once()
        warning_call = mock_logger.warning.call_args[0][0]
        assert "Simulated" in warning_call
        assert "error" in warning_call

    @staticmethod
    def test_high_severity_error_priority(executor_agent):
        """Test that high severity errors are prioritized over lower severity ones."""
        # Arrange - Create inputs that would trigger both high and medium severity errors
        test_inputs = {
            "text_input": "error with <script>alert('test')</script> content"
        }

        tool = MCPTool(
            tool_id="test-tool",
            name="Test Tool",
            description="Test tool",
            tags=["test"],
            invocation_command_stub="test {input}",
        )
        prompt = MCPPrompt(
            prompt_id="test-prompt",
            name="Test Prompt",
            description="Test prompt",
            target_tool_id="test-tool",
            template_string="Process: {{text_input}}",
            input_variables=["text_input"],
            difficulty_level="beginner",
        )
        planned_step = PlannedStep(tool=tool, prompt=prompt, relevance_score=0.95)

        # Act
        result = executor_agent.simulate_execution(planned_step, test_inputs)

        # Assert - Should get the high severity security violation, not the medium severity user_requested error
        assert result["status"] == "simulated_error"

        error_info = result["error_information"]
        assert error_info["error_type"] == "security_violation"  # High severity
        assert error_info["error_severity"] == "high"

    @staticmethod
    def test_error_metadata_tracking(executor_agent, sample_planned_step):
        """Test that error metadata is properly tracked."""
        # Arrange
        test_inputs = {"text_input": "error metadata test"}

        # Act
        result = executor_agent.simulate_execution(sample_planned_step, test_inputs)

        # Assert
        assert result["status"] == "simulated_error"

        metadata = result["metadata"]
        assert metadata["error_simulation"] == "user_requested"
        assert metadata["simulation_version"] == "MVP3_Sprint4"
        assert "trigger_info" in metadata
        assert len(metadata["trigger_info"]) <= 100  # Should be truncated to 100 chars


class TestMcpExecutorAgentEnhancedErrorHandling:
    """Test enhanced error handling in McpExecutorAgent for MVP4 Sprint 2."""

    @pytest.fixture
    def executor_agent(self) -> McpExecutorAgent:
        """Create a McpExecutorAgent instance for testing."""
        return McpExecutorAgent()

    @pytest.fixture
    def mcp_tool(self) -> MCPTool:
        """Create an MCP tool for testing."""
        return MCPTool(
            tool_id="test-mcp-tool",
            name="Test MCP Tool",
            description="A tool for testing MCP integration",
            execution_type="remote_mcp_gradio",
            mcp_endpoint_url="https://test-mcp-server.hf.space/mcp",
            timeout_seconds=30,
        )

    @pytest.fixture
    def sample_prompt(self) -> MCPPrompt:
        """Create a sample MCPPrompt for testing."""
        return MCPPrompt(
            prompt_id="test-prompt",
            name="Test Prompt",
            description="A prompt for testing",
            target_tool_id="test-mcp-tool",
            template_string="Process: {{text_input}}",
            input_variables=["text_input"],
            difficulty_level="beginner",
        )

    @pytest.fixture
    def planned_step(self, mcp_tool: MCPTool, sample_prompt: MCPPrompt) -> PlannedStep:
        """Create a PlannedStep for testing."""
        return PlannedStep(tool=mcp_tool, prompt=sample_prompt, relevance_score=0.95)

    @staticmethod
    def test_retry_mechanism_for_server_errors(executor_agent, planned_step):
        """Test that server errors (5xx) trigger retry logic."""
        inputs = {"text_input": "test input"}

        # Mock responses: first two fail with 503, third succeeds
        mock_responses = [
            Mock(status_code=503, text="Service Unavailable"),
            Mock(status_code=503, text="Service Unavailable"),
            Mock(status_code=200)
        ]

        # Configure the first two to raise HTTPError, third to succeed
        mock_responses[0].raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_responses[0])
        mock_responses[1].raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_responses[1])
        mock_responses[2].raise_for_status.return_value = None
        mock_responses[2].json.return_value = {"data": ["Success after retry!"]}

        with patch("agents.executor.time.sleep"), \
             patch.object(executor_agent.http_session, "post", side_effect=mock_responses):
            result = executor_agent.execute_plan_step(planned_step, inputs)

        # Should succeed after retries
        assert result["status"] == "success_live_mcp"
        assert result["attempts_made"] == 3
        assert "Success after retry!" in result["tool_specific_output"]

    @staticmethod
    def test_retry_mechanism_for_timeouts(executor_agent, planned_step):
        """Test that timeouts trigger retry logic."""
        inputs = {"text_input": "test input"}

        # Mock timeout on first two attempts, success on third
        side_effects = [
            requests.exceptions.Timeout(),
            requests.exceptions.Timeout(),
            Mock(status_code=200)
        ]

        # Configure successful response
        success_response = side_effects[2]
        success_response.raise_for_status.return_value = None
        success_response.json.return_value = {"data": ["Success after timeout retries!"]}

        with patch("agents.executor.time.sleep"), \
             patch.object(executor_agent.http_session, "post", side_effect=side_effects):
            result = executor_agent.execute_plan_step(planned_step, inputs)

        # Should succeed after retries
        assert result["status"] == "success_live_mcp"
        assert result["attempts_made"] == 3
        assert "Success after timeout retries!" in result["tool_specific_output"]

    @staticmethod
    def test_no_retry_for_client_errors(executor_agent, planned_step):
        """Test that client errors (4xx) don't trigger retries."""
        inputs = {"text_input": "test input"}

        mock_response = Mock(status_code=400, text="Bad Request")
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response)

        with patch.object(executor_agent.http_session, "post", return_value=mock_response):
            result = executor_agent.execute_plan_step(planned_step, inputs)

        # Should fail immediately without retries
        assert result["status"] == "error_live_mcp_http"
        assert result["error_details"]["attempts_made"] == 1
        assert result["error_information"]["error_category"] == "input_validation"

    @staticmethod
    def test_enhanced_error_categorization(executor_agent, planned_step):
        """Test that errors are properly categorized."""
        inputs = {"text_input": "test input"}

        # Test different HTTP status codes
        test_cases = [
            (429, "rate_limit"),
            (503, "server_error"),
            (401, "authentication"),
            (400, "input_validation"),
            (404, "not_found"),
        ]

        for status_code, expected_category in test_cases:
            mock_response = Mock(status_code=status_code, text="Error")
            mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response)

            with patch.object(executor_agent.http_session, "post", return_value=mock_response):
                result = executor_agent.execute_plan_step(planned_step, inputs)

            assert result["error_information"]["error_category"] == expected_category

    @staticmethod
    def test_recovery_suggestions_for_different_errors(executor_agent, planned_step):
        """Test that appropriate recovery suggestions are provided."""
        inputs = {"text_input": "test input"}

        # Test rate limit error
        mock_response = Mock(status_code=429, text="Rate Limited")
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response)

        with patch.object(executor_agent.http_session, "post", return_value=mock_response):
            result = executor_agent.execute_plan_step(planned_step, inputs)

        suggestions = result["error_information"]["recovery_suggestions"]
        assert any("wait" in suggestion.lower() for suggestion in suggestions)
        assert any("rate limit" in suggestion.lower() for suggestion in suggestions)

    @staticmethod
    def test_connection_error_handling(executor_agent, planned_step):
        """Test handling of connection errors with retry logic."""
        inputs = {"text_input": "test input"}

        # Mock connection errors on first two attempts, success on third
        side_effects = [
            requests.exceptions.ConnectionError("Connection failed"),
            requests.exceptions.ConnectionError("Connection failed"),
            Mock(status_code=200)
        ]

        # Configure successful response
        success_response = side_effects[2]
        success_response.raise_for_status.return_value = None
        success_response.json.return_value = {"data": ["Success after connection retries!"]}

        with patch("agents.executor.time.sleep"), \
             patch.object(executor_agent.http_session, "post", side_effect=side_effects):
            result = executor_agent.execute_plan_step(planned_step, inputs)

        # Should succeed after retries
        assert result["status"] == "success_live_mcp"
        assert result["attempts_made"] == 3

    @staticmethod
    def test_max_retries_exhausted(executor_agent, planned_step):
        """Test behavior when max retries are exhausted."""
        inputs = {"text_input": "test input"}

        # Mock persistent timeout
        with patch.object(executor_agent.http_session, "post", side_effect=requests.exceptions.Timeout()):
            result = executor_agent.execute_plan_step(planned_step, inputs)

        # Should fail after max retries
        assert result["status"] == "error_live_mcp_timeout"
        assert result["error_details"]["attempts_made"] == 3  # 1 + 2 retries
        assert result["error_information"]["retry_recommended"] is True

    @staticmethod
    def test_json_parsing_error_handling(executor_agent, planned_step):
        """Test handling of JSON parsing errors."""
        inputs = {"text_input": "test input"}

        mock_response = Mock(status_code=200, text="Invalid JSON Response")
        mock_response.raise_for_status.return_value = None
        mock_response.json.side_effect = json.JSONDecodeError("Invalid JSON", "doc", 0)

        with patch.object(executor_agent.http_session, "post", return_value=mock_response):
            result = executor_agent.execute_plan_step(planned_step, inputs)

        assert result["status"] == "error_mcp_response_parsing"
        assert result["error_information"]["error_category"] == "data"
        assert "Invalid JSON Response" in result["error_details"]["response_preview"]

    @staticmethod
    def test_invalid_response_format_handling(executor_agent, planned_step):
        """Test handling of invalid MCP response format."""
        inputs = {"text_input": "test input"}

        mock_response = Mock(status_code=200)
        mock_response.raise_for_status.return_value = None
        mock_response.json.return_value = {"error": "No data field"}  # Missing 'data' field

        with patch.object(executor_agent.http_session, "post", return_value=mock_response):
            result = executor_agent.execute_plan_step(planned_step, inputs)

        assert result["status"] == "error_mcp_response_parsing"
        assert "No 'data' field" in result["error_details"]["parse_error"]

    @staticmethod
    def test_empty_data_array_handling(executor_agent, planned_step):
        """Test handling of empty data array in response."""
        inputs = {"text_input": "test input"}

        mock_response = Mock(status_code=200)
        mock_response.raise_for_status.return_value = None
        mock_response.json.return_value = {"data": []}  # Empty data array

        with patch.object(executor_agent.http_session, "post", return_value=mock_response):
            result = executor_agent.execute_plan_step(planned_step, inputs)

        assert result["status"] == "error_mcp_response_parsing"
        assert "Empty 'data' array" in result["error_details"]["parse_error"]

    @staticmethod
    def test_enhanced_error_response_format(executor_agent, planned_step):
        """Test that enhanced error responses contain all required fields."""
        inputs = {"text_input": "test input"}

        mock_response = Mock(status_code=503, text="Service Unavailable")
        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response)

        with patch.object(executor_agent.http_session, "post", return_value=mock_response):
            result = executor_agent.execute_plan_step(planned_step, inputs)

        # Check required fields in error response
        assert "error_information" in result
        error_info = result["error_information"]

        required_fields = [
            "error_category", "error_type", "error_message",
            "recovery_suggestions", "retry_recommended",
            "user_action_required", "timestamp"
        ]

        for field in required_fields:
            assert field in error_info, f"Missing required field: {field}"

        # Check error details
        assert "error_details" in result
        error_details = result["error_details"]
        assert "status_code" in error_details
        assert "endpoint" in error_details
        assert "attempts_made" in error_details

    @staticmethod
    def test_successful_mcp_call_with_enhanced_response(executor_agent, planned_step):
        """Test successful MCP call returns enhanced response format."""
        inputs = {"text_input": "test input"}

        mock_response = Mock(status_code=200)
        mock_response.raise_for_status.return_value = None
        mock_response.json.return_value = {"data": ["Successful response!"]}

        with patch.object(executor_agent.http_session, "post", return_value=mock_response):
            result = executor_agent.execute_plan_step(planned_step, inputs)

        # Check enhanced success response
        assert result["status"] == "success_live_mcp"
        assert result["execution_mode"] == "live_mcp"
        assert result["attempts_made"] == 1
        assert result["mcp_endpoint"] == planned_step.tool.mcp_endpoint_url
        assert "✅ Successfully executed" in result["message"]

    @staticmethod
    def test_unknown_execution_type_error(executor_agent, sample_prompt):
        """Test handling of unknown execution type falls back to simulation."""
        
        # Create tool with invalid execution type by bypassing validation
        with patch.object(MCPTool, "__post_init__", return_value=None):
            invalid_tool = MCPTool(
                tool_id="invalid-tool",
                name="Invalid Tool",
                description="Tool with invalid execution type",
                execution_type="invalid_type",
            )

        # Create planned step with patched tool
        with patch.object(PlannedStep, "__post_init__", return_value=None):
            invalid_planned_step = PlannedStep(
                tool=invalid_tool,
                prompt=sample_prompt,
                relevance_score=0.5
            )

        inputs = {"text_input": "test input"}

        # Mock random functions to prevent error simulation
        with patch('agents.executor.random.random', return_value=0.5), \
             patch('agents.executor.random.choice', return_value="timeout"), \
             patch('agents.executor.random.randint', return_value=50), \
             patch('agents.executor.random.uniform', return_value=0.8):
            result = executor_agent.execute_plan_step(invalid_planned_step, inputs)

        # With improved fallback logic, unknown execution types should fall back to simulation
        assert result["status"] == "simulated_success"
        assert result["execution_mode"] == "simulated"
        # Should include fallback information in metadata
        assert "fallback_reason" in result["metadata"] or "execution_type" in result["metadata"]

    @staticmethod
    def test_retry_delay_timing(executor_agent, planned_step):
        """Test that retry delays are properly implemented."""
        inputs = {"text_input": "test input"}

        with patch("agents.executor.time.sleep") as mock_sleep, \
             patch.object(executor_agent.http_session, "post", side_effect=requests.exceptions.Timeout()):

            start_time = time.time()
            result = executor_agent.execute_plan_step(planned_step, inputs)

            # Should have called sleep twice (for 2 retries)
            assert mock_sleep.call_count == 2
            # Should have called with correct delay
            mock_sleep.assert_called_with(executor_agent.retry_delay)

    @staticmethod
    def test_error_message_user_friendliness(executor_agent, planned_step):
        """Test that error messages are user-friendly and informative."""
        inputs = {"text_input": "test input"}

        test_cases = [
            (429, "Rate limit exceeded"),
            (503, "Service temporarily unavailable"),
            (500, "Server error"),
            (400, "Client error"),
        ]

        for status_code, expected_message_part in test_cases:
            mock_response = Mock(status_code=status_code, text="Error details")
            mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(response=mock_response)

            with patch.object(executor_agent.http_session, "post", return_value=mock_response):
                result = executor_agent.execute_plan_step(planned_step, inputs)

            assert expected_message_part.lower() in result["message"].lower()