"""Executor Agent for real and simulated execution of planned steps. This module contains the McpExecutorAgent implementation that bridges the gap between planned actions and actual execution. It supports multiple execution strategies to ensure the system remains functional across different deployment scenarios. Architecture Overview: The executor follows a sophisticated execution strategy pattern with multiple fallback layers to handle real-world deployment challenges: Execution Strategy Hierarchy: 1. Primary: Live MCP Server Calls (production quality) 2. Secondary: Simulation with Tool-Specific Logic (development/demo) 3. Tertiary: Generic Simulation (fallback safety net) Transport Layer Support: - HTTP POST: Standard RESTful API calls to MCP endpoints - Server-Sent Events (SSE): Real-time streaming for long operations - Gradio API: Alternative transport for Gradio-hosted tools - Retry Logic: Automatic recovery from transient failures MVP Evolution Context: - MVP1-3: Simulation-only execution for rapid prototyping - MVP4: Hybrid execution with live MCP server integration - MVP4+: Enhanced error handling and recovery mechanisms - MVP5+: Advanced execution optimization and monitoring Key Design Principles: 1. Resilience: Graceful degradation when services are unavailable 2. Observability: Comprehensive logging for debugging and monitoring 3. User Experience: Clear error messages with actionable recovery suggestions 4. Flexibility: Support for multiple tool execution paradigms 5. Performance: Efficient retry strategies and timeout management Error Handling Philosophy: The executor implements a comprehensive error categorization and recovery system: Error Categories: - Network: Connection, timeout, DNS resolution failures - Server: HTTP 5xx errors, service unavailability - Client: HTTP 4xx errors, authentication, rate limiting - Data: Malformed responses, parsing errors - Configuration: Invalid endpoints, missing parameters - System: Unexpected runtime errors, resource exhaustion Recovery Strategies: - Automatic retry with exponential backoff for transient errors - Fallback to simulation for API failures - User-friendly error messages with specific recovery suggestions - Detailed error context for debugging and support """ import json import logging import random import time from typing import Any, Dict, List, Optional import requests from kg_services.ontology import PlannedStep # Create logger for this module with structured output logger = logging.getLogger(__name__) class McpExecutorAgent: """Executor Agent that supports both real MCP calls and simulated execution. This class provides the core execution functionality for the KGraph-MCP system, bridging planned actions with actual tool invocation. It implements a resilient execution strategy that can handle various deployment scenarios and failure modes. Execution Architecture: The agent operates on a multi-layered execution model: Layer 1 - Real MCP Execution: - Direct HTTP calls to live MCP servers - Support for multiple transport protocols (HTTP, SSE, Gradio API) - Comprehensive retry logic with exponential backoff - Real-time error detection and categorization Layer 2 - Intelligent Simulation: - Tool-specific simulation logic for realistic outputs - Context-aware error simulation for testing - Fallback when live services are unavailable - Maintains user experience during service outages Layer 3 - Generic Fallback: - Basic simulation for unknown tool types - Safety net for unexpected execution paths - Ensures system never completely fails Error Handling Strategy: The agent implements sophisticated error handling with: - Categorized error types for targeted recovery - User-friendly error messages with actionable suggestions - Detailed error context for debugging and support - Automatic fallback to simulation for API failures Performance Characteristics: - HTTP timeouts: Configurable per tool (default: 30s) - Retry attempts: 2 retries with 2s delay between attempts - Memory usage: Minimal, stateless execution model - Concurrency: Thread-safe for parallel executions Example Usage: >>> executor = McpExecutorAgent() >>> >>> # Execute a planned step with user inputs >>> result = executor.execute_plan_step(planned_step, { ... "input_text": "customer feedback to analyze", ... "sentiment_type": "detailed" ... }) >>> >>> if result["status"].startswith("success_"): ... print(f"Output: {result['tool_specific_output']}") ... else: ... print(f"Error: {result['message']}") """ def __init__(self) -> None: """Initialize the McpExecutorAgent with HTTP session and retry configuration. Sets up the execution environment with optimized HTTP session configuration, retry parameters, and logging. The initialization is designed to be lightweight and thread-safe for use in concurrent environments. Configuration: - HTTP Session: Persistent connection pooling for efficiency - User Agent: Identifies requests as coming from KGraph-MCP - Content Type: JSON for all MCP communications - Retry Logic: 2 attempts with 2-second delay between retries - Timeout Handling: Per-tool configurable timeouts Side Effects: - Creates persistent HTTP session for connection pooling - Configures standard headers for MCP communication - Logs initialization for debugging and monitoring """ # Configure HTTP session with persistent connections and standard headers self.http_session = requests.Session() self.http_session.headers.update({ "User-Agent": "KGraph-MCP/1.0", # Identifies our system to MCP servers "Content-Type": "application/json" # Standard MCP communication format }) # MVP4 Sprint 2 Enhanced Error Handling Configuration self.max_retries = 2 # Number of retry attempts for transient failures self.retry_delay = 2.0 # Seconds to wait between retry attempts logger.info( "McpExecutorAgent initialized for MVP 4 with enhanced error handling " f"(max_retries={self.max_retries}, retry_delay={self.retry_delay}s)" ) def execute_plan_step( self, plan: PlannedStep, inputs: Dict[str, str] ) -> Dict[str, Any]: """Execute a planned step using the optimal execution strategy. This is the main entry point for plan execution. It implements the multi-layered execution strategy, attempting live MCP execution first, then falling back to simulation if needed. The method ensures that execution always completes with either real results or realistic simulation. Execution Decision Tree: 1. Check tool execution type 2. If remote_mcp_gradio: Attempt live MCP execution 3. If live execution fails with API errors: Fall back to simulation 4. If live execution fails with network errors: Return error details 5. If not remote: Use simulation directly 6. Log all execution paths for observability Args: plan: The PlannedStep containing tool and prompt information Must have valid tool and prompt with proper target_tool_id matching inputs: Dictionary mapping input variable names to user-provided values Keys should match prompt.input_variables Returns: Dictionary containing execution results with standardized structure: Success Response Structure: { "status": "success_live_mcp" | "success_simulation", "tool_id_used": str, "tool_name_used": str, "prompt_id_used": str, "prompt_name_used": str, "message": str, # User-friendly success message "tool_specific_output": str, # Main result content "execution_mode": "live_mcp" | "simulation", "inputs_sent": dict, # What was actually sent to the tool ...additional context... } Error Response Structure: { "status": "error_*", # Specific error category "message": str, # User-friendly error description "error_information": { "error_category": str, # network, server_error, etc. "error_type": str, # Specific error classification "recovery_suggestions": List[str] # Actionable user guidance }, "error_details": dict, # Technical details for debugging ...execution context... } Execution Modes: - live_mcp: Real HTTP call to MCP server succeeded - simulation: Tool-specific simulation (fallback or direct) - error_*: Various error conditions with specific categorization Performance Considerations: - Network calls: May take 100ms-30s depending on tool complexity - Simulation: Typically <100ms for immediate response - Retry logic: May add 2-6 seconds for transient failures - Memory: Minimal per execution, stateless design Example: >>> plan = PlannedStep(tool=sentiment_tool, prompt=analysis_prompt) >>> inputs = {"input_text": "Great product, highly recommend!"} >>> result = executor.execute_plan_step(plan, inputs) >>> >>> if result["status"] == "success_live_mcp": ... print(f"Live analysis result: {result['tool_specific_output']}") ... elif result["status"] == "success_simulation": ... print(f"Simulated result: {result['tool_specific_output']}") ... else: ... print(f"Error: {result['message']}") ... for suggestion in result.get("error_information", {}).get("recovery_suggestions", []): ... print(f" - {suggestion}") """ logger.info("Executor: Starting execution of tool '%s'", plan.tool.name) # Strategy 1: Attempt live MCP execution for remote tools if plan.tool.execution_type == "remote_mcp_gradio": logger.info("Executor: Attempting live MCP execution for '%s'", plan.tool.name) live_result = self._execute_remote_mcp(plan, inputs) # Success case: Return live execution results if live_result["status"].startswith("success_"): logger.info("Executor: Live MCP execution successful for '%s'", plan.tool.name) return live_result # API failure case: Fall back to simulation with context api_failure_statuses = { "error_live_mcp_gradio_api", "error_gradio_api_max_retries", "error_live_mcp_gradio_api_unexpected" } if live_result["status"] in api_failure_statuses: logger.warning( "Executor: Live MCP failed for '%s' with %s, falling back to simulation", plan.tool.name, live_result['status'] ) return self._execute_simulation(plan, inputs, fallback_reason="mcp_api_failure") # Network/infrastructure failure case: Return detailed error for user action logger.error("Executor: Live MCP failed for '%s' with %s", plan.tool.name, live_result['status']) return live_result # Strategy 2: Handle unknown execution types gracefully known_execution_types = {"remote_mcp_gradio", "local", "simulation", "stub"} if plan.tool.execution_type and plan.tool.execution_type not in known_execution_types: logger.warning( "Executor: Unknown execution type '%s' for tool '%s', falling back to simulation", plan.tool.execution_type, plan.tool.name ) return self._execute_simulation( plan, inputs, fallback_reason="unknown_execution_type", execution_type=plan.tool.execution_type ) # Strategy 3: Direct simulation for non-remote tools logger.info("Executor: Using simulation for non-remote tool '%s'", plan.tool.name) return self._execute_simulation(plan, inputs, fallback_reason="non_remote_tool") def _execute_remote_mcp( self, plan: PlannedStep, inputs: dict[str, str] ) -> dict[str, Any]: """Execute a planned step via HTTP call to live MCP server with retry logic. Args: plan: The PlannedStep with remote MCP tool inputs: Dictionary of input values Returns: Dictionary containing real execution results """ tool = plan.tool mcp_endpoint_url = tool.mcp_endpoint_url or tool.invocation_command_stub logger.info(f"Executor: Making LIVE MCP call to {mcp_endpoint_url}") # For MCP SSE endpoints, make direct SSE call if "/mcp/sse" in mcp_endpoint_url: logger.info("Executor: MCP SSE endpoint detected, making direct SSE call") return self._execute_mcp_sse(plan, inputs) # Try the request with retry logic for attempt in range(self.max_retries + 1): try: # Construct MCP payload mcp_payload = self._construct_mcp_payload(plan, inputs) logger.info(f"Executor: MCP payload (attempt {attempt + 1}): {mcp_payload}") # Make HTTP request response = self.http_session.post( mcp_endpoint_url, json=mcp_payload, timeout=tool.timeout_seconds ) # Handle HTTP errors response.raise_for_status() # Parse response response_data = response.json() tool_output = self._parse_mcp_response(response_data, tool.name) logger.info(f"Executor: Successfully received response from {tool.name}") return { "status": "success_live_mcp", "tool_id_used": tool.tool_id, "tool_name_used": tool.name, "prompt_id_used": plan.prompt.prompt_id, "prompt_name_used": plan.prompt.name, "message": f"✅ Successfully executed live MCP tool '{tool.name}'", "inputs_sent": mcp_payload["data"], "tool_specific_output": tool_output, "execution_mode": "live_mcp", "mcp_endpoint": mcp_endpoint_url, "attempts_made": attempt + 1 } except requests.exceptions.HTTPError as http_err: status_code = http_err.response.status_code response_text = http_err.response.text[:500] if attempt < self.max_retries and status_code in [500, 502, 503, 504]: logger.warning( f"HTTP Error {status_code} on attempt {attempt + 1}/{self.max_retries + 1}. " f"Retrying in {self.retry_delay}s..." ) time.sleep(self.retry_delay) continue # Final attempt failed or non-retryable error error_category = self._categorize_http_error(status_code) recovery_suggestions = self._get_http_error_suggestions(status_code, tool.name) error_message = f"HTTP Error {status_code} calling MCP server {mcp_endpoint_url}" if status_code == 503: error_message += " - Service temporarily unavailable" elif status_code == 429: error_message += " - Rate limit exceeded" elif status_code >= 500: error_message += " - Server error" elif status_code >= 400: error_message += " - Client error" logger.error(f"{error_message}: {response_text}") return self._format_enhanced_error_response( "error_live_mcp_http", error_message, plan, inputs, error_category=error_category, recovery_suggestions=recovery_suggestions, error_details={ "status_code": status_code, "response_text": response_text, "endpoint": mcp_endpoint_url, "attempts_made": attempt + 1 } ) except requests.exceptions.Timeout: if attempt < self.max_retries: logger.warning( f"Timeout on attempt {attempt + 1}/{self.max_retries + 1}. " f"Retrying in {self.retry_delay}s..." ) time.sleep(self.retry_delay) continue error_message = f"Request timeout after {tool.timeout_seconds}s calling {mcp_endpoint_url}" logger.error(error_message) return self._format_enhanced_error_response( "error_live_mcp_timeout", error_message, plan, inputs, error_category="network", recovery_suggestions=[ "Try again - the service may be temporarily slow", "Reduce the complexity or size of your input", "Check if the MCP server is responding at other times", "Contact the tool provider if timeouts persist" ], error_details={ "timeout_seconds": tool.timeout_seconds, "endpoint": mcp_endpoint_url, "attempts_made": attempt + 1 } ) except requests.exceptions.ConnectionError as conn_err: if attempt < self.max_retries: logger.warning( f"Connection error on attempt {attempt + 1}/{self.max_retries + 1}. " f"Retrying in {self.retry_delay}s..." ) time.sleep(self.retry_delay) continue error_message = f"Connection failed to MCP server {mcp_endpoint_url}: {conn_err}" logger.error(error_message) return self._format_enhanced_error_response( "error_live_mcp_connection", error_message, plan, inputs, error_category="network", recovery_suggestions=[ "Check your internet connection", "Verify the MCP server URL is correct", "Try again later - the server may be temporarily down", "Contact the tool provider if the issue persists" ], error_details={ "endpoint": mcp_endpoint_url, "connection_error": str(conn_err), "attempts_made": attempt + 1 } ) except requests.exceptions.RequestException as req_err: error_message = f"Network error calling MCP server {mcp_endpoint_url}: {req_err}" logger.error(error_message) return self._format_enhanced_error_response( "error_live_mcp_network", error_message, plan, inputs, error_category="network", recovery_suggestions=[ "Check your network connection", "Try again in a few moments", "Verify the MCP server is accessible", "Contact support if network issues persist" ], error_details={ "endpoint": mcp_endpoint_url, "network_error": str(req_err), "attempts_made": attempt + 1 } ) except (json.JSONDecodeError, KeyError, IndexError, TypeError) as parse_err: error_message = ( f"Invalid response format from MCP server {mcp_endpoint_url}: {parse_err}" ) logger.error(error_message) # Get response text for debugging response_text = "" if "response" in locals(): try: response_text = str(response.text)[:200] except (AttributeError, TypeError): response_text = str(response)[:200] if hasattr(response, "__str__") else "Mock response" return self._format_enhanced_error_response( "error_mcp_response_parsing", error_message, plan, inputs, error_category="data", recovery_suggestions=[ "The MCP server returned an unexpected response format", "Try again - this may be a temporary server issue", "Contact the tool provider about response format issues", "Verify the tool is configured correctly" ], error_details={ "parse_error": str(parse_err), "response_preview": response_text, "endpoint": mcp_endpoint_url, "attempts_made": attempt + 1 } ) # This should never be reached due to the exception handling above return self._format_enhanced_error_response( "error_unexpected", "Unexpected error in MCP execution", plan, inputs, error_category="system", recovery_suggestions=["Try again or contact support"] ) def _execute_gradio_api( self, plan: PlannedStep, inputs: dict[str, str] ) -> dict[str, Any]: """Execute a planned step via Gradio API instead of SSE MCP endpoint. This is an alternative transport method for MCP tools that use SSE endpoints which may not be compatible with direct HTTP calls. Args: plan: The PlannedStep with remote MCP tool inputs: Dictionary of input values Returns: Dictionary containing real execution results """ tool = plan.tool mcp_endpoint_url = tool.mcp_endpoint_url or tool.invocation_command_stub # Convert SSE endpoint to Gradio API endpoint gradio_api_url = mcp_endpoint_url.replace("/gradio_api/mcp/sse", "/gradio_api/call/predict") logger.info(f"Executor: Using Gradio API transport to {gradio_api_url}") for attempt in range(self.max_retries + 1): try: # Construct Gradio API payload mcp_data_payload_list = [] param_order = ( plan.tool.input_parameter_order if plan.tool.input_parameter_order else plan.prompt.input_variables ) if param_order: for var_name in param_order: value = inputs.get(var_name, "") # Convert numeric parameters to integers for Gradio compatibility if var_name in ["max_length", "min_length", "max_len", "min_len"] and value: try: value = int(value) except (ValueError, TypeError): logger.warning(f"Could not convert {var_name}='{value}' to int, using default") value = 150 if "max" in var_name else 30 mcp_data_payload_list.append(value) gradio_payload = { "data": mcp_data_payload_list, "fn_index": 0 # Assuming first function } logger.info(f"Executor: Gradio API payload (attempt {attempt + 1}): {gradio_payload}") # Step 1: Submit the job response = self.http_session.post( gradio_api_url, json=gradio_payload, timeout=tool.timeout_seconds ) response.raise_for_status() job_data = response.json() if "event_id" not in job_data: raise ValueError(f"No event_id in Gradio API response: {job_data}") event_id = job_data["event_id"] logger.info(f"Executor: Got event_id {event_id}, polling for results...") # Step 2: Poll for results result_url = f"{gradio_api_url}/{event_id}" max_polls = 30 # Maximum number of polling attempts poll_interval = 1 # seconds between polls for poll_attempt in range(max_polls): time.sleep(poll_interval) result_response = self.http_session.get( result_url, timeout=tool.timeout_seconds ) result_response.raise_for_status() result_text = result_response.text.strip() if result_text.startswith("event: complete"): # Parse the SSE-style response lines = result_text.split("\n") data_line = None for line in lines: if line.startswith("data: "): data_line = line[6:] # Remove "data: " prefix break if data_line: try: result_data = json.loads(data_line) if isinstance(result_data, list) and len(result_data) > 0: tool_output = result_data[0] logger.info(f"Executor: Successfully received response from {tool.name}") return { "status": "success_live_mcp", "tool_id_used": tool.tool_id, "tool_name_used": tool.name, "prompt_id_used": plan.prompt.prompt_id, "prompt_name_used": plan.prompt.name, "message": f"✅ Successfully executed live MCP tool '{tool.name}' via Gradio API", "inputs_sent": mcp_data_payload_list, "tool_specific_output": str(tool_output), "execution_mode": "live_mcp", "mcp_endpoint": mcp_endpoint_url, "attempts_made": attempt + 1, "transport_method": "gradio_api" } raise ValueError(f"Empty or invalid result data: {result_data}") except json.JSONDecodeError as e: raise ValueError(f"Could not parse result JSON: {data_line}") from e else: raise ValueError(f"No data line found in SSE response: {result_text}") elif result_text.startswith("event: error"): # Handle error event lines = result_text.split("\n") error_msg = "Unknown error" for line in lines: if line.startswith("data: "): error_msg = line[6:] break raise RuntimeError(f"Gradio API error: {error_msg}") # If we get here, polling timed out raise TimeoutError(f"Polling timeout after {max_polls * poll_interval} seconds") except (requests.exceptions.HTTPError, RuntimeError, ValueError, TimeoutError) as e: if attempt < self.max_retries and isinstance(e, (requests.exceptions.HTTPError, TimeoutError)): logger.warning( f"Error on attempt {attempt + 1}/{self.max_retries + 1}: {e}. " f"Retrying in {self.retry_delay}s..." ) time.sleep(self.retry_delay) continue # Final attempt failed or non-retryable error error_category = "server_error" if isinstance(e, (RuntimeError, TimeoutError)) else "network" error_message = f"Gradio API error calling {gradio_api_url}: {e}" logger.error(error_message) return self._format_enhanced_error_response( "error_live_mcp_gradio_api", error_message, plan, inputs, error_category=error_category, recovery_suggestions=[ "Try again - the service may be temporarily slow", "Check if the Gradio server is responding correctly", "Verify the tool configuration", "Contact the tool provider if issues persist" ], error_details={ "endpoint": gradio_api_url, "original_endpoint": mcp_endpoint_url, "error": str(e), "attempts_made": attempt + 1, "transport_method": "gradio_api" } ) except Exception as e: error_message = f"Unexpected error calling Gradio API {gradio_api_url}: {e}" logger.error(error_message) return self._format_enhanced_error_response( "error_live_mcp_gradio_api_unexpected", error_message, plan, inputs, error_category="system", recovery_suggestions=[ "Try again", "Check the system logs for more details", "Contact support if the issue persists" ], error_details={ "endpoint": gradio_api_url, "error": str(e), "attempts_made": attempt + 1, "transport_method": "gradio_api" } ) return self._format_enhanced_error_response( "error_gradio_api_max_retries", f"Maximum retries exceeded for Gradio API {gradio_api_url}", plan, inputs, error_category="network", recovery_suggestions=["Try again later", "Contact support"] ) def _execute_mcp_sse( self, plan: PlannedStep, inputs: dict[str, str] ) -> dict[str, Any]: """Execute a planned step via MCP SSE endpoint. Args: plan: The PlannedStep with remote MCP tool inputs: Dictionary of input values Returns: Dictionary containing real execution results """ tool = plan.tool mcp_endpoint_url = tool.mcp_endpoint_url or tool.invocation_command_stub logger.info(f"Executor: Making MCP SSE call to {mcp_endpoint_url}") for attempt in range(self.max_retries + 1): try: # Construct MCP payload mcp_data_payload_list = [] param_order = ( plan.tool.input_parameter_order if plan.tool.input_parameter_order else plan.prompt.input_variables ) if param_order: for var_name in param_order: value = inputs.get(var_name, "") # Convert numeric parameters to integers for compatibility if var_name in ["max_length", "min_length", "max_len", "min_len"] and value: try: value = int(value) except (ValueError, TypeError): logger.warning(f"Could not convert {var_name}='{value}' to int, using default") value = 150 if "max" in var_name else 30 mcp_data_payload_list.append(value) mcp_payload = {"data": mcp_data_payload_list} logger.info(f"Executor: MCP SSE payload (attempt {attempt + 1}): {mcp_payload}") # Make POST request to SSE endpoint response = self.http_session.post( mcp_endpoint_url, json=mcp_payload, timeout=tool.timeout_seconds, stream=True # Enable streaming for SSE ) response.raise_for_status() # Parse SSE response response_text = response.text.strip() logger.info(f"Executor: SSE response: {response_text[:200]}...") # Handle different SSE response formats if response_text.startswith("event:"): # Parse SSE format lines = response_text.split("\n") data_line = None for line in lines: if line.startswith("data: "): data_line = line[6:] # Remove "data: " prefix break if data_line: try: result_data = json.loads(data_line) if isinstance(result_data, list) and len(result_data) > 0: tool_output = result_data[0] else: tool_output = result_data except json.JSONDecodeError: tool_output = data_line else: raise ValueError(f"No data found in SSE response: {response_text}") else: # Try parsing as regular JSON try: result_data = response.json() if "data" in result_data and isinstance(result_data["data"], list): tool_output = result_data["data"][0] if result_data["data"] else result_data else: tool_output = result_data except json.JSONDecodeError: # Fallback to raw text tool_output = response_text logger.info(f"Executor: Successfully received SSE response from {tool.name}") return { "status": "success_live_mcp", "tool_id_used": tool.tool_id, "tool_name_used": tool.name, "prompt_id_used": plan.prompt.prompt_id, "prompt_name_used": plan.prompt.name, "message": f"✅ Successfully executed live MCP tool '{tool.name}' via SSE", "inputs_sent": mcp_data_payload_list, "tool_specific_output": str(tool_output), "execution_mode": "live_mcp", "mcp_endpoint": mcp_endpoint_url, "attempts_made": attempt + 1, "transport_method": "mcp_sse" } except (requests.exceptions.HTTPError, RuntimeError, ValueError, TimeoutError) as e: if attempt < self.max_retries and isinstance(e, (requests.exceptions.HTTPError, TimeoutError)): logger.warning( f"Error on attempt {attempt + 1}/{self.max_retries + 1}: {e}. " f"Retrying in {self.retry_delay}s..." ) time.sleep(self.retry_delay) continue # Final attempt failed or non-retryable error error_category = "server_error" if isinstance(e, (RuntimeError, TimeoutError)) else "network" error_message = f"MCP SSE error calling {mcp_endpoint_url}: {e}" logger.error(error_message) return self._format_enhanced_error_response( "error_live_mcp_sse", error_message, plan, inputs, error_category=error_category, recovery_suggestions=[ "Try again - the MCP service may be temporarily slow", "Check if the MCP server is responding correctly", "Verify the tool configuration", "Contact the tool provider if issues persist" ], error_details={ "endpoint": mcp_endpoint_url, "error": str(e), "attempts_made": attempt + 1, "transport_method": "mcp_sse" } ) except Exception as e: error_message = f"Unexpected error calling MCP SSE {mcp_endpoint_url}: {e}" logger.error(error_message) return self._format_enhanced_error_response( "error_live_mcp_sse_unexpected", error_message, plan, inputs, error_category="system", recovery_suggestions=[ "Try again", "Check the system logs for more details", "Contact support if the issue persists" ], error_details={ "endpoint": mcp_endpoint_url, "error": str(e), "attempts_made": attempt + 1, "transport_method": "mcp_sse" } ) return self._format_enhanced_error_response( "error_mcp_sse_max_retries", f"Maximum retries exceeded for MCP SSE {mcp_endpoint_url}", plan, inputs, error_category="network", recovery_suggestions=["Try again later", "Contact support"] ) def _categorize_http_error(self, status_code: int) -> str: """Categorize HTTP errors for better user understanding.""" if status_code == 429: return "rate_limit" if status_code in [500, 502, 503, 504]: return "server_error" if status_code in [401, 403]: return "authentication" if status_code in [400, 422]: return "input_validation" if status_code == 404: return "not_found" return "http_error" def _get_http_error_suggestions(self, status_code: int, tool_name: str) -> list[str]: """Get recovery suggestions based on HTTP status code.""" if status_code == 429: return [ "Wait a few minutes before trying again", "The tool service has rate limits to prevent overuse", "Try again later when usage is lower" ] if status_code in [500, 502, 503, 504]: return [ "The tool service is experiencing technical difficulties", "Try again in a few minutes", f"Contact the {tool_name} service provider if issues persist", "Check the service status page if available" ] if status_code in [401, 403]: return [ "The tool service requires authentication", "Check if the tool configuration includes proper credentials", "Contact the system administrator for access issues" ] if status_code in [400, 422]: return [ "The input format may not be compatible with this tool", "Try simplifying or reformatting your input", "Check the tool documentation for input requirements", "Verify all required fields are provided" ] if status_code == 404: return [ "The tool endpoint could not be found", "Verify the tool URL is correct", "The tool service may have been moved or disabled", "Contact the tool provider for updated endpoint information" ] return [ "An unexpected HTTP error occurred", "Try again in a few moments", "Contact support if the issue persists" ] def _format_enhanced_error_response( self, status: str, message: str, plan: PlannedStep, inputs: dict[str, str], error_category: str = "general", recovery_suggestions: list[str] | None = None, error_details: dict[str, Any] | None = None ) -> dict[str, Any]: """Format an enhanced error response with detailed information and recovery suggestions.""" return { "status": status, "tool_id_used": plan.tool.tool_id, "tool_name_used": plan.tool.name, "prompt_id_used": plan.prompt.prompt_id, "prompt_name_used": plan.prompt.name, "message": f"❌ {message}", "inputs_received": inputs, "tool_specific_output": None, "execution_mode": "live_mcp_failed", "error_information": { "error_category": error_category, "error_type": status, "error_message": message, "recovery_suggestions": recovery_suggestions or [ "Try again in a few moments", "Check your input for any issues", "Contact support if the problem persists" ], "retry_recommended": error_category in ["network", "server_error", "rate_limit"], "user_action_required": error_category in ["input_validation", "authentication"], "timestamp": time.time() }, "error_details": error_details or {} } def _construct_mcp_payload( self, plan: PlannedStep, inputs: dict[str, str] ) -> dict[str, Any]: """Construct MCP-compliant payload from inputs. Args: plan: The PlannedStep containing tool and prompt info inputs: Dictionary of input values Returns: Dictionary with 'data' key containing ordered list of arguments """ mcp_data_payload_list: list[Any] = [] # Use input_parameter_order if specified, otherwise use prompt input_variables param_order = ( plan.tool.input_parameter_order if plan.tool.input_parameter_order else plan.prompt.input_variables ) if param_order: for var_name in param_order: value = inputs.get(var_name) # Convert numeric parameters to integers for Gradio compatibility if var_name in ["max_length", "min_length", "max_len", "min_len"] and value: try: value = int(value) except (ValueError, TypeError): logger.warning(f"Could not convert {var_name}='{value}' to int, using default") value = 150 if "max" in var_name else 30 mcp_data_payload_list.append(value) else: logger.warning( f"No parameter order defined for tool '{plan.tool.name}' " f"or prompt '{plan.prompt.name}'. Sending empty data list." ) return {"data": mcp_data_payload_list} def _parse_mcp_response( self, response_data: dict[str, Any], tool_name: str ) -> str: """Parse MCP server response to extract tool output. Args: response_data: JSON response from MCP server tool_name: Name of the tool for error messages Returns: String containing the tool output Raises: KeyError: If response format is invalid IndexError: If data array is empty """ if "data" not in response_data: raise KeyError(f"No 'data' field in MCP response from {tool_name}") data_array = response_data["data"] if not data_array: raise IndexError(f"Empty 'data' array in MCP response from {tool_name}") # Get the first element as tool output tool_output = data_array[0] # Convert to string if needed if isinstance(tool_output, (dict, list)): return json.dumps(tool_output, indent=2) return str(tool_output) def _execute_simulation( self, plan: PlannedStep, inputs: dict[str, str], fallback_reason: str = "direct_simulation", execution_type: str | None = None ) -> dict[str, Any]: """Execute a planned step via simulation (fallback method). This method provides the same simulation logic as the original StubExecutorAgent for tools that don't have live MCP endpoints. Args: plan: The PlannedStep to simulate inputs: Dictionary of input values fallback_reason: Reason for falling back to simulation execution_type: Original execution type of the tool Returns: Dictionary containing simulated execution results """ logger.info(f"Executor: Falling back to simulation for tool '{plan.tool.name}'") # Check if we should simulate an error error_scenario = self._should_simulate_error(plan, inputs) if error_scenario: return error_scenario # Generate input-aware mock output based on tool type mock_output = self._generate_tool_specific_output(plan, inputs) # Build metadata metadata = { "fallback_reason": fallback_reason, "simulation_version": "MVP4_Sprint2_Enhanced", "timestamp": time.time() } # Include original execution type if provided if execution_type: metadata["execution_type"] = execution_type return { "status": "simulated_success", "tool_id_used": plan.tool.tool_id, "tool_name_used": plan.tool.name, "prompt_id_used": plan.prompt.prompt_id, "prompt_name_used": plan.prompt.name, "message": f"Tool '{plan.tool.name}' execution SIMULATED successfully", "inputs_received": inputs, "tool_specific_output": mock_output, "execution_mode": "simulated", "metadata": metadata } def _should_simulate_error( self, plan: PlannedStep, inputs: dict[str, str] ) -> dict[str, Any] | None: """Check if an error should be simulated based on inputs or random chance. Args: plan: The PlannedStep being executed inputs: Dictionary of input values Returns: Error result dictionary if error should be simulated, None otherwise """ # Random error simulation (10% chance for testing robustness) if random.random() < 0.1: error_types = [ "timeout", "rate_limit", "invalid_input", "service_unavailable", "authentication_failed", ] error_type = random.choice(error_types) return { "status": f"simulated_error_{error_type}", "tool_id_used": plan.tool.tool_id, "tool_name_used": plan.tool.name, "prompt_id_used": plan.prompt.prompt_id, "prompt_name_used": plan.prompt.name, "message": f"Simulated {error_type.replace('_', ' ')} error for tool '{plan.tool.name}'", "inputs_received": inputs, "tool_specific_output": None, "execution_mode": "simulated_error", "error_details": { "error_type": error_type, "simulated": True } } # Simulate input validation errors for var_name in plan.prompt.input_variables: if var_name not in inputs or not inputs[var_name].strip(): return { "status": "simulated_error_missing_input", "tool_id_used": plan.tool.tool_id, "tool_name_used": plan.tool.name, "prompt_id_used": plan.prompt.prompt_id, "prompt_name_used": plan.prompt.name, "message": f"Missing required input '{var_name}' for tool '{plan.tool.name}'", "inputs_received": inputs, "tool_specific_output": None, "execution_mode": "simulated_error", "error_details": { "missing_input": var_name, "simulated": True } } return None def _generate_tool_specific_output( self, plan: PlannedStep, inputs: dict[str, str] ) -> str: """Generate realistic mock output based on the tool type and inputs. Args: plan: The PlannedStep containing tool information inputs: Dictionary of input values Returns: String containing tool-specific mock output """ tool_id = plan.tool.tool_id.lower() tool_name = plan.tool.name.lower() # Sentiment Analysis Tools if "sentiment" in tool_id or "sentiment" in tool_name: # Try prompt variables first, then any text-containing input source_field = None text_input = None # First try prompt variables for var in plan.prompt.input_variables: if var in inputs: text_input = inputs[var] source_field = var break # If not found, try any text-containing input if text_input is None: for key in inputs: if any(word in key.lower() for word in ["text", "content", "message", "feedback", "data"]): text_input = inputs[key] source_field = key break # Fallback if text_input is None: text_input = "sample text" source_field = "default" # Handle empty input if not text_input.strip(): supported_fields = ", ".join(plan.prompt.input_variables) return f"""## ⚠️ Sentiment Analysis Error (Simulated) **No text content provided for sentiment analysis** ⚠️ **Input Analysis:** - No valid text content found in the provided inputs - **Supported input fields:** {supported_fields} **Recommendation: Please provide text content to analyze** *This is simulated output for demonstration purposes.*""" # Analyze text for sentiment text_lower = text_input.lower() positive_keywords = ["amazing", "fantastic", "love", "excellent", "great", "wonderful", "brilliant", "recommend"] negative_keywords = ["terrible", "awful", "hate", "worst", "horrible", "bad", "disappointing"] positive_indicators = len([word for word in positive_keywords if word in text_lower]) negative_indicators = len([word for word in negative_keywords if word in text_lower]) if positive_indicators > negative_indicators and positive_indicators > 0: chosen_sentiment = "Positive" emotion_details = "Joy/Satisfaction: High levels detected" confidence = round(random.uniform(0.8, 0.95), 2) elif negative_indicators > 0: chosen_sentiment = "Negative" emotion_details = "Frustration: Significant negative sentiment identified" confidence = round(random.uniform(0.7, 0.9), 2) else: chosen_sentiment = "Neutral" emotion_details = "Neutral: Balanced or objective tone detected" confidence = round(random.uniform(0.6, 0.85), 2) # Build indicator sections separately to avoid complex f-string expressions indicator_sections = "" if chosen_sentiment == "Positive" and positive_indicators > 0: indicator_sections += f"**Positive Indicators**: {positive_indicators} detected" elif chosen_sentiment == "Negative" and negative_indicators > 0: indicator_sections += f"**Negative Indicators**: {negative_indicators} detected" elif chosen_sentiment == "Neutral": indicator_sections += "**Neutral Indicators**: Balanced tone detected" return f"""## 😊 Sentiment Analysis Results (Simulated) **Overall Sentiment Classification:** {chosen_sentiment} **Primary**: {chosen_sentiment} **Text Analyzed:** "{text_input[:100]}..." **{emotion_details}** **Source Field Analysis:** - Input field analyzed: **{source_field}** - Content classification: Text-based sentiment analysis **Text Length**: {len(text_input)} characters **Confidence Metrics:** - Primary sentiment confidence: {confidence:.0%} - Analysis reliability: High **Analysis Confidence**: {confidence:.0%} based on content analysis {indicator_sections} **Detailed Scores:** - 😊 Positive: {random.randint(10, 80)}% - 😐 Neutral: {random.randint(10, 40)}% - 😞 Negative: {random.randint(5, 60)}% **Generated by Sentiment Analyzer Tool** *This is simulated output for demonstration purposes.*""" # Text Summarization Tools if "summar" in tool_id or "summar" in tool_name: # Try prompt variables first, then any text-containing input text_input = next( (inputs[var] for var in plan.prompt.input_variables if var in inputs), next( (inputs[key] for key in inputs if any(word in key.lower() for word in ["text", "content", "document", "data"])), "sample document content" ) ) # Handle empty input if not text_input.strip(): supported_fields = ", ".join(plan.prompt.input_variables) return f"""## ⚠️ Text Summarization Error (Simulated) **No text content provided for summarization** ⚠️ **Input Analysis:** - No valid text content found in the provided inputs - **Supported input fields:** {supported_fields} **Recommendation: Please provide text content to summarize** *This is simulated output for demonstration purposes.*""" # Detect content type text_lower = text_input.lower() if any(keyword in text_lower for keyword in ["function", "class", "variable", "code", "implement", "programming", "def ", "return", "import"]): content_type = "technical content" classification = "Technical classification" key_topics = "code structure, functionality patterns" elif any(keyword in text_lower for keyword in ["business", "market", "customer", "sales", "growth", "company", "revenue", "strategy"]): content_type = "business content" classification = "Business classification" key_topics = "market dynamics, customer insights" else: content_type = "general content" classification = "General classification" key_topics = "main concepts, key information" max_length = inputs.get("max_length", "150") return f"""## 📄 Text Summarization Complete (Simulated) **Document Metrics:** - Original Length: {len(text_input)} characters - Target Length: {max_length} words - Compression Ratio: {random.randint(60, 85)}% - Content Type: {content_type} **{classification}:** Detected {content_type} requiring specialized analysis approach. **Executive Summary:** This is a simulated summary of the provided text. The key points have been identified and condensed into a shorter format while preserving the essential information and context. **Key Points:** - {key_topics} - Important details preservation - Context maintenance **Generated by Text Summarizer Tool** *This is simulated output for demonstration purposes.*""" # Image Caption Generation Tools if "image" in tool_id or "caption" in tool_id or "image" in tool_name: # Try prompt variables first, then any image-containing input image_input = next( (inputs[var] for var in plan.prompt.input_variables if var in inputs), next( (inputs[key] for key in inputs if any(word in key.lower() for word in ["image", "photo", "picture", "file", "path", "url"])), "sample_image.jpg" ) ) # Handle empty input if not image_input.strip(): supported_fields = ", ".join(plan.prompt.input_variables) return f"""## ⚠️ Image Caption Error (Simulated) **No image source provided for caption generation** ⚠️ **Input Analysis:** - No valid image source found in the provided inputs - **Supported input fields:** {supported_fields} **Recommendation: Please provide an image source to caption** *This is simulated output for demonstration purposes.*""" # Check for additional context information context_info = None context_provided = False for key in inputs: if key.lower() == "context" and inputs[key].strip(): context_info = inputs[key] context_provided = True break # Analyze image path for context image_lower = image_input.lower() if any(keyword in image_lower for keyword in ["workspace", "office", "desk", "professional"]): primary_caption = "A professional workspace environment featuring office equipment and organizational setup" scene_type = "Indoor workspace/office environment" alt_caption = "Professional desk setup in modern office workspace" context_description = "Professional setting with clear workspace organization" objects_detected = random.randint(8, 12) # More objects in workspace elif any(keyword in image_lower for keyword in ["nature", "outdoor", "landscape", "mountain"]): primary_caption = "A natural outdoor scene showcasing environmental elements" scene_type = "Outdoor natural environment" alt_caption = "Scenic natural landscape with environmental features" context_description = "Natural outdoor setting with scenic elements" objects_detected = random.randint(3, 6) else: primary_caption = "An interesting perspective capturing the main subject matter" scene_type = "General" alt_caption = "A clear image depicting the primary focus with good lighting" context_description = "Professional setting with clear visibility" objects_detected = random.randint(3, 8) confidence = random.randint(85, 98) context_analysis_section = "" if context_provided: context_analysis_section = f"""**Context Analysis:** - **Context Provided**: Yes - additional context: {context_info} - Context integration: {context_info} """ return f"""## 🖼️ Image Caption Generation Results (Simulated) **Image Source:** {image_input} **Primary Caption:** {primary_caption} **Technical Analysis:** - Objects detected: {objects_detected} - Scene type: {scene_type} - Image quality: High **Confidence Level:** {confidence}% {context_analysis_section}**Alternative Descriptions:** - Secondary interpretation: {alt_caption} - Contextual description: {context_description} **Generated by Image Caption Generator Tool** *This is simulated output for demonstration purposes.*""" # Code Quality/Linting Tools if "code" in tool_id or "lint" in tool_id or "quality" in tool_id: # Try prompt variables first, then any code-containing input code_input = next( (inputs[var] for var in plan.prompt.input_variables if var in inputs), next( (inputs[key] for key in inputs if any(word in key.lower() for word in ["code", "script", "source", "file", "text", "data"])), "sample code" ) ) # Handle empty input if not code_input.strip(): supported_fields = ", ".join(plan.prompt.input_variables) return f"""## ⚠️ Code Quality Analysis Error (Simulated) **No code content provided for analysis** ⚠️ **Input Analysis:** - No valid code content found in the provided inputs - **Supported input fields:** {supported_fields} **Recommendation: Please provide code content to analyze** *This is simulated output for demonstration purposes.*""" # Detect programming language code_lower = code_input.lower() detected_language = "General" if any(keyword in code_lower for keyword in ["def ", "import ", "class ", "print(", "if __name__"]): detected_language = "Python" elif any(keyword in code_lower for keyword in ["function ", "var ", "const ", "console.log", "=>"]): detected_language = "JavaScript" elif any(keyword in code_lower for keyword in ["public class", "import java", "system.out"]): detected_language = "Java" elif any(keyword in code_lower for keyword in ["#include", "int main", "cout", "using namespace"]): detected_language = "C++" lines_count = len(code_input.split("\n")) # Detect specific issues issues_found = [] if "todo" in code_lower or "fixme" in code_lower: issues_found.append("Todo/Fixme comments found") if any(len(line) > 100 for line in code_input.split("\n")): issues_found.append("Long lines detected") issues_count = len(issues_found) if issues_found else random.randint(0, 3) quality_score = random.randint(75, 95) # Extract first non-empty line for preview (avoiding backslash in f-string) code_lines = code_input.split("\n") first_line = next((line.strip() for line in code_lines if line.strip()), "No code preview available") return f"""## 🔍 Code Quality Analysis Complete (Simulated) **Language**: {detected_language} **Lines Analyzed**: {lines_count} **Overall Quality Score:** {quality_score}/100 **Quality Metrics:** - Code Length: {len(code_input)} characters - Issues Found: {issues_count} - Compliance Level: {'High' if quality_score > 85 else 'Medium'} **Code Preview:** {first_line} **Analysis Summary:** {'✅ No major issues found!' if issues_count == 0 else f'⚠️ {issues_count} potential improvements identified'} {f"**Issues Found**: {issues_count}" if issues_count > 0 else ""} {chr(10).join([f"- {issue}" for issue in issues_found]) if issues_found else ""} **Recommendations:** - Style compliance: {'✅ Good' if random.choice([True, False]) else '⚠️ Minor issues'} - Security: {'✅ Secure' if random.choice([True, False]) else '⚠️ Review needed'} - Performance: {'✅ Optimized' if random.choice([True, False]) else '💡 Suggestions available'} **Generated by Code Quality Linter Tool** *This is simulated output for demonstration purposes.*""" # Generic fallback for other tools input_analysis = "\n".join([f"- **{k}**: {v}" for k, v in inputs.items() if v]) # Calculate processing complexity based on total input length total_content_length = sum(len(str(v)) for v in inputs.values()) if total_content_length < 100: complexity = "Simple" elif total_content_length < 500: complexity = "Medium" else: complexity = "Complex" return f"""## 🛠️ Execution Results for {plan.tool.name} (Simulated) **Successfully processed** the provided inputs using {plan.prompt.name}. **Processing Complexity**: {complexity} **Inputs Received**: {len(inputs)} parameter(s) **Total Content Length**: {total_content_length} characters **Input Analysis:** {input_analysis} **Generic Processing:** The tool has been executed successfully with the provided inputs. This is a generic simulated response demonstrating that the tool would process your request and return relevant results. **Processing Details:** - Execution time: {random.randint(500, 2000)}ms - Success rate: {random.randint(90, 99)}% - Data processed: {len(str(inputs))} bytes *This is simulated output for demonstration purposes.*""" # Legacy StubExecutorAgent for backward compatibility class StubExecutorAgent: """Legacy stub executor agent for backward compatibility.""" def __init__(self) -> None: """Initialize the StubExecutorAgent.""" logger.info("StubExecutorAgent initialized for MVP 3") def simulate_execution( self, plan: PlannedStep, inputs: dict[str, str] ) -> dict[str, Any]: """Simulate execution of a planned step. Args: plan: The PlannedStep to simulate inputs: Dictionary of input values Returns: Dictionary containing simulated execution results with expected test structure Raises: ValueError: If plan or inputs are invalid """ if not isinstance(plan, PlannedStep): raise ValueError("Plan must be a PlannedStep instance") if not isinstance(inputs, dict): raise ValueError("Inputs must be a dictionary") logger.info(f"Simulating execution for tool '{plan.tool.name}' with prompt '{plan.prompt.name}'") logger.info(f"Received inputs: {inputs}") # Check for sophisticated error simulation based on input content error_simulation = self._detect_error_simulation(plan, inputs) if error_simulation: return error_simulation # Use the McpExecutorAgent's simulation method for mock output generation mcp_executor = McpExecutorAgent() mock_output = mcp_executor._generate_tool_specific_output(plan, inputs) # Generate execution ID with timestamp and random component for uniqueness execution_id = f"exec_{plan.tool.tool_id}_{int(time.time() * 1000)}_{random.randint(100, 999)}" logger.info(f"Generated mock response for '{plan.tool.name}' with execution_id: {execution_id}") return { "status": "simulated_success", "execution_id": execution_id, "tool_information": { "tool_id": plan.tool.tool_id, "tool_name": plan.tool.name, "tool_description": plan.tool.description }, "prompt_information": { "prompt_id": plan.prompt.prompt_id, "prompt_name": plan.prompt.name, "prompt_description": plan.prompt.description }, "execution_details": { "inputs_received": inputs, "inputs_count": len(inputs), "execution_time_ms": random.randint(800, 2500), "complexity_level": plan.prompt.difficulty_level }, "results": { "message": f"Tool '{plan.tool.name}' execution SIMULATED successfully", "mock_output": mock_output, "confidence_score": round(random.uniform(0.75, 0.95), 2) }, "metadata": { "simulation_version": "MVP3_Sprint4", "timestamp": time.time(), "notes": "Simulated execution for testing and development purposes" } } def _detect_error_simulation(self, plan: PlannedStep, inputs: dict[str, str]) -> dict[str, Any] | None: """Detect various error simulation triggers and return appropriate error responses. Args: plan: The PlannedStep being executed inputs: Dictionary of input values Returns: Error response dictionary if error should be simulated, None otherwise """ # Combine all input text for analysis all_input_text = " ".join(str(v) for v in inputs.values()).lower() # Check for security violations (highest priority) if "