"""Ontology definitions for the Knowledge Graph MCP system.

This module contains the core data structures representing entities
in our knowledge graph, starting with MCP Tools.

This module contains the core data structures representing entities in our knowledge
graph system. These classes form the foundation of the KGraph-MCP architecture:

- MCPTool: Represents capabilities that agents can invoke (e.g., text analysis, image processing)
- MCPPrompt: Provides structured templates for effective tool usage with AI optimization
- PlannedStep: Combines tools and prompts into actionable recommendations

The ontology follows a sophisticated design pattern where tools define "what can be done"
and prompts define "how to do it effectively", with planned steps bridging user intent
to executable actions.

Architecture Pattern:
    User Query → Semantic Search → Tool Discovery → Prompt Selection → Planned Step

Key Design Decisions:
    1. Separation of concerns: Tools vs Prompts vs Execution
    2. Rich metadata for AI-driven optimization (MVP5 sampling preferences)
    3. Comprehensive validation with clear error messages
    4. Backward compatibility for evolving features
"""

from dataclasses import dataclass, field
from typing import Any


@dataclass
class MCPTool:
    """Represents an MCP (Multi-Agent Collaboration Platform) Tool.

    A tool is a discrete capability that can be invoked by agents
    to perform specific tasks within the knowledge graph system.
    Tools represent the "what can be done" aspect of the system.

    Architecture Context:
        MCPTool instances are stored in the knowledge graph and discovered through
        semantic similarity search. They can execute either locally (simulated) or
        remotely via MCP server endpoints.

    Evolution Path:
        - MVP1: Basic tool representation with simple metadata
        - MVP4: Added MCP server integration for remote execution
        - MVP5+: Enhanced with advanced execution parameters

    Attributes:
        tool_id: Unique identifier for the tool (e.g., "text_summarizer_001")
        name: Human-readable name of the tool (e.g., "Text Summarizer")
        description: Detailed description of what the tool does and its capabilities
        tags: List of tags for categorization and semantic discovery (e.g., ["text", "nlp"])
        invocation_command_stub: Template for invoking the tool (e.g., "summarize_text {input}")

        # MVP4+ MCP Server Integration
        execution_type: Type of execution - either "simulated" (local mock) or "remote_mcp_gradio" (HTTP)
        mcp_endpoint_url: Full URL to MCP server endpoint (e.g., "https://space.hf.co/tool/mcp")
        input_parameter_order: Ordered list of parameter names for MCP data array construction
        timeout_seconds: Request timeout for MCP calls to prevent hanging operations
        requires_auth: Whether the tool requires authentication for access

    Example:
        >>> tool = MCPTool(
        ...     tool_id="sentiment_analyzer_001",
        ...     name="Sentiment Analyzer",
        ...     description="Analyzes emotional tone in text using NLP",
        ...     tags=["text", "sentiment", "nlp"],
        ...     execution_type="remote_mcp_gradio",
        ...     mcp_endpoint_url="https://huggingface.co/spaces/sentiment-tool/mcp"
        ... )
        >>> print(tool.name)  # "Sentiment Analyzer"
    """

    # Core tool identification and metadata
    tool_id: str
    name: str
    description: str
    tags: list[str] = field(default_factory=list)
    invocation_command_stub: str = ""

    # MVP4+ MCP Server Support for remote tool execution
    execution_type: str = "simulated"  # "simulated" | "remote_mcp_gradio"
    mcp_endpoint_url: str | None = None  # HF Space URL + /mcp endpoint path
    input_parameter_order: list[str] = field(default_factory=list)  # For MCP data array construction
    timeout_seconds: int = 30  # Prevents hanging on unresponsive MCP servers
    requires_auth: bool = False  # Authentication requirement flag

    def __post_init__(self) -> None:
        """Validate the tool data after initialization.
        
        This validation ensures data integrity and provides clear error messages
        for configuration issues. It follows the fail-fast principle to catch
        problems early in the system lifecycle.
        
        Validation Rules:
            1. Core fields (tool_id, name, description) cannot be empty
            2. execution_type must be a valid enum value
            3. remote_mcp_gradio execution requires valid endpoint URL
            4. timeout_seconds must be positive for operational safety
            
        Raises:
            ValueError: If any validation rule fails with descriptive message
        """
        # Core field validation - essential for tool discovery and display
        if not self.tool_id:
            msg = "tool_id cannot be empty - required for unique identification"
            raise ValueError(msg)
        if not self.name:
            msg = "name cannot be empty - required for user-facing display"
            raise ValueError(msg)
        if not self.description:
            msg = "description cannot be empty - required for semantic search"
            raise ValueError(msg)

        # Execution type validation - ensures proper routing to execution backend
        valid_execution_types = ["simulated", "remote_mcp_gradio"]
        if self.execution_type not in valid_execution_types:
            msg = f"execution_type must be one of {valid_execution_types}"
            raise ValueError(msg)

        # MCP-specific validation - ensures remote execution can succeed
        if self.execution_type == "remote_mcp_gradio":
            if not self.mcp_endpoint_url:
                msg = "mcp_endpoint_url is required for remote_mcp_gradio execution"
                raise ValueError(msg)
            # Validate URL format to prevent runtime errors
            if not self.mcp_endpoint_url.startswith(("http://", "https://")):
                msg = "mcp_endpoint_url must be a valid HTTP/HTTPS URL"
                raise ValueError(msg)

        # Operational safety validation
        if self.timeout_seconds <= 0:
            msg = "timeout_seconds must be positive to prevent infinite waits"
            raise ValueError(msg)


@dataclass
class MCPPrompt:
    """Represents a prompt template for MCP tool usage with AI optimization features.

    A prompt provides structured guidance for effectively invoking MCP tools, including
    template strings with variable placeholders and usage examples. Prompts represent
    the "how to do it effectively" aspect of the system.

    Architecture Context:
        MCPPrompt instances are paired with MCPTool instances through PlannedStep objects.
        They enable semantic matching between user intent and effective tool usage patterns.

    Evolution Path:
        - MVP1: Basic prompt templates with simple placeholders
        - MVP2: Enhanced with difficulty levels and example inputs
        - MVP5: Added comprehensive AI sampling preferences for model optimization

    AI Optimization Features (MVP5):
        The sampling preferences enable intelligent model selection and configuration
        based on the specific requirements of each prompt. This allows the system
        to automatically optimize for cost, speed, or intelligence based on context.

    Attributes:
        # Core prompt identification and structure
        prompt_id: Unique identifier for the prompt (e.g., "sentiment_analysis_basic_001")
        name: Human-readable name (e.g., "Basic Sentiment Analysis")
        description: Detailed description of the prompt's purpose and use case
        target_tool_id: ID of the MCPTool this prompt is designed for (foreign key relationship)
        template_string: Template with {{variable}} placeholders for dynamic content
        tags: List of tags for categorization and discovery (e.g., ["beginner", "sentiment"])
        input_variables: List of variable names used in template_string (extracted automatically)
        use_case: Specific use case description for context-aware selection
        difficulty_level: Complexity level helping users choose appropriate prompts
        example_inputs: Dictionary mapping variable names to realistic example values

        # MVP5 AI Sampling Preferences for intelligent model selection
        preferred_model_hints: List of model names that work well for this prompt type
        cost_priority_score: Priority score for cost efficiency (0.0=low, 1.0=high priority)
        speed_priority_score: Priority score for response speed (0.0=low, 1.0=high priority)
        intelligence_priority_score: Priority score for model intelligence (0.0=low, 1.0=high priority)
        default_sampling_temperature: Default temperature for sampling (0.0=deterministic, 2.0=creative)
        default_max_tokens_sampling: Default max tokens for sampling responses
        default_system_prompt_hint: System prompt hint for this specific prompt context
        sampling_context_inclusion_hint: Context inclusion preference for MCP sampling

    Example:
        >>> prompt = MCPPrompt(
        ...     prompt_id="sentiment_basic_001",
        ...     name="Basic Sentiment Analysis",
        ...     description="Analyzes emotional tone with simple positive/negative classification",
        ...     target_tool_id="sentiment_analyzer_001",
        ...     template_string="Analyze the sentiment of: {{input_text}}",
        ...     difficulty_level="beginner",
        ...     preferred_model_hints=["gpt-3.5-turbo", "claude-instant"],
        ...     cost_priority_score=0.8,  # High cost priority for basic analysis
        ...     speed_priority_score=0.9   # High speed priority for simple tasks
        ... )
        >>> print(prompt.input_variables)  # Automatically extracted: ["input_text"]
    """

    # Core prompt identification and structure
    prompt_id: str
    name: str
    description: str
    target_tool_id: str  # Foreign key relationship to MCPTool
    template_string: str
    tags: list[str] = field(default_factory=list)
    input_variables: list[str] = field(default_factory=list)
    use_case: str = ""
    difficulty_level: str = "beginner"  # "beginner" | "intermediate" | "advanced"
    example_inputs: dict[str, str] = field(default_factory=dict)

    # MVP5 Sampling preferences for AI optimization (all optional for backward compatibility)
    preferred_model_hints: list[str] | None = field(default_factory=list)
    cost_priority_score: float | None = None  # 0.0 (low priority) to 1.0 (high priority)
    speed_priority_score: float | None = None  # 0.0 (low priority) to 1.0 (high priority)
    intelligence_priority_score: float | None = None  # 0.0 (low priority) to 1.0 (high priority)
    default_sampling_temperature: float | None = None  # e.g., 0.7 for balanced creativity
    default_max_tokens_sampling: int | None = None  # e.g., 512 for typical responses
    default_system_prompt_hint: str | None = None  # e.g., "You are an expert summarizer."
    sampling_context_inclusion_hint: str | None = None  # "none" | "thisServer" | "allServers"

    def __post_init__(self) -> None:
        """Validate prompt data after initialization.
        
        This comprehensive validation ensures prompt quality and prevents runtime
        errors during tool execution. It validates both the core prompt structure
        and the advanced MVP5 sampling preferences.
        
        Validation Categories:
            1. Core Structure: Essential fields and template validity
            2. Difficulty Level: Ensures proper user experience categorization
            3. Sampling Preferences: Validates AI optimization parameters
            4. Data Consistency: Ensures logical relationships between fields
            
        Raises:
            ValueError: If any validation rule fails with descriptive message
        """
        # Core structure validation - essential for prompt functionality
        if not self.prompt_id:
            raise ValueError("prompt_id cannot be empty - required for unique identification")
        if not self.name:
            raise ValueError("name cannot be empty - required for user-facing display")
        if not self.description:
            raise ValueError("description cannot be empty - required for semantic search")
        if not self.target_tool_id:
            raise ValueError("target_tool_id cannot be empty - required for tool-prompt linking")
        if not self.template_string:
            msg = "template_string cannot be empty - required for prompt execution"
            raise ValueError(msg)

        # Difficulty level validation - ensures proper UX categorization
        valid_difficulty_levels = ["beginner", "intermediate", "advanced"]
        if self.difficulty_level not in valid_difficulty_levels:
            msg = f"difficulty_level must be one of {valid_difficulty_levels}"
            raise ValueError(msg)

        # MVP5 Sampling preference validation - ensures valid AI optimization parameters
        self._validate_priority_score(self.cost_priority_score, "cost_priority_score")
        self._validate_priority_score(self.speed_priority_score, "speed_priority_score")
        self._validate_priority_score(self.intelligence_priority_score, "intelligence_priority_score")

        # Temperature validation - ensures valid sampling range
        if (
            self.default_sampling_temperature is not None
            and not 0.0 <= self.default_sampling_temperature <= 2.0
        ):
            msg = "default_sampling_temperature must be between 0.0 and 2.0 (OpenAI standard range)"
            raise ValueError(msg)

        # Token limit validation - ensures positive values for generation
        if (
            self.default_max_tokens_sampling is not None
            and self.default_max_tokens_sampling <= 0
        ):
            msg = "default_max_tokens_sampling must be positive for meaningful generation"
            raise ValueError(msg)

        # Context inclusion validation - ensures valid MCP sampling values
        valid_context_hints = ["none", "thisServer", "allServers", None]
        if self.sampling_context_inclusion_hint not in valid_context_hints:
            msg = f"sampling_context_inclusion_hint must be one of {valid_context_hints}"
            raise ValueError(msg)

    @staticmethod
    def _validate_priority_score(score: float | None, field_name: str) -> None:
        """Validate that a priority score is in the valid range [0.0, 1.0].
        
        Priority scores are used in MVP5 for intelligent model selection, where
        0.0 indicates low priority and 1.0 indicates high priority for the
        respective optimization dimension (cost, speed, intelligence).
        
        Args:
            score: The priority score to validate (None is valid for optional fields)
            field_name: Name of the field being validated for error messages
            
        Raises:
            ValueError: If score is not None and not in [0.0, 1.0] range
        """
        if score is not None and not 0.0 <= score <= 1.0:
            msg = f"{field_name} must be between 0.0 and 1.0 (normalized priority scale)"
            raise ValueError(msg)


@dataclass
class PlannedStep:
    """Represents a planned step combining a tool and its prompt with relevance scoring.

    A planned step is the core output of the enhanced planner, providing both a relevant
    MCPTool and a corresponding MCPPrompt that together form an actionable recommendation
    for the user. This represents the "executable action plan" aspect of the system.

    Architecture Context:
        PlannedStep objects bridge the gap between user intent (natural language queries)
        and executable actions (tool+prompt combinations). They are created by the
        SimplePlannerAgent through semantic similarity search and relevance scoring.

    Planning Algorithm Context:
        1. User Query → Query Embedding (EmbeddingService)
        2. Query Embedding → Similar Tools (InMemoryKG semantic search)
        3. Tool + Query → Similar Prompts (tool-specific prompt search)
        4. Tool + Prompt → PlannedStep (with relevance scoring)
        5. Multiple PlannedSteps → Ranked List (sorted by relevance)

    Relevance Scoring:
        The relevance_score combines multiple factors:
        - Tool semantic similarity to query (70% weight)
        - Prompt semantic similarity to query (25% weight)  
        - Difficulty level bonus for user experience (5% weight)
        
    Attributes:
        tool: The MCPTool selected for this step (defines what capability to use)
        prompt: The MCPPrompt selected to guide tool usage (defines how to use it effectively)
        relevance_score: Optional confidence score for this tool+prompt combination (0.0-1.0)

    Example:
        >>> step = PlannedStep(
        ...     tool=sentiment_tool,
        ...     prompt=sentiment_prompt,
        ...     relevance_score=0.95
        ... )
        >>> print(step.summary)  # "Use 'Sentiment Analyzer' with 'Basic Analysis' prompt"
        >>> print(step.get_formatted_template())  # Full template with context
    """

    tool: MCPTool  # The selected capability
    prompt: MCPPrompt  # The selected usage pattern
    relevance_score: float | None = None  # Confidence in this recommendation

    def __post_init__(self) -> None:
        """Validate planned step data after initialization.
        
        This validation ensures the logical consistency of the planned step,
        particularly the crucial relationship between tool and prompt. The
        validation follows the principle of early error detection to prevent
        runtime issues during execution.
        
        Validation Rules:
            1. Type Safety: Ensures proper object types for tool and prompt
            2. Relevance Score: Validates score is in [0.0, 1.0] range if provided
            3. Tool-Prompt Consistency: Ensures prompt targets the correct tool
            
        Design Rationale:
            The tool-prompt consistency check prevents logical errors where a
            prompt designed for one tool gets paired with a different tool,
            which would result in execution failures or poor results.
            
        Raises:
            TypeError: If tool or prompt are not the expected types
            ValueError: If relevance_score is invalid or tool-prompt mismatch
        """
        # Type safety validation - ensures proper object types
        if not isinstance(self.tool, MCPTool):
            msg = "tool must be an MCPTool instance for proper execution"
            raise TypeError(msg)
        if not isinstance(self.prompt, MCPPrompt):
            msg = "prompt must be an MCPPrompt instance for proper execution"
            raise TypeError(msg)
            
        # Relevance score validation - ensures valid confidence range
        if self.relevance_score is not None and not (
            0.0 <= self.relevance_score <= 1.0
        ):
            msg = "relevance_score must be between 0.0 and 1.0 if provided"
            raise ValueError(msg)

        # Critical consistency check - ensures tool and prompt are compatible
        if self.prompt.target_tool_id != self.tool.tool_id:
            error_msg = (
                f"Tool-Prompt mismatch: Prompt target_tool_id '{self.prompt.target_tool_id}' "
                f"does not match tool_id '{self.tool.tool_id}'. This would cause execution failure."
            )
            raise ValueError(error_msg)

    @property
    def summary(self) -> str:
        """Get a human-readable summary of this planned step.
        
        This property provides a concise, user-friendly description of the
        planned action that can be displayed in UIs or logs. It follows a
        consistent format for predictable user experience.
        
        Format: "Use 'ToolName' with 'PromptName' prompt"
        
        Returns:
            Human-readable summary string for display purposes
            
        Example:
            >>> step.summary
            "Use 'Text Summarizer' with 'Academic Paper Summary' prompt"
        """
        return f"Use '{self.tool.name}' with '{self.prompt.name}' prompt"

    def get_formatted_template(self) -> str:
        """Get the prompt template with full tool context for execution.
        
        This method provides the complete context needed for execution,
        combining tool information with the prompt template. This is useful
        for debugging, logging, and providing detailed execution context.
        
        Returns:
            Multi-line string with tool name, prompt name, and template
            
        Example:
            >>> step.get_formatted_template()
            Tool: Text Summarizer
            Prompt: Academic Paper Summary
            Template: Summarize this {{document_type}}: {{input_text}}
        """
        return (
            f"Tool: {self.tool.name}\n"
            f"Prompt: {self.prompt.name}\n"
            f"Template: {self.prompt.template_string}"
        )