"""Integration test for Sprint 1 - KG-Powered Tool Suggester MVP."""

from pathlib import Path

from kg_services.embedder import EmbeddingService
from kg_services.knowledge_graph import InMemoryKG
from kg_services.ontology import MCPTool


def test_sprint1_integration():
    """Exercise the knowledge graph and the embedder together, end to end.

    The tool catalogue is loaded from the relative path
    ``data/initial_tools.json``; the test then checks tag search,
    similarity search, lookup by id, embedding output, similarity scoring
    and the collection of known tags.
    """
    # Build the two services under test.
    kg = InMemoryKG()
    embedder = EmbeddingService()

    # Populate the graph from the JSON fixture.
    kg.load_tools_from_json(Path("data/initial_tools.json"))

    # Exactly four tools are expected once the fixture is loaded.
    loaded_tools = kg.get_all_tools()
    assert len(loaded_tools) == 4

    # Tag lookup: at least two tools are expected to carry the "nlp" tag
    # (presumably the text summarizer and the sentiment analyzer).
    nlp_matches = kg.find_tools_by_tags(["nlp"])
    assert len(nlp_matches) >= 2

    # Similarity lookup: requesting the top two must yield two MCPTool objects.
    query = "analyze text sentiment and emotions"
    ranked = embedder.find_similar_tools(query, loaded_tools, top_k=2)
    assert len(ranked) == 2
    for candidate in ranked:
        assert isinstance(candidate, MCPTool)

    # Direct lookup by identifier, followed by checks on the returned tool.
    sentiment_tool = kg.get_tool_by_id("sentiment_analyzer_002")
    assert sentiment_tool is not None
    assert sentiment_tool.name == "Sentiment Analyzer"
    assert "sentiment" in sentiment_tool.tags

    # Embedding a tool description: the test expects 128 float components
    # (128 is presumably the embedder's default dimension).
    tool_vector = embedder.embed_tool_description(sentiment_tool)
    assert len(tool_vector) == 128
    for component in tool_vector:
        assert isinstance(component, float)

    # Similarity between the query and the tool must land inside [0, 1].
    query_vector = embedder.embed_text(query)
    score = embedder.compute_similarity(query_vector, tool_vector)
    assert score >= 0.0
    assert score <= 1.0

    # Tag diversity: at least eight tags, including the four core ones.
    known_tags = kg.get_all_tags()
    assert len(known_tags) >= 8
    required_tags = {"nlp", "text", "vision", "code"}
    assert required_tags.issubset(known_tags)


def test_sprint1_tool_suggestion_workflow():
    """Walk through suggesting tools for a code-related user request.

    Both lookup strategies are exercised against the catalogue loaded from
    ``data/initial_tools.json``: a tag search for ``"code"`` and an
    embedding-based similarity search on the free-text query. The code
    linter entry is then checked for presence and for its expected tags.
    """
    # Arrange: fresh services and a populated graph.
    kg = InMemoryKG()
    embedder = EmbeddingService()
    fixture_path = Path("data/initial_tools.json")
    kg.load_tools_from_json(fixture_path)

    # The request a user might type.
    user_query = "I need to process some code"

    # Strategy 1: tag search must return at least one tool tagged "code".
    tagged_matches = kg.find_tools_by_tags(["code"])
    assert len(tagged_matches) >= 1

    # Strategy 2: similarity search must never return more than requested.
    max_suggestions = 3
    catalogue = kg.get_all_tools()
    suggestions = embedder.find_similar_tools(
        user_query, catalogue, top_k=max_suggestions
    )
    assert len(suggestions) <= max_suggestions

    # The code linter must be present among the loaded tools.
    catalogue_ids = [entry.tool_id for entry in catalogue]
    assert "code_linter_004" in catalogue_ids

    # The linter can be fetched by id and carries the tags the test expects.
    linter_tool = kg.get_tool_by_id("code_linter_004")
    assert linter_tool is not None
    linter_tags = linter_tool.tags
    assert "code" in linter_tags
    assert "linting" in linter_tags