"""Integration test for Sprint 1 - KG-Powered Tool Suggester MVP.""" from pathlib import Path from kg_services.embedder import EmbeddingService from kg_services.knowledge_graph import InMemoryKG from kg_services.ontology import MCPTool def test_sprint1_integration(): """Test the complete Sprint 1 system working together.""" # Initialize all components kg = InMemoryKG() embedder = EmbeddingService() # Load initial tools from JSON data_file = Path("data/initial_tools.json") kg.load_tools_from_json(data_file) # Verify tools were loaded all_tools = kg.get_all_tools() assert len(all_tools) == 4 # Test tag-based search nlp_tools = kg.find_tools_by_tags(["nlp"]) assert len(nlp_tools) >= 2 # Should find text summarizer and sentiment analyzer # Test embedding-based similarity search query = "analyze text sentiment and emotions" similar_tools = embedder.find_similar_tools(query, all_tools, top_k=2) assert len(similar_tools) == 2 assert all(isinstance(tool, MCPTool) for tool in similar_tools) # Test specific tool retrieval sentiment_tool = kg.get_tool_by_id("sentiment_analyzer_002") assert sentiment_tool is not None assert sentiment_tool.name == "Sentiment Analyzer" assert "sentiment" in sentiment_tool.tags # Test embedding consistency tool_embedding = embedder.embed_tool_description(sentiment_tool) assert len(tool_embedding) == 128 # Default embedding dimension assert all(isinstance(x, float) for x in tool_embedding) # Test similarity computation query_embedding = embedder.embed_text(query) similarity = embedder.compute_similarity(query_embedding, tool_embedding) assert 0.0 <= similarity <= 1.0 # Test tag diversity all_tags = kg.get_all_tags() assert len(all_tags) >= 8 assert {"nlp", "text", "vision", "code"}.issubset(all_tags) def test_sprint1_tool_suggestion_workflow(): """Test a complete tool suggestion workflow.""" # Setup kg = InMemoryKG() embedder = EmbeddingService() kg.load_tools_from_json(Path("data/initial_tools.json")) # Simulate user query: "I need to process some code" user_query = "I need to process some code" # Method 1: Tag-based search code_tools_by_tag = kg.find_tools_by_tags(["code"]) assert len(code_tools_by_tag) >= 1 # Method 2: Embedding-based search all_tools = kg.get_all_tools() code_tools_by_embedding = embedder.find_similar_tools( user_query, all_tools, top_k=3 ) assert len(code_tools_by_embedding) <= 3 # Verify we can find the code linter tool_ids = {tool.tool_id for tool in all_tools} assert "code_linter_004" in tool_ids # Test that the system provides meaningful suggestions linter_tool = kg.get_tool_by_id("code_linter_004") assert linter_tool is not None assert "code" in linter_tool.tags assert "linting" in linter_tool.tags