"""Integration tests for Sprint 2 functionality.

Tests the integration between EmbeddingService and InMemoryKG
to ensure the full semantic search pipeline works correctly.
"""

from collections.abc import Iterator
from contextlib import contextmanager
from unittest.mock import Mock, patch

from kg_services.embedder import EmbeddingService
from kg_services.knowledge_graph import InMemoryKG
from kg_services.ontology import MCPTool


@contextmanager
def _mocked_openai_client() -> Iterator[Mock]:
    """Patch the OpenAI constructor and ``os.getenv`` seen by the embedder.

    While the context is active, ``kg_services.embedder.openai.OpenAI``
    returns a fresh ``Mock`` client and ``kg_services.embedder.os.getenv``
    returns ``"fake-api-key"`` for every lookup.

    Yields:
        The ``Mock`` returned by the patched ``OpenAI`` constructor, so each
        test can configure and inspect ``embeddings.create`` on it.
    """
    with (
        patch("kg_services.embedder.openai.OpenAI") as mock_openai,
        patch("kg_services.embedder.os.getenv") as mock_getenv,
    ):
        mock_getenv.return_value = "fake-api-key"

        mock_client = Mock()
        mock_openai.return_value = mock_client
        yield mock_client


def test_sprint2_task2_1_embedding_service_integration() -> None:
    """Test Task 2.1: EmbeddingService generates embeddings."""
    expected_embedding = [0.1, 0.2, 0.3, 0.4, 0.5]

    with _mocked_openai_client() as mock_client:
        # Canned API response carrying a single embedding vector.
        mock_client.embeddings.create.return_value = Mock(
            data=[Mock(embedding=expected_embedding)]
        )

        # The service must be constructed while the patches are active.
        embedder = EmbeddingService()
        embedding = embedder.get_embedding("Test text for embedding")

        assert embedding is not None
        assert len(embedding) == 5
        assert embedding == [0.1, 0.2, 0.3, 0.4, 0.5]

        # Verify OpenAI client was called correctly
        mock_client.embeddings.create.assert_called_once_with(
            model="text-embedding-3-small", input="Test text for embedding"
        )


def test_sprint2_task2_2_vector_index_building_integration() -> None:
    """Test Task 2.2: InMemoryKG builds a vector index from an embedder.

    The embedder is a ``Mock`` that hands out one canned vector per call.
    """
    kg = InMemoryKG()

    # Add test tools
    kg.add_tool(
        MCPTool(
            tool_id="integration_tool_001",
            name="Test Tool 1",
            description="First test tool for integration testing.",
            tags=["test", "integration"],
        )
    )
    kg.add_tool(
        MCPTool(
            tool_id="integration_tool_002",
            name="Test Tool 2",
            description="Second test tool for integration testing.",
            tags=["test", "mock"],
        )
    )

    # Mock embedder: successive calls return successive vectors.
    mock_embedder = Mock()
    mock_embedder.get_embedding.side_effect = [
        [0.1, 0.2, 0.3],  # For tool1
        [0.4, 0.5, 0.6],  # For tool2
    ]

    # Build vector index
    kg.build_vector_index(mock_embedder)

    # Verify integration: one embedding and one id per tool, one call each.
    assert len(kg.tool_embeddings) == 2
    assert len(kg.tool_ids_for_vectors) == 2
    assert mock_embedder.get_embedding.call_count == 2


def test_sprint2_task2_3_semantic_search_integration() -> None:
    """Test Task 2.3: End-to-end semantic search with cosine similarity."""
    kg = InMemoryKG()

    # Add test tools
    tools = [
        MCPTool(
            tool_id="semantic_tool_001",
            name="Text Analyzer",
            description="Analyzes text for sentiment and entities.",
            tags=["nlp", "text"],
        ),
        MCPTool(
            tool_id="semantic_tool_002",
            name="Image Processor",
            description="Processes images for object detection.",
            tags=["vision", "image"],
        ),
        MCPTool(
            tool_id="semantic_tool_003",
            name="Text Summarizer",
            description="Summarizes long text documents.",
            tags=["nlp", "text"],
        ),
    ]
    for tool in tools:
        kg.add_tool(tool)

    # Mock embedder with distinct, predictable embeddings
    mock_embedder = Mock()
    mock_embedder.get_embedding.side_effect = [
        [1.0, 0.0, 0.0],  # Text Analyzer - text-focused
        [0.0, 1.0, 0.0],  # Image Processor - vision-focused
        [0.9, 0.1, 0.0],  # Text Summarizer - similar to text analyzer
    ]

    # Build vector index
    kg.build_vector_index(mock_embedder)

    # Test semantic search with text-focused query
    text_query_embedding = [1.0, 0.0, 0.0]  # Similar to text tools
    similar_tools = kg.find_similar_tools(text_query_embedding, top_k=2)

    # Both text-related tools must make the top two.
    assert len(similar_tools) == 2
    assert "semantic_tool_001" in similar_tools  # Text Analyzer
    assert "semantic_tool_003" in similar_tools  # Text Summarizer

    # Test with vision-focused query
    vision_query_embedding = [0.0, 1.0, 0.0]  # Similar to vision tools
    similar_tools = kg.find_similar_tools(vision_query_embedding, top_k=1)

    # Should find vision tool first
    assert len(similar_tools) == 1
    assert similar_tools[0] == "semantic_tool_002"  # Image Processor


def test_sprint2_full_pipeline_integration() -> None:
    """Test complete Sprint 2 pipeline: embeddings → index → search."""

    def fake_embeddings_create(**kwargs):
        """Return a canned embedding response chosen by the ``input`` text.

        Used directly as the ``side_effect`` of ``embeddings.create``. The
        choice depends on the tool name appearing in the submitted text.
        """
        text = kwargs["input"]
        if "Text Analyzer" in text:
            return Mock(data=[Mock(embedding=[1.0, 0.1, 0.0])])
        if "Image Processor" in text:
            return Mock(data=[Mock(embedding=[0.1, 1.0, 0.0])])
        return Mock(data=[Mock(embedding=[0.5, 0.5, 0.5])])

    with _mocked_openai_client() as mock_client:
        mock_client.embeddings.create.side_effect = fake_embeddings_create

        # Create real services
        embedder = EmbeddingService()
        kg = InMemoryKG()

        # Add tools
        kg.add_tool(
            MCPTool(
                tool_id="pipeline_tool_001",
                name="Text Analyzer",
                description="Analyzes text content.",
                tags=["nlp", "text"],
            )
        )
        kg.add_tool(
            MCPTool(
                tool_id="pipeline_tool_002",
                name="Image Processor",
                description="Processes image content.",
                tags=["vision", "image"],
            )
        )

        # Build vector index with real embedder
        kg.build_vector_index(embedder)

        # Verify index was built
        assert len(kg.tool_embeddings) == 2
        assert len(kg.tool_ids_for_vectors) == 2

        # Test semantic search
        # Query most similar to text analyzer
        text_focused_query = [1.0, 0.1, 0.0]
        results = kg.find_similar_tools(text_focused_query, top_k=1)

        assert len(results) == 1
        assert results[0] == "pipeline_tool_001"  # Text Analyzer should be most similar

        # Verify OpenAI was called for each tool
        assert mock_client.embeddings.create.call_count == 2