|
|
"""Integration tests for Sprint 2 functionality. |
|
|
|
|
|
Tests the integration between EmbeddingService and InMemoryKG
to ensure the full semantic search pipeline works correctly.
|
|
""" |
|
|
|
|
|
from unittest.mock import Mock, patch |
|
|
|
|
|
from kg_services.embedder import EmbeddingService |
|
|
from kg_services.knowledge_graph import InMemoryKG |
|
|
from kg_services.ontology import MCPTool |
|
|
|
|
|
|
|
|
def test_sprint2_task2_1_embedding_service_integration():
    """Test Task 2.1: EmbeddingService returns the vector supplied by the client.

    The OpenAI client class and ``os.getenv`` (as referenced from
    ``kg_services.embedder``) are both patched, so no network call is made
    and no real API key is needed.
    """
    sample_text = "Test text for embedding"
    expected_vector = [0.1, 0.2, 0.3, 0.4, 0.5]

    with (
        patch("kg_services.embedder.openai.OpenAI") as openai_cls,
        patch("kg_services.embedder.os.getenv", return_value="fake-api-key"),
    ):
        # Fake client whose embeddings endpoint yields one data item
        # carrying a copy of the expected vector.
        fake_client = Mock()
        openai_cls.return_value = fake_client
        fake_client.embeddings.create.return_value = Mock(
            data=[Mock(embedding=list(expected_vector))]
        )

        service = EmbeddingService()
        result = service.get_embedding(sample_text)

        assert result is not None
        assert len(result) == 5
        assert result == expected_vector

        # Exactly one request, with the expected model name and input text.
        fake_client.embeddings.create.assert_called_once_with(
            model="text-embedding-3-small", input=sample_text
        )
|
|
|
|
|
|
|
|
def test_sprint2_task2_2_vector_index_building_integration():
    """Test Task 2.2: InMemoryKG builds its vector index from an embedder.

    The embedder here is a ``Mock`` whose ``get_embedding`` hands out two
    canned vectors in sequence; the test checks that one embedding and one
    tool id are recorded per registered tool.
    """
    graph = InMemoryKG()

    # (tool_id, name, description, tags) for each tool to register.
    tool_specs = [
        (
            "integration_tool_001",
            "Test Tool 1",
            "First test tool for integration testing.",
            ["test", "integration"],
        ),
        (
            "integration_tool_002",
            "Test Tool 2",
            "Second test tool for integration testing.",
            ["test", "mock"],
        ),
    ]
    for tool_id, name, description, tags in tool_specs:
        graph.add_tool(
            MCPTool(
                tool_id=tool_id,
                name=name,
                description=description,
                tags=tags,
            )
        )

    # One canned vector per expected get_embedding call.
    canned_vectors = [
        [0.1, 0.2, 0.3],
        [0.4, 0.5, 0.6],
    ]
    fake_embedder = Mock()
    fake_embedder.get_embedding.side_effect = canned_vectors

    graph.build_vector_index(fake_embedder)

    assert len(graph.tool_embeddings) == 2
    assert len(graph.tool_ids_for_vectors) == 2
    assert fake_embedder.get_embedding.call_count == 2
|
|
|
|
|
|
|
|
def test_sprint2_task2_3_semantic_search_integration():
    """Test Task 2.3: End-to-end semantic search over a mocked vector index.

    Three tools are registered and indexed with hand-picked 3-d vectors
    from a mocked embedder, then ``find_similar_tools`` is queried with a
    text-like and a vision-like embedding.
    """
    graph = InMemoryKG()

    # (tool_id, name, description, tags) for each tool to register.
    tool_specs = [
        (
            "semantic_tool_001",
            "Text Analyzer",
            "Analyzes text for sentiment and entities.",
            ["nlp", "text"],
        ),
        (
            "semantic_tool_002",
            "Image Processor",
            "Processes images for object detection.",
            ["vision", "image"],
        ),
        (
            "semantic_tool_003",
            "Text Summarizer",
            "Summarizes long text documents.",
            ["nlp", "text"],
        ),
    ]
    for tool_id, name, description, tags in tool_specs:
        graph.add_tool(
            MCPTool(
                tool_id=tool_id,
                name=name,
                description=description,
                tags=tags,
            )
        )

    # Vectors are handed out in call order.
    # NOTE(review): the assertions below presume build_vector_index embeds
    # tools in the order they were added - confirm against InMemoryKG.
    fake_embedder = Mock()
    fake_embedder.get_embedding.side_effect = [
        [1.0, 0.0, 0.0],
        [0.0, 1.0, 0.0],
        [0.9, 0.1, 0.0],
    ]

    graph.build_vector_index(fake_embedder)

    # Query along the first axis: both text tools are expected back.
    text_matches = graph.find_similar_tools([1.0, 0.0, 0.0], top_k=2)

    assert len(text_matches) == 2
    assert "semantic_tool_001" in text_matches
    assert "semantic_tool_003" in text_matches

    # Query along the second axis: only the image tool is expected.
    vision_matches = graph.find_similar_tools([0.0, 1.0, 0.0], top_k=1)

    assert len(vision_matches) == 1
    assert vision_matches[0] == "semantic_tool_002"
|
|
|
|
|
|
|
|
def test_sprint2_full_pipeline_integration():
    """Test complete Sprint 2 pipeline: embeddings -> index -> search.

    A real ``EmbeddingService`` is constructed on top of a patched OpenAI
    client whose ``embeddings.create`` picks a canned vector based on the
    requested input text. The service is then used to index two tools and
    the index is queried for the closest match.
    """
    # Marker substring -> vector returned when the marker occurs in the input.
    canned_vectors = {
        "Text Analyzer": [1.0, 0.1, 0.0],
        "Image Processor": [0.1, 1.0, 0.0],
    }
    fallback_vector = [0.5, 0.5, 0.5]

    def fake_create(**kwargs):
        """Stand-in for ``embeddings.create`` keyed on the ``input`` kwarg."""
        requested_text = kwargs["input"]
        for marker, vector in canned_vectors.items():
            if marker in requested_text:
                return Mock(data=[Mock(embedding=list(vector))])
        return Mock(data=[Mock(embedding=list(fallback_vector))])

    with (
        patch("kg_services.embedder.openai.OpenAI") as openai_cls,
        patch("kg_services.embedder.os.getenv", return_value="fake-api-key"),
    ):
        fake_client = Mock()
        openai_cls.return_value = fake_client
        fake_client.embeddings.create.side_effect = fake_create

        service = EmbeddingService()
        graph = InMemoryKG()

        # (tool_id, name, description, tags) for each tool to register.
        tool_specs = [
            (
                "pipeline_tool_001",
                "Text Analyzer",
                "Analyzes text content.",
                ["nlp", "text"],
            ),
            (
                "pipeline_tool_002",
                "Image Processor",
                "Processes image content.",
                ["vision", "image"],
            ),
        ]
        for tool_id, name, description, tags in tool_specs:
            graph.add_tool(
                MCPTool(
                    tool_id=tool_id,
                    name=name,
                    description=description,
                    tags=tags,
                )
            )

        graph.build_vector_index(service)

        assert len(graph.tool_embeddings) == 2
        assert len(graph.tool_ids_for_vectors) == 2

        # The query equals the "Text Analyzer" canned vector.
        matches = graph.find_similar_tools([1.0, 0.1, 0.0], top_k=1)

        assert len(matches) == 1
        assert matches[0] == "pipeline_tool_001"

        # One embeddings request per indexed tool.
        assert fake_client.embeddings.create.call_count == 2
|
|
|