kgraph-mcp-agent-platform / tests /test_sprint2_integration.py
BasalGanglia's picture
🏆 Multi-Track Hackathon Submission
1f2d50a verified
"""Integration tests for Sprint 2 functionality.
Tests the integration between EmbeddingService and InMemoryKG
to ensure the full semantic search pipeline works correctly.
"""
from unittest.mock import Mock, patch
from kg_services.embedder import EmbeddingService
from kg_services.knowledge_graph import InMemoryKG
from kg_services.ontology import MCPTool
def test_sprint2_task2_1_embedding_service_integration():
    """Task 2.1: EmbeddingService hands back the vector from the OpenAI client.

    Both the OpenAI client class and ``os.getenv`` are patched as seen from
    ``kg_services.embedder``, so the test needs neither a network connection
    nor a real API key.
    """
    input_text = "Test text for embedding"
    expected_vector = [0.1, 0.2, 0.3, 0.4, 0.5]

    with (
        patch("kg_services.embedder.openai.OpenAI") as openai_cls,
        patch("kg_services.embedder.os.getenv") as getenv_mock,
    ):
        # Every environment lookup yields a placeholder key.
        getenv_mock.return_value = "fake-api-key"

        # The patched constructor returns a client whose embeddings endpoint
        # always answers with one canned vector (copied so the expected
        # value cannot be mutated through the mock).
        fake_client = Mock()
        fake_client.embeddings.create.return_value = Mock(
            data=[Mock(embedding=list(expected_vector))]
        )
        openai_cls.return_value = fake_client

        # Exercise the service.
        service = EmbeddingService()
        embedding = service.get_embedding(input_text)

        assert embedding is not None
        assert len(embedding) == 5
        assert embedding == expected_vector

        # Exactly one request must have gone out, with this model and text.
        fake_client.embeddings.create.assert_called_once_with(
            model="text-embedding-3-small", input=input_text
        )
def test_sprint2_task2_2_vector_index_building_integration():
    """Task 2.2: InMemoryKG builds a vector index from an embedder.

    The embedder used here is a ``Mock`` that returns one canned vector per
    call, so the test checks how the graph drives the embedder rather than
    the embedder itself.
    """
    kg = InMemoryKG()

    # Register two tools.
    tools = [
        MCPTool(
            tool_id="integration_tool_001",
            name="Test Tool 1",
            description="First test tool for integration testing.",
            tags=["test", "integration"],
        ),
        MCPTool(
            tool_id="integration_tool_002",
            name="Test Tool 2",
            description="Second test tool for integration testing.",
            tags=["test", "mock"],
        ),
    ]
    for tool in tools:
        kg.add_tool(tool)

    # Stand-in embedder: successive calls return successive vectors.
    stub_embedder = Mock()
    stub_embedder.get_embedding.side_effect = [
        [0.1, 0.2, 0.3],  # first call
        [0.4, 0.5, 0.6],  # second call
    ]

    kg.build_vector_index(stub_embedder)

    # One embedding and one id per tool, and one embedder call per tool.
    assert len(kg.tool_embeddings) == 2
    assert len(kg.tool_ids_for_vectors) == 2
    assert stub_embedder.get_embedding.call_count == 2
def test_sprint2_task2_3_semantic_search_integration():
    """Task 2.3: similarity search over the index returns the closest tools.

    Three tools are indexed with hand-picked vectors from a mocked embedder:
    two text tools that point almost the same way and one vision tool on a
    different axis. Queries aligned with either axis must bring back the
    matching tool(s). The task description names cosine similarity as the
    metric; that is implemented in ``InMemoryKG`` and is not visible here.
    """
    kg = InMemoryKG()

    # Register the tools; the order matters because the stub embedder below
    # hands out its vectors in call order.
    catalog = [
        MCPTool(
            tool_id="semantic_tool_001",
            name="Text Analyzer",
            description="Analyzes text for sentiment and entities.",
            tags=["nlp", "text"],
        ),
        MCPTool(
            tool_id="semantic_tool_002",
            name="Image Processor",
            description="Processes images for object detection.",
            tags=["vision", "image"],
        ),
        MCPTool(
            tool_id="semantic_tool_003",
            name="Text Summarizer",
            description="Summarizes long text documents.",
            tags=["nlp", "text"],
        ),
    ]
    for entry in catalog:
        kg.add_tool(entry)

    # Distinct, predictable embeddings.
    stub_embedder = Mock()
    stub_embedder.get_embedding.side_effect = [
        [1.0, 0.0, 0.0],  # Text Analyzer: text axis
        [0.0, 1.0, 0.0],  # Image Processor: vision axis
        [0.9, 0.1, 0.0],  # Text Summarizer: close to the text axis
    ]
    kg.build_vector_index(stub_embedder)

    # A query on the text axis must surface both text tools.
    text_hits = kg.find_similar_tools([1.0, 0.0, 0.0], top_k=2)
    assert len(text_hits) == 2
    assert "semantic_tool_001" in text_hits  # Text Analyzer
    assert "semantic_tool_003" in text_hits  # Text Summarizer

    # A query on the vision axis must rank the image tool first.
    vision_hits = kg.find_similar_tools([0.0, 1.0, 0.0], top_k=1)
    assert len(vision_hits) == 1
    assert vision_hits[0] == "semantic_tool_002"  # Image Processor
def test_sprint2_full_pipeline_integration():
    """Exercise the whole Sprint 2 pipeline: embeddings -> index -> search.

    A real ``EmbeddingService`` and a real ``InMemoryKG`` are wired together.
    Only the OpenAI client class and ``os.getenv`` are patched, so the
    vectors come from a fake ``embeddings.create`` that keys on the tool
    name found in the request text.
    """
    # Tool-name marker -> vector returned when the marker occurs in the text.
    canned_vectors = (
        ("Text Analyzer", [1.0, 0.1, 0.0]),
        ("Image Processor", [0.1, 1.0, 0.0]),
    )

    def fake_create(**kwargs):
        """Stand in for ``client.embeddings.create``; keyword arguments only."""
        text = kwargs["input"]
        for marker, vector in canned_vectors:
            if marker in text:
                return Mock(data=[Mock(embedding=list(vector))])
        # Neutral vector for any text that names neither tool.
        return Mock(data=[Mock(embedding=[0.5, 0.5, 0.5])])

    with (
        patch("kg_services.embedder.openai.OpenAI") as openai_cls,
        patch("kg_services.embedder.os.getenv") as getenv_mock,
    ):
        # Every environment lookup yields a placeholder key.
        getenv_mock.return_value = "fake-api-key"

        # The patched constructor returns a client backed by fake_create.
        fake_client = Mock()
        openai_cls.return_value = fake_client
        fake_client.embeddings.create.side_effect = fake_create

        # Real services on top of the patched client.
        embedder = EmbeddingService()
        kg = InMemoryKG()

        text_tool = MCPTool(
            tool_id="pipeline_tool_001",
            name="Text Analyzer",
            description="Analyzes text content.",
            tags=["nlp", "text"],
        )
        kg.add_tool(text_tool)

        image_tool = MCPTool(
            tool_id="pipeline_tool_002",
            name="Image Processor",
            description="Processes image content.",
            tags=["vision", "image"],
        )
        kg.add_tool(image_tool)

        # Build the index through the real embedder.
        kg.build_vector_index(embedder)

        # Both tools must be in the index.
        assert len(kg.tool_embeddings) == 2
        assert len(kg.tool_ids_for_vectors) == 2

        # The query equals the vector handed out for the Text Analyzer.
        results = kg.find_similar_tools([1.0, 0.1, 0.0], top_k=1)

        assert len(results) == 1
        assert results[0] == "pipeline_tool_001"  # Text Analyzer should be most similar

        # One embedding request per tool.
        assert fake_client.embeddings.create.call_count == 2