"""Integration tests for Sprint 2 functionality.
Tests the integration between EmbeddingService and InMemoryKG
to ensure the full semantic search pipeline works correctly.
"""
from unittest.mock import Mock, patch
from kg_services.embedder import EmbeddingService
from kg_services.knowledge_graph import InMemoryKG
from kg_services.ontology import MCPTool


def test_sprint2_task2_1_embedding_service_integration():
    """Test Task 2.1: EmbeddingService generates embeddings."""
    # Mock the OpenAI client and environment for testing
    with (
        patch("kg_services.embedder.openai.OpenAI") as mock_openai,
        patch("kg_services.embedder.os.getenv") as mock_getenv,
    ):
        # Setup environment mock
        mock_getenv.return_value = "fake-api-key"
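        # Assumption: EmbeddingService looks up OPENAI_API_KEY via os.getenv at
        # construction time, so the fake key keeps this test offline.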

        # Setup mock response
        mock_client = Mock()
        mock_openai.return_value = mock_client
        mock_response = Mock()
        mock_response.data = [Mock()]
        mock_response.data[0].embedding = [0.1, 0.2, 0.3, 0.4, 0.5]
        mock_client.embeddings.create.return_value = mock_response

        # Test embedding generation
        embedder = EmbeddingService()
        embedding = embedder.get_embedding("Test text for embedding")

        assert embedding is not None
        assert len(embedding) == 5
        assert embedding == [0.1, 0.2, 0.3, 0.4, 0.5]

        # Verify OpenAI client was called correctly
        mock_client.embeddings.create.assert_called_once_with(
            model="text-embedding-3-small", input="Test text for embedding"
        )


def test_sprint2_task2_2_vector_index_building_integration():
    """Test Task 2.2: InMemoryKG builds its vector index from a mocked embedder."""
    kg = InMemoryKG()

    # Add test tools
    tool1 = MCPTool(
        tool_id="integration_tool_001",
        name="Test Tool 1",
        description="First test tool for integration testing.",
        tags=["test", "integration"],
    )
    tool2 = MCPTool(
        tool_id="integration_tool_002",
        name="Test Tool 2",
        description="Second test tool for integration testing.",
        tags=["test", "mock"],
    )
    kg.add_tool(tool1)
    kg.add_tool(tool2)

    # Mock embedder
    mock_embedder = Mock()
    mock_embedder.get_embedding.side_effect = [
        [0.1, 0.2, 0.3],  # For tool1
        [0.4, 0.5, 0.6],  # For tool2
    ]
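    # Assumption: build_vector_index embeds tools in insertion order, so the
    # side_effect list above lines up tool1 with the first vector and tool2
    # with the second.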

    # Build vector index
    kg.build_vector_index(mock_embedder)

    # Verify integration
    assert len(kg.tool_embeddings) == 2
    assert len(kg.tool_ids_for_vectors) == 2
    assert mock_embedder.get_embedding.call_count == 2


def test_sprint2_task2_3_semantic_search_integration():
    """Test Task 2.3: End-to-end semantic search with cosine similarity."""
    kg = InMemoryKG()

    # Add test tools
    tool1 = MCPTool(
        tool_id="semantic_tool_001",
        name="Text Analyzer",
        description="Analyzes text for sentiment and entities.",
        tags=["nlp", "text"],
    )
    tool2 = MCPTool(
        tool_id="semantic_tool_002",
        name="Image Processor",
        description="Processes images for object detection.",
        tags=["vision", "image"],
    )
    tool3 = MCPTool(
        tool_id="semantic_tool_003",
        name="Text Summarizer",
        description="Summarizes long text documents.",
        tags=["nlp", "text"],
    )
    kg.add_tool(tool1)
    kg.add_tool(tool2)
    kg.add_tool(tool3)

    # Mock embedder with distinct, predictable embeddings
    mock_embedder = Mock()
    mock_embedder.get_embedding.side_effect = [
        [1.0, 0.0, 0.0],  # Text Analyzer - text-focused
        [0.0, 1.0, 0.0],  # Image Processor - vision-focused
        [0.9, 0.1, 0.0],  # Text Summarizer - similar to text analyzer
    ]

    # Build vector index
    kg.build_vector_index(mock_embedder)

    # Test semantic search with text-focused query
    text_query_embedding = [1.0, 0.0, 0.0]  # Similar to text tools
    similar_tools = kg.find_similar_tools(text_query_embedding, top_k=2)

    # Should find text-related tools first
    assert len(similar_tools) == 2
    assert "semantic_tool_001" in similar_tools  # Text Analyzer
    assert "semantic_tool_003" in similar_tools  # Text Summarizer

    # Test with vision-focused query
    vision_query_embedding = [0.0, 1.0, 0.0]  # Similar to vision tools
    similar_tools = kg.find_similar_tools(vision_query_embedding, top_k=1)

    # Should find vision tool first
    assert len(similar_tools) == 1
    assert similar_tools[0] == "semantic_tool_002"  # Image Processor
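

# The ranking assertions above follow from cosine similarity. The helper and
# test below are a minimal reference sketch for the reader; they assume (but do
# not require) that InMemoryKG.find_similar_tools ranks tools this way, as the
# Task 2.3 docstring describes. The helper is not part of the library API.
def _reference_cosine_similarity(a, b):
    """Plain-Python cosine similarity used only for the sanity check below."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = sum(x * x for x in a) ** 0.5
    norm_b = sum(x * x for x in b) ** 0.5
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    return dot / (norm_a * norm_b)


def test_sprint2_task2_3_reference_cosine_ranking():
    """Sanity check: with the mock embeddings above, cosine similarity ranks
    both text tools ahead of the vision tool for a text-focused query."""
    text_query = [1.0, 0.0, 0.0]
    analyzer = _reference_cosine_similarity(text_query, [1.0, 0.0, 0.0])  # 1.0
    summarizer = _reference_cosine_similarity(text_query, [0.9, 0.1, 0.0])  # ~0.994
    image = _reference_cosine_similarity(text_query, [0.0, 1.0, 0.0])  # 0.0
    assert analyzer > summarizer > image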


def test_sprint2_full_pipeline_integration():
    """Test complete Sprint 2 pipeline: embeddings → index → search."""
    # Mock the OpenAI client and environment for this test
    with (
        patch("kg_services.embedder.openai.OpenAI") as mock_openai,
        patch("kg_services.embedder.os.getenv") as mock_getenv,
    ):
        # Setup environment mock
        mock_getenv.return_value = "fake-api-key"

        # Setup mock OpenAI client
        mock_client = Mock()
        mock_openai.return_value = mock_client

        # Mock different embeddings for different tools
        def mock_embedding_response(**call_kwargs):
            text = call_kwargs["input"]
            if "Text Analyzer" in text:
                return Mock(data=[Mock(embedding=[1.0, 0.1, 0.0])])
            if "Image Processor" in text:
                return Mock(data=[Mock(embedding=[0.1, 1.0, 0.0])])
            return Mock(data=[Mock(embedding=[0.5, 0.5, 0.5])])

        mock_client.embeddings.create.side_effect = mock_embedding_response

        # Create real services
        embedder = EmbeddingService()
        kg = InMemoryKG()

        # Add tools
        kg.add_tool(
            MCPTool(
                tool_id="pipeline_tool_001",
                name="Text Analyzer",
                description="Analyzes text content.",
                tags=["nlp", "text"],
            )
        )
        kg.add_tool(
            MCPTool(
                tool_id="pipeline_tool_002",
                name="Image Processor",
                description="Processes image content.",
                tags=["vision", "image"],
            )
        )

        # Build vector index with real embedder
        kg.build_vector_index(embedder)

        # Verify index was built
        assert len(kg.tool_embeddings) == 2
        assert len(kg.tool_ids_for_vectors) == 2

        # Test semantic search
        # Query most similar to text analyzer
        text_focused_query = [1.0, 0.1, 0.0]
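        # Assumption: this query equals the mocked "Text Analyzer" embedding
        # exactly, so a cosine-similarity ranking puts pipeline_tool_001 first.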
        results = kg.find_similar_tools(text_focused_query, top_k=1)

        assert len(results) == 1
        assert results[0] == "pipeline_tool_001"  # Text Analyzer should be most similar

        # Verify OpenAI was called for each tool
        assert mock_client.embeddings.create.call_count == 2