"""Integration tests for Sprint 2 functionality.

Tests the integration between EmbeddingService and InMemoryKG
to ensure the full semantic search pipeline works correctly.
"""

from unittest.mock import Mock, patch

from kg_services.embedder import EmbeddingService
from kg_services.knowledge_graph import InMemoryKG
from kg_services.ontology import MCPTool
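
# Surface of kg_services exercised by these tests (as observed from the
# assertions below; the underlying implementations may differ):
#   - EmbeddingService.get_embedding(text) -> list[float]
#   - InMemoryKG.add_tool(tool), .build_vector_index(embedder),
#     .find_similar_tools(query_embedding, top_k) -> list of tool_id strings
#   - MCPTool(tool_id=..., name=..., description=..., tags=[...])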


def test_sprint2_task2_1_embedding_service_integration():
    """Test Task 2.1: EmbeddingService generates embeddings."""
    # Mock the OpenAI client and environment for testing
    with (
        patch("kg_services.embedder.openai.OpenAI") as mock_openai,
        patch("kg_services.embedder.os.getenv") as mock_getenv,
    ):

        # Make the API key lookup return a fake key
        mock_getenv.return_value = "fake-api-key"

        # Set up the mock client and embedding response
        mock_client = Mock()
        mock_openai.return_value = mock_client

        mock_response = Mock()
        mock_response.data = [Mock()]
        mock_response.data[0].embedding = [0.1, 0.2, 0.3, 0.4, 0.5]
        mock_client.embeddings.create.return_value = mock_response

        # Test embedding generation
        embedder = EmbeddingService()
        embedding = embedder.get_embedding("Test text for embedding")

        assert embedding is not None
        assert len(embedding) == 5
        assert embedding == [0.1, 0.2, 0.3, 0.4, 0.5]

        # Verify OpenAI client was called correctly
        mock_client.embeddings.create.assert_called_once_with(
            model="text-embedding-3-small", input="Test text for embedding"
        )
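
        # Note: the patch targets above assume EmbeddingService builds its
        # client roughly as `openai.OpenAI(api_key=os.getenv(...))` inside
        # kg_services.embedder and issues `client.embeddings.create(...)`.
        # If the module imported those names differently, the patch paths
        # would need to change; this is an assumption documented here, not
        # something the test verifies beyond the call assertion above.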


def test_sprint2_task2_2_vector_index_building_integration():
    """Test Task 2.2: InMemoryKG builds vector index with real EmbeddingService."""
    kg = InMemoryKG()

    # Add test tools
    tool1 = MCPTool(
        tool_id="integration_tool_001",
        name="Test Tool 1",
        description="First test tool for integration testing.",
        tags=["test", "integration"],
    )

    tool2 = MCPTool(
        tool_id="integration_tool_002",
        name="Test Tool 2",
        description="Second test tool for integration testing.",
        tags=["test", "mock"],
    )

    kg.add_tool(tool1)
    kg.add_tool(tool2)

    # Mock embedder
    mock_embedder = Mock()
    mock_embedder.get_embedding.side_effect = [
        [0.1, 0.2, 0.3],  # For tool1
        [0.4, 0.5, 0.6],  # For tool2
    ]

    # Build vector index
    kg.build_vector_index(mock_embedder)

    # Verify one embedding per tool was generated and indexed
    assert len(kg.tool_embeddings) == 2
    assert len(kg.tool_ids_for_vectors) == 2
    assert mock_embedder.get_embedding.call_count == 2
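
    # The assertions on kg.tool_embeddings / kg.tool_ids_for_vectors assume
    # build_vector_index stores one embedding per tool in parallel lists,
    # roughly as sketched below (presumed behavior, not the actual code):
    #
    #     for tool in self.tools:                       # hypothetical attribute
    #         text = f"{tool.name} {tool.description}"  # exact text is unknown
    #         self.tool_embeddings.append(embedder.get_embedding(text))
    #         self.tool_ids_for_vectors.append(tool.tool_id)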


def test_sprint2_task2_3_semantic_search_integration():
    """Test Task 2.3: End-to-end semantic search with cosine similarity."""
    kg = InMemoryKG()

    # Add test tools
    tool1 = MCPTool(
        tool_id="semantic_tool_001",
        name="Text Analyzer",
        description="Analyzes text for sentiment and entities.",
        tags=["nlp", "text"],
    )

    tool2 = MCPTool(
        tool_id="semantic_tool_002",
        name="Image Processor",
        description="Processes images for object detection.",
        tags=["vision", "image"],
    )

    tool3 = MCPTool(
        tool_id="semantic_tool_003",
        name="Text Summarizer",
        description="Summarizes long text documents.",
        tags=["nlp", "text"],
    )

    kg.add_tool(tool1)
    kg.add_tool(tool2)
    kg.add_tool(tool3)

    # Mock embedder with distinct, predictable embeddings
    mock_embedder = Mock()
    mock_embedder.get_embedding.side_effect = [
        [1.0, 0.0, 0.0],  # Text Analyzer - text-focused
        [0.0, 1.0, 0.0],  # Image Processor - vision-focused
        [0.9, 0.1, 0.0],  # Text Summarizer - similar to text analyzer
    ]

    # Build vector index
    kg.build_vector_index(mock_embedder)

    # Test semantic search with text-focused query
    text_query_embedding = [1.0, 0.0, 0.0]  # Similar to text tools
    similar_tools = kg.find_similar_tools(text_query_embedding, top_k=2)

    # Should return the two text-focused tools
    assert len(similar_tools) == 2
    assert "semantic_tool_001" in similar_tools  # Text Analyzer
    assert "semantic_tool_003" in similar_tools  # Text Summarizer

    # Test with vision-focused query
    vision_query_embedding = [0.0, 1.0, 0.0]  # Similar to vision tools
    similar_tools = kg.find_similar_tools(vision_query_embedding, top_k=1)

    # Should return only the vision tool
    assert len(similar_tools) == 1
    assert similar_tools[0] == "semantic_tool_002"  # Image Processor
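
    # Why this ranking holds under plain cosine similarity (assuming that is
    # what find_similar_tools uses): cos(q, v) = (q . v) / (|q| * |v|).
    # For the text query [1.0, 0.0, 0.0]:
    #   - Text Analyzer   [1.0, 0.0, 0.0] -> 1.0
    #   - Text Summarizer [0.9, 0.1, 0.0] -> 0.9 / sqrt(0.82) ~= 0.994
    #   - Image Processor [0.0, 1.0, 0.0] -> 0.0
    # so the two text tools come back for top_k=2, and only the orthogonal
    # vision query ranks the Image Processor first.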


def test_sprint2_full_pipeline_integration():
    """Test complete Sprint 2 pipeline: embeddings β†’ index β†’ search."""
    # Mock the OpenAI client and environment for this test
    with (
        patch("kg_services.embedder.openai.OpenAI") as mock_openai,
        patch("kg_services.embedder.os.getenv") as mock_getenv,
    ):

        # Make the API key lookup return a fake key
        mock_getenv.return_value = "fake-api-key"

        # Set up the mock OpenAI client
        mock_client = Mock()
        mock_openai.return_value = mock_client

        # Return a different embedding depending on which tool text is being
        # embedded, so the later similarity search is deterministic
        def mock_embedding_response(**kwargs):
            text = kwargs["input"]
            if "Text Analyzer" in text:
                return Mock(data=[Mock(embedding=[1.0, 0.1, 0.0])])
            if "Image Processor" in text:
                return Mock(data=[Mock(embedding=[0.1, 1.0, 0.0])])
            return Mock(data=[Mock(embedding=[0.5, 0.5, 0.5])])

        mock_client.embeddings.create.side_effect = mock_embedding_response

        # Create real services
        embedder = EmbeddingService()
        kg = InMemoryKG()

        # Add tools
        kg.add_tool(
            MCPTool(
                tool_id="pipeline_tool_001",
                name="Text Analyzer",
                description="Analyzes text content.",
                tags=["nlp", "text"],
            )
        )

        kg.add_tool(
            MCPTool(
                tool_id="pipeline_tool_002",
                name="Image Processor",
                description="Processes image content.",
                tags=["vision", "image"],
            )
        )

        # Build vector index with real embedder
        kg.build_vector_index(embedder)

        # Verify index was built
        assert len(kg.tool_embeddings) == 2
        assert len(kg.tool_ids_for_vectors) == 2

        # Test semantic search with a query vector closest to the Text
        # Analyzer embedding
        text_focused_query = [1.0, 0.1, 0.0]
        results = kg.find_similar_tools(text_focused_query, top_k=1)

        assert len(results) == 1
        assert results[0] == "pipeline_tool_001"  # Text Analyzer should be most similar

        # Verify OpenAI was called for each tool
        assert mock_client.embeddings.create.call_count == 2
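

# A minimal, hand-run sketch of the same pipeline without mocks. It assumes a
# real API key is configured in the environment (the exact variable name that
# EmbeddingService reads is not pinned down by these tests) and is guarded so
# pytest never executes it during collection.
if __name__ == "__main__":
    embedder = EmbeddingService()
    kg = InMemoryKG()
    kg.add_tool(
        MCPTool(
            tool_id="demo_tool_001",
            name="Text Analyzer",
            description="Analyzes text for sentiment and entities.",
            tags=["nlp", "text"],
        )
    )
    kg.build_vector_index(embedder)
    query_embedding = embedder.get_embedding("analyze the sentiment of a document")
    print(kg.find_similar_tools(query_embedding, top_k=1))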