Commit 983b7e6
Parent: d3091bd
Add application files

Changed files:
- .claude/settings.local.json            +2  -1
- Dockerfile                             +0  -71
- app.py                                 +45 -8
- mcp/agents/autonomous_agent_ollama.py  +51 -67
.claude/settings.local.json CHANGED

@@ -23,7 +23,8 @@
       "Bash(git -C D:/cx_ai_agent/cx_ai_agent diff app.py)",
       "Bash(git -C D:/cx_ai_agent/cx_ai_agent diff --stat HEAD)",
       "Bash(git -C D:/cx_ai_agent/cx_ai_agent pull)",
-      "Bash(git config:*)"
+      "Bash(git config:*)",
+      "Bash(del \"D:\\cx_ai_agent\\cx_ai_agent\\Dockerfile\")"
     ],
     "deny": [],
     "ask": []
Dockerfile DELETED

@@ -1,71 +0,0 @@
-# HuggingFace Spaces Dockerfile with Ollama
-# Uses IBM Granite4:1b model for AI inference (completely FREE)
-
-FROM python:3.10-slim
-
-# Set environment variables
-ENV PYTHONUNBUFFERED=1
-ENV PYTHONDONTWRITEBYTECODE=1
-ENV GRADIO_SERVER_NAME=0.0.0.0
-ENV GRADIO_SERVER_PORT=7860
-ENV OLLAMA_HOST=http://localhost:11434
-ENV OLLAMA_MODEL=granite4:1b
-
-# Install system dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    curl \
-    ca-certificates \
-    procps \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install Ollama
-RUN curl -fsSL https://ollama.com/install.sh | sh
-
-# Create app directory
-WORKDIR /app
-
-# Copy requirements and install Python dependencies
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Copy application code
-COPY . .
-
-# Create startup script that:
-# 1. Starts Ollama server in background
-# 2. Waits for it to be ready
-# 3. Pulls the model
-# 4. Starts the Gradio app
-RUN echo '#!/bin/bash\n\
-set -e\n\
-echo "=== Starting Ollama server ==="\n\
-ollama serve &\n\
-OLLAMA_PID=$!\n\
-\n\
-# Wait for Ollama to be ready\n\
-echo "Waiting for Ollama to start..."\n\
-for i in {1..30}; do\n\
-if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then\n\
-echo "Ollama is ready!"\n\
-break\n\
-fi\n\
-echo "Waiting... ($i/30)"\n\
-sleep 2\n\
-done\n\
-\n\
-echo "=== Pulling IBM Granite model ==="\n\
-ollama pull granite4:1b || echo "Model pull failed, will retry on first use"\n\
-\n\
-echo "=== Starting Gradio application ==="\n\
-exec python app.py\n\
-' > /app/start.sh && chmod +x /app/start.sh
-
-# Expose port
-EXPOSE 7860
-
-# Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
-    CMD curl -f http://localhost:7860/ || exit 1
-
-# Run the startup script
-CMD ["/app/start.sh"]
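The deleted start script waited for the Ollama server by polling http://localhost:11434/api/tags before pulling the model. If that readiness check is still wanted now that the shell script is gone, a minimal Python sketch using only the standard library could look like the following; the function name wait_for_ollama and its retry defaults are illustrative and not part of this commit.

# Minimal sketch: poll the Ollama HTTP endpoint until it responds, mirroring
# the wait loop from the deleted start.sh. Names and defaults are illustrative.
import time
import urllib.error
import urllib.request


def wait_for_ollama(host: str = "http://localhost:11434",
                    attempts: int = 30, delay: float = 2.0) -> bool:
    """Return True once GET {host}/api/tags answers, False after all attempts."""
    for i in range(1, attempts + 1):
        try:
            with urllib.request.urlopen(f"{host}/api/tags", timeout=5) as resp:
                if resp.status == 200:
                    print("Ollama is ready!")
                    return True
        except (urllib.error.URLError, OSError):
            pass
        print(f"Waiting... ({i}/{attempts})")
        time.sleep(delay)
    return False

Calling wait_for_ollama() before the warm-up added in app.py would mirror the behaviour of the deleted wait loop.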
app.py CHANGED

@@ -51,12 +51,13 @@ print("\n" + "="*80)
 print("🚀 CX AI AGENT - ENTERPRISE B2B SALES INTELLIGENCE")
 print("="*80)

-# AI Mode - Ollama
-
+# AI Mode - Ollama with IBM Granite 4 1B model
+# Uses ollama Python library directly (pip install ollama)
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "granite4:1b")
-
-print(f"
+
+print(f"🤖 AI Mode: Ollama (Local LLM)")
 print(f" Model: {OLLAMA_MODEL}")
+print(f" Using: ollama.chat() Python client")

 serper_key = os.getenv('SERPER_API_KEY')
 if serper_key:

@@ -78,6 +79,42 @@ except Exception as e:
     raise


+# Warm-up Ollama model on startup (optional, for faster first request)
+def warmup_ollama_model():
+    """
+    Send a dummy prompt to warm up the Ollama model.
+    This ensures the model is loaded and ready for the first real request.
+
+    Uses ollama.chat() directly as per the guide.
+    """
+    try:
+        from ollama import chat
+        print(f"🔥 Warming up Ollama model ({OLLAMA_MODEL})...")
+
+        # Send a simple dummy prompt to load the model into memory
+        response = chat(
+            model=OLLAMA_MODEL,
+            messages=[{'role': 'user', 'content': 'Hello'}],
+            options={'temperature': 0.0, 'num_predict': 10}
+        )
+
+        if response and hasattr(response, 'message'):
+            print(f"✅ Model warmed up and ready!")
+        else:
+            print("✅ Model warm-up complete")
+    except ImportError:
+        print("⚠️ ollama package not installed, skipping warm-up")
+    except Exception as e:
+        # Don't fail startup on warm-up error, just log it
+        print(f"⚠️ Model warm-up failed (will load on first request): {e}")
+
+
+# Run warm-up in background to not block startup
+import threading
+warmup_thread = threading.Thread(target=warmup_ollama_model, daemon=True)
+warmup_thread.start()
+
+
 # ============================================================================
 # KNOWLEDGE BASE - Session Storage
 # ============================================================================

@@ -539,10 +576,10 @@ async def setup_client_company(company_name: str, progress=gr.Progress()):
     progress(0.1, desc="Initializing...")

     try:
+        # Initialize Ollama agent using ollama.chat() directly
         agent = AutonomousMCPAgentOllama(
             mcp_registry=mcp_registry,
-            model=os.getenv('OLLAMA_MODEL', 'granite4:1b'),
-            host=os.getenv('OLLAMA_HOST', 'http://localhost:11434')
+            model=os.getenv('OLLAMA_MODEL', 'granite4:1b')
         )
         output += f"✅ AI Agent initialized (Ollama - {agent.model})\n\n"
         yield output

@@ -725,10 +762,10 @@ async def discover_prospects(num_prospects: int = 5, progress=gr.Progress()):
     progress(0.1)

     try:
+        # Initialize Ollama agent using ollama.chat() directly
         agent = AutonomousMCPAgentOllama(
             mcp_registry=mcp_registry,
-            model=os.getenv('OLLAMA_MODEL', 'granite4:1b'),
-            host=os.getenv('OLLAMA_HOST', 'http://localhost:11434')
+            model=os.getenv('OLLAMA_MODEL', 'granite4:1b')
         )
         output += f"✅ AI Agent initialized (Ollama - {agent.model})\n\n"
         yield output
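The warm-up thread added above is fire-and-forget, so the logs do not show whether it actually shortened the first real request. One way to check locally is to time two consecutive chat() calls against the same model: the first (cold) call includes model loading, while the second should be much faster. This standalone timing sketch is illustrative only and assumes the ollama package is installed and a local server already has the model pulled.

# Sketch: time a cold call vs. a warm call to see what the warm-up saves.
import os
import time

from ollama import chat

MODEL = os.getenv("OLLAMA_MODEL", "granite4:1b")


def timed_chat(label: str) -> None:
    start = time.perf_counter()
    chat(
        model=MODEL,
        messages=[{"role": "user", "content": "Hello"}],
        options={"temperature": 0.0, "num_predict": 10},
    )
    print(f"{label}: {time.perf_counter() - start:.2f}s")


if __name__ == "__main__":
    timed_chat("cold call (may include model load)")
    timed_chat("warm call (model already resident)")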
mcp/agents/autonomous_agent_ollama.py CHANGED

@@ -2,7 +2,19 @@
 Autonomous AI Agent with MCP Tool Calling using Ollama Python Client

 Uses the ollama Python package for LLM inference.
-
+Based on: https://github.com/ollama/ollama-python
+
+Example usage (from the guide):
+    from ollama import chat
+    response = chat(
+        model='granite4:1b',
+        messages=[
+            {'role': 'system', 'content': 'You are a helpful assistant.'},
+            {'role': 'user', 'content': user_input}
+        ],
+        options={'temperature': 0.0, 'top_p': 1.0}
+    )
+    output = response.message.content
 """

 import os

@@ -10,46 +22,35 @@ import json
 import uuid
 import logging
 import asyncio
-from typing import List, Dict, Any, AsyncGenerator
+from typing import List, Dict, Any, AsyncGenerator

 from mcp.tools.definitions import MCP_TOOLS
 from mcp.registry import MCPRegistry

 logger = logging.getLogger(__name__)

-#
-OLLAMA_MODELS = [
-    "granite4:1b",        # IBM Granite 4 1B - user preferred
-    "granite3-dense:2b",  # IBM Granite 3 Dense 2B
-    "qwen2.5:3b",         # Best for tool calling, small
-    "llama3.2:3b",        # Meta's small model
-    "phi3:mini",          # Microsoft's small model
-    "gemma2:2b",          # Google's small model
-]
-
+# Default model - IBM Granite 4 1B
 DEFAULT_MODEL = "granite4:1b"
-OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")


 class AutonomousMCPAgentOllama:
     """
-    AI Agent using Ollama (local LLM
+    AI Agent using Ollama Python client (FREE local LLM)
+
+    Uses ollama.chat() directly as per the official documentation.
+    Temperature=0.0 and top_p=1.0 recommended for Granite family models.
     """

     def __init__(
         self,
         mcp_registry: MCPRegistry,
-        model: str = None,
-        host: str = None
+        model: str = None
     ):
         self.mcp_registry = mcp_registry
         self.model = model or os.getenv("OLLAMA_MODEL", DEFAULT_MODEL)
-        self.host = host or OLLAMA_HOST
         self.tools_description = self._build_tools_description()

-        logger.info(f"Ollama Agent initialized")
-        logger.info(f"  Host: {self.host}")
-        logger.info(f"  Model: {self.model}")
+        logger.info(f"Ollama Agent initialized with model: {self.model}")

     def _build_tools_description(self) -> str:
         """Build tool descriptions for the system prompt"""

@@ -107,16 +108,16 @@ Be concise."""
             }

             try:
-                # Call Ollama
+                # Call Ollama using the Python client
                 response = await self._call_ollama(messages)
-                assistant_content = response.get("
+                assistant_content = response.get("content", "")

                 if not assistant_content:
                     continue

                 # Check for completion
                 if "DONE" in assistant_content.upper():
-                    final_text = assistant_content.replace("DONE", "").strip()
+                    final_text = assistant_content.replace("DONE", "").replace("done", "").strip()
                     yield {
                         "type": "thought",
                         "thought": final_text,

@@ -194,68 +195,51 @@ Be concise."""
         }

     async def _call_ollama(self, messages: List[Dict]) -> Dict:
-        """
+        """
+        Call Ollama using the official Python client.
+
+        Uses ollama.chat() directly as per the guide:
+        https://github.com/ollama/ollama-python
+
+        Temperature=0.0 and top_p=1.0 recommended for Granite models.
+        """
         try:
-            import
-            from ollama import Client, ResponseError
+            from ollama import chat, ResponseError
         except ImportError:
             raise ImportError("ollama package not installed. Run: pip install ollama")

         try:
-            #
-
-
-            # Run synchronous call in executor to not block
+            # Use ollama.chat() directly as shown in the guide
+            # Run in executor to not block the async event loop
             loop = asyncio.get_event_loop()
             response = await loop.run_in_executor(
                 None,
-                lambda:
+                lambda: chat(
                     model=self.model,
                     messages=messages,
                     options={
-                        "temperature": 0.0,  # Deterministic for tool calling
-                        "top_p": 1.0,
-                        "num_predict": 2048
+                        "temperature": 0.0,  # Deterministic output for tool calling
+                        "top_p": 1.0         # Full probability mass (Granite recommended)
                     }
                 )
             )

-            #
-
-
-
-
-
+            # Extract response content: response.message.content
+            content = ""
+            if hasattr(response, 'message') and hasattr(response.message, 'content'):
+                content = response.message.content
+            elif isinstance(response, dict):
+                content = response.get("message", {}).get("content", "")
+
+            return {"content": content}

         except ResponseError as e:
-
+            # Handle Ollama-specific errors (model not available, etc.)
+            logger.error(f"Ollama ResponseError: {e}")
+            raise Exception(f"Ollama error: {e}. Make sure Ollama is running and the model '{self.model}' is pulled.")
         except Exception as e:
-
-
-            return await self._call_ollama_http(messages)
-
-    async def _call_ollama_http(self, messages: List[Dict]) -> Dict:
-        """Fallback: Call Ollama via HTTP API"""
-        import aiohttp
-
-        url = f"{self.host}/api/chat"
-        payload = {
-            "model": self.model,
-            "messages": messages,
-            "stream": False,
-            "options": {
-                "temperature": 0.0,
-                "top_p": 1.0,
-                "num_predict": 2048
-            }
-        }
-
-        async with aiohttp.ClientSession() as session:
-            async with session.post(url, json=payload, timeout=aiohttp.ClientTimeout(total=120)) as resp:
-                if resp.status != 200:
-                    text = await resp.text()
-                    raise Exception(f"Ollama HTTP error {resp.status}: {text}")
-                return await resp.json()
+            logger.error(f"Ollama call failed: {e}")
+            raise Exception(f"Ollama error: {e}")

     def _parse_tool_calls(self, text: str) -> List[Dict]:
         """Parse tool calls from response"""
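The new _call_ollama wraps the synchronous chat() call in run_in_executor so the async event loop is not blocked. The ollama package also provides AsyncClient, whose chat() is awaitable; the sketch below shows that alternative for comparison. It is not what this commit uses, the explicit host argument is an assumption (the commit dropped host handling in favor of the client default), and the response.message.content access assumes a recent ollama client version.

# Sketch of an alternative to run_in_executor: ollama.AsyncClient exposes an
# awaitable chat(). Not what this commit does; shown for comparison only.
import os
from typing import Dict, List

from ollama import AsyncClient


async def call_ollama_async(messages: List[Dict], model: str) -> Dict:
    # Explicit host is an assumption; by default the client targets the local server.
    client = AsyncClient(host=os.getenv("OLLAMA_HOST", "http://localhost:11434"))
    response = await client.chat(
        model=model,
        messages=messages,
        options={"temperature": 0.0, "top_p": 1.0},
    )
    # Recent ollama client versions return an object with .message.content;
    # older versions returned a plain dict, as the commit's extraction code handles.
    content = getattr(getattr(response, "message", None), "content", None)
    if content is None and isinstance(response, dict):
        content = response.get("message", {}).get("content", "")
    return {"content": content or ""}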