muzakkirhussain011 committed
Commit 983b7e6 · Parent: d3091bd

Add application files

.claude/settings.local.json CHANGED
@@ -23,7 +23,8 @@
       "Bash(git -C D:/cx_ai_agent/cx_ai_agent diff app.py)",
       "Bash(git -C D:/cx_ai_agent/cx_ai_agent diff --stat HEAD)",
       "Bash(git -C D:/cx_ai_agent/cx_ai_agent pull)",
-      "Bash(git config:*)"
+      "Bash(git config:*)",
+      "Bash(del \"D:\\cx_ai_agent\\cx_ai_agent\\Dockerfile\")"
     ],
     "deny": [],
     "ask": []
Dockerfile DELETED
@@ -1,71 +0,0 @@
-# HuggingFace Spaces Dockerfile with Ollama
-# Uses IBM Granite4:1b model for AI inference (completely FREE)
-
-FROM python:3.10-slim
-
-# Set environment variables
-ENV PYTHONUNBUFFERED=1
-ENV PYTHONDONTWRITEBYTECODE=1
-ENV GRADIO_SERVER_NAME=0.0.0.0
-ENV GRADIO_SERVER_PORT=7860
-ENV OLLAMA_HOST=http://localhost:11434
-ENV OLLAMA_MODEL=granite4:1b
-
-# Install system dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    curl \
-    ca-certificates \
-    procps \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install Ollama
-RUN curl -fsSL https://ollama.com/install.sh | sh
-
-# Create app directory
-WORKDIR /app
-
-# Copy requirements and install Python dependencies
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Copy application code
-COPY . .
-
-# Create startup script that:
-# 1. Starts Ollama server in background
-# 2. Waits for it to be ready
-# 3. Pulls the model
-# 4. Starts the Gradio app
-RUN echo '#!/bin/bash\n\
-set -e\n\
-echo "=== Starting Ollama server ==="\n\
-ollama serve &\n\
-OLLAMA_PID=$!\n\
-\n\
-# Wait for Ollama to be ready\n\
-echo "Waiting for Ollama to start..."\n\
-for i in {1..30}; do\n\
-  if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then\n\
-    echo "Ollama is ready!"\n\
-    break\n\
-  fi\n\
-  echo "Waiting... ($i/30)"\n\
-  sleep 2\n\
-done\n\
-\n\
-echo "=== Pulling IBM Granite model ==="\n\
-ollama pull granite4:1b || echo "Model pull failed, will retry on first use"\n\
-\n\
-echo "=== Starting Gradio application ==="\n\
-exec python app.py\n\
-' > /app/start.sh && chmod +x /app/start.sh
-
-# Expose port
-EXPOSE 7860
-
-# Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
-    CMD curl -f http://localhost:7860/ || exit 1
-
-# Run the startup script
-CMD ["/app/start.sh"]
app.py CHANGED
@@ -51,12 +51,13 @@ print("\n" + "="*80)
 print("🚀 CX AI AGENT - ENTERPRISE B2B SALES INTELLIGENCE")
 print("="*80)
 
-# AI Mode - Ollama (local/remote LLM)
-OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
+# AI Mode - Ollama with IBM Granite 4 1B model
+# Uses ollama Python library directly (pip install ollama)
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "granite4:1b")
-print(f"🤖 AI Mode: Ollama (Local/Remote LLM)")
-print(f" Host: {OLLAMA_HOST}")
+
+print(f"🤖 AI Mode: Ollama (Local LLM)")
 print(f" Model: {OLLAMA_MODEL}")
+print(f" Using: ollama.chat() Python client")
 
 serper_key = os.getenv('SERPER_API_KEY')
 if serper_key:
@@ -78,6 +79,42 @@ except Exception as e:
     raise
 
 
+# Warm-up Ollama model on startup (optional, for faster first request)
+def warmup_ollama_model():
+    """
+    Send a dummy prompt to warm up the Ollama model.
+    This ensures the model is loaded and ready for the first real request.
+
+    Uses ollama.chat() directly as per the guide.
+    """
+    try:
+        from ollama import chat
+        print(f"🔥 Warming up Ollama model ({OLLAMA_MODEL})...")
+
+        # Send a simple dummy prompt to load the model into memory
+        response = chat(
+            model=OLLAMA_MODEL,
+            messages=[{'role': 'user', 'content': 'Hello'}],
+            options={'temperature': 0.0, 'num_predict': 10}
+        )
+
+        if response and hasattr(response, 'message'):
+            print(f"✅ Model warmed up and ready!")
+        else:
+            print("✅ Model warm-up complete")
+    except ImportError:
+        print("⚠️ ollama package not installed, skipping warm-up")
+    except Exception as e:
+        # Don't fail startup on warm-up error, just log it
+        print(f"⚠️ Model warm-up failed (will load on first request): {e}")
+
+
+# Run warm-up in background to not block startup
+import threading
+warmup_thread = threading.Thread(target=warmup_ollama_model, daemon=True)
+warmup_thread.start()
+
+
 # ============================================================================
 # KNOWLEDGE BASE - Session Storage
 # ============================================================================
@@ -539,10 +576,10 @@ async def setup_client_company(company_name: str, progress=gr.Progress()):
     progress(0.1, desc="Initializing...")
 
     try:
+        # Initialize Ollama agent using ollama.chat() directly
         agent = AutonomousMCPAgentOllama(
             mcp_registry=mcp_registry,
-            model=os.getenv('OLLAMA_MODEL', 'granite4:1b'),
-            host=os.getenv('OLLAMA_HOST', 'http://localhost:11434')
+            model=os.getenv('OLLAMA_MODEL', 'granite4:1b')
         )
         output += f"✅ AI Agent initialized (Ollama - {agent.model})\n\n"
         yield output
@@ -725,10 +762,10 @@ async def discover_prospects(num_prospects: int = 5, progress=gr.Progress()):
     progress(0.1)
 
     try:
+        # Initialize Ollama agent using ollama.chat() directly
         agent = AutonomousMCPAgentOllama(
             mcp_registry=mcp_registry,
-            model=os.getenv('OLLAMA_MODEL', 'granite4:1b'),
-            host=os.getenv('OLLAMA_HOST', 'http://localhost:11434')
+            model=os.getenv('OLLAMA_MODEL', 'granite4:1b')
        )
         output += f"✅ AI Agent initialized (Ollama - {agent.model})\n\n"
         yield output
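Note: the warm-up added above runs on a daemon thread, so loading granite4:1b does not block the Gradio server from coming up. A minimal sketch of the same pattern, separate from the real app; the tiny gr.Interface and the launch() arguments here are placeholders for illustration, the actual app.py builds a much larger UI:

import os
import threading

import gradio as gr
from ollama import chat

OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "granite4:1b")

def warmup_ollama_model():
    # Tiny prompt just to load the model into memory; failures are non-fatal.
    try:
        chat(model=OLLAMA_MODEL,
             messages=[{"role": "user", "content": "Hello"}],
             options={"temperature": 0.0, "num_predict": 10})
        print("Model warmed up and ready")
    except Exception as exc:
        print(f"Warm-up skipped, model will load on first request: {exc}")

# Daemon thread: the UI starts immediately while the warm-up finishes in the background.
threading.Thread(target=warmup_ollama_model, daemon=True).start()

demo = gr.Interface(fn=lambda text: text, inputs="text", outputs="text")  # placeholder UI
demo.launch(server_name="0.0.0.0", server_port=7860)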
mcp/agents/autonomous_agent_ollama.py CHANGED
@@ -2,7 +2,19 @@
 Autonomous AI Agent with MCP Tool Calling using Ollama Python Client
 
 Uses the ollama Python package for LLM inference.
-Supports both local Ollama servers and remote Ollama instances.
+Based on: https://github.com/ollama/ollama-python
+
+Example usage (from the guide):
+    from ollama import chat
+    response = chat(
+        model='granite4:1b',
+        messages=[
+            {'role': 'system', 'content': 'You are a helpful assistant.'},
+            {'role': 'user', 'content': user_input}
+        ],
+        options={'temperature': 0.0, 'top_p': 1.0}
+    )
+    output = response.message.content
 """
 
 import os
@@ -10,46 +22,35 @@ import json
 import uuid
 import logging
 import asyncio
-from typing import List, Dict, Any, AsyncGenerator, Optional
+from typing import List, Dict, Any, AsyncGenerator
 
 from mcp.tools.definitions import MCP_TOOLS
 from mcp.registry import MCPRegistry
 
 logger = logging.getLogger(__name__)
 
-# Ollama models (small, fast models for tool calling)
-OLLAMA_MODELS = [
-    "granite4:1b",  # IBM Granite 4 1B - user preferred
-    "granite3-dense:2b",  # IBM Granite 3 Dense 2B
-    "qwen2.5:3b",  # Best for tool calling, small
-    "llama3.2:3b",  # Meta's small model
-    "phi3:mini",  # Microsoft's small model
-    "gemma2:2b",  # Google's small model
-]
-
+# Default model - IBM Granite 4 1B
 DEFAULT_MODEL = "granite4:1b"
-OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
 
 
 class AutonomousMCPAgentOllama:
     """
-    AI Agent using Ollama (local LLM, FREE)
+    AI Agent using Ollama Python client (FREE local LLM)
+
+    Uses ollama.chat() directly as per the official documentation.
+    Temperature=0.0 and top_p=1.0 recommended for Granite family models.
     """
 
     def __init__(
         self,
         mcp_registry: MCPRegistry,
-        model: str = None,
-        host: str = None
+        model: str = None
     ):
         self.mcp_registry = mcp_registry
         self.model = model or os.getenv("OLLAMA_MODEL", DEFAULT_MODEL)
-        self.host = host or OLLAMA_HOST
         self.tools_description = self._build_tools_description()
 
-        logger.info(f"Ollama Agent initialized")
-        logger.info(f" Host: {self.host}")
-        logger.info(f" Model: {self.model}")
+        logger.info(f"Ollama Agent initialized with model: {self.model}")
 
     def _build_tools_description(self) -> str:
         """Build tool descriptions for the system prompt"""
@@ -107,16 +108,16 @@ Be concise."""
             }
 
             try:
-                # Call Ollama
+                # Call Ollama using the Python client
                 response = await self._call_ollama(messages)
-                assistant_content = response.get("message", {}).get("content", "")
+                assistant_content = response.get("content", "")
 
                 if not assistant_content:
                     continue
 
                 # Check for completion
                 if "DONE" in assistant_content.upper():
-                    final_text = assistant_content.replace("DONE", "").strip()
+                    final_text = assistant_content.replace("DONE", "").replace("done", "").strip()
                     yield {
                         "type": "thought",
                         "thought": final_text,
@@ -194,68 +195,51 @@ Be concise."""
             }
 
     async def _call_ollama(self, messages: List[Dict]) -> Dict:
-        """Call Ollama using the official Python client"""
+        """
+        Call Ollama using the official Python client.
+
+        Uses ollama.chat() directly as per the guide:
+        https://github.com/ollama/ollama-python
+
+        Temperature=0.0 and top_p=1.0 recommended for Granite models.
+        """
        try:
-            import ollama
-            from ollama import Client, ResponseError
+            from ollama import chat, ResponseError
        except ImportError:
            raise ImportError("ollama package not installed. Run: pip install ollama")
 
        try:
-            # Create client with host if specified
-            client = Client(host=self.host) if self.host else ollama
-
-            # Run synchronous call in executor to not block
+            # Use ollama.chat() directly as shown in the guide
+            # Run in executor to not block the async event loop
            loop = asyncio.get_event_loop()
            response = await loop.run_in_executor(
                None,
-                lambda: client.chat(
+                lambda: chat(
                    model=self.model,
                    messages=messages,
                    options={
-                        "temperature": 0.0,  # Deterministic for tool calling
-                        "top_p": 1.0,
-                        "num_predict": 2048
+                        "temperature": 0.0,  # Deterministic output for tool calling
+                        "top_p": 1.0  # Full probability mass (Granite recommended)
                    }
                )
            )
 
-            # Return in expected format
-            return {
-                "message": {
-                    "content": response.message.content if hasattr(response, 'message') else response.get("message", {}).get("content", "")
-                }
-            }
+            # Extract response content: response.message.content
+            content = ""
+            if hasattr(response, 'message') and hasattr(response.message, 'content'):
+                content = response.message.content
+            elif isinstance(response, dict):
+                content = response.get("message", {}).get("content", "")
+
+            return {"content": content}
 
        except ResponseError as e:
-            raise Exception(f"Ollama error: {e}")
+            # Handle Ollama-specific errors (model not available, etc.)
+            logger.error(f"Ollama ResponseError: {e}")
+            raise Exception(f"Ollama error: {e}. Make sure Ollama is running and the model '{self.model}' is pulled.")
        except Exception as e:
-            # Fallback to aiohttp if ollama client fails
-            logger.warning(f"Ollama client failed, trying HTTP: {e}")
-            return await self._call_ollama_http(messages)
-
-    async def _call_ollama_http(self, messages: List[Dict]) -> Dict:
-        """Fallback: Call Ollama via HTTP API"""
-        import aiohttp
-
-        url = f"{self.host}/api/chat"
-        payload = {
-            "model": self.model,
-            "messages": messages,
-            "stream": False,
-            "options": {
-                "temperature": 0.0,
-                "top_p": 1.0,
-                "num_predict": 2048
-            }
-        }
-
-        async with aiohttp.ClientSession() as session:
-            async with session.post(url, json=payload, timeout=aiohttp.ClientTimeout(total=120)) as resp:
-                if resp.status != 200:
-                    text = await resp.text()
-                    raise Exception(f"Ollama HTTP error {resp.status}: {text}")
-                return await resp.json()
+            logger.error(f"Ollama call failed: {e}")
+            raise Exception(f"Ollama error: {e}")
 
     def _parse_tool_calls(self, text: str) -> List[Dict]:
         """Parse tool calls from response"""