Commit 983b7e6
Parent: d3091bd
Add application files

Changed files:
- .claude/settings.local.json            +2  -1
- Dockerfile                             +0  -71
- app.py                                 +45 -8
- mcp/agents/autonomous_agent_ollama.py  +51 -67
.claude/settings.local.json CHANGED

@@ -23,7 +23,8 @@
       "Bash(git -C D:/cx_ai_agent/cx_ai_agent diff app.py)",
       "Bash(git -C D:/cx_ai_agent/cx_ai_agent diff --stat HEAD)",
       "Bash(git -C D:/cx_ai_agent/cx_ai_agent pull)",
-      "Bash(git config:*)"
+      "Bash(git config:*)",
+      "Bash(del \"D:\\cx_ai_agent\\cx_ai_agent\\Dockerfile\")"
     ],
     "deny": [],
     "ask": []
Dockerfile DELETED

@@ -1,71 +0,0 @@
-# HuggingFace Spaces Dockerfile with Ollama
-# Uses IBM Granite4:1b model for AI inference (completely FREE)
-
-FROM python:3.10-slim
-
-# Set environment variables
-ENV PYTHONUNBUFFERED=1
-ENV PYTHONDONTWRITEBYTECODE=1
-ENV GRADIO_SERVER_NAME=0.0.0.0
-ENV GRADIO_SERVER_PORT=7860
-ENV OLLAMA_HOST=http://localhost:11434
-ENV OLLAMA_MODEL=granite4:1b
-
-# Install system dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    curl \
-    ca-certificates \
-    procps \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install Ollama
-RUN curl -fsSL https://ollama.com/install.sh | sh
-
-# Create app directory
-WORKDIR /app
-
-# Copy requirements and install Python dependencies
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Copy application code
-COPY . .
-
-# Create startup script that:
-# 1. Starts Ollama server in background
-# 2. Waits for it to be ready
-# 3. Pulls the model
-# 4. Starts the Gradio app
-RUN echo '#!/bin/bash\n\
-set -e\n\
-echo "=== Starting Ollama server ==="\n\
-ollama serve &\n\
-OLLAMA_PID=$!\n\
-\n\
-# Wait for Ollama to be ready\n\
-echo "Waiting for Ollama to start..."\n\
-for i in {1..30}; do\n\
-if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then\n\
-echo "Ollama is ready!"\n\
-break\n\
-fi\n\
-echo "Waiting... ($i/30)"\n\
-sleep 2\n\
-done\n\
-\n\
-echo "=== Pulling IBM Granite model ==="\n\
-ollama pull granite4:1b || echo "Model pull failed, will retry on first use"\n\
-\n\
-echo "=== Starting Gradio application ==="\n\
-exec python app.py\n\
-' > /app/start.sh && chmod +x /app/start.sh
-
-# Expose port
-EXPOSE 7860
-
-# Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
-    CMD curl -f http://localhost:7860/ || exit 1
-
-# Run the startup script
-CMD ["/app/start.sh"]
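The deleted start script waited for the Ollama server by polling http://localhost:11434/api/tags before pulling the model. If that readiness check is still wanted now that the shell script is gone, a minimal Python sketch using only the standard library could look like the following; the function name wait_for_ollama and its retry defaults are illustrative and not part of this commit.

# Minimal sketch: poll the Ollama HTTP endpoint until it responds, mirroring
# the wait loop from the deleted start.sh. Names and defaults are illustrative.
import time
import urllib.error
import urllib.request


def wait_for_ollama(host: str = "http://localhost:11434",
                    attempts: int = 30, delay: float = 2.0) -> bool:
    """Return True once GET {host}/api/tags answers, False after all attempts."""
    for i in range(1, attempts + 1):
        try:
            with urllib.request.urlopen(f"{host}/api/tags", timeout=5) as resp:
                if resp.status == 200:
                    print("Ollama is ready!")
                    return True
        except (urllib.error.URLError, OSError):
            pass
        print(f"Waiting... ({i}/{attempts})")
        time.sleep(delay)
    return False

Calling wait_for_ollama() before the warm-up added in app.py would mirror the behaviour of the deleted wait loop.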
app.py CHANGED

@@ -51,12 +51,13 @@ print("\n" + "="*80)
 print("🚀 CX AI AGENT - ENTERPRISE B2B SALES INTELLIGENCE")
 print("="*80)

-# AI Mode - Ollama
-
+# AI Mode - Ollama with IBM Granite 4 1B model
+# Uses ollama Python library directly (pip install ollama)
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "granite4:1b")
-
-print(f"
+
+print(f"🤖 AI Mode: Ollama (Local LLM)")
 print(f" Model: {OLLAMA_MODEL}")
+print(f" Using: ollama.chat() Python client")

 serper_key = os.getenv('SERPER_API_KEY')
 if serper_key:

@@ -78,6 +79,42 @@ except Exception as e:
     raise


+# Warm-up Ollama model on startup (optional, for faster first request)
+def warmup_ollama_model():
+    """
+    Send a dummy prompt to warm up the Ollama model.
+    This ensures the model is loaded and ready for the first real request.
+
+    Uses ollama.chat() directly as per the guide.
+    """
+    try:
+        from ollama import chat
+        print(f"🔥 Warming up Ollama model ({OLLAMA_MODEL})...")
+
+        # Send a simple dummy prompt to load the model into memory
+        response = chat(
+            model=OLLAMA_MODEL,
+            messages=[{'role': 'user', 'content': 'Hello'}],
+            options={'temperature': 0.0, 'num_predict': 10}
+        )
+
+        if response and hasattr(response, 'message'):
+            print(f"✅ Model warmed up and ready!")
+        else:
+            print("✅ Model warm-up complete")
+    except ImportError:
+        print("⚠️ ollama package not installed, skipping warm-up")
+    except Exception as e:
+        # Don't fail startup on warm-up error, just log it
+        print(f"⚠️ Model warm-up failed (will load on first request): {e}")
+
+
+# Run warm-up in background to not block startup
+import threading
+warmup_thread = threading.Thread(target=warmup_ollama_model, daemon=True)
+warmup_thread.start()
+
+
 # ============================================================================
 # KNOWLEDGE BASE - Session Storage
 # ============================================================================

@@ -539,10 +576,10 @@ async def setup_client_company(company_name: str, progress=gr.Progress()):
     progress(0.1, desc="Initializing...")

     try:
+        # Initialize Ollama agent using ollama.chat() directly
         agent = AutonomousMCPAgentOllama(
             mcp_registry=mcp_registry,
-            model=os.getenv('OLLAMA_MODEL', 'granite4:1b'),
-            host=os.getenv('OLLAMA_HOST', 'http://localhost:11434')
+            model=os.getenv('OLLAMA_MODEL', 'granite4:1b')
         )
         output += f"✅ AI Agent initialized (Ollama - {agent.model})\n\n"
         yield output

@@ -725,10 +762,10 @@ async def discover_prospects(num_prospects: int = 5, progress=gr.Progress()):
     progress(0.1)

     try:
+        # Initialize Ollama agent using ollama.chat() directly
         agent = AutonomousMCPAgentOllama(
             mcp_registry=mcp_registry,
-            model=os.getenv('OLLAMA_MODEL', 'granite4:1b'),
-            host=os.getenv('OLLAMA_HOST', 'http://localhost:11434')
+            model=os.getenv('OLLAMA_MODEL', 'granite4:1b')
         )
         output += f"✅ AI Agent initialized (Ollama - {agent.model})\n\n"
         yield output
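The warm-up thread added above is fire-and-forget, so the logs do not show whether it actually shortened the first real request. One way to check locally is to time two consecutive chat() calls against the same model: the first (cold) call includes model loading, while the second should be much faster. This standalone timing sketch is illustrative only and assumes the ollama package is installed and a local server already has the model pulled.

# Sketch: time a cold call vs. a warm call to see what the warm-up saves.
import os
import time

from ollama import chat

MODEL = os.getenv("OLLAMA_MODEL", "granite4:1b")


def timed_chat(label: str) -> None:
    start = time.perf_counter()
    chat(
        model=MODEL,
        messages=[{"role": "user", "content": "Hello"}],
        options={"temperature": 0.0, "num_predict": 10},
    )
    print(f"{label}: {time.perf_counter() - start:.2f}s")


if __name__ == "__main__":
    timed_chat("cold call (may include model load)")
    timed_chat("warm call (model already resident)")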
mcp/agents/autonomous_agent_ollama.py CHANGED

@@ -2,7 +2,19 @@
 Autonomous AI Agent with MCP Tool Calling using Ollama Python Client

 Uses the ollama Python package for LLM inference.
-
+Based on: https://github.com/ollama/ollama-python
+
+Example usage (from the guide):
+    from ollama import chat
+    response = chat(
+        model='granite4:1b',
+        messages=[
+            {'role': 'system', 'content': 'You are a helpful assistant.'},
+            {'role': 'user', 'content': user_input}
+        ],
+        options={'temperature': 0.0, 'top_p': 1.0}
+    )
+    output = response.message.content
 """

 import os

@@ -10,46 +22,35 @@ import json
 import uuid
 import logging
 import asyncio
-from typing import List, Dict, Any, AsyncGenerator
+from typing import List, Dict, Any, AsyncGenerator

 from mcp.tools.definitions import MCP_TOOLS
 from mcp.registry import MCPRegistry

 logger = logging.getLogger(__name__)

-#
-OLLAMA_MODELS = [
-    "granite4:1b",        # IBM Granite 4 1B - user preferred
-    "granite3-dense:2b",  # IBM Granite 3 Dense 2B
-    "qwen2.5:3b",         # Best for tool calling, small
-    "llama3.2:3b",        # Meta's small model
-    "phi3:mini",          # Microsoft's small model
-    "gemma2:2b",          # Google's small model
-]
-
+# Default model - IBM Granite 4 1B
 DEFAULT_MODEL = "granite4:1b"
-OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")


 class AutonomousMCPAgentOllama:
     """
-    AI Agent using Ollama (local LLM
+    AI Agent using Ollama Python client (FREE local LLM)
+
+    Uses ollama.chat() directly as per the official documentation.
+    Temperature=0.0 and top_p=1.0 recommended for Granite family models.
     """

     def __init__(
         self,
         mcp_registry: MCPRegistry,
-        model: str = None,
-        host: str = None
+        model: str = None
     ):
         self.mcp_registry = mcp_registry
         self.model = model or os.getenv("OLLAMA_MODEL", DEFAULT_MODEL)
-        self.host = host or OLLAMA_HOST
         self.tools_description = self._build_tools_description()

-        logger.info(f"Ollama Agent initialized")
-        logger.info(f"  Host: {self.host}")
-        logger.info(f"  Model: {self.model}")
+        logger.info(f"Ollama Agent initialized with model: {self.model}")

     def _build_tools_description(self) -> str:
         """Build tool descriptions for the system prompt"""

@@ -107,16 +108,16 @@ Be concise."""
             }

             try:
-                # Call Ollama
+                # Call Ollama using the Python client
                 response = await self._call_ollama(messages)
-                assistant_content = response.get("
+                assistant_content = response.get("content", "")

                 if not assistant_content:
                     continue

                 # Check for completion
                 if "DONE" in assistant_content.upper():
-                    final_text = assistant_content.replace("DONE", "").strip()
+                    final_text = assistant_content.replace("DONE", "").replace("done", "").strip()
                     yield {
                         "type": "thought",
                         "thought": final_text,

@@ -194,68 +195,51 @@ Be concise."""
         }

     async def _call_ollama(self, messages: List[Dict]) -> Dict:
-        """
+        """
+        Call Ollama using the official Python client.
+
+        Uses ollama.chat() directly as per the guide:
+        https://github.com/ollama/ollama-python
+
+        Temperature=0.0 and top_p=1.0 recommended for Granite models.
+        """
         try:
-            import
-            from ollama import Client, ResponseError
+            from ollama import chat, ResponseError
         except ImportError:
             raise ImportError("ollama package not installed. Run: pip install ollama")

         try:
-            #
-
-
-            # Run synchronous call in executor to not block
+            # Use ollama.chat() directly as shown in the guide
+            # Run in executor to not block the async event loop
             loop = asyncio.get_event_loop()
             response = await loop.run_in_executor(
                 None,
-                lambda:
+                lambda: chat(
                     model=self.model,
                     messages=messages,
                     options={
-                        "temperature": 0.0,  # Deterministic for tool calling
-                        "top_p": 1.0,
-                        "num_predict": 2048
+                        "temperature": 0.0,  # Deterministic output for tool calling
+                        "top_p": 1.0         # Full probability mass (Granite recommended)
                     }
                 )
             )

-            #
-
-
-
-
-
+            # Extract response content: response.message.content
+            content = ""
+            if hasattr(response, 'message') and hasattr(response.message, 'content'):
+                content = response.message.content
+            elif isinstance(response, dict):
+                content = response.get("message", {}).get("content", "")
+
+            return {"content": content}

         except ResponseError as e:
-
+            # Handle Ollama-specific errors (model not available, etc.)
+            logger.error(f"Ollama ResponseError: {e}")
+            raise Exception(f"Ollama error: {e}. Make sure Ollama is running and the model '{self.model}' is pulled.")
         except Exception as e:
-
-
-            return await self._call_ollama_http(messages)
-
-    async def _call_ollama_http(self, messages: List[Dict]) -> Dict:
-        """Fallback: Call Ollama via HTTP API"""
-        import aiohttp
-
-        url = f"{self.host}/api/chat"
-        payload = {
-            "model": self.model,
-            "messages": messages,
-            "stream": False,
-            "options": {
-                "temperature": 0.0,
-                "top_p": 1.0,
-                "num_predict": 2048
-            }
-        }
-
-        async with aiohttp.ClientSession() as session:
-            async with session.post(url, json=payload, timeout=aiohttp.ClientTimeout(total=120)) as resp:
-                if resp.status != 200:
-                    text = await resp.text()
-                    raise Exception(f"Ollama HTTP error {resp.status}: {text}")
-                return await resp.json()
+            logger.error(f"Ollama call failed: {e}")
+            raise Exception(f"Ollama error: {e}")

     def _parse_tool_calls(self, text: str) -> List[Dict]:
         """Parse tool calls from response"""
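The new _call_ollama wraps the synchronous chat() call in run_in_executor so the async event loop is not blocked. The ollama package also provides AsyncClient, whose chat() is awaitable; the sketch below shows that alternative for comparison. It is not what this commit uses, the explicit host argument is an assumption (the commit dropped host handling in favor of the client default), and the response.message.content access assumes a recent ollama client version.

# Sketch of an alternative to run_in_executor: ollama.AsyncClient exposes an
# awaitable chat(). Not what this commit does; shown for comparison only.
import os
from typing import Dict, List

from ollama import AsyncClient


async def call_ollama_async(messages: List[Dict], model: str) -> Dict:
    # Explicit host is an assumption; by default the client targets the local server.
    client = AsyncClient(host=os.getenv("OLLAMA_HOST", "http://localhost:11434"))
    response = await client.chat(
        model=model,
        messages=messages,
        options={"temperature": 0.0, "top_p": 1.0},
    )
    # Recent ollama client versions return an object with .message.content;
    # older versions returned a plain dict, as the commit's extraction code handles.
    content = getattr(getattr(response, "message", None), "content", None)
    if content is None and isinstance(response, dict):
        content = response.get("message", {}).get("content", "")
    return {"content": content or ""}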