muzakkirhussain011 Claude committed on
Commit
b85a2ff
·
1 Parent(s): 4003433

Add retry logic and fallback providers for inference API

Browse files

- Added retry logic with 2 attempts per provider
- Added fallback chain: nscale -> nebius -> together -> sambanova
- Automatically switches to next provider on 504/502/503/timeout errors
- Logs which provider succeeded
- More resilient to temporary infrastructure issues

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. mcp/agents/autonomous_agent_hf.py +54 -15
mcp/agents/autonomous_agent_hf.py CHANGED
@@ -346,29 +346,68 @@ When you have completed the task, provide a summary of what you accomplished."""
346
  "iterations": iteration
347
  }
348
 
349
- def _call_inference_api(self, messages: List[Dict]) -> Any:
350
  """
351
- Call HuggingFace Inference API (synchronous, for use in executor)
352
 
353
  Args:
354
  messages: Conversation messages
 
355
 
356
  Returns:
357
  API response
358
  """
359
- try:
360
- response = self.client.chat.completions.create(
361
- model=self.model,
362
- messages=messages,
363
- tools=self.tools,
364
- tool_choice="auto",
365
- max_tokens=1024,
366
- temperature=0.1
367
- )
368
- return response
369
- except Exception as e:
370
- logger.error(f"Inference API call failed: {e}")
371
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
 
373
  async def _execute_mcp_tool(self, tool_name: str, tool_input: Dict[str, Any]) -> Any:
374
  """
 
346
  "iterations": iteration
347
  }
348
 
349
+ def _call_inference_api(self, messages: List[Dict], retry_count: int = 0) -> Any:
350
  """
351
+ Call HuggingFace Inference API with retry logic and fallback providers
352
 
353
  Args:
354
  messages: Conversation messages
355
+ retry_count: Current retry attempt
356
 
357
  Returns:
358
  API response
359
  """
360
+ # Fallback providers in order of preference
361
+ fallback_configs = [
362
+ {"provider": self.provider, "model": self.model},
363
+ {"provider": "nebius", "model": "Qwen/Qwen2.5-72B-Instruct"},
364
+ {"provider": "together", "model": "meta-llama/Llama-3.3-70B-Instruct"},
365
+ {"provider": "sambanova", "model": "meta-llama/Llama-3.3-70B-Instruct"},
366
+ ]
367
+
368
+ max_retries = 2
369
+ last_error = None
370
+
371
+ for config in fallback_configs:
372
+ for attempt in range(max_retries):
373
+ try:
374
+ logger.info(f"Trying provider: {config['provider']}, model: {config['model']} (attempt {attempt + 1})")
375
+
376
+ # Create client for this provider if different from current
377
+ if config['provider'] != self.provider or config['model'] != self.model:
378
+ from huggingface_hub import InferenceClient
379
+ client = InferenceClient(
380
+ provider=config['provider'],
381
+ token=self.hf_token
382
+ )
383
+ else:
384
+ client = self.client
385
+
386
+ response = client.chat.completions.create(
387
+ model=config['model'],
388
+ messages=messages,
389
+ tools=self.tools,
390
+ tool_choice="auto",
391
+ max_tokens=1024,
392
+ temperature=0.1
393
+ )
394
+ logger.info(f"Success with {config['provider']}/{config['model']}")
395
+ return response
396
+
397
+ except Exception as e:
398
+ last_error = e
399
+ error_str = str(e)
400
+ logger.warning(f"Provider {config['provider']} failed (attempt {attempt + 1}): {error_str[:100]}")
401
+
402
+ # If it's a timeout or 5xx error, try next attempt/provider
403
+ if "504" in error_str or "502" in error_str or "503" in error_str or "timeout" in error_str.lower():
404
+ continue
405
+ # For other errors, try next provider
406
+ break
407
+
408
+ # All providers failed
409
+ logger.error(f"All inference providers failed. Last error: {last_error}")
410
+ raise last_error
411
 
412
  async def _execute_mcp_tool(self, tool_name: str, tool_input: Dict[str, Any]) -> Any:
413
  """