muzakkirhussain011 Claude committed on
Commit
b85a2ff
·
1 Parent(s): 4003433

Add retry logic and fallback providers for inference API

Browse files

- Added retry logic with 2 attempts per provider
- Added fallback chain: nscale -> nebius -> together -> sambanova
- Automatically switches to next provider on 504/502/503/timeout errors
- Logs which provider succeeded
- More resilient to temporary infrastructure issues

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. mcp/agents/autonomous_agent_hf.py +54 -15
mcp/agents/autonomous_agent_hf.py CHANGED
@@ -346,29 +346,68 @@ When you have completed the task, provide a summary of what you accomplished."""
346
  "iterations": iteration
347
  }
348
 
349
- def _call_inference_api(self, messages: List[Dict]) -> Any:
350
  """
351
- Call HuggingFace Inference API (synchronous, for use in executor)
352
 
353
  Args:
354
  messages: Conversation messages
 
355
 
356
  Returns:
357
  API response
358
  """
359
- try:
360
- response = self.client.chat.completions.create(
361
- model=self.model,
362
- messages=messages,
363
- tools=self.tools,
364
- tool_choice="auto",
365
- max_tokens=1024,
366
- temperature=0.1
367
- )
368
- return response
369
- except Exception as e:
370
- logger.error(f"Inference API call failed: {e}")
371
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
 
373
  async def _execute_mcp_tool(self, tool_name: str, tool_input: Dict[str, Any]) -> Any:
374
  """
 
346
  "iterations": iteration
347
  }
348
 
349
+ def _call_inference_api(self, messages: List[Dict], retry_count: int = 0) -> Any:
350
  """
351
+ Call HuggingFace Inference API with retry logic and fallback providers
352
 
353
  Args:
354
  messages: Conversation messages
355
+ retry_count: Current retry attempt
356
 
357
  Returns:
358
  API response
359
  """
360
+ # Fallback providers in order of preference
361
+ fallback_configs = [
362
+ {"provider": self.provider, "model": self.model},
363
+ {"provider": "nebius", "model": "Qwen/Qwen2.5-72B-Instruct"},
364
+ {"provider": "together", "model": "meta-llama/Llama-3.3-70B-Instruct"},
365
+ {"provider": "sambanova", "model": "meta-llama/Llama-3.3-70B-Instruct"},
366
+ ]
367
+
368
+ max_retries = 2
369
+ last_error = None
370
+
371
+ for config in fallback_configs:
372
+ for attempt in range(max_retries):
373
+ try:
374
+ logger.info(f"Trying provider: {config['provider']}, model: {config['model']} (attempt {attempt + 1})")
375
+
376
+ # Create client for this provider if different from current
377
+ if config['provider'] != self.provider or config['model'] != self.model:
378
+ from huggingface_hub import InferenceClient
379
+ client = InferenceClient(
380
+ provider=config['provider'],
381
+ token=self.hf_token
382
+ )
383
+ else:
384
+ client = self.client
385
+
386
+ response = client.chat.completions.create(
387
+ model=config['model'],
388
+ messages=messages,
389
+ tools=self.tools,
390
+ tool_choice="auto",
391
+ max_tokens=1024,
392
+ temperature=0.1
393
+ )
394
+ logger.info(f"Success with {config['provider']}/{config['model']}")
395
+ return response
396
+
397
+ except Exception as e:
398
+ last_error = e
399
+ error_str = str(e)
400
+ logger.warning(f"Provider {config['provider']} failed (attempt {attempt + 1}): {error_str[:100]}")
401
+
402
+ # If it's a timeout or 5xx error, try next attempt/provider
403
+ if "504" in error_str or "502" in error_str or "503" in error_str or "timeout" in error_str.lower():
404
+ continue
405
+ # For other errors, try next provider
406
+ break
407
+
408
+ # All providers failed
409
+ logger.error(f"All inference providers failed. Last error: {last_error}")
410
+ raise last_error
411
 
412
  async def _execute_mcp_tool(self, tool_name: str, tool_input: Dict[str, Any]) -> Any:
413
  """