Spaces:

Mungert
/

GradLLM

Running

App Files Community

johnbridges committed on Sep 18

Commit

552430d

1 Parent(s): 66c4f69

.

Browse files

Files changed (3) hide show

hf_backend.py +3 -7
listener.py +31 -16
requirements.txt +1 -1

hf_backend.py CHANGED Viewed

@@ -141,8 +141,8 @@ class HFChatBackend(ChatBackend):
         messages = request.get("messages", [])
         tools = request.get("tools")
-        temperature = float(request.get("temperature", settings.LlmTemp or 0.7))
-        req_max_tokens = int(request.get("max_tokens", settings.LlmOpenAICtxSize or 512))
         rid = f"chatcmpl-hf-{int(time.time())}"
         now = int(time.time())
@@ -177,11 +177,7 @@ class HFChatBackend(ChatBackend):
         def _run_once(prompt: str, device: str, req_dtype: torch.dtype) -> str:
             model, eff_dtype = _get_model(device, req_dtype)
-            # Clamp max_new_tokens for CPU to prevent stalls
-            if device == "cpu":
-                max_new_tokens = min(req_max_tokens, 512)
-            else:
-                max_new_tokens = req_max_tokens
             # Build inputs with context-aware truncation
             inputs, orig_in_len, ctx, limit = _build_inputs_with_truncation(prompt, device, max_new_tokens, model, tokenizer)

         messages = request.get("messages", [])
         tools = request.get("tools")
+        temperature = float(request.get("temperature", settings.LlmTemp or 0.3))
+        req_max_tokens = int(request.get("max_tokens", settings.LlmOpenAICtxSize or 32000))
         rid = f"chatcmpl-hf-{int(time.time())}"
         now = int(time.time())
         def _run_once(prompt: str, device: str, req_dtype: torch.dtype) -> str:
             model, eff_dtype = _get_model(device, req_dtype)
+            max_new_tokens = req_max_tokens
             # Build inputs with context-aware truncation
             inputs, orig_in_len, ctx, limit = _build_inputs_with_truncation(prompt, device, max_new_tokens, model, tokenizer)

listener.py CHANGED Viewed

@@ -3,16 +3,16 @@ import logging
 from typing import Callable, Awaitable, Dict, Any, List
 import aio_pika
-Handler = Callable[[Any], Awaitable[None]]  # payload is envelope["data"]
 logger = logging.getLogger(__name__)
 class RabbitListenerBase:
     def __init__(self, base, instance_name: str, handlers: Dict[str, Handler]):
         self._base = base
-        self._instance_name = instance_name  # queue prefix (like your .NET instance name)
         self._handlers = handlers
         self._consumers: List[aio_pika.abc.AbstractRobustQueue] = []
@@ -30,7 +30,7 @@ class RabbitListenerBase:
             q = await self._base.declare_queue_bind(
                 exchange=exch, queue_name=qname, routing_keys=rks, ttl_ms=ttl
             )
-            # explicit manual-ack, parity with .NET (autoAck: false)
             await q.consume(self._make_consumer(d["FuncName"]), no_ack=False)
             self._consumers.append(q)
@@ -38,28 +38,43 @@ class RabbitListenerBase:
         handler = self._handlers.get(func_name)
         async def _on_msg(msg: aio_pika.IncomingMessage):
-            # manual ack after handler completes; no nack/requeue loops
             try:
                 raw_body = msg.body.decode("utf-8", errors="replace")
                 logger.info("Received message for handler '%s': %s", func_name, raw_body)
-                # safe JSON parse to mirror .NET ConvertToObject (no throw)
                 try:
                     envelope = json.loads(raw_body)
                 except Exception:
                     logger.exception("Invalid JSON for '%s'", func_name)
                     envelope = {"data": None}
                 data = envelope.get("data", None)
-                if handler:
-                    await handler(data)
-                else:
-                    logger.error("No handler bound for '%s'", func_name)
-                await msg.ack()  # ack on success path
             except Exception:
-                # match .NET: on exception, do not ack or nack; connection loss will requeue
-                logger.exception("Error processing message for '%s'", func_name)
         return _on_msg

 from typing import Callable, Awaitable, Dict, Any, List
 import aio_pika
+from aiormq.exceptions import ChannelInvalidStateError
+Handler = Callable[[Any], Awaitable[None]]
 logger = logging.getLogger(__name__)
 class RabbitListenerBase:
     def __init__(self, base, instance_name: str, handlers: Dict[str, Handler]):
         self._base = base
+        self._instance_name = instance_name
         self._handlers = handlers
         self._consumers: List[aio_pika.abc.AbstractRobustQueue] = []
             q = await self._base.declare_queue_bind(
                 exchange=exch, queue_name=qname, routing_keys=rks, ttl_ms=ttl
             )
+            # manual ack, parity with .NET (autoAck: false)
             await q.consume(self._make_consumer(d["FuncName"]), no_ack=False)
             self._consumers.append(q)
         handler = self._handlers.get(func_name)
         async def _on_msg(msg: aio_pika.IncomingMessage):
+            # decode
             try:
                 raw_body = msg.body.decode("utf-8", errors="replace")
                 logger.info("Received message for handler '%s': %s", func_name, raw_body)
                 try:
                     envelope = json.loads(raw_body)
                 except Exception:
                     logger.exception("Invalid JSON for '%s'", func_name)
                     envelope = {"data": None}
                 data = envelope.get("data", None)
+            except Exception:
+                # if we cannot decode, ack to drop (matches .NET non-requeue behavior)
+                try:
+                    await msg.ack()
+                except Exception:
+                    pass
+                return
+            # ACK FIRST (like C#)
+            try:
+                await msg.ack()
+            except ChannelInvalidStateError:
+                # channel died; message may be redelivered; avoid loops
+                logger.warning("Ack failed: channel invalid for '%s'. Skipping ack.", func_name)
+                return
             except Exception:
+                # swallow ack errors to avoid crash; mirrors resilient .NET behavior
+                logger.exception("Ack error for '%s'", func_name)
+                return
+            # run handler after ack; if it fails, caller handles own idempotency
+            if handler:
+                try:
+                    await handler(data)
+                except Exception:
+                    logger.exception("Handler error for '%s'", func_name)
+            else:
+                logger.error("No handler bound for '%s'", func_name)
         return _on_msg

requirements.txt CHANGED Viewed

@@ -13,5 +13,5 @@ huggingface_hub
 vllm>=0.10.0
 torch>=2.7.1
-transformers>=4.50.0

 vllm>=0.10.0
 torch>=2.7.1
+transformers>=4.51.0