ccm commited on
Commit
a6d1b6d
·
1 Parent(s): 1a514b0

Attempting better agent reasoning trace

Browse files
agent_server/agent_streaming.py CHANGED
@@ -1,19 +1,91 @@
 
 
 
1
  import asyncio
2
  import contextlib
3
  import os
4
  import threading
5
  import time
6
- import typing
7
 
8
  import fastapi
9
  import httpx
10
 
11
  from agent_server.helpers import sse_headers
12
  from agent_server.sanitizing_think_tags import scrub_think_tags
13
- from agent_server.std_tee import QueueWriter, _serialize_step
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
 
 
 
15
 
16
- async def run_agent_stream(task: str, agent_obj: typing.Optional[typing.Any] = None):
 
 
 
 
 
17
  """
18
  Start the agent in a worker thread.
19
  Stream THREE sources of incremental data into the async generator:
@@ -21,97 +93,62 @@ async def run_agent_stream(task: str, agent_obj: typing.Optional[typing.Any] = N
21
  (2) newly appended memory steps (polled),
22
  (3) any iterable the agent may yield (if supported).
23
  Finally emit a __final__ item with the last answer.
 
 
 
 
 
 
 
24
  """
25
  loop = asyncio.get_running_loop()
26
  q: asyncio.Queue = asyncio.Queue()
27
  agent_to_use = agent_obj
28
-
29
  stop_evt = threading.Event()
30
 
31
- # 1) stdout/stderr live tee
32
  qwriter = QueueWriter(q)
33
 
34
  # 2) memory poller
35
- def poll_memory():
36
- last_len = 0
37
- while not stop_evt.is_set():
38
- try:
39
- steps = []
40
- try:
41
- # Common API: agent.memory.get_full_steps()
42
- steps = agent_to_use.memory.get_full_steps() # type: ignore[attr-defined]
43
- except Exception:
44
- # Fallbacks: different names across versions
45
- steps = (
46
- getattr(agent_to_use, "steps", [])
47
- or getattr(agent_to_use, "memory", [])
48
- or []
49
- )
50
- if steps is None:
51
- steps = []
52
- curr_len = len(steps)
53
- if curr_len > last_len:
54
- new = steps[last_len:curr_len]
55
- last_len = curr_len
56
- for s in new:
57
- s_text = _serialize_step(s)
58
- if s_text:
59
- try:
60
- q.put_nowait({"__step__": s_text})
61
- except Exception:
62
- pass
63
- except Exception:
64
- pass
65
- time.sleep(0.10) # 100 ms cadence
66
 
67
  # 3) agent runner (may or may not yield)
68
  def run_agent():
69
  final_result = None
70
  try:
71
- with contextlib.redirect_stdout(qwriter), contextlib.redirect_stderr(
72
- qwriter
73
- ):
74
  used_iterable = False
75
- if hasattr(agent_to_use, "run") and callable(
76
- getattr(agent_to_use, "run")
77
- ):
78
  try:
79
  res = agent_to_use.run(task, stream=True)
80
- if hasattr(res, "__iter__") and not isinstance(
81
- res, (str, bytes)
82
- ):
83
  used_iterable = True
84
  for it in res:
85
  try:
86
- q.put_nowait(it)
87
  except Exception:
88
  pass
89
- final_result = (
90
- None # iterable may already contain the answer
91
- )
92
  else:
93
  final_result = res
94
  except TypeError:
95
- # run(stream=True) not supported -> fall back
96
  pass
97
 
98
  if final_result is None and not used_iterable:
99
- # Try other common streaming signatures
100
- for name in (
101
- "run_stream",
102
- "stream",
103
- "stream_run",
104
- "run_with_callback",
105
- ):
106
  fn = getattr(agent_to_use, name, None)
107
  if callable(fn):
108
  try:
109
  res = fn(task)
110
- if hasattr(res, "__iter__") and not isinstance(
111
- res, (str, bytes)
112
- ):
113
  for it in res:
114
- q.put_nowait(it)
 
 
 
115
  final_result = None
116
  else:
117
  final_result = res
@@ -120,7 +157,7 @@ async def run_agent_stream(task: str, agent_obj: typing.Optional[typing.Any] = N
120
  # maybe callback signature
121
  def cb(item):
122
  try:
123
- q.put_nowait(item)
124
  except Exception:
125
  pass
126
 
@@ -132,17 +169,10 @@ async def run_agent_stream(task: str, agent_obj: typing.Optional[typing.Any] = N
132
  continue
133
 
134
  if final_result is None and not used_iterable:
135
- pass # (typo guard removed below)
136
-
137
- if final_result is None and not used_iterable:
138
- # Last resort: synchronous run()/generate()/callable
139
- if hasattr(agent_to_use, "run") and callable(
140
- getattr(agent_to_use, "run")
141
- ):
142
  final_result = agent_to_use.run(task)
143
- elif hasattr(agent_to_use, "generate") and callable(
144
- getattr(agent_to_use, "generate")
145
- ):
146
  final_result = agent_to_use.generate(task)
147
  elif callable(agent_to_use):
148
  final_result = agent_to_use(task)
@@ -167,20 +197,45 @@ async def run_agent_stream(task: str, agent_obj: typing.Optional[typing.Any] = N
167
  pass
168
  stop_evt.set()
169
 
170
- # Kick off threads
171
- mem_thread = threading.Thread(target=poll_memory, daemon=True)
172
- run_thread = threading.Thread(target=run_agent, daemon=True)
173
- mem_thread.start()
174
  run_thread.start()
175
 
176
- # Async consumer
177
  while True:
178
  item = await q.get()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  yield item
 
180
  if isinstance(item, dict) and "__final__" in item:
181
  break
182
 
183
 
 
 
 
184
  def _recursively_scrub(obj):
185
  if isinstance(obj, str):
186
  return scrub_think_tags(obj)
@@ -191,9 +246,10 @@ def _recursively_scrub(obj):
191
  return obj
192
 
193
 
194
- async def proxy_upstream_chat_completions(
195
- body: dict, stream: bool, scrub_think: bool = False
196
- ):
 
197
  HF_TOKEN = os.getenv("OPENAI_API_KEY")
198
  headers = {
199
  "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
@@ -206,12 +262,9 @@ async def proxy_upstream_chat_completions(
206
 
207
  async def proxy_stream():
208
  async with httpx.AsyncClient(timeout=None) as client:
209
- async with client.stream(
210
- "POST", url, headers=headers, json=body
211
- ) as resp:
212
  resp.raise_for_status()
213
  if scrub_think:
214
- # Pull text segments, scrub tags, and yield bytes
215
  async for txt in resp.aiter_text():
216
  try:
217
  cleaned = scrub_think_tags(txt)
@@ -239,6 +292,250 @@ async def proxy_upstream_chat_completions(
239
  except Exception:
240
  pass
241
 
242
- return fastapi.responses.JSONResponse(
243
- status_code=r.status_code, content=payload
244
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # agent_server/agent_streaming.py
2
+ from __future__ import annotations
3
+
4
  import asyncio
5
  import contextlib
6
  import os
7
  import threading
8
  import time
9
+ import typing as t
10
 
11
  import fastapi
12
  import httpx
13
 
14
  from agent_server.helpers import sse_headers
15
  from agent_server.sanitizing_think_tags import scrub_think_tags
16
+ from agent_server.std_tee import (
17
+ QueueWriter,
18
+ _serialize_step,
19
+ _format_reasoning_chunk,
20
+ _maybe_parse_final_from_stdout,
21
+ )
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Memory poller: normalize all agent types to uniform step blocks.
25
+ # ---------------------------------------------------------------------------
26
def start_memory_poller(
    agent: t.Any,
    q: "asyncio.Queue[dict]",
    stop_evt: "threading.Event",
    interval: float = 0.10,
) -> threading.Thread:
    """Launch a daemon thread that watches *agent*'s memory for new steps.

    Every step appended since the previous poll is serialized with
    ``_serialize_step`` and enqueued as ``{"__step__": <text>}``.  Several
    attribute paths are probed so different agent implementations work.
    Polling ends when *stop_evt* is set; the started thread is returned.
    """
    seen = 0

    def _snapshot() -> list:
        # Prefer the canonical memory accessors, trying newest names first.
        try:
            mem = getattr(agent, "memory", None)
            if mem is not None:
                for attr in ("get_full_steps", "get_steps", "get_all_steps"):
                    fn = getattr(mem, attr, None)
                    if callable(fn):
                        return list(fn() or [])
        except Exception:
            pass
        # Fallback: some agents expose a bare ``steps`` list instead.
        try:
            raw = getattr(agent, "steps", None)
            if raw:
                return list(raw)
        except Exception:
            pass
        return []

    def _poll() -> None:
        nonlocal seen
        while not stop_evt.is_set():
            try:
                snapshot = _snapshot()
                if len(snapshot) > seen:
                    fresh, seen = snapshot[seen:], len(snapshot)
                    for step in fresh:
                        try:
                            text = _serialize_step(step)
                            if text:
                                q.put_nowait({"__step__": text})
                        except Exception:
                            # Formatting problems must never kill polling.
                            pass
            except Exception:
                pass
            time.sleep(interval)

    poller = threading.Thread(target=_poll, name="memory-poller", daemon=True)
    poller.start()
    return poller
82
 
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Unified agent streaming: stdout/stderr, memory steps, iterator yields.
86
+ # Adds normalized reasoning via __reasoning__ while preserving legacy keys.
87
+ # ---------------------------------------------------------------------------
88
+ async def run_agent_stream(task: str, agent_obj: t.Optional[t.Any] = None):
89
  """
90
  Start the agent in a worker thread.
91
  Stream THREE sources of incremental data into the async generator:
 
93
  (2) newly appended memory steps (polled),
94
  (3) any iterable the agent may yield (if supported).
95
  Finally emit a __final__ item with the last answer.
96
+
97
+ Emits dict items. For compatibility, raw shapes are preserved:
98
+ - {"__stdout__": "<line>"} (raw line)
99
+ - {"__step__": "<block>"} (uniform Step/Thought/Tool/Args/Obs/Error block)
100
+ - {"__reasoning__": "<chunk>"} (normalized reasoning derived from stdout)
101
+ - {"__error__": "<error>"} (if run errors)
102
+ - {"__final__": any} (final result)
103
  """
104
  loop = asyncio.get_running_loop()
105
  q: asyncio.Queue = asyncio.Queue()
106
  agent_to_use = agent_obj
 
107
  stop_evt = threading.Event()
108
 
109
+ # 1) stdout/stderr live tee (lines go in as {"__stdout__": ...})
110
  qwriter = QueueWriter(q)
111
 
112
  # 2) memory poller
113
+ mem_thread = start_memory_poller(agent_to_use, q, stop_evt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  # 3) agent runner (may or may not yield)
116
  def run_agent():
117
  final_result = None
118
  try:
119
+ with contextlib.redirect_stdout(qwriter), contextlib.redirect_stderr(qwriter):
 
 
120
  used_iterable = False
121
+ # Preferred streaming signature
122
+ if hasattr(agent_to_use, "run") and callable(getattr(agent_to_use, "run")):
 
123
  try:
124
  res = agent_to_use.run(task, stream=True)
125
+ if hasattr(res, "__iter__") and not isinstance(res, (str, bytes)):
 
 
126
  used_iterable = True
127
  for it in res:
128
  try:
129
+ q.put_nowait(it if isinstance(it, dict) else {"__stdout__": str(it)})
130
  except Exception:
131
  pass
132
+ final_result = None # may be contained in the iterable
 
 
133
  else:
134
  final_result = res
135
  except TypeError:
136
+ # run(stream=True) not supported -> fall through to other signatures
137
  pass
138
 
139
  if final_result is None and not used_iterable:
140
+ # Try other common streaming variants
141
+ for name in ("run_stream", "stream", "stream_run", "run_with_callback"):
 
 
 
 
 
142
  fn = getattr(agent_to_use, name, None)
143
  if callable(fn):
144
  try:
145
  res = fn(task)
146
+ if hasattr(res, "__iter__") and not isinstance(res, (str, bytes)):
 
 
147
  for it in res:
148
+ try:
149
+ q.put_nowait(it if isinstance(it, dict) else {"__stdout__": str(it)})
150
+ except Exception:
151
+ pass
152
  final_result = None
153
  else:
154
  final_result = res
 
157
  # maybe callback signature
158
  def cb(item):
159
  try:
160
+ q.put_nowait(item if isinstance(item, dict) else {"__stdout__": str(item)})
161
  except Exception:
162
  pass
163
 
 
169
  continue
170
 
171
  if final_result is None and not used_iterable:
172
+ # Last resort: synchronous APIs
173
+ if hasattr(agent_to_use, "run") and callable(getattr(agent_to_use, "run")):
 
 
 
 
 
174
  final_result = agent_to_use.run(task)
175
+ elif hasattr(agent_to_use, "generate") and callable(getattr(agent_to_use, "generate")):
 
 
176
  final_result = agent_to_use.generate(task)
177
  elif callable(agent_to_use):
178
  final_result = agent_to_use(task)
 
197
  pass
198
  stop_evt.set()
199
 
200
+ run_thread = threading.Thread(target=run_agent, name="agent-runner", daemon=True)
 
 
 
201
  run_thread.start()
202
 
203
+ # Async consumer: normalize stdout -> reasoning chunk; forward steps & others
204
  while True:
205
  item = await q.get()
206
+
207
+ # Normalize stdout lines into compact reasoning chunks, and also
208
+ # opportunistically extract a "Final answer:" if the agent prints one.
209
+ if isinstance(item, dict) and "__stdout__" in item:
210
+ line = item["__stdout__"]
211
+ # Add compact, filtered reasoning chunk (drop banners, system prompts)
212
+ chunk = _format_reasoning_chunk(line, tag="stdout", idx=0)
213
+ if chunk:
214
+ yield {"__reasoning__": chunk}
215
+ # Keep legacy raw stdout for existing consumers
216
+ yield item
217
+ # Opportunistic final answer capture from stdout
218
+ maybe_final = _maybe_parse_final_from_stdout(line)
219
+ if maybe_final:
220
+ # Don't end the stream here; consumer can decide how to use it
221
+ yield {"__maybe_final__": maybe_final}
222
+ continue
223
+
224
+ # Steps already serialized uniformly in the poller
225
+ if isinstance(item, dict) and "__step__" in item:
226
+ yield item
227
+ continue
228
+
229
+ # Pass-through for other shapes (__error__, iterable events, etc.)
230
  yield item
231
+
232
  if isinstance(item, dict) and "__final__" in item:
233
  break
234
 
235
 
236
+ # ---------------------------------------------------------------------------
237
+ # Utilities: scrub nested structures of <think> tags when proxying upstream
238
+ # ---------------------------------------------------------------------------
239
def _recursively_scrub(obj):
    """Strip <think> tags from every string nested anywhere inside *obj*.

    Dicts and lists are rebuilt with scrubbed contents; strings are scrubbed
    directly; any other value is returned unchanged.
    """
    if isinstance(obj, dict):
        return {key: _recursively_scrub(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [_recursively_scrub(item) for item in obj]
    if isinstance(obj, str):
        return scrub_think_tags(obj)
    return obj
247
 
248
 
249
+ # ---------------------------------------------------------------------------
250
+ # Upstream proxy (OpenAI-compatible) with optional think-tag scrubbing
251
+ # ---------------------------------------------------------------------------
252
+ async def proxy_upstream_chat_completions(body: dict, stream: bool, scrub_think: bool = False):
253
  HF_TOKEN = os.getenv("OPENAI_API_KEY")
254
  headers = {
255
  "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
 
262
 
263
  async def proxy_stream():
264
  async with httpx.AsyncClient(timeout=None) as client:
265
+ async with client.stream("POST", url, headers=headers, json=body) as resp:
 
 
266
  resp.raise_for_status()
267
  if scrub_think:
 
268
  async for txt in resp.aiter_text():
269
  try:
270
  cleaned = scrub_think_tags(txt)
 
292
  except Exception:
293
  pass
294
 
295
+ return fastapi.responses.JSONResponse(status_code=r.status_code, content=payload)
296
+
297
+
298
+ # import asyncio
299
+ # import contextlib
300
+ # import os
301
+ # import threading
302
+ # import time
303
+ # import typing
304
+ #
305
+ # import fastapi
306
+ # import httpx
307
+ #
308
+ # from agent_server.helpers import sse_headers
309
+ # from agent_server.sanitizing_think_tags import scrub_think_tags
310
+ # from agent_server.std_tee import QueueWriter, _serialize_step
311
+ #
312
+ #
313
+ # async def run_agent_stream(task: str, agent_obj: typing.Optional[typing.Any] = None):
314
+ # """
315
+ # Start the agent in a worker thread.
316
+ # Stream THREE sources of incremental data into the async generator:
317
+ # (1) live stdout/stderr lines,
318
+ # (2) newly appended memory steps (polled),
319
+ # (3) any iterable the agent may yield (if supported).
320
+ # Finally emit a __final__ item with the last answer.
321
+ # """
322
+ # loop = asyncio.get_running_loop()
323
+ # q: asyncio.Queue = asyncio.Queue()
324
+ # agent_to_use = agent_obj
325
+ #
326
+ # stop_evt = threading.Event()
327
+ #
328
+ # # 1) stdout/stderr live tee
329
+ # qwriter = QueueWriter(q)
330
+ #
331
+ # # 2) memory poller
332
+ # def poll_memory():
333
+ # last_len = 0
334
+ # while not stop_evt.is_set():
335
+ # try:
336
+ # steps = []
337
+ # try:
338
+ # # Common API: agent.memory.get_full_steps()
339
+ # steps = agent_to_use.memory.get_full_steps() # type: ignore[attr-defined]
340
+ # except Exception:
341
+ # # Fallbacks: different names across versions
342
+ # steps = (
343
+ # getattr(agent_to_use, "steps", [])
344
+ # or getattr(agent_to_use, "memory", [])
345
+ # or []
346
+ # )
347
+ # if steps is None:
348
+ # steps = []
349
+ # curr_len = len(steps)
350
+ # if curr_len > last_len:
351
+ # new = steps[last_len:curr_len]
352
+ # last_len = curr_len
353
+ # for s in new:
354
+ # s_text = _serialize_step(s)
355
+ # if s_text:
356
+ # try:
357
+ # q.put_nowait({"__step__": s_text})
358
+ # except Exception:
359
+ # pass
360
+ # except Exception:
361
+ # pass
362
+ # time.sleep(0.10) # 100 ms cadence
363
+ #
364
+ # # 3) agent runner (may or may not yield)
365
+ # def run_agent():
366
+ # final_result = None
367
+ # try:
368
+ # with contextlib.redirect_stdout(qwriter), contextlib.redirect_stderr(
369
+ # qwriter
370
+ # ):
371
+ # used_iterable = False
372
+ # if hasattr(agent_to_use, "run") and callable(
373
+ # getattr(agent_to_use, "run")
374
+ # ):
375
+ # try:
376
+ # res = agent_to_use.run(task, stream=True)
377
+ # if hasattr(res, "__iter__") and not isinstance(
378
+ # res, (str, bytes)
379
+ # ):
380
+ # used_iterable = True
381
+ # for it in res:
382
+ # try:
383
+ # q.put_nowait(it)
384
+ # except Exception:
385
+ # pass
386
+ # final_result = (
387
+ # None # iterable may already contain the answer
388
+ # )
389
+ # else:
390
+ # final_result = res
391
+ # except TypeError:
392
+ # # run(stream=True) not supported -> fall back
393
+ # pass
394
+ #
395
+ # if final_result is None and not used_iterable:
396
+ # # Try other common streaming signatures
397
+ # for name in (
398
+ # "run_stream",
399
+ # "stream",
400
+ # "stream_run",
401
+ # "run_with_callback",
402
+ # ):
403
+ # fn = getattr(agent_to_use, name, None)
404
+ # if callable(fn):
405
+ # try:
406
+ # res = fn(task)
407
+ # if hasattr(res, "__iter__") and not isinstance(
408
+ # res, (str, bytes)
409
+ # ):
410
+ # for it in res:
411
+ # q.put_nowait(it)
412
+ # final_result = None
413
+ # else:
414
+ # final_result = res
415
+ # break
416
+ # except TypeError:
417
+ # # maybe callback signature
418
+ # def cb(item):
419
+ # try:
420
+ # q.put_nowait(item)
421
+ # except Exception:
422
+ # pass
423
+ #
424
+ # try:
425
+ # fn(task, cb)
426
+ # final_result = None
427
+ # break
428
+ # except Exception:
429
+ # continue
430
+ #
431
+ # if final_result is None and not used_iterable:
432
+ # pass # (typo guard removed below)
433
+ #
434
+ # if final_result is None and not used_iterable:
435
+ # # Last resort: synchronous run()/generate()/callable
436
+ # if hasattr(agent_to_use, "run") and callable(
437
+ # getattr(agent_to_use, "run")
438
+ # ):
439
+ # final_result = agent_to_use.run(task)
440
+ # elif hasattr(agent_to_use, "generate") and callable(
441
+ # getattr(agent_to_use, "generate")
442
+ # ):
443
+ # final_result = agent_to_use.generate(task)
444
+ # elif callable(agent_to_use):
445
+ # final_result = agent_to_use(task)
446
+ #
447
+ # except Exception as e:
448
+ # try:
449
+ # qwriter.flush()
450
+ # except Exception:
451
+ # pass
452
+ # try:
453
+ # q.put_nowait({"__error__": str(e)})
454
+ # except Exception:
455
+ # pass
456
+ # finally:
457
+ # try:
458
+ # qwriter.flush()
459
+ # except Exception:
460
+ # pass
461
+ # try:
462
+ # q.put_nowait({"__final__": final_result})
463
+ # except Exception:
464
+ # pass
465
+ # stop_evt.set()
466
+ #
467
+ # # Kick off threads
468
+ # mem_thread = threading.Thread(target=poll_memory, daemon=True)
469
+ # run_thread = threading.Thread(target=run_agent, daemon=True)
470
+ # mem_thread.start()
471
+ # run_thread.start()
472
+ #
473
+ # # Async consumer
474
+ # while True:
475
+ # item = await q.get()
476
+ # yield item
477
+ # if isinstance(item, dict) and "__final__" in item:
478
+ # break
479
+ #
480
+ #
481
+ # def _recursively_scrub(obj):
482
+ # if isinstance(obj, str):
483
+ # return scrub_think_tags(obj)
484
+ # if isinstance(obj, dict):
485
+ # return {k: _recursively_scrub(v) for k, v in obj.items()}
486
+ # if isinstance(obj, list):
487
+ # return [_recursively_scrub(v) for v in obj]
488
+ # return obj
489
+ #
490
+ #
491
+ # async def proxy_upstream_chat_completions(
492
+ # body: dict, stream: bool, scrub_think: bool = False
493
+ # ):
494
+ # HF_TOKEN = os.getenv("OPENAI_API_KEY")
495
+ # headers = {
496
+ # "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
497
+ # "Content-Type": "application/json",
498
+ # }
499
+ # UPSTREAM_BASE = os.getenv("UPSTREAM_OPENAI_BASE", "").rstrip("/")
500
+ # url = f"{UPSTREAM_BASE}/chat/completions"
501
+ #
502
+ # if stream:
503
+ #
504
+ # async def proxy_stream():
505
+ # async with httpx.AsyncClient(timeout=None) as client:
506
+ # async with client.stream(
507
+ # "POST", url, headers=headers, json=body
508
+ # ) as resp:
509
+ # resp.raise_for_status()
510
+ # if scrub_think:
511
+ # # Pull text segments, scrub tags, and yield bytes
512
+ # async for txt in resp.aiter_text():
513
+ # try:
514
+ # cleaned = scrub_think_tags(txt)
515
+ # yield cleaned.encode("utf-8")
516
+ # except Exception:
517
+ # yield txt.encode("utf-8")
518
+ # else:
519
+ # async for chunk in resp.aiter_bytes():
520
+ # yield chunk
521
+ #
522
+ # return fastapi.responses.StreamingResponse(
523
+ # proxy_stream(), media_type="text/event-stream", headers=sse_headers()
524
+ # )
525
+ # else:
526
+ # async with httpx.AsyncClient(timeout=None) as client:
527
+ # r = await client.post(url, headers=headers, json=body)
528
+ # try:
529
+ # payload = r.json()
530
+ # except Exception:
531
+ # payload = {"status_code": r.status_code, "text": r.text}
532
+ #
533
+ # if scrub_think:
534
+ # try:
535
+ # payload = _recursively_scrub(payload)
536
+ # except Exception:
537
+ # pass
538
+ #
539
+ # return fastapi.responses.JSONResponse(
540
+ # status_code=r.status_code, content=payload
541
+ # )
agent_server/std_tee.py CHANGED
@@ -1,102 +1,305 @@
 
 
 
1
  import asyncio
2
  import io
3
  import json
4
  import re
5
  import threading
 
6
 
7
- from agent_server.sanitizing_think_tags import scrub_think_tags
 
 
 
 
 
 
 
8
 
 
 
9
 
10
- class QueueWriter(io.TextIOBase):
11
- """
12
- File-like object that pushes each write to an asyncio.Queue immediately.
13
- """
 
 
 
 
 
14
 
15
- def __init__(self, q: "asyncio.Queue"):
16
- self.q = q
17
- self._lock = threading.Lock()
18
- self._buf = [] # accumulate until newline to reduce spam
19
-
20
- def write(self, s: str):
21
- if not s:
22
- return 0
23
- with self._lock:
24
- self._buf.append(s)
25
- # flush on newline to keep granularity reasonable
26
- if "\n" in s:
27
- chunk = "".join(self._buf)
28
- self._buf.clear()
29
- try:
30
- self.q.put_nowait({"__stdout__": chunk})
31
- except Exception:
32
- pass
33
- return len(s)
34
 
35
- def flush(self):
36
- with self._lock:
37
- if self._buf:
38
- chunk = "".join(self._buf)
39
- self._buf.clear()
40
- try:
41
- self.q.put_nowait({"__stdout__": chunk})
42
- except Exception:
43
- pass
44
 
 
 
 
 
 
45
 
46
- def _serialize_step(step) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  """
48
- Best-effort pretty string for a smolagents MemoryStep / ActionStep.
49
- Works even if attributes are missing on some versions.
 
 
 
50
  """
51
- parts = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  sn = getattr(step, "step_number", None)
53
  if sn is not None:
54
  parts.append(f"Step {sn}")
 
 
55
  thought_val = getattr(step, "thought", None)
56
  if thought_val:
57
- parts.append(f"Thought: {scrub_think_tags(str(thought_val))}")
 
 
58
  tool_val = getattr(step, "tool", None)
59
  if tool_val:
60
- parts.append(f"Tool: {scrub_think_tags(str(tool_val))}")
 
 
61
  code_val = getattr(step, "code", None)
62
  if code_val:
63
- code_str = scrub_think_tags(str(code_val)).strip()
64
- parts.append("```python\n" + code_str + "\n```")
 
 
 
65
  args = getattr(step, "args", None)
66
  if args:
67
  try:
68
- parts.append(
69
- "Args: " + scrub_think_tags(json.dumps(args, ensure_ascii=False))
70
- )
71
  except Exception:
72
- parts.append("Args: " + scrub_think_tags(str(args)))
 
 
 
73
  error = getattr(step, "error", None)
74
  if error:
75
- parts.append(f"Error: {scrub_think_tags(str(error))}")
 
 
76
  obs = getattr(step, "observations", None)
77
  if obs is not None:
78
  if isinstance(obs, (list, tuple)):
79
  obs_str = "\n".join(map(str, obs))
80
  else:
81
  obs_str = str(obs)
82
- parts.append("Observation:\n" + scrub_think_tags(obs_str).strip())
83
- # If this looks like a FinalAnswer step object, surface a clean final answer
84
- try:
85
- tname = type(step).__name__
86
- except Exception:
87
- tname = ""
88
- if tname.lower().startswith("finalanswer"):
89
  out = getattr(step, "output", None)
90
  if out is not None:
91
- return f"Final answer: {scrub_think_tags(str(out)).strip()}"
92
- # Fallback: try to parse from string repr "FinalAnswerStep(output=...)"
93
- s = scrub_think_tags(str(step))
94
- m = re.search(r"FinalAnswer[^()]*\(\s*output\s*=\s*([^,)]+)", s)
95
- if m:
96
- return f"Final answer: {m.group(1).strip()}"
97
- # If the only content would be an object repr like FinalAnswerStep(...), drop it;
98
- # a cleaner "Final answer: ..." will come from the rule above or stdout.
99
  joined = "\n".join(parts).strip()
100
  if re.match(r"^FinalAnswer[^\n]+\)$", joined):
101
  return ""
102
- return joined or scrub_think_tags(str(step))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # agent_server/std_tee.py
2
+ from __future__ import annotations
3
+
4
  import asyncio
5
  import io
6
  import json
7
  import re
8
  import threading
9
+ import typing as t
10
 
11
# ---- Think-tag scrubber (import with safe fallback) -------------------------
# Prefer the same-package sanitizer; fall back to a no-op so this module stays
# importable under a different project layout.
try:
    from .sanitizing_think_tags import scrub_think_tags  # type: ignore
except Exception:  # pragma: no cover
    def scrub_think_tags(s: str) -> str:
        # Fallback: leave text untouched when the sanitizer is unavailable.
        return s


# ---- Formatting helpers (ANSI, noise, truncation) --------------------------
# Matches ANSI CSI escape sequences (colors, cursor movement) for stripping.
_ANSI_RE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")

# Line prefixes that are console decoration or prompt boilerplate and should
# never be surfaced in the reasoning stream.
_NOISY_PREFIXES = (
    "OpenAIServerModel",
    "Output message of the LLM",
    "New run",
    "─ Executing parsed code",
    "╭", "╰", "│", "━", "─",
    "System prompt", "SYSTEM PROMPT", "System Prompt",
)

# A line must contain at least one run of 3+ alphanumerics to count as signal;
# very long lines without one are dropped.
_MIN_SIG_CHARS = re.compile(r"[A-Za-z0-9]{3,}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
def _strip_ansi_and_think(s: str) -> str:
    """Scrub <think> tags, remove ANSI escapes, and trim surrounding space."""
    without_tags = scrub_think_tags(s)
    return _ANSI_RE.sub("", without_tags).strip()
 
 
 
 
 
40
 
41
def _truncate(s: str, n: int) -> str:
    """Trim *s* to at most *n* characters, appending a truncation marker."""
    trimmed = s.strip()
    if len(trimmed) > n:
        return trimmed[:n] + "\n… [truncated]"
    return trimmed
46
 
47
def _clean_line(s: str) -> str:
    """Normalize one console line for downstream formatting."""
    cleaned = _strip_ansi_and_think(s)
    return cleaned.rstrip("\n")
49
+
50
+ # ---- Public-ish helpers used by agent_streaming ----------------------------
51
# Matches "Final answer: <text>" (case-insensitive) at a word boundary.
# NOTE: the previous pattern used doubled backslashes inside a raw string
# (r"\\b", r"\\s"), which matches a literal backslash character and could
# therefore never match real agent output; single escapes are intended.
_FINAL_RE = re.compile(r"(?:^|\b)Final\s+answer:\s*(.+)$", flags=re.IGNORECASE)

def _maybe_parse_final_from_stdout(line: str) -> t.Optional[str]:
    """Best-effort extraction of a printed "Final answer: ..." from stdout.

    Returns the cleaned answer text, or None when *line* is not a string,
    cleans to nothing, or carries no final-answer marker.
    """
    if not isinstance(line, str):
        return None
    cleaned = _clean_line(line)
    m = _FINAL_RE.search(cleaned)
    if not m:
        return None
    return _clean_line(m.group(1)) or None
61
+
62
def _format_reasoning_chunk(text: str, tag: str, idx: int) -> str:
    """
    Filter one stdout line into a reasoning-stream chunk.

    Scrubs <think> tags and ANSI codes, then drops console banners,
    box-drawing rows, and very long low-signal lines.  Returns the chunk
    followed by a blank line, or "" when the line should be suppressed.
    (*tag* and *idx* are accepted for interface compatibility; they do not
    affect the output.)
    """
    line = _clean_line(text)
    if not line:
        return ""
    # str.startswith accepts the whole prefix tuple in one call.
    if line.startswith(_NOISY_PREFIXES):
        return ""
    # Pure box-drawing / separator rows carry no information.
    if not any(ch not in " ─━╭╮╰╯│═·—-_=+•" for ch in line):
        return ""
    # Very long lines without a single 3+ alphanumeric run are noise.
    if len(line) > 240 and _MIN_SIG_CHARS.search(line) is None:
        return ""
    return f"{line}\n\n"
80
+
81
def _serialize_step(step: t.Any) -> str:
    """
    Compact, uniform serializer for 'step' objects from different agent libs.

    Produces a block of the form:
        Step N
        🧠 Thought: …
        🛠️ Tool: …
        📥 Args: …
        📤 Observation: …
        💥 Error: …
    plus fenced code when the step carries code, each field truncated to keep
    the reveal parsimonious.  Final-answer steps collapse to a single
    "Final answer: …" line.  Returns "" when there is nothing worth showing.
    """
    parts: list[str] = []

    # Step number (best-effort)
    sn = getattr(step, "step_number", None)
    if sn is not None:
        parts.append(f"Step {sn}")

    # Thought
    thought_val = getattr(step, "thought", None)
    if thought_val:
        parts.append(f"🧠 Thought: {_truncate(_strip_ansi_and_think(str(thought_val)), 600)}")

    # Tool
    tool_val = getattr(step, "tool", None)
    if tool_val:
        parts.append(f"🛠️ Tool: {_truncate(_strip_ansi_and_think(str(tool_val)), 240)}")

    # Code (if any)
    code_val = getattr(step, "code", None)
    if code_val:
        code_str = _truncate(_strip_ansi_and_think(str(code_val)), 1600)
        if code_str:
            parts.append("```python\n" + code_str + "\n```")

    # Args
    args = getattr(step, "args", None)
    if args:
        try:
            arg_s = _truncate(_strip_ansi_and_think(json.dumps(args, ensure_ascii=False)), 800)
        except Exception:
            arg_s = _truncate(_strip_ansi_and_think(str(args)), 800)
        parts.append("📥 Args: " + arg_s)

    # Error
    error = getattr(step, "error", None)
    if error:
        parts.append(f"💥 Error: {_truncate(_strip_ansi_and_think(str(error)), 600)}")

    # Observations
    obs = getattr(step, "observations", None)
    if obs is not None:
        if isinstance(obs, (list, tuple)):
            obs_str = "\n".join(map(str, obs))
        else:
            obs_str = str(obs)
        parts.append("📤 Observation:\n" + _truncate(_strip_ansi_and_think(obs_str), 1600))

    # Final-answer detection: explicit type attributes first, then the class
    # name itself.  Checking type(step).__name__ restores detection of step
    # classes (e.g. FinalAnswerStep) that carry no type_name/type attribute.
    tname = getattr(step, "type_name", "") or getattr(step, "type", "") or ""
    if not (isinstance(tname, str) and tname):
        tname = type(step).__name__
    # Compute the cleaned repr up front so the final return can rely on it.
    s = _strip_ansi_and_think(str(step))
    if isinstance(tname, str) and tname.lower().startswith("finalanswer"):
        out = getattr(step, "output", None)
        if out is not None:
            return f"Final answer: {_strip_ansi_and_think(str(out))}"
        # Fallback: parse the repr "FinalAnswerStep(output=...)"
        m = re.search(r"FinalAnswer[^()]*\(\s*output\s*=\s*([^,)]+)", s)
        if m:
            return f"Final answer: {m.group(1).strip()}"

    joined = "\n".join(parts).strip()
    # A bare object repr like "FinalAnswerStep(...)" adds no value; drop it.
    if re.match(r"^FinalAnswer[^\n]+\)$", joined):
        return ""
    return joined or s
158
+
159
+ # ---- Tee for redirecting stdout/stderr into an asyncio.Queue ----------------
160
class QueueWriter(io.TextIOBase):
    """
    Minimal text writer that sends lines into an asyncio.Queue.

    Each non-empty cleaned line is enqueued as {"__stdout__": "<line>"}.
    Writes are buffered until a newline so consumers see whole lines.

    A lock guards the internal buffer: this object is installed via
    contextlib.redirect_stdout/redirect_stderr and may be written to from a
    worker thread while flush() is called from another (the previous
    implementation held a lock; it was restored here).
    """

    def __init__(self, q: "asyncio.Queue[dict]"):
        self._q = q
        self._buf: list = []
        self._lock = threading.Lock()  # buffer may be touched by 2+ threads

    def write(self, s: str) -> int:
        if not isinstance(s, str):
            s = str(s)
        with self._lock:
            # Buffer until a newline; then emit one event per complete line.
            self._buf.append(s)
            text = "".join(self._buf)
            if "\n" in text:
                lines = text.splitlines(keepends=True)
                # Keep any trailing partial line (no newline yet) buffered.
                tail = "" if text.endswith("\n") else lines.pop()
                for ln in lines:
                    clean = _clean_line(ln)
                    if clean:
                        try:
                            # Downstream streamer formats these further.
                            self._q.put_nowait({"__stdout__": clean})
                        except Exception:
                            pass
                self._buf = [tail]
        return len(s)

    def flush(self) -> None:
        with self._lock:
            if not self._buf:
                return
            text = "".join(self._buf)
            self._buf.clear()
        clean = _clean_line(text)
        if clean:
            try:
                self._q.put_nowait({"__stdout__": clean})
            except Exception:
                pass

    def isatty(self) -> bool:  # some libraries probe for a TTY
        return False
203
+
204
+ # import asyncio
205
+ # import io
206
+ # import json
207
+ # import re
208
+ # import threading
209
+ #
210
+ # from agent_server.sanitizing_think_tags import scrub_think_tags
211
+ #
212
+ #
213
+ # class QueueWriter(io.TextIOBase):
214
+ # """
215
+ # File-like object that pushes each write to an asyncio.Queue immediately.
216
+ # """
217
+ #
218
+ # def __init__(self, q: "asyncio.Queue"):
219
+ # self.q = q
220
+ # self._lock = threading.Lock()
221
+ # self._buf = [] # accumulate until newline to reduce spam
222
+ #
223
+ # def write(self, s: str):
224
+ # if not s:
225
+ # return 0
226
+ # with self._lock:
227
+ # self._buf.append(s)
228
+ # # flush on newline to keep granularity reasonable
229
+ # if "\n" in s:
230
+ # chunk = "".join(self._buf)
231
+ # self._buf.clear()
232
+ # try:
233
+ # self.q.put_nowait({"__stdout__": chunk})
234
+ # except Exception:
235
+ # pass
236
+ # return len(s)
237
+ #
238
+ # def flush(self):
239
+ # with self._lock:
240
+ # if self._buf:
241
+ # chunk = "".join(self._buf)
242
+ # self._buf.clear()
243
+ # try:
244
+ # self.q.put_nowait({"__stdout__": chunk})
245
+ # except Exception:
246
+ # pass
247
+ #
248
+ #
249
+ # def _serialize_step(step) -> str:
250
+ # """
251
+ # Best-effort pretty string for a smolagents MemoryStep / ActionStep.
252
+ # Works even if attributes are missing on some versions.
253
+ # """
254
+ # parts = []
255
+ # sn = getattr(step, "step_number", None)
256
+ # if sn is not None:
257
+ # parts.append(f"Step {sn}")
258
+ # thought_val = getattr(step, "thought", None)
259
+ # if thought_val:
260
+ # parts.append(f"Thought: {scrub_think_tags(str(thought_val))}")
261
+ # tool_val = getattr(step, "tool", None)
262
+ # if tool_val:
263
+ # parts.append(f"Tool: {scrub_think_tags(str(tool_val))}")
264
+ # code_val = getattr(step, "code", None)
265
+ # if code_val:
266
+ # code_str = scrub_think_tags(str(code_val)).strip()
267
+ # parts.append("```python\n" + code_str + "\n```")
268
+ # args = getattr(step, "args", None)
269
+ # if args:
270
+ # try:
271
+ # parts.append(
272
+ # "Args: " + scrub_think_tags(json.dumps(args, ensure_ascii=False))
273
+ # )
274
+ # except Exception:
275
+ # parts.append("Args: " + scrub_think_tags(str(args)))
276
+ # error = getattr(step, "error", None)
277
+ # if error:
278
+ # parts.append(f"Error: {scrub_think_tags(str(error))}")
279
+ # obs = getattr(step, "observations", None)
280
+ # if obs is not None:
281
+ # if isinstance(obs, (list, tuple)):
282
+ # obs_str = "\n".join(map(str, obs))
283
+ # else:
284
+ # obs_str = str(obs)
285
+ # parts.append("Observation:\n" + scrub_think_tags(obs_str).strip())
286
+ # # If this looks like a FinalAnswer step object, surface a clean final answer
287
+ # try:
288
+ # tname = type(step).__name__
289
+ # except Exception:
290
+ # tname = ""
291
+ # if tname.lower().startswith("finalanswer"):
292
+ # out = getattr(step, "output", None)
293
+ # if out is not None:
294
+ # return f"Final answer: {scrub_think_tags(str(out)).strip()}"
295
+ # # Fallback: try to parse from string repr "FinalAnswerStep(output=...)"
296
+ # s = scrub_think_tags(str(step))
297
+ # m = re.search(r"FinalAnswer[^()]*\(\s*output\s*=\s*([^,)]+)", s)
298
+ # if m:
299
+ # return f"Final answer: {m.group(1).strip()}"
300
+ # # If the only content would be an object repr like FinalAnswerStep(...), drop it;
301
+ # # a cleaner "Final answer: ..." will come from the rule above or stdout.
302
+ # joined = "\n".join(parts).strip()
303
+ # if re.match(r"^FinalAnswer[^\n]+\)$", joined):
304
+ # return ""
305
+ # return joined or scrub_think_tags(str(step))