Spaces:

ccm
/

chat-ui-with-agent-examples

Sleeping

App Files Community

ccm committed on Nov 10

Commit

7e34bee

1 Parent(s): a2b9043

Reverting logging changes

Browse files

Files changed (2) hide show

agent_server/agent_streaming.py +144 -300
agent_server/std_tee.py +36 -251

agent_server/agent_streaming.py CHANGED Viewed

@@ -1,58 +1,152 @@
-# agent_server/agent_streaming.py
-from __future__ import annotations
 import asyncio
 import contextlib
 import os
 import threading
-import typing as t
 import fastapi
 import httpx
 from agent_server.helpers import sse_headers
 from agent_server.sanitizing_think_tags import scrub_think_tags
-from agent_server.std_tee import (
-    QueueWriter,
-    _format_reasoning_chunk,
-)
-# -----------------------------------------------------------------------------
-# Minimal agent streaming:
-#   • capture ONLY stdout/stderr during agent execution
-#   • stream normalized reasoning chunks derived from those lines
-#   • emit a single {"__final__": ...} with the agent's returned result
-# -----------------------------------------------------------------------------
-async def run_agent_stream(task: str, agent_obj: t.Optional[t.Any] = None):
-    """
-    Streams compact reasoning derived from stdout/stderr lines while the agent runs.
-    When the agent finishes, emits {"__final__": <agent_return_value>}.
-    Yields dict events:
-      - {"__reasoning__": "<chunk>"}  # normalized, compact text
-      - {"__error__": "<message>"}    # if the agent runner throws
-      - {"__final__": <any>}          # terminal event with the returned result
     """
     q: asyncio.Queue = asyncio.Queue()
     agent_to_use = agent_obj
-    # redirect stdout/stderr into this queue (line-buffered in QueueWriter)
     qwriter = QueueWriter(q)
-    # Background runner executes the agent synchronously so stdout flushes as it prints
     def run_agent():
         final_result = None
         try:
-            with contextlib.redirect_stdout(qwriter), contextlib.redirect_stderr(qwriter):
-                # Keep this simple: don't use stream=True or iterator-style APIs.
-                if hasattr(agent_to_use, "run") and callable(getattr(agent_to_use, "run")):
-                    final_result = agent_to_use.run(task)
-                elif hasattr(agent_to_use, "generate") and callable(getattr(agent_to_use, "generate")):
-                    final_result = agent_to_use.generate(task)
-                elif callable(agent_to_use):
-                    final_result = agent_to_use(task)
-                else:
-                    raise RuntimeError("Agent object is not callable and exposes no run()/generate()")
         except Exception as e:
             try:
                 qwriter.flush()
@@ -71,31 +165,22 @@ async def run_agent_stream(task: str, agent_obj: t.Optional[t.Any] = None):
                 q.put_nowait({"__final__": final_result})
             except Exception:
                 pass
-    # Kick off the worker thread
-    run_thread = threading.Thread(target=run_agent, name="agent-runner", daemon=True)
     run_thread.start()
-    # Async consumer: convert raw stdout lines -> compact reasoning chunks
     while True:
         item = await q.get()
-        if isinstance(item, dict) and "__stdout__" in item:
-            # Normalize/clean the line (strips ANSI, think tags, box drawing, system prompts)
-            chunk = _format_reasoning_chunk(item["__stdout__"], tag="stdout", idx=0)
-            if chunk:
-                yield {"__reasoning__": chunk}
-            continue
-        # Pass through non-stdout items (errors, final)
         yield item
         if isinstance(item, dict) and "__final__" in item:
             break
-# -----------------------------------------------------------------------------
-# Utilities: scrub nested structures of <think> tags when proxying upstream
-# -----------------------------------------------------------------------------
 def _recursively_scrub(obj):
     if isinstance(obj, str):
         return scrub_think_tags(obj)
@@ -106,10 +191,9 @@ def _recursively_scrub(obj):
     return obj
-# -----------------------------------------------------------------------------
-# Upstream proxy (OpenAI-compatible) with optional think-tag scrubbing
-# -----------------------------------------------------------------------------
-async def proxy_upstream_chat_completions(body: dict, stream: bool, scrub_think: bool = False):
     HF_TOKEN = os.getenv("OPENAI_API_KEY")
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
@@ -122,9 +206,12 @@ async def proxy_upstream_chat_completions(body: dict, stream: bool, scrub_think:
         async def proxy_stream():
             async with httpx.AsyncClient(timeout=None) as client:
-                async with client.stream("POST", url, headers=headers, json=body) as resp:
                     resp.raise_for_status()
                     if scrub_think:
                         async for txt in resp.aiter_text():
                             try:
                                 cleaned = scrub_think_tags(txt)
@@ -152,249 +239,6 @@ async def proxy_upstream_chat_completions(body: dict, stream: bool, scrub_think:
                 except Exception:
                     pass
-            return fastapi.responses.JSONResponse(status_code=r.status_code, content=payload)
-# import asyncio
-# import contextlib
-# import os
-# import threading
-# import time
-# import typing
-#
-# import fastapi
-# import httpx
-#
-# from agent_server.helpers import sse_headers
-# from agent_server.sanitizing_think_tags import scrub_think_tags
-# from agent_server.std_tee import QueueWriter, _serialize_step
-#
-#
-# async def run_agent_stream(task: str, agent_obj: typing.Optional[typing.Any] = None):
-#     """
-#     Start the agent in a worker thread.
-#     Stream THREE sources of incremental data into the async generator:
-#       (1) live stdout/stderr lines,
-#       (2) newly appended memory steps (polled),
-#       (3) any iterable the agent may yield (if supported).
-#     Finally emit a __final__ item with the last answer.
-#     """
-#     loop = asyncio.get_running_loop()
-#     q: asyncio.Queue = asyncio.Queue()
-#     agent_to_use = agent_obj
-#
-#     stop_evt = threading.Event()
-#
-#     # 1) stdout/stderr live tee
-#     qwriter = QueueWriter(q)
-#
-#     # 2) memory poller
-#     def poll_memory():
-#         last_len = 0
-#         while not stop_evt.is_set():
-#             try:
-#                 steps = []
-#                 try:
-#                     # Common API: agent.memory.get_full_steps()
-#                     steps = agent_to_use.memory.get_full_steps()  # type: ignore[attr-defined]
-#                 except Exception:
-#                     # Fallbacks: different names across versions
-#                     steps = (
-#                         getattr(agent_to_use, "steps", [])
-#                         or getattr(agent_to_use, "memory", [])
-#                         or []
-#                     )
-#                 if steps is None:
-#                     steps = []
-#                 curr_len = len(steps)
-#                 if curr_len > last_len:
-#                     new = steps[last_len:curr_len]
-#                     last_len = curr_len
-#                     for s in new:
-#                         s_text = _serialize_step(s)
-#                         if s_text:
-#                             try:
-#                                 q.put_nowait({"__step__": s_text})
-#                             except Exception:
-#                                 pass
-#             except Exception:
-#                 pass
-#             time.sleep(0.10)  # 100 ms cadence
-#
-#     # 3) agent runner (may or may not yield)
-#     def run_agent():
-#         final_result = None
-#         try:
-#             with contextlib.redirect_stdout(qwriter), contextlib.redirect_stderr(
-#                 qwriter
-#             ):
-#                 used_iterable = False
-#                 if hasattr(agent_to_use, "run") and callable(
-#                     getattr(agent_to_use, "run")
-#                 ):
-#                     try:
-#                         res = agent_to_use.run(task, stream=True)
-#                         if hasattr(res, "__iter__") and not isinstance(
-#                             res, (str, bytes)
-#                         ):
-#                             used_iterable = True
-#                             for it in res:
-#                                 try:
-#                                     q.put_nowait(it)
-#                                 except Exception:
-#                                     pass
-#                             final_result = (
-#                                 None  # iterable may already contain the answer
-#                             )
-#                         else:
-#                             final_result = res
-#                     except TypeError:
-#                         # run(stream=True) not supported -> fall back
-#                         pass
-#
-#                 if final_result is None and not used_iterable:
-#                     # Try other common streaming signatures
-#                     for name in (
-#                         "run_stream",
-#                         "stream",
-#                         "stream_run",
-#                         "run_with_callback",
-#                     ):
-#                         fn = getattr(agent_to_use, name, None)
-#                         if callable(fn):
-#                             try:
-#                                 res = fn(task)
-#                                 if hasattr(res, "__iter__") and not isinstance(
-#                                     res, (str, bytes)
-#                                 ):
-#                                     for it in res:
-#                                         q.put_nowait(it)
-#                                     final_result = None
-#                                 else:
-#                                     final_result = res
-#                                 break
-#                             except TypeError:
-#                                 # maybe callback signature
-#                                 def cb(item):
-#                                     try:
-#                                         q.put_nowait(item)
-#                                     except Exception:
-#                                         pass
-#
-#                                 try:
-#                                     fn(task, cb)
-#                                     final_result = None
-#                                     break
-#                                 except Exception:
-#                                     continue
-#
-#                 if final_result is None and not used_iterable:
-#                     pass  # (typo guard removed below)
-#
-#                 if final_result is None and not used_iterable:
-#                     # Last resort: synchronous run()/generate()/callable
-#                     if hasattr(agent_to_use, "run") and callable(
-#                         getattr(agent_to_use, "run")
-#                     ):
-#                         final_result = agent_to_use.run(task)
-#                     elif hasattr(agent_to_use, "generate") and callable(
-#                         getattr(agent_to_use, "generate")
-#                     ):
-#                         final_result = agent_to_use.generate(task)
-#                     elif callable(agent_to_use):
-#                         final_result = agent_to_use(task)
-#
-#         except Exception as e:
-#             try:
-#                 qwriter.flush()
-#             except Exception:
-#                 pass
-#             try:
-#                 q.put_nowait({"__error__": str(e)})
-#             except Exception:
-#                 pass
-#         finally:
-#             try:
-#                 qwriter.flush()
-#             except Exception:
-#                 pass
-#             try:
-#                 q.put_nowait({"__final__": final_result})
-#             except Exception:
-#                 pass
-#             stop_evt.set()
-#
-#     # Kick off threads
-#     mem_thread = threading.Thread(target=poll_memory, daemon=True)
-#     run_thread = threading.Thread(target=run_agent, daemon=True)
-#     mem_thread.start()
-#     run_thread.start()
-#
-#     # Async consumer
-#     while True:
-#         item = await q.get()
-#         yield item
-#         if isinstance(item, dict) and "__final__" in item:
-#             break
-#
-#
-# def _recursively_scrub(obj):
-#     if isinstance(obj, str):
-#         return scrub_think_tags(obj)
-#     if isinstance(obj, dict):
-#         return {k: _recursively_scrub(v) for k, v in obj.items()}
-#     if isinstance(obj, list):
-#         return [_recursively_scrub(v) for v in obj]
-#     return obj
-#
-#
-# async def proxy_upstream_chat_completions(
-#     body: dict, stream: bool, scrub_think: bool = False
-# ):
-#     HF_TOKEN = os.getenv("OPENAI_API_KEY")
-#     headers = {
-#         "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
-#         "Content-Type": "application/json",
-#     }
-#     UPSTREAM_BASE = os.getenv("UPSTREAM_OPENAI_BASE", "").rstrip("/")
-#     url = f"{UPSTREAM_BASE}/chat/completions"
-#
-#     if stream:
-#
-#         async def proxy_stream():
-#             async with httpx.AsyncClient(timeout=None) as client:
-#                 async with client.stream(
-#                     "POST", url, headers=headers, json=body
-#                 ) as resp:
-#                     resp.raise_for_status()
-#                     if scrub_think:
-#                         # Pull text segments, scrub tags, and yield bytes
-#                         async for txt in resp.aiter_text():
-#                             try:
-#                                 cleaned = scrub_think_tags(txt)
-#                                 yield cleaned.encode("utf-8")
-#                             except Exception:
-#                                 yield txt.encode("utf-8")
-#                     else:
-#                         async for chunk in resp.aiter_bytes():
-#                             yield chunk
-#
-#         return fastapi.responses.StreamingResponse(
-#             proxy_stream(), media_type="text/event-stream", headers=sse_headers()
-#         )
-#     else:
-#         async with httpx.AsyncClient(timeout=None) as client:
-#             r = await client.post(url, headers=headers, json=body)
-#             try:
-#                 payload = r.json()
-#             except Exception:
-#                 payload = {"status_code": r.status_code, "text": r.text}
-#
-#             if scrub_think:
-#                 try:
-#                     payload = _recursively_scrub(payload)
-#                 except Exception:
-#                     pass
-#
-#             return fastapi.responses.JSONResponse(
-#                 status_code=r.status_code, content=payload
-#             )

 import asyncio
 import contextlib
 import os
 import threading
+import time
+import typing
 import fastapi
 import httpx
 from agent_server.helpers import sse_headers
 from agent_server.sanitizing_think_tags import scrub_think_tags
+from agent_server.std_tee import QueueWriter, _serialize_step
+async def run_agent_stream(task: str, agent_obj: typing.Optional[typing.Any] = None):
+    """
+    Start the agent in a worker thread.
+    Stream THREE sources of incremental data into the async generator:
+      (1) live stdout/stderr lines,
+      (2) newly appended memory steps (polled),
+      (3) any iterable the agent may yield (if supported).
+    Finally emit a __final__ item with the last answer.
     """
+    loop = asyncio.get_running_loop()
     q: asyncio.Queue = asyncio.Queue()
     agent_to_use = agent_obj
+    stop_evt = threading.Event()
+    # 1) stdout/stderr live tee
     qwriter = QueueWriter(q)
+    # 2) memory poller
+    def poll_memory():
+        last_len = 0
+        while not stop_evt.is_set():
+            try:
+                steps = []
+                try:
+                    # Common API: agent.memory.get_full_steps()
+                    steps = agent_to_use.memory.get_full_steps()  # type: ignore[attr-defined]
+                except Exception:
+                    # Fallbacks: different names across versions
+                    steps = (
+                        getattr(agent_to_use, "steps", [])
+                        or getattr(agent_to_use, "memory", [])
+                        or []
+                    )
+                if steps is None:
+                    steps = []
+                curr_len = len(steps)
+                if curr_len > last_len:
+                    new = steps[last_len:curr_len]
+                    last_len = curr_len
+                    for s in new:
+                        s_text = _serialize_step(s)
+                        if s_text:
+                            try:
+                                q.put_nowait({"__step__": s_text})
+                            except Exception:
+                                pass
+            except Exception:
+                pass
+            time.sleep(0.10)  # 100 ms cadence
+    # 3) agent runner (may or may not yield)
     def run_agent():
         final_result = None
         try:
+            with contextlib.redirect_stdout(qwriter), contextlib.redirect_stderr(
+                qwriter
+            ):
+                used_iterable = False
+                if hasattr(agent_to_use, "run") and callable(
+                    getattr(agent_to_use, "run")
+                ):
+                    try:
+                        res = agent_to_use.run(task, stream=True)
+                        if hasattr(res, "__iter__") and not isinstance(
+                            res, (str, bytes)
+                        ):
+                            used_iterable = True
+                            for it in res:
+                                try:
+                                    q.put_nowait(it)
+                                except Exception:
+                                    pass
+                            final_result = (
+                                None  # iterable may already contain the answer
+                            )
+                        else:
+                            final_result = res
+                    except TypeError:
+                        # run(stream=True) not supported -> fall back
+                        pass
+                if final_result is None and not used_iterable:
+                    # Try other common streaming signatures
+                    for name in (
+                        "run_stream",
+                        "stream",
+                        "stream_run",
+                        "run_with_callback",
+                    ):
+                        fn = getattr(agent_to_use, name, None)
+                        if callable(fn):
+                            try:
+                                res = fn(task)
+                                if hasattr(res, "__iter__") and not isinstance(
+                                    res, (str, bytes)
+                                ):
+                                    for it in res:
+                                        q.put_nowait(it)
+                                    final_result = None
+                                else:
+                                    final_result = res
+                                break
+                            except TypeError:
+                                # maybe callback signature
+                                def cb(item):
+                                    try:
+                                        q.put_nowait(item)
+                                    except Exception:
+                                        pass
+                                try:
+                                    fn(task, cb)
+                                    final_result = None
+                                    break
+                                except Exception:
+                                    continue
+                if final_result is None and not used_iterable:
+                    pass  # (typo guard removed below)
+                if final_result is None and not used_iterable:
+                    # Last resort: synchronous run()/generate()/callable
+                    if hasattr(agent_to_use, "run") and callable(
+                        getattr(agent_to_use, "run")
+                    ):
+                        final_result = agent_to_use.run(task)
+                    elif hasattr(agent_to_use, "generate") and callable(
+                        getattr(agent_to_use, "generate")
+                    ):
+                        final_result = agent_to_use.generate(task)
+                    elif callable(agent_to_use):
+                        final_result = agent_to_use(task)
         except Exception as e:
             try:
                 qwriter.flush()
                 q.put_nowait({"__final__": final_result})
             except Exception:
                 pass
+            stop_evt.set()
+    # Kick off threads
+    mem_thread = threading.Thread(target=poll_memory, daemon=True)
+    run_thread = threading.Thread(target=run_agent, daemon=True)
+    mem_thread.start()
     run_thread.start()
+    # Async consumer
     while True:
         item = await q.get()
         yield item
         if isinstance(item, dict) and "__final__" in item:
             break
 def _recursively_scrub(obj):
     if isinstance(obj, str):
         return scrub_think_tags(obj)
     return obj
+async def proxy_upstream_chat_completions(
+    body: dict, stream: bool, scrub_think: bool = False
+):
     HF_TOKEN = os.getenv("OPENAI_API_KEY")
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}" if HF_TOKEN else "",
         async def proxy_stream():
             async with httpx.AsyncClient(timeout=None) as client:
+                async with client.stream(
+                    "POST", url, headers=headers, json=body
+                ) as resp:
                     resp.raise_for_status()
                     if scrub_think:
+                        # Pull text segments, scrub tags, and yield bytes
                         async for txt in resp.aiter_text():
                             try:
                                 cleaned = scrub_think_tags(txt)
                 except Exception:
                     pass
+            return fastapi.responses.JSONResponse(
+                status_code=r.status_code, content=payload
+            )

agent_server/std_tee.py CHANGED Viewed

@@ -1,317 +1,102 @@
-# agent_server/std_tee.py
-from __future__ import annotations
 import asyncio
 import io
 import json
 import re
 import threading
-import typing as t
 from agent_server.sanitizing_think_tags import scrub_think_tags
-# ---------------------------------------------------------------------------
-# Cleaning / formatting helpers (used by the streaming layer)
-# ---------------------------------------------------------------------------
-# Strip ANSI escape sequences (common from rich/logging)
-_ANSI_RE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
-# Lines that should never be surfaced (system prompt safety + obvious boilerplate)
-_NOISY_PREFIXES = (
-    "OpenAIServerModel",
-    "Output message of the LLM",
-    "New run",
-    "─ Executing parsed code",
-    "╭", "╰", "│", "━", "─",
-    "System prompt", "SYSTEM PROMPT", "System Prompt",
-)
-# Very long single lines without enough alphanumerics are dropped
-_MIN_SIG_CHARS = re.compile(r"[A-Za-z0-9]{3,}")
-def _strip_ansi_and_think(s: str) -> str:
-    s = scrub_think_tags(s)
-    s = _ANSI_RE.sub("", s)
-    return s
-def _truncate(s: str, n: int) -> str:
-    s = s.strip()
-    if len(s) <= n:
-        return s
-    return s[:n] + "\n… [truncated]"
-def _format_reasoning_chunk(text: str, tag: str, idx: int) -> str:
-    """
-    Lightweight formatter for reasoning stream from stdout.
-    - scrubs <think>…</think>
-    - strips ANSI
-    - drops banners/box drawing and 'System prompt …'
-    - drops very-long low-signal lines
-    Returns a small readable chunk with a trailing blank line.
-    """
-    stripped = _strip_ansi_and_think(text).rstrip("\n").strip()
-    if not stripped:
-        return ""
-    if any(stripped.startswith(p) for p in _NOISY_PREFIXES):
-        return ""
-    # Lines made mostly of box drawing/separators
-    if all(ch in " ─━╭╮╰╯│═·—-_=+•" for ch in stripped):
-        return ""
-    # Excessively long lines with little signal (no alphanumerics)
-    if len(stripped) > 240 and not _MIN_SIG_CHARS.search(stripped):
-        return ""
-    return f"{stripped}\n\n"
-# Optional helper if you ever want to sniff final answer from stdout
-_FINAL_RE = re.compile(r"(?:^|\b)Final\s+answer:\s*(.+)$", flags=re.IGNORECASE)
-def _maybe_parse_final_from_stdout(line: str) -> t.Optional[str]:
-    if not isinstance(line, str):
-        return None
-    m = _FINAL_RE.search(_strip_ansi_and_think(line))
-    if not m:
-        return None
-    return _strip_ansi_and_think(m.group(1)).strip() or None
-# ---------------------------------------------------------------------------
-# QueueWriter: tee stdout/stderr into an asyncio.Queue line-by-line
-# ---------------------------------------------------------------------------
 class QueueWriter(io.TextIOBase):
     """
-    File-like object that pushes lines to an asyncio.Queue.
-    Each complete line is enqueued as {"__stdout__": "<line>"}.
-    The last partial line (if any) is flushed on flush().
     """
-    def __init__(self, q: "asyncio.Queue[dict]"):
         self.q = q
         self._lock = threading.Lock()
-        self._buf: list[str] = []
-    def write(self, s: str) -> int:
         if not s:
             return 0
-        if not isinstance(s, str):
-            s = str(s)
         with self._lock:
             self._buf.append(s)
-            text = "".join(self._buf)
-            if "\n" in text:
-                lines = text.splitlines(keepends=True)
-                # keep last partial (no newline) in buffer
-                tail = "" if text.endswith("\n") else lines.pop()
-                for ln in lines:
-                    if ln:  # include newlines; consumer will trim/format
-                        try:
-                            self.q.put_nowait({"__stdout__": ln})
-                        except Exception:
-                            pass
-                self._buf = [tail]
         return len(s)
-    def flush(self) -> None:
         with self._lock:
             if self._buf:
-                text = "".join(self._buf)
                 self._buf.clear()
-                if text:
-                    try:
-                        self.q.put_nowait({"__stdout__": text})
-                    except Exception:
-                        pass
-    def isatty(self) -> bool:  # some libs check this
-        return False
-# ---------------------------------------------------------------------------
-# (Optional / future) Compact serializer for step objects from various agents
-# ---------------------------------------------------------------------------
 def _serialize_step(step) -> str:
     """
-    Compact, uniform serializer for 'step' objects from different agent libs.
-    Produces:
-      Step N
-      🧠 Thought: …
-      🛠️ Tool: …
-      📥 Args: …
-      📤 Observation: …
-      💥 Error: …
-      (plus code fences when code is present)
-    With truncation to keep the reveal parsimonious.
     """
-    parts: list[str] = []
-    # Step number (best-effort)
     sn = getattr(step, "step_number", None)
     if sn is not None:
         parts.append(f"Step {sn}")
-    # Thought
     thought_val = getattr(step, "thought", None)
     if thought_val:
-        parts.append(f"🧠 Thought: {_truncate(_strip_ansi_and_think(str(thought_val)), 600)}")
-    # Tool
     tool_val = getattr(step, "tool", None)
     if tool_val:
-        parts.append(f"🛠️ Tool: {_truncate(_strip_ansi_and_think(str(tool_val)), 240)}")
-    # Code (if any)
     code_val = getattr(step, "code", None)
     if code_val:
-        code_str = _truncate(_strip_ansi_and_think(str(code_val)), 1600)
-        if code_str:
-            parts.append("```python\n" + code_str + "\n```")
-    # Args
     args = getattr(step, "args", None)
     if args:
         try:
-            arg_s = _truncate(_strip_ansi_and_think(json.dumps(args, ensure_ascii=False)), 800)
         except Exception:
-            arg_s = _truncate(_strip_ansi_and_think(str(args)), 800)
-        parts.append("📥 Args: " + arg_s)
-    # Error
     error = getattr(step, "error", None)
     if error:
-        parts.append(f"💥 Error: {_truncate(_strip_ansi_and_think(str(error)), 600)}")
-    # Observations
     obs = getattr(step, "observations", None)
     if obs is not None:
         if isinstance(obs, (list, tuple)):
             obs_str = "\n".join(map(str, obs))
         else:
             obs_str = str(obs)
-        parts.append("📤 Observation:\n" + _truncate(_strip_ansi_and_think(obs_str), 1600))
     # If this looks like a FinalAnswer step object, surface a clean final answer
     try:
         tname = type(step).__name__
     except Exception:
         tname = ""
-    if isinstance(tname, str) and tname.lower().startswith("finalanswer"):
         out = getattr(step, "output", None)
         if out is not None:
-            return f"Final answer: {_strip_ansi_and_think(str(out)).strip()}"
-        # Fallback: parse from string repr "FinalAnswerStep(output=...)"
-        s = _strip_ansi_and_think(str(step))
         m = re.search(r"FinalAnswer[^()]*\(\s*output\s*=\s*([^,)]+)", s)
         if m:
             return f"Final answer: {m.group(1).strip()}"
     # If the only content would be an object repr like FinalAnswerStep(...), drop it;
     # a cleaner "Final answer: ..." will come from the rule above or stdout.
     joined = "\n".join(parts).strip()
     if re.match(r"^FinalAnswer[^\n]+\)$", joined):
         return ""
-    return joined or _strip_ansi_and_think(str(step))
-# import asyncio
-# import io
-# import json
-# import re
-# import threading
-#
-# from agent_server.sanitizing_think_tags import scrub_think_tags
-#
-#
-# class QueueWriter(io.TextIOBase):
-#     """
-#     File-like object that pushes each write to an asyncio.Queue immediately.
-#     """
-#
-#     def __init__(self, q: "asyncio.Queue"):
-#         self.q = q
-#         self._lock = threading.Lock()
-#         self._buf = []  # accumulate until newline to reduce spam
-#
-#     def write(self, s: str):
-#         if not s:
-#             return 0
-#         with self._lock:
-#             self._buf.append(s)
-#             # flush on newline to keep granularity reasonable
-#             if "\n" in s:
-#                 chunk = "".join(self._buf)
-#                 self._buf.clear()
-#                 try:
-#                     self.q.put_nowait({"__stdout__": chunk})
-#                 except Exception:
-#                     pass
-#         return len(s)
-#
-#     def flush(self):
-#         with self._lock:
-#             if self._buf:
-#                 chunk = "".join(self._buf)
-#                 self._buf.clear()
-#                 try:
-#                     self.q.put_nowait({"__stdout__": chunk})
-#                 except Exception:
-#                     pass
-#
-#
-# def _serialize_step(step) -> str:
-#     """
-#     Best-effort pretty string for a smolagents MemoryStep / ActionStep.
-#     Works even if attributes are missing on some versions.
-#     """
-#     parts = []
-#     sn = getattr(step, "step_number", None)
-#     if sn is not None:
-#         parts.append(f"Step {sn}")
-#     thought_val = getattr(step, "thought", None)
-#     if thought_val:
-#         parts.append(f"Thought: {scrub_think_tags(str(thought_val))}")
-#     tool_val = getattr(step, "tool", None)
-#     if tool_val:
-#         parts.append(f"Tool: {scrub_think_tags(str(tool_val))}")
-#     code_val = getattr(step, "code", None)
-#     if code_val:
-#         code_str = scrub_think_tags(str(code_val)).strip()
-#         parts.append("```python\n" + code_str + "\n```")
-#     args = getattr(step, "args", None)
-#     if args:
-#         try:
-#             parts.append(
-#                 "Args: " + scrub_think_tags(json.dumps(args, ensure_ascii=False))
-#             )
-#         except Exception:
-#             parts.append("Args: " + scrub_think_tags(str(args)))
-#     error = getattr(step, "error", None)
-#     if error:
-#         parts.append(f"Error: {scrub_think_tags(str(error))}")
-#     obs = getattr(step, "observations", None)
-#     if obs is not None:
-#         if isinstance(obs, (list, tuple)):
-#             obs_str = "\n".join(map(str, obs))
-#         else:
-#             obs_str = str(obs)
-#         parts.append("Observation:\n" + scrub_think_tags(obs_str).strip())
-#     # If this looks like a FinalAnswer step object, surface a clean final answer
-#     try:
-#         tname = type(step).__name__
-#     except Exception:
-#         tname = ""
-#     if tname.lower().startswith("finalanswer"):
-#         out = getattr(step, "output", None)
-#         if out is not None:
-#             return f"Final answer: {scrub_think_tags(str(out)).strip()}"
-#         # Fallback: try to parse from string repr "FinalAnswerStep(output=...)"
-#         s = scrub_think_tags(str(step))
-#         m = re.search(r"FinalAnswer[^()]*\(\s*output\s*=\s*([^,)]+)", s)
-#         if m:
-#             return f"Final answer: {m.group(1).strip()}"
-#     # If the only content would be an object repr like FinalAnswerStep(...), drop it;
-#     # a cleaner "Final answer: ..." will come from the rule above or stdout.
-#     joined = "\n".join(parts).strip()
-#     if re.match(r"^FinalAnswer[^\n]+\)$", joined):
-#         return ""
-#     return joined or scrub_think_tags(str(step))

 import asyncio
 import io
 import json
 import re
 import threading
 from agent_server.sanitizing_think_tags import scrub_think_tags
 class QueueWriter(io.TextIOBase):
     """
     File-like object that forwards written text to an asyncio.Queue.

     Text is buffered until a write containing a newline arrives (or until
     flush() is called). The whole buffer is then enqueued as one event of
     the form {"__stdout__": <text>}. Enqueueing is best-effort: a failure
     to put the event on the queue is swallowed so that printing never
     raises in the code whose output is being captured.
     """

     def __init__(self, q: "asyncio.Queue"):
         """
         Args:
             q: queue that receives the {"__stdout__": <text>} events.
         """
         super().__init__()
         self.q = q
         self._lock = threading.Lock()
         self._buf = []  # accumulate until newline to reduce spam

     def writable(self) -> bool:
         """Report that this stream accepts write() calls."""
         # The io.IOBase default is False, which contradicts the fact that
         # write() is implemented here.
         return True

     def _emit_locked(self) -> None:
         """Enqueue the buffered text as one event; caller must hold the lock."""
         if not self._buf:
             return
         chunk = "".join(self._buf)
         self._buf.clear()
         try:
             # NOTE(review): asyncio.Queue is documented as not thread-safe;
             # if write() is invoked from a non-loop thread, consider
             # scheduling this via loop.call_soon_threadsafe - confirm
             # against the caller.
             self.q.put_nowait({"__stdout__": chunk})
         except Exception:
             # Deliberate best-effort: losing a chunk is preferable to
             # raising inside the redirected print()/write() call.
             pass

     def write(self, s: str) -> int:
         """
         Buffer *s* and emit the buffer once *s* contains a newline.

         Returns:
             The number of characters accepted (0 for an empty string).
         """
         if not s:
             return 0
         with self._lock:
             self._buf.append(s)
             # flush on newline to keep granularity reasonable
             if "\n" in s:
                 self._emit_locked()
         return len(s)

     def flush(self) -> None:
         """Emit whatever is currently buffered, even without a newline."""
         with self._lock:
             self._emit_locked()
 def _serialize_step(step) -> str:
     """
     Best-effort pretty string for a smolagents MemoryStep / ActionStep.

     Works even if attributes are missing on some versions: every field is
     read with getattr(..., None) and only the fields that are present are
     rendered. All rendered text is passed through scrub_think_tags.

     Args:
         step: the step object to render; no particular type is required.

     Returns:
         - "Final answer: <output>" when the step's type name starts with
           "finalanswer" (case-insensitive) and an output can be found;
         - otherwise the newline-joined labelled fields (Step, Thought,
           Tool, code block, Args, Error, Observation);
         - otherwise the scrubbed str(step), or "" when that fallback is
           just a bare "FinalAnswer...(...)" object repr.
     """
     parts = []
     sn = getattr(step, "step_number", None)
     if sn is not None:
         parts.append(f"Step {sn}")
     thought_val = getattr(step, "thought", None)
     if thought_val:
         parts.append(f"Thought: {scrub_think_tags(str(thought_val))}")
     tool_val = getattr(step, "tool", None)
     if tool_val:
         parts.append(f"Tool: {scrub_think_tags(str(tool_val))}")
     code_val = getattr(step, "code", None)
     if code_val:
         code_str = scrub_think_tags(str(code_val)).strip()
         parts.append("```python\n" + code_str + "\n```")
     args = getattr(step, "args", None)
     if args:
         try:
             parts.append(
                 "Args: " + scrub_think_tags(json.dumps(args, ensure_ascii=False))
             )
         except Exception:
             # args may not be JSON-serializable; fall back to its str() form.
             parts.append("Args: " + scrub_think_tags(str(args)))
     error = getattr(step, "error", None)
     if error:
         parts.append(f"Error: {scrub_think_tags(str(error))}")
     obs = getattr(step, "observations", None)
     if obs is not None:
         if isinstance(obs, (list, tuple)):
             obs_str = "\n".join(map(str, obs))
         else:
             obs_str = str(obs)
         parts.append("Observation:\n" + scrub_think_tags(obs_str).strip())

     # If this looks like a FinalAnswer step object, surface a clean final answer
     if type(step).__name__.lower().startswith("finalanswer"):
         out = getattr(step, "output", None)
         if out is not None:
             return f"Final answer: {scrub_think_tags(str(out)).strip()}"
         # Fallback: try to parse from string repr "FinalAnswerStep(output=...)"
         s = scrub_think_tags(str(step))
         m = re.search(r"FinalAnswer[^()]*\(\s*output\s*=\s*([^,)]+)", s)
         if m:
             return f"Final answer: {m.group(1).strip()}"

     joined = "\n".join(parts).strip()
     if joined:
         return joined

     # Nothing structured was found, so the only content left is str(step).
     # If that is just an object repr like FinalAnswerStep(...), drop it;
     # a cleaner "Final answer: ..." will come from the rule above or stdout.
     # (Every entry in `parts` starts with a fixed label, so this check can
     # only ever apply to the repr fallback, never to `joined`.)
     fallback = scrub_think_tags(str(step))
     if re.match(r"^FinalAnswer[^\n]+\)$", fallback.strip()):
         return ""
     return fallback