"""
OpenAI-compatible FastAPI proxy that wraps a smolagents CodeAgent
Refactored for readability and modularity (single-file).
"""

import logging
import os
import typing

import fastapi
import fastapi.responses

# Upstream pass-through + local helpers
from agent_server.agent_streaming import (
    proxy_upstream_chat_completions,
)
from agent_server.chat_completions import (
    normalize_model_name,
    is_upstream_passthrough,
    is_upstream_passthrough_nothink,
    apply_nothink_to_body,
    agent_for_model,
    make_sse_generator,
    run_non_streaming,
)
from agent_server.helpers import (
    messages_to_task,
    openai_response,
    sse_headers,
)
from agent_server.models import models_payload
from agent_server.openai_schemas import ChatMessage, ChatCompletionRequest

# --------------------------------------------------------------------------------------
# Logging / Config
# --------------------------------------------------------------------------------------
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO").upper())
log = logging.getLogger(__name__)

# --------------------------------------------------------------------------------------
# FastAPI app
# --------------------------------------------------------------------------------------
app = fastapi.FastAPI()


# --------------------------------------------------------------------------------------
# HTTP Handlers (thin wrappers around helpers)
# --------------------------------------------------------------------------------------
@app.get("/healthz")
async def healthz():
    return {"ok": True}


@app.get("/v1/models")
async def list_models():
    return models_payload()


@app.post("/v1/chat/completions")
async def chat_completions(req: fastapi.Request):
    # ---------------- Parse & basic validation ----------------
    try:
        body: ChatCompletionRequest = typing.cast(
            ChatCompletionRequest, await req.json()
        )
    except Exception as e:
        return fastapi.responses.JSONResponse(
            {"error": {"message": f"Invalid JSON: {e}"}}, status_code=400
        )

    messages: typing.List[ChatMessage] = typing.cast(
        typing.List[ChatMessage], body.get("messages") or []
    )
    stream: bool = bool(body.get("stream", False))
    model_name: str = normalize_model_name(body.get("model"))

    try:
        # ---------------- Upstream pass-through modes ----------------
        if is_upstream_passthrough(model_name):
            # Raw pass-through to upstream
            return await proxy_upstream_chat_completions(dict(body), stream)

        if is_upstream_passthrough_nothink(model_name):
            # Modify body for /nothink and forward to upstream
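            # (Assumption: apply_nothink_to_body injects a "no think" directive
            # into the outgoing messages, and scrub_think=True strips any
            # reasoning/"think" content from the upstream reply.)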
            return await proxy_upstream_chat_completions(
                apply_nothink_to_body(body, messages), stream, scrub_think=True
            )

        # ---------------- Local agent execution ----------------
        # Convert OpenAI messages -> internal "task"
        task: str = messages_to_task(messages)

        # Create agent impl for the requested local model
        agent_for_request = agent_for_model(model_name)

        if stream:
            # Streaming: return SSE response
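            # make_sse_generator is assumed to yield OpenAI-style chunk events,
            # e.g. `data: {"choices": [{"delta": {"content": "..."}}]}` lines
            # terminated by `data: [DONE]`; this handler only wires the
            # generator into the StreamingResponse.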
            gen = make_sse_generator(task, agent_for_request, model_name)
            return fastapi.responses.StreamingResponse(
                gen(), media_type="text/event-stream", headers=sse_headers()
            )
        else:
            # Non-streaming: materialize final text and wrap in OpenAI shape
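            # openai_response is assumed to build the standard chat.completion
            # envelope, roughly:
            #   {"object": "chat.completion",
            #    "choices": [{"message": {"role": "assistant", "content": ...},
            #                 "finish_reason": "stop"}], ...}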
            result_text = await run_non_streaming(task, agent_for_request)
            return fastapi.responses.JSONResponse(
                openai_response(result_text, model_name)
            )

    except ValueError as ve:
        # Unknown model or other parameter validation errors
        log.error("Invalid request: %s", ve)
        return fastapi.responses.JSONResponse(
            status_code=400,
            content={"error": {"message": str(ve), "type": "invalid_request_error"}},
        )
    except Exception as e:
        # Operational / agent runtime errors
        msg = str(e)
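        # Heuristic: if the message looks like an upstream 503, surface it as
        # 503 so clients know the failure is transient; otherwise report 500.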
        status = 503 if "503" in msg or "Service Unavailable" in msg else 500
        log.error("Agent error (%s): %s", status, msg)
        return fastapi.responses.JSONResponse(
            status_code=status,
            content={
                "error": {"message": f"Agent error: {msg}", "type": "agent_error"}
            },
        )


# --------------------------------------------------------------------------------------
# Local dev entrypoint
# --------------------------------------------------------------------------------------
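# Running this file directly assumes it is saved as app.py (matching the
# "app:app" import string below). The equivalent CLI invocation would be:
#   uvicorn app:app --host 0.0.0.0 --port 8000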
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "app:app", host="0.0.0.0", port=int(os.getenv("PORT", "8000")), reload=False
    )