"""
OpenAI-compatible FastAPI proxy that wraps a smolagents CodeAgent.

Refactored for readability and modularity (single-file).
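
Example (non-streaming) request, assuming the local dev defaults below;
"<model>" is a placeholder -- list the real names via GET /v1/models:

    curl -s http://localhost:8000/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d '{"model": "<model>", "messages": [{"role": "user", "content": "Hello"}]}'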
"""
import logging  # For logging
import os  # For dealing with env vars
import typing  # For type annotations

import fastapi
import fastapi.responses

# Upstream pass-through + local helpers
from agent_server.agent_streaming import (
proxy_upstream_chat_completions,
)
from agent_server.chat_completions import (
normalize_model_name,
is_upstream_passthrough,
is_upstream_passthrough_nothink,
apply_nothink_to_body,
agent_for_model,
make_sse_generator,
run_non_streaming,
)
from agent_server.helpers import (
messages_to_task,
openai_response,
sse_headers,
)
from agent_server.models import models_payload
from agent_server.openai_schemas import ChatMessage, ChatCompletionRequest
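
# ChatCompletionRequest is presumably a TypedDict-style mapping (see
# agent_server.openai_schemas); this module only reads its "model",
# "messages", and "stream" fields.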

# --------------------------------------------------------------------------------------
# Logging / Config
# --------------------------------------------------------------------------------------
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO").upper())
log = logging.getLogger(__name__)

# --------------------------------------------------------------------------------------
# FastAPI app
# --------------------------------------------------------------------------------------
app = fastapi.FastAPI()

# --------------------------------------------------------------------------------------
# HTTP Handlers (thin wrappers around helpers)
# --------------------------------------------------------------------------------------
@app.get("/healthz")
async def healthz():
    return {"ok": True}


@app.get("/v1/models")
async def list_models():
    return models_payload()


@app.post("/v1/chat/completions")
async def chat_completions(req: fastapi.Request):
# ---------------- Parse & basic validation ----------------
    try:
        body: ChatCompletionRequest = typing.cast(
            ChatCompletionRequest, await req.json()
        )
        # Reject non-object payloads early (e.g. a top-level JSON array),
        # since the .get() calls below assume a mapping
        if not isinstance(body, dict):
            raise ValueError("request body must be a JSON object")
    except Exception as e:
        return fastapi.responses.JSONResponse(
            {"error": {"message": f"Invalid JSON: {e}"}}, status_code=400
        )
messages: typing.List[ChatMessage] = typing.cast(
typing.List[ChatMessage], body.get("messages") or []
)
stream: bool = bool(body.get("stream", False))
model_name: str = normalize_model_name(body.get("model"))
try:
# ---------------- Upstream pass-through modes ----------------
if is_upstream_passthrough(model_name):
# Raw pass-through to upstream
return await proxy_upstream_chat_completions(dict(body), stream)
if is_upstream_passthrough_nothink(model_name):
# Modify body for /nothink and forward to upstream
return await proxy_upstream_chat_completions(
apply_nothink_to_body(body, messages), stream, scrub_think=True
)
# ---------------- Local agent execution ----------------
# Convert OpenAI messages -> internal "task"
task: str = messages_to_task(messages)
# Create agent impl for the requested local model
agent_for_request = agent_for_model(model_name)
if stream:
# Streaming: return SSE response
gen = make_sse_generator(task, agent_for_request, model_name)
return fastapi.responses.StreamingResponse(
gen(), media_type="text/event-stream", headers=sse_headers()
)
else:
# Non-streaming: materialize final text and wrap in OpenAI shape
result_text = await run_non_streaming(task, agent_for_request)
return fastapi.responses.JSONResponse(
openai_response(result_text, model_name)
)
except ValueError as ve:
# Unknown model or other parameter validation errors
log.error("Invalid request: %s", ve)
return fastapi.responses.JSONResponse(
status_code=400,
content={"error": {"message": str(ve), "type": "invalid_request_error"}},
)
except Exception as e:
# Operational / agent runtime errors
msg = str(e)
status = 503 if "503" in msg or "Service Unavailable" in msg else 500
log.error("Agent error (%s): %s", status, msg)
return fastapi.responses.JSONResponse(
status_code=status,
content={
"error": {"message": f"Agent error: {msg}", "type": "agent_error"}
},
)
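
# Streaming responses are served as Server-Sent Events. A client-side sketch,
# assuming the local dev defaults below ("<model>" is a placeholder; real names
# come from GET /v1/models):
#   curl -N http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "<model>", "stream": true, "messages": [{"role": "user", "content": "hi"}]}'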

# --------------------------------------------------------------------------------------
# Local dev entrypoint
# --------------------------------------------------------------------------------------
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "app:app", host="0.0.0.0", port=int(os.getenv("PORT", "8000")), reload=False
    )
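
# Equivalent CLI invocation (assuming this file is saved as app.py):
#   uvicorn app:app --host 0.0.0.0 --port 8000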