Spaces:
Sleeping
Sleeping
Build from requirements.txt and make model calls more accurate, allowing the agent model to use nothink
Browse files
- .env.local +4 -1
- Dockerfile +3 -4
- agent_server/chat_completions.py +4 -4
- agent_server/models.py +3 -3
- requirements.txt +6 -0
.env.local
CHANGED
|
@@ -3,7 +3,10 @@ OPENAI_BASE_URL=http://127.0.0.1:8000/v1/
|
|
| 3 |
|
| 4 |
# The proxy forwards here (your HF endpoint):
|
| 5 |
UPSTREAM_OPENAI_BASE=https://ay8ts6hfrqidjvwt.us-east-1.aws.endpoints.huggingface.cloud/v1
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
TASK_MODEL="Qwen/Qwen3-1.7B-nothink"
|
| 8 |
|
| 9 |
PUBLIC_APP_NAME="Agent Examples"
|
|
|
|
| 3 |
|
| 4 |
# The proxy forwards here (your HF endpoint):
|
| 5 |
UPSTREAM_OPENAI_BASE=https://ay8ts6hfrqidjvwt.us-east-1.aws.endpoints.huggingface.cloud/v1
|
| 6 |
+
|
| 7 |
+
# Models used by the proxy:
|
| 8 |
+
MODEL_NAME="Qwen/Qwen3-1.7B"
|
| 9 |
+
AGENT_MODEL="Qwen/Qwen3-1.7B-nothink"
|
| 10 |
TASK_MODEL="Qwen/Qwen3-1.7B-nothink"
|
| 11 |
|
| 12 |
PUBLIC_APP_NAME="Agent Examples"
|
Dockerfile
CHANGED
|
@@ -12,11 +12,10 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
|
|
| 12 |
rm -rf /var/lib/apt/lists/*
|
| 13 |
|
| 14 |
# Upgrade pip and install runtime libs used by proxy.py
|
|
|
|
| 15 |
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
|
| 16 |
-
pip3 install --no-cache-dir \
|
| 17 |
-
|
| 18 |
-
smolagents[toolkit] litellm \
|
| 19 |
-
"pydantic>=2,<3"
|
| 20 |
|
| 21 |
# MongoDB
|
| 22 |
RUN curl -fsSL https://www.mongodb.org/static/pgp/server-7.0.asc | \
|
|
|
|
| 12 |
rm -rf /var/lib/apt/lists/*
|
| 13 |
|
| 14 |
# Upgrade pip and install runtime libs used by proxy.py
|
| 15 |
+
COPY requirements.txt /tmp/requirements.txt
|
| 16 |
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
|
| 17 |
+
pip3 install --no-cache-dir -r /tmp/requirements.txt && \
|
| 18 |
+
rm /tmp/requirements.txt
|
|
|
|
|
|
|
| 19 |
|
| 20 |
# MongoDB
|
| 21 |
RUN curl -fsSL https://www.mongodb.org/static/pgp/server-7.0.asc | \
|
agent_server/chat_completions.py
CHANGED
|
@@ -21,7 +21,7 @@ from agents.json_tool_calling_agents import (
|
|
| 21 |
generate_tool_calling_agent_with_search_and_code,
|
| 22 |
)
|
| 23 |
|
| 24 |
-
|
| 25 |
|
| 26 |
|
| 27 |
def normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
|
|
@@ -37,11 +37,11 @@ def normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
|
|
| 37 |
|
| 38 |
|
| 39 |
def is_upstream_passthrough(model_name: str) -> bool:
|
| 40 |
-
return model_name ==
|
| 41 |
|
| 42 |
|
| 43 |
def is_upstream_passthrough_nothink(model_name: str) -> bool:
|
| 44 |
-
return model_name == f"{
|
| 45 |
|
| 46 |
|
| 47 |
def apply_nothink_to_body(
|
|
@@ -53,7 +53,7 @@ def apply_nothink_to_body(
|
|
| 53 |
- Appends '/nothink' to user message content
|
| 54 |
"""
|
| 55 |
new_body: ChatCompletionRequest = dict(body) # shallow copy is fine
|
| 56 |
-
new_body["model"] =
|
| 57 |
|
| 58 |
new_messages: typing.List[ChatMessage] = []
|
| 59 |
for msg in messages:
|
|
|
|
| 21 |
generate_tool_calling_agent_with_search_and_code,
|
| 22 |
)
|
| 23 |
|
| 24 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen3-1.7B")
|
| 25 |
|
| 26 |
|
| 27 |
def normalize_model_name(raw_model: typing.Union[str, dict, None]) -> str:
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
def is_upstream_passthrough(model_name: str) -> bool:
|
| 40 |
+
return model_name == MODEL_NAME
|
| 41 |
|
| 42 |
|
| 43 |
def is_upstream_passthrough_nothink(model_name: str) -> bool:
|
| 44 |
+
return model_name == f"{MODEL_NAME}-nothink"
|
| 45 |
|
| 46 |
|
| 47 |
def apply_nothink_to_body(
|
|
|
|
| 53 |
- Appends '/nothink' to user message content
|
| 54 |
"""
|
| 55 |
new_body: ChatCompletionRequest = dict(body) # shallow copy is fine
|
| 56 |
+
new_body["model"] = MODEL_NAME
|
| 57 |
|
| 58 |
new_messages: typing.List[ChatMessage] = []
|
| 59 |
for msg in messages:
|
agent_server/models.py
CHANGED
|
@@ -7,7 +7,7 @@ def models_payload() -> dict:
|
|
| 7 |
"""
|
| 8 |
Returns the /v1/models response payload.
|
| 9 |
"""
|
| 10 |
-
|
| 11 |
now = agent_server.helpers.now_ts()
|
| 12 |
return {
|
| 13 |
"object": "list",
|
|
@@ -37,13 +37,13 @@ def models_payload() -> dict:
|
|
| 37 |
"owned_by": "you",
|
| 38 |
},
|
| 39 |
{
|
| 40 |
-
"id":
|
| 41 |
"object": "model",
|
| 42 |
"created": now,
|
| 43 |
"owned_by": "upstream",
|
| 44 |
},
|
| 45 |
{
|
| 46 |
-
"id": f"{
|
| 47 |
"object": "model",
|
| 48 |
"created": now,
|
| 49 |
"owned_by": "upstream",
|
|
|
|
| 7 |
"""
|
| 8 |
Returns the /v1/models response payload.
|
| 9 |
"""
|
| 10 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen3-1.7B")
|
| 11 |
now = agent_server.helpers.now_ts()
|
| 12 |
return {
|
| 13 |
"object": "list",
|
|
|
|
| 37 |
"owned_by": "you",
|
| 38 |
},
|
| 39 |
{
|
| 40 |
+
"id": MODEL_NAME,
|
| 41 |
"object": "model",
|
| 42 |
"created": now,
|
| 43 |
"owned_by": "upstream",
|
| 44 |
},
|
| 45 |
{
|
| 46 |
+
"id": f"{MODEL_NAME}-nothink",
|
| 47 |
"object": "model",
|
| 48 |
"created": now,
|
| 49 |
"owned_by": "upstream",
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
httpx[http2]
|
| 4 |
+
smolagents[toolkit]
|
| 5 |
+
litellm
|
| 6 |
+
pydantic>=2,<3
|