github-actions[bot] committed on
Commit
cb5a5af
·
1 Parent(s): d778d65

🚀 Auto-deploy backend from GitHub (7408e56)

Browse files
Dockerfile CHANGED
@@ -23,6 +23,7 @@ RUN python -m pip install --upgrade pip setuptools wheel && \
23
 
24
  # Copy only runtime sources to reduce invalidation surface.
25
  COPY main.py /app/main.py
 
26
  COPY analytics.py /app/analytics.py
27
  COPY automation_engine.py /app/automation_engine.py
28
  COPY services /app/services
 
23
 
24
  # Copy only runtime sources to reduce invalidation surface.
25
  COPY main.py /app/main.py
26
+ COPY startup_validation.py /app/startup_validation.py
27
  COPY analytics.py /app/analytics.py
28
  COPY automation_engine.py /app/automation_engine.py
29
  COPY services /app/services
config/env.sample CHANGED
@@ -38,8 +38,8 @@ INFERENCE_INTERACTIVE_TIMEOUT_SEC=55
38
  INFERENCE_BACKGROUND_TIMEOUT_SEC=120
39
 
40
  # model defaults
41
- # Leave empty unless you intentionally want one global model for every task.
42
- INFERENCE_MODEL_ID=
43
  INFERENCE_MAX_NEW_TOKENS=640
44
  INFERENCE_TEMPERATURE=0.2
45
  INFERENCE_TOP_P=0.9
@@ -54,7 +54,7 @@ CHAT_STREAM_NO_TOKEN_TIMEOUT_SEC=25
54
  CHAT_STREAM_TOTAL_TIMEOUT_SEC=120
55
  # Optional: force quiz-generation model. Leave empty to use routing.task_model_map.quiz_generation.
56
  HF_QUIZ_MODEL_ID=
57
- HF_QUIZ_JSON_REPAIR_MODEL_ID=meta-llama/Llama-3.1-8B-Instruct
58
 
59
  # retry behavior
60
  INFERENCE_MAX_RETRIES=3
 
38
  INFERENCE_BACKGROUND_TIMEOUT_SEC=120
39
 
40
  # model defaults
41
+ # Global default model for all tasks.
42
+ INFERENCE_MODEL_ID=Qwen/Qwen2.5-7B-Instruct
43
  INFERENCE_MAX_NEW_TOKENS=640
44
  INFERENCE_TEMPERATURE=0.2
45
  INFERENCE_TOP_P=0.9
 
54
  CHAT_STREAM_TOTAL_TIMEOUT_SEC=120
55
  # Optional: force quiz-generation model. Leave empty to use routing.task_model_map.quiz_generation.
56
  HF_QUIZ_MODEL_ID=
57
+ HF_QUIZ_JSON_REPAIR_MODEL_ID=Qwen/Qwen2.5-7B-Instruct
58
 
59
  # retry behavior
60
  INFERENCE_MAX_RETRIES=3
config/models.yaml CHANGED
@@ -1,7 +1,7 @@
1
  models:
2
  primary:
3
- id: meta-llama/Llama-3.1-8B-Instruct
4
- description: Fast default instruction model for interactive Grade 11-12 math tutoring
5
  max_new_tokens: 640
6
  temperature: 0.25
7
  top_p: 0.9
@@ -26,24 +26,23 @@ models:
26
 
27
  routing:
28
  task_model_map:
29
- # Chat default: Llama-3.1-8B for low latency.
30
- # Hard prompts can escalate to 70B via runtime policy in inference_client.
31
- chat: meta-llama/Llama-3.1-8B-Instruct
32
- verify_solution: NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO
33
- lesson_generation: meta-llama/Llama-3.1-8B-Instruct
34
- quiz_generation: meta-llama/Llama-3.1-8B-Instruct
35
- learning_path: meta-llama/Llama-3.1-8B-Instruct
36
- daily_insight: meta-llama/Llama-3.1-8B-Instruct
37
- risk_classification: meta-llama/Llama-3.1-8B-Instruct
38
- risk_narrative: meta-llama/Llama-3.1-8B-Instruct
39
 
40
  task_fallback_model_map:
41
  chat:
42
  - meta-llama/Meta-Llama-3-70B-Instruct # Hard/fallback quality tier
43
  - google/gemma-2-2b-it # Fast safety fallback
44
  verify_solution:
45
- - NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO # Primary (reasoning-focused)
46
- - meta-llama/Meta-Llama-3-70B-Instruct # First fallback
47
  - meta-llama/Llama-3.1-8B-Instruct # Second fallback
48
 
49
  task_provider_map:
 
1
  models:
2
  primary:
3
+ id: Qwen/Qwen2.5-7B-Instruct
4
+ description: Global default instruction model for interactive Grade 11-12 math tutoring
5
  max_new_tokens: 640
6
  temperature: 0.25
7
  top_p: 0.9
 
26
 
27
  routing:
28
  task_model_map:
29
+ # Global default: Qwen2.5-7B across all tasks.
30
+ # Hard prompts can still escalate via runtime policy in inference_client.
31
+ chat: Qwen/Qwen2.5-7B-Instruct
32
+ verify_solution: Qwen/Qwen2.5-7B-Instruct
33
+ lesson_generation: Qwen/Qwen2.5-7B-Instruct
34
+ quiz_generation: Qwen/Qwen2.5-7B-Instruct
35
+ learning_path: Qwen/Qwen2.5-7B-Instruct
36
+ daily_insight: Qwen/Qwen2.5-7B-Instruct
37
+ risk_classification: Qwen/Qwen2.5-7B-Instruct
38
+ risk_narrative: Qwen/Qwen2.5-7B-Instruct
39
 
40
  task_fallback_model_map:
41
  chat:
42
  - meta-llama/Meta-Llama-3-70B-Instruct # Hard/fallback quality tier
43
  - google/gemma-2-2b-it # Fast safety fallback
44
  verify_solution:
45
+ - meta-llama/Meta-Llama-3-70B-Instruct # Higher-capacity fallback
 
46
  - meta-llama/Llama-3.1-8B-Instruct # Second fallback
47
 
48
  task_provider_map:
services/inference_client.py CHANGED
@@ -107,7 +107,7 @@ class InferenceClient:
107
  self.pro_route_header_name = os.getenv("INFERENCE_PRO_ROUTE_HEADER_NAME", "")
108
  self.pro_route_header_value = os.getenv("INFERENCE_PRO_ROUTE_HEADER_VALUE", "true")
109
 
110
- default_model_fallback = str(primary.get("id") or "meta-llama/Llama-3.1-8B-Instruct")
111
  env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
112
  self.default_model = env_model_id or default_model_fallback
113
 
@@ -177,16 +177,16 @@ class InferenceClient:
177
  int(os.getenv("INFERENCE_INTERACTIVE_MAX_FALLBACK_DEPTH", "1")),
178
  )
179
 
180
- # Default task-to-model routing (chat defaults to fast 8B, with hard-prompt escalation to 70B)
181
  self.task_model_map: Dict[str, str] = {
182
- "chat": "meta-llama/Llama-3.1-8B-Instruct",
183
  "verify_solution": "Qwen/Qwen2.5-7B-Instruct",
184
  "lesson_generation": "Qwen/Qwen2.5-7B-Instruct",
185
  "quiz_generation": "Qwen/Qwen2.5-7B-Instruct",
186
  "learning_path": "Qwen/Qwen2.5-7B-Instruct",
187
- "daily_insight": self.default_model,
188
- "risk_classification": self.default_model,
189
- "risk_narrative": self.default_model,
190
  }
191
  # Fallback chains (only to other HF-supported models, no featherless-ai)
192
  self.task_fallback_model_map: Dict[str, List[str]] = {
 
107
  self.pro_route_header_name = os.getenv("INFERENCE_PRO_ROUTE_HEADER_NAME", "")
108
  self.pro_route_header_value = os.getenv("INFERENCE_PRO_ROUTE_HEADER_VALUE", "true")
109
 
110
+ default_model_fallback = str(primary.get("id") or "Qwen/Qwen2.5-7B-Instruct")
111
  env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
112
  self.default_model = env_model_id or default_model_fallback
113
 
 
177
  int(os.getenv("INFERENCE_INTERACTIVE_MAX_FALLBACK_DEPTH", "1")),
178
  )
179
 
180
+ # Default task-to-model routing (global default set to Qwen2.5-7B)
181
  self.task_model_map: Dict[str, str] = {
182
+ "chat": "Qwen/Qwen2.5-7B-Instruct",
183
  "verify_solution": "Qwen/Qwen2.5-7B-Instruct",
184
  "lesson_generation": "Qwen/Qwen2.5-7B-Instruct",
185
  "quiz_generation": "Qwen/Qwen2.5-7B-Instruct",
186
  "learning_path": "Qwen/Qwen2.5-7B-Instruct",
187
+ "daily_insight": "Qwen/Qwen2.5-7B-Instruct",
188
+ "risk_classification": "Qwen/Qwen2.5-7B-Instruct",
189
+ "risk_narrative": "Qwen/Qwen2.5-7B-Instruct",
190
  }
191
  # Fallback chains (only to other HF-supported models, no featherless-ai)
192
  self.task_fallback_model_map: Dict[str, List[str]] = {