Spaces:

CrazyMonkey0
/

APi_English

Running

App Files Files Community

CrazyMonkey0 committed on Dec 28, 2025

Commit

4d18a16

1 Parent(s): f854f33

feat(nlp): change nlp model to Qwen/Qwen2.5-1.5B-Instruct

Browse files

Files changed (2) hide show

README.md +4 -4
app/routes/nlp.py +21 -40

README.md CHANGED Viewed

@@ -9,7 +9,7 @@ app_file: "app/main.py"
 app_port: 7860
 short_description: "English learning API"
 models:
-  - Qwen/Qwen2.5-0.5B-Instruct
   - openai/whisper-small.en
   - facebook/mms-tts-eng
   - allegro/BiDi-eng-pol
@@ -53,7 +53,7 @@ Each model retains its original license as listed below:
   Developed by [**AI at Meta**](https://ai.facebook.com/).
 ### 💬 Natural Language Processing (Chat & Grammar)
-- [**Qwen/Qwen2.5-0.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct)
   Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
   Developed by [**Qwen Team**](https://qwen.ai/)
@@ -93,7 +93,7 @@ The source code of this application is distributed separately under the license
     year={2023}
 }
-### 3. Qwen/Qwen2.5-0.5B-Instruct — Qwen Team
 @misc{qwen2.5,
     title = {Qwen2.5: A Party of Foundation Models},
     url = {https://qwenlm.github.io/blog/qwen2.5/},
@@ -124,7 +124,7 @@ Special thanks to the teams and organizations that created and maintain the foll
 - **[OpenAI](https://openai.com/)** for [**Whisper Small (English)**](https://huggingface.co/openai/whisper-small.en) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
 - **[Facebook AI Research (FAIR)](https://ai.facebook.com/)** for [**facebook/mms-tts-eng**](https://huggingface.co/facebook/mms-tts-eng) — Licensed under [Creative Commons Attribution Non Commercial 4.0 (CC BY-NC 4.0)](https://creativecommons.org/licenses/by-nc/4.0/).
-- **[Qwen Team](https://qwen.ai/)** for [**Qwen/Qwen2.5-0.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
 - **[Allegro ML Research](https://ml.allegro.tech/)** for [**BiDi-eng-pol**](https://huggingface.co/allegro/BiDi-eng-pol) — Licensed under [Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/).
 This application uses these models for educational and research purposes only, in full compliance with their respective licenses.

 app_port: 7860
 short_description: "English learning API"
 models:
+  - Qwen/Qwen2.5-1.5B-Instruct
   - openai/whisper-small.en
   - facebook/mms-tts-eng
   - allegro/BiDi-eng-pol
   Developed by [**AI at Meta**](https://ai.facebook.com/).
 ### 💬 Natural Language Processing (Chat & Grammar)
+- [**Qwen/Qwen2.5-1.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct)
   Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
   Developed by [**Qwen Team**](https://qwen.ai/)
     year={2023}
 }
+### 3. Qwen/Qwen2.5-1.5B-Instruct — Qwen Team
 @misc{qwen2.5,
     title = {Qwen2.5: A Party of Foundation Models},
     url = {https://qwenlm.github.io/blog/qwen2.5/},
 - **[OpenAI](https://openai.com/)** for [**Whisper Small (English)**](https://huggingface.co/openai/whisper-small.en) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
 - **[Facebook AI Research (FAIR)](https://ai.facebook.com/)** for [**facebook/mms-tts-eng**](https://huggingface.co/facebook/mms-tts-eng) — Licensed under [Creative Commons Attribution Non Commercial 4.0 (CC BY-NC 4.0)](https://creativecommons.org/licenses/by-nc/4.0/).
+- **[Qwen Team](https://qwen.ai/)** for [**Qwen/Qwen2.5-1.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
 - **[Allegro ML Research](https://ml.allegro.tech/)** for [**BiDi-eng-pol**](https://huggingface.co/allegro/BiDi-eng-pol) — Licensed under [Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/).
 This application uses these models for educational and research purposes only, in full compliance with their respective licenses.

app/routes/nlp.py CHANGED Viewed

@@ -3,50 +3,32 @@ from fastapi.responses import JSONResponse
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 from pydantic import BaseModel
-from .tts import send_audio
 router = APIRouter()
-# HF Free CPU tuning (IMPORTANT)
-torch.set_num_threads(2)
-torch.set_num_interop_threads(1)
-SYSTEM_PROMPT = """You are Emma, a friendly English teacher helping learners improve their English.
-Reply naturally to the user's message (2-4 sentences), then if you find errors, add:
-CORRECTION:
-Error: [type]
-Original: "..."
-Correction: "..."
-Explanation: [one simple sentence]
-Analyze only grammar, vocabulary, spelling, and common learner mistakes. Be encouraging!
-"""
 class ChatRequest(BaseModel):
     message: str
-# Load NLP model (Phi-3.5)
 def load_model_nlp():
-    model_id = "microsoft/Phi-3.5-mini-instruct"
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_id,
-        use_fast=True
-    )
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        torch_dtype=torch.float32,   # CPU-safe
-        device_map="cpu",
-        low_cpu_mem_usage=True
     )
     model.eval()
     return model, tokenizer
 @router.post("/chat")
 async def chat(request: Request, chat_request: ChatRequest):
     text = chat_request.message
@@ -59,34 +41,33 @@ async def chat(request: Request, chat_request: ChatRequest):
         {"role": "user", "content": text},
     ]
-    # Phi-3.5 requires chat template
     inputs = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
         tokenize=True,
         return_tensors="pt",
     ).to(model.device)
     with torch.no_grad():
         output = model.generate(
-            inputs,
-            max_new_tokens=80,     # more than enough
-            do_sample=False,       # IMPORTANT: faster + stable
-            eos_token_id=tokenizer.eos_token_id,
-            pad_token_id=tokenizer.eos_token_id,
         )
     response_text = tokenizer.decode(
-        output[0][inputs.shape[-1]:],
         skip_special_tokens=True
     ).strip()
     # Generate audio using TTS
     audio_name = send_audio(request, response_text)
     return JSONResponse(
-        {
-            "response": response_text,
-            "audio": audio_name,
-        }
-    )

 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 from pydantic import BaseModel
+from .tts import send_audio
+import uuid
+import os
 router = APIRouter()
+SYSTEM_PROMPT = """you are emma an advanced AI assistant for English language learning."""
 class ChatRequest(BaseModel):
     message: str
+# Load NLP model
 def load_model_nlp():
+    model_id = "Qwen/Qwen2.5-1.5B-Instruct"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
+        torch_dtype=torch.float32,   # CPU friendly
+        device_map="cpu"
     )
     model.eval()
     return model, tokenizer
 @router.post("/chat")
 async def chat(request: Request, chat_request: ChatRequest):
     text = chat_request.message
         {"role": "user", "content": text},
     ]
     inputs = tokenizer.apply_chat_template(
         messages,
         add_generation_prompt=True,
         tokenize=True,
+        return_dict=True,
         return_tensors="pt",
     ).to(model.device)
     with torch.no_grad():
         output = model.generate(
+            **inputs,
+            max_new_tokens=150,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True,
         )
     response_text = tokenizer.decode(
+        output[0][inputs["input_ids"].shape[-1]:],
         skip_special_tokens=True
     ).strip()
     # Generate audio using TTS
     audio_name = send_audio(request, response_text)
     return JSONResponse(
+        {"response": response_text,
+         "audio": audio_name,}
+        )