CrazyMonkey0 committed on
Commit
4d18a16
·
1 Parent(s): f854f33

feat(nlp): change nlp model to Qwen/Qwen2.5-1.5B-Instruct

Browse files
Files changed (2) hide show
  1. README.md +4 -4
  2. app/routes/nlp.py +21 -40
README.md CHANGED
@@ -9,7 +9,7 @@ app_file: "app/main.py"
9
  app_port: 7860
10
  short_description: "English learning API"
11
  models:
12
- - Qwen/Qwen2.5-0.5B-Instruct
13
  - openai/whisper-small.en
14
  - facebook/mms-tts-eng
15
  - allegro/BiDi-eng-pol
@@ -53,7 +53,7 @@ Each model retains its original license as listed below:
53
  Developed by [**AI at Meta**](https://ai.facebook.com/).
54
 
55
  ### 💬 Natural Language Processing (Chat & Grammar)
56
- - [**Qwen/Qwen2.5-0.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct)
57
  Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
58
  Developed by [**Qwen Team**](https://qwen.ai/)
59
 
@@ -93,7 +93,7 @@ The source code of this application is distributed separately under the license
93
  year={2023}
94
  }
95
 
96
- ### 3. Qwen/Qwen2.5-0.5B-Instruct — Qwen Team
97
  @misc{qwen2.5,
98
  title = {Qwen2.5: A Party of Foundation Models},
99
  url = {https://qwenlm.github.io/blog/qwen2.5/},
@@ -124,7 +124,7 @@ Special thanks to the teams and organizations that created and maintain the foll
124
 
125
  - **[OpenAI](https://openai.com/)** for [**Whisper Small (English)**](https://huggingface.co/openai/whisper-small.en) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
126
  - **[Facebook AI Research (FAIR)](https://ai.facebook.com/)** for [**facebook/mms-tts-eng**](https://huggingface.co/facebook/mms-tts-eng) — Licensed under [Creative Commons Attribution Non Commercial 4.0 (CC BY-NC 4.0)](https://creativecommons.org/licenses/by-nc/4.0/).
127
- - **[Qwen Team](https://qwen.ai/)** for [**Qwen/Qwen2.5-0.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
128
  - **[Allegro ML Research](https://ml.allegro.tech/)** for [**BiDi-eng-pol**](https://huggingface.co/allegro/BiDi-eng-pol) — Licensed under [Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/).
129
 
130
  This application uses these models for educational and research purposes only, in full compliance with their respective licenses.
 
9
  app_port: 7860
10
  short_description: "English learning API"
11
  models:
12
+ - Qwen/Qwen2.5-1.5B-Instruct
13
  - openai/whisper-small.en
14
  - facebook/mms-tts-eng
15
  - allegro/BiDi-eng-pol
 
53
  Developed by [**AI at Meta**](https://ai.facebook.com/).
54
 
55
  ### 💬 Natural Language Processing (Chat & Grammar)
56
+ - [**Qwen/Qwen2.5-1.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct)
57
  Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
58
  Developed by [**Qwen Team**](https://qwen.ai/)
59
 
 
93
  year={2023}
94
  }
95
 
96
+ ### 3. Qwen/Qwen2.5-1.5B-Instruct — Qwen Team
97
  @misc{qwen2.5,
98
  title = {Qwen2.5: A Party of Foundation Models},
99
  url = {https://qwenlm.github.io/blog/qwen2.5/},
 
124
 
125
  - **[OpenAI](https://openai.com/)** for [**Whisper Small (English)**](https://huggingface.co/openai/whisper-small.en) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
126
  - **[Facebook AI Research (FAIR)](https://ai.facebook.com/)** for [**facebook/mms-tts-eng**](https://huggingface.co/facebook/mms-tts-eng) — Licensed under [Creative Commons Attribution Non Commercial 4.0 (CC BY-NC 4.0)](https://creativecommons.org/licenses/by-nc/4.0/).
127
+ - **[Qwen Team](https://qwen.ai/)** for [**Qwen/Qwen2.5-1.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
128
  - **[Allegro ML Research](https://ml.allegro.tech/)** for [**BiDi-eng-pol**](https://huggingface.co/allegro/BiDi-eng-pol) — Licensed under [Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/).
129
 
130
  This application uses these models for educational and research purposes only, in full compliance with their respective licenses.
app/routes/nlp.py CHANGED
@@ -3,50 +3,32 @@ from fastapi.responses import JSONResponse
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
  import torch
5
  from pydantic import BaseModel
6
- from .tts import send_audio
 
 
7
 
8
  router = APIRouter()
9
 
10
- # HF Free CPU tuning (IMPORTANT)
11
- torch.set_num_threads(2)
12
- torch.set_num_interop_threads(1)
13
-
14
- SYSTEM_PROMPT = """You are Emma, a friendly English teacher helping learners improve their English.
15
-
16
- Reply naturally to the user's message (2-4 sentences), then if you find errors, add:
17
-
18
- CORRECTION:
19
- Error: [type]
20
- Original: "..."
21
- Correction: "..."
22
- Explanation: [one simple sentence]
23
-
24
- Analyze only grammar, vocabulary, spelling, and common learner mistakes. Be encouraging!
25
- """
26
 
27
  class ChatRequest(BaseModel):
28
  message: str
29
 
30
- # Load NLP model (Phi-3.5)
31
  def load_model_nlp():
32
- model_id = "microsoft/Phi-3.5-mini-instruct"
33
 
34
- tokenizer = AutoTokenizer.from_pretrained(
35
- model_id,
36
- use_fast=True
37
- )
38
 
 
39
  model = AutoModelForCausalLM.from_pretrained(
40
  model_id,
41
- torch_dtype=torch.float32, # CPU-safe
42
- device_map="cpu",
43
- low_cpu_mem_usage=True
44
  )
45
 
46
  model.eval()
47
  return model, tokenizer
48
 
49
-
50
  @router.post("/chat")
51
  async def chat(request: Request, chat_request: ChatRequest):
52
  text = chat_request.message
@@ -59,34 +41,33 @@ async def chat(request: Request, chat_request: ChatRequest):
59
  {"role": "user", "content": text},
60
  ]
61
 
62
- # Phi-3.5 requires chat template
63
  inputs = tokenizer.apply_chat_template(
64
  messages,
65
  add_generation_prompt=True,
66
  tokenize=True,
 
67
  return_tensors="pt",
68
  ).to(model.device)
69
 
70
  with torch.no_grad():
71
  output = model.generate(
72
- inputs,
73
- max_new_tokens=80, # more than enough
74
- do_sample=False, # IMPORTANT: faster + stable
75
- eos_token_id=tokenizer.eos_token_id,
76
- pad_token_id=tokenizer.eos_token_id,
77
  )
78
 
79
  response_text = tokenizer.decode(
80
- output[0][inputs.shape[-1]:],
81
  skip_special_tokens=True
82
  ).strip()
83
 
84
  # Generate audio using TTS
85
  audio_name = send_audio(request, response_text)
86
-
 
87
  return JSONResponse(
88
- {
89
- "response": response_text,
90
- "audio": audio_name,
91
- }
92
- )
 
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
  import torch
5
  from pydantic import BaseModel
6
+ from .tts import send_audio
7
+ import uuid
8
+ import os
9
 
10
  router = APIRouter()
11
 
12
+ SYSTEM_PROMPT = """you are emma an advanced AI assistant for English language learning."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  class ChatRequest(BaseModel):
15
  message: str
16
 
17
+ # Load NLP model
18
  def load_model_nlp():
 
19
 
20
+ model_id = "Qwen/Qwen2.5-1.5B-Instruct"
 
 
 
21
 
22
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
23
  model = AutoModelForCausalLM.from_pretrained(
24
  model_id,
25
+ torch_dtype=torch.float32, # CPU friendly
26
+ device_map="cpu"
 
27
  )
28
 
29
  model.eval()
30
  return model, tokenizer
31
 
 
32
  @router.post("/chat")
33
  async def chat(request: Request, chat_request: ChatRequest):
34
  text = chat_request.message
 
41
  {"role": "user", "content": text},
42
  ]
43
 
 
44
  inputs = tokenizer.apply_chat_template(
45
  messages,
46
  add_generation_prompt=True,
47
  tokenize=True,
48
+ return_dict=True,
49
  return_tensors="pt",
50
  ).to(model.device)
51
 
52
  with torch.no_grad():
53
  output = model.generate(
54
+ **inputs,
55
+ max_new_tokens=150,
56
+ temperature=0.7,
57
+ top_p=0.9,
58
+ do_sample=True,
59
  )
60
 
61
  response_text = tokenizer.decode(
62
+ output[0][inputs["input_ids"].shape[-1]:],
63
  skip_special_tokens=True
64
  ).strip()
65
 
66
  # Generate audio using TTS
67
  audio_name = send_audio(request, response_text)
68
+
69
+
70
  return JSONResponse(
71
+ {"response": response_text,
72
+ "audio": audio_name,}
73
+ )