HeWhoComes committed on
Commit
948d66a
·
verified ·
1 Parent(s): d32e505

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -3
app.py CHANGED
@@ -8,7 +8,8 @@ from fastapi import FastAPI, Request
8
  from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
9
  import uvicorn
10
  import numpy as np
11
- import soundfile as sf
 
12
 
13
  # Import KittenTTS directly without demo fallback
14
  from get_model import KittenTTS
@@ -85,9 +86,22 @@ async def generate_speech(request: Request):
85
  # Generate real audio with KittenTTS
86
  audio = tts.generate(text, voice=voice, speed=speed)
87
 
88
- # Save to temporary file
89
  temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
90
- sf.write(temp_file.name, audio, 24000)
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  return JSONResponse({
93
  "success": True,
 
8
  from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
9
  import uvicorn
10
  import numpy as np
11
+ import wave
12
+
13
 
14
  # Import KittenTTS directly without demo fallback
15
  from get_model import KittenTTS
 
86
  # Generate real audio with KittenTTS
87
  audio = tts.generate(text, voice=voice, speed=speed)
88
 
89
+ # Save to temporary WAV (mono, 16-bit PCM)
90
  temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
91
+
92
+ a = np.asarray(audio, dtype=np.float32)
93
+ # if model returns [N,1], flatten it:
94
+ if a.ndim > 1:
95
+ a = a[:, 0]
96
+ a = np.clip(a, -1.0, 1.0)
97
+ a_i16 = (a * 32767.0).astype(np.int16)
98
+
99
+ with wave.open(temp_file.name, "wb") as w:
100
+ w.setnchannels(1) # mono
101
+ w.setsampwidth(2) # 16-bit
102
+ w.setframerate(24000) # Hz (use your model's sr if different)
103
+ w.writeframes(a_i16.tobytes())
104
+
105
 
106
  return JSONResponse({
107
  "success": True,