Spaces:

HeWhoComes
/

zw-kitten-tts-working

Runtime error

HeWhoComes committed on Aug 21

Commit

948d66a

verified ·

1 Parent(s): d32e505

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,7 +8,8 @@ from fastapi import FastAPI, Request
 from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
 import uvicorn
 import numpy as np
-import soundfile as sf
 # Import KittenTTS directly without demo fallback
 from get_model import KittenTTS
@@ -85,9 +86,22 @@ async def generate_speech(request: Request):
         # Generate real audio with KittenTTS
         audio = tts.generate(text, voice=voice, speed=speed)
-        # Save to temporary file
         temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-        sf.write(temp_file.name, audio, 24000)
         return JSONResponse({
             "success": True,

 from fastapi.responses import HTMLResponse, FileResponse, JSONResponse
 import uvicorn
 import numpy as np
+import wave
 # Import KittenTTS directly without demo fallback
 from get_model import KittenTTS
         # Generate real audio with KittenTTS
         audio = tts.generate(text, voice=voice, speed=speed)
+        # Save to temporary WAV (mono, 16-bit PCM)
         temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+        a = np.asarray(audio, dtype=np.float32)
+        # if model returns [N,1], flatten it:
+        if a.ndim > 1:
+            a = a[:, 0]
+        a = np.clip(a, -1.0, 1.0)
+        a_i16 = (a * 32767.0).astype(np.int16)
+        with wave.open(temp_file.name, "wb") as w:
+            w.setnchannels(1)       # mono
+            w.setsampwidth(2)       # 16-bit
+            w.setframerate(24000)   # Hz (use your model's sr if different)
+            w.writeframes(a_i16.tobytes())
         return JSONResponse({
             "success": True,