Spaces:

yash184
/

12labindian

Running

App Files Community

yash184 committed on 15 days ago

Commit

02202ef

verified ·

1 Parent(s): 2822aef

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -16

app.py CHANGED Viewed

@@ -10,12 +10,14 @@ import hashlib
 import tempfile
 import traceback
 HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     print("WARNING: HF_TOKEN missing. Add it in Space → Settings → Variables & Secrets if needed.")
 MODEL_ID = "ai4bharat/indic-parler-tts"
 try:
     from parler_tts import ParlerTTSForConditionalGeneration
     from transformers import AutoTokenizer
@@ -28,6 +30,7 @@ print("Loading model… (this may take a while)")
 model = ParlerTTSForConditionalGeneration.from_pretrained(MODEL_ID).to(device)
 text_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 try:
     desc_encoder_name = model.config.text_encoder._name_or_path
     desc_tokenizer = AutoTokenizer.from_pretrained(desc_encoder_name)
@@ -36,6 +39,7 @@ except Exception:
 sampling_rate = int(getattr(model.config, "sampling_rate", 22050))
 sp_file = Path(__file__).parent / "speakers.json"
 if sp_file.exists():
     try:
@@ -45,7 +49,8 @@ if sp_file.exists():
 else:
     SPEAKERS = ["Default"]
-def _make_outfile(text: str, speaker: str, emotion: str):
     key = (text + "|" + str(speaker) + "|" + str(emotion)).encode("utf-8")
     h = hashlib.sha256(key).hexdigest()[:20]
     filename = f"out_{h}.wav"
@@ -53,10 +58,13 @@ def _make_outfile(text: str, speaker: str, emotion: str):
     return str(Path(tmpdir) / filename)
 def synthesize(text: str, speaker: str, emotion: str = "Neutral"):
     if not text or not str(text).strip():
         return None
     desc = f"{speaker}'s voice. Tone: {emotion}. Natural, clear speech, close mic."
     try:
         desc_ids = desc_tokenizer(desc, return_tensors="pt").to(device)
         text_ids = text_tokenizer(text, return_tensors="pt").to(device)
@@ -64,6 +72,7 @@ def synthesize(text: str, speaker: str, emotion: str = "Neutral"):
         desc_ids = desc_tokenizer(desc, return_tensors="pt")
         text_ids = text_tokenizer(text, return_tensors="pt")
     try:
         with torch.no_grad():
             try:
@@ -81,6 +90,7 @@ def synthesize(text: str, speaker: str, emotion: str = "Neutral"):
         traceback.print_exc()
         return None
     try:
         arr = audio.cpu().numpy().squeeze()
     except Exception as e:
@@ -88,6 +98,7 @@ def synthesize(text: str, speaker: str, emotion: str = "Neutral"):
         traceback.print_exc()
         return None
     if np.issubdtype(arr.dtype, np.integer):
         arr = arr.astype("float32") / np.iinfo(arr.dtype).max
     else:
@@ -108,23 +119,19 @@ def synthesize(text: str, speaker: str, emotion: str = "Neutral"):
     print(f"Generated audio saved to: {out_path}")
     return out_path
-# --- Gradio UI / API ---
 with gr.Blocks() as demo:
     gr.Markdown("# Indic Parler-TTS (69 Speakers)")
-    # Simple health fallback box (helps avoid totally blank page)
-    with gr.Row():
-        with gr.Column(scale=2):
-            txt = gr.Textbox(value="नमस्ते, यह एक परीक्षण वाक्य है।", label="Text")
-            sp = gr.Dropdown(SPEAKERS, value=SPEAKERS[0], label="Speaker")
-            emo = gr.Dropdown(["Neutral","Happy","Sad","Angry","Narration"], value="Neutral", label="Emotion")
-            btn = gr.Button("Generate")
-        with gr.Column(scale=1):
-            gr.Markdown("**Status**\n\n- Server running\n- If UI is blank, open browser console (F12) to view errors.")
-            out = gr.Audio(label="Output (wav)", type="filepath")
-    # Expose API endpoint
     btn.click(fn=synthesize, inputs=[txt, sp, emo], outputs=out, api_name="/synthesize")
-# Launch with SSR disabled (helps avoid blank UI in some HF environments)
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)

 import tempfile
 import traceback
+# Token MUST be added via HuggingFace Space Secrets (only needed for private resources)
 HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     print("WARNING: HF_TOKEN missing. Add it in Space → Settings → Variables & Secrets if needed.")
 MODEL_ID = "ai4bharat/indic-parler-tts"
+# Required imports (raise clear error if missing)
 try:
     from parler_tts import ParlerTTSForConditionalGeneration
     from transformers import AutoTokenizer
 model = ParlerTTSForConditionalGeneration.from_pretrained(MODEL_ID).to(device)
 text_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+# Some models use a separate description encoder tokenizer
 try:
     desc_encoder_name = model.config.text_encoder._name_or_path
     desc_tokenizer = AutoTokenizer.from_pretrained(desc_encoder_name)
 sampling_rate = int(getattr(model.config, "sampling_rate", 22050))
+# Load speakers list if present
 sp_file = Path(__file__).parent / "speakers.json"
 if sp_file.exists():
     try:
 else:
     SPEAKERS = ["Default"]
+def _make_outfile(text: str, speaker: str, emotion: str) -> str:
+    """Deterministic temporary filename to reduce collisions and allow caching."""
     key = (text + "|" + str(speaker) + "|" + str(emotion)).encode("utf-8")
     h = hashlib.sha256(key).hexdigest()[:20]
     filename = f"out_{h}.wav"
     return str(Path(tmpdir) / filename)
 def synthesize(text: str, speaker: str, emotion: str = "Neutral"):
+    """Generate TTS audio and return filepath (Gradio Audio expects filepath when type='filepath')."""
     if not text or not str(text).strip():
         return None
     desc = f"{speaker}'s voice. Tone: {emotion}. Natural, clear speech, close mic."
+    # Tokenize (attempt to keep tensors on device)
     try:
         desc_ids = desc_tokenizer(desc, return_tensors="pt").to(device)
         text_ids = text_tokenizer(text, return_tensors="pt").to(device)
         desc_ids = desc_tokenizer(desc, return_tensors="pt")
         text_ids = text_tokenizer(text, return_tensors="pt")
+    # Model generation with fallbacks
     try:
         with torch.no_grad():
             try:
         traceback.print_exc()
         return None
+    # Convert tensor -> numpy
     try:
         arr = audio.cpu().numpy().squeeze()
     except Exception as e:
         traceback.print_exc()
         return None
+    # Normalize integer PCM to float32 in [-1,1] if needed
     if np.issubdtype(arr.dtype, np.integer):
         arr = arr.astype("float32") / np.iinfo(arr.dtype).max
     else:
     print(f"Generated audio saved to: {out_path}")
     return out_path
+# --- Gradio UI and API exposure ---
 with gr.Blocks() as demo:
     gr.Markdown("# Indic Parler-TTS (69 Speakers)")
+    txt = gr.Textbox(value="नमस्ते, यह एक परीक्षण वाक्य है।", label="Text")
+    sp = gr.Dropdown(SPEAKERS, value=SPEAKERS[0], label="Speaker")
+    emo = gr.Dropdown(["Neutral","Happy","Sad","Angry","Narration"], value="Neutral", label="Emotion")
+    btn = gr.Button("Generate")
+    out = gr.Audio(label="Output (wav)", type="filepath")
+    # Expose the endpoint so the Space API is available at /synthesize
     btn.click(fn=synthesize, inputs=[txt, sp, emo], outputs=out, api_name="/synthesize")
+if __name__ == '__main__':
+    # Disable SSR to avoid frontend blank issues in some Spaces environments
+    demo.launch(ssr_mode=False)