Spaces:

mrfakename
/

E2-F5-TTS

Running on Zero

App Files Files Community

Upload app.py

#76

by rtyrfth - opened Sep 22

base: refs/heads/main

←

from: refs/pr/76

Discussion Files changed

+59

-18

Files changed (1) hide show

app.py +59 -18

app.py CHANGED Viewed

@@ -1,38 +1,79 @@
 import spaces
 import gradio as gr
-from f5_tts.infer.utils_infer import remove_silence_for_generated_wav
 from f5_tts.api import F5TTS
 import tempfile
-import os
-# One shared F5TTS instance, created at import time and reused by every request.
-f5tts = F5TTS()
 @spaces.GPU
-def run_tts(ref_audio, ref_text, gen_text, remove_silence=False):
-    """Generate speech for ``gen_text`` using a reference recording.
-
-    Args:
-        ref_audio: Path to the reference audio file (the UI supplies a filepath).
-        ref_text: Text associated with the reference audio; forwarded unchanged.
-        gen_text: Text to synthesize.
-        remove_silence: Forwarded unchanged to ``f5tts.infer``.
-
-    Returns:
-        The path of the ``.wav`` file that was passed to ``f5tts.infer`` as
-        ``file_wave``.
-
-    Raises:
-        gr.Error: If no reference audio or no generation text was supplied.
-    """
-    # Fail early with a readable UI message instead of an error from inside the model.
-    if not ref_audio:
-        raise gr.Error("Please provide a reference audio clip.")
-    if not gen_text or not gen_text.strip():
-        raise gr.Error("Please enter some text to generate.")
-    # tempfile.mktemp() only invents a name, so another process could claim it
-    # first. NamedTemporaryFile(delete=False) creates the file atomically and
-    # leaves it on disk for the caller to read.
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
-        output_wav_path = tmp.name
-    # The returned audio data is not needed here; only the file path is used.
-    f5tts.infer(
         ref_file=ref_audio,
-        ref_text=ref_text,
         gen_text=gen_text,
         file_wave=output_wav_path,
         remove_silence=remove_silence,
     )
     return output_wav_path
-# Input widgets, in the same order as run_tts's positional parameters.
-_tts_inputs = [
-    gr.Audio(type="filepath", label="Reference Audio"),
-    gr.Textbox(
-        placeholder="some call me nature, others call me mother nature.",
-        label="Reference Text",
-    ),
-    gr.Textbox(
-        placeholder="I don't really care what you call me...",
-        label="Generation Text",
-    ),
-    gr.Checkbox(value=False, label="Remove Silence from Output?"),
-]
-# Single-form interface: the inputs above feed run_tts, and the returned
-# file path is shown in an audio player.
-demo = gr.Interface(
-    title="🗣️ F5-TTS Demo",
-    description="Upload a reference voice, give reference and generation text, and hear it in the same voice!",
-    fn=run_tts,
-    inputs=_tts_inputs,
-    outputs=gr.Audio(label="Generated Speech"),
-)
 if __name__ == "__main__":
     demo.launch()

 import spaces
 import gradio as gr
 from f5_tts.api import F5TTS
 import tempfile
+# One shared F5TTS instance, created at import time and reused by every request.
+f5tts = F5TTS()
 @spaces.GPU
+def run_tts(ref_audio, gen_text, remove_silence=False):
+    """Generate speech for ``gen_text`` using a reference recording.
+
+    Args:
+        ref_audio: Path to the reference audio file (the UI supplies a filepath).
+        gen_text: Text to synthesize.
+        remove_silence: Forwarded unchanged to ``f5tts.infer``.
+
+    Returns:
+        The path of the ``.wav`` file that was passed to ``f5tts.infer`` as
+        ``file_wave``.
+
+    Raises:
+        gr.Error: If no reference audio or no generation text was supplied.
+    """
+    # Fail early with a readable UI message instead of an error from inside the model.
+    if not ref_audio:
+        raise gr.Error("Please provide a reference audio clip.")
+    if not gen_text or not gen_text.strip():
+        raise gr.Error("Please enter some text to generate.")
+    # tempfile.mktemp() only invents a name, so another process could claim it
+    # first. NamedTemporaryFile(delete=False) creates the file atomically and
+    # leaves it on disk for the caller to read.
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+        output_wav_path = tmp.name
+    # The returned audio data is not needed here; only the file path is used.
+    f5tts.infer(
         ref_file=ref_audio,
+        # Reference text removed from the UI, so an empty string is always sent.
+        # NOTE(review): presumably F5TTS transcribes the clip itself when this
+        # is empty — confirm against the f5_tts API.
+        ref_text="",
         gen_text=gen_text,
         file_wave=output_wav_path,
         remove_silence=remove_silence,
     )
     return output_wav_path
+# Page-level styling handed to gr.Blocks: background, container width,
+# the text box tagged with elem_id "main-box", and button appearance.
+custom_css = """
+body {background: #f4f7fb;}
+.gradio-container {max-width: 900px; margin: auto;}
+h1, h2, h3, h4, h5 {color: #1a1a1a; font-weight: 700;}
+#main-box textarea {
+    font-size: 16px !important;
+    padding: 15px !important;
+    border-radius: 12px !important;
+    border: 1px solid #d1d5db !important;
+    box-shadow: 0px 2px 6px rgba(0,0,0,0.08);
+}
+button {
+    font-size: 16px !important;
+    font-weight: bold !important;
+    padding: 12px 20px !important;
+    border-radius: 10px !important;
+}
+button.primary {
+    background: linear-gradient(135deg, #2563eb, #1e40af) !important;
+    color: white !important;
+}
+"""
+# Centered title and maintenance notice shown at the top of the page.
+_HEADER_HTML = """
+        <div style="text-align: center; margin-bottom: 20px;">
+            <h1>🗣️ TalkClone</h1>
+            <p style="font-size:16px; color:#4b5563;">
+             <b>TalkClone is currently in maintenance mode. This is not the final interface — just a temporary version while we work on improvements. The full experience will be back soon.</b>  <b></b>
+            </p>
+        </div>
+        """
+with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
+    gr.Markdown(_HEADER_HTML)
+    # Narrow left column holds the reference clip and the silence toggle;
+    # the wider right column holds the text to synthesize.
+    with gr.Row():
+        with gr.Column(scale=1):
+            ref_audio = gr.Audio(type="filepath", label="🎧 Reference Audio")
+            remove_silence = gr.Checkbox(value=False, label="✨ Remove Silence")
+        with gr.Column(scale=2):
+            gen_text = gr.Textbox(
+                elem_id="main-box",
+                lines=10,
+                label="📝 Enter Text to Generate Speech",
+                placeholder="Type or paste your text here...",
+            )
+    generate_btn = gr.Button("🚀 Generate Speech", variant="primary")
+    output_audio = gr.Audio(label="🔊 Generated Speech")
+    # Input order matches run_tts(ref_audio, gen_text, remove_silence).
+    generate_btn.click(
+        run_tts,
+        [ref_audio, gen_text, remove_silence],
+        output_audio,
+    )
 if __name__ == "__main__":
     demo.launch()