Files changed (1) hide show
  1. app.py +59 -18
app.py CHANGED
@@ -1,38 +1,79 @@
1
  import spaces
2
  import gradio as gr
3
- from f5_tts.infer.utils_infer import remove_silence_for_generated_wav
4
  from f5_tts.api import F5TTS
5
  import tempfile
6
- import os
7
- f5tts = F5TTS()
8
 
 
9
 
10
  @spaces.GPU
11
- def run_tts(ref_audio, ref_text, gen_text, remove_silence=False):
12
  output_wav_path = tempfile.mktemp(suffix=".wav")
13
 
14
  wav, sr, _ = f5tts.infer(
15
  ref_file=ref_audio,
16
- ref_text=ref_text,
17
  gen_text=gen_text,
18
  file_wave=output_wav_path,
19
  remove_silence=remove_silence,
20
  )
21
-
22
  return output_wav_path
23
 
24
- demo = gr.Interface(
25
- fn=run_tts,
26
- inputs=[
27
- gr.Audio(label="Reference Audio", type="filepath"),
28
- gr.Textbox(label="Reference Text", placeholder="some call me nature, others call me mother nature."),
29
- gr.Textbox(label="Generation Text", placeholder="I don't really care what you call me..."),
30
- gr.Checkbox(label="Remove Silence from Output?", value=False)
31
- ],
32
- outputs=gr.Audio(label="Generated Speech"),
33
- title="🗣️ F5-TTS Demo",
34
- description="Upload a reference voice, give reference and generation text, and hear it in the same voice!",
35
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  if __name__ == "__main__":
38
  demo.launch()
 
1
  import spaces
2
  import gradio as gr
 
3
  from f5_tts.api import F5TTS
4
  import tempfile
 
 
5
 
6
+ f5tts = F5TTS()
7
 
8
  @spaces.GPU
9
+ def run_tts(ref_audio, gen_text, remove_silence=False):
10
  output_wav_path = tempfile.mktemp(suffix=".wav")
11
 
12
  wav, sr, _ = f5tts.infer(
13
  ref_file=ref_audio,
14
+ ref_text="", # reference text removed
15
  gen_text=gen_text,
16
  file_wave=output_wav_path,
17
  remove_silence=remove_silence,
18
  )
 
19
  return output_wav_path
20
 
21
+
22
+ custom_css = """
23
+ body {background: #f4f7fb;}
24
+ .gradio-container {max-width: 900px; margin: auto;}
25
+ h1, h2, h3, h4, h5 {color: #1a1a1a; font-weight: 700;}
26
+ #main-box textarea {
27
+ font-size: 16px !important;
28
+ padding: 15px !important;
29
+ border-radius: 12px !important;
30
+ border: 1px solid #d1d5db !important;
31
+ box-shadow: 0px 2px 6px rgba(0,0,0,0.08);
32
+ }
33
+ button {
34
+ font-size: 16px !important;
35
+ font-weight: bold !important;
36
+ padding: 12px 20px !important;
37
+ border-radius: 10px !important;
38
+ }
39
+ button.primary {
40
+ background: linear-gradient(135deg, #2563eb, #1e40af) !important;
41
+ color: white !important;
42
+ }
43
+ """
44
+
45
+ with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
46
+ gr.Markdown(
47
+ """
48
+ <div style="text-align: center; margin-bottom: 20px;">
49
+ <h1>🗣️ TalkClone</h1>
50
+ <p style="font-size:16px; color:#4b5563;">
51
+ <b>TalkClone is currently in maintenance mode. This is not the final interface — just a temporary version while we work on improvements. The full experience will be back soon.</b> <b></b>
52
+ </p>
53
+ </div>
54
+ """
55
+ )
56
+
57
+ with gr.Row():
58
+ with gr.Column(scale=1):
59
+ ref_audio = gr.Audio(label="🎧 Reference Audio", type="filepath")
60
+ remove_silence = gr.Checkbox(label="✨ Remove Silence", value=False)
61
+ with gr.Column(scale=2):
62
+ gen_text = gr.Textbox(
63
+ label="📝 Enter Text to Generate Speech",
64
+ elem_id="main-box",
65
+ placeholder="Type or paste your text here...",
66
+ lines=10,
67
+ )
68
+
69
+ generate_btn = gr.Button("🚀 Generate Speech", variant="primary")
70
+ output_audio = gr.Audio(label="🔊 Generated Speech")
71
+
72
+ generate_btn.click(
73
+ fn=run_tts,
74
+ inputs=[ref_audio, gen_text, remove_silence],
75
+ outputs=output_audio
76
+ )
77
 
78
  if __name__ == "__main__":
79
  demo.launch()