"""Gradio demo for Persian text-to-speech built on Coqui TTS checkpoints.

When the module is executed it makes sure the checkpoint and config of every
model listed in ``modelInfo`` are stored locally (one directory per model,
named after the model), then builds and launches the Gradio interface.
"""

import os
import tempfile

import gradio as gr
import soundfile as sf
from TTS.utils.download import download_url
from TTS.utils.synthesizer import Synthesizer

# Names offered in the UI, in display order. Each name is also the local
# directory that holds the model's files.
MODEL_NAMES = [
    "vits male1 (best)",
    "vits female (best)",
    "vits-male",
    "vits female1",
    "glowtts-male",
    "glowtts-female",
    "female tacotron2",
]
MAX_TXT_LEN = 800

# Every model is stored under these fixed local file names, whatever the
# files are called on the remote side.
_CHECKPOINT_FILE = "best_model.pth"
_CONFIG_FILE = "config.json"

# Model list and download links:
# [model name, remote checkpoint file, remote config file, base URL]
modelInfo = [
    ["vits-male", "best_model_65633.pth", "config-0.json",
     "https://huggingface.co/Kamtera/persian-tts-male-vits/resolve/main/"],
    ["vits female (best)", "checkpoint_48000.pth", "config-2.json",
     "https://huggingface.co/Kamtera/persian-tts-female-vits/resolve/main/"],
    ["glowtts-male", "best_model_77797.pth", "config-1.json",
     "https://huggingface.co/Kamtera/persian-tts-male-glow_tts/resolve/main/"],
    ["glowtts-female", "best_model.pth", "config.json",
     "https://huggingface.co/Kamtera/persian-tts-female-glow_tts/resolve/main/"],
    ["vits male1 (best)", "checkpoint_88000.pth", "config.json",
     "https://huggingface.co/Kamtera/persian-tts-male1-vits/resolve/main/"],
    ["vits female1", "checkpoint_50000.pth", "config.json",
     "https://huggingface.co/Kamtera/persian-tts-female1-vits/resolve/main/"],
    ["female tacotron2", "checkpoint_313000.pth", "config-2.json",
     "https://huggingface.co/Kamtera/persian-tts-female-tacotron2/resolve/main/"],
]


def _ensure_model_files(name, checkpoint, config, base_url):
    """Download whichever of the model's two files is not yet on disk.

    Checking each file (rather than only the directory) lets a run that was
    interrupted after creating the directory recover on the next start.
    A truncated file left by an interrupted transfer is not detected here.
    """
    os.makedirs(name, exist_ok=True)
    wanted = ((checkpoint, _CHECKPOINT_FILE), (config, _CONFIG_FILE))
    missing = [
        (remote, local)
        for remote, local in wanted
        if not os.path.exists(os.path.join(name, local))
    ]
    if not missing:
        return
    print("|> Downloading:", name)
    for remote, local in missing:
        download_url(base_url + remote, name, local)


# Download the models.
for _name, _checkpoint, _config, _base_url in modelInfo:
    _ensure_model_files(_name, _checkpoint, _config, _base_url)


# Main TTS function.
def tts(text: str, model_name: str) -> str:
    """Synthesize ``text`` with the chosen model and return an audio file path.

    Args:
        text: Text to speak. Anything beyond ``MAX_TXT_LEN`` characters is
            cut off and a notice is printed.
        model_name: One of ``MODEL_NAMES``; it is also the directory the
            model's checkpoint and config are loaded from.

    Returns:
        Path of a temporary ``.ogg`` (Vorbis) file containing the audio.
        The file is not removed by this function.

    Raises:
        NameError: If ``model_name`` is not one of ``MODEL_NAMES``.
    """
    # Reject unknown names before they are turned into a filesystem path.
    if model_name not in MODEL_NAMES:
        raise NameError("Model not found")

    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    # NOTE(review): the model is loaded from disk on every request. Caching
    # it would be faster but needs a decision on memory use and on sharing
    # one Synthesizer between concurrent requests.
    synthesizer = Synthesizer(
        os.path.join(model_name, _CHECKPOINT_FILE),
        os.path.join(model_name, _CONFIG_FILE),
    )
    wavs = synthesizer.tts(text)  # waveform samples (the original note says: numpy array)

    # Save directly to OGG. Reserve a unique path and close the handle first:
    # soundfile opens the path itself, and re-opening a still-open
    # NamedTemporaryFile by name is not possible on every platform.
    with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as fp:
        out_path = fp.name
    try:
        sf.write(out_path, wavs, synthesizer.output_sample_rate,
                 format="OGG", subtype="VORBIS")
    except Exception:
        # Do not leave an empty or partial temp file behind on failure.
        os.remove(out_path)
        raise
    return out_path


# Markdown shown under the title (Persian: "This is a demo of a Persian
# text-to-speech model.").
description = (
    " این یک دمو از مدل متن به گفتار فارسی است."
    " **Github : https://github.com/karim23657/Persian-tts-coqui** "
)

# [text, model name] pairs offered as clickable examples.
examples = [
    ["و خداوند شما را با ارسال روح در جسم زندگانی و حیات بخشید", "vits-male"],
    ["تاجر تو چه تجارت می کنی ، تو را چه که چه تجارت می کنم؟", "vits female (best)"],
    ["شیش سیخ جیگر سیخی شیش هزار", "vits female (best)"],
    ["سه شیشه شیر ، سه سیر سرشیر", "vits female (best)"],
    ["دزدی دزدید ز بز دزدی بزی ، عجب دزدی که دزدید ز بز دزدی بزی", "vits male1 (best)"],
    ["مثنوی یکی از قالب های شعری است ک هر بیت قافیه ی جداگانه دارد", "vits female1"],
    ["در گلو ماند خس او سالها، چیست آن خس مهر جاه و مالها", "vits male1 (best)"],
]

iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.Textbox(
            label="Text",
            value="زندگی فقط یک بار است؛ از آن به خوبی استفاده کن",
        ),
        gr.Radio(
            label="Pick a TTS Model ",
            choices=MODEL_NAMES,
            # The default must be one of `choices`; the previous default
            # "vits-female" was not, so it pointed at no downloaded model.
            value="vits female (best)",
        ),
    ],
    outputs=gr.Audio(label="Output", type='filepath'),
    examples=examples,
    title="🗣️ Persian tts 🗣️",
    description=description,
    live=False,
)

iface.launch(share=False)