"""Gradio demo that serves MagpieTTS text-to-speech synthesis in a web UI.

Setup notes
-----------
If gradio is not already installed, run:

    pip install --no-cache-dir gradio

Additional environment preparation:

    export PYTHONPATH=$PYTHONPATH:/workspace/NeMo
    pip install kaldialign
    pip install git+https://github.com/sarulab-speech/UTMOSv2.git@v1.2.1

Place this file in the root directory of NeMo.
"""

import gradio as gr
import numpy as np
from nemo.collections.tts.modules.magpietts_inference.utils import ModelLoadConfig, load_magpie_model

# Checkpoint and codec used by the demo; edit these to point at your own models.
CHECKPOINT_PATH = "/checkpoints/results/ML_MagpieTTS/CE-Removed_GRPO_Magpie_TTS_ML_V1.nemo"
CODEC_MODEL_PATH = "nvidia/nemo-nano-codec-22khz-1.89kbps-21.5fps"


def setup_model():
    """Load the MagpieTTS model from ``CHECKPOINT_PATH`` and prepare it for inference.

    Returns:
        The loaded model, switched to eval mode and moved to the GPU.
    """
    model_config = ModelLoadConfig(
        nemo_file=CHECKPOINT_PATH,
        codecmodel_path=CODEC_MODEL_PATH,
        legacy_codebooks=False,
        legacy_text_conditioning=False,
        hparams_from_wandb=None,
    )
    # load_magpie_model returns a pair; only the model itself is needed here.
    model, _ = load_magpie_model(model_config)
    model.eval().cuda()
    return model


def main():
    """Load the model once, build the Gradio interface and start the server."""
    model = setup_model()

    def demo_tts(input_text: str, language: str):
        """Synthesize ``input_text`` in ``language`` and return audio for Gradio.

        Returns:
            A ``(sample_rate, samples)`` tuple, the format accepted by the
            Gradio ``"audio"`` output component.

        Raises:
            gr.Error: If ``input_text`` is empty or whitespace-only.
        """
        # Reject blank requests up front so the user gets a readable message
        # in the UI instead of an opaque failure from inside the model.
        if not input_text or not input_text.strip():
            raise gr.Error("Please enter some text to synthesize.")

        audio, audio_len = model.do_tts(input_text, language=language, apply_TN=True)
        # Keep only the valid samples of the first (only) item.
        # NOTE(review): assumes `audio` is (batch, samples) and `audio_len` is
        # (batch,) - confirm against `do_tts`.
        num_samples = int(audio_len[0])
        audio_np = audio[0, :num_samples].cpu().numpy()
        return model.sample_rate, audio_np

    demo = gr.Interface(
        fn=demo_tts,
        inputs=[
            gr.Textbox(label="Text to synthesize"),
            gr.Textbox(label="Language", value="en"),
        ],
        outputs="audio",
        title="Text to Speech MagpieTTS Demo",
    )
    # SECURITY NOTE: server_name="0.0.0.0" listens on all network interfaces
    # and share=True additionally requests a public Gradio share link, so
    # anyone with the URL can use the demo. Disable `share` (and/or bind to
    # 127.0.0.1) when running in an untrusted environment.
    demo.launch(server_name="0.0.0.0", server_port=6007, share=True)


if __name__ == "__main__":
    main()