#!/usr/bin/env bash
#
# Driver for examples/tts/magpietts_inference.py.
#
# Selects one model checkpoint (CKPT), one codec checkpoint (CODEC), one
# dataset (DATASET) and an output directory (OUT_DIR), then invokes the
# inference script with classifier-free guidance, the attention prior and
# evaluation enabled (--use_cfg, --apply_attention_prior, --run_evaluation).
#
# The many commented-out assignments below are alternative configurations
# kept for reference; uncomment exactly one value per variable to switch.
#
# NOTE(review): the python script path is relative, so this presumably has to
# be launched from the NeMo repository root (/workspace/NeMo) -- confirm.

# Abort on references to unset variables (catches typos in the names below).
# `set -e` is not needed: the inference command is the final statement, so
# its exit status is already the exit status of this script.
set -u

# Not referenced by the active command; kept for reference.
DOCKER_EXP_DIR="/checkpoints/results/25FPS_inference"

# Append the NeMo checkout to PYTHONPATH. The ${VAR:+...} form avoids a
# leading empty entry (and a `set -u` failure) when PYTHONPATH is unset.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}/workspace/NeMo"

# One-time environment setup (run manually if needed):
# python -m pip install --upgrade pip
# pip install nv_one_logger_core nv_one_logger_training_telemetry nv_one_logger_pytorch_lightning_integration \
#     --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/sw-nemo-pypi/simple
# pip install kaldialign
# pip install git+https://github.com/sarulab-speech/UTMOSv2.git@v1.2.1

# ---------------------------------------------------------------------------
# Model checkpoint (CKPT) and hyper-parameter file (HPARAM)
# ---------------------------------------------------------------------------

### Best for nonstreaming, great for streaming
# CKPT="/checkpoints/streaming/magpie/jason/magpieTTS--val_loss=5.1255-epoch=89-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/jason/magpietts_en_jason_inference.yaml"

# Great for nonstreaming, best for streaming with exponential weight
# CKPT="/checkpoints/streaming/magpie/sugh_BIN_F2P1E0.0/magpieTTS--val_loss=5.1851-epoch=143-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/sugh_BIN_F2P1E0.0/magpietts_en_subhankarg_BIN_F2P1E0.0.yaml"

# Good for nonstreaming
# CKPT="/checkpoints/streaming/magpie/BIN_F5P5E0_DECF1.0P1.0_wait1_strictwindowTrue/magpieTTS--val_loss=5.2150-epoch=173-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/BIN_F5P5E0_DECF1.0P1.0_wait1_strictwindowTrue/magpietts_en_subhankarg_BIN_F0P2E0.0_DECF1.0P1.0.yaml"

# CKPT="/checkpoints/streaming/magpie/sugh_BIN_F0P2E0.0_DECF1.0P1.0/magpieTTS--val_loss=5.2076-epoch=144-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/sugh_BIN_F0P2E0.0_DECF1.0P1.0/magpietts_en_subhankarg_BIN_F0P2E0.0_DECF1.0P1.0.yaml"

# 25FPS
# Non streaming ok
# CKPT="/TB/magpie2503_CE_CA_BIN_BPE_F6P2E0_DECF1.0P1.0_C25FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_wait3_strictwindowTrue/magpieTTS/checkpoints/magpieTTS--val_loss=5.2386-epoch=153-last.ckpt"
# CKPT="/TB/magpie2503_CE_CA_BIN_BPE_F6P2E0_DECF1.0P1.0_C25FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_NOwaitK_strictwindowTrue/magpieTTS/checkpoints/magpieTTS--val_loss=5.2388-epoch=150-last.ckpt"
#
# CKPT="/TB/magpie2503_DC_CE_CA_BIN_BPE_F5P2E0_DECF1.0P1.0_C25FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_wait3_strictwindowTrue_bpe/magpieTTS/checkpoints/magpieTTS--val_loss=5.2238-epoch=229.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_en_bpe_25fps_inference.yaml"

### SMALL MAGPIE
# CKPT="/TB/DC_CE_BIN_small_C21FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_lhotse/magpieTTS/checkpoints/magpieTTS.nemo"
# CKPT="/TB/DC_CE_BIN_small_C21FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_lhotse/magpieTTS/checkpoints/MagpieTTS-EN-Lhotse--val_loss=5.1882-step=349010-epoch=349-last.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_lhotse_dc_en_tiny_inference.yaml"

# SMALL MAGPIE BETA-BINOMIAL
# CKPT="/TB/DC_CE_small_C21FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_lhotse/magpieTTS/checkpoints/magpieTTS.nemo"
# (previously assigned and then immediately overwritten; kept as an alternative)
# CKPT="/TB/magpie2508_DC_CE_CE_CA_TextCond_BetaBinom_C21FPS_Causal_8cb_En-Mn-Fr-Es-De-Vi-Hi_IPA_lr1e-4_bs12_prec32_corrected/magpieTTS/checkpoints/magpieTTS.nemo"

### ML Not BAKED
# (previously assigned and then immediately overwritten; kept as an alternative)
# CKPT="/checkpoints/GRPO_Magpie_TTS_ML_V1_val_cer_gt_0_1014_step_800.ckpt"
# ACTIVE checkpoint: this is the value that reaches --nemo_files below.
CKPT="/checkpoints/results/ML_MagpieTTS/CE-Removed_GRPO_Magpie_TTS_ML_V1.nemo"
# CKPT="/checkpoints/Magpie_TTS_ML_val_loss_9_7622_step_476016.ckpt"
# HPARAM is only consumed by the commented-out --hparams_files invocations.
HPARAM="/checkpoints/hparams.yaml"

###### BAKED
# CKPT="/checkpoints/results/baked_test.ckpt"
# HPARAM="/checkpoints/results/baked_test_config.yaml"

### SMALL MAGPIE WITH GRPO
# CKPT="/TB/GRPO_DC_CE_BIN_small_C21FPS_Causal_8cb_lr2e-7_bs2_prec32/magpieTTS/checkpoints/Magpie-TTS-EN--val_cer_gt=0.0237-step=1373-epoch=1.ckpt"
# CKPT="/TB/GRPO_KLDiv_infcfg0.5_DC_CE_BIN_small_C21FPS_Causal_8cb_lr2e-7_bs2_prec32/magpieTTS/checkpoints/Magpie-TTS-EN--val_cer_gt=0.0461-step=1846-epoch=2.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_lhotse_dc_en_tiny_inference.yaml"

### SMALL MAGPIE BETA-BINOMIAL WITH GRPO
# CKPT="/TB/GRPO_DC_CE_small_C21FPS_Causal_8cb_lr5e-7_bs2_prec32/magpieTTS/checkpoints/Magpie-TTS-EN--val_cer_gt=0.0250-step=1373-epoch=1.ckpt"
# CKPT="/checkpoints/streaming/magpie/shehzeen_ckpt/cer_ssim_priornull_2e7_beta001_llmservice_wcfg_wthreholds_epoch0.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_lhotse_dc_en_tiny_betabinomial_inference.yaml"

# ---------------------------------------------------------------------------
# Codec checkpoint
# ---------------------------------------------------------------------------
# CODEC="/nemo_codec_checkpoints/AudioCodec_21Hz_no_eliz.nemo"
CODEC="/nemo_codec_checkpoints/21fps_causal_codecmodel.nemo"
# CODEC="/nemo_codec_checkpoints/Low_Frame-rate_Speech_Codec++.nemo"
# CODEC="/nemo_codec_checkpoints/Low_Frame-rate_25FPS_Speech_Codec++.nemo"
# CODEC="nvidia/nemo-nano-codec-22khz-1.89kbps-21.5fps"

# EPS is only consumed by the commented-out --attention_prior_epsilon runs.
EPS=0.1
# EPS=1e-8

# ---------------------------------------------------------------------------
# Dataset selection (value passed verbatim to --datasets)
# ---------------------------------------------------------------------------
DATASET=carlos
# Other values that have been used:
#   ,tom
#   , lindy,rodney,megan,samy,virginie,houzhen,siwei,emma,lindy,sean,tom
#   libritts_test_clean,libritts_seen
#   riva_hard_digits,riva_hard_letters,riva_hard_money,riva_hard_short
#   local_longer_4 riva_challenging_nozeros
#   local_test_100
#   local_test_20
#   local_test
#   local_long_20
#   local_longer_20
#   local_longer_10
# DATASET=an4_val_ci

# MODELTYPE is only consumed by the commented-out --out_dir pattern below.
MODELTYPE=DC_CE_small_kld_cfg0.5_BIN_GRPO_2e-7_allattninf
# MODELTYPE=Shehzeen_ckpt

OUT_DIR="/checkpoints/results/GRPO_${DATASET}"
# OUT_DIR="/checkpoints/results/baked_${DATASET}"

# Restrict the run to GPU 0 and make CUDA kernel launches synchronous
# (a debugging aid; it slows execution).
export CUDA_VISIBLE_DEVICES=0
export CUDA_LAUNCH_BLOCKING=1

# ---------------------------------------------------------------------------
# Alternative invocations (kept for reference)
# ---------------------------------------------------------------------------
#     --nemo_files $CKPT \
#     --checkpoint_files $CKPT \
#     --hparams_files ${HPARAM} \

# python scripts/magpietts/infer_and_evaluate.py \
#     --checkpoint_files $CKPT \
#     --hparams_files ${HPARAM} \
#     --codecmodel_path ${CODEC} \
#     --datasets $DATASET \
#     --out_dir /checkpoints/results/magpie2503_${MODELTYPE}_${DATASET}_BinInfer_multiplicative_Eps${EPS}_1 \
#     --batch_size 32 \
#     --use_cfg \
#     --attention_prior_epsilon ${EPS} \
#     --attention_prior_lookahead_window 5 \
#     --start_prior_after_n_audio_steps 0 \
#     --cfg_scale 2.5 \
#     --temperature 0.6 \
#     --apply_attention_prior \
#     --asr_model_name "nvidia/parakeet-tdt-1.1b" \
#     --estimate_alignment_from_layers "3,4,5,7,10" \
#     --apply_prior_to_layers "3,4,5,6,7,8,10" \
#     --estimate_alignment_from_layers "4,6,7,10" \
#     --apply_prior_to_layers "4,5,6,7,8,10" \

#     --checkpoint_files ${CKPT} \
#     --hparams_files ${HPARAM} \

# python scripts/magpietts/infer_and_evaluate.py \
#     --checkpoint_files ${CKPT} \
#     --hparams_files ${HPARAM} \
#     --codecmodel_path ${CODEC} \
#     --out_dir ${OUT_DIR} \
#     --datasets $DATASET \
#     --use_cfg \
#     --apply_attention_prior

#     --hparams_files ${HPARAM} \
#     --checkpoint_files ${CKPT} \

# ---------------------------------------------------------------------------
# Active invocation. Must remain the last statement so that its exit status
# becomes the exit status of the script.
# ---------------------------------------------------------------------------
python examples/tts/magpietts_inference.py \
  --nemo_files "${CKPT}" \
  --datasets "${DATASET}" \
  --out_dir "${OUT_DIR}" \
  --codecmodel_path "${CODEC}" \
  --use_cfg \
  --apply_attention_prior \
  --run_evaluation

# python scripts/magpietts/bake_context_embedding.py \
#     --input_checkpoint ${CKPT} \
#     --config_path ${HPARAM} \
#     --output_checkpoint /checkpoints/results/CE-Removed_GRPO_Magpie_TTS_ML_V1_val_cer_gt_0_1014_step_800.ckpt \
#     --context_audio /checkpoints/results/JohnVanStanPlaceofburial_wordsworth.wav \
#     --device cuda