# MagpieTTS_Internal_Demo / run_inference.sh
# subhankarg's picture
# Upload folder using huggingface_hub
# 0558aa4 verified
#!/usr/bin/env bash
# Launches MagpieTTS inference using the checkpoint, codec and dataset chosen
# by the variable assignments further down in this file.
#
# Abort on the first failing command and on any reference to an unset
# variable, so that commenting out every candidate of a setting fails loudly
# instead of handing an empty argument to the inference command.
set -eu

# Not referenced by any active command in this file; kept in case something
# outside this file reads it.
DOCKER_EXP_DIR="/checkpoints/results/25FPS_inference"

# Append the NeMo checkout to the Python module search path. The ':+' form
# only inserts the separator when PYTHONPATH already has a value, which avoids
# a leading ':' (an empty search-path entry) when it is unset or empty.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}/workspace/NeMo"
# python -m pip install --upgrade pip
# pip install nv_one_logger_core nv_one_logger_training_telemetry nv_one_logger_pytorch_lightning_integration \
# --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/sw-nemo-pypi/simple
# pip install kaldialign
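# pip install git+https://github.com/sarulab-speech/UTMOSv2.git  # NOTE: an "@<ref>" suffix was lost here to e-mail obfuscation ("[email protected]"); restore the intended tag before use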
### Best for non-streaming; great for streaming
# CKPT="/checkpoints/streaming/magpie/jason/magpieTTS--val_loss=5.1255-epoch=89-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/jason/magpietts_en_jason_inference.yaml"
# Great for non-streaming; best for streaming with exponential weight
# CKPT="/checkpoints/streaming/magpie/sugh_BIN_F2P1E0.0/magpieTTS--val_loss=5.1851-epoch=143-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/sugh_BIN_F2P1E0.0/magpietts_en_subhankarg_BIN_F2P1E0.0.yaml"
# Good for nonstreaming
# CKPT="/checkpoints/streaming/magpie/BIN_F5P5E0_DECF1.0P1.0_wait1_strictwindowTrue/magpieTTS--val_loss=5.2150-epoch=173-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/BIN_F5P5E0_DECF1.0P1.0_wait1_strictwindowTrue/magpietts_en_subhankarg_BIN_F0P2E0.0_DECF1.0P1.0.yaml"
# CKPT="/checkpoints/streaming/magpie/sugh_BIN_F0P2E0.0_DECF1.0P1.0/magpieTTS--val_loss=5.2076-epoch=144-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/sugh_BIN_F0P2E0.0_DECF1.0P1.0/magpietts_en_subhankarg_BIN_F0P2E0.0_DECF1.0P1.0.yaml"
# 25FPS
# Non streaming ok
# CKPT="/TB/magpie2503_CE_CA_BIN_BPE_F6P2E0_DECF1.0P1.0_C25FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_wait3_strictwindowTrue/magpieTTS/checkpoints/magpieTTS--val_loss=5.2386-epoch=153-last.ckpt"
# CKPT="/TB/magpie2503_CE_CA_BIN_BPE_F6P2E0_DECF1.0P1.0_C25FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_NOwaitK_strictwindowTrue/magpieTTS/checkpoints/magpieTTS--val_loss=5.2388-epoch=150-last.ckpt"
#
# CKPT="/TB/magpie2503_DC_CE_CA_BIN_BPE_F5P2E0_DECF1.0P1.0_C25FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_wait3_strictwindowTrue_bpe/magpieTTS/checkpoints/magpieTTS--val_loss=5.2238-epoch=229.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_en_bpe_25fps_inference.yaml"
### SMALL MAGPIE
# CKPT="/TB/DC_CE_BIN_small_C21FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_lhotse/magpieTTS/checkpoints/magpieTTS.nemo"
# CKPT="/TB/DC_CE_BIN_small_C21FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_lhotse/magpieTTS/checkpoints/MagpieTTS-EN-Lhotse--val_loss=5.1882-step=349010-epoch=349-last.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_lhotse_dc_en_tiny_inference.yaml"
# SMALL MAGPIE BETA-BINOMIAL
# CKPT="/TB/DC_CE_small_C21FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_lhotse/magpieTTS/checkpoints/magpieTTS.nemo"
# Checkpoint selection. Exactly one CKPT assignment should be active: the
# candidates below used to be assigned and then immediately overwritten, so
# they never reached the inference command. They are kept as comments for
# quick switching.
# CKPT="/TB/magpie2508_DC_CE_CE_CA_TextCond_BetaBinom_C21FPS_Causal_8cb_En-Mn-Fr-Es-De-Vi-Hi_IPA_lr1e-4_bs12_prec32_corrected/magpieTTS/checkpoints/magpieTTS.nemo"
### ML Not BAKED
# CKPT="/checkpoints/GRPO_Magpie_TTS_ML_V1_val_cer_gt_0_1014_step_800.ckpt"
# Active checkpoint (handed to the inference command via --nemo_files).
CKPT="/checkpoints/results/ML_MagpieTTS/CE-Removed_GRPO_Magpie_TTS_ML_V1.nemo"
# CKPT="/checkpoints/Magpie_TTS_ML_val_loss_9_7622_step_476016.ckpt"
# HPARAM is only referenced by the commented-out commands in this file
# (--hparams_files / --config_path); the active command does not read it.
HPARAM="/checkpoints/hparams.yaml"
# ###### BAKED
# CKPT="/checkpoints/results/baked_test.ckpt"
# HPARAM="/checkpoints/results/baked_test_config.yaml"
### SMALL MAGPIE WITH GRPO
# CKPT="/TB/GRPO_DC_CE_BIN_small_C21FPS_Causal_8cb_lr2e-7_bs2_prec32/magpieTTS/checkpoints/Magpie-TTS-EN--val_cer_gt=0.0237-step=1373-epoch=1.ckpt"
# CKPT="/TB/GRPO_KLDiv_infcfg0.5_DC_CE_BIN_small_C21FPS_Causal_8cb_lr2e-7_bs2_prec32/magpieTTS/checkpoints/Magpie-TTS-EN--val_cer_gt=0.0461-step=1846-epoch=2.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_lhotse_dc_en_tiny_inference.yaml"
### SMALL MAGPIE BETA-BINOMIAL WITH GRPO
# CKPT="/TB/GRPO_DC_CE_small_C21FPS_Causal_8cb_lr5e-7_bs2_prec32/magpieTTS/checkpoints/Magpie-TTS-EN--val_cer_gt=0.0250-step=1373-epoch=1.ckpt"
# CKPT="/checkpoints/streaming/magpie/shehzeen_ckpt/cer_ssim_priornull_2e7_beta001_llmservice_wcfg_wthreholds_epoch0.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_lhotse_dc_en_tiny_betabinomial_inference.yaml"
# CODEC="/nemo_codec_checkpoints/AudioCodec_21Hz_no_eliz.nemo"
# --- Codec model handed to the inference command via --codecmodel_path ------
CODEC='/nemo_codec_checkpoints/21fps_causal_codecmodel.nemo'
# CODEC="/nemo_codec_checkpoints/Low_Frame-rate_Speech_Codec++.nemo"
# CODEC="/nemo_codec_checkpoints/Low_Frame-rate_25FPS_Speech_Codec++.nemo"
# CODEC="nvidia/nemo-nano-codec-22khz-1.89kbps-21.5fps"

# --- Attention-prior epsilon -------------------------------------------------
# Only referenced by the commented-out commands in this file.
EPS='0.1'
# EPS=1e-8

# --- Dataset selection ---------------------------------------------------------
# Other dataset names that have been used with this script:
#   ,tom
#   lindy,rodney,megan,samy,virginie,houzhen,siwei,emma,lindy,sean,tom
#   libritts_test_clean,libritts_seen
#   riva_hard_digits,riva_hard_letters,riva_hard_money,riva_hard_short
#   local_longer_4 riva_challenging_nozeros
#   local_test_100
#   local_test_20
#   local_test
#   local_long_20
#   local_longer_20
#   local_longer_10
DATASET='carlos'
# DATASET=an4_val_ci

# --- Model label ---------------------------------------------------------------
# Only referenced by the commented-out commands in this file.
MODELTYPE='DC_CE_small_kld_cfg0.5_BIN_GRPO_2e-7_allattninf'
# MODELTYPE=Shehzeen_ckpt

# --- Output directory, derived from the dataset name --------------------------
OUT_DIR="/checkpoints/results/GRPO_$DATASET"
# OUT_DIR="/checkpoints/results/baked_${DATASET}"

# --- CUDA environment, exported so the process launched below inherits it -----
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is usually a debugging aid - confirm it
# is intended for regular inference runs.
CUDA_VISIBLE_DEVICES=0
CUDA_LAUNCH_BLOCKING=1
export CUDA_VISIBLE_DEVICES CUDA_LAUNCH_BLOCKING
# --nemo_files $CKPT \
# --checkpoint_files $CKPT \
# --hparams_files ${HPARAM} \
# python scripts/magpietts/infer_and_evaluate.py \
# --checkpoint_files $CKPT \
# --hparams_files ${HPARAM} \
# --codecmodel_path ${CODEC} \
# --datasets $DATASET \
# --out_dir /checkpoints/results/magpie2503_${MODELTYPE}_${DATASET}_BinInfer_multiplicative_Eps${EPS}_1 \
# --batch_size 32 \
# --use_cfg \
# --attention_prior_epsilon ${EPS} \
# --attention_prior_lookahead_window 5 \
# --start_prior_after_n_audio_steps 0 \
# --cfg_scale 2.5 \
# --temperature 0.6 \
# --apply_attention_prior \
# --asr_model_name "nvidia/parakeet-tdt-1.1b" \
# --estimate_alignment_from_layers "3,4,5,7,10" \
# --apply_prior_to_layers "3,4,5,6,7,8,10" \
# --estimate_alignment_from_layers "4,6,7,10" \
# --apply_prior_to_layers "4,5,6,7,8,10" \
# --checkpoint_files ${CKPT} \
# --hparams_files ${HPARAM} \
# python scripts/magpietts/infer_and_evaluate.py \
# --checkpoint_files ${CKPT} \
# --hparams_files ${HPARAM} \
# --codecmodel_path ${CODEC} \
# --out_dir ${OUT_DIR} \
# --datasets $DATASET \
# --use_cfg \
# --apply_attention_prior
# --hparams_files ${HPARAM} \
# --checkpoint_files ${CKPT} \
# Run inference followed by evaluation on the selected dataset(s).
# The script path is relative, so this must be launched from a directory that
# contains examples/tts/ (presumably the NeMo checkout at /workspace/NeMo -
# confirm). Every expansion is quoted so that a path or dataset list containing
# spaces or glob characters reaches the program as a single argument.
python examples/tts/magpietts_inference.py \
  --nemo_files "${CKPT}" \
  --datasets "${DATASET}" \
  --out_dir "${OUT_DIR}" \
  --codecmodel_path "${CODEC}" \
  --use_cfg \
  --apply_attention_prior \
  --run_evaluation
# python scripts/magpietts/bake_context_embedding.py \
# --input_checkpoint ${CKPT} \
# --config_path ${HPARAM} \
# --output_checkpoint /checkpoints/results/CE-Removed_GRPO_Magpie_TTS_ML_V1_val_cer_gt_0_1014_step_800.ckpt \
# --context_audio /checkpoints/results/JohnVanStanPlaceofburial_wordsworth.wav \
# --device cuda