Spaces:
Runtime error
Runtime error
File size: 6,849 Bytes
0558aa4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
#!/usr/bin/env bash
#
# Launcher for examples/tts/magpietts_inference.py: the variables defined in
# this script select a checkpoint, a codec model and a dataset, and the
# command near the end of the script runs inference with them.

# Abort on a failing command, on use of an unset variable, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# NOTE(review): not referenced by any active line in the visible script.
DOCKER_EXP_DIR="/checkpoints/results/25FPS_inference"

# Append the NeMo checkout to Python's module search path. The ${VAR:+...}
# form emits the ':' separator only when PYTHONPATH already has a value, so
# an unset or empty PYTHONPATH does not produce a leading ':' (an empty
# search-path entry).
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}/workspace/NeMo"
# python -m pip install --upgrade pip
# pip install nv_one_logger_core nv_one_logger_training_telemetry nv_one_logger_pytorch_lightning_integration \
# --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/sw-nemo-pypi/simple
# pip install kaldialign
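# pip install git+https://github.com/sarulab-speech/UTMOSv2.git  # NOTE(review): the scraped text read "[email protected]" here (e-mail obfuscation ate "<repo>.git@<ref>"); repo name presumed to be UTMOSv2 - confirm and re-add the intended @<ref> pin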
### Best for nonstreaming Great for streaming
# CKPT="/checkpoints/streaming/magpie/jason/magpieTTS--val_loss=5.1255-epoch=89-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/jason/magpietts_en_jason_inference.yaml"
# Great for nonstreaming Best for streaming with exponential weight
# CKPT="/checkpoints/streaming/magpie/sugh_BIN_F2P1E0.0/magpieTTS--val_loss=5.1851-epoch=143-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/sugh_BIN_F2P1E0.0/magpietts_en_subhankarg_BIN_F2P1E0.0.yaml"
# Good for nonstreaming
# CKPT="/checkpoints/streaming/magpie/BIN_F5P5E0_DECF1.0P1.0_wait1_strictwindowTrue/magpieTTS--val_loss=5.2150-epoch=173-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/BIN_F5P5E0_DECF1.0P1.0_wait1_strictwindowTrue/magpietts_en_subhankarg_BIN_F0P2E0.0_DECF1.0P1.0.yaml"
# CKPT="/checkpoints/streaming/magpie/sugh_BIN_F0P2E0.0_DECF1.0P1.0/magpieTTS--val_loss=5.2076-epoch=144-last.ckpt"
# HPARAM="/checkpoints/streaming/magpie/sugh_BIN_F0P2E0.0_DECF1.0P1.0/magpietts_en_subhankarg_BIN_F0P2E0.0_DECF1.0P1.0.yaml"
# 25FPS
# Non streaming ok
# CKPT="/TB/magpie2503_CE_CA_BIN_BPE_F6P2E0_DECF1.0P1.0_C25FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_wait3_strictwindowTrue/magpieTTS/checkpoints/magpieTTS--val_loss=5.2386-epoch=153-last.ckpt"
# CKPT="/TB/magpie2503_CE_CA_BIN_BPE_F6P2E0_DECF1.0P1.0_C25FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_NOwaitK_strictwindowTrue/magpieTTS/checkpoints/magpieTTS--val_loss=5.2388-epoch=150-last.ckpt"
#
# CKPT="/TB/magpie2503_DC_CE_CA_BIN_BPE_F5P2E0_DECF1.0P1.0_C25FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_wait3_strictwindowTrue_bpe/magpieTTS/checkpoints/magpieTTS--val_loss=5.2238-epoch=229.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_en_bpe_25fps_inference.yaml"
### SMALL MAGPIE
# CKPT="/TB/DC_CE_BIN_small_C21FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_lhotse/magpieTTS/checkpoints/magpieTTS.nemo"
# CKPT="/TB/DC_CE_BIN_small_C21FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_lhotse/magpieTTS/checkpoints/MagpieTTS-EN-Lhotse--val_loss=5.1882-step=349010-epoch=349-last.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_lhotse_dc_en_tiny_inference.yaml"
# SMALL MAGPIE BETA-BINOMIAL
# CKPT="/TB/DC_CE_small_C21FPS_Causal_8cb_HRLLM_lr1e-4_bs12_precbf16_lhotse/magpieTTS/checkpoints/magpieTTS.nemo"
# Checkpoint selection. Exactly one CKPT assignment is active; the other
# candidates are commented out so that the value handed to the inference
# command is unambiguous.
# CKPT="/TB/magpie2508_DC_CE_CE_CA_TextCond_BetaBinom_C21FPS_Causal_8cb_En-Mn-Fr-Es-De-Vi-Hi_IPA_lr1e-4_bs12_prec32_corrected/magpieTTS/checkpoints/magpieTTS.nemo"
### ML Not BAKED
# CKPT="/checkpoints/GRPO_Magpie_TTS_ML_V1_val_cer_gt_0_1014_step_800.ckpt"
CKPT="/checkpoints/results/ML_MagpieTTS/CE-Removed_GRPO_Magpie_TTS_ML_V1.nemo"
# CKPT="/checkpoints/Magpie_TTS_ML_val_loss_9_7622_step_476016.ckpt"
# Hyper-parameter YAML. In the visible script it is only referenced by
# commented-out commands (--hparams_files / --config_path).
HPARAM="/checkpoints/hparams.yaml"
# ###### BAKED
# CKPT="/checkpoints/results/baked_test.ckpt"
# HPARAM="/checkpoints/results/baked_test_config.yaml"
### SMALL MAGPIE WITH GRPO
# CKPT="/TB/GRPO_DC_CE_BIN_small_C21FPS_Causal_8cb_lr2e-7_bs2_prec32/magpieTTS/checkpoints/Magpie-TTS-EN--val_cer_gt=0.0237-step=1373-epoch=1.ckpt"
# CKPT="/TB/GRPO_KLDiv_infcfg0.5_DC_CE_BIN_small_C21FPS_Causal_8cb_lr2e-7_bs2_prec32/magpieTTS/checkpoints/Magpie-TTS-EN--val_cer_gt=0.0461-step=1846-epoch=2.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_lhotse_dc_en_tiny_inference.yaml"
### SMALL MAGPIE BETA-BINOMIAL WITH GRPO
# CKPT="/TB/GRPO_DC_CE_small_C21FPS_Causal_8cb_lr5e-7_bs2_prec32/magpieTTS/checkpoints/Magpie-TTS-EN--val_cer_gt=0.0250-step=1373-epoch=1.ckpt"
# CKPT="/checkpoints/streaming/magpie/shehzeen_ckpt/cer_ssim_priornull_2e7_beta001_llmservice_wcfg_wthreholds_epoch0.ckpt"
# HPARAM="/workspace/NeMo/examples/tts/conf/magpietts/magpietts_lhotse_dc_en_tiny_betabinomial_inference.yaml"
# CODEC="/nemo_codec_checkpoints/AudioCodec_21Hz_no_eliz.nemo"
# Codec model passed to the inference command via --codecmodel_path.
# Inactive alternatives:
# CODEC="/nemo_codec_checkpoints/Low_Frame-rate_Speech_Codec++.nemo"
# CODEC="/nemo_codec_checkpoints/Low_Frame-rate_25FPS_Speech_Codec++.nemo"
# CODEC="nvidia/nemo-nano-codec-22khz-1.89kbps-21.5fps"
CODEC='/nemo_codec_checkpoints/21fps_causal_codecmodel.nemo'

# Value for --attention_prior_epsilon; in the visible script only the
# commented-out infer_and_evaluate.py command reads it.
# EPS=1e-8
EPS='0.1'

# Dataset name(s) passed via --datasets. Other values previously noted here:
#   ,tom
#   , lindy,rodney,megan,samy,virginie,houzhen,siwei,emma,lindy,sean,tom
#   libritts_test_clean,libritts_seen
#   riva_hard_digits,riva_hard_letters,riva_hard_money,riva_hard_short
#   local_longer_4 riva_challenging_nozeros
#   local_test_100
#   local_test_20
#   local_test
#   local_long_20
#   local_longer_20
#   local_longer_10
# DATASET=an4_val_ci
DATASET='carlos'

# Run label; in the visible script only a commented-out --out_dir uses it.
# MODELTYPE=Shehzeen_ckpt
MODELTYPE='DC_CE_small_kld_cfg0.5_BIN_GRPO_2e-7_allattninf'

# Output directory, derived from the selected dataset.
# OUT_DIR="/checkpoints/results/baked_${DATASET}"
OUT_DIR="/checkpoints/results/GRPO_$DATASET"

# CUDA-related environment exported to child processes.
CUDA_VISIBLE_DEVICES='0'
CUDA_LAUNCH_BLOCKING='1'
export CUDA_VISIBLE_DEVICES CUDA_LAUNCH_BLOCKING
# --nemo_files $CKPT \
# --checkpoint_files $CKPT \
# --hparams_files ${HPARAM} \
# python scripts/magpietts/infer_and_evaluate.py \
# --checkpoint_files $CKPT \
# --hparams_files ${HPARAM} \
# --codecmodel_path ${CODEC} \
# --datasets $DATASET \
# --out_dir /checkpoints/results/magpie2503_${MODELTYPE}_${DATASET}_BinInfer_multiplicative_Eps${EPS}_1 \
# --batch_size 32 \
# --use_cfg \
# --attention_prior_epsilon ${EPS} \
# --attention_prior_lookahead_window 5 \
# --start_prior_after_n_audio_steps 0 \
# --cfg_scale 2.5 \
# --temperature 0.6 \
# --apply_attention_prior \
# --asr_model_name "nvidia/parakeet-tdt-1.1b" \
# --estimate_alignment_from_layers "3,4,5,7,10" \
# --apply_prior_to_layers "3,4,5,6,7,8,10" \
# --estimate_alignment_from_layers "4,6,7,10" \
# --apply_prior_to_layers "4,5,6,7,8,10" \
# --checkpoint_files ${CKPT} \
# --hparams_files ${HPARAM} \
# python scripts/magpietts/infer_and_evaluate.py \
# --checkpoint_files ${CKPT} \
# --hparams_files ${HPARAM} \
# --codecmodel_path ${CODEC} \
# --out_dir ${OUT_DIR} \
# --datasets $DATASET \
# --use_cfg \
# --apply_attention_prior
# --hparams_files ${HPARAM} \
# --checkpoint_files ${CKPT} \
# Run inference with the checkpoint, dataset, output directory and codec
# chosen by CKPT, DATASET, OUT_DIR and CODEC, and request evaluation.
# The script path is relative, so this has to be started from a directory
# containing examples/tts/ (presumably the NeMo repository root - confirm).
# Every expansion is double-quoted so each value reaches python as exactly
# one argument, with no word splitting or glob expansion.
python examples/tts/magpietts_inference.py \
  --nemo_files "${CKPT}" \
  --datasets "${DATASET}" \
  --out_dir "${OUT_DIR}" \
  --codecmodel_path "${CODEC}" \
  --use_cfg \
  --apply_attention_prior \
  --run_evaluation
# python scripts/magpietts/bake_context_embedding.py \
# --input_checkpoint ${CKPT} \
# --config_path ${HPARAM} \
# --output_checkpoint /checkpoints/results/CE-Removed_GRPO_Magpie_TTS_ML_V1_val_cer_gt_0_1014_step_800.ckpt \
# --context_audio /checkpoints/results/JohnVanStanPlaceofburial_wordsworth.wav \
# --device cuda
|