Spaces:

subhankarg
/

MagpieTTS_Internal_Demo

Runtime error

App Files Files Community

MagpieTTS_Internal_Demo / tests /collections /audio /test_audio_maxine_models.py

subhankarg

Upload folder using huggingface_hub

0558aa4 verified 9 days ago

raw

history blame contribute delete

3.89 kB

	# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import pytest
	import torch
	from omegaconf import DictConfig

	try:
	import importlib

	importlib.import_module('torchaudio')

	HAVE_TORCHAUDIO = True
	except ModuleNotFoundError:
	HAVE_TORCHAUDIO = False

	from nemo.collections.audio.models.maxine import BNR2


	@pytest.fixture()
	def maxine_model_fixture():
	sample_rate = 16000
	fft_length = 1920
	hop_length = 480
	num_mels = 320

	optim = {
	'name': 'adam',
	'lr': 0.0005,
	'sched': {
	'name': 'StepLR',
	},
	'gamma': 0.999,
	'step_size': 2,
	}

	loss = {
	'_target_': 'nemo.collections.audio.losses.maxine.CombinedLoss',
	'sample_rate': sample_rate,
	'fft_length': fft_length,
	'hop_length': hop_length,
	'num_mels': num_mels,
	'sisnr_loss_weight': 1,
	'spectral_loss_weight': 15,
	'asr_loss_weight': 1,
	'use_asr_loss': True,
	'use_mel_spec': True,
	}

	config = DictConfig(
	{
	'type': "bnr",
	'sample_rate': sample_rate,
	'fft_length': fft_length,
	'hop_length': hop_length,
	'num_mels': num_mels,
	'skip_nan_grad': False,
	'num_outputs': 1,
	'segment': 4,
	'loss': DictConfig(loss),
	'optim': DictConfig(optim),
	}
	)

	bnr = BNR2(cfg=config)
	return bnr


	class TestBNR2Model:
	"""Test BNR 2 model."""

	@pytest.mark.unit
	@pytest.mark.skipif(not HAVE_TORCHAUDIO, reason="Modules in this test require torchaudio")
	def test_constructor(self, maxine_model_fixture):
	"""Test that the model can be constructed from a config dict."""
	model = maxine_model_fixture.train()
	confdict = model.to_config_dict()
	instance2 = BNR2.from_config_dict(confdict)
	assert isinstance(instance2, BNR2)

	@pytest.mark.unit
	@pytest.mark.skipif(not HAVE_TORCHAUDIO, reason="Modules in this test require torchaudio")
	@pytest.mark.parametrize(
	"batch_size, sample_len",
	[
	# Note: Must be a multiple of 10ms @ 16kkHz
	(4, 16), # Example 1
	(2, 8), # Example 2
	(1, 32), # Example 3
	],
	)
	def test_forward_infer(self, maxine_model_fixture, batch_size, sample_len):
	"""Test that the model can run forward inference."""
	model = maxine_model_fixture.eval()
	confdict = model.to_config_dict()
	sampling_rate = confdict['sample_rate']
	input_signal = torch.randn(size=(batch_size, 1, sample_len * sampling_rate))

	abs_tol = 1e-5

	with torch.no_grad():
	# batch size 1
	output_list = []
	for i in range(input_signal.size(0)):
	output = model.forward(input_signal=input_signal[i : i + 1])
	output_list.append(output)
	output_instance = torch.cat(output_list, 0)

	# batch size batch_size
	output_batch = model.forward(input_signal=input_signal)

	# Check that the output is the same for the instance and batch
	assert output_instance.shape == output_batch.shape

	diff = torch.max(torch.abs(output_instance - output_batch))
	assert diff <= abs_tol