Spaces:

subhankarg
/

MagpieTTS_Internal_Demo

Runtime error

App Files Files Community

MagpieTTS_Internal_Demo / nemo /lightning /io /registry.py

subhankarg

Upload folder using huggingface_hub

0558aa4 verified 14 days ago

raw

history blame contribute delete

2.16 kB

	# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.


	from nemo.lightning.io.artifact import DirOrStringArtifact, FileArtifact
	from nemo.lightning.io.mixin import track_io

	# Registers all required classes with track_io functionality
	try:
	    # Hugging Face tokenizer classes: each one is registered with the same
	    # four file artifacts, all declared with required=False.
	    from transformers import AutoTokenizer as HfAutoTokenizer
	    from transformers.models.llama.tokenization_llama import LlamaTokenizer
	    from transformers.models.llama.tokenization_llama_fast import LlamaTokenizerFast

	    for cls in (HfAutoTokenizer, LlamaTokenizer, LlamaTokenizerFast):
	        track_io(
	            cls,
	            artifacts=[
	                FileArtifact('vocab_file', required=False),
	                FileArtifact('merges_file', required=False),
	                FileArtifact('tokenizer_file', required=False),
	                FileArtifact('name_or_path', required=False),
	            ],
	        )

	    # NeMo's AutoTokenizer is registered inside the same guarded section, so
	    # it is skipped as well when anything above raises ImportError.
	    from nemo.collections.common.tokenizers import AutoTokenizer

	    track_io(
	        AutoTokenizer,
	        artifacts=[
	            *(FileArtifact(attr_name, required=False) for attr_name in ("vocab_file", "merges_file")),
	            DirOrStringArtifact("pretrained_model_name", required=False),
	        ],
	    )
	except ImportError:
	    # A needed import is unavailable; leave these tokenizers unregistered.
	    pass


	try:
	    from nemo.collections.common.tokenizers import (
	        ByteLevelTokenizer,
	        SentencePieceTokenizer,
	        TiktokenTokenizer,
	    )

	    # These two tokenizers are each registered with a single file artifact.
	    track_io(
	        SentencePieceTokenizer,
	        artifacts=[FileArtifact("model_path")],
	    )
	    track_io(
	        TiktokenTokenizer,
	        artifacts=[FileArtifact("vocab_file")],
	    )
	    # ByteLevelTokenizer is registered without an artifacts argument.
	    track_io(ByteLevelTokenizer)
	except ImportError:
	    # The NeMo tokenizers could not be imported; skip their registration.
	    pass