Training in progress, step 3000
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- config.json +2 -2
- inspect_dataset.py +26 -0
- model.safetensors +1 -1
- requirements.txt +11 -0
- run.sh +9 -6
- run_cv.sh +39 -0
- run_speech_recognition_seq2seq_streaming.py +144 -55
- run_speech_recognition_seq2seq_streaming_cv.py +657 -0
- tokenizer_config.json +1 -0
- training_args.bin +2 -2
- wandb/debug-internal.log +7 -0
- wandb/debug.log +25 -0
- wandb/run-20250212_121751-d4i88lzt/files/config.yaml +512 -0
- wandb/run-20250212_121751-d4i88lzt/files/output.log +22 -0
- wandb/run-20250212_121751-d4i88lzt/files/requirements.txt +115 -0
- wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json +85 -0
- wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json +1 -0
- wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log +14 -0
- wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log +15 -0
- wandb/run-20250212_121751-d4i88lzt/logs/debug.log +26 -0
- wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb +0 -0
- wandb/run-20250212_122637-v3d3ouvn/files/config.yaml +512 -0
- wandb/run-20250212_122637-v3d3ouvn/files/output.log +22 -0
- wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt +115 -0
- wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json +85 -0
- wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json +1 -0
- wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log +14 -0
- wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log +15 -0
- wandb/run-20250212_122637-v3d3ouvn/logs/debug.log +26 -0
- wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb +0 -0
- wandb/run-20250212_122854-4m048f5s/files/config.yaml +512 -0
- wandb/run-20250212_122854-4m048f5s/files/output.log +22 -0
- wandb/run-20250212_122854-4m048f5s/files/requirements.txt +115 -0
- wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json +85 -0
- wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json +1 -0
- wandb/run-20250212_122854-4m048f5s/logs/debug-core.log +14 -0
- wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log +15 -0
- wandb/run-20250212_122854-4m048f5s/logs/debug.log +26 -0
- wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb +0 -0
- wandb/run-20250212_125202-c6xjc1gs/files/config.yaml +512 -0
- wandb/run-20250212_125202-c6xjc1gs/files/output.log +22 -0
- wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt +115 -0
- wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json +85 -0
- wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json +1 -0
- wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log +14 -0
- wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log +15 -0
- wandb/run-20250212_125202-c6xjc1gs/logs/debug.log +26 -0
- wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb +0 -0
- wandb/run-20250212_125924-xhsgsxqq/files/config.yaml +512 -0
.gitattributes
CHANGED
|
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
wandb/run-20250212_152709-lejyafmi/run-lejyafmi.wandb filter=lfs diff=lfs merge=lfs -text
|
config.json
CHANGED
|
@@ -31,7 +31,7 @@
|
|
| 31 |
"mask_time_length": 10,
|
| 32 |
"mask_time_min_masks": 2,
|
| 33 |
"mask_time_prob": 0.05,
|
| 34 |
-
"max_length":
|
| 35 |
"max_source_positions": 1500,
|
| 36 |
"max_target_positions": 448,
|
| 37 |
"median_filter_width": 7,
|
|
@@ -41,7 +41,7 @@
|
|
| 41 |
"pad_token_id": 50257,
|
| 42 |
"scale_embedding": false,
|
| 43 |
"torch_dtype": "float32",
|
| 44 |
-
"transformers_version": "4.
|
| 45 |
"use_cache": false,
|
| 46 |
"use_weighted_layer_sum": false,
|
| 47 |
"vocab_size": 51865
|
|
|
|
| 31 |
"mask_time_length": 10,
|
| 32 |
"mask_time_min_masks": 2,
|
| 33 |
"mask_time_prob": 0.05,
|
| 34 |
+
"max_length": null,
|
| 35 |
"max_source_positions": 1500,
|
| 36 |
"max_target_positions": 448,
|
| 37 |
"median_filter_width": 7,
|
|
|
|
| 41 |
"pad_token_id": 50257,
|
| 42 |
"scale_embedding": false,
|
| 43 |
"torch_dtype": "float32",
|
| 44 |
+
"transformers_version": "4.49.0.dev0",
|
| 45 |
"use_cache": false,
|
| 46 |
"use_weighted_layer_sum": false,
|
| 47 |
"vocab_size": 51865
|
inspect_dataset.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datasets import load_dataset
|
| 2 |
+
import json
|
| 3 |
+
|
| 4 |
+
# Load the first few examples of the dataset
|
| 5 |
+
dataset = load_dataset("asierhv/composite_corpus_eu_v2.1", split="train", streaming=True)
|
| 6 |
+
|
| 7 |
+
# Get the first example
|
| 8 |
+
examples = []
|
| 9 |
+
for i, example in enumerate(dataset):
|
| 10 |
+
if i >= 3: # Get first 3 examples
|
| 11 |
+
break
|
| 12 |
+
examples.append(example)
|
| 13 |
+
|
| 14 |
+
# Print the structure and content
|
| 15 |
+
for i, example in enumerate(examples):
|
| 16 |
+
print(f"\nExample {i+1}:")
|
| 17 |
+
for key, value in example.items():
|
| 18 |
+
if key == "audio":
|
| 19 |
+
print(f"audio keys: {value.keys()}")
|
| 20 |
+
for audio_key, audio_value in value.items():
|
| 21 |
+
if isinstance(audio_value, bytes) or isinstance(audio_value, memoryview):
|
| 22 |
+
print(f" {audio_key}: <binary data>")
|
| 23 |
+
else:
|
| 24 |
+
print(f" {audio_key}: {audio_value}")
|
| 25 |
+
else:
|
| 26 |
+
print(f"{key}: {value}")
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 966995080
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d2fd3b1746a32b70ee58ee1a3c90a88042e6300b79bcf3fd6d5bfc260af06f0
|
| 3 |
size 966995080
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch>=1.7
|
| 2 |
+
torchaudio
|
| 3 |
+
git+https://github.com/huggingface/transformers
|
| 4 |
+
git+https://github.com/huggingface/datasets
|
| 5 |
+
librosa
|
| 6 |
+
jiwer
|
| 7 |
+
evaluate>=0.3.0
|
| 8 |
+
more-itertools
|
| 9 |
+
tensorboard
|
| 10 |
+
accelerate>=0.26.0
|
| 11 |
+
wandb>=0.19.6
|
run.sh
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
|
|
| 1 |
python run_speech_recognition_seq2seq_streaming.py \
|
| 2 |
--model_name_or_path="openai/whisper-small" \
|
| 3 |
-
--dataset_name="
|
| 4 |
-
--dataset_config_name="eu" \
|
| 5 |
--language="basque" \
|
| 6 |
-
--train_split_name="train
|
| 7 |
-
--eval_split_name="
|
| 8 |
--model_index_name="Whisper Small Basque" \
|
| 9 |
-
--max_steps="
|
| 10 |
--output_dir="./" \
|
| 11 |
--per_device_train_batch_size="32" \
|
| 12 |
--per_device_eval_batch_size="16" \
|
|
@@ -21,6 +21,7 @@ python run_speech_recognition_seq2seq_streaming.py \
|
|
| 21 |
--generation_max_length="225" \
|
| 22 |
--length_column_name="input_length" \
|
| 23 |
--max_duration_in_seconds="30" \
|
|
|
|
| 24 |
--text_column_name="sentence" \
|
| 25 |
--freeze_feature_encoder="False" \
|
| 26 |
--report_to="tensorboard" \
|
|
@@ -36,4 +37,6 @@ python run_speech_recognition_seq2seq_streaming.py \
|
|
| 36 |
--do_normalize_eval \
|
| 37 |
--streaming \
|
| 38 |
--use_auth_token \
|
| 39 |
-
--push_to_hub
|
|
|
|
|
|
|
|
|
| 1 |
+
WANDB_PROJECT=whisper-small-eu \
|
| 2 |
python run_speech_recognition_seq2seq_streaming.py \
|
| 3 |
--model_name_or_path="openai/whisper-small" \
|
| 4 |
+
--dataset_name="asierhv/composite_corpus_eu_v2.1" \
|
|
|
|
| 5 |
--language="basque" \
|
| 6 |
+
--train_split_name="train" \
|
| 7 |
+
--eval_split_name="dev_parl+test_parl+test_cv+test_oslr" \
|
| 8 |
--model_index_name="Whisper Small Basque" \
|
| 9 |
+
--max_steps="8000" \
|
| 10 |
--output_dir="./" \
|
| 11 |
--per_device_train_batch_size="32" \
|
| 12 |
--per_device_eval_batch_size="16" \
|
|
|
|
| 21 |
--generation_max_length="225" \
|
| 22 |
--length_column_name="input_length" \
|
| 23 |
--max_duration_in_seconds="30" \
|
| 24 |
+
--audio_column_name="audio" \
|
| 25 |
--text_column_name="sentence" \
|
| 26 |
--freeze_feature_encoder="False" \
|
| 27 |
--report_to="tensorboard" \
|
|
|
|
| 37 |
--do_normalize_eval \
|
| 38 |
--streaming \
|
| 39 |
--use_auth_token \
|
| 40 |
+
--push_to_hub \
|
| 41 |
+
--report_to "wandb" \
|
| 42 |
+
--run_name "whisper-small-eu"
|
run_cv.sh
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
python run_speech_recognition_seq2seq_streaming.py \
|
| 2 |
+
--model_name_or_path="openai/whisper-small" \
|
| 3 |
+
--dataset_name="mozilla-foundation/common_voice_17_0" \
|
| 4 |
+
--dataset_config_name="eu" \
|
| 5 |
+
--language="basque" \
|
| 6 |
+
--train_split_name="train+validation" \
|
| 7 |
+
--eval_split_name="test" \
|
| 8 |
+
--model_index_name="Whisper Small Basque" \
|
| 9 |
+
--max_steps="5000" \
|
| 10 |
+
--output_dir="./" \
|
| 11 |
+
--per_device_train_batch_size="32" \
|
| 12 |
+
--per_device_eval_batch_size="16" \
|
| 13 |
+
--gradient_accumulation_steps="1" \
|
| 14 |
+
--logging_steps="25" \
|
| 15 |
+
--learning_rate="1e-5" \
|
| 16 |
+
--warmup_steps="500" \
|
| 17 |
+
--evaluation_strategy="steps" \
|
| 18 |
+
--eval_steps="1000" \
|
| 19 |
+
--save_strategy="steps" \
|
| 20 |
+
--save_steps="1000" \
|
| 21 |
+
--generation_max_length="225" \
|
| 22 |
+
--length_column_name="input_length" \
|
| 23 |
+
--max_duration_in_seconds="30" \
|
| 24 |
+
--text_column_name="sentence" \
|
| 25 |
+
--freeze_feature_encoder="False" \
|
| 26 |
+
--report_to="tensorboard" \
|
| 27 |
+
--metric_for_best_model="wer" \
|
| 28 |
+
--greater_is_better="False" \
|
| 29 |
+
--load_best_model_at_end \
|
| 30 |
+
--gradient_checkpointing \
|
| 31 |
+
--fp16 \
|
| 32 |
+
--overwrite_output_dir \
|
| 33 |
+
--do_train \
|
| 34 |
+
--do_eval \
|
| 35 |
+
--predict_with_generate \
|
| 36 |
+
--do_normalize_eval \
|
| 37 |
+
--streaming \
|
| 38 |
+
--use_auth_token \
|
| 39 |
+
--push_to_hub
|
run_speech_recognition_seq2seq_streaming.py
CHANGED
|
@@ -25,6 +25,7 @@ import os
|
|
| 25 |
import sys
|
| 26 |
from dataclasses import dataclass, field
|
| 27 |
from typing import Any, Dict, List, Optional, Union
|
|
|
|
| 28 |
|
| 29 |
import datasets
|
| 30 |
import torch
|
|
@@ -265,25 +266,58 @@ class DataCollatorSpeechSeq2SeqWithPadding:
|
|
| 265 |
return batch
|
| 266 |
|
| 267 |
|
| 268 |
-
def load_maybe_streaming_dataset(dataset_name, dataset_config_name, split="train", streaming=True, **kwargs):
|
| 269 |
"""
|
| 270 |
-
Utility function to load a dataset in streaming mode.
|
| 271 |
-
each split is loaded individually and then splits combined by taking alternating examples from
|
| 272 |
-
each (interleaving).
|
| 273 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
if "+" in split:
|
| 275 |
-
#
|
| 276 |
-
dataset_splits = [
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
]
|
| 280 |
-
# interleave multiple splits to form one dataset
|
| 281 |
-
interleaved_dataset = interleave_datasets(dataset_splits)
|
| 282 |
-
return interleaved_dataset
|
| 283 |
else:
|
| 284 |
-
|
| 285 |
-
dataset = load_dataset(dataset_name, dataset_config_name, split=split, streaming=streaming, trust_remote_code=True, **kwargs)
|
| 286 |
-
return dataset
|
| 287 |
|
| 288 |
|
| 289 |
def main():
|
|
@@ -356,37 +390,39 @@ def main():
|
|
| 356 |
raw_datasets["train"] = load_maybe_streaming_dataset(
|
| 357 |
data_args.dataset_name,
|
| 358 |
data_args.dataset_config_name,
|
|
|
|
| 359 |
split=data_args.train_split_name,
|
| 360 |
-
# xezpeleta
|
| 361 |
-
#use_auth_token=True if model_args.use_auth_token else None,
|
| 362 |
streaming=data_args.streaming,
|
| 363 |
)
|
| 364 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
if training_args.do_eval:
|
| 366 |
raw_datasets["eval"] = load_maybe_streaming_dataset(
|
| 367 |
data_args.dataset_name,
|
| 368 |
data_args.dataset_config_name,
|
|
|
|
| 369 |
split=data_args.eval_split_name,
|
| 370 |
-
#use_auth_token=True if model_args.use_auth_token else None,
|
| 371 |
streaming=data_args.streaming,
|
| 372 |
)
|
| 373 |
|
| 374 |
-
raw_datasets_features = list(next(iter(raw_datasets.values())).features.keys())
|
| 375 |
-
|
| 376 |
-
if data_args.audio_column_name not in raw_datasets_features:
|
| 377 |
-
raise ValueError(
|
| 378 |
-
f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
|
| 379 |
-
"Make sure to set `--audio_column_name` to the correct audio column - one of "
|
| 380 |
-
f"{', '.join(raw_datasets_features)}."
|
| 381 |
-
)
|
| 382 |
-
|
| 383 |
-
if data_args.text_column_name not in raw_datasets_features:
|
| 384 |
-
raise ValueError(
|
| 385 |
-
f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
|
| 386 |
-
"Make sure to set `--text_column_name` to the correct text column - one of "
|
| 387 |
-
f"{', '.join(raw_datasets_features)}."
|
| 388 |
-
)
|
| 389 |
-
|
| 390 |
# 5. Load pretrained model, tokenizer, and feature extractor
|
| 391 |
#
|
| 392 |
# Distributed training:
|
|
@@ -438,14 +474,12 @@ def main():
|
|
| 438 |
tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)
|
| 439 |
|
| 440 |
# 6. Resample speech dataset if necessary
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
|
| 445 |
-
)
|
| 446 |
|
| 447 |
# 7. Preprocessing the datasets.
|
| 448 |
-
|
| 449 |
max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
|
| 450 |
min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
|
| 451 |
audio_column_name = data_args.audio_column_name
|
|
@@ -469,20 +503,59 @@ def main():
|
|
| 469 |
else raw_datasets["eval"].select(range(data_args.max_eval_samples))
|
| 470 |
)
|
| 471 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
def prepare_dataset(batch):
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
|
| 487 |
with training_args.main_process_first(desc="dataset map pre-processing"):
|
| 488 |
vectorized_datasets = raw_datasets.map(
|
|
@@ -490,6 +563,16 @@ def main():
|
|
| 490 |
remove_columns=raw_datasets_features,
|
| 491 |
).with_format("torch")
|
| 492 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
if training_args.do_train and data_args.streaming:
|
| 494 |
# manually shuffle if streaming (done by the trainer for non-streaming)
|
| 495 |
vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(
|
|
@@ -551,7 +634,13 @@ def main():
|
|
| 551 |
# Trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
|
| 552 |
# Only required for streaming: Trainer automatically shuffles non-streaming datasets
|
| 553 |
class ShuffleCallback(TrainerCallback):
|
| 554 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 555 |
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
| 556 |
pass # set_epoch() is handled by the Trainer
|
| 557 |
elif isinstance(train_dataloader.dataset, IterableDataset):
|
|
@@ -563,7 +652,7 @@ def main():
|
|
| 563 |
args=training_args,
|
| 564 |
train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
|
| 565 |
eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
|
| 566 |
-
|
| 567 |
data_collator=data_collator,
|
| 568 |
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
|
| 569 |
callbacks=[ShuffleCallback()] if data_args.streaming else None,
|
|
|
|
| 25 |
import sys
|
| 26 |
from dataclasses import dataclass, field
|
| 27 |
from typing import Any, Dict, List, Optional, Union
|
| 28 |
+
import numpy
|
| 29 |
|
| 30 |
import datasets
|
| 31 |
import torch
|
|
|
|
| 266 |
return batch
|
| 267 |
|
| 268 |
|
| 269 |
+
def load_maybe_streaming_dataset(dataset_name, dataset_config_name, data_args, split="train", streaming=True, **kwargs):
|
| 270 |
"""
|
| 271 |
+
Utility function to load a dataset in streaming mode.
|
|
|
|
|
|
|
| 272 |
"""
|
| 273 |
+
logger.info(f"Loading dataset {dataset_name} split {split} (streaming={streaming})")
|
| 274 |
+
|
| 275 |
+
def load_single_split(split_name):
|
| 276 |
+
logger.info(f"Loading split: {split_name}")
|
| 277 |
+
ds = load_dataset(
|
| 278 |
+
dataset_name,
|
| 279 |
+
dataset_config_name,
|
| 280 |
+
split=split_name,
|
| 281 |
+
streaming=streaming,
|
| 282 |
+
trust_remote_code=True,
|
| 283 |
+
**kwargs
|
| 284 |
+
)
|
| 285 |
+
|
| 286 |
+
# Add validation transform to ensure consistent audio format
|
| 287 |
+
def validate_example(example):
|
| 288 |
+
if not isinstance(example[data_args.audio_column_name], dict):
|
| 289 |
+
example[data_args.audio_column_name] = {
|
| 290 |
+
'array': example[data_args.audio_column_name].array,
|
| 291 |
+
'sampling_rate': example[data_args.audio_column_name].sampling_rate,
|
| 292 |
+
'path': getattr(example[data_args.audio_column_name], 'path', None)
|
| 293 |
+
}
|
| 294 |
+
return example
|
| 295 |
+
|
| 296 |
+
ds = ds.map(validate_example)
|
| 297 |
+
|
| 298 |
+
# Log first example structure for debugging
|
| 299 |
+
try:
|
| 300 |
+
first_example = next(iter(ds))
|
| 301 |
+
logger.info(f"First example from {split_name}:")
|
| 302 |
+
logger.info(f" Keys: {first_example.keys()}")
|
| 303 |
+
if data_args.audio_column_name in first_example:
|
| 304 |
+
audio = first_example[data_args.audio_column_name]
|
| 305 |
+
logger.info(f" Audio type: {type(audio)}")
|
| 306 |
+
if isinstance(audio, dict):
|
| 307 |
+
logger.info(f" Audio keys: {audio.keys()}")
|
| 308 |
+
logger.info(f" Array type: {type(audio['array']) if 'array' in audio else 'missing'}")
|
| 309 |
+
except Exception as e:
|
| 310 |
+
logger.warning(f"Could not inspect first example from {split_name}: {e}")
|
| 311 |
+
|
| 312 |
+
return ds
|
| 313 |
+
|
| 314 |
if "+" in split:
|
| 315 |
+
# Load and validate each split individually
|
| 316 |
+
dataset_splits = [load_single_split(split_name) for split_name in split.split("+")]
|
| 317 |
+
# Interleave datasets
|
| 318 |
+
return interleave_datasets(dataset_splits)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
else:
|
| 320 |
+
return load_single_split(split)
|
|
|
|
|
|
|
| 321 |
|
| 322 |
|
| 323 |
def main():
|
|
|
|
| 390 |
raw_datasets["train"] = load_maybe_streaming_dataset(
|
| 391 |
data_args.dataset_name,
|
| 392 |
data_args.dataset_config_name,
|
| 393 |
+
data_args,
|
| 394 |
split=data_args.train_split_name,
|
|
|
|
|
|
|
| 395 |
streaming=data_args.streaming,
|
| 396 |
)
|
| 397 |
|
| 398 |
+
# Get features from train dataset since it's guaranteed to exist if do_train is True
|
| 399 |
+
train_dataset = raw_datasets["train"]
|
| 400 |
+
first_example = next(iter(train_dataset))
|
| 401 |
+
raw_datasets_features = list(first_example.keys())
|
| 402 |
+
|
| 403 |
+
if data_args.audio_column_name not in raw_datasets_features:
|
| 404 |
+
raise ValueError(
|
| 405 |
+
f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
|
| 406 |
+
"Make sure to set `--audio_column_name` to the correct audio column - one of "
|
| 407 |
+
f"{', '.join(raw_datasets_features)}."
|
| 408 |
+
)
|
| 409 |
+
|
| 410 |
+
if data_args.text_column_name not in raw_datasets_features:
|
| 411 |
+
raise ValueError(
|
| 412 |
+
f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
|
| 413 |
+
"Make sure to set `--text_column_name` to the correct text column - one of "
|
| 414 |
+
f"{', '.join(raw_datasets_features)}."
|
| 415 |
+
)
|
| 416 |
+
|
| 417 |
if training_args.do_eval:
|
| 418 |
raw_datasets["eval"] = load_maybe_streaming_dataset(
|
| 419 |
data_args.dataset_name,
|
| 420 |
data_args.dataset_config_name,
|
| 421 |
+
data_args,
|
| 422 |
split=data_args.eval_split_name,
|
|
|
|
| 423 |
streaming=data_args.streaming,
|
| 424 |
)
|
| 425 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
# 5. Load pretrained model, tokenizer, and feature extractor
|
| 427 |
#
|
| 428 |
# Distributed training:
|
|
|
|
| 474 |
tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)
|
| 475 |
|
| 476 |
# 6. Resample speech dataset if necessary
|
| 477 |
+
# For streaming datasets with audio bytes, sampling rate is handled in prepare_dataset
|
| 478 |
+
logger.info("Using feature extractor sampling rate: %d", feature_extractor.sampling_rate)
|
| 479 |
+
dataset_sampling_rate = feature_extractor.sampling_rate
|
|
|
|
|
|
|
| 480 |
|
| 481 |
# 7. Preprocessing the datasets.
|
| 482 |
+
logger.info("Starting dataset preprocessing")
|
| 483 |
max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
|
| 484 |
min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
|
| 485 |
audio_column_name = data_args.audio_column_name
|
|
|
|
| 503 |
else raw_datasets["eval"].select(range(data_args.max_eval_samples))
|
| 504 |
)
|
| 505 |
|
| 506 |
+
# Inspect dataset before processing
|
| 507 |
+
for split, dataset in raw_datasets.items():
|
| 508 |
+
try:
|
| 509 |
+
first_example = next(iter(dataset))
|
| 510 |
+
logger.info(f"First example from {split} before processing:")
|
| 511 |
+
logger.info(f"Keys: {first_example.keys()}")
|
| 512 |
+
if audio_column_name in first_example:
|
| 513 |
+
audio_data = first_example[audio_column_name]
|
| 514 |
+
logger.info(f"Audio column type: {type(audio_data)}")
|
| 515 |
+
if isinstance(audio_data, dict):
|
| 516 |
+
logger.info(f"Audio keys: {audio_data.keys()}")
|
| 517 |
+
except Exception as e:
|
| 518 |
+
logger.warning(f"Could not inspect first example from {split}: {e}")
|
| 519 |
+
|
| 520 |
def prepare_dataset(batch):
|
| 521 |
+
try:
|
| 522 |
+
# Validate audio format
|
| 523 |
+
audio = batch[audio_column_name]
|
| 524 |
+
|
| 525 |
+
# Load audio from bytes if needed
|
| 526 |
+
if isinstance(audio, dict) and 'bytes' in audio:
|
| 527 |
+
import io
|
| 528 |
+
import soundfile as sf
|
| 529 |
+
audio_bytes = io.BytesIO(audio['bytes'])
|
| 530 |
+
audio_array, sampling_rate = sf.read(audio_bytes)
|
| 531 |
+
audio = {'array': audio_array, 'sampling_rate': sampling_rate}
|
| 532 |
+
|
| 533 |
+
# Process audio through feature extractor
|
| 534 |
+
inputs = feature_extractor(audio['array'], sampling_rate=audio['sampling_rate'])
|
| 535 |
+
batch["input_length"] = len(audio['array'])
|
| 536 |
+
batch[model_input_name] = inputs[model_input_name][0]
|
| 537 |
+
|
| 538 |
+
# Process text
|
| 539 |
+
input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
|
| 540 |
+
if do_remove_punctuation:
|
| 541 |
+
input_str = normalizer(input_str).strip()
|
| 542 |
+
batch["labels"] = tokenizer(input_str).input_ids
|
| 543 |
+
|
| 544 |
+
return batch
|
| 545 |
+
|
| 546 |
+
except Exception as e:
|
| 547 |
+
logger.error(f"Error processing batch in prepare_dataset:")
|
| 548 |
+
logger.error(f" Error type: {type(e).__name__}")
|
| 549 |
+
logger.error(f" Error message: {str(e)}")
|
| 550 |
+
logger.error(f" Batch keys: {list(batch.keys())}")
|
| 551 |
+
if audio_column_name in batch:
|
| 552 |
+
audio_data = batch[audio_column_name]
|
| 553 |
+
logger.error(f" Audio type: {type(audio_data)}")
|
| 554 |
+
if isinstance(audio_data, dict):
|
| 555 |
+
logger.error(f" Audio keys: {list(audio_data.keys())}")
|
| 556 |
+
elif hasattr(audio_data, '__dict__'):
|
| 557 |
+
logger.error(f" Audio attributes: {dir(audio_data)}")
|
| 558 |
+
raise
|
| 559 |
|
| 560 |
with training_args.main_process_first(desc="dataset map pre-processing"):
|
| 561 |
vectorized_datasets = raw_datasets.map(
|
|
|
|
| 563 |
remove_columns=raw_datasets_features,
|
| 564 |
).with_format("torch")
|
| 565 |
|
| 566 |
+
# Inspect vectorized dataset
|
| 567 |
+
for split, dataset in vectorized_datasets.items():
|
| 568 |
+
try:
|
| 569 |
+
first_example = next(iter(dataset))
|
| 570 |
+
logger.info(f"First example from {split} after processing:")
|
| 571 |
+
logger.info(f"Keys: {first_example.keys()}")
|
| 572 |
+
logger.info(f"Types: {', '.join(f'{k}: {type(v)}' for k, v in first_example.items())}")
|
| 573 |
+
except Exception as e:
|
| 574 |
+
logger.warning(f"Could not inspect first example from vectorized {split}: {e}")
|
| 575 |
+
|
| 576 |
if training_args.do_train and data_args.streaming:
|
| 577 |
# manually shuffle if streaming (done by the trainer for non-streaming)
|
| 578 |
vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(
|
|
|
|
| 634 |
# Trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
|
| 635 |
# Only required for streaming: Trainer automatically shuffles non-streaming datasets
|
| 636 |
class ShuffleCallback(TrainerCallback):
|
| 637 |
+
def on_train_begin(self, args, state, control, **kwargs):
|
| 638 |
+
self.trainer = kwargs.get('trainer')
|
| 639 |
+
|
| 640 |
+
def on_epoch_begin(self, args, state, control, **kwargs):
|
| 641 |
+
if not hasattr(self, "trainer") or not hasattr(self.trainer, "train_dataloader") or self.trainer.train_dataloader is None:
|
| 642 |
+
return
|
| 643 |
+
train_dataloader = self.trainer.train_dataloader
|
| 644 |
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
| 645 |
pass # set_epoch() is handled by the Trainer
|
| 646 |
elif isinstance(train_dataloader.dataset, IterableDataset):
|
|
|
|
| 652 |
args=training_args,
|
| 653 |
train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
|
| 654 |
eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
|
| 655 |
+
processing_class=feature_extractor,
|
| 656 |
data_collator=data_collator,
|
| 657 |
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
|
| 658 |
callbacks=[ShuffleCallback()] if data_args.streaming else None,
|
run_speech_recognition_seq2seq_streaming_cv.py
ADDED
|
@@ -0,0 +1,657 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# coding=utf-8
|
| 3 |
+
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
| 4 |
+
#
|
| 5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
+
# you may not use this file except in compliance with the License.
|
| 7 |
+
# You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
+
# See the License for the specific language governing permissions and
|
| 15 |
+
# limitations under the License.
|
| 16 |
+
"""
|
| 17 |
+
Fine-tuning the library models for sequence to sequence speech recognition
|
| 18 |
+
with 🤗 Datasets' streaming mode.
|
| 19 |
+
"""
|
| 20 |
+
# You can also adapt this script for your own sequence to sequence speech
|
| 21 |
+
# recognition task. Pointers for this are left as comments.
|
| 22 |
+
|
| 23 |
+
import logging
|
| 24 |
+
import os
|
| 25 |
+
import sys
|
| 26 |
+
from dataclasses import dataclass, field
|
| 27 |
+
from typing import Any, Dict, List, Optional, Union
|
| 28 |
+
import numpy
|
| 29 |
+
|
| 30 |
+
import datasets
|
| 31 |
+
import torch
|
| 32 |
+
from datasets import DatasetDict, IterableDatasetDict, interleave_datasets, load_dataset
|
| 33 |
+
from torch.utils.data import IterableDataset
|
| 34 |
+
|
| 35 |
+
import evaluate
|
| 36 |
+
import transformers
|
| 37 |
+
from transformers import (
|
| 38 |
+
AutoConfig,
|
| 39 |
+
AutoFeatureExtractor,
|
| 40 |
+
AutoModelForSpeechSeq2Seq,
|
| 41 |
+
AutoProcessor,
|
| 42 |
+
AutoTokenizer,
|
| 43 |
+
HfArgumentParser,
|
| 44 |
+
Seq2SeqTrainer,
|
| 45 |
+
Seq2SeqTrainingArguments,
|
| 46 |
+
TrainerCallback,
|
| 47 |
+
set_seed,
|
| 48 |
+
)
|
| 49 |
+
from transformers.models.whisper.english_normalizer import BasicTextNormalizer
|
| 50 |
+
from transformers.trainer_pt_utils import IterableDatasetShard
|
| 51 |
+
from transformers.trainer_utils import get_last_checkpoint, is_main_process
|
| 52 |
+
from transformers.utils import check_min_version, send_example_telemetry
|
| 53 |
+
from transformers.utils.versions import require_version
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
check_min_version("4.25.0.dev0")

# Streaming-mode dataset loading requires a sufficiently recent `datasets` release.
require_version("datasets>=1.18.2", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")

# Module-level logger; verbosity is configured inside main() from the training args.
logger = logging.getLogger(__name__)
|
| 63 |
+
|
| 64 |
+
@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
    """

    # Required: the only field without a default, so HfArgumentParser makes it mandatory.
    model_name_or_path: str = field(
        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
    )
    config_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
    )
    tokenizer_name: Optional[str] = field(
        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
    )
    feature_extractor_name: Optional[str] = field(
        default=None, metadata={"help": "feature extractor name or path if not the same as model_name"}
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where to store the pretrained models downloaded from huggingface.co"},
    )
    use_fast_tokenizer: bool = field(
        default=True,
        metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."},
    )
    model_revision: str = field(
        default="main",
        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
    )
    use_auth_token: bool = field(
        default=False,
        metadata={
            "help": (
                "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
                "with private models)."
            )
        },
    )
    freeze_feature_encoder: bool = field(
        default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
    )
    freeze_encoder: bool = field(
        default=False, metadata={"help": "Whether to freeze the entire encoder of the seq2seq model."}
    )
    # NOTE(review): annotated List[List[int]] but defaults to None — Optional[List[List[int]]]
    # would be more accurate; HfArgumentParser tolerates this as-is.
    forced_decoder_ids: List[List[int]] = field(
        default=None,
        metadata={
            "help": (
                "A list of pairs of integers which indicates a mapping from generation indices to token indices "
                "that will be forced before sampling. For example, [[0, 123]] means the first generated token "
                "will always be a token of index 123."
            )
        },
    )
    # NOTE(review): same Optional-vs-None mismatch as forced_decoder_ids.
    suppress_tokens: List[int] = field(
        default=None, metadata={"help": "A list of tokens that will be suppressed at generation."}
    )
    model_index_name: str = field(default=None, metadata={"help": "Pretty name for the model card."})
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.
    """

    dataset_name: str = field(
        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
    )
    dataset_config_name: Optional[str] = field(
        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
    )
    text_column: Optional[str] = field(
        default=None,
        metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."},
    )
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of training examples to this "
                "value if set."
            )
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": (
                "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
                "value if set."
            )
        },
    )
    audio_column_name: str = field(
        default="audio",
        metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"},
    )
    text_column_name: str = field(
        default="text",
        metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
    )
    max_duration_in_seconds: float = field(
        default=20.0,
        metadata={
            "help": (
                "Truncate audio files that are longer than `max_duration_in_seconds` seconds to"
                " 'max_duration_in_seconds`"
            )
        },
    )
    min_duration_in_seconds: float = field(
        default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
    )
    train_split_name: str = field(
        default="train",
        metadata={
            "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'"
        },
    )
    # Fixed: the help text was a copy-paste of the train-split help and wrongly claimed
    # this controls the *training* split with a default of 'train'.
    eval_split_name: str = field(
        default="test",
        metadata={
            "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'test'"
        },
    )
    do_lower_case: bool = field(
        default=False,
        metadata={"help": "Whether the target text should be lower cased."},
    )
    do_remove_punctuation: bool = field(
        default=False,
        metadata={"help": "Whether the target text should be striped of punctuation."},
    )
    do_normalize_eval: bool = field(
        default=True,
        metadata={"help": "Whether to normalise the references and predictions in the eval WER calculation."},
    )
    language: str = field(
        default=None,
        metadata={
            "help": (
                "Language for multilingual fine-tuning. This argument should be set for multilingual fine-tuning "
                "only. For English speech recognition, it should be set to `None`."
            )
        },
    )
    task: str = field(
        default="transcribe",
        metadata={"help": "Task, either `transcribe` for speech recognition or `translate` for speech translation."},
    )
    shuffle_buffer_size: Optional[int] = field(
        default=500,
        metadata={
            "help": (
                "The number of streamed examples to download before shuffling them. The larger the buffer, "
                "the closer it is to real offline shuffling."
            )
        },
    )
    streaming: bool = field(
        default=True,
        metadata={"help": "Whether to use streaming mode to load and pre-process the data."},
    )
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
@dataclass
class DataCollatorSpeechSeq2SeqWithPadding:
    """
    Data collator that dynamically pads the inputs received.

    Audio features and tokenized labels require different padding strategies, so
    the two halves of each example are padded independently: the processor's
    feature extractor pads the audio inputs, while its tokenizer pads the labels.

    Args:
        processor ([`WhisperProcessor`])
            The processor used for processing the data.
        decoder_start_token_id (`int`)
            The begin-of-sentence of the decoder.
    """

    processor: Any
    decoder_start_token_id: int

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        # Inputs and labels have different lengths, so split them and pad separately.
        input_key = self.processor.model_input_names[0]
        audio_inputs = [{input_key: example[input_key]} for example in features]
        text_labels = [{"input_ids": example["labels"]} for example in features]

        batch = self.processor.feature_extractor.pad(audio_inputs, return_tensors="pt")
        padded_labels = self.processor.tokenizer.pad(text_labels, return_tensors="pt")

        # Positions covered by padding become -100 so the loss ignores them.
        label_ids = padded_labels["input_ids"].masked_fill(padded_labels.attention_mask.ne(1), -100)

        # If the tokenizer already prepended the decoder start (bos) token, strip it
        # here: the model re-appends it itself during training.
        if (label_ids[:, 0] == self.decoder_start_token_id).all().cpu().item():
            label_ids = label_ids[:, 1:]

        batch["labels"] = label_ids
        return batch
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def load_maybe_streaming_dataset(dataset_name, dataset_config_name, split="train", streaming=True, **kwargs):
    """
    Utility function to load a dataset in streaming mode. For datasets with multiple splits,
    each split is loaded individually and then splits combined by taking alternating examples from
    each (interleaving).

    A `split` of the form "train+validation" selects multiple splits; a plain split
    name loads that single split directly.
    """
    if "+" not in split:
        # Single split: load it directly (streaming or not, per the flag).
        return load_dataset(
            dataset_name, dataset_config_name, split=split, streaming=streaming, trust_remote_code=True, **kwargs
        )

    # Multiple `+`-separated splits: load each one, then interleave into a single dataset.
    loaded_splits = [
        load_dataset(
            dataset_name, dataset_config_name, split=part, streaming=streaming, trust_remote_code=True, **kwargs
        )
        for part in split.split("+")
    ]
    return interleave_datasets(loaded_splits)
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def main():
    """Entry point: parse args, load data/model, then train and/or evaluate a Whisper-style seq2seq ASR model."""
    # 1. Parse input arguments
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, Seq2SeqTrainingArguments))

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
    # information sent is the one passed as arguments along with your Python/PyTorch versions.
    send_example_telemetry("run_speech_recognition_seq2seq_streaming", model_args, data_args)

    # 2. Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        handlers=[logging.StreamHandler(sys.stdout)],
    )
    log_level = training_args.get_process_log_level()
    logger.setLevel(log_level)
    datasets.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.set_verbosity(log_level)
    transformers.utils.logging.enable_default_handler()
    transformers.utils.logging.enable_explicit_format()

    # NOTE(review): this overrides the log_level set just above; non-main processes log WARN+ only.
    logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    logger.info(f"Training/evaluation parameters {training_args}")

    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
    logger.info("Training/evaluation parameters %s", training_args)

    # 3. Detecting last checkpoint and eventually continue from last checkpoint
    last_checkpoint = None
    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
        last_checkpoint = get_last_checkpoint(training_args.output_dir)
        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
            raise ValueError(
                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
                "Use --overwrite_output_dir to overcome."
            )
        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
            logger.info(
                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
            )

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # 4. Load dataset
    # Streaming uses IterableDatasetDict (lazy); non-streaming uses regular in-memory DatasetDict.
    raw_datasets = IterableDatasetDict() if data_args.streaming else DatasetDict()

    if training_args.do_train:
        raw_datasets["train"] = load_maybe_streaming_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            split=data_args.train_split_name,
            #use_auth_token=True if model_args.use_auth_token else None,
            streaming=data_args.streaming,
        )

    if training_args.do_eval:
        raw_datasets["eval"] = load_maybe_streaming_dataset(
            data_args.dataset_name,
            data_args.dataset_config_name,
            split=data_args.eval_split_name,
            #use_auth_token=True if model_args.use_auth_token else None,
            streaming=data_args.streaming,
        )

    # Validate that the requested audio/text columns exist before any heavy work.
    raw_datasets_features = list(next(iter(raw_datasets.values())).features.keys())

    if data_args.audio_column_name not in raw_datasets_features:
        raise ValueError(
            f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
            "Make sure to set `--audio_column_name` to the correct audio column - one of "
            f"{', '.join(raw_datasets_features)}."
        )

    if data_args.text_column_name not in raw_datasets_features:
        raise ValueError(
            f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
            "Make sure to set `--text_column_name` to the correct text column - one of "
            f"{', '.join(raw_datasets_features)}."
        )

    # 5. Load pretrained model, tokenizer, and feature extractor
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    config.update({"forced_decoder_ids": model_args.forced_decoder_ids, "suppress_tokens": model_args.suppress_tokens})

    # Gradient checkpointing is incompatible with the decoder cache during training.
    if training_args.gradient_checkpointing:
        config.update({"use_cache": False})

    feature_extractor = AutoFeatureExtractor.from_pretrained(
        model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_args.model_name_or_path,
        config=config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
    )

    if model.config.decoder_start_token_id is None:
        raise ValueError("Make sure that `config.decoder_start_token_id` is correctly defined")

    if model_args.freeze_feature_encoder:
        model.freeze_feature_encoder()

    if model_args.freeze_encoder:
        model.freeze_encoder()

    if data_args.language is not None:
        # We only need to set the task id when the language is specified (i.e. in a multilingual setting)
        tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)

    # 6. Resample speech dataset if necessary
    dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
    if dataset_sampling_rate != feature_extractor.sampling_rate:
        raw_datasets = raw_datasets.cast_column(
            data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
        )

    # 7. Preprocessing the datasets.
    # We need to read the audio files as arrays and tokenize the targets.
    max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
    min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
    audio_column_name = data_args.audio_column_name
    text_column_name = data_args.text_column_name
    model_input_name = feature_extractor.model_input_names[0]
    do_lower_case = data_args.do_lower_case
    do_remove_punctuation = data_args.do_remove_punctuation
    normalizer = BasicTextNormalizer()  # 'official' text normalizer from OpenAI

    if data_args.max_train_samples is not None:
        raw_datasets["train"] = (
            raw_datasets["train"].take(data_args.max_train_samples)
            if data_args.streaming
            else raw_datasets["train"].select(range(data_args.max_train_samples))
        )

    if data_args.max_eval_samples is not None:
        raw_datasets["eval"] = (
            raw_datasets["eval"].take(data_args.max_eval_samples)
            if data_args.streaming
            else raw_datasets["eval"].select(range(data_args.max_eval_samples))
        )

    def prepare_dataset(batch):
        """Map one raw example to model features: audio -> input features, text -> label ids."""
        # process audio
        sample = batch[audio_column_name]

        # Handle different audio formats - some datasets provide raw arrays, others provide paths
        if isinstance(sample, dict):
            if "array" in sample:
                audio_array = sample["array"]
                sampling_rate = sample["sampling_rate"]
            elif "path" in sample:
                # Load from path if array is not available
                # NOTE(review): this passes the path string straight to the feature extractor —
                # verify the feature extractor actually accepts paths, or decode first.
                audio_array = sample["path"]  # datasets will load the file for us
                sampling_rate = sample.get("sampling_rate", feature_extractor.sampling_rate)
            else:
                raise ValueError(f"Unsupported audio format. Sample must contain either 'array' or 'path'. Got {sample.keys()}")
        else:
            # Assume it's a direct path or array
            audio_array = sample
            sampling_rate = feature_extractor.sampling_rate

        inputs = feature_extractor(audio_array, sampling_rate=sampling_rate)

        # process audio length
        if isinstance(audio_array, numpy.ndarray):
            batch["input_length"] = len(audio_array)
        else:
            # If we couldn't get the direct array length, estimate it from the processed features
            batch["input_length"] = inputs.get(model_input_name)[0].shape[0] * feature_extractor.hop_length

        # process targets
        input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name]
        if do_remove_punctuation:
            input_str = normalizer(input_str).strip()
        batch["labels"] = tokenizer(input_str).input_ids
        return batch

    with training_args.main_process_first(desc="dataset map pre-processing"):
        vectorized_datasets = raw_datasets.map(
            prepare_dataset,
            remove_columns=raw_datasets_features,
        ).with_format("torch")

    if training_args.do_train and data_args.streaming:
        # manually shuffle if streaming (done by the trainer for non-streaming)
        vectorized_datasets["train"] = vectorized_datasets["train"].shuffle(
            buffer_size=data_args.shuffle_buffer_size,
            seed=training_args.seed,
        )

    # filter training data that is shorter than min_input_length or longer than
    # max_input_length
    def is_audio_in_length_range(length):
        # Strict bounds: zero-length (== min default 0.0) samples are dropped too.
        return min_input_length < length < max_input_length

    if training_args.do_train:
        vectorized_datasets["train"] = vectorized_datasets["train"].filter(
            is_audio_in_length_range,
            input_columns=["input_length"],
        )

    # 8. Load Metric
    metric = evaluate.load("wer")
    do_normalize_eval = data_args.do_normalize_eval

    def compute_metrics(pred):
        """Compute word error rate (%) from a Seq2SeqTrainer prediction output."""
        pred_ids = pred.predictions

        # Restore pad tokens that the collator replaced with -100 so decoding works.
        pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id

        pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
        # we do not want to group tokens when computing the metrics
        label_str = tokenizer.batch_decode(pred.label_ids, skip_special_tokens=True)

        if do_normalize_eval:
            pred_str = [normalizer(pred) for pred in pred_str]
            label_str = [normalizer(label) for label in label_str]
            # filtering step to only evaluate the samples that correspond to non-zero references:
            pred_str = [pred_str[i] for i in range(len(pred_str)) if len(label_str[i]) > 0]
            label_str = [label_str[i] for i in range(len(label_str)) if len(label_str[i]) > 0]

        wer = 100 * metric.compute(predictions=pred_str, references=label_str)

        return {"wer": wer}

    # 9. Create a single speech processor
    if is_main_process(training_args.local_rank):
        # save feature extractor, tokenizer and config
        feature_extractor.save_pretrained(training_args.output_dir)
        tokenizer.save_pretrained(training_args.output_dir)
        config.save_pretrained(training_args.output_dir)

    # NOTE(review): in distributed runs, non-main processes reach this before the main
    # process may have finished saving — consider a barrier if this races in practice.
    processor = AutoProcessor.from_pretrained(training_args.output_dir)

    # 10. Define data collator
    data_collator = DataCollatorSpeechSeq2SeqWithPadding(
        processor=processor,
        decoder_start_token_id=model.config.decoder_start_token_id,
    )

    # 11. Configure Trainer
    # Trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
    # Only required for streaming: Trainer automatically shuffles non-streaming datasets
    class ShuffleCallback(TrainerCallback):
        def on_train_begin(self, args, state, control, **kwargs):
            # NOTE(review): Trainer does not normally pass itself as a 'trainer' kwarg to
            # callbacks, so this may always store None; the guard in on_epoch_begin then
            # makes the callback a no-op. Confirm against the installed transformers version.
            self.trainer = kwargs.get('trainer')

        def on_epoch_begin(self, args, state, control, **kwargs):
            # Bail out quietly if the trainer/dataloader reference was never captured.
            if not hasattr(self, "trainer") or not hasattr(self.trainer, "train_dataloader") or self.trainer.train_dataloader is None:
                return
            train_dataloader = self.trainer.train_dataloader
            if isinstance(train_dataloader.dataset, IterableDatasetShard):
                pass  # set_epoch() is handled by the Trainer
            elif isinstance(train_dataloader.dataset, IterableDataset):
                # Advance the epoch so the streaming shuffle buffer re-seeds each epoch.
                train_dataloader.dataset.set_epoch(train_dataloader.dataset._epoch + 1)

    # Initialize Trainer
    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
        eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
        # NOTE(review): only the feature extractor is passed here; the tokenizer is saved
        # separately above — confirm checkpoints contain everything needed for inference.
        processing_class=feature_extractor,
        data_collator=data_collator,
        compute_metrics=compute_metrics if training_args.predict_with_generate else None,
        callbacks=[ShuffleCallback()] if data_args.streaming else None,
    )

    # 12. Training
    if training_args.do_train:
        checkpoint = None
        if training_args.resume_from_checkpoint is not None:
            checkpoint = training_args.resume_from_checkpoint
        elif last_checkpoint is not None:
            checkpoint = last_checkpoint
        train_result = trainer.train(resume_from_checkpoint=checkpoint)
        trainer.save_model()  # Saves the feature extractor too for easy upload

        metrics = train_result.metrics
        if data_args.max_train_samples:
            metrics["train_samples"] = data_args.max_train_samples
        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()

    # 13. Evaluation
    results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")
        metrics = trainer.evaluate(
            metric_key_prefix="eval",
            max_length=training_args.generation_max_length,
            num_beams=training_args.generation_num_beams,
        )
        if data_args.max_eval_samples:
            metrics["eval_samples"] = data_args.max_eval_samples

        trainer.log_metrics("eval", metrics)
        trainer.save_metrics("eval", metrics)

    # 14. Write Training Stats
    kwargs = {
        "finetuned_from": model_args.model_name_or_path,
        "tasks": "automatic-speech-recognition",
        "tags": "whisper-event",
    }
    if data_args.dataset_name is not None:
        kwargs["dataset_tags"] = data_args.dataset_name
        if data_args.dataset_config_name is not None:
            kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}"
        else:
            kwargs["dataset"] = data_args.dataset_name
        if "common_voice" in data_args.dataset_name:
            kwargs["language"] = data_args.dataset_config_name.split('-')[0]
        if model_args.model_index_name is not None:
            kwargs["model_name"] = model_args.model_index_name

    if training_args.push_to_hub:
        trainer.push_to_hub(**kwargs)
    else:
        trainer.create_model_card(**kwargs)

    # NOTE(review): `results` is always returned empty; eval metrics are only logged/saved.
    return results
|
| 654 |
+
|
| 655 |
+
|
| 656 |
+
# Standard script entry point.
if __name__ == "__main__":
    main()
|
tokenizer_config.json
CHANGED
|
@@ -12980,6 +12980,7 @@
|
|
| 12980 |
"clean_up_tokenization_spaces": true,
|
| 12981 |
"eos_token": "<|endoftext|>",
|
| 12982 |
"errors": "replace",
|
|
|
|
| 12983 |
"model_max_length": 1024,
|
| 12984 |
"pad_token": "<|endoftext|>",
|
| 12985 |
"processor_class": "WhisperProcessor",
|
|
|
|
| 12980 |
"clean_up_tokenization_spaces": true,
|
| 12981 |
"eos_token": "<|endoftext|>",
|
| 12982 |
"errors": "replace",
|
| 12983 |
+
"extra_special_tokens": {},
|
| 12984 |
"model_max_length": 1024,
|
| 12985 |
"pad_token": "<|endoftext|>",
|
| 12986 |
"processor_class": "WhisperProcessor",
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37d41e6c93c9164dab27b25a4957996293e07bbed9895811c22360ffbda7ebbf
|
| 3 |
+
size 5432
|
wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-02-12T15:27:10.115999744Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-02-12T15:27:10.219975741Z","level":"INFO","msg":"created new stream","id":"lejyafmi"}
|
| 3 |
+
{"time":"2025-02-12T15:27:10.220016671Z","level":"INFO","msg":"stream: started","id":"lejyafmi"}
|
| 4 |
+
{"time":"2025-02-12T15:27:10.220140679Z","level":"INFO","msg":"writer: Do: started","stream_id":"lejyafmi"}
|
| 5 |
+
{"time":"2025-02-12T15:27:10.220197299Z","level":"INFO","msg":"handler: started","stream_id":"lejyafmi"}
|
| 6 |
+
{"time":"2025-02-12T15:27:10.220285178Z","level":"INFO","msg":"sender: started","stream_id":"lejyafmi"}
|
| 7 |
+
{"time":"2025-02-12T15:27:10.587185852Z","level":"INFO","msg":"Starting system monitor"}
|
wandb/debug.log
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
| 2 |
+
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Configure stats pid to 243546
|
| 3 |
+
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
| 4 |
+
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
| 5 |
+
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
| 6 |
+
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug.log
|
| 7 |
+
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_152709-lejyafmi/logs/debug-internal.log
|
| 8 |
+
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():756] calling init triggers
|
| 9 |
+
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-02-12 15:27:09,901 INFO MainThread:243546 [wandb_init.py:init():789] starting backend
|
| 12 |
+
2025-02-12 15:27:10,107 INFO MainThread:243546 [wandb_init.py:init():793] sending inform_init request
|
| 13 |
+
2025-02-12 15:27:10,112 INFO MainThread:243546 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-02-12 15:27:10,113 INFO MainThread:243546 [wandb_init.py:init():808] backend started and connected
|
| 15 |
+
2025-02-12 15:27:10,115 INFO MainThread:243546 [wandb_init.py:init():901] updated telemetry
|
| 16 |
+
2025-02-12 15:27:10,122 INFO MainThread:243546 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-02-12 15:27:10,584 INFO MainThread:243546 [wandb_init.py:init():994] starting run threads in backend
|
| 18 |
+
2025-02-12 15:27:10,691 INFO MainThread:243546 [wandb_run.py:_console_start():2385] atexit reg
|
| 19 |
+
2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
| 20 |
+
2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
| 21 |
+
2025-02-12 15:27:10,692 INFO MainThread:243546 [wandb_run.py:_redirect():2325] Redirects installed.
|
| 22 |
+
2025-02-12 15:27:10,694 INFO MainThread:243546 [wandb_init.py:init():1036] run started, returning control to user process
|
| 23 |
+
2025-02-12 15:27:10,698 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_15-26-19_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
| 24 |
+
2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7eb0a4c1e180>>
|
| 25 |
+
2025-02-12 15:27:10,704 INFO MainThread:243546 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
wandb/run-20250212_121751-d4i88lzt/files/config.yaml
ADDED
|
@@ -0,0 +1,512 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_attn_implementation_autoset:
|
| 2 |
+
value: true
|
| 3 |
+
_name_or_path:
|
| 4 |
+
value: openai/whisper-small
|
| 5 |
+
_wandb:
|
| 6 |
+
value:
|
| 7 |
+
cli_version: 0.19.6
|
| 8 |
+
m:
|
| 9 |
+
- "1": train/global_step
|
| 10 |
+
"6":
|
| 11 |
+
- 3
|
| 12 |
+
"7": []
|
| 13 |
+
python_version: 3.12.3
|
| 14 |
+
t:
|
| 15 |
+
"1":
|
| 16 |
+
- 1
|
| 17 |
+
- 5
|
| 18 |
+
- 11
|
| 19 |
+
- 49
|
| 20 |
+
- 51
|
| 21 |
+
- 53
|
| 22 |
+
- 55
|
| 23 |
+
- 71
|
| 24 |
+
- 100
|
| 25 |
+
"2":
|
| 26 |
+
- 1
|
| 27 |
+
- 5
|
| 28 |
+
- 11
|
| 29 |
+
- 49
|
| 30 |
+
- 51
|
| 31 |
+
- 53
|
| 32 |
+
- 55
|
| 33 |
+
- 71
|
| 34 |
+
- 100
|
| 35 |
+
"3":
|
| 36 |
+
- 7
|
| 37 |
+
- 13
|
| 38 |
+
- 19
|
| 39 |
+
- 23
|
| 40 |
+
- 55
|
| 41 |
+
- 66
|
| 42 |
+
"4": 3.12.3
|
| 43 |
+
"5": 0.19.6
|
| 44 |
+
"6": 4.49.0.dev0
|
| 45 |
+
"8":
|
| 46 |
+
- 5
|
| 47 |
+
"9":
|
| 48 |
+
"1": transformers_trainer
|
| 49 |
+
"12": 0.19.6
|
| 50 |
+
"13": linux-x86_64
|
| 51 |
+
accelerator_config:
|
| 52 |
+
value:
|
| 53 |
+
dispatch_batches: null
|
| 54 |
+
even_batches: true
|
| 55 |
+
gradient_accumulation_kwargs: null
|
| 56 |
+
non_blocking: false
|
| 57 |
+
split_batches: false
|
| 58 |
+
use_seedable_sampler: true
|
| 59 |
+
activation_dropout:
|
| 60 |
+
value: 0
|
| 61 |
+
activation_function:
|
| 62 |
+
value: gelu
|
| 63 |
+
adafactor:
|
| 64 |
+
value: false
|
| 65 |
+
adam_beta1:
|
| 66 |
+
value: 0.9
|
| 67 |
+
adam_beta2:
|
| 68 |
+
value: 0.999
|
| 69 |
+
adam_epsilon:
|
| 70 |
+
value: 1e-08
|
| 71 |
+
add_cross_attention:
|
| 72 |
+
value: false
|
| 73 |
+
apply_spec_augment:
|
| 74 |
+
value: false
|
| 75 |
+
architectures:
|
| 76 |
+
value:
|
| 77 |
+
- WhisperForConditionalGeneration
|
| 78 |
+
attention_dropout:
|
| 79 |
+
value: 0
|
| 80 |
+
auto_find_batch_size:
|
| 81 |
+
value: false
|
| 82 |
+
average_tokens_across_devices:
|
| 83 |
+
value: false
|
| 84 |
+
bad_words_ids:
|
| 85 |
+
value: null
|
| 86 |
+
batch_eval_metrics:
|
| 87 |
+
value: false
|
| 88 |
+
begin_suppress_tokens:
|
| 89 |
+
value:
|
| 90 |
+
- 220
|
| 91 |
+
- 50257
|
| 92 |
+
bf16:
|
| 93 |
+
value: false
|
| 94 |
+
bf16_full_eval:
|
| 95 |
+
value: false
|
| 96 |
+
bos_token_id:
|
| 97 |
+
value: 50257
|
| 98 |
+
chunk_size_feed_forward:
|
| 99 |
+
value: 0
|
| 100 |
+
classifier_proj_size:
|
| 101 |
+
value: 256
|
| 102 |
+
cross_attention_hidden_size:
|
| 103 |
+
value: null
|
| 104 |
+
d_model:
|
| 105 |
+
value: 768
|
| 106 |
+
data_seed:
|
| 107 |
+
value: null
|
| 108 |
+
dataloader_drop_last:
|
| 109 |
+
value: false
|
| 110 |
+
dataloader_num_workers:
|
| 111 |
+
value: 0
|
| 112 |
+
dataloader_persistent_workers:
|
| 113 |
+
value: false
|
| 114 |
+
dataloader_pin_memory:
|
| 115 |
+
value: true
|
| 116 |
+
dataloader_prefetch_factor:
|
| 117 |
+
value: null
|
| 118 |
+
ddp_backend:
|
| 119 |
+
value: null
|
| 120 |
+
ddp_broadcast_buffers:
|
| 121 |
+
value: null
|
| 122 |
+
ddp_bucket_cap_mb:
|
| 123 |
+
value: null
|
| 124 |
+
ddp_find_unused_parameters:
|
| 125 |
+
value: null
|
| 126 |
+
ddp_timeout:
|
| 127 |
+
value: 1800
|
| 128 |
+
debug:
|
| 129 |
+
value: []
|
| 130 |
+
decoder_attention_heads:
|
| 131 |
+
value: 12
|
| 132 |
+
decoder_ffn_dim:
|
| 133 |
+
value: 3072
|
| 134 |
+
decoder_layerdrop:
|
| 135 |
+
value: 0
|
| 136 |
+
decoder_layers:
|
| 137 |
+
value: 12
|
| 138 |
+
decoder_start_token_id:
|
| 139 |
+
value: 50258
|
| 140 |
+
deepspeed:
|
| 141 |
+
value: null
|
| 142 |
+
disable_tqdm:
|
| 143 |
+
value: false
|
| 144 |
+
dispatch_batches:
|
| 145 |
+
value: null
|
| 146 |
+
diversity_penalty:
|
| 147 |
+
value: 0
|
| 148 |
+
do_eval:
|
| 149 |
+
value: true
|
| 150 |
+
do_predict:
|
| 151 |
+
value: false
|
| 152 |
+
do_sample:
|
| 153 |
+
value: false
|
| 154 |
+
do_train:
|
| 155 |
+
value: true
|
| 156 |
+
dropout:
|
| 157 |
+
value: 0
|
| 158 |
+
early_stopping:
|
| 159 |
+
value: false
|
| 160 |
+
encoder_attention_heads:
|
| 161 |
+
value: 12
|
| 162 |
+
encoder_ffn_dim:
|
| 163 |
+
value: 3072
|
| 164 |
+
encoder_layerdrop:
|
| 165 |
+
value: 0
|
| 166 |
+
encoder_layers:
|
| 167 |
+
value: 12
|
| 168 |
+
encoder_no_repeat_ngram_size:
|
| 169 |
+
value: 0
|
| 170 |
+
eos_token_id:
|
| 171 |
+
value: 50257
|
| 172 |
+
eval_accumulation_steps:
|
| 173 |
+
value: null
|
| 174 |
+
eval_delay:
|
| 175 |
+
value: 0
|
| 176 |
+
eval_do_concat_batches:
|
| 177 |
+
value: true
|
| 178 |
+
eval_on_start:
|
| 179 |
+
value: false
|
| 180 |
+
eval_steps:
|
| 181 |
+
value: 1000
|
| 182 |
+
eval_strategy:
|
| 183 |
+
value: steps
|
| 184 |
+
eval_use_gather_object:
|
| 185 |
+
value: false
|
| 186 |
+
evaluation_strategy:
|
| 187 |
+
value: steps
|
| 188 |
+
exponential_decay_length_penalty:
|
| 189 |
+
value: null
|
| 190 |
+
finetuning_task:
|
| 191 |
+
value: null
|
| 192 |
+
forced_bos_token_id:
|
| 193 |
+
value: null
|
| 194 |
+
forced_decoder_ids:
|
| 195 |
+
value: null
|
| 196 |
+
forced_eos_token_id:
|
| 197 |
+
value: null
|
| 198 |
+
fp16:
|
| 199 |
+
value: true
|
| 200 |
+
fp16_backend:
|
| 201 |
+
value: auto
|
| 202 |
+
fp16_full_eval:
|
| 203 |
+
value: false
|
| 204 |
+
fp16_opt_level:
|
| 205 |
+
value: O1
|
| 206 |
+
fsdp:
|
| 207 |
+
value: []
|
| 208 |
+
fsdp_config:
|
| 209 |
+
value:
|
| 210 |
+
min_num_params: 0
|
| 211 |
+
xla: false
|
| 212 |
+
xla_fsdp_grad_ckpt: false
|
| 213 |
+
xla_fsdp_v2: false
|
| 214 |
+
fsdp_min_num_params:
|
| 215 |
+
value: 0
|
| 216 |
+
fsdp_transformer_layer_cls_to_wrap:
|
| 217 |
+
value: null
|
| 218 |
+
full_determinism:
|
| 219 |
+
value: false
|
| 220 |
+
generation_config:
|
| 221 |
+
value: null
|
| 222 |
+
generation_max_length:
|
| 223 |
+
value: 225
|
| 224 |
+
generation_num_beams:
|
| 225 |
+
value: null
|
| 226 |
+
gradient_accumulation_steps:
|
| 227 |
+
value: 1
|
| 228 |
+
gradient_checkpointing:
|
| 229 |
+
value: true
|
| 230 |
+
gradient_checkpointing_kwargs:
|
| 231 |
+
value: null
|
| 232 |
+
greater_is_better:
|
| 233 |
+
value: false
|
| 234 |
+
group_by_length:
|
| 235 |
+
value: false
|
| 236 |
+
half_precision_backend:
|
| 237 |
+
value: auto
|
| 238 |
+
hub_always_push:
|
| 239 |
+
value: false
|
| 240 |
+
hub_model_id:
|
| 241 |
+
value: null
|
| 242 |
+
hub_private_repo:
|
| 243 |
+
value: null
|
| 244 |
+
hub_strategy:
|
| 245 |
+
value: every_save
|
| 246 |
+
hub_token:
|
| 247 |
+
value: <HUB_TOKEN>
|
| 248 |
+
id2label:
|
| 249 |
+
value:
|
| 250 |
+
"0": LABEL_0
|
| 251 |
+
"1": LABEL_1
|
| 252 |
+
ignore_data_skip:
|
| 253 |
+
value: false
|
| 254 |
+
include_for_metrics:
|
| 255 |
+
value: []
|
| 256 |
+
include_inputs_for_metrics:
|
| 257 |
+
value: false
|
| 258 |
+
include_num_input_tokens_seen:
|
| 259 |
+
value: false
|
| 260 |
+
include_tokens_per_second:
|
| 261 |
+
value: false
|
| 262 |
+
init_std:
|
| 263 |
+
value: 0.02
|
| 264 |
+
is_decoder:
|
| 265 |
+
value: false
|
| 266 |
+
is_encoder_decoder:
|
| 267 |
+
value: true
|
| 268 |
+
jit_mode_eval:
|
| 269 |
+
value: false
|
| 270 |
+
label_names:
|
| 271 |
+
value: null
|
| 272 |
+
label_smoothing_factor:
|
| 273 |
+
value: 0
|
| 274 |
+
label2id:
|
| 275 |
+
value:
|
| 276 |
+
LABEL_0: 0
|
| 277 |
+
LABEL_1: 1
|
| 278 |
+
learning_rate:
|
| 279 |
+
value: 1e-05
|
| 280 |
+
length_column_name:
|
| 281 |
+
value: input_length
|
| 282 |
+
length_penalty:
|
| 283 |
+
value: 1
|
| 284 |
+
load_best_model_at_end:
|
| 285 |
+
value: true
|
| 286 |
+
local_rank:
|
| 287 |
+
value: 0
|
| 288 |
+
log_level:
|
| 289 |
+
value: passive
|
| 290 |
+
log_level_replica:
|
| 291 |
+
value: warning
|
| 292 |
+
log_on_each_node:
|
| 293 |
+
value: true
|
| 294 |
+
logging_dir:
|
| 295 |
+
value: ./runs/Feb12_12-17-27_tknika
|
| 296 |
+
logging_first_step:
|
| 297 |
+
value: false
|
| 298 |
+
logging_nan_inf_filter:
|
| 299 |
+
value: true
|
| 300 |
+
logging_steps:
|
| 301 |
+
value: 25
|
| 302 |
+
logging_strategy:
|
| 303 |
+
value: steps
|
| 304 |
+
lr_scheduler_type:
|
| 305 |
+
value: linear
|
| 306 |
+
mask_feature_length:
|
| 307 |
+
value: 10
|
| 308 |
+
mask_feature_min_masks:
|
| 309 |
+
value: 0
|
| 310 |
+
mask_feature_prob:
|
| 311 |
+
value: 0
|
| 312 |
+
mask_time_length:
|
| 313 |
+
value: 10
|
| 314 |
+
mask_time_min_masks:
|
| 315 |
+
value: 2
|
| 316 |
+
mask_time_prob:
|
| 317 |
+
value: 0.05
|
| 318 |
+
max_grad_norm:
|
| 319 |
+
value: 1
|
| 320 |
+
max_length:
|
| 321 |
+
value: 448
|
| 322 |
+
max_source_positions:
|
| 323 |
+
value: 1500
|
| 324 |
+
max_steps:
|
| 325 |
+
value: 8000
|
| 326 |
+
max_target_positions:
|
| 327 |
+
value: 448
|
| 328 |
+
median_filter_width:
|
| 329 |
+
value: 7
|
| 330 |
+
metric_for_best_model:
|
| 331 |
+
value: wer
|
| 332 |
+
min_length:
|
| 333 |
+
value: 0
|
| 334 |
+
model/num_parameters:
|
| 335 |
+
value: 241734912
|
| 336 |
+
model_type:
|
| 337 |
+
value: whisper
|
| 338 |
+
mp_parameters:
|
| 339 |
+
value: ""
|
| 340 |
+
neftune_noise_alpha:
|
| 341 |
+
value: null
|
| 342 |
+
no_cuda:
|
| 343 |
+
value: false
|
| 344 |
+
no_repeat_ngram_size:
|
| 345 |
+
value: 0
|
| 346 |
+
num_beam_groups:
|
| 347 |
+
value: 1
|
| 348 |
+
num_beams:
|
| 349 |
+
value: 1
|
| 350 |
+
num_hidden_layers:
|
| 351 |
+
value: 12
|
| 352 |
+
num_mel_bins:
|
| 353 |
+
value: 80
|
| 354 |
+
num_return_sequences:
|
| 355 |
+
value: 1
|
| 356 |
+
num_train_epochs:
|
| 357 |
+
value: 3
|
| 358 |
+
optim:
|
| 359 |
+
value: adamw_torch
|
| 360 |
+
optim_args:
|
| 361 |
+
value: null
|
| 362 |
+
optim_target_modules:
|
| 363 |
+
value: null
|
| 364 |
+
output_attentions:
|
| 365 |
+
value: false
|
| 366 |
+
output_dir:
|
| 367 |
+
value: ./
|
| 368 |
+
output_hidden_states:
|
| 369 |
+
value: false
|
| 370 |
+
output_scores:
|
| 371 |
+
value: false
|
| 372 |
+
overwrite_output_dir:
|
| 373 |
+
value: true
|
| 374 |
+
pad_token_id:
|
| 375 |
+
value: 50257
|
| 376 |
+
past_index:
|
| 377 |
+
value: -1
|
| 378 |
+
per_device_eval_batch_size:
|
| 379 |
+
value: 16
|
| 380 |
+
per_device_train_batch_size:
|
| 381 |
+
value: 32
|
| 382 |
+
per_gpu_eval_batch_size:
|
| 383 |
+
value: null
|
| 384 |
+
per_gpu_train_batch_size:
|
| 385 |
+
value: null
|
| 386 |
+
predict_with_generate:
|
| 387 |
+
value: true
|
| 388 |
+
prediction_loss_only:
|
| 389 |
+
value: false
|
| 390 |
+
prefix:
|
| 391 |
+
value: null
|
| 392 |
+
problem_type:
|
| 393 |
+
value: null
|
| 394 |
+
push_to_hub:
|
| 395 |
+
value: true
|
| 396 |
+
push_to_hub_model_id:
|
| 397 |
+
value: null
|
| 398 |
+
push_to_hub_organization:
|
| 399 |
+
value: null
|
| 400 |
+
push_to_hub_token:
|
| 401 |
+
value: <PUSH_TO_HUB_TOKEN>
|
| 402 |
+
ray_scope:
|
| 403 |
+
value: last
|
| 404 |
+
remove_invalid_values:
|
| 405 |
+
value: false
|
| 406 |
+
remove_unused_columns:
|
| 407 |
+
value: true
|
| 408 |
+
repetition_penalty:
|
| 409 |
+
value: 1
|
| 410 |
+
report_to:
|
| 411 |
+
value:
|
| 412 |
+
- wandb
|
| 413 |
+
restore_callback_states_from_checkpoint:
|
| 414 |
+
value: false
|
| 415 |
+
resume_from_checkpoint:
|
| 416 |
+
value: null
|
| 417 |
+
return_dict:
|
| 418 |
+
value: true
|
| 419 |
+
return_dict_in_generate:
|
| 420 |
+
value: false
|
| 421 |
+
run_name:
|
| 422 |
+
value: whisper-small-eu
|
| 423 |
+
save_on_each_node:
|
| 424 |
+
value: false
|
| 425 |
+
save_only_model:
|
| 426 |
+
value: false
|
| 427 |
+
save_safetensors:
|
| 428 |
+
value: true
|
| 429 |
+
save_steps:
|
| 430 |
+
value: 1000
|
| 431 |
+
save_strategy:
|
| 432 |
+
value: steps
|
| 433 |
+
save_total_limit:
|
| 434 |
+
value: null
|
| 435 |
+
scale_embedding:
|
| 436 |
+
value: false
|
| 437 |
+
seed:
|
| 438 |
+
value: 42
|
| 439 |
+
sep_token_id:
|
| 440 |
+
value: null
|
| 441 |
+
skip_memory_metrics:
|
| 442 |
+
value: true
|
| 443 |
+
sortish_sampler:
|
| 444 |
+
value: false
|
| 445 |
+
split_batches:
|
| 446 |
+
value: null
|
| 447 |
+
suppress_tokens:
|
| 448 |
+
value: null
|
| 449 |
+
task_specific_params:
|
| 450 |
+
value: null
|
| 451 |
+
temperature:
|
| 452 |
+
value: 1
|
| 453 |
+
tf_legacy_loss:
|
| 454 |
+
value: false
|
| 455 |
+
tf32:
|
| 456 |
+
value: null
|
| 457 |
+
tie_encoder_decoder:
|
| 458 |
+
value: false
|
| 459 |
+
tie_word_embeddings:
|
| 460 |
+
value: true
|
| 461 |
+
tokenizer_class:
|
| 462 |
+
value: null
|
| 463 |
+
top_k:
|
| 464 |
+
value: 50
|
| 465 |
+
top_p:
|
| 466 |
+
value: 1
|
| 467 |
+
torch_compile:
|
| 468 |
+
value: false
|
| 469 |
+
torch_compile_backend:
|
| 470 |
+
value: null
|
| 471 |
+
torch_compile_mode:
|
| 472 |
+
value: null
|
| 473 |
+
torch_dtype:
|
| 474 |
+
value: float32
|
| 475 |
+
torch_empty_cache_steps:
|
| 476 |
+
value: null
|
| 477 |
+
torchdynamo:
|
| 478 |
+
value: null
|
| 479 |
+
torchscript:
|
| 480 |
+
value: false
|
| 481 |
+
tpu_metrics_debug:
|
| 482 |
+
value: false
|
| 483 |
+
tpu_num_cores:
|
| 484 |
+
value: null
|
| 485 |
+
transformers_version:
|
| 486 |
+
value: 4.49.0.dev0
|
| 487 |
+
typical_p:
|
| 488 |
+
value: 1
|
| 489 |
+
use_bfloat16:
|
| 490 |
+
value: false
|
| 491 |
+
use_cache:
|
| 492 |
+
value: false
|
| 493 |
+
use_cpu:
|
| 494 |
+
value: false
|
| 495 |
+
use_ipex:
|
| 496 |
+
value: false
|
| 497 |
+
use_legacy_prediction_loop:
|
| 498 |
+
value: false
|
| 499 |
+
use_liger_kernel:
|
| 500 |
+
value: false
|
| 501 |
+
use_mps_device:
|
| 502 |
+
value: false
|
| 503 |
+
use_weighted_layer_sum:
|
| 504 |
+
value: false
|
| 505 |
+
vocab_size:
|
| 506 |
+
value: 51865
|
| 507 |
+
warmup_ratio:
|
| 508 |
+
value: 0
|
| 509 |
+
warmup_steps:
|
| 510 |
+
value: 500
|
| 511 |
+
weight_decay:
|
| 512 |
+
value: 0
|
wandb/run-20250212_121751-d4i88lzt/files/output.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
| 2 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
|
| 3 |
+
main()
|
| 4 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
|
| 5 |
+
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
| 6 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 7 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
| 8 |
+
return inner_training_loop(
|
| 9 |
+
^^^^^^^^^^^^^^^^^^^^
|
| 10 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
| 11 |
+
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
| 12 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 13 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
| 14 |
+
return self.call_event("on_epoch_begin", args, state, control)
|
| 15 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 16 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
| 17 |
+
result = getattr(callback, event)(
|
| 18 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 19 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
|
| 20 |
+
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
| 21 |
+
^^^^^^^^^^^^^^^^^^^^^^^^
|
| 22 |
+
AttributeError: 'NoneType' object has no attribute 'dataset'
|
wandb/run-20250212_121751-d4i88lzt/files/requirements.txt
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiosignal==1.3.2
|
| 2 |
+
Markdown==3.7
|
| 3 |
+
more-itertools==10.6.0
|
| 4 |
+
requests==2.32.3
|
| 5 |
+
sentry-sdk==2.21.0
|
| 6 |
+
torchaudio==2.6.0
|
| 7 |
+
charset-normalizer==3.4.1
|
| 8 |
+
docker-pycreds==0.4.0
|
| 9 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 10 |
+
PyYAML==6.0.2
|
| 11 |
+
librosa==0.10.2.post1
|
| 12 |
+
soxr==0.5.0.post1
|
| 13 |
+
multiprocess==0.70.16
|
| 14 |
+
setuptools==75.8.0
|
| 15 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 16 |
+
joblib==1.4.2
|
| 17 |
+
pytz==2025.1
|
| 18 |
+
pip==24.0
|
| 19 |
+
scikit-learn==1.6.1
|
| 20 |
+
certifi==2025.1.31
|
| 21 |
+
jiwer==3.1.0
|
| 22 |
+
regex==2024.11.6
|
| 23 |
+
annotated-types==0.7.0
|
| 24 |
+
grpcio==1.70.0
|
| 25 |
+
msgpack==1.1.0
|
| 26 |
+
mpmath==1.3.0
|
| 27 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 28 |
+
soundfile==0.13.1
|
| 29 |
+
dill==0.3.8
|
| 30 |
+
nvidia-nvtx-cu12==12.4.127
|
| 31 |
+
six==1.17.0
|
| 32 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 33 |
+
pyarrow==19.0.0
|
| 34 |
+
nvidia-nccl-cu12==2.21.5
|
| 35 |
+
psutil==6.1.1
|
| 36 |
+
decorator==5.1.1
|
| 37 |
+
llvmlite==0.44.0
|
| 38 |
+
frozenlist==1.5.0
|
| 39 |
+
pydantic==2.10.6
|
| 40 |
+
networkx==3.4.2
|
| 41 |
+
idna==3.10
|
| 42 |
+
wandb==0.19.6
|
| 43 |
+
aiohttp==3.11.12
|
| 44 |
+
RapidFuzz==3.12.1
|
| 45 |
+
pandas==2.2.3
|
| 46 |
+
python-dateutil==2.9.0.post0
|
| 47 |
+
numpy==2.1.3
|
| 48 |
+
tokenizers==0.21.0
|
| 49 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 50 |
+
typing_extensions==4.12.2
|
| 51 |
+
urllib3==2.3.0
|
| 52 |
+
setproctitle==1.3.4
|
| 53 |
+
tzdata==2025.1
|
| 54 |
+
sympy==1.13.1
|
| 55 |
+
pooch==1.8.2
|
| 56 |
+
click==8.1.8
|
| 57 |
+
pydantic_core==2.27.2
|
| 58 |
+
MarkupSafe==3.0.2
|
| 59 |
+
scipy==1.15.1
|
| 60 |
+
accelerate==1.3.0
|
| 61 |
+
tensorboard==2.19.0
|
| 62 |
+
protobuf==5.29.3
|
| 63 |
+
gitdb==4.0.12
|
| 64 |
+
smmap==5.0.2
|
| 65 |
+
absl-py==2.1.0
|
| 66 |
+
tqdm==4.67.1
|
| 67 |
+
yarl==1.18.3
|
| 68 |
+
pycparser==2.22
|
| 69 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 70 |
+
attrs==25.1.0
|
| 71 |
+
lazy_loader==0.4
|
| 72 |
+
tensorboard-data-server==0.7.2
|
| 73 |
+
threadpoolctl==3.5.0
|
| 74 |
+
GitPython==3.1.44
|
| 75 |
+
safetensors==0.5.2
|
| 76 |
+
fsspec==2024.12.0
|
| 77 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 78 |
+
filelock==3.17.0
|
| 79 |
+
aiohappyeyeballs==2.4.6
|
| 80 |
+
packaging==24.2
|
| 81 |
+
datasets==3.2.1.dev0
|
| 82 |
+
audioread==3.0.1
|
| 83 |
+
propcache==0.2.1
|
| 84 |
+
transformers==4.49.0.dev0
|
| 85 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 86 |
+
cffi==1.17.1
|
| 87 |
+
evaluate==0.4.3
|
| 88 |
+
Werkzeug==3.1.3
|
| 89 |
+
huggingface-hub==0.28.1
|
| 90 |
+
Jinja2==3.1.5
|
| 91 |
+
torch==2.6.0
|
| 92 |
+
nvidia-curand-cu12==10.3.5.147
|
| 93 |
+
xxhash==3.5.0
|
| 94 |
+
platformdirs==4.3.6
|
| 95 |
+
multidict==6.1.0
|
| 96 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 97 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 98 |
+
triton==3.2.0
|
| 99 |
+
numba==0.61.0
|
| 100 |
+
importlib_metadata==8.0.0
|
| 101 |
+
platformdirs==4.2.2
|
| 102 |
+
typeguard==4.3.0
|
| 103 |
+
more-itertools==10.3.0
|
| 104 |
+
tomli==2.0.1
|
| 105 |
+
autocommand==2.2.2
|
| 106 |
+
zipp==3.19.2
|
| 107 |
+
typing_extensions==4.12.2
|
| 108 |
+
backports.tarfile==1.2.0
|
| 109 |
+
inflect==7.3.1
|
| 110 |
+
jaraco.text==3.12.1
|
| 111 |
+
wheel==0.43.0
|
| 112 |
+
packaging==24.2
|
| 113 |
+
jaraco.collections==5.1.0
|
| 114 |
+
jaraco.functools==4.0.1
|
| 115 |
+
jaraco.context==5.3.0
|
wandb/run-20250212_121751-d4i88lzt/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
|
| 3 |
+
"python": "CPython 3.12.3",
|
| 4 |
+
"startedAt": "2025-02-12T12:17:51.527114Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--model_name_or_path=openai/whisper-small",
|
| 7 |
+
"--dataset_name=asierhv/composite_corpus_eu_v2.1",
|
| 8 |
+
"--language=basque",
|
| 9 |
+
"--train_split_name=train",
|
| 10 |
+
"--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
|
| 11 |
+
"--model_index_name=Whisper Small Basque",
|
| 12 |
+
"--max_steps=8000",
|
| 13 |
+
"--output_dir=./",
|
| 14 |
+
"--per_device_train_batch_size=32",
|
| 15 |
+
"--per_device_eval_batch_size=16",
|
| 16 |
+
"--gradient_accumulation_steps=1",
|
| 17 |
+
"--logging_steps=25",
|
| 18 |
+
"--learning_rate=1e-5",
|
| 19 |
+
"--warmup_steps=500",
|
| 20 |
+
"--evaluation_strategy=steps",
|
| 21 |
+
"--eval_steps=1000",
|
| 22 |
+
"--save_strategy=steps",
|
| 23 |
+
"--save_steps=1000",
|
| 24 |
+
"--generation_max_length=225",
|
| 25 |
+
"--length_column_name=input_length",
|
| 26 |
+
"--max_duration_in_seconds=30",
|
| 27 |
+
"--text_column_name=sentence",
|
| 28 |
+
"--freeze_feature_encoder=False",
|
| 29 |
+
"--report_to=tensorboard",
|
| 30 |
+
"--metric_for_best_model=wer",
|
| 31 |
+
"--greater_is_better=False",
|
| 32 |
+
"--load_best_model_at_end",
|
| 33 |
+
"--gradient_checkpointing",
|
| 34 |
+
"--fp16",
|
| 35 |
+
"--overwrite_output_dir",
|
| 36 |
+
"--do_train",
|
| 37 |
+
"--do_eval",
|
| 38 |
+
"--predict_with_generate",
|
| 39 |
+
"--do_normalize_eval",
|
| 40 |
+
"--streaming",
|
| 41 |
+
"--use_auth_token",
|
| 42 |
+
"--push_to_hub",
|
| 43 |
+
"--report_to",
|
| 44 |
+
"wandb",
|
| 45 |
+
"--run_name",
|
| 46 |
+
"whisper-small-eu"
|
| 47 |
+
],
|
| 48 |
+
"program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
|
| 49 |
+
"codePath": "run_speech_recognition_seq2seq_streaming.py",
|
| 50 |
+
"git": {
|
| 51 |
+
"remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
|
| 52 |
+
"commit": "9c975864b20b4df94398a870e97cad2934253ec3"
|
| 53 |
+
},
|
| 54 |
+
"email": "xezpeleta@gmail.com",
|
| 55 |
+
"root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
|
| 56 |
+
"host": "tknika",
|
| 57 |
+
"executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
|
| 58 |
+
"codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
|
| 59 |
+
"cpu_count": 8,
|
| 60 |
+
"cpu_count_logical": 8,
|
| 61 |
+
"gpu": "NVIDIA L40-48Q",
|
| 62 |
+
"gpu_count": 1,
|
| 63 |
+
"disk": {
|
| 64 |
+
"/": {
|
| 65 |
+
"total": "525987168256",
|
| 66 |
+
"used": "297346564096"
|
| 67 |
+
}
|
| 68 |
+
},
|
| 69 |
+
"memory": {
|
| 70 |
+
"total": "33654022144"
|
| 71 |
+
},
|
| 72 |
+
"cpu": {
|
| 73 |
+
"count": 8,
|
| 74 |
+
"countLogical": 8
|
| 75 |
+
},
|
| 76 |
+
"gpu_nvidia": [
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA L40-48Q",
|
| 79 |
+
"memoryTotal": "51539607552",
|
| 80 |
+
"cudaCores": 18176,
|
| 81 |
+
"architecture": "Ada"
|
| 82 |
+
}
|
| 83 |
+
],
|
| 84 |
+
"cudaVersion": "12.4"
|
| 85 |
+
}
|
wandb/run-20250212_121751-d4i88lzt/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":0}}
|
wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-02-12T12:17:51.340771692Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpu_kqxp5v/port-223392.txt","pid":223392,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
| 2 |
+
{"time":"2025-02-12T12:17:51.391525122Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":223392}
|
| 3 |
+
{"time":"2025-02-12T12:17:51.391505422Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":35377,"Zone":""}}
|
| 4 |
+
{"time":"2025-02-12T12:17:51.521026758Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:42852"}
|
| 5 |
+
{"time":"2025-02-12T12:17:51.529437253Z","level":"INFO","msg":"handleInformInit: received","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
|
| 6 |
+
{"time":"2025-02-12T12:17:51.635683608Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"d4i88lzt","id":"127.0.0.1:42852"}
|
| 7 |
+
{"time":"2025-02-12T12:17:52.089736796Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:42852"}
|
| 8 |
+
{"time":"2025-02-12T12:17:52.089842845Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:42852"}
|
| 9 |
+
{"time":"2025-02-12T12:17:52.089890025Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:42852"}
|
| 10 |
+
{"time":"2025-02-12T12:17:52.089878375Z","level":"INFO","msg":"server is shutting down"}
|
| 11 |
+
{"time":"2025-02-12T12:17:52.241493374Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:35377->127.0.0.1:42852: use of closed network connection","id":"127.0.0.1:42852"}
|
| 12 |
+
{"time":"2025-02-12T12:17:53.244042129Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:42852"}
|
| 13 |
+
{"time":"2025-02-12T12:17:53.244065929Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:42852"}
|
| 14 |
+
{"time":"2025-02-12T12:17:53.244128968Z","level":"INFO","msg":"server is closed"}
|
wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-02-12T12:17:51.5298133Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-02-12T12:17:51.635607299Z","level":"INFO","msg":"created new stream","id":"d4i88lzt"}
|
| 3 |
+
{"time":"2025-02-12T12:17:51.635674098Z","level":"INFO","msg":"stream: started","id":"d4i88lzt"}
|
| 4 |
+
{"time":"2025-02-12T12:17:51.635773898Z","level":"INFO","msg":"writer: Do: started","stream_id":"d4i88lzt"}
|
| 5 |
+
{"time":"2025-02-12T12:17:51.635842217Z","level":"INFO","msg":"sender: started","stream_id":"d4i88lzt"}
|
| 6 |
+
{"time":"2025-02-12T12:17:51.635963186Z","level":"INFO","msg":"handler: started","stream_id":"d4i88lzt"}
|
| 7 |
+
{"time":"2025-02-12T12:17:51.947487454Z","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-02-12T12:17:52.089832235Z","level":"INFO","msg":"stream: closing","id":"d4i88lzt"}
|
| 9 |
+
{"time":"2025-02-12T12:17:52.089860885Z","level":"INFO","msg":"Stopping system monitor"}
|
| 10 |
+
{"time":"2025-02-12T12:17:52.090422051Z","level":"INFO","msg":"Stopped system monitor"}
|
| 11 |
+
{"time":"2025-02-12T12:17:53.018559862Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2025-02-12T12:17:53.24378817Z","level":"INFO","msg":"handler: closed","stream_id":"d4i88lzt"}
|
| 13 |
+
{"time":"2025-02-12T12:17:53.24383994Z","level":"INFO","msg":"writer: Close: closed","stream_id":"d4i88lzt"}
|
| 14 |
+
{"time":"2025-02-12T12:17:53.24386653Z","level":"INFO","msg":"sender: closed","stream_id":"d4i88lzt"}
|
| 15 |
+
{"time":"2025-02-12T12:17:53.243926789Z","level":"INFO","msg":"stream: closed","id":"d4i88lzt"}
|
wandb/run-20250212_121751-d4i88lzt/logs/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
| 2 |
+
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Configure stats pid to 223392
|
| 3 |
+
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
| 4 |
+
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
| 5 |
+
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
| 6 |
+
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug.log
|
| 7 |
+
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_121751-d4i88lzt/logs/debug-internal.log
|
| 8 |
+
2025-02-12 12:17:51,311 INFO MainThread:223392 [wandb_init.py:init():756] calling init triggers
|
| 9 |
+
2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-02-12 12:17:51,312 INFO MainThread:223392 [wandb_init.py:init():789] starting backend
|
| 12 |
+
2025-02-12 12:17:51,521 INFO MainThread:223392 [wandb_init.py:init():793] sending inform_init request
|
| 13 |
+
2025-02-12 12:17:51,526 INFO MainThread:223392 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-02-12 12:17:51,526 INFO MainThread:223392 [wandb_init.py:init():808] backend started and connected
|
| 15 |
+
2025-02-12 12:17:51,528 INFO MainThread:223392 [wandb_init.py:init():901] updated telemetry
|
| 16 |
+
2025-02-12 12:17:51,535 INFO MainThread:223392 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-02-12 12:17:51,944 INFO MainThread:223392 [wandb_init.py:init():994] starting run threads in backend
|
| 18 |
+
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_console_start():2385] atexit reg
|
| 19 |
+
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
| 20 |
+
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
| 21 |
+
2025-02-12 12:17:52,049 INFO MainThread:223392 [wandb_run.py:_redirect():2325] Redirects installed.
|
| 22 |
+
2025-02-12 12:17:52,051 INFO MainThread:223392 [wandb_init.py:init():1036] run started, returning control to user process
|
| 23 |
+
2025-02-12 12:17:52,052 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-17-27_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
| 24 |
+
2025-02-12 12:17:52,054 INFO MainThread:223392 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x75ef87e92c00>>
|
| 25 |
+
2025-02-12 12:17:52,055 INFO MainThread:223392 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
| 26 |
+
2025-02-12 12:17:52,089 WARNING MsgRouterThr:223392 [router.py:message_loop():75] message_loop has been closed
|
wandb/run-20250212_121751-d4i88lzt/run-d4i88lzt.wandb
ADDED
|
Binary file (11.3 kB). View file
|
|
|
wandb/run-20250212_122637-v3d3ouvn/files/config.yaml
ADDED
|
@@ -0,0 +1,512 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_attn_implementation_autoset:
|
| 2 |
+
value: true
|
| 3 |
+
_name_or_path:
|
| 4 |
+
value: openai/whisper-small
|
| 5 |
+
_wandb:
|
| 6 |
+
value:
|
| 7 |
+
cli_version: 0.19.6
|
| 8 |
+
m:
|
| 9 |
+
- "1": train/global_step
|
| 10 |
+
"6":
|
| 11 |
+
- 3
|
| 12 |
+
"7": []
|
| 13 |
+
python_version: 3.12.3
|
| 14 |
+
t:
|
| 15 |
+
"1":
|
| 16 |
+
- 1
|
| 17 |
+
- 5
|
| 18 |
+
- 11
|
| 19 |
+
- 49
|
| 20 |
+
- 51
|
| 21 |
+
- 53
|
| 22 |
+
- 55
|
| 23 |
+
- 71
|
| 24 |
+
- 100
|
| 25 |
+
"2":
|
| 26 |
+
- 1
|
| 27 |
+
- 5
|
| 28 |
+
- 11
|
| 29 |
+
- 49
|
| 30 |
+
- 51
|
| 31 |
+
- 53
|
| 32 |
+
- 55
|
| 33 |
+
- 71
|
| 34 |
+
- 100
|
| 35 |
+
"3":
|
| 36 |
+
- 7
|
| 37 |
+
- 13
|
| 38 |
+
- 19
|
| 39 |
+
- 23
|
| 40 |
+
- 55
|
| 41 |
+
- 66
|
| 42 |
+
"4": 3.12.3
|
| 43 |
+
"5": 0.19.6
|
| 44 |
+
"6": 4.49.0.dev0
|
| 45 |
+
"8":
|
| 46 |
+
- 5
|
| 47 |
+
"9":
|
| 48 |
+
"1": transformers_trainer
|
| 49 |
+
"12": 0.19.6
|
| 50 |
+
"13": linux-x86_64
|
| 51 |
+
accelerator_config:
|
| 52 |
+
value:
|
| 53 |
+
dispatch_batches: null
|
| 54 |
+
even_batches: true
|
| 55 |
+
gradient_accumulation_kwargs: null
|
| 56 |
+
non_blocking: false
|
| 57 |
+
split_batches: false
|
| 58 |
+
use_seedable_sampler: true
|
| 59 |
+
activation_dropout:
|
| 60 |
+
value: 0
|
| 61 |
+
activation_function:
|
| 62 |
+
value: gelu
|
| 63 |
+
adafactor:
|
| 64 |
+
value: false
|
| 65 |
+
adam_beta1:
|
| 66 |
+
value: 0.9
|
| 67 |
+
adam_beta2:
|
| 68 |
+
value: 0.999
|
| 69 |
+
adam_epsilon:
|
| 70 |
+
value: 1e-08
|
| 71 |
+
add_cross_attention:
|
| 72 |
+
value: false
|
| 73 |
+
apply_spec_augment:
|
| 74 |
+
value: false
|
| 75 |
+
architectures:
|
| 76 |
+
value:
|
| 77 |
+
- WhisperForConditionalGeneration
|
| 78 |
+
attention_dropout:
|
| 79 |
+
value: 0
|
| 80 |
+
auto_find_batch_size:
|
| 81 |
+
value: false
|
| 82 |
+
average_tokens_across_devices:
|
| 83 |
+
value: false
|
| 84 |
+
bad_words_ids:
|
| 85 |
+
value: null
|
| 86 |
+
batch_eval_metrics:
|
| 87 |
+
value: false
|
| 88 |
+
begin_suppress_tokens:
|
| 89 |
+
value:
|
| 90 |
+
- 220
|
| 91 |
+
- 50257
|
| 92 |
+
bf16:
|
| 93 |
+
value: false
|
| 94 |
+
bf16_full_eval:
|
| 95 |
+
value: false
|
| 96 |
+
bos_token_id:
|
| 97 |
+
value: 50257
|
| 98 |
+
chunk_size_feed_forward:
|
| 99 |
+
value: 0
|
| 100 |
+
classifier_proj_size:
|
| 101 |
+
value: 256
|
| 102 |
+
cross_attention_hidden_size:
|
| 103 |
+
value: null
|
| 104 |
+
d_model:
|
| 105 |
+
value: 768
|
| 106 |
+
data_seed:
|
| 107 |
+
value: null
|
| 108 |
+
dataloader_drop_last:
|
| 109 |
+
value: false
|
| 110 |
+
dataloader_num_workers:
|
| 111 |
+
value: 0
|
| 112 |
+
dataloader_persistent_workers:
|
| 113 |
+
value: false
|
| 114 |
+
dataloader_pin_memory:
|
| 115 |
+
value: true
|
| 116 |
+
dataloader_prefetch_factor:
|
| 117 |
+
value: null
|
| 118 |
+
ddp_backend:
|
| 119 |
+
value: null
|
| 120 |
+
ddp_broadcast_buffers:
|
| 121 |
+
value: null
|
| 122 |
+
ddp_bucket_cap_mb:
|
| 123 |
+
value: null
|
| 124 |
+
ddp_find_unused_parameters:
|
| 125 |
+
value: null
|
| 126 |
+
ddp_timeout:
|
| 127 |
+
value: 1800
|
| 128 |
+
debug:
|
| 129 |
+
value: []
|
| 130 |
+
decoder_attention_heads:
|
| 131 |
+
value: 12
|
| 132 |
+
decoder_ffn_dim:
|
| 133 |
+
value: 3072
|
| 134 |
+
decoder_layerdrop:
|
| 135 |
+
value: 0
|
| 136 |
+
decoder_layers:
|
| 137 |
+
value: 12
|
| 138 |
+
decoder_start_token_id:
|
| 139 |
+
value: 50258
|
| 140 |
+
deepspeed:
|
| 141 |
+
value: null
|
| 142 |
+
disable_tqdm:
|
| 143 |
+
value: false
|
| 144 |
+
dispatch_batches:
|
| 145 |
+
value: null
|
| 146 |
+
diversity_penalty:
|
| 147 |
+
value: 0
|
| 148 |
+
do_eval:
|
| 149 |
+
value: true
|
| 150 |
+
do_predict:
|
| 151 |
+
value: false
|
| 152 |
+
do_sample:
|
| 153 |
+
value: false
|
| 154 |
+
do_train:
|
| 155 |
+
value: true
|
| 156 |
+
dropout:
|
| 157 |
+
value: 0
|
| 158 |
+
early_stopping:
|
| 159 |
+
value: false
|
| 160 |
+
encoder_attention_heads:
|
| 161 |
+
value: 12
|
| 162 |
+
encoder_ffn_dim:
|
| 163 |
+
value: 3072
|
| 164 |
+
encoder_layerdrop:
|
| 165 |
+
value: 0
|
| 166 |
+
encoder_layers:
|
| 167 |
+
value: 12
|
| 168 |
+
encoder_no_repeat_ngram_size:
|
| 169 |
+
value: 0
|
| 170 |
+
eos_token_id:
|
| 171 |
+
value: 50257
|
| 172 |
+
eval_accumulation_steps:
|
| 173 |
+
value: null
|
| 174 |
+
eval_delay:
|
| 175 |
+
value: 0
|
| 176 |
+
eval_do_concat_batches:
|
| 177 |
+
value: true
|
| 178 |
+
eval_on_start:
|
| 179 |
+
value: false
|
| 180 |
+
eval_steps:
|
| 181 |
+
value: 1000
|
| 182 |
+
eval_strategy:
|
| 183 |
+
value: steps
|
| 184 |
+
eval_use_gather_object:
|
| 185 |
+
value: false
|
| 186 |
+
evaluation_strategy:
|
| 187 |
+
value: steps
|
| 188 |
+
exponential_decay_length_penalty:
|
| 189 |
+
value: null
|
| 190 |
+
finetuning_task:
|
| 191 |
+
value: null
|
| 192 |
+
forced_bos_token_id:
|
| 193 |
+
value: null
|
| 194 |
+
forced_decoder_ids:
|
| 195 |
+
value: null
|
| 196 |
+
forced_eos_token_id:
|
| 197 |
+
value: null
|
| 198 |
+
fp16:
|
| 199 |
+
value: true
|
| 200 |
+
fp16_backend:
|
| 201 |
+
value: auto
|
| 202 |
+
fp16_full_eval:
|
| 203 |
+
value: false
|
| 204 |
+
fp16_opt_level:
|
| 205 |
+
value: O1
|
| 206 |
+
fsdp:
|
| 207 |
+
value: []
|
| 208 |
+
fsdp_config:
|
| 209 |
+
value:
|
| 210 |
+
min_num_params: 0
|
| 211 |
+
xla: false
|
| 212 |
+
xla_fsdp_grad_ckpt: false
|
| 213 |
+
xla_fsdp_v2: false
|
| 214 |
+
fsdp_min_num_params:
|
| 215 |
+
value: 0
|
| 216 |
+
fsdp_transformer_layer_cls_to_wrap:
|
| 217 |
+
value: null
|
| 218 |
+
full_determinism:
|
| 219 |
+
value: false
|
| 220 |
+
generation_config:
|
| 221 |
+
value: null
|
| 222 |
+
generation_max_length:
|
| 223 |
+
value: 225
|
| 224 |
+
generation_num_beams:
|
| 225 |
+
value: null
|
| 226 |
+
gradient_accumulation_steps:
|
| 227 |
+
value: 1
|
| 228 |
+
gradient_checkpointing:
|
| 229 |
+
value: true
|
| 230 |
+
gradient_checkpointing_kwargs:
|
| 231 |
+
value: null
|
| 232 |
+
greater_is_better:
|
| 233 |
+
value: false
|
| 234 |
+
group_by_length:
|
| 235 |
+
value: false
|
| 236 |
+
half_precision_backend:
|
| 237 |
+
value: auto
|
| 238 |
+
hub_always_push:
|
| 239 |
+
value: false
|
| 240 |
+
hub_model_id:
|
| 241 |
+
value: null
|
| 242 |
+
hub_private_repo:
|
| 243 |
+
value: null
|
| 244 |
+
hub_strategy:
|
| 245 |
+
value: every_save
|
| 246 |
+
hub_token:
|
| 247 |
+
value: <HUB_TOKEN>
|
| 248 |
+
id2label:
|
| 249 |
+
value:
|
| 250 |
+
"0": LABEL_0
|
| 251 |
+
"1": LABEL_1
|
| 252 |
+
ignore_data_skip:
|
| 253 |
+
value: false
|
| 254 |
+
include_for_metrics:
|
| 255 |
+
value: []
|
| 256 |
+
include_inputs_for_metrics:
|
| 257 |
+
value: false
|
| 258 |
+
include_num_input_tokens_seen:
|
| 259 |
+
value: false
|
| 260 |
+
include_tokens_per_second:
|
| 261 |
+
value: false
|
| 262 |
+
init_std:
|
| 263 |
+
value: 0.02
|
| 264 |
+
is_decoder:
|
| 265 |
+
value: false
|
| 266 |
+
is_encoder_decoder:
|
| 267 |
+
value: true
|
| 268 |
+
jit_mode_eval:
|
| 269 |
+
value: false
|
| 270 |
+
label_names:
|
| 271 |
+
value: null
|
| 272 |
+
label_smoothing_factor:
|
| 273 |
+
value: 0
|
| 274 |
+
label2id:
|
| 275 |
+
value:
|
| 276 |
+
LABEL_0: 0
|
| 277 |
+
LABEL_1: 1
|
| 278 |
+
learning_rate:
|
| 279 |
+
value: 1e-05
|
| 280 |
+
length_column_name:
|
| 281 |
+
value: input_length
|
| 282 |
+
length_penalty:
|
| 283 |
+
value: 1
|
| 284 |
+
load_best_model_at_end:
|
| 285 |
+
value: true
|
| 286 |
+
local_rank:
|
| 287 |
+
value: 0
|
| 288 |
+
log_level:
|
| 289 |
+
value: passive
|
| 290 |
+
log_level_replica:
|
| 291 |
+
value: warning
|
| 292 |
+
log_on_each_node:
|
| 293 |
+
value: true
|
| 294 |
+
logging_dir:
|
| 295 |
+
value: ./runs/Feb12_12-26-11_tknika
|
| 296 |
+
logging_first_step:
|
| 297 |
+
value: false
|
| 298 |
+
logging_nan_inf_filter:
|
| 299 |
+
value: true
|
| 300 |
+
logging_steps:
|
| 301 |
+
value: 25
|
| 302 |
+
logging_strategy:
|
| 303 |
+
value: steps
|
| 304 |
+
lr_scheduler_type:
|
| 305 |
+
value: linear
|
| 306 |
+
mask_feature_length:
|
| 307 |
+
value: 10
|
| 308 |
+
mask_feature_min_masks:
|
| 309 |
+
value: 0
|
| 310 |
+
mask_feature_prob:
|
| 311 |
+
value: 0
|
| 312 |
+
mask_time_length:
|
| 313 |
+
value: 10
|
| 314 |
+
mask_time_min_masks:
|
| 315 |
+
value: 2
|
| 316 |
+
mask_time_prob:
|
| 317 |
+
value: 0.05
|
| 318 |
+
max_grad_norm:
|
| 319 |
+
value: 1
|
| 320 |
+
max_length:
|
| 321 |
+
value: 448
|
| 322 |
+
max_source_positions:
|
| 323 |
+
value: 1500
|
| 324 |
+
max_steps:
|
| 325 |
+
value: 8000
|
| 326 |
+
max_target_positions:
|
| 327 |
+
value: 448
|
| 328 |
+
median_filter_width:
|
| 329 |
+
value: 7
|
| 330 |
+
metric_for_best_model:
|
| 331 |
+
value: wer
|
| 332 |
+
min_length:
|
| 333 |
+
value: 0
|
| 334 |
+
model/num_parameters:
|
| 335 |
+
value: 241734912
|
| 336 |
+
model_type:
|
| 337 |
+
value: whisper
|
| 338 |
+
mp_parameters:
|
| 339 |
+
value: ""
|
| 340 |
+
neftune_noise_alpha:
|
| 341 |
+
value: null
|
| 342 |
+
no_cuda:
|
| 343 |
+
value: false
|
| 344 |
+
no_repeat_ngram_size:
|
| 345 |
+
value: 0
|
| 346 |
+
num_beam_groups:
|
| 347 |
+
value: 1
|
| 348 |
+
num_beams:
|
| 349 |
+
value: 1
|
| 350 |
+
num_hidden_layers:
|
| 351 |
+
value: 12
|
| 352 |
+
num_mel_bins:
|
| 353 |
+
value: 80
|
| 354 |
+
num_return_sequences:
|
| 355 |
+
value: 1
|
| 356 |
+
num_train_epochs:
|
| 357 |
+
value: 3
|
| 358 |
+
optim:
|
| 359 |
+
value: adamw_torch
|
| 360 |
+
optim_args:
|
| 361 |
+
value: null
|
| 362 |
+
optim_target_modules:
|
| 363 |
+
value: null
|
| 364 |
+
output_attentions:
|
| 365 |
+
value: false
|
| 366 |
+
output_dir:
|
| 367 |
+
value: ./
|
| 368 |
+
output_hidden_states:
|
| 369 |
+
value: false
|
| 370 |
+
output_scores:
|
| 371 |
+
value: false
|
| 372 |
+
overwrite_output_dir:
|
| 373 |
+
value: true
|
| 374 |
+
pad_token_id:
|
| 375 |
+
value: 50257
|
| 376 |
+
past_index:
|
| 377 |
+
value: -1
|
| 378 |
+
per_device_eval_batch_size:
|
| 379 |
+
value: 16
|
| 380 |
+
per_device_train_batch_size:
|
| 381 |
+
value: 32
|
| 382 |
+
per_gpu_eval_batch_size:
|
| 383 |
+
value: null
|
| 384 |
+
per_gpu_train_batch_size:
|
| 385 |
+
value: null
|
| 386 |
+
predict_with_generate:
|
| 387 |
+
value: true
|
| 388 |
+
prediction_loss_only:
|
| 389 |
+
value: false
|
| 390 |
+
prefix:
|
| 391 |
+
value: null
|
| 392 |
+
problem_type:
|
| 393 |
+
value: null
|
| 394 |
+
push_to_hub:
|
| 395 |
+
value: true
|
| 396 |
+
push_to_hub_model_id:
|
| 397 |
+
value: null
|
| 398 |
+
push_to_hub_organization:
|
| 399 |
+
value: null
|
| 400 |
+
push_to_hub_token:
|
| 401 |
+
value: <PUSH_TO_HUB_TOKEN>
|
| 402 |
+
ray_scope:
|
| 403 |
+
value: last
|
| 404 |
+
remove_invalid_values:
|
| 405 |
+
value: false
|
| 406 |
+
remove_unused_columns:
|
| 407 |
+
value: true
|
| 408 |
+
repetition_penalty:
|
| 409 |
+
value: 1
|
| 410 |
+
report_to:
|
| 411 |
+
value:
|
| 412 |
+
- wandb
|
| 413 |
+
restore_callback_states_from_checkpoint:
|
| 414 |
+
value: false
|
| 415 |
+
resume_from_checkpoint:
|
| 416 |
+
value: null
|
| 417 |
+
return_dict:
|
| 418 |
+
value: true
|
| 419 |
+
return_dict_in_generate:
|
| 420 |
+
value: false
|
| 421 |
+
run_name:
|
| 422 |
+
value: whisper-small-eu
|
| 423 |
+
save_on_each_node:
|
| 424 |
+
value: false
|
| 425 |
+
save_only_model:
|
| 426 |
+
value: false
|
| 427 |
+
save_safetensors:
|
| 428 |
+
value: true
|
| 429 |
+
save_steps:
|
| 430 |
+
value: 1000
|
| 431 |
+
save_strategy:
|
| 432 |
+
value: steps
|
| 433 |
+
save_total_limit:
|
| 434 |
+
value: null
|
| 435 |
+
scale_embedding:
|
| 436 |
+
value: false
|
| 437 |
+
seed:
|
| 438 |
+
value: 42
|
| 439 |
+
sep_token_id:
|
| 440 |
+
value: null
|
| 441 |
+
skip_memory_metrics:
|
| 442 |
+
value: true
|
| 443 |
+
sortish_sampler:
|
| 444 |
+
value: false
|
| 445 |
+
split_batches:
|
| 446 |
+
value: null
|
| 447 |
+
suppress_tokens:
|
| 448 |
+
value: null
|
| 449 |
+
task_specific_params:
|
| 450 |
+
value: null
|
| 451 |
+
temperature:
|
| 452 |
+
value: 1
|
| 453 |
+
tf_legacy_loss:
|
| 454 |
+
value: false
|
| 455 |
+
tf32:
|
| 456 |
+
value: null
|
| 457 |
+
tie_encoder_decoder:
|
| 458 |
+
value: false
|
| 459 |
+
tie_word_embeddings:
|
| 460 |
+
value: true
|
| 461 |
+
tokenizer_class:
|
| 462 |
+
value: null
|
| 463 |
+
top_k:
|
| 464 |
+
value: 50
|
| 465 |
+
top_p:
|
| 466 |
+
value: 1
|
| 467 |
+
torch_compile:
|
| 468 |
+
value: false
|
| 469 |
+
torch_compile_backend:
|
| 470 |
+
value: null
|
| 471 |
+
torch_compile_mode:
|
| 472 |
+
value: null
|
| 473 |
+
torch_dtype:
|
| 474 |
+
value: float32
|
| 475 |
+
torch_empty_cache_steps:
|
| 476 |
+
value: null
|
| 477 |
+
torchdynamo:
|
| 478 |
+
value: null
|
| 479 |
+
torchscript:
|
| 480 |
+
value: false
|
| 481 |
+
tpu_metrics_debug:
|
| 482 |
+
value: false
|
| 483 |
+
tpu_num_cores:
|
| 484 |
+
value: null
|
| 485 |
+
transformers_version:
|
| 486 |
+
value: 4.49.0.dev0
|
| 487 |
+
typical_p:
|
| 488 |
+
value: 1
|
| 489 |
+
use_bfloat16:
|
| 490 |
+
value: false
|
| 491 |
+
use_cache:
|
| 492 |
+
value: false
|
| 493 |
+
use_cpu:
|
| 494 |
+
value: false
|
| 495 |
+
use_ipex:
|
| 496 |
+
value: false
|
| 497 |
+
use_legacy_prediction_loop:
|
| 498 |
+
value: false
|
| 499 |
+
use_liger_kernel:
|
| 500 |
+
value: false
|
| 501 |
+
use_mps_device:
|
| 502 |
+
value: false
|
| 503 |
+
use_weighted_layer_sum:
|
| 504 |
+
value: false
|
| 505 |
+
vocab_size:
|
| 506 |
+
value: 51865
|
| 507 |
+
warmup_ratio:
|
| 508 |
+
value: 0
|
| 509 |
+
warmup_steps:
|
| 510 |
+
value: 500
|
| 511 |
+
weight_decay:
|
| 512 |
+
value: 0
|
wandb/run-20250212_122637-v3d3ouvn/files/output.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
| 2 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
|
| 3 |
+
main()
|
| 4 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
|
| 5 |
+
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
| 6 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 7 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
| 8 |
+
return inner_training_loop(
|
| 9 |
+
^^^^^^^^^^^^^^^^^^^^
|
| 10 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
| 11 |
+
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
| 12 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 13 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
| 14 |
+
return self.call_event("on_epoch_begin", args, state, control)
|
| 15 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 16 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
| 17 |
+
result = getattr(callback, event)(
|
| 18 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 19 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
|
| 20 |
+
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
| 21 |
+
^^^^^^^^^^^^^^^^^^^^^^^^
|
| 22 |
+
AttributeError: 'NoneType' object has no attribute 'dataset'
|
wandb/run-20250212_122637-v3d3ouvn/files/requirements.txt
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiosignal==1.3.2
|
| 2 |
+
Markdown==3.7
|
| 3 |
+
more-itertools==10.6.0
|
| 4 |
+
requests==2.32.3
|
| 5 |
+
sentry-sdk==2.21.0
|
| 6 |
+
torchaudio==2.6.0
|
| 7 |
+
charset-normalizer==3.4.1
|
| 8 |
+
docker-pycreds==0.4.0
|
| 9 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 10 |
+
PyYAML==6.0.2
|
| 11 |
+
librosa==0.10.2.post1
|
| 12 |
+
soxr==0.5.0.post1
|
| 13 |
+
multiprocess==0.70.16
|
| 14 |
+
setuptools==75.8.0
|
| 15 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 16 |
+
joblib==1.4.2
|
| 17 |
+
pytz==2025.1
|
| 18 |
+
pip==24.0
|
| 19 |
+
scikit-learn==1.6.1
|
| 20 |
+
certifi==2025.1.31
|
| 21 |
+
jiwer==3.1.0
|
| 22 |
+
regex==2024.11.6
|
| 23 |
+
annotated-types==0.7.0
|
| 24 |
+
grpcio==1.70.0
|
| 25 |
+
msgpack==1.1.0
|
| 26 |
+
mpmath==1.3.0
|
| 27 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 28 |
+
soundfile==0.13.1
|
| 29 |
+
dill==0.3.8
|
| 30 |
+
nvidia-nvtx-cu12==12.4.127
|
| 31 |
+
six==1.17.0
|
| 32 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 33 |
+
pyarrow==19.0.0
|
| 34 |
+
nvidia-nccl-cu12==2.21.5
|
| 35 |
+
psutil==6.1.1
|
| 36 |
+
decorator==5.1.1
|
| 37 |
+
llvmlite==0.44.0
|
| 38 |
+
frozenlist==1.5.0
|
| 39 |
+
pydantic==2.10.6
|
| 40 |
+
networkx==3.4.2
|
| 41 |
+
idna==3.10
|
| 42 |
+
wandb==0.19.6
|
| 43 |
+
aiohttp==3.11.12
|
| 44 |
+
RapidFuzz==3.12.1
|
| 45 |
+
pandas==2.2.3
|
| 46 |
+
python-dateutil==2.9.0.post0
|
| 47 |
+
numpy==2.1.3
|
| 48 |
+
tokenizers==0.21.0
|
| 49 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 50 |
+
typing_extensions==4.12.2
|
| 51 |
+
urllib3==2.3.0
|
| 52 |
+
setproctitle==1.3.4
|
| 53 |
+
tzdata==2025.1
|
| 54 |
+
sympy==1.13.1
|
| 55 |
+
pooch==1.8.2
|
| 56 |
+
click==8.1.8
|
| 57 |
+
pydantic_core==2.27.2
|
| 58 |
+
MarkupSafe==3.0.2
|
| 59 |
+
scipy==1.15.1
|
| 60 |
+
accelerate==1.3.0
|
| 61 |
+
tensorboard==2.19.0
|
| 62 |
+
protobuf==5.29.3
|
| 63 |
+
gitdb==4.0.12
|
| 64 |
+
smmap==5.0.2
|
| 65 |
+
absl-py==2.1.0
|
| 66 |
+
tqdm==4.67.1
|
| 67 |
+
yarl==1.18.3
|
| 68 |
+
pycparser==2.22
|
| 69 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 70 |
+
attrs==25.1.0
|
| 71 |
+
lazy_loader==0.4
|
| 72 |
+
tensorboard-data-server==0.7.2
|
| 73 |
+
threadpoolctl==3.5.0
|
| 74 |
+
GitPython==3.1.44
|
| 75 |
+
safetensors==0.5.2
|
| 76 |
+
fsspec==2024.12.0
|
| 77 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 78 |
+
filelock==3.17.0
|
| 79 |
+
aiohappyeyeballs==2.4.6
|
| 80 |
+
packaging==24.2
|
| 81 |
+
datasets==3.2.1.dev0
|
| 82 |
+
audioread==3.0.1
|
| 83 |
+
propcache==0.2.1
|
| 84 |
+
transformers==4.49.0.dev0
|
| 85 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 86 |
+
cffi==1.17.1
|
| 87 |
+
evaluate==0.4.3
|
| 88 |
+
Werkzeug==3.1.3
|
| 89 |
+
huggingface-hub==0.28.1
|
| 90 |
+
Jinja2==3.1.5
|
| 91 |
+
torch==2.6.0
|
| 92 |
+
nvidia-curand-cu12==10.3.5.147
|
| 93 |
+
xxhash==3.5.0
|
| 94 |
+
platformdirs==4.3.6
|
| 95 |
+
multidict==6.1.0
|
| 96 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 97 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 98 |
+
triton==3.2.0
|
| 99 |
+
numba==0.61.0
|
| 100 |
+
importlib_metadata==8.0.0
|
| 101 |
+
platformdirs==4.2.2
|
| 102 |
+
typeguard==4.3.0
|
| 103 |
+
more-itertools==10.3.0
|
| 104 |
+
tomli==2.0.1
|
| 105 |
+
autocommand==2.2.2
|
| 106 |
+
zipp==3.19.2
|
| 107 |
+
typing_extensions==4.12.2
|
| 108 |
+
backports.tarfile==1.2.0
|
| 109 |
+
inflect==7.3.1
|
| 110 |
+
jaraco.text==3.12.1
|
| 111 |
+
wheel==0.43.0
|
| 112 |
+
packaging==24.2
|
| 113 |
+
jaraco.collections==5.1.0
|
| 114 |
+
jaraco.functools==4.0.1
|
| 115 |
+
jaraco.context==5.3.0
|
wandb/run-20250212_122637-v3d3ouvn/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
|
| 3 |
+
"python": "CPython 3.12.3",
|
| 4 |
+
"startedAt": "2025-02-12T12:26:37.277902Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--model_name_or_path=openai/whisper-small",
|
| 7 |
+
"--dataset_name=asierhv/composite_corpus_eu_v2.1",
|
| 8 |
+
"--language=basque",
|
| 9 |
+
"--train_split_name=train",
|
| 10 |
+
"--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
|
| 11 |
+
"--model_index_name=Whisper Small Basque",
|
| 12 |
+
"--max_steps=8000",
|
| 13 |
+
"--output_dir=./",
|
| 14 |
+
"--per_device_train_batch_size=32",
|
| 15 |
+
"--per_device_eval_batch_size=16",
|
| 16 |
+
"--gradient_accumulation_steps=1",
|
| 17 |
+
"--logging_steps=25",
|
| 18 |
+
"--learning_rate=1e-5",
|
| 19 |
+
"--warmup_steps=500",
|
| 20 |
+
"--evaluation_strategy=steps",
|
| 21 |
+
"--eval_steps=1000",
|
| 22 |
+
"--save_strategy=steps",
|
| 23 |
+
"--save_steps=1000",
|
| 24 |
+
"--generation_max_length=225",
|
| 25 |
+
"--length_column_name=input_length",
|
| 26 |
+
"--max_duration_in_seconds=30",
|
| 27 |
+
"--text_column_name=sentence",
|
| 28 |
+
"--freeze_feature_encoder=False",
|
| 29 |
+
"--report_to=tensorboard",
|
| 30 |
+
"--metric_for_best_model=wer",
|
| 31 |
+
"--greater_is_better=False",
|
| 32 |
+
"--load_best_model_at_end",
|
| 33 |
+
"--gradient_checkpointing",
|
| 34 |
+
"--fp16",
|
| 35 |
+
"--overwrite_output_dir",
|
| 36 |
+
"--do_train",
|
| 37 |
+
"--do_eval",
|
| 38 |
+
"--predict_with_generate",
|
| 39 |
+
"--do_normalize_eval",
|
| 40 |
+
"--streaming",
|
| 41 |
+
"--use_auth_token",
|
| 42 |
+
"--push_to_hub",
|
| 43 |
+
"--report_to",
|
| 44 |
+
"wandb",
|
| 45 |
+
"--run_name",
|
| 46 |
+
"whisper-small-eu"
|
| 47 |
+
],
|
| 48 |
+
"program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
|
| 49 |
+
"codePath": "run_speech_recognition_seq2seq_streaming.py",
|
| 50 |
+
"git": {
|
| 51 |
+
"remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
|
| 52 |
+
"commit": "9c975864b20b4df94398a870e97cad2934253ec3"
|
| 53 |
+
},
|
| 54 |
+
"email": "xezpeleta@gmail.com",
|
| 55 |
+
"root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
|
| 56 |
+
"host": "tknika",
|
| 57 |
+
"executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
|
| 58 |
+
"codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
|
| 59 |
+
"cpu_count": 8,
|
| 60 |
+
"cpu_count_logical": 8,
|
| 61 |
+
"gpu": "NVIDIA L40-48Q",
|
| 62 |
+
"gpu_count": 1,
|
| 63 |
+
"disk": {
|
| 64 |
+
"/": {
|
| 65 |
+
"total": "525987168256",
|
| 66 |
+
"used": "297346666496"
|
| 67 |
+
}
|
| 68 |
+
},
|
| 69 |
+
"memory": {
|
| 70 |
+
"total": "33654022144"
|
| 71 |
+
},
|
| 72 |
+
"cpu": {
|
| 73 |
+
"count": 8,
|
| 74 |
+
"countLogical": 8
|
| 75 |
+
},
|
| 76 |
+
"gpu_nvidia": [
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA L40-48Q",
|
| 79 |
+
"memoryTotal": "51539607552",
|
| 80 |
+
"cudaCores": 18176,
|
| 81 |
+
"architecture": "Ada"
|
| 82 |
+
}
|
| 83 |
+
],
|
| 84 |
+
"cudaVersion": "12.4"
|
| 85 |
+
}
|
wandb/run-20250212_122637-v3d3ouvn/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":0}}
|
wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-02-12T12:26:37.096402413Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpcjtnmyy4/port-224110.txt","pid":224110,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
| 2 |
+
{"time":"2025-02-12T12:26:37.136235603Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224110}
|
| 3 |
+
{"time":"2025-02-12T12:26:37.136202753Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":34237,"Zone":""}}
|
| 4 |
+
{"time":"2025-02-12T12:26:37.272154204Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:48156"}
|
| 5 |
+
{"time":"2025-02-12T12:26:37.280104802Z","level":"INFO","msg":"handleInformInit: received","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
|
| 6 |
+
{"time":"2025-02-12T12:26:37.385176776Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"v3d3ouvn","id":"127.0.0.1:48156"}
|
| 7 |
+
{"time":"2025-02-12T12:26:37.805006529Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:48156"}
|
| 8 |
+
{"time":"2025-02-12T12:26:37.805113068Z","level":"INFO","msg":"server is shutting down"}
|
| 9 |
+
{"time":"2025-02-12T12:26:37.805096358Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:48156"}
|
| 10 |
+
{"time":"2025-02-12T12:26:37.805232397Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:48156"}
|
| 11 |
+
{"time":"2025-02-12T12:26:37.995286135Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:34237->127.0.0.1:48156: use of closed network connection","id":"127.0.0.1:48156"}
|
| 12 |
+
{"time":"2025-02-12T12:26:39.120464204Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:48156"}
|
| 13 |
+
{"time":"2025-02-12T12:26:39.120492104Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:48156"}
|
| 14 |
+
{"time":"2025-02-12T12:26:39.120507034Z","level":"INFO","msg":"server is closed"}
|
wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-02-12T12:26:37.280430379Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-02-12T12:26:37.385120447Z","level":"INFO","msg":"created new stream","id":"v3d3ouvn"}
|
| 3 |
+
{"time":"2025-02-12T12:26:37.385167976Z","level":"INFO","msg":"stream: started","id":"v3d3ouvn"}
|
| 4 |
+
{"time":"2025-02-12T12:26:37.385225046Z","level":"INFO","msg":"writer: Do: started","stream_id":"v3d3ouvn"}
|
| 5 |
+
{"time":"2025-02-12T12:26:37.385310785Z","level":"INFO","msg":"sender: started","stream_id":"v3d3ouvn"}
|
| 6 |
+
{"time":"2025-02-12T12:26:37.385358905Z","level":"INFO","msg":"handler: started","stream_id":"v3d3ouvn"}
|
| 7 |
+
{"time":"2025-02-12T12:26:37.656629021Z","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-02-12T12:26:37.805164318Z","level":"INFO","msg":"stream: closing","id":"v3d3ouvn"}
|
| 9 |
+
{"time":"2025-02-12T12:26:37.805220128Z","level":"INFO","msg":"Stopping system monitor"}
|
| 10 |
+
{"time":"2025-02-12T12:26:37.805952593Z","level":"INFO","msg":"Stopped system monitor"}
|
| 11 |
+
{"time":"2025-02-12T12:26:38.904190518Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2025-02-12T12:26:39.120209166Z","level":"INFO","msg":"handler: closed","stream_id":"v3d3ouvn"}
|
| 13 |
+
{"time":"2025-02-12T12:26:39.120281046Z","level":"INFO","msg":"writer: Close: closed","stream_id":"v3d3ouvn"}
|
| 14 |
+
{"time":"2025-02-12T12:26:39.120312915Z","level":"INFO","msg":"sender: closed","stream_id":"v3d3ouvn"}
|
| 15 |
+
{"time":"2025-02-12T12:26:39.120355495Z","level":"INFO","msg":"stream: closed","id":"v3d3ouvn"}
|
wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
| 2 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Configure stats pid to 224110
|
| 3 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
| 4 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
| 5 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
| 6 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug.log
|
| 7 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122637-v3d3ouvn/logs/debug-internal.log
|
| 8 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():756] calling init triggers
|
| 9 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-02-12 12:26:37,062 INFO MainThread:224110 [wandb_init.py:init():789] starting backend
|
| 12 |
+
2025-02-12 12:26:37,272 INFO MainThread:224110 [wandb_init.py:init():793] sending inform_init request
|
| 13 |
+
2025-02-12 12:26:37,277 INFO MainThread:224110 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-02-12 12:26:37,277 INFO MainThread:224110 [wandb_init.py:init():808] backend started and connected
|
| 15 |
+
2025-02-12 12:26:37,279 INFO MainThread:224110 [wandb_init.py:init():901] updated telemetry
|
| 16 |
+
2025-02-12 12:26:37,285 INFO MainThread:224110 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-02-12 12:26:37,653 INFO MainThread:224110 [wandb_init.py:init():994] starting run threads in backend
|
| 18 |
+
2025-02-12 12:26:37,764 INFO MainThread:224110 [wandb_run.py:_console_start():2385] atexit reg
|
| 19 |
+
2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
| 20 |
+
2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
| 21 |
+
2025-02-12 12:26:37,765 INFO MainThread:224110 [wandb_run.py:_redirect():2325] Redirects installed.
|
| 22 |
+
2025-02-12 12:26:37,766 INFO MainThread:224110 [wandb_init.py:init():1036] run started, returning control to user process
|
| 23 |
+
2025-02-12 12:26:37,767 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-26-11_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
| 24 |
+
2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7a5cbc15a330>>
|
| 25 |
+
2025-02-12 12:26:37,770 INFO MainThread:224110 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
| 26 |
+
2025-02-12 12:26:37,805 WARNING MsgRouterThr:224110 [router.py:message_loop():75] message_loop has been closed
|
wandb/run-20250212_122637-v3d3ouvn/run-v3d3ouvn.wandb
ADDED
|
Binary file (11.3 kB). View file
|
|
|
wandb/run-20250212_122854-4m048f5s/files/config.yaml
ADDED
|
@@ -0,0 +1,512 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_attn_implementation_autoset:
|
| 2 |
+
value: true
|
| 3 |
+
_name_or_path:
|
| 4 |
+
value: openai/whisper-small
|
| 5 |
+
_wandb:
|
| 6 |
+
value:
|
| 7 |
+
cli_version: 0.19.6
|
| 8 |
+
m:
|
| 9 |
+
- "1": train/global_step
|
| 10 |
+
"6":
|
| 11 |
+
- 3
|
| 12 |
+
"7": []
|
| 13 |
+
python_version: 3.12.3
|
| 14 |
+
t:
|
| 15 |
+
"1":
|
| 16 |
+
- 1
|
| 17 |
+
- 5
|
| 18 |
+
- 11
|
| 19 |
+
- 49
|
| 20 |
+
- 51
|
| 21 |
+
- 53
|
| 22 |
+
- 55
|
| 23 |
+
- 71
|
| 24 |
+
- 100
|
| 25 |
+
"2":
|
| 26 |
+
- 1
|
| 27 |
+
- 5
|
| 28 |
+
- 11
|
| 29 |
+
- 49
|
| 30 |
+
- 51
|
| 31 |
+
- 53
|
| 32 |
+
- 55
|
| 33 |
+
- 71
|
| 34 |
+
- 100
|
| 35 |
+
"3":
|
| 36 |
+
- 7
|
| 37 |
+
- 13
|
| 38 |
+
- 19
|
| 39 |
+
- 23
|
| 40 |
+
- 55
|
| 41 |
+
- 66
|
| 42 |
+
"4": 3.12.3
|
| 43 |
+
"5": 0.19.6
|
| 44 |
+
"6": 4.49.0.dev0
|
| 45 |
+
"8":
|
| 46 |
+
- 5
|
| 47 |
+
"9":
|
| 48 |
+
"1": transformers_trainer
|
| 49 |
+
"12": 0.19.6
|
| 50 |
+
"13": linux-x86_64
|
| 51 |
+
accelerator_config:
|
| 52 |
+
value:
|
| 53 |
+
dispatch_batches: null
|
| 54 |
+
even_batches: true
|
| 55 |
+
gradient_accumulation_kwargs: null
|
| 56 |
+
non_blocking: false
|
| 57 |
+
split_batches: false
|
| 58 |
+
use_seedable_sampler: true
|
| 59 |
+
activation_dropout:
|
| 60 |
+
value: 0
|
| 61 |
+
activation_function:
|
| 62 |
+
value: gelu
|
| 63 |
+
adafactor:
|
| 64 |
+
value: false
|
| 65 |
+
adam_beta1:
|
| 66 |
+
value: 0.9
|
| 67 |
+
adam_beta2:
|
| 68 |
+
value: 0.999
|
| 69 |
+
adam_epsilon:
|
| 70 |
+
value: 1e-08
|
| 71 |
+
add_cross_attention:
|
| 72 |
+
value: false
|
| 73 |
+
apply_spec_augment:
|
| 74 |
+
value: false
|
| 75 |
+
architectures:
|
| 76 |
+
value:
|
| 77 |
+
- WhisperForConditionalGeneration
|
| 78 |
+
attention_dropout:
|
| 79 |
+
value: 0
|
| 80 |
+
auto_find_batch_size:
|
| 81 |
+
value: false
|
| 82 |
+
average_tokens_across_devices:
|
| 83 |
+
value: false
|
| 84 |
+
bad_words_ids:
|
| 85 |
+
value: null
|
| 86 |
+
batch_eval_metrics:
|
| 87 |
+
value: false
|
| 88 |
+
begin_suppress_tokens:
|
| 89 |
+
value:
|
| 90 |
+
- 220
|
| 91 |
+
- 50257
|
| 92 |
+
bf16:
|
| 93 |
+
value: false
|
| 94 |
+
bf16_full_eval:
|
| 95 |
+
value: false
|
| 96 |
+
bos_token_id:
|
| 97 |
+
value: 50257
|
| 98 |
+
chunk_size_feed_forward:
|
| 99 |
+
value: 0
|
| 100 |
+
classifier_proj_size:
|
| 101 |
+
value: 256
|
| 102 |
+
cross_attention_hidden_size:
|
| 103 |
+
value: null
|
| 104 |
+
d_model:
|
| 105 |
+
value: 768
|
| 106 |
+
data_seed:
|
| 107 |
+
value: null
|
| 108 |
+
dataloader_drop_last:
|
| 109 |
+
value: false
|
| 110 |
+
dataloader_num_workers:
|
| 111 |
+
value: 0
|
| 112 |
+
dataloader_persistent_workers:
|
| 113 |
+
value: false
|
| 114 |
+
dataloader_pin_memory:
|
| 115 |
+
value: true
|
| 116 |
+
dataloader_prefetch_factor:
|
| 117 |
+
value: null
|
| 118 |
+
ddp_backend:
|
| 119 |
+
value: null
|
| 120 |
+
ddp_broadcast_buffers:
|
| 121 |
+
value: null
|
| 122 |
+
ddp_bucket_cap_mb:
|
| 123 |
+
value: null
|
| 124 |
+
ddp_find_unused_parameters:
|
| 125 |
+
value: null
|
| 126 |
+
ddp_timeout:
|
| 127 |
+
value: 1800
|
| 128 |
+
debug:
|
| 129 |
+
value: []
|
| 130 |
+
decoder_attention_heads:
|
| 131 |
+
value: 12
|
| 132 |
+
decoder_ffn_dim:
|
| 133 |
+
value: 3072
|
| 134 |
+
decoder_layerdrop:
|
| 135 |
+
value: 0
|
| 136 |
+
decoder_layers:
|
| 137 |
+
value: 12
|
| 138 |
+
decoder_start_token_id:
|
| 139 |
+
value: 50258
|
| 140 |
+
deepspeed:
|
| 141 |
+
value: null
|
| 142 |
+
disable_tqdm:
|
| 143 |
+
value: false
|
| 144 |
+
dispatch_batches:
|
| 145 |
+
value: null
|
| 146 |
+
diversity_penalty:
|
| 147 |
+
value: 0
|
| 148 |
+
do_eval:
|
| 149 |
+
value: true
|
| 150 |
+
do_predict:
|
| 151 |
+
value: false
|
| 152 |
+
do_sample:
|
| 153 |
+
value: false
|
| 154 |
+
do_train:
|
| 155 |
+
value: true
|
| 156 |
+
dropout:
|
| 157 |
+
value: 0
|
| 158 |
+
early_stopping:
|
| 159 |
+
value: false
|
| 160 |
+
encoder_attention_heads:
|
| 161 |
+
value: 12
|
| 162 |
+
encoder_ffn_dim:
|
| 163 |
+
value: 3072
|
| 164 |
+
encoder_layerdrop:
|
| 165 |
+
value: 0
|
| 166 |
+
encoder_layers:
|
| 167 |
+
value: 12
|
| 168 |
+
encoder_no_repeat_ngram_size:
|
| 169 |
+
value: 0
|
| 170 |
+
eos_token_id:
|
| 171 |
+
value: 50257
|
| 172 |
+
eval_accumulation_steps:
|
| 173 |
+
value: null
|
| 174 |
+
eval_delay:
|
| 175 |
+
value: 0
|
| 176 |
+
eval_do_concat_batches:
|
| 177 |
+
value: true
|
| 178 |
+
eval_on_start:
|
| 179 |
+
value: false
|
| 180 |
+
eval_steps:
|
| 181 |
+
value: 1000
|
| 182 |
+
eval_strategy:
|
| 183 |
+
value: steps
|
| 184 |
+
eval_use_gather_object:
|
| 185 |
+
value: false
|
| 186 |
+
evaluation_strategy:
|
| 187 |
+
value: steps
|
| 188 |
+
exponential_decay_length_penalty:
|
| 189 |
+
value: null
|
| 190 |
+
finetuning_task:
|
| 191 |
+
value: null
|
| 192 |
+
forced_bos_token_id:
|
| 193 |
+
value: null
|
| 194 |
+
forced_decoder_ids:
|
| 195 |
+
value: null
|
| 196 |
+
forced_eos_token_id:
|
| 197 |
+
value: null
|
| 198 |
+
fp16:
|
| 199 |
+
value: true
|
| 200 |
+
fp16_backend:
|
| 201 |
+
value: auto
|
| 202 |
+
fp16_full_eval:
|
| 203 |
+
value: false
|
| 204 |
+
fp16_opt_level:
|
| 205 |
+
value: O1
|
| 206 |
+
fsdp:
|
| 207 |
+
value: []
|
| 208 |
+
fsdp_config:
|
| 209 |
+
value:
|
| 210 |
+
min_num_params: 0
|
| 211 |
+
xla: false
|
| 212 |
+
xla_fsdp_grad_ckpt: false
|
| 213 |
+
xla_fsdp_v2: false
|
| 214 |
+
fsdp_min_num_params:
|
| 215 |
+
value: 0
|
| 216 |
+
fsdp_transformer_layer_cls_to_wrap:
|
| 217 |
+
value: null
|
| 218 |
+
full_determinism:
|
| 219 |
+
value: false
|
| 220 |
+
generation_config:
|
| 221 |
+
value: null
|
| 222 |
+
generation_max_length:
|
| 223 |
+
value: 225
|
| 224 |
+
generation_num_beams:
|
| 225 |
+
value: null
|
| 226 |
+
gradient_accumulation_steps:
|
| 227 |
+
value: 1
|
| 228 |
+
gradient_checkpointing:
|
| 229 |
+
value: true
|
| 230 |
+
gradient_checkpointing_kwargs:
|
| 231 |
+
value: null
|
| 232 |
+
greater_is_better:
|
| 233 |
+
value: false
|
| 234 |
+
group_by_length:
|
| 235 |
+
value: false
|
| 236 |
+
half_precision_backend:
|
| 237 |
+
value: auto
|
| 238 |
+
hub_always_push:
|
| 239 |
+
value: false
|
| 240 |
+
hub_model_id:
|
| 241 |
+
value: null
|
| 242 |
+
hub_private_repo:
|
| 243 |
+
value: null
|
| 244 |
+
hub_strategy:
|
| 245 |
+
value: every_save
|
| 246 |
+
hub_token:
|
| 247 |
+
value: <HUB_TOKEN>
|
| 248 |
+
id2label:
|
| 249 |
+
value:
|
| 250 |
+
"0": LABEL_0
|
| 251 |
+
"1": LABEL_1
|
| 252 |
+
ignore_data_skip:
|
| 253 |
+
value: false
|
| 254 |
+
include_for_metrics:
|
| 255 |
+
value: []
|
| 256 |
+
include_inputs_for_metrics:
|
| 257 |
+
value: false
|
| 258 |
+
include_num_input_tokens_seen:
|
| 259 |
+
value: false
|
| 260 |
+
include_tokens_per_second:
|
| 261 |
+
value: false
|
| 262 |
+
init_std:
|
| 263 |
+
value: 0.02
|
| 264 |
+
is_decoder:
|
| 265 |
+
value: false
|
| 266 |
+
is_encoder_decoder:
|
| 267 |
+
value: true
|
| 268 |
+
jit_mode_eval:
|
| 269 |
+
value: false
|
| 270 |
+
label_names:
|
| 271 |
+
value: null
|
| 272 |
+
label_smoothing_factor:
|
| 273 |
+
value: 0
|
| 274 |
+
label2id:
|
| 275 |
+
value:
|
| 276 |
+
LABEL_0: 0
|
| 277 |
+
LABEL_1: 1
|
| 278 |
+
learning_rate:
|
| 279 |
+
value: 1e-05
|
| 280 |
+
length_column_name:
|
| 281 |
+
value: input_length
|
| 282 |
+
length_penalty:
|
| 283 |
+
value: 1
|
| 284 |
+
load_best_model_at_end:
|
| 285 |
+
value: true
|
| 286 |
+
local_rank:
|
| 287 |
+
value: 0
|
| 288 |
+
log_level:
|
| 289 |
+
value: passive
|
| 290 |
+
log_level_replica:
|
| 291 |
+
value: warning
|
| 292 |
+
log_on_each_node:
|
| 293 |
+
value: true
|
| 294 |
+
logging_dir:
|
| 295 |
+
value: ./runs/Feb12_12-28-29_tknika
|
| 296 |
+
logging_first_step:
|
| 297 |
+
value: false
|
| 298 |
+
logging_nan_inf_filter:
|
| 299 |
+
value: true
|
| 300 |
+
logging_steps:
|
| 301 |
+
value: 25
|
| 302 |
+
logging_strategy:
|
| 303 |
+
value: steps
|
| 304 |
+
lr_scheduler_type:
|
| 305 |
+
value: linear
|
| 306 |
+
mask_feature_length:
|
| 307 |
+
value: 10
|
| 308 |
+
mask_feature_min_masks:
|
| 309 |
+
value: 0
|
| 310 |
+
mask_feature_prob:
|
| 311 |
+
value: 0
|
| 312 |
+
mask_time_length:
|
| 313 |
+
value: 10
|
| 314 |
+
mask_time_min_masks:
|
| 315 |
+
value: 2
|
| 316 |
+
mask_time_prob:
|
| 317 |
+
value: 0.05
|
| 318 |
+
max_grad_norm:
|
| 319 |
+
value: 1
|
| 320 |
+
max_length:
|
| 321 |
+
value: 448
|
| 322 |
+
max_source_positions:
|
| 323 |
+
value: 1500
|
| 324 |
+
max_steps:
|
| 325 |
+
value: 8000
|
| 326 |
+
max_target_positions:
|
| 327 |
+
value: 448
|
| 328 |
+
median_filter_width:
|
| 329 |
+
value: 7
|
| 330 |
+
metric_for_best_model:
|
| 331 |
+
value: wer
|
| 332 |
+
min_length:
|
| 333 |
+
value: 0
|
| 334 |
+
model/num_parameters:
|
| 335 |
+
value: 241734912
|
| 336 |
+
model_type:
|
| 337 |
+
value: whisper
|
| 338 |
+
mp_parameters:
|
| 339 |
+
value: ""
|
| 340 |
+
neftune_noise_alpha:
|
| 341 |
+
value: null
|
| 342 |
+
no_cuda:
|
| 343 |
+
value: false
|
| 344 |
+
no_repeat_ngram_size:
|
| 345 |
+
value: 0
|
| 346 |
+
num_beam_groups:
|
| 347 |
+
value: 1
|
| 348 |
+
num_beams:
|
| 349 |
+
value: 1
|
| 350 |
+
num_hidden_layers:
|
| 351 |
+
value: 12
|
| 352 |
+
num_mel_bins:
|
| 353 |
+
value: 80
|
| 354 |
+
num_return_sequences:
|
| 355 |
+
value: 1
|
| 356 |
+
num_train_epochs:
|
| 357 |
+
value: 3
|
| 358 |
+
optim:
|
| 359 |
+
value: adamw_torch
|
| 360 |
+
optim_args:
|
| 361 |
+
value: null
|
| 362 |
+
optim_target_modules:
|
| 363 |
+
value: null
|
| 364 |
+
output_attentions:
|
| 365 |
+
value: false
|
| 366 |
+
output_dir:
|
| 367 |
+
value: ./
|
| 368 |
+
output_hidden_states:
|
| 369 |
+
value: false
|
| 370 |
+
output_scores:
|
| 371 |
+
value: false
|
| 372 |
+
overwrite_output_dir:
|
| 373 |
+
value: true
|
| 374 |
+
pad_token_id:
|
| 375 |
+
value: 50257
|
| 376 |
+
past_index:
|
| 377 |
+
value: -1
|
| 378 |
+
per_device_eval_batch_size:
|
| 379 |
+
value: 16
|
| 380 |
+
per_device_train_batch_size:
|
| 381 |
+
value: 32
|
| 382 |
+
per_gpu_eval_batch_size:
|
| 383 |
+
value: null
|
| 384 |
+
per_gpu_train_batch_size:
|
| 385 |
+
value: null
|
| 386 |
+
predict_with_generate:
|
| 387 |
+
value: true
|
| 388 |
+
prediction_loss_only:
|
| 389 |
+
value: false
|
| 390 |
+
prefix:
|
| 391 |
+
value: null
|
| 392 |
+
problem_type:
|
| 393 |
+
value: null
|
| 394 |
+
push_to_hub:
|
| 395 |
+
value: true
|
| 396 |
+
push_to_hub_model_id:
|
| 397 |
+
value: null
|
| 398 |
+
push_to_hub_organization:
|
| 399 |
+
value: null
|
| 400 |
+
push_to_hub_token:
|
| 401 |
+
value: <PUSH_TO_HUB_TOKEN>
|
| 402 |
+
ray_scope:
|
| 403 |
+
value: last
|
| 404 |
+
remove_invalid_values:
|
| 405 |
+
value: false
|
| 406 |
+
remove_unused_columns:
|
| 407 |
+
value: true
|
| 408 |
+
repetition_penalty:
|
| 409 |
+
value: 1
|
| 410 |
+
report_to:
|
| 411 |
+
value:
|
| 412 |
+
- wandb
|
| 413 |
+
restore_callback_states_from_checkpoint:
|
| 414 |
+
value: false
|
| 415 |
+
resume_from_checkpoint:
|
| 416 |
+
value: null
|
| 417 |
+
return_dict:
|
| 418 |
+
value: true
|
| 419 |
+
return_dict_in_generate:
|
| 420 |
+
value: false
|
| 421 |
+
run_name:
|
| 422 |
+
value: whisper-small-eu
|
| 423 |
+
save_on_each_node:
|
| 424 |
+
value: false
|
| 425 |
+
save_only_model:
|
| 426 |
+
value: false
|
| 427 |
+
save_safetensors:
|
| 428 |
+
value: true
|
| 429 |
+
save_steps:
|
| 430 |
+
value: 1000
|
| 431 |
+
save_strategy:
|
| 432 |
+
value: steps
|
| 433 |
+
save_total_limit:
|
| 434 |
+
value: null
|
| 435 |
+
scale_embedding:
|
| 436 |
+
value: false
|
| 437 |
+
seed:
|
| 438 |
+
value: 42
|
| 439 |
+
sep_token_id:
|
| 440 |
+
value: null
|
| 441 |
+
skip_memory_metrics:
|
| 442 |
+
value: true
|
| 443 |
+
sortish_sampler:
|
| 444 |
+
value: false
|
| 445 |
+
split_batches:
|
| 446 |
+
value: null
|
| 447 |
+
suppress_tokens:
|
| 448 |
+
value: null
|
| 449 |
+
task_specific_params:
|
| 450 |
+
value: null
|
| 451 |
+
temperature:
|
| 452 |
+
value: 1
|
| 453 |
+
tf_legacy_loss:
|
| 454 |
+
value: false
|
| 455 |
+
tf32:
|
| 456 |
+
value: null
|
| 457 |
+
tie_encoder_decoder:
|
| 458 |
+
value: false
|
| 459 |
+
tie_word_embeddings:
|
| 460 |
+
value: true
|
| 461 |
+
tokenizer_class:
|
| 462 |
+
value: null
|
| 463 |
+
top_k:
|
| 464 |
+
value: 50
|
| 465 |
+
top_p:
|
| 466 |
+
value: 1
|
| 467 |
+
torch_compile:
|
| 468 |
+
value: false
|
| 469 |
+
torch_compile_backend:
|
| 470 |
+
value: null
|
| 471 |
+
torch_compile_mode:
|
| 472 |
+
value: null
|
| 473 |
+
torch_dtype:
|
| 474 |
+
value: float32
|
| 475 |
+
torch_empty_cache_steps:
|
| 476 |
+
value: null
|
| 477 |
+
torchdynamo:
|
| 478 |
+
value: null
|
| 479 |
+
torchscript:
|
| 480 |
+
value: false
|
| 481 |
+
tpu_metrics_debug:
|
| 482 |
+
value: false
|
| 483 |
+
tpu_num_cores:
|
| 484 |
+
value: null
|
| 485 |
+
transformers_version:
|
| 486 |
+
value: 4.49.0.dev0
|
| 487 |
+
typical_p:
|
| 488 |
+
value: 1
|
| 489 |
+
use_bfloat16:
|
| 490 |
+
value: false
|
| 491 |
+
use_cache:
|
| 492 |
+
value: false
|
| 493 |
+
use_cpu:
|
| 494 |
+
value: false
|
| 495 |
+
use_ipex:
|
| 496 |
+
value: false
|
| 497 |
+
use_legacy_prediction_loop:
|
| 498 |
+
value: false
|
| 499 |
+
use_liger_kernel:
|
| 500 |
+
value: false
|
| 501 |
+
use_mps_device:
|
| 502 |
+
value: false
|
| 503 |
+
use_weighted_layer_sum:
|
| 504 |
+
value: false
|
| 505 |
+
vocab_size:
|
| 506 |
+
value: 51865
|
| 507 |
+
warmup_ratio:
|
| 508 |
+
value: 0
|
| 509 |
+
warmup_steps:
|
| 510 |
+
value: 500
|
| 511 |
+
weight_decay:
|
| 512 |
+
value: 0
|
wandb/run-20250212_122854-4m048f5s/files/output.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
| 2 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
|
| 3 |
+
main()
|
| 4 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
|
| 5 |
+
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
| 6 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 7 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
| 8 |
+
return inner_training_loop(
|
| 9 |
+
^^^^^^^^^^^^^^^^^^^^
|
| 10 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
| 11 |
+
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
| 12 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 13 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
| 14 |
+
return self.call_event("on_epoch_begin", args, state, control)
|
| 15 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 16 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
| 17 |
+
result = getattr(callback, event)(
|
| 18 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 19 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
|
| 20 |
+
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
| 21 |
+
^^^^^^^^^^^^^^^^^^^^^^^^
|
| 22 |
+
AttributeError: 'NoneType' object has no attribute 'dataset'
|
wandb/run-20250212_122854-4m048f5s/files/requirements.txt
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiosignal==1.3.2
|
| 2 |
+
Markdown==3.7
|
| 3 |
+
more-itertools==10.6.0
|
| 4 |
+
requests==2.32.3
|
| 5 |
+
sentry-sdk==2.21.0
|
| 6 |
+
torchaudio==2.6.0
|
| 7 |
+
charset-normalizer==3.4.1
|
| 8 |
+
docker-pycreds==0.4.0
|
| 9 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 10 |
+
PyYAML==6.0.2
|
| 11 |
+
librosa==0.10.2.post1
|
| 12 |
+
soxr==0.5.0.post1
|
| 13 |
+
multiprocess==0.70.16
|
| 14 |
+
setuptools==75.8.0
|
| 15 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 16 |
+
joblib==1.4.2
|
| 17 |
+
pytz==2025.1
|
| 18 |
+
pip==24.0
|
| 19 |
+
scikit-learn==1.6.1
|
| 20 |
+
certifi==2025.1.31
|
| 21 |
+
jiwer==3.1.0
|
| 22 |
+
regex==2024.11.6
|
| 23 |
+
annotated-types==0.7.0
|
| 24 |
+
grpcio==1.70.0
|
| 25 |
+
msgpack==1.1.0
|
| 26 |
+
mpmath==1.3.0
|
| 27 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 28 |
+
soundfile==0.13.1
|
| 29 |
+
dill==0.3.8
|
| 30 |
+
nvidia-nvtx-cu12==12.4.127
|
| 31 |
+
six==1.17.0
|
| 32 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 33 |
+
pyarrow==19.0.0
|
| 34 |
+
nvidia-nccl-cu12==2.21.5
|
| 35 |
+
psutil==6.1.1
|
| 36 |
+
decorator==5.1.1
|
| 37 |
+
llvmlite==0.44.0
|
| 38 |
+
frozenlist==1.5.0
|
| 39 |
+
pydantic==2.10.6
|
| 40 |
+
networkx==3.4.2
|
| 41 |
+
idna==3.10
|
| 42 |
+
wandb==0.19.6
|
| 43 |
+
aiohttp==3.11.12
|
| 44 |
+
RapidFuzz==3.12.1
|
| 45 |
+
pandas==2.2.3
|
| 46 |
+
python-dateutil==2.9.0.post0
|
| 47 |
+
numpy==2.1.3
|
| 48 |
+
tokenizers==0.21.0
|
| 49 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 50 |
+
typing_extensions==4.12.2
|
| 51 |
+
urllib3==2.3.0
|
| 52 |
+
setproctitle==1.3.4
|
| 53 |
+
tzdata==2025.1
|
| 54 |
+
sympy==1.13.1
|
| 55 |
+
pooch==1.8.2
|
| 56 |
+
click==8.1.8
|
| 57 |
+
pydantic_core==2.27.2
|
| 58 |
+
MarkupSafe==3.0.2
|
| 59 |
+
scipy==1.15.1
|
| 60 |
+
accelerate==1.3.0
|
| 61 |
+
tensorboard==2.19.0
|
| 62 |
+
protobuf==5.29.3
|
| 63 |
+
gitdb==4.0.12
|
| 64 |
+
smmap==5.0.2
|
| 65 |
+
absl-py==2.1.0
|
| 66 |
+
tqdm==4.67.1
|
| 67 |
+
yarl==1.18.3
|
| 68 |
+
pycparser==2.22
|
| 69 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 70 |
+
attrs==25.1.0
|
| 71 |
+
lazy_loader==0.4
|
| 72 |
+
tensorboard-data-server==0.7.2
|
| 73 |
+
threadpoolctl==3.5.0
|
| 74 |
+
GitPython==3.1.44
|
| 75 |
+
safetensors==0.5.2
|
| 76 |
+
fsspec==2024.12.0
|
| 77 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 78 |
+
filelock==3.17.0
|
| 79 |
+
aiohappyeyeballs==2.4.6
|
| 80 |
+
packaging==24.2
|
| 81 |
+
datasets==3.2.1.dev0
|
| 82 |
+
audioread==3.0.1
|
| 83 |
+
propcache==0.2.1
|
| 84 |
+
transformers==4.49.0.dev0
|
| 85 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 86 |
+
cffi==1.17.1
|
| 87 |
+
evaluate==0.4.3
|
| 88 |
+
Werkzeug==3.1.3
|
| 89 |
+
huggingface-hub==0.28.1
|
| 90 |
+
Jinja2==3.1.5
|
| 91 |
+
torch==2.6.0
|
| 92 |
+
nvidia-curand-cu12==10.3.5.147
|
| 93 |
+
xxhash==3.5.0
|
| 94 |
+
platformdirs==4.3.6
|
| 95 |
+
multidict==6.1.0
|
| 96 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 97 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 98 |
+
triton==3.2.0
|
| 99 |
+
numba==0.61.0
|
| 100 |
+
importlib_metadata==8.0.0
|
| 101 |
+
platformdirs==4.2.2
|
| 102 |
+
typeguard==4.3.0
|
| 103 |
+
more-itertools==10.3.0
|
| 104 |
+
tomli==2.0.1
|
| 105 |
+
autocommand==2.2.2
|
| 106 |
+
zipp==3.19.2
|
| 107 |
+
typing_extensions==4.12.2
|
| 108 |
+
backports.tarfile==1.2.0
|
| 109 |
+
inflect==7.3.1
|
| 110 |
+
jaraco.text==3.12.1
|
| 111 |
+
wheel==0.43.0
|
| 112 |
+
packaging==24.2
|
| 113 |
+
jaraco.collections==5.1.0
|
| 114 |
+
jaraco.functools==4.0.1
|
| 115 |
+
jaraco.context==5.3.0
|
wandb/run-20250212_122854-4m048f5s/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
|
| 3 |
+
"python": "CPython 3.12.3",
|
| 4 |
+
"startedAt": "2025-02-12T12:28:54.528397Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--model_name_or_path=openai/whisper-small",
|
| 7 |
+
"--dataset_name=asierhv/composite_corpus_eu_v2.1",
|
| 8 |
+
"--language=basque",
|
| 9 |
+
"--train_split_name=train",
|
| 10 |
+
"--eval_split_name=dev_parl+test_parl+test_cv+test_oslr",
|
| 11 |
+
"--model_index_name=Whisper Small Basque",
|
| 12 |
+
"--max_steps=8000",
|
| 13 |
+
"--output_dir=./",
|
| 14 |
+
"--per_device_train_batch_size=32",
|
| 15 |
+
"--per_device_eval_batch_size=16",
|
| 16 |
+
"--gradient_accumulation_steps=1",
|
| 17 |
+
"--logging_steps=25",
|
| 18 |
+
"--learning_rate=1e-5",
|
| 19 |
+
"--warmup_steps=500",
|
| 20 |
+
"--evaluation_strategy=steps",
|
| 21 |
+
"--eval_steps=1000",
|
| 22 |
+
"--save_strategy=steps",
|
| 23 |
+
"--save_steps=1000",
|
| 24 |
+
"--generation_max_length=225",
|
| 25 |
+
"--length_column_name=input_length",
|
| 26 |
+
"--max_duration_in_seconds=30",
|
| 27 |
+
"--text_column_name=sentence",
|
| 28 |
+
"--freeze_feature_encoder=False",
|
| 29 |
+
"--report_to=tensorboard",
|
| 30 |
+
"--metric_for_best_model=wer",
|
| 31 |
+
"--greater_is_better=False",
|
| 32 |
+
"--load_best_model_at_end",
|
| 33 |
+
"--gradient_checkpointing",
|
| 34 |
+
"--fp16",
|
| 35 |
+
"--overwrite_output_dir",
|
| 36 |
+
"--do_train",
|
| 37 |
+
"--do_eval",
|
| 38 |
+
"--predict_with_generate",
|
| 39 |
+
"--do_normalize_eval",
|
| 40 |
+
"--streaming",
|
| 41 |
+
"--use_auth_token",
|
| 42 |
+
"--push_to_hub",
|
| 43 |
+
"--report_to",
|
| 44 |
+
"wandb",
|
| 45 |
+
"--run_name",
|
| 46 |
+
"whisper-small-eu"
|
| 47 |
+
],
|
| 48 |
+
"program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
|
| 49 |
+
"codePath": "run_speech_recognition_seq2seq_streaming.py",
|
| 50 |
+
"git": {
|
| 51 |
+
"remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
|
| 52 |
+
"commit": "9c975864b20b4df94398a870e97cad2934253ec3"
|
| 53 |
+
},
|
| 54 |
+
"email": "xezpeleta@gmail.com",
|
| 55 |
+
"root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
|
| 56 |
+
"host": "tknika",
|
| 57 |
+
"executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
|
| 58 |
+
"codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
|
| 59 |
+
"cpu_count": 8,
|
| 60 |
+
"cpu_count_logical": 8,
|
| 61 |
+
"gpu": "NVIDIA L40-48Q",
|
| 62 |
+
"gpu_count": 1,
|
| 63 |
+
"disk": {
|
| 64 |
+
"/": {
|
| 65 |
+
"total": "525987168256",
|
| 66 |
+
"used": "297346756608"
|
| 67 |
+
}
|
| 68 |
+
},
|
| 69 |
+
"memory": {
|
| 70 |
+
"total": "33654022144"
|
| 71 |
+
},
|
| 72 |
+
"cpu": {
|
| 73 |
+
"count": 8,
|
| 74 |
+
"countLogical": 8
|
| 75 |
+
},
|
| 76 |
+
"gpu_nvidia": [
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA L40-48Q",
|
| 79 |
+
"memoryTotal": "51539607552",
|
| 80 |
+
"cudaCores": 18176,
|
| 81 |
+
"architecture": "Ada"
|
| 82 |
+
}
|
| 83 |
+
],
|
| 84 |
+
"cudaVersion": "12.4"
|
| 85 |
+
}
|
wandb/run-20250212_122854-4m048f5s/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":0}}
|
wandb/run-20250212_122854-4m048f5s/logs/debug-core.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-02-12T12:28:54.343223143Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmg8o5mqm/port-224528.txt","pid":224528,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
| 2 |
+
{"time":"2025-02-12T12:28:54.34827505Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":224528}
|
| 3 |
+
{"time":"2025-02-12T12:28:54.34821581Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":44981,"Zone":""}}
|
| 4 |
+
{"time":"2025-02-12T12:28:54.521681286Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:51116"}
|
| 5 |
+
{"time":"2025-02-12T12:28:54.53173104Z","level":"INFO","msg":"handleInformInit: received","streamId":"4m048f5s","id":"127.0.0.1:51116"}
|
| 6 |
+
{"time":"2025-02-12T12:28:54.636478984Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"4m048f5s","id":"127.0.0.1:51116"}
|
| 7 |
+
{"time":"2025-02-12T12:28:55.028718067Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:51116"}
|
| 8 |
+
{"time":"2025-02-12T12:28:55.028819337Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:51116"}
|
| 9 |
+
{"time":"2025-02-12T12:28:55.028818347Z","level":"INFO","msg":"server is shutting down"}
|
| 10 |
+
{"time":"2025-02-12T12:28:55.028912476Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:51116"}
|
| 11 |
+
{"time":"2025-02-12T12:28:55.368512133Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:44981->127.0.0.1:51116: use of closed network connection","id":"127.0.0.1:51116"}
|
| 12 |
+
{"time":"2025-02-12T12:28:56.249016671Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:51116"}
|
| 13 |
+
{"time":"2025-02-12T12:28:56.249048031Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:51116"}
|
| 14 |
+
{"time":"2025-02-12T12:28:56.249109521Z","level":"INFO","msg":"server is closed"}
|
wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-02-12T12:28:54.532033248Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-02-12T12:28:54.636425775Z","level":"INFO","msg":"created new stream","id":"4m048f5s"}
|
| 3 |
+
{"time":"2025-02-12T12:28:54.636473304Z","level":"INFO","msg":"stream: started","id":"4m048f5s"}
|
| 4 |
+
{"time":"2025-02-12T12:28:54.636556744Z","level":"INFO","msg":"writer: Do: started","stream_id":"4m048f5s"}
|
| 5 |
+
{"time":"2025-02-12T12:28:54.636597734Z","level":"INFO","msg":"handler: started","stream_id":"4m048f5s"}
|
| 6 |
+
{"time":"2025-02-12T12:28:54.636670993Z","level":"INFO","msg":"sender: started","stream_id":"4m048f5s"}
|
| 7 |
+
{"time":"2025-02-12T12:28:54.886030488Z","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-02-12T12:28:55.028853626Z","level":"INFO","msg":"stream: closing","id":"4m048f5s"}
|
| 9 |
+
{"time":"2025-02-12T12:28:55.028891716Z","level":"INFO","msg":"Stopping system monitor"}
|
| 10 |
+
{"time":"2025-02-12T12:28:55.029589382Z","level":"INFO","msg":"Stopped system monitor"}
|
| 11 |
+
{"time":"2025-02-12T12:28:56.017176821Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2025-02-12T12:28:56.248703933Z","level":"INFO","msg":"handler: closed","stream_id":"4m048f5s"}
|
| 13 |
+
{"time":"2025-02-12T12:28:56.248768363Z","level":"INFO","msg":"writer: Close: closed","stream_id":"4m048f5s"}
|
| 14 |
+
{"time":"2025-02-12T12:28:56.248802103Z","level":"INFO","msg":"sender: closed","stream_id":"4m048f5s"}
|
| 15 |
+
{"time":"2025-02-12T12:28:56.248896982Z","level":"INFO","msg":"stream: closed","id":"4m048f5s"}
|
wandb/run-20250212_122854-4m048f5s/logs/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
| 2 |
+
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Configure stats pid to 224528
|
| 3 |
+
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
| 4 |
+
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
| 5 |
+
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
| 6 |
+
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug.log
|
| 7 |
+
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_122854-4m048f5s/logs/debug-internal.log
|
| 8 |
+
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():756] calling init triggers
|
| 9 |
+
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-02-12 12:28:54,316 INFO MainThread:224528 [wandb_init.py:init():789] starting backend
|
| 12 |
+
2025-02-12 12:28:54,521 INFO MainThread:224528 [wandb_init.py:init():793] sending inform_init request
|
| 13 |
+
2025-02-12 12:28:54,527 INFO MainThread:224528 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-02-12 12:28:54,528 INFO MainThread:224528 [wandb_init.py:init():808] backend started and connected
|
| 15 |
+
2025-02-12 12:28:54,530 INFO MainThread:224528 [wandb_init.py:init():901] updated telemetry
|
| 16 |
+
2025-02-12 12:28:54,537 INFO MainThread:224528 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-02-12 12:28:54,883 INFO MainThread:224528 [wandb_init.py:init():994] starting run threads in backend
|
| 18 |
+
2025-02-12 12:28:54,988 INFO MainThread:224528 [wandb_run.py:_console_start():2385] atexit reg
|
| 19 |
+
2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
| 20 |
+
2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
| 21 |
+
2025-02-12 12:28:54,989 INFO MainThread:224528 [wandb_run.py:_redirect():2325] Redirects installed.
|
| 22 |
+
2025-02-12 12:28:54,990 INFO MainThread:224528 [wandb_init.py:init():1036] run started, returning control to user process
|
| 23 |
+
2025-02-12 12:28:54,991 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-28-29_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
| 24 |
+
2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x71c5f6c57cb0>>
|
| 25 |
+
2025-02-12 12:28:54,995 INFO MainThread:224528 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
| 26 |
+
2025-02-12 12:28:55,029 WARNING MsgRouterThr:224528 [router.py:message_loop():75] message_loop has been closed
|
wandb/run-20250212_122854-4m048f5s/run-4m048f5s.wandb
ADDED
|
Binary file (11.3 kB). View file
|
|
|
wandb/run-20250212_125202-c6xjc1gs/files/config.yaml
ADDED
|
@@ -0,0 +1,512 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_attn_implementation_autoset:
|
| 2 |
+
value: true
|
| 3 |
+
_name_or_path:
|
| 4 |
+
value: openai/whisper-small
|
| 5 |
+
_wandb:
|
| 6 |
+
value:
|
| 7 |
+
cli_version: 0.19.6
|
| 8 |
+
m:
|
| 9 |
+
- "1": train/global_step
|
| 10 |
+
"6":
|
| 11 |
+
- 3
|
| 12 |
+
"7": []
|
| 13 |
+
python_version: 3.12.3
|
| 14 |
+
t:
|
| 15 |
+
"1":
|
| 16 |
+
- 1
|
| 17 |
+
- 5
|
| 18 |
+
- 11
|
| 19 |
+
- 49
|
| 20 |
+
- 51
|
| 21 |
+
- 53
|
| 22 |
+
- 55
|
| 23 |
+
- 71
|
| 24 |
+
- 100
|
| 25 |
+
"2":
|
| 26 |
+
- 1
|
| 27 |
+
- 5
|
| 28 |
+
- 11
|
| 29 |
+
- 49
|
| 30 |
+
- 51
|
| 31 |
+
- 53
|
| 32 |
+
- 55
|
| 33 |
+
- 71
|
| 34 |
+
- 100
|
| 35 |
+
"3":
|
| 36 |
+
- 7
|
| 37 |
+
- 13
|
| 38 |
+
- 19
|
| 39 |
+
- 23
|
| 40 |
+
- 55
|
| 41 |
+
- 66
|
| 42 |
+
"4": 3.12.3
|
| 43 |
+
"5": 0.19.6
|
| 44 |
+
"6": 4.49.0.dev0
|
| 45 |
+
"8":
|
| 46 |
+
- 5
|
| 47 |
+
"9":
|
| 48 |
+
"1": transformers_trainer
|
| 49 |
+
"12": 0.19.6
|
| 50 |
+
"13": linux-x86_64
|
| 51 |
+
accelerator_config:
|
| 52 |
+
value:
|
| 53 |
+
dispatch_batches: null
|
| 54 |
+
even_batches: true
|
| 55 |
+
gradient_accumulation_kwargs: null
|
| 56 |
+
non_blocking: false
|
| 57 |
+
split_batches: false
|
| 58 |
+
use_seedable_sampler: true
|
| 59 |
+
activation_dropout:
|
| 60 |
+
value: 0
|
| 61 |
+
activation_function:
|
| 62 |
+
value: gelu
|
| 63 |
+
adafactor:
|
| 64 |
+
value: false
|
| 65 |
+
adam_beta1:
|
| 66 |
+
value: 0.9
|
| 67 |
+
adam_beta2:
|
| 68 |
+
value: 0.999
|
| 69 |
+
adam_epsilon:
|
| 70 |
+
value: 1e-08
|
| 71 |
+
add_cross_attention:
|
| 72 |
+
value: false
|
| 73 |
+
apply_spec_augment:
|
| 74 |
+
value: false
|
| 75 |
+
architectures:
|
| 76 |
+
value:
|
| 77 |
+
- WhisperForConditionalGeneration
|
| 78 |
+
attention_dropout:
|
| 79 |
+
value: 0
|
| 80 |
+
auto_find_batch_size:
|
| 81 |
+
value: false
|
| 82 |
+
average_tokens_across_devices:
|
| 83 |
+
value: false
|
| 84 |
+
bad_words_ids:
|
| 85 |
+
value: null
|
| 86 |
+
batch_eval_metrics:
|
| 87 |
+
value: false
|
| 88 |
+
begin_suppress_tokens:
|
| 89 |
+
value:
|
| 90 |
+
- 220
|
| 91 |
+
- 50257
|
| 92 |
+
bf16:
|
| 93 |
+
value: false
|
| 94 |
+
bf16_full_eval:
|
| 95 |
+
value: false
|
| 96 |
+
bos_token_id:
|
| 97 |
+
value: 50257
|
| 98 |
+
chunk_size_feed_forward:
|
| 99 |
+
value: 0
|
| 100 |
+
classifier_proj_size:
|
| 101 |
+
value: 256
|
| 102 |
+
cross_attention_hidden_size:
|
| 103 |
+
value: null
|
| 104 |
+
d_model:
|
| 105 |
+
value: 768
|
| 106 |
+
data_seed:
|
| 107 |
+
value: null
|
| 108 |
+
dataloader_drop_last:
|
| 109 |
+
value: false
|
| 110 |
+
dataloader_num_workers:
|
| 111 |
+
value: 0
|
| 112 |
+
dataloader_persistent_workers:
|
| 113 |
+
value: false
|
| 114 |
+
dataloader_pin_memory:
|
| 115 |
+
value: true
|
| 116 |
+
dataloader_prefetch_factor:
|
| 117 |
+
value: null
|
| 118 |
+
ddp_backend:
|
| 119 |
+
value: null
|
| 120 |
+
ddp_broadcast_buffers:
|
| 121 |
+
value: null
|
| 122 |
+
ddp_bucket_cap_mb:
|
| 123 |
+
value: null
|
| 124 |
+
ddp_find_unused_parameters:
|
| 125 |
+
value: null
|
| 126 |
+
ddp_timeout:
|
| 127 |
+
value: 1800
|
| 128 |
+
debug:
|
| 129 |
+
value: []
|
| 130 |
+
decoder_attention_heads:
|
| 131 |
+
value: 12
|
| 132 |
+
decoder_ffn_dim:
|
| 133 |
+
value: 3072
|
| 134 |
+
decoder_layerdrop:
|
| 135 |
+
value: 0
|
| 136 |
+
decoder_layers:
|
| 137 |
+
value: 12
|
| 138 |
+
decoder_start_token_id:
|
| 139 |
+
value: 50258
|
| 140 |
+
deepspeed:
|
| 141 |
+
value: null
|
| 142 |
+
disable_tqdm:
|
| 143 |
+
value: false
|
| 144 |
+
dispatch_batches:
|
| 145 |
+
value: null
|
| 146 |
+
diversity_penalty:
|
| 147 |
+
value: 0
|
| 148 |
+
do_eval:
|
| 149 |
+
value: true
|
| 150 |
+
do_predict:
|
| 151 |
+
value: false
|
| 152 |
+
do_sample:
|
| 153 |
+
value: false
|
| 154 |
+
do_train:
|
| 155 |
+
value: true
|
| 156 |
+
dropout:
|
| 157 |
+
value: 0
|
| 158 |
+
early_stopping:
|
| 159 |
+
value: false
|
| 160 |
+
encoder_attention_heads:
|
| 161 |
+
value: 12
|
| 162 |
+
encoder_ffn_dim:
|
| 163 |
+
value: 3072
|
| 164 |
+
encoder_layerdrop:
|
| 165 |
+
value: 0
|
| 166 |
+
encoder_layers:
|
| 167 |
+
value: 12
|
| 168 |
+
encoder_no_repeat_ngram_size:
|
| 169 |
+
value: 0
|
| 170 |
+
eos_token_id:
|
| 171 |
+
value: 50257
|
| 172 |
+
eval_accumulation_steps:
|
| 173 |
+
value: null
|
| 174 |
+
eval_delay:
|
| 175 |
+
value: 0
|
| 176 |
+
eval_do_concat_batches:
|
| 177 |
+
value: true
|
| 178 |
+
eval_on_start:
|
| 179 |
+
value: false
|
| 180 |
+
eval_steps:
|
| 181 |
+
value: 1000
|
| 182 |
+
eval_strategy:
|
| 183 |
+
value: steps
|
| 184 |
+
eval_use_gather_object:
|
| 185 |
+
value: false
|
| 186 |
+
evaluation_strategy:
|
| 187 |
+
value: steps
|
| 188 |
+
exponential_decay_length_penalty:
|
| 189 |
+
value: null
|
| 190 |
+
finetuning_task:
|
| 191 |
+
value: null
|
| 192 |
+
forced_bos_token_id:
|
| 193 |
+
value: null
|
| 194 |
+
forced_decoder_ids:
|
| 195 |
+
value: null
|
| 196 |
+
forced_eos_token_id:
|
| 197 |
+
value: null
|
| 198 |
+
fp16:
|
| 199 |
+
value: true
|
| 200 |
+
fp16_backend:
|
| 201 |
+
value: auto
|
| 202 |
+
fp16_full_eval:
|
| 203 |
+
value: false
|
| 204 |
+
fp16_opt_level:
|
| 205 |
+
value: O1
|
| 206 |
+
fsdp:
|
| 207 |
+
value: []
|
| 208 |
+
fsdp_config:
|
| 209 |
+
value:
|
| 210 |
+
min_num_params: 0
|
| 211 |
+
xla: false
|
| 212 |
+
xla_fsdp_grad_ckpt: false
|
| 213 |
+
xla_fsdp_v2: false
|
| 214 |
+
fsdp_min_num_params:
|
| 215 |
+
value: 0
|
| 216 |
+
fsdp_transformer_layer_cls_to_wrap:
|
| 217 |
+
value: null
|
| 218 |
+
full_determinism:
|
| 219 |
+
value: false
|
| 220 |
+
generation_config:
|
| 221 |
+
value: null
|
| 222 |
+
generation_max_length:
|
| 223 |
+
value: 225
|
| 224 |
+
generation_num_beams:
|
| 225 |
+
value: null
|
| 226 |
+
gradient_accumulation_steps:
|
| 227 |
+
value: 1
|
| 228 |
+
gradient_checkpointing:
|
| 229 |
+
value: true
|
| 230 |
+
gradient_checkpointing_kwargs:
|
| 231 |
+
value: null
|
| 232 |
+
greater_is_better:
|
| 233 |
+
value: false
|
| 234 |
+
group_by_length:
|
| 235 |
+
value: false
|
| 236 |
+
half_precision_backend:
|
| 237 |
+
value: auto
|
| 238 |
+
hub_always_push:
|
| 239 |
+
value: false
|
| 240 |
+
hub_model_id:
|
| 241 |
+
value: null
|
| 242 |
+
hub_private_repo:
|
| 243 |
+
value: null
|
| 244 |
+
hub_strategy:
|
| 245 |
+
value: every_save
|
| 246 |
+
hub_token:
|
| 247 |
+
value: <HUB_TOKEN>
|
| 248 |
+
id2label:
|
| 249 |
+
value:
|
| 250 |
+
"0": LABEL_0
|
| 251 |
+
"1": LABEL_1
|
| 252 |
+
ignore_data_skip:
|
| 253 |
+
value: false
|
| 254 |
+
include_for_metrics:
|
| 255 |
+
value: []
|
| 256 |
+
include_inputs_for_metrics:
|
| 257 |
+
value: false
|
| 258 |
+
include_num_input_tokens_seen:
|
| 259 |
+
value: false
|
| 260 |
+
include_tokens_per_second:
|
| 261 |
+
value: false
|
| 262 |
+
init_std:
|
| 263 |
+
value: 0.02
|
| 264 |
+
is_decoder:
|
| 265 |
+
value: false
|
| 266 |
+
is_encoder_decoder:
|
| 267 |
+
value: true
|
| 268 |
+
jit_mode_eval:
|
| 269 |
+
value: false
|
| 270 |
+
label_names:
|
| 271 |
+
value: null
|
| 272 |
+
label_smoothing_factor:
|
| 273 |
+
value: 0
|
| 274 |
+
label2id:
|
| 275 |
+
value:
|
| 276 |
+
LABEL_0: 0
|
| 277 |
+
LABEL_1: 1
|
| 278 |
+
learning_rate:
|
| 279 |
+
value: 1e-05
|
| 280 |
+
length_column_name:
|
| 281 |
+
value: input_length
|
| 282 |
+
length_penalty:
|
| 283 |
+
value: 1
|
| 284 |
+
load_best_model_at_end:
|
| 285 |
+
value: true
|
| 286 |
+
local_rank:
|
| 287 |
+
value: 0
|
| 288 |
+
log_level:
|
| 289 |
+
value: passive
|
| 290 |
+
log_level_replica:
|
| 291 |
+
value: warning
|
| 292 |
+
log_on_each_node:
|
| 293 |
+
value: true
|
| 294 |
+
logging_dir:
|
| 295 |
+
value: ./runs/Feb12_12-51-48_tknika
|
| 296 |
+
logging_first_step:
|
| 297 |
+
value: false
|
| 298 |
+
logging_nan_inf_filter:
|
| 299 |
+
value: true
|
| 300 |
+
logging_steps:
|
| 301 |
+
value: 25
|
| 302 |
+
logging_strategy:
|
| 303 |
+
value: steps
|
| 304 |
+
lr_scheduler_type:
|
| 305 |
+
value: linear
|
| 306 |
+
mask_feature_length:
|
| 307 |
+
value: 10
|
| 308 |
+
mask_feature_min_masks:
|
| 309 |
+
value: 0
|
| 310 |
+
mask_feature_prob:
|
| 311 |
+
value: 0
|
| 312 |
+
mask_time_length:
|
| 313 |
+
value: 10
|
| 314 |
+
mask_time_min_masks:
|
| 315 |
+
value: 2
|
| 316 |
+
mask_time_prob:
|
| 317 |
+
value: 0.05
|
| 318 |
+
max_grad_norm:
|
| 319 |
+
value: 1
|
| 320 |
+
max_length:
|
| 321 |
+
value: 448
|
| 322 |
+
max_source_positions:
|
| 323 |
+
value: 1500
|
| 324 |
+
max_steps:
|
| 325 |
+
value: 8000
|
| 326 |
+
max_target_positions:
|
| 327 |
+
value: 448
|
| 328 |
+
median_filter_width:
|
| 329 |
+
value: 7
|
| 330 |
+
metric_for_best_model:
|
| 331 |
+
value: wer
|
| 332 |
+
min_length:
|
| 333 |
+
value: 0
|
| 334 |
+
model/num_parameters:
|
| 335 |
+
value: 241734912
|
| 336 |
+
model_type:
|
| 337 |
+
value: whisper
|
| 338 |
+
mp_parameters:
|
| 339 |
+
value: ""
|
| 340 |
+
neftune_noise_alpha:
|
| 341 |
+
value: null
|
| 342 |
+
no_cuda:
|
| 343 |
+
value: false
|
| 344 |
+
no_repeat_ngram_size:
|
| 345 |
+
value: 0
|
| 346 |
+
num_beam_groups:
|
| 347 |
+
value: 1
|
| 348 |
+
num_beams:
|
| 349 |
+
value: 1
|
| 350 |
+
num_hidden_layers:
|
| 351 |
+
value: 12
|
| 352 |
+
num_mel_bins:
|
| 353 |
+
value: 80
|
| 354 |
+
num_return_sequences:
|
| 355 |
+
value: 1
|
| 356 |
+
num_train_epochs:
|
| 357 |
+
value: 3
|
| 358 |
+
optim:
|
| 359 |
+
value: adamw_torch
|
| 360 |
+
optim_args:
|
| 361 |
+
value: null
|
| 362 |
+
optim_target_modules:
|
| 363 |
+
value: null
|
| 364 |
+
output_attentions:
|
| 365 |
+
value: false
|
| 366 |
+
output_dir:
|
| 367 |
+
value: ./
|
| 368 |
+
output_hidden_states:
|
| 369 |
+
value: false
|
| 370 |
+
output_scores:
|
| 371 |
+
value: false
|
| 372 |
+
overwrite_output_dir:
|
| 373 |
+
value: true
|
| 374 |
+
pad_token_id:
|
| 375 |
+
value: 50257
|
| 376 |
+
past_index:
|
| 377 |
+
value: -1
|
| 378 |
+
per_device_eval_batch_size:
|
| 379 |
+
value: 16
|
| 380 |
+
per_device_train_batch_size:
|
| 381 |
+
value: 32
|
| 382 |
+
per_gpu_eval_batch_size:
|
| 383 |
+
value: null
|
| 384 |
+
per_gpu_train_batch_size:
|
| 385 |
+
value: null
|
| 386 |
+
predict_with_generate:
|
| 387 |
+
value: true
|
| 388 |
+
prediction_loss_only:
|
| 389 |
+
value: false
|
| 390 |
+
prefix:
|
| 391 |
+
value: null
|
| 392 |
+
problem_type:
|
| 393 |
+
value: null
|
| 394 |
+
push_to_hub:
|
| 395 |
+
value: true
|
| 396 |
+
push_to_hub_model_id:
|
| 397 |
+
value: null
|
| 398 |
+
push_to_hub_organization:
|
| 399 |
+
value: null
|
| 400 |
+
push_to_hub_token:
|
| 401 |
+
value: <PUSH_TO_HUB_TOKEN>
|
| 402 |
+
ray_scope:
|
| 403 |
+
value: last
|
| 404 |
+
remove_invalid_values:
|
| 405 |
+
value: false
|
| 406 |
+
remove_unused_columns:
|
| 407 |
+
value: true
|
| 408 |
+
repetition_penalty:
|
| 409 |
+
value: 1
|
| 410 |
+
report_to:
|
| 411 |
+
value:
|
| 412 |
+
- wandb
|
| 413 |
+
restore_callback_states_from_checkpoint:
|
| 414 |
+
value: false
|
| 415 |
+
resume_from_checkpoint:
|
| 416 |
+
value: null
|
| 417 |
+
return_dict:
|
| 418 |
+
value: true
|
| 419 |
+
return_dict_in_generate:
|
| 420 |
+
value: false
|
| 421 |
+
run_name:
|
| 422 |
+
value: whisper-small-eu
|
| 423 |
+
save_on_each_node:
|
| 424 |
+
value: false
|
| 425 |
+
save_only_model:
|
| 426 |
+
value: false
|
| 427 |
+
save_safetensors:
|
| 428 |
+
value: true
|
| 429 |
+
save_steps:
|
| 430 |
+
value: 1000
|
| 431 |
+
save_strategy:
|
| 432 |
+
value: steps
|
| 433 |
+
save_total_limit:
|
| 434 |
+
value: null
|
| 435 |
+
scale_embedding:
|
| 436 |
+
value: false
|
| 437 |
+
seed:
|
| 438 |
+
value: 42
|
| 439 |
+
sep_token_id:
|
| 440 |
+
value: null
|
| 441 |
+
skip_memory_metrics:
|
| 442 |
+
value: true
|
| 443 |
+
sortish_sampler:
|
| 444 |
+
value: false
|
| 445 |
+
split_batches:
|
| 446 |
+
value: null
|
| 447 |
+
suppress_tokens:
|
| 448 |
+
value: null
|
| 449 |
+
task_specific_params:
|
| 450 |
+
value: null
|
| 451 |
+
temperature:
|
| 452 |
+
value: 1
|
| 453 |
+
tf_legacy_loss:
|
| 454 |
+
value: false
|
| 455 |
+
tf32:
|
| 456 |
+
value: null
|
| 457 |
+
tie_encoder_decoder:
|
| 458 |
+
value: false
|
| 459 |
+
tie_word_embeddings:
|
| 460 |
+
value: true
|
| 461 |
+
tokenizer_class:
|
| 462 |
+
value: null
|
| 463 |
+
top_k:
|
| 464 |
+
value: 50
|
| 465 |
+
top_p:
|
| 466 |
+
value: 1
|
| 467 |
+
torch_compile:
|
| 468 |
+
value: false
|
| 469 |
+
torch_compile_backend:
|
| 470 |
+
value: null
|
| 471 |
+
torch_compile_mode:
|
| 472 |
+
value: null
|
| 473 |
+
torch_dtype:
|
| 474 |
+
value: float32
|
| 475 |
+
torch_empty_cache_steps:
|
| 476 |
+
value: null
|
| 477 |
+
torchdynamo:
|
| 478 |
+
value: null
|
| 479 |
+
torchscript:
|
| 480 |
+
value: false
|
| 481 |
+
tpu_metrics_debug:
|
| 482 |
+
value: false
|
| 483 |
+
tpu_num_cores:
|
| 484 |
+
value: null
|
| 485 |
+
transformers_version:
|
| 486 |
+
value: 4.49.0.dev0
|
| 487 |
+
typical_p:
|
| 488 |
+
value: 1
|
| 489 |
+
use_bfloat16:
|
| 490 |
+
value: false
|
| 491 |
+
use_cache:
|
| 492 |
+
value: false
|
| 493 |
+
use_cpu:
|
| 494 |
+
value: false
|
| 495 |
+
use_ipex:
|
| 496 |
+
value: false
|
| 497 |
+
use_legacy_prediction_loop:
|
| 498 |
+
value: false
|
| 499 |
+
use_liger_kernel:
|
| 500 |
+
value: false
|
| 501 |
+
use_mps_device:
|
| 502 |
+
value: false
|
| 503 |
+
use_weighted_layer_sum:
|
| 504 |
+
value: false
|
| 505 |
+
vocab_size:
|
| 506 |
+
value: 51865
|
| 507 |
+
warmup_ratio:
|
| 508 |
+
value: 0
|
| 509 |
+
warmup_steps:
|
| 510 |
+
value: 500
|
| 511 |
+
weight_decay:
|
| 512 |
+
value: 0
|
wandb/run-20250212_125202-c6xjc1gs/files/output.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
0%| | 0/8000 [00:00<?, ?it/s]Traceback (most recent call last):
|
| 2 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
|
| 3 |
+
main()
|
| 4 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
|
| 5 |
+
train_result = trainer.train(resume_from_checkpoint=checkpoint)
|
| 6 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 7 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2197, in train
|
| 8 |
+
return inner_training_loop(
|
| 9 |
+
^^^^^^^^^^^^^^^^^^^^
|
| 10 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2438, in _inner_training_loop
|
| 11 |
+
self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control)
|
| 12 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 13 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 514, in on_epoch_begin
|
| 14 |
+
return self.call_event("on_epoch_begin", args, state, control)
|
| 15 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 16 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/lib/python3.12/site-packages/transformers/trainer_callback.py", line 557, in call_event
|
| 17 |
+
result = getattr(callback, event)(
|
| 18 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 19 |
+
File "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py", line 555, in on_epoch_begin
|
| 20 |
+
if isinstance(train_dataloader.dataset, IterableDatasetShard):
|
| 21 |
+
^^^^^^^^^^^^^^^^^^^^^^^^
|
| 22 |
+
AttributeError: 'NoneType' object has no attribute 'dataset'
|
wandb/run-20250212_125202-c6xjc1gs/files/requirements.txt
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aiosignal==1.3.2
|
| 2 |
+
Markdown==3.7
|
| 3 |
+
more-itertools==10.6.0
|
| 4 |
+
requests==2.32.3
|
| 5 |
+
sentry-sdk==2.21.0
|
| 6 |
+
torchaudio==2.6.0
|
| 7 |
+
charset-normalizer==3.4.1
|
| 8 |
+
docker-pycreds==0.4.0
|
| 9 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 10 |
+
PyYAML==6.0.2
|
| 11 |
+
librosa==0.10.2.post1
|
| 12 |
+
soxr==0.5.0.post1
|
| 13 |
+
multiprocess==0.70.16
|
| 14 |
+
setuptools==75.8.0
|
| 15 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 16 |
+
joblib==1.4.2
|
| 17 |
+
pytz==2025.1
|
| 18 |
+
pip==24.0
|
| 19 |
+
scikit-learn==1.6.1
|
| 20 |
+
certifi==2025.1.31
|
| 21 |
+
jiwer==3.1.0
|
| 22 |
+
regex==2024.11.6
|
| 23 |
+
annotated-types==0.7.0
|
| 24 |
+
grpcio==1.70.0
|
| 25 |
+
msgpack==1.1.0
|
| 26 |
+
mpmath==1.3.0
|
| 27 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 28 |
+
soundfile==0.13.1
|
| 29 |
+
dill==0.3.8
|
| 30 |
+
nvidia-nvtx-cu12==12.4.127
|
| 31 |
+
six==1.17.0
|
| 32 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 33 |
+
pyarrow==19.0.0
|
| 34 |
+
nvidia-nccl-cu12==2.21.5
|
| 35 |
+
psutil==6.1.1
|
| 36 |
+
decorator==5.1.1
|
| 37 |
+
llvmlite==0.44.0
|
| 38 |
+
frozenlist==1.5.0
|
| 39 |
+
pydantic==2.10.6
|
| 40 |
+
networkx==3.4.2
|
| 41 |
+
idna==3.10
|
| 42 |
+
wandb==0.19.6
|
| 43 |
+
aiohttp==3.11.12
|
| 44 |
+
RapidFuzz==3.12.1
|
| 45 |
+
pandas==2.2.3
|
| 46 |
+
python-dateutil==2.9.0.post0
|
| 47 |
+
numpy==2.1.3
|
| 48 |
+
tokenizers==0.21.0
|
| 49 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 50 |
+
typing_extensions==4.12.2
|
| 51 |
+
urllib3==2.3.0
|
| 52 |
+
setproctitle==1.3.4
|
| 53 |
+
tzdata==2025.1
|
| 54 |
+
sympy==1.13.1
|
| 55 |
+
pooch==1.8.2
|
| 56 |
+
click==8.1.8
|
| 57 |
+
pydantic_core==2.27.2
|
| 58 |
+
MarkupSafe==3.0.2
|
| 59 |
+
scipy==1.15.1
|
| 60 |
+
accelerate==1.3.0
|
| 61 |
+
tensorboard==2.19.0
|
| 62 |
+
protobuf==5.29.3
|
| 63 |
+
gitdb==4.0.12
|
| 64 |
+
smmap==5.0.2
|
| 65 |
+
absl-py==2.1.0
|
| 66 |
+
tqdm==4.67.1
|
| 67 |
+
yarl==1.18.3
|
| 68 |
+
pycparser==2.22
|
| 69 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 70 |
+
attrs==25.1.0
|
| 71 |
+
lazy_loader==0.4
|
| 72 |
+
tensorboard-data-server==0.7.2
|
| 73 |
+
threadpoolctl==3.5.0
|
| 74 |
+
GitPython==3.1.44
|
| 75 |
+
safetensors==0.5.2
|
| 76 |
+
fsspec==2024.12.0
|
| 77 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 78 |
+
filelock==3.17.0
|
| 79 |
+
aiohappyeyeballs==2.4.6
|
| 80 |
+
packaging==24.2
|
| 81 |
+
datasets==3.2.1.dev0
|
| 82 |
+
audioread==3.0.1
|
| 83 |
+
propcache==0.2.1
|
| 84 |
+
transformers==4.49.0.dev0
|
| 85 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 86 |
+
cffi==1.17.1
|
| 87 |
+
evaluate==0.4.3
|
| 88 |
+
Werkzeug==3.1.3
|
| 89 |
+
huggingface-hub==0.28.1
|
| 90 |
+
Jinja2==3.1.5
|
| 91 |
+
torch==2.6.0
|
| 92 |
+
nvidia-curand-cu12==10.3.5.147
|
| 93 |
+
xxhash==3.5.0
|
| 94 |
+
platformdirs==4.3.6
|
| 95 |
+
multidict==6.1.0
|
| 96 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 97 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 98 |
+
triton==3.2.0
|
| 99 |
+
numba==0.61.0
|
| 100 |
+
importlib_metadata==8.0.0
|
| 101 |
+
platformdirs==4.2.2
|
| 102 |
+
typeguard==4.3.0
|
| 103 |
+
more-itertools==10.3.0
|
| 104 |
+
tomli==2.0.1
|
| 105 |
+
autocommand==2.2.2
|
| 106 |
+
zipp==3.19.2
|
| 107 |
+
typing_extensions==4.12.2
|
| 108 |
+
backports.tarfile==1.2.0
|
| 109 |
+
inflect==7.3.1
|
| 110 |
+
jaraco.text==3.12.1
|
| 111 |
+
wheel==0.43.0
|
| 112 |
+
packaging==24.2
|
| 113 |
+
jaraco.collections==5.1.0
|
| 114 |
+
jaraco.functools==4.0.1
|
| 115 |
+
jaraco.context==5.3.0
|
wandb/run-20250212_125202-c6xjc1gs/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-48-generic-x86_64-with-glibc2.39",
|
| 3 |
+
"python": "CPython 3.12.3",
|
| 4 |
+
"startedAt": "2025-02-12T12:52:03.105234Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--model_name_or_path=openai/whisper-small",
|
| 7 |
+
"--dataset_name=asierhv/composite_corpus_eu_v2.1",
|
| 8 |
+
"--language=basque",
|
| 9 |
+
"--train_split_name=train",
|
| 10 |
+
"--eval_split_name=test_parl",
|
| 11 |
+
"--model_index_name=Whisper Small Basque",
|
| 12 |
+
"--max_steps=8000",
|
| 13 |
+
"--output_dir=./",
|
| 14 |
+
"--per_device_train_batch_size=32",
|
| 15 |
+
"--per_device_eval_batch_size=16",
|
| 16 |
+
"--gradient_accumulation_steps=1",
|
| 17 |
+
"--logging_steps=25",
|
| 18 |
+
"--learning_rate=1e-5",
|
| 19 |
+
"--warmup_steps=500",
|
| 20 |
+
"--evaluation_strategy=steps",
|
| 21 |
+
"--eval_steps=1000",
|
| 22 |
+
"--save_strategy=steps",
|
| 23 |
+
"--save_steps=1000",
|
| 24 |
+
"--generation_max_length=225",
|
| 25 |
+
"--length_column_name=input_length",
|
| 26 |
+
"--max_duration_in_seconds=30",
|
| 27 |
+
"--text_column_name=sentence",
|
| 28 |
+
"--freeze_feature_encoder=False",
|
| 29 |
+
"--report_to=tensorboard",
|
| 30 |
+
"--metric_for_best_model=wer",
|
| 31 |
+
"--greater_is_better=False",
|
| 32 |
+
"--load_best_model_at_end",
|
| 33 |
+
"--gradient_checkpointing",
|
| 34 |
+
"--fp16",
|
| 35 |
+
"--overwrite_output_dir",
|
| 36 |
+
"--do_train",
|
| 37 |
+
"--do_eval",
|
| 38 |
+
"--predict_with_generate",
|
| 39 |
+
"--do_normalize_eval",
|
| 40 |
+
"--streaming",
|
| 41 |
+
"--use_auth_token",
|
| 42 |
+
"--push_to_hub",
|
| 43 |
+
"--report_to",
|
| 44 |
+
"wandb",
|
| 45 |
+
"--run_name",
|
| 46 |
+
"whisper-small-eu"
|
| 47 |
+
],
|
| 48 |
+
"program": "/home/tknika/xezpeleta/whisper/whisper-small-eu/run_speech_recognition_seq2seq_streaming.py",
|
| 49 |
+
"codePath": "run_speech_recognition_seq2seq_streaming.py",
|
| 50 |
+
"git": {
|
| 51 |
+
"remote": "https://huggingface.co/xezpeleta/whisper-small-eu",
|
| 52 |
+
"commit": "9c975864b20b4df94398a870e97cad2934253ec3"
|
| 53 |
+
},
|
| 54 |
+
"email": "xezpeleta@gmail.com",
|
| 55 |
+
"root": "/home/tknika/xezpeleta/whisper/whisper-small-eu",
|
| 56 |
+
"host": "tknika",
|
| 57 |
+
"executable": "/home/tknika/xezpeleta/whisper/whisper-small-eu/.venv/bin/python",
|
| 58 |
+
"codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
|
| 59 |
+
"cpu_count": 8,
|
| 60 |
+
"cpu_count_logical": 8,
|
| 61 |
+
"gpu": "NVIDIA L40-48Q",
|
| 62 |
+
"gpu_count": 1,
|
| 63 |
+
"disk": {
|
| 64 |
+
"/": {
|
| 65 |
+
"total": "525987168256",
|
| 66 |
+
"used": "313777016832"
|
| 67 |
+
}
|
| 68 |
+
},
|
| 69 |
+
"memory": {
|
| 70 |
+
"total": "33654022144"
|
| 71 |
+
},
|
| 72 |
+
"cpu": {
|
| 73 |
+
"count": 8,
|
| 74 |
+
"countLogical": 8
|
| 75 |
+
},
|
| 76 |
+
"gpu_nvidia": [
|
| 77 |
+
{
|
| 78 |
+
"name": "NVIDIA L40-48Q",
|
| 79 |
+
"memoryTotal": "51539607552",
|
| 80 |
+
"cudaCores": 18176,
|
| 81 |
+
"architecture": "Ada"
|
| 82 |
+
}
|
| 83 |
+
],
|
| 84 |
+
"cudaVersion": "12.4"
|
| 85 |
+
}
|
wandb/run-20250212_125202-c6xjc1gs/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_wandb":{"runtime":0}}
|
wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-02-12T12:52:02.919881508Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpeae6bnaj/port-226112.txt","pid":226112,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
|
| 2 |
+
{"time":"2025-02-12T12:52:02.924775623Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":226112}
|
| 3 |
+
{"time":"2025-02-12T12:52:02.924741833Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37305,"Zone":""}}
|
| 4 |
+
{"time":"2025-02-12T12:52:03.098177175Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34596"}
|
| 5 |
+
{"time":"2025-02-12T12:52:03.107916075Z","level":"INFO","msg":"handleInformInit: received","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
|
| 6 |
+
{"time":"2025-02-12T12:52:03.213738528Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"c6xjc1gs","id":"127.0.0.1:34596"}
|
| 7 |
+
{"time":"2025-02-12T12:52:03.606976673Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34596"}
|
| 8 |
+
{"time":"2025-02-12T12:52:03.607096473Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:34596"}
|
| 9 |
+
{"time":"2025-02-12T12:52:03.607114372Z","level":"INFO","msg":"server is shutting down"}
|
| 10 |
+
{"time":"2025-02-12T12:52:03.607218922Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:34596"}
|
| 11 |
+
{"time":"2025-02-12T12:52:03.804235797Z","level":"ERROR","msg":"processOutgoingData: flush error","error":"write tcp 127.0.0.1:37305->127.0.0.1:34596: use of closed network connection","id":"127.0.0.1:34596"}
|
| 12 |
+
{"time":"2025-02-12T12:52:05.13436235Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34596"}
|
| 13 |
+
{"time":"2025-02-12T12:52:05.13438775Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34596"}
|
| 14 |
+
{"time":"2025-02-12T12:52:05.13441372Z","level":"INFO","msg":"server is closed"}
|
wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-02-12T12:52:03.108316863Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-core.log"}
|
| 2 |
+
{"time":"2025-02-12T12:52:03.213666458Z","level":"INFO","msg":"created new stream","id":"c6xjc1gs"}
|
| 3 |
+
{"time":"2025-02-12T12:52:03.213728098Z","level":"INFO","msg":"stream: started","id":"c6xjc1gs"}
|
| 4 |
+
{"time":"2025-02-12T12:52:03.213779117Z","level":"INFO","msg":"writer: Do: started","stream_id":"c6xjc1gs"}
|
| 5 |
+
{"time":"2025-02-12T12:52:03.213809587Z","level":"INFO","msg":"handler: started","stream_id":"c6xjc1gs"}
|
| 6 |
+
{"time":"2025-02-12T12:52:03.214018716Z","level":"INFO","msg":"sender: started","stream_id":"c6xjc1gs"}
|
| 7 |
+
{"time":"2025-02-12T12:52:03.484749537Z","level":"INFO","msg":"Starting system monitor"}
|
| 8 |
+
{"time":"2025-02-12T12:52:03.607062513Z","level":"INFO","msg":"stream: closing","id":"c6xjc1gs"}
|
| 9 |
+
{"time":"2025-02-12T12:52:03.607125442Z","level":"INFO","msg":"Stopping system monitor"}
|
| 10 |
+
{"time":"2025-02-12T12:52:03.607814399Z","level":"INFO","msg":"Stopped system monitor"}
|
| 11 |
+
{"time":"2025-02-12T12:52:04.912814278Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 12 |
+
{"time":"2025-02-12T12:52:05.133965652Z","level":"INFO","msg":"handler: closed","stream_id":"c6xjc1gs"}
|
| 13 |
+
{"time":"2025-02-12T12:52:05.134024822Z","level":"INFO","msg":"sender: closed","stream_id":"c6xjc1gs"}
|
| 14 |
+
{"time":"2025-02-12T12:52:05.134018042Z","level":"INFO","msg":"writer: Close: closed","stream_id":"c6xjc1gs"}
|
| 15 |
+
{"time":"2025-02-12T12:52:05.134218211Z","level":"INFO","msg":"stream: closed","id":"c6xjc1gs"}
|
wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-02-12 12:52:02,886 INFO MainThread:226112 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
|
| 2 |
+
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Configure stats pid to 226112
|
| 3 |
+
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/.config/wandb/settings
|
| 4 |
+
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/settings
|
| 5 |
+
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_setup.py:_flush():68] Loading settings from environment variables
|
| 6 |
+
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug.log
|
| 7 |
+
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /home/tknika/xezpeleta/whisper/whisper-small-eu/wandb/run-20250212_125202-c6xjc1gs/logs/debug-internal.log
|
| 8 |
+
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():756] calling init triggers
|
| 9 |
+
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-02-12 12:52:02,887 INFO MainThread:226112 [wandb_init.py:init():789] starting backend
|
| 12 |
+
2025-02-12 12:52:03,097 INFO MainThread:226112 [wandb_init.py:init():793] sending inform_init request
|
| 13 |
+
2025-02-12 12:52:03,104 INFO MainThread:226112 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 14 |
+
2025-02-12 12:52:03,104 INFO MainThread:226112 [wandb_init.py:init():808] backend started and connected
|
| 15 |
+
2025-02-12 12:52:03,107 INFO MainThread:226112 [wandb_init.py:init():901] updated telemetry
|
| 16 |
+
2025-02-12 12:52:03,114 INFO MainThread:226112 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
|
| 17 |
+
2025-02-12 12:52:03,483 INFO MainThread:226112 [wandb_init.py:init():994] starting run threads in backend
|
| 18 |
+
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_console_start():2385] atexit reg
|
| 19 |
+
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2235] redirect: wrap_raw
|
| 20 |
+
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2300] Wrapping output streams.
|
| 21 |
+
2025-02-12 12:52:03,566 INFO MainThread:226112 [wandb_run.py:_redirect():2325] Redirects installed.
|
| 22 |
+
2025-02-12 12:52:03,568 INFO MainThread:226112 [wandb_init.py:init():1036] run started, returning control to user process
|
| 23 |
+
2025-02-12 12:52:03,569 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb None None {'vocab_size': 51865, 'num_mel_bins': 80, 'd_model': 768, 'encoder_layers': 12, 'encoder_attention_heads': 12, 'decoder_layers': 12, 'decoder_attention_heads': 12, 'decoder_ffn_dim': 3072, 'encoder_ffn_dim': 3072, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 12, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 
'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50257, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-small', '_attn_implementation_autoset': True, 'transformers_version': '4.49.0.dev0', 'forced_decoder_ids': None, 'model_type': 'whisper', 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 1e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 8000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Feb12_12-51-48_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 
'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-small-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
|
| 24 |
+
2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_config.py:__setitem__():154] config set model/num_parameters = 241734912 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7d4830f2ddf0>>
|
| 25 |
+
2025-02-12 12:52:03,571 INFO MainThread:226112 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 241734912 None
|
| 26 |
+
2025-02-12 12:52:03,607 WARNING MsgRouterThr:226112 [router.py:message_loop():75] message_loop has been closed
|
wandb/run-20250212_125202-c6xjc1gs/run-c6xjc1gs.wandb
ADDED
|
Binary file (11.3 kB). View file
|
|
|
wandb/run-20250212_125924-xhsgsxqq/files/config.yaml
ADDED
|
@@ -0,0 +1,512 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_attn_implementation_autoset:
|
| 2 |
+
value: true
|
| 3 |
+
_name_or_path:
|
| 4 |
+
value: openai/whisper-small
|
| 5 |
+
_wandb:
|
| 6 |
+
value:
|
| 7 |
+
cli_version: 0.19.6
|
| 8 |
+
m:
|
| 9 |
+
- "1": train/global_step
|
| 10 |
+
"6":
|
| 11 |
+
- 3
|
| 12 |
+
"7": []
|
| 13 |
+
python_version: 3.12.3
|
| 14 |
+
t:
|
| 15 |
+
"1":
|
| 16 |
+
- 1
|
| 17 |
+
- 5
|
| 18 |
+
- 11
|
| 19 |
+
- 49
|
| 20 |
+
- 51
|
| 21 |
+
- 53
|
| 22 |
+
- 55
|
| 23 |
+
- 71
|
| 24 |
+
- 100
|
| 25 |
+
"2":
|
| 26 |
+
- 1
|
| 27 |
+
- 5
|
| 28 |
+
- 11
|
| 29 |
+
- 49
|
| 30 |
+
- 51
|
| 31 |
+
- 53
|
| 32 |
+
- 55
|
| 33 |
+
- 71
|
| 34 |
+
- 100
|
| 35 |
+
"3":
|
| 36 |
+
- 7
|
| 37 |
+
- 13
|
| 38 |
+
- 19
|
| 39 |
+
- 23
|
| 40 |
+
- 55
|
| 41 |
+
- 66
|
| 42 |
+
"4": 3.12.3
|
| 43 |
+
"5": 0.19.6
|
| 44 |
+
"6": 4.49.0.dev0
|
| 45 |
+
"8":
|
| 46 |
+
- 5
|
| 47 |
+
"9":
|
| 48 |
+
"1": transformers_trainer
|
| 49 |
+
"12": 0.19.6
|
| 50 |
+
"13": linux-x86_64
|
| 51 |
+
accelerator_config:
|
| 52 |
+
value:
|
| 53 |
+
dispatch_batches: null
|
| 54 |
+
even_batches: true
|
| 55 |
+
gradient_accumulation_kwargs: null
|
| 56 |
+
non_blocking: false
|
| 57 |
+
split_batches: false
|
| 58 |
+
use_seedable_sampler: true
|
| 59 |
+
activation_dropout:
|
| 60 |
+
value: 0
|
| 61 |
+
activation_function:
|
| 62 |
+
value: gelu
|
| 63 |
+
adafactor:
|
| 64 |
+
value: false
|
| 65 |
+
adam_beta1:
|
| 66 |
+
value: 0.9
|
| 67 |
+
adam_beta2:
|
| 68 |
+
value: 0.999
|
| 69 |
+
adam_epsilon:
|
| 70 |
+
value: 1e-08
|
| 71 |
+
add_cross_attention:
|
| 72 |
+
value: false
|
| 73 |
+
apply_spec_augment:
|
| 74 |
+
value: false
|
| 75 |
+
architectures:
|
| 76 |
+
value:
|
| 77 |
+
- WhisperForConditionalGeneration
|
| 78 |
+
attention_dropout:
|
| 79 |
+
value: 0
|
| 80 |
+
auto_find_batch_size:
|
| 81 |
+
value: false
|
| 82 |
+
average_tokens_across_devices:
|
| 83 |
+
value: false
|
| 84 |
+
bad_words_ids:
|
| 85 |
+
value: null
|
| 86 |
+
batch_eval_metrics:
|
| 87 |
+
value: false
|
| 88 |
+
begin_suppress_tokens:
|
| 89 |
+
value:
|
| 90 |
+
- 220
|
| 91 |
+
- 50257
|
| 92 |
+
bf16:
|
| 93 |
+
value: false
|
| 94 |
+
bf16_full_eval:
|
| 95 |
+
value: false
|
| 96 |
+
bos_token_id:
|
| 97 |
+
value: 50257
|
| 98 |
+
chunk_size_feed_forward:
|
| 99 |
+
value: 0
|
| 100 |
+
classifier_proj_size:
|
| 101 |
+
value: 256
|
| 102 |
+
cross_attention_hidden_size:
|
| 103 |
+
value: null
|
| 104 |
+
d_model:
|
| 105 |
+
value: 768
|
| 106 |
+
data_seed:
|
| 107 |
+
value: null
|
| 108 |
+
dataloader_drop_last:
|
| 109 |
+
value: false
|
| 110 |
+
dataloader_num_workers:
|
| 111 |
+
value: 0
|
| 112 |
+
dataloader_persistent_workers:
|
| 113 |
+
value: false
|
| 114 |
+
dataloader_pin_memory:
|
| 115 |
+
value: true
|
| 116 |
+
dataloader_prefetch_factor:
|
| 117 |
+
value: null
|
| 118 |
+
ddp_backend:
|
| 119 |
+
value: null
|
| 120 |
+
ddp_broadcast_buffers:
|
| 121 |
+
value: null
|
| 122 |
+
ddp_bucket_cap_mb:
|
| 123 |
+
value: null
|
| 124 |
+
ddp_find_unused_parameters:
|
| 125 |
+
value: null
|
| 126 |
+
ddp_timeout:
|
| 127 |
+
value: 1800
|
| 128 |
+
debug:
|
| 129 |
+
value: []
|
| 130 |
+
decoder_attention_heads:
|
| 131 |
+
value: 12
|
| 132 |
+
decoder_ffn_dim:
|
| 133 |
+
value: 3072
|
| 134 |
+
decoder_layerdrop:
|
| 135 |
+
value: 0
|
| 136 |
+
decoder_layers:
|
| 137 |
+
value: 12
|
| 138 |
+
decoder_start_token_id:
|
| 139 |
+
value: 50258
|
| 140 |
+
deepspeed:
|
| 141 |
+
value: null
|
| 142 |
+
disable_tqdm:
|
| 143 |
+
value: false
|
| 144 |
+
dispatch_batches:
|
| 145 |
+
value: null
|
| 146 |
+
diversity_penalty:
|
| 147 |
+
value: 0
|
| 148 |
+
do_eval:
|
| 149 |
+
value: true
|
| 150 |
+
do_predict:
|
| 151 |
+
value: false
|
| 152 |
+
do_sample:
|
| 153 |
+
value: false
|
| 154 |
+
do_train:
|
| 155 |
+
value: true
|
| 156 |
+
dropout:
|
| 157 |
+
value: 0
|
| 158 |
+
early_stopping:
|
| 159 |
+
value: false
|
| 160 |
+
encoder_attention_heads:
|
| 161 |
+
value: 12
|
| 162 |
+
encoder_ffn_dim:
|
| 163 |
+
value: 3072
|
| 164 |
+
encoder_layerdrop:
|
| 165 |
+
value: 0
|
| 166 |
+
encoder_layers:
|
| 167 |
+
value: 12
|
| 168 |
+
encoder_no_repeat_ngram_size:
|
| 169 |
+
value: 0
|
| 170 |
+
eos_token_id:
|
| 171 |
+
value: 50257
|
| 172 |
+
eval_accumulation_steps:
|
| 173 |
+
value: null
|
| 174 |
+
eval_delay:
|
| 175 |
+
value: 0
|
| 176 |
+
eval_do_concat_batches:
|
| 177 |
+
value: true
|
| 178 |
+
eval_on_start:
|
| 179 |
+
value: false
|
| 180 |
+
eval_steps:
|
| 181 |
+
value: 1000
|
| 182 |
+
eval_strategy:
|
| 183 |
+
value: steps
|
| 184 |
+
eval_use_gather_object:
|
| 185 |
+
value: false
|
| 186 |
+
evaluation_strategy:
|
| 187 |
+
value: steps
|
| 188 |
+
exponential_decay_length_penalty:
|
| 189 |
+
value: null
|
| 190 |
+
finetuning_task:
|
| 191 |
+
value: null
|
| 192 |
+
forced_bos_token_id:
|
| 193 |
+
value: null
|
| 194 |
+
forced_decoder_ids:
|
| 195 |
+
value: null
|
| 196 |
+
forced_eos_token_id:
|
| 197 |
+
value: null
|
| 198 |
+
fp16:
|
| 199 |
+
value: true
|
| 200 |
+
fp16_backend:
|
| 201 |
+
value: auto
|
| 202 |
+
fp16_full_eval:
|
| 203 |
+
value: false
|
| 204 |
+
fp16_opt_level:
|
| 205 |
+
value: O1
|
| 206 |
+
fsdp:
|
| 207 |
+
value: []
|
| 208 |
+
fsdp_config:
|
| 209 |
+
value:
|
| 210 |
+
min_num_params: 0
|
| 211 |
+
xla: false
|
| 212 |
+
xla_fsdp_grad_ckpt: false
|
| 213 |
+
xla_fsdp_v2: false
|
| 214 |
+
fsdp_min_num_params:
|
| 215 |
+
value: 0
|
| 216 |
+
fsdp_transformer_layer_cls_to_wrap:
|
| 217 |
+
value: null
|
| 218 |
+
full_determinism:
|
| 219 |
+
value: false
|
| 220 |
+
generation_config:
|
| 221 |
+
value: null
|
| 222 |
+
generation_max_length:
|
| 223 |
+
value: 225
|
| 224 |
+
generation_num_beams:
|
| 225 |
+
value: null
|
| 226 |
+
gradient_accumulation_steps:
|
| 227 |
+
value: 1
|
| 228 |
+
gradient_checkpointing:
|
| 229 |
+
value: true
|
| 230 |
+
gradient_checkpointing_kwargs:
|
| 231 |
+
value: null
|
| 232 |
+
greater_is_better:
|
| 233 |
+
value: false
|
| 234 |
+
group_by_length:
|
| 235 |
+
value: false
|
| 236 |
+
half_precision_backend:
|
| 237 |
+
value: auto
|
| 238 |
+
hub_always_push:
|
| 239 |
+
value: false
|
| 240 |
+
hub_model_id:
|
| 241 |
+
value: null
|
| 242 |
+
hub_private_repo:
|
| 243 |
+
value: null
|
| 244 |
+
hub_strategy:
|
| 245 |
+
value: every_save
|
| 246 |
+
hub_token:
|
| 247 |
+
value: <HUB_TOKEN>
|
| 248 |
+
id2label:
|
| 249 |
+
value:
|
| 250 |
+
"0": LABEL_0
|
| 251 |
+
"1": LABEL_1
|
| 252 |
+
ignore_data_skip:
|
| 253 |
+
value: false
|
| 254 |
+
include_for_metrics:
|
| 255 |
+
value: []
|
| 256 |
+
include_inputs_for_metrics:
|
| 257 |
+
value: false
|
| 258 |
+
include_num_input_tokens_seen:
|
| 259 |
+
value: false
|
| 260 |
+
include_tokens_per_second:
|
| 261 |
+
value: false
|
| 262 |
+
init_std:
|
| 263 |
+
value: 0.02
|
| 264 |
+
is_decoder:
|
| 265 |
+
value: false
|
| 266 |
+
is_encoder_decoder:
|
| 267 |
+
value: true
|
| 268 |
+
jit_mode_eval:
|
| 269 |
+
value: false
|
| 270 |
+
label_names:
|
| 271 |
+
value: null
|
| 272 |
+
label_smoothing_factor:
|
| 273 |
+
value: 0
|
| 274 |
+
label2id:
|
| 275 |
+
value:
|
| 276 |
+
LABEL_0: 0
|
| 277 |
+
LABEL_1: 1
|
| 278 |
+
learning_rate:
|
| 279 |
+
value: 1e-05
|
| 280 |
+
length_column_name:
|
| 281 |
+
value: input_length
|
| 282 |
+
length_penalty:
|
| 283 |
+
value: 1
|
| 284 |
+
load_best_model_at_end:
|
| 285 |
+
value: true
|
| 286 |
+
local_rank:
|
| 287 |
+
value: 0
|
| 288 |
+
log_level:
|
| 289 |
+
value: passive
|
| 290 |
+
log_level_replica:
|
| 291 |
+
value: warning
|
| 292 |
+
log_on_each_node:
|
| 293 |
+
value: true
|
| 294 |
+
logging_dir:
|
| 295 |
+
value: ./runs/Feb12_12-58-59_tknika
|
| 296 |
+
logging_first_step:
|
| 297 |
+
value: false
|
| 298 |
+
logging_nan_inf_filter:
|
| 299 |
+
value: true
|
| 300 |
+
logging_steps:
|
| 301 |
+
value: 25
|
| 302 |
+
logging_strategy:
|
| 303 |
+
value: steps
|
| 304 |
+
lr_scheduler_type:
|
| 305 |
+
value: linear
|
| 306 |
+
mask_feature_length:
|
| 307 |
+
value: 10
|
| 308 |
+
mask_feature_min_masks:
|
| 309 |
+
value: 0
|
| 310 |
+
mask_feature_prob:
|
| 311 |
+
value: 0
|
| 312 |
+
mask_time_length:
|
| 313 |
+
value: 10
|
| 314 |
+
mask_time_min_masks:
|
| 315 |
+
value: 2
|
| 316 |
+
mask_time_prob:
|
| 317 |
+
value: 0.05
|
| 318 |
+
max_grad_norm:
|
| 319 |
+
value: 1
|
| 320 |
+
max_length:
|
| 321 |
+
value: 448
|
| 322 |
+
max_source_positions:
|
| 323 |
+
value: 1500
|
| 324 |
+
max_steps:
|
| 325 |
+
value: 8000
|
| 326 |
+
max_target_positions:
|
| 327 |
+
value: 448
|
| 328 |
+
median_filter_width:
|
| 329 |
+
value: 7
|
| 330 |
+
metric_for_best_model:
|
| 331 |
+
value: wer
|
| 332 |
+
min_length:
|
| 333 |
+
value: 0
|
| 334 |
+
model/num_parameters:
|
| 335 |
+
value: 241734912
|
| 336 |
+
model_type:
|
| 337 |
+
value: whisper
|
| 338 |
+
mp_parameters:
|
| 339 |
+
value: ""
|
| 340 |
+
neftune_noise_alpha:
|
| 341 |
+
value: null
|
| 342 |
+
no_cuda:
|
| 343 |
+
value: false
|
| 344 |
+
no_repeat_ngram_size:
|
| 345 |
+
value: 0
|
| 346 |
+
num_beam_groups:
|
| 347 |
+
value: 1
|
| 348 |
+
num_beams:
|
| 349 |
+
value: 1
|
| 350 |
+
num_hidden_layers:
|
| 351 |
+
value: 12
|
| 352 |
+
num_mel_bins:
|
| 353 |
+
value: 80
|
| 354 |
+
num_return_sequences:
|
| 355 |
+
value: 1
|
| 356 |
+
num_train_epochs:
|
| 357 |
+
value: 3
|
| 358 |
+
optim:
|
| 359 |
+
value: adamw_torch
|
| 360 |
+
optim_args:
|
| 361 |
+
value: null
|
| 362 |
+
optim_target_modules:
|
| 363 |
+
value: null
|
| 364 |
+
output_attentions:
|
| 365 |
+
value: false
|
| 366 |
+
output_dir:
|
| 367 |
+
value: ./
|
| 368 |
+
output_hidden_states:
|
| 369 |
+
value: false
|
| 370 |
+
output_scores:
|
| 371 |
+
value: false
|
| 372 |
+
overwrite_output_dir:
|
| 373 |
+
value: true
|
| 374 |
+
pad_token_id:
|
| 375 |
+
value: 50257
|
| 376 |
+
past_index:
|
| 377 |
+
value: -1
|
| 378 |
+
per_device_eval_batch_size:
|
| 379 |
+
value: 16
|
| 380 |
+
per_device_train_batch_size:
|
| 381 |
+
value: 32
|
| 382 |
+
per_gpu_eval_batch_size:
|
| 383 |
+
value: null
|
| 384 |
+
per_gpu_train_batch_size:
|
| 385 |
+
value: null
|
| 386 |
+
predict_with_generate:
|
| 387 |
+
value: true
|
| 388 |
+
prediction_loss_only:
|
| 389 |
+
value: false
|
| 390 |
+
prefix:
|
| 391 |
+
value: null
|
| 392 |
+
problem_type:
|
| 393 |
+
value: null
|
| 394 |
+
push_to_hub:
|
| 395 |
+
value: true
|
| 396 |
+
push_to_hub_model_id:
|
| 397 |
+
value: null
|
| 398 |
+
push_to_hub_organization:
|
| 399 |
+
value: null
|
| 400 |
+
push_to_hub_token:
|
| 401 |
+
value: <PUSH_TO_HUB_TOKEN>
|
| 402 |
+
ray_scope:
|
| 403 |
+
value: last
|
| 404 |
+
remove_invalid_values:
|
| 405 |
+
value: false
|
| 406 |
+
remove_unused_columns:
|
| 407 |
+
value: true
|
| 408 |
+
repetition_penalty:
|
| 409 |
+
value: 1
|
| 410 |
+
report_to:
|
| 411 |
+
value:
|
| 412 |
+
- wandb
|
| 413 |
+
restore_callback_states_from_checkpoint:
|
| 414 |
+
value: false
|
| 415 |
+
resume_from_checkpoint:
|
| 416 |
+
value: null
|
| 417 |
+
return_dict:
|
| 418 |
+
value: true
|
| 419 |
+
return_dict_in_generate:
|
| 420 |
+
value: false
|
| 421 |
+
run_name:
|
| 422 |
+
value: whisper-small-eu
|
| 423 |
+
save_on_each_node:
|
| 424 |
+
value: false
|
| 425 |
+
save_only_model:
|
| 426 |
+
value: false
|
| 427 |
+
save_safetensors:
|
| 428 |
+
value: true
|
| 429 |
+
save_steps:
|
| 430 |
+
value: 1000
|
| 431 |
+
save_strategy:
|
| 432 |
+
value: steps
|
| 433 |
+
save_total_limit:
|
| 434 |
+
value: null
|
| 435 |
+
scale_embedding:
|
| 436 |
+
value: false
|
| 437 |
+
seed:
|
| 438 |
+
value: 42
|
| 439 |
+
sep_token_id:
|
| 440 |
+
value: null
|
| 441 |
+
skip_memory_metrics:
|
| 442 |
+
value: true
|
| 443 |
+
sortish_sampler:
|
| 444 |
+
value: false
|
| 445 |
+
split_batches:
|
| 446 |
+
value: null
|
| 447 |
+
suppress_tokens:
|
| 448 |
+
value: null
|
| 449 |
+
task_specific_params:
|
| 450 |
+
value: null
|
| 451 |
+
temperature:
|
| 452 |
+
value: 1
|
| 453 |
+
tf_legacy_loss:
|
| 454 |
+
value: false
|
| 455 |
+
tf32:
|
| 456 |
+
value: null
|
| 457 |
+
tie_encoder_decoder:
|
| 458 |
+
value: false
|
| 459 |
+
tie_word_embeddings:
|
| 460 |
+
value: true
|
| 461 |
+
tokenizer_class:
|
| 462 |
+
value: null
|
| 463 |
+
top_k:
|
| 464 |
+
value: 50
|
| 465 |
+
top_p:
|
| 466 |
+
value: 1
|
| 467 |
+
torch_compile:
|
| 468 |
+
value: false
|
| 469 |
+
torch_compile_backend:
|
| 470 |
+
value: null
|
| 471 |
+
torch_compile_mode:
|
| 472 |
+
value: null
|
| 473 |
+
torch_dtype:
|
| 474 |
+
value: float32
|
| 475 |
+
torch_empty_cache_steps:
|
| 476 |
+
value: null
|
| 477 |
+
torchdynamo:
|
| 478 |
+
value: null
|
| 479 |
+
torchscript:
|
| 480 |
+
value: false
|
| 481 |
+
tpu_metrics_debug:
|
| 482 |
+
value: false
|
| 483 |
+
tpu_num_cores:
|
| 484 |
+
value: null
|
| 485 |
+
transformers_version:
|
| 486 |
+
value: 4.49.0.dev0
|
| 487 |
+
typical_p:
|
| 488 |
+
value: 1
|
| 489 |
+
use_bfloat16:
|
| 490 |
+
value: false
|
| 491 |
+
use_cache:
|
| 492 |
+
value: false
|
| 493 |
+
use_cpu:
|
| 494 |
+
value: false
|
| 495 |
+
use_ipex:
|
| 496 |
+
value: false
|
| 497 |
+
use_legacy_prediction_loop:
|
| 498 |
+
value: false
|
| 499 |
+
use_liger_kernel:
|
| 500 |
+
value: false
|
| 501 |
+
use_mps_device:
|
| 502 |
+
value: false
|
| 503 |
+
use_weighted_layer_sum:
|
| 504 |
+
value: false
|
| 505 |
+
vocab_size:
|
| 506 |
+
value: 51865
|
| 507 |
+
warmup_ratio:
|
| 508 |
+
value: 0
|
| 509 |
+
warmup_steps:
|
| 510 |
+
value: 500
|
| 511 |
+
weight_decay:
|
| 512 |
+
value: 0
|