Spaces:
Runtime error
Runtime error
File size: 2,677 Bytes
2c0f55c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import logging
from pathlib import Path
import pandas as pd
from src.datasets.base_dataset import SimpleAudioFakeDataset
# Split configuration copied onto every DeepFakeASVSpoofDataset instance as
# ``partition_ratio`` and ``seed``.
# NOTE(review): presumably the two ratios are the train/validation fractions
# and the remainder is the test share; the consumer is not visible in this
# file - confirm against the base dataset class.
DF_ASVSPOOF_SPLIT = {
    "partition_ratio": [0.7, 0.15],
    "seed": 45,
}

# ``getLogger()`` without a name returns the root logger.
LOGGER = logging.getLogger()
class DeepFakeASVSpoofDataset(SimpleAudioFakeDataset):
    """Dataset index pairing protocol entries with FLAC files on disk.

    Expected layout under ``path`` (derived from the class attributes)::

        <path>/keys/CM/trial_metadata.txt
        <path>/ASVspoof2021_DF_eval_part00/ASVspoof2021_DF_eval/flac/*.flac
        ...
        <path>/ASVspoof2021_DF_eval_part03/ASVspoof2021_DF_eval/flac/*.flac

    After construction ``self.samples`` is a ``pandas.DataFrame`` with the
    columns ``sample_name``, ``label``, ``path`` and ``attack_type``.
    """

    # Protocol file location, relative to the dataset root.
    protocol_file_name = "keys/CM/trial_metadata.txt"
    # Name shared by the per-part folders and the folder nested inside them.
    subset_dir_prefix = "ASVspoof2021_DF_eval"
    # Suffixes of the per-part folders that are scanned for audio.
    subset_parts = ("part00", "part01", "part02", "part03")

    # Minimum number of whitespace-separated fields in a protocol line.
    _MIN_PROTOCOL_FIELDS = 8
    # Zero-based positions of the fields this class reads.
    _SAMPLE_NAME_FIELD = 1
    _LABEL_FIELD = 5

    def __init__(self, path, subset="train", transform=None):
        """Index the audio files and load the protocol for one subset.

        Args:
            path: Root directory of the dataset (``str`` or ``Path``).
            subset: Forwarded to the base-class initialiser.
                NOTE(review): presumably selects the split returned by
                ``split_samples`` - confirm against the base class.
            transform: Forwarded to the base-class initialiser and stored on
                ``self.transform``.
        """
        super().__init__(subset, transform)
        self.path = path
        self.partition_ratio = DF_ASVSPOOF_SPLIT["partition_ratio"]
        self.seed = DF_ASVSPOOF_SPLIT["seed"]

        # The file index must exist before the protocol is read, because
        # every protocol entry is resolved against it.
        self.flac_paths = self.get_file_references()
        self.samples = self.read_protocol()

        # NOTE(review): also passed to super().__init__ above; this
        # assignment may be redundant - kept to preserve behaviour.
        self.transform = transform

        labels = self.samples["label"]
        LOGGER.info("Spoof: %d", len(self.samples[labels == "spoof"]))
        LOGGER.info("Original: %d", len(self.samples[labels == "bonafide"]))

    def get_file_references(self):
        """Return a dict mapping each FLAC file stem to its ``Path``.

        Every folder listed in ``subset_parts`` is scanned (non-recursively)
        for ``*.flac`` files. If the same stem occurs in several parts, the
        file from the later part wins.
        """
        flac_paths = {}
        for part in self.subset_parts:
            flac_dir = (
                Path(self.path)
                / f"{self.subset_dir_prefix}_{part}"
                / self.subset_dir_prefix
                / "flac"
            )
            for flac_file in flac_dir.glob("*.flac"):
                flac_paths[flac_file.stem] = flac_file
        return flac_paths

    def read_protocol(self):
        """Read the protocol file and build the sample table.

        Lines are grouped by their label field into ``spoof`` and
        ``bonafide``; lines with any other label and blank lines are ignored.
        Each group is passed through ``self.split_samples`` (not defined in
        this class; expected from the base class) and the surviving lines are
        added spoof-first, then bonafide.

        Returns:
            pandas.DataFrame: columns ``sample_name``, ``label``, ``path``
            and ``attack_type``.

        Raises:
            IndexError: if a non-blank line has fewer than six fields.
            ValueError, KeyError, FileNotFoundError: see
                ``add_line_to_samples``.
        """
        samples = {
            "sample_name": [],
            "label": [],
            "path": [],
            "attack_type": [],
        }
        real_samples = []
        fake_samples = []

        with open(Path(self.path) / self.protocol_file_name, "r") as protocol:
            for line in protocol:
                fields = line.split()
                if not fields:
                    # A blank (e.g. trailing) line carries no entry.
                    continue
                label = fields[self._LABEL_FIELD]
                if label == "bonafide":
                    real_samples.append(line)
                elif label == "spoof":
                    fake_samples.append(line)

        for line in self.split_samples(fake_samples):
            samples = self.add_line_to_samples(samples, line)

        for line in self.split_samples(real_samples):
            samples = self.add_line_to_samples(samples, line)

        return pd.DataFrame(samples)

    def add_line_to_samples(self, samples, line):
        """Append the entry described by one protocol line to ``samples``.

        Args:
            samples: Dict of column lists with the keys ``sample_name``,
                ``label``, ``path`` and ``attack_type``; mutated in place.
            line: One whitespace-separated protocol line. The second field is
                the sample name and the sixth is the label; fields beyond the
                eighth are ignored.

        Returns:
            The same ``samples`` dict, for call chaining.

        Raises:
            ValueError: if the line has fewer than eight fields.
            KeyError: if no FLAC file was indexed for the sample name.
            FileNotFoundError: if the indexed file no longer exists.
        """
        fields = line.split()
        if len(fields) < self._MIN_PROTOCOL_FIELDS:
            raise ValueError(
                f"Expected at least {self._MIN_PROTOCOL_FIELDS} fields in "
                f"protocol line, got {len(fields)}: {line!r}"
            )
        sample_name = fields[self._SAMPLE_NAME_FIELD]
        label = fields[self._LABEL_FIELD]

        # Resolve and validate the path before touching ``samples`` so a
        # failure cannot leave the columns with different lengths.
        try:
            sample_path = self.flac_paths[sample_name]
        except KeyError:
            raise KeyError(
                f"No FLAC file indexed for sample {sample_name!r}"
            ) from None
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would silently skip the validation.
        if not sample_path.exists():
            raise FileNotFoundError(f"Audio file is missing: {sample_path}")

        samples["sample_name"].append(sample_name)
        samples["label"].append(label)
        # NOTE(review): attack_type is filled with the label itself rather
        # than a dedicated attack field - confirm this is intended.
        samples["attack_type"].append(label)
        samples["path"].append(sample_path)

        return samples
|