Spaces:
Runtime error
Runtime error
| import logging | |
| from pathlib import Path | |
| import pandas as pd | |
| from src.datasets.base_dataset import SimpleAudioFakeDataset | |
# Split configuration copied onto every DeepFakeASVSpoofDataset instance
# (as ``partition_ratio`` and ``seed``).
# NOTE(review): the two ratios presumably describe two of the three subsets,
# with the remainder going to the third -- confirm against the base class's
# ``split_samples``.
DF_ASVSPOOF_SPLIT = dict(
    partition_ratio=[0.7, 0.15],
    seed=45,
)

# Root logger (no name passed), so messages follow the application-wide
# logging configuration.
# NOTE(review): ``logging.getLogger(__name__)`` is the usual idiom; kept as the
# root logger here to avoid changing where records are routed.
LOGGER = logging.getLogger()
class DeepFakeASVSpoofDataset(SimpleAudioFakeDataset):
    """ASVspoof 2021 DeepFake (DF) evaluation data as a labelled sample table.

    Audio files are looked up under
    ``<path>/ASVspoof2021_DF_eval_<part>/ASVspoof2021_DF_eval/flac/*.flac`` and
    labels are read from ``<path>/keys/CM/trial_metadata.txt``.  The resulting
    ``samples`` attribute is a ``pandas.DataFrame`` with the columns
    ``sample_name``, ``label``, ``path`` and ``attack_type``.
    """

    # Locations relative to the dataset root directory.
    protocol_file_name = "keys/CM/trial_metadata.txt"
    subset_dir_prefix = "ASVspoof2021_DF_eval"
    subset_parts = ("part00", "part01", "part02", "part03")

    # Zero-based, space-separated protocol columns consumed by this class.
    _SAMPLE_NAME_FIELD = 1
    _LABEL_FIELD = 5

    def __init__(self, path, subset="train", transform=None):
        """Index the audio files and build the sample table.

        Args:
            path: Root directory of the dataset (str or ``Path``).
            subset: Subset name forwarded to the base class.
            transform: Optional transform forwarded to the base class and also
                stored on ``self.transform``.
        """
        super().__init__(subset, transform)
        self.path = path
        # Set before read_protocol() runs: the inherited split_samples() is
        # presumably driven by these two attributes -- confirm in base class.
        self.partition_ratio = DF_ASVSPOOF_SPLIT["partition_ratio"]
        self.seed = DF_ASVSPOOF_SPLIT["seed"]

        self.flac_paths = self.get_file_references()
        self.samples = self.read_protocol()
        self.transform = transform

        # Lazy %-style arguments: the message is only formatted when the
        # record is actually emitted.  Output text is unchanged.
        labels = self.samples["label"]
        LOGGER.info("Spoof: %d", int((labels == "spoof").sum()))
        LOGGER.info("Original: %d", int((labels == "bonafide").sum()))

    def get_file_references(self):
        """Map every FLAC file's stem to its path across all subset parts.

        Returns:
            dict: ``{file stem: Path}`` for each ``*.flac`` found in the
            ``flac`` directory of every entry in ``subset_parts``.  Parts whose
            directory is missing simply contribute no entries.
        """
        root = Path(self.path)
        flac_paths = {}
        for part in self.subset_parts:
            flac_dir = (
                root
                / f"{self.subset_dir_prefix}_{part}"
                / self.subset_dir_prefix
                / "flac"
            )
            for flac_file in flac_dir.glob("*.flac"):
                flac_paths[flac_file.stem] = flac_file
        return flac_paths

    def read_protocol(self):
        """Read the protocol file and return the samples of the active subset.

        Lines are grouped by label (``spoof`` / ``bonafide``; any other label
        is ignored), each group is passed through the inherited
        ``split_samples``, and the surviving lines are turned into rows, spoof
        rows first.

        Returns:
            pandas.DataFrame: Columns ``sample_name``, ``label``, ``path`` and
            ``attack_type``.

        Raises:
            IndexError: If a non-blank line has fewer than six fields.
            KeyError / FileNotFoundError / ValueError: Propagated from
                ``add_line_to_samples``.
        """
        samples = {
            "sample_name": [],
            "label": [],
            "path": [],
            "attack_type": [],
        }
        real_samples = []
        fake_samples = []

        protocol_path = Path(self.path) / self.protocol_file_name
        with open(protocol_path, "r", encoding="utf-8") as file:
            for line in file:
                stripped = line.strip()
                if not stripped:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                label = stripped.split(" ")[self._LABEL_FIELD]
                if label == "bonafide":
                    real_samples.append(line)
                elif label == "spoof":
                    fake_samples.append(line)

        # Each class is split independently so both appear in the subset.
        for line in self.split_samples(fake_samples):
            samples = self.add_line_to_samples(samples, line)
        for line in self.split_samples(real_samples):
            samples = self.add_line_to_samples(samples, line)

        return pd.DataFrame(samples)

    def add_line_to_samples(self, samples, line):
        """Append the sample described by one protocol line to ``samples``.

        The line is validated completely before anything is appended, so a
        failure never leaves the column lists with different lengths.

        Args:
            samples: Dict of column lists (``sample_name``, ``label``,
                ``path``, ``attack_type``); mutated in place.
            line: One space-separated protocol line.  Only the sample-name and
                label columns are read, so lines carrying extra trailing
                columns are accepted as well.

        Returns:
            dict: The same ``samples`` object, for call chaining.

        Raises:
            ValueError: If the line has too few fields.
            KeyError: If no FLAC file was indexed for the sample name.
            FileNotFoundError: If the indexed FLAC file no longer exists.
        """
        fields = line.strip().split(" ")
        if len(fields) <= self._LABEL_FIELD:
            raise ValueError(
                f"Malformed protocol line (expected at least "
                f"{self._LABEL_FIELD + 1} fields, got {len(fields)}): {line!r}"
            )
        sample_name = fields[self._SAMPLE_NAME_FIELD]
        label = fields[self._LABEL_FIELD]

        try:
            sample_path = self.flac_paths[sample_name]
        except KeyError:
            raise KeyError(
                f"No FLAC file found for protocol sample {sample_name!r}"
            ) from None
        # Explicit check instead of `assert`, which is stripped under `-O`.
        if not sample_path.exists():
            raise FileNotFoundError(f"Audio file does not exist: {sample_path}")

        samples["sample_name"].append(sample_name)
        samples["label"].append(label)
        # NOTE(review): attack_type receives the bonafide/spoof label, not the
        # attack identifier column of the protocol -- confirm this is intended.
        samples["attack_type"].append(label)
        samples["path"].append(sample_path)
        return samples