Spaces:
Runtime error
Runtime error
| from pathlib import Path | |
| import pandas as pd | |
| from src.datasets.base_dataset import SimpleAudioFakeDataset | |
# Split configuration read by FakeAVCelebDataset.__init__.
# "train" / "test" / "val" each list the attack (method) names whose fake
# samples are kept for that split; all three currently name the same four.
# "partition_ratio" and "seed" are copied onto every dataset instance —
# presumably consumed by the inherited split_samples(); confirm in the base class.
FAKEAVCELEB_SPLIT = dict(
    train=["faceswap-wav2lip", "fsgan-wav2lip", "wav2lip", "rtvc"],
    test=["faceswap-wav2lip", "fsgan-wav2lip", "wav2lip", "rtvc"],
    val=["faceswap-wav2lip", "fsgan-wav2lip", "wav2lip", "rtvc"],
    partition_ratio=[0.7, 0.15],
    seed=45,
)
class FakeAVCelebDataset(SimpleAudioFakeDataset):
    """Audio samples listed in the FakeAVCeleb metadata CSV, labelled spoof/bonafide.

    On construction the metadata file is read and ``self.samples`` is built as
    one DataFrame with the columns ``user_id``, ``sample_name``,
    ``attack_type``, ``label`` and ``path`` (fake rows first, then real rows).
    Which rows belong to the requested subset is decided by the inherited
    ``split_samples`` method, which is not defined in this class.
    """

    audio_folder = "FakeAVCeleb-audio"
    audio_extension = ".mp3"
    metadata_file = Path(audio_folder) / "meta_data.csv"
    # NOTE(review): "dev" and "eval" are not keys of FAKEAVCELEB_SPLIT (which
    # uses "test" and "val"). Left unchanged because the base class may read
    # this attribute — confirm and align.
    subsets = ("train", "dev", "eval")

    # Keys of FAKEAVCELEB_SPLIT that hold settings rather than attack lists.
    _config_keys = ("partition_ratio", "seed")
    # Column order of the sample tables produced by get_*_samples().
    _sample_columns = ("user_id", "sample_name", "attack_type", "label", "path")

    def __init__(self, path, subset="train", transform=None):
        """Load the metadata and assemble the sample table for ``subset``.

        Args:
            path: Directory that contains the ``FakeAVCeleb-audio`` folder.
            subset: A split key of ``FAKEAVCELEB_SPLIT``
                ("train", "test" or "val").
            transform: Forwarded unchanged to the base-class constructor.

        Raises:
            KeyError: If ``subset`` is not a split key of ``FAKEAVCELEB_SPLIT``.
        """
        super().__init__(subset, transform)

        # Without this check "seed" / "partition_ratio" would be accepted and
        # silently stored as the attack list, and any other unknown name would
        # surface as a bare KeyError carrying no hint about the valid choices.
        valid_subsets = [
            key for key in FAKEAVCELEB_SPLIT if key not in self._config_keys
        ]
        if subset not in valid_subsets:
            raise KeyError(
                f"Unknown subset {subset!r}; expected one of {valid_subsets}"
            )

        self.path = path
        self.subset = subset
        self.allowed_attacks = FAKEAVCELEB_SPLIT[subset]
        self.partition_ratio = FAKEAVCELEB_SPLIT["partition_ratio"]
        self.seed = FAKEAVCELEB_SPLIT["seed"]

        self.metadata = self.get_metadata()
        self.samples = pd.concat(
            [self.get_fake_samples(), self.get_real_samples()],
            ignore_index=True,
        )

    def get_metadata(self):
        """Read the metadata CSV and add an ``audio_type`` column.

        ``audio_type`` is the part of the ``type`` column after its last "-".
        """
        md = pd.read_csv(Path(self.path) / self.metadata_file)
        md["audio_type"] = md["type"].apply(lambda x: x.split("-")[-1])
        return md

    def _new_sample_table(self):
        """Return an empty column-name -> list mapping for one sample table."""
        return {column: [] for column in self._sample_columns}

    def _add_sample(self, table, sample, label, attack_type):
        """Append one metadata row to ``table`` with the given label and attack type."""
        table["user_id"].append(sample["source"])
        table["sample_name"].append(Path(sample["filename"]).stem)
        table["attack_type"].append(attack_type)
        table["label"].append(label)
        table["path"].append(self.get_file_path(sample))

    def get_fake_samples(self):
        """Return the "spoof" rows of this subset as a DataFrame.

        For every attack in ``self.allowed_attacks`` the metadata rows whose
        ``method`` equals that attack and whose ``audio_type`` is "FakeAudio"
        are passed through ``split_samples``; ``attack_type`` is the row's
        ``method``.
        """
        samples = self._new_sample_table()

        for attack_name in self.allowed_attacks:
            fake_samples = self.metadata[
                (self.metadata["method"] == attack_name)
                & (self.metadata["audio_type"] == "FakeAudio")
            ]

            # NOTE(review): split_samples receives the iterrows() generator
            # here, whereas get_real_samples hands it the DataFrame itself.
            # Kept as in the original because split_samples is defined
            # elsewhere and switching the input type could change which rows
            # land in which subset — confirm which form is intended.
            samples_list = fake_samples.iterrows()
            samples_list = self.split_samples(samples_list)

            for _, sample in samples_list:
                self._add_sample(samples, sample, "spoof", sample["method"])

        return pd.DataFrame(samples)

    def get_real_samples(self):
        """Return the "bonafide" rows of this subset as a DataFrame.

        Selects metadata rows with ``method == "real"`` and
        ``audio_type == "RealAudio"``, passes that DataFrame through
        ``split_samples`` and sets ``attack_type`` to "-" for every row.
        """
        samples = self._new_sample_table()

        samples_list = self.metadata[
            (self.metadata["method"] == "real")
            & (self.metadata["audio_type"] == "RealAudio")
        ]
        samples_list = self.split_samples(samples_list)

        for _, sample in samples_list.iterrows():
            self._add_sample(samples, sample, "bonafide", "-")

        return pd.DataFrame(samples)

    def get_file_path(self, sample):
        """Return the audio file location for one metadata row.

        The first "/"-separated component of the row's ``path`` is replaced by
        ``audio_folder``, the result is placed under ``self.path``, and the
        row's ``filename`` is appended with its suffix set to
        ``audio_extension``.
        """
        path = "/".join([self.audio_folder, *sample["path"].split("/")[1:]])
        return (
            Path(self.path)
            / path
            / Path(sample["filename"]).with_suffix(self.audio_extension)
        )