import cv2
import librosa
import numpy as np
import albumentations
from albumentations import (Compose, ImageCompression, GaussNoise, HorizontalFlip,
                            PadIfNeeded, OneOf, ToGray, ShiftScaleRotate, GaussianBlur,
                            RandomBrightnessContrast, FancyPCA, HueSaturationValue, BasicTransform)
class AudioTransform(BasicTransform):
    """Base transform for audio tasks. Here we override the targets and
    update_params methods of BasicTransform so that transforms operate on
    the "data" key instead of "image"."""

    @property
    def targets(self):
        return {"data": self.apply}

    def update_params(self, params, **kwargs):
        if hasattr(self, "interpolation"):
            params["interpolation"] = self.interpolation
        if hasattr(self, "fill_value"):
            params["fill_value"] = self.fill_value
        return params
class TimeShifting(AudioTransform):
    """Apply a random time shift to the audio."""

    def __init__(self, always_apply=False, p=0.5):
        super(TimeShifting, self).__init__(always_apply, p)

    def apply(self, data, **params):
        '''
        data : ndarray of audio timeseries
        '''
        start_ = int(np.random.uniform(-80000, 80000))
        if start_ >= 0:
            # shift left: drop the first start_ samples and pad the end with low-amplitude noise
            audio_time_shift = np.r_[data[start_:], np.random.uniform(-0.001, 0.001, start_)]
        else:
            # shift right: pad the start with low-amplitude noise and drop the trailing samples
            audio_time_shift = np.r_[np.random.uniform(-0.001, 0.001, -start_), data[:start_]]
        return audio_time_shift
class PitchShift(AudioTransform):
    """Shift the pitch of the audio by n_steps semitones."""

    def __init__(self, always_apply=False, p=0.5, n_steps=None):
        super(PitchShift, self).__init__(always_apply, p)
        # n_steps is the number of semitones to shift by
        self.n_steps = n_steps

    def apply(self, data, **params):
        '''
        data : ndarray of audio timeseries
        '''
        return librosa.effects.pitch_shift(data, sr=16000, n_steps=self.n_steps)
class AddGaussianNoise(AudioTransform):
    """Add Gaussian noise to the audio."""

    def __init__(self, always_apply=False, p=0.5):
        super(AddGaussianNoise, self).__init__(always_apply, p)

    def apply(self, data, **params):
        '''
        data : ndarray of audio timeseries
        '''
        noise = np.random.randn(len(data))
        data_wn = data + 0.005 * noise
        return data_wn
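Because AudioTransform maps the "data" target to apply(), each of these transforms is called with a data= keyword and returns a dict. A minimal sketch of applying a single transform, assuming the older albumentations API used above and a placeholder 16 kHz mono file path:

wav, _ = librosa.load("sample.wav", sr=16000)        # "sample.wav" is a placeholder path (assumption)
noisy = AddGaussianNoise(p=1.0)(data=wav)["data"]    # p=1.0 so the noise is applied on this call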
create_frame_transforms = Compose([
    ImageCompression(quality_lower=60, quality_upper=100, p=0.5),
    GaussNoise(p=0.1),
    GaussianBlur(blur_limit=3, p=0.05),
    HorizontalFlip(),
    PadIfNeeded(min_height=256, min_width=256, border_mode=cv2.BORDER_CONSTANT),
    OneOf([RandomBrightnessContrast(), FancyPCA(), HueSaturationValue()], p=0.7),
    ToGray(p=0.2),
    ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=10,
                     border_mode=cv2.BORDER_CONSTANT, p=0.5),
])
create_spec_transforms = albumentations.Compose([
    TimeShifting(p=0.9),        # not p=1.0, so the network still sees some unshifted audio
    AddGaussianNoise(p=0.8),
    PitchShift(p=0.5, n_steps=4),
])
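To show how the two composed pipelines are meant to be used, here is a minimal sketch; the file paths and the 16 kHz sample rate are assumptions, and it relies on the albumentations version used above passing the custom "data" key through Compose to the audio transforms:

frame = cv2.imread("frame.png")              # placeholder path; BGR uint8 frame
wav, _ = librosa.load("clip.wav", sr=16000)  # placeholder path; mono waveform at 16 kHz

augmented_frame = create_frame_transforms(image=frame)["image"]  # frame pipeline uses the standard "image" target
augmented_wav = create_spec_transforms(data=wav)["data"]         # audio pipeline uses the custom "data" target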