"""Cut ``202306*`` array files into square patches and store them as NetCDF."""

import os

import numpy as np
import dask.array as da
import xarray as xr


def load_all_file(data_dir=""):
    """Load every file in *data_dir* whose name starts with ``"202306"``.

    Only the directory itself is scanned (no recursion). Matching names are
    sorted lexicographically and each file is read with ``numpy.load``.

    Args:
        data_dir: Directory containing the files to load.

    Returns:
        A list holding the result of ``numpy.load`` for each matching file,
        in sorted file-name order. The list is empty when nothing matches.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``numpy.load`` when the
            directory or a file cannot be read.
    """
    # NOTE(review): earlier code also built two unused axes here,
    # lon = np.arange(103.5, 109.2, 0.00892) and
    # lat = np.arange(8, 13.75, 0.00899) -- presumably the grid of each
    # frame; confirm before relying on these numbers.
    selected = sorted(
        name for name in os.listdir(data_dir) if name.startswith("202306")
    )
    return [np.load(os.path.join(data_dir, name)) for name in selected]


def _extract_patches(data_list, patch_size, image_size):
    """Return the patch_size x patch_size tiles of every frame, row-major."""
    tiles = []
    for frame in data_list:
        for row in range(0, image_size, patch_size):
            for col in range(0, image_size, patch_size):
                tiles.append(
                    frame[row:row + patch_size, col:col + patch_size]
                )
    return tiles


def preprocess_data(data_list, out_dir="", patch_size=32, image_size=640):
    """Tile each frame into patches and write them to a NetCDF dataset.

    Every frame in *data_list* is sliced into ``patch_size`` x ``patch_size``
    tiles covering its top-left ``image_size`` x ``image_size`` region. The
    tiles are stacked, cast to ``uint8`` and written, together with a few
    auxiliary variables, to ``<out_dir>/RZC/patches_RV_202306.nc``. The
    number of extracted patches is printed.

    Args:
        data_list: Sequence of 2-D arrays. Frames are expected to span at
            least ``image_size`` in both dimensions; smaller frames yield
            truncated tiles that cannot be stacked.
        out_dir: Parent directory of the ``RZC`` output folder, which is
            created if missing. An empty string means the current working
            directory.
        patch_size: Edge length of each square patch.
        image_size: Extent of each frame (per axis) that is tiled.

    Returns:
        The number of frames in *data_list*.

    Raises:
        ValueError: If no patches could be extracted (e.g. *data_list* is
            empty).
    """
    tiles = _extract_patches(data_list, patch_size, image_size)
    n_patches = len(tiles)
    print(n_patches)
    if not tiles:
        raise ValueError("data_list produced no patches; nothing to write")

    patches_array = np.array(tiles, dtype=np.uint8)

    # NOTE(review): the coordinate / time / scale variables below are
    # synthetic, not derived from the input -- they look like placeholders
    # that only give the output file its expected layout; confirm.
    # np.random.rand yields values in [0, 1), so the uint16 cast stores zeros.
    coords_array = np.array(np.random.rand(n_patches, 2), dtype=np.uint16)
    times_array = np.arange(n_patches, dtype=np.int64)
    scale_array = np.arange(256, dtype=np.float32)

    # Each array is wrapped as a single dask chunk spanning the whole array.
    patches_da = da.from_array(
        patches_array, chunks=(n_patches, patch_size, patch_size)
    )
    coords_da = da.from_array(coords_array, chunks=(n_patches, 2))
    times_da = da.from_array(times_array, chunks=(n_patches,))
    scale_da = da.from_array(scale_array, chunks=(256,))

    # Dimension and variable names end up in the output file and are kept
    # verbatim (including the "dim_heigh" spelling).
    ds = xr.DataArray(
        patches_da, dims=("dim_patch", "dim_heigh", "dim_width")
    ).to_dataset(name="patches")
    ds["patch_coords"] = xr.DataArray(
        coords_da, dims=("dim_patch1", "dim_coord")
    )
    ds["patch_times"] = xr.DataArray(times_da, dims="dim_patch2")
    # The "zero patch" variables reuse the same backing arrays as above.
    ds["zero_patch_coords"] = xr.DataArray(
        coords_da, dims=("dim_zero_patch", "dim_coord")
    )
    ds["zero_patch_times"] = xr.DataArray(times_da, dims="dim_zero_patch1")
    ds["scale"] = xr.DataArray(scale_da, dims="dim_scale")
    ds.attrs["zero_value"] = 1

    # os.path.join keeps an empty out_dir relative instead of resolving to
    # "/RZC" at the filesystem root.
    target_dir = os.path.join(out_dir, "RZC")
    os.makedirs(target_dir, exist_ok=True)
    ds.to_netcdf(os.path.join(target_dir, "patches_RV_202306.nc"))

    return len(data_list)


if __name__ == "__main__":
    frames = load_all_file(data_dir="/data/data_WF/ldcast_precipitation/test")
    print(
        preprocess_data(
            frames,
            out_dir="/data/data_WF/ldcast_precipitation/preprocess_data_test",
        )
    )