|
|
import os |
|
|
import numpy as np |
|
|
import dask.array as da |
|
|
import xarray as xr |
|
|
|
|
|
def load_all_file(data_dir="", prefix="202306"):
    """Load every array file in ``data_dir`` whose name starts with ``prefix``.

    Matching file names are sorted lexicographically and each one is read
    with ``np.load`` in that order.

    Args:
        data_dir: Directory to scan.  Only its direct entries are considered
            (``os.listdir`` is not recursive).
        prefix: File-name prefix to keep.  Defaults to ``"202306"``.

    Returns:
        A list holding one ``np.load`` result per matching file, ordered by
        file name.  The list is empty when no entry matches.
    """
    selected = sorted(
        name for name in os.listdir(data_dir) if name.startswith(prefix)
    )
    # os.path.join copes with a trailing separator on data_dir, which manual
    # "/" concatenation does not.
    return [np.load(os.path.join(data_dir, name)) for name in selected]
|
|
|
|
|
def _extract_patches(data_list, patch_size, image_size):
    """Tile the top-left square of every frame into uint8 patches, row-major."""
    blocks_per_side = image_size // patch_size
    tiles = []
    for index, frame in enumerate(data_list):
        frame = np.asarray(frame)
        if frame.ndim != 2 or min(frame.shape) < image_size:
            raise ValueError(
                f"data_list[{index}] has shape {frame.shape}; expected a 2-D "
                f"array of at least {image_size}x{image_size}"
            )
        region = frame[:image_size, :image_size]
        # Axes go (block_row, row, block_col, col) -> (block_row, block_col,
        # row, col), so flattening the first two axes yields patches in the
        # same order as a nested "for i / for j" loop over window origins.
        grid = region.reshape(
            blocks_per_side, patch_size, blocks_per_side, patch_size
        ).swapaxes(1, 2)
        tiles.append(grid.reshape(-1, patch_size, patch_size).astype(np.uint8))
    return np.concatenate(tiles, axis=0)


def preprocess_data(data_list, out_dir="", patch_size=32, image_size=640):
    """Cut 2-D arrays into square patches and write them to a NetCDF file.

    The top-left ``image_size`` x ``image_size`` region of each array is
    split into non-overlapping ``patch_size`` x ``patch_size`` windows
    (row-major, frame after frame).  The patches are cast to ``uint8`` and
    stored, together with placeholder coordinate/time/scale variables, in
    ``<out_dir>/RZC/patches_RV_202306.nc``.  The number of patches is printed.

    Args:
        data_list: Sequence of 2-D arrays, each at least
            ``image_size`` x ``image_size``.
        out_dir: Parent directory for the ``RZC`` output folder, which is
            created if missing.  An empty string means the current working
            directory.
        patch_size: Edge length of one patch.  Must be positive and divide
            ``image_size``.
        image_size: Edge length of the square region that is tiled.

    Returns:
        ``len(data_list)``.

    Raises:
        ValueError: If ``data_list`` is empty, ``patch_size`` is not a
            positive divisor of ``image_size``, or a frame is not 2-D or is
            smaller than ``image_size`` in either dimension.
    """
    if len(data_list) == 0:
        raise ValueError("data_list is empty; there is nothing to preprocess")
    if patch_size <= 0 or image_size <= 0 or image_size % patch_size != 0:
        raise ValueError(
            f"patch_size ({patch_size}) must be a positive divisor of "
            f"image_size ({image_size})"
        )

    patches_array = _extract_patches(data_list, patch_size, image_size)
    n_patches = patches_array.shape[0]
    print(n_patches)

    # Placeholder metadata.  The coordinates were previously produced by
    # casting uniform [0, 1) samples to uint16, which always truncates to 0,
    # so an explicit zero array is equivalent and leaves the global RNG alone.
    coords_array = np.zeros((n_patches, 2), dtype=np.uint16)
    times_array = np.arange(n_patches, dtype=np.int64)
    scale_array = np.arange(256, dtype=np.float32)

    # Each variable is wrapped as a single-chunk dask array.
    patches_da = da.from_array(
        patches_array, chunks=(n_patches, patch_size, patch_size)
    )
    coords_da = da.from_array(coords_array, chunks=(n_patches, 2))
    times_da = da.from_array(times_array, chunks=(n_patches,))
    scale_da = da.from_array(scale_array, chunks=(256,))

    # Variable and dimension names are reproduced verbatim (including the
    # "dim_heigh" spelling); presumably readers of this file look them up by
    # name -- confirm before renaming.
    ds = xr.DataArray(
        patches_da, dims=("dim_patch", "dim_heigh", "dim_width")
    ).to_dataset(name='patches')
    ds['patch_coords'] = xr.DataArray(
        coords_da, dims=("dim_patch1", "dim_coord")
    )
    ds['patch_times'] = xr.DataArray(times_da, dims=("dim_patch2",))
    ds['zero_patch_coords'] = xr.DataArray(
        coords_da, dims=("dim_zero_patch", "dim_coord")
    )
    ds['zero_patch_times'] = xr.DataArray(times_da, dims=("dim_zero_patch1",))
    ds['scale'] = xr.DataArray(scale_da, dims=("dim_scale",))
    ds.attrs["zero_value"] = 1

    # os.path.join keeps an empty out_dir relative instead of resolving to
    # "/RZC" at the filesystem root.
    target_dir = os.path.join(out_dir, "RZC")
    os.makedirs(target_dir, exist_ok=True)
    ds.to_netcdf(os.path.join(target_dir, "patches_RV_202306.nc"))

    return len(data_list)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the input arrays, write the patch file and
    # print the number of arrays processed.  Guarded so that importing this
    # module does not start the I/O pipeline.
    loaded_arrays = load_all_file(
        data_dir="/data/data_WF/ldcast_precipitation/test"
    )
    print(
        preprocess_data(
            loaded_arrays,
            out_dir="/data/data_WF/ldcast_precipitation/preprocess_data_test",
        )
    )
|
|
|