# Source: ldcast_code/scripts/convert_data_NB_2nc.py
# Uploaded by weatherforecast1024 using huggingface_hub (commit d2f661a, verified).
import os
import numpy as np
import dask.array as da
import xarray as xr
def load_all_file(data_dir="", prefix="202306"):
    """Load every file in ``data_dir`` whose name starts with ``prefix``.

    The directory is scanned non-recursively, the matching file names are
    sorted lexicographically, and each file is read with ``np.load``.

    Args:
        data_dir: Directory containing the input files.
        prefix: Leading part of the file name a file must have to be loaded.
            Defaults to ``"202306"``.

    Returns:
        A list holding the object returned by ``np.load`` for each matching
        file, in sorted file-name order. The list is empty when nothing
        matches.

    Raises:
        OSError: If ``data_dir`` cannot be listed or a file cannot be read.
    """
    selected = sorted(
        name for name in os.listdir(data_dir) if name.startswith(prefix)
    )
    return [np.load(os.path.join(data_dir, name)) for name in selected]
def preprocess_data(data_list, out_dir="", patch_size=32, grid_size=640):
    """Cut 2-D frames into square patches and write them to a NetCDF file.

    For every frame in ``data_list`` the top-left ``grid_size`` x
    ``grid_size`` region is tiled with non-overlapping ``patch_size`` x
    ``patch_size`` patches (rows first, then columns; frame after frame).
    The patches are cast to ``uint8`` and written as the variable
    ``patches`` to ``<out_dir>/RZC/patches_RV_202306.nc``. The variables
    ``patch_coords``, ``patch_times``, ``zero_patch_coords``,
    ``zero_patch_times`` and ``scale`` are filled with placeholder values
    (zeros or plain index ranges), not with real metadata.

    Args:
        data_list: Sequence of 2-D arrays, each at least ``grid_size`` long
            along both axes.
        out_dir: Base output directory. The ``RZC`` sub-directory is created
            inside it if needed. Paths are built with ``os.path.join``, so an
            empty string means the current working directory.
        patch_size: Edge length of one patch. Defaults to 32.
        grid_size: Edge length of the square region of each frame that is
            tiled. Defaults to 640.

    Returns:
        The number of frames in ``data_list``.

    Raises:
        ValueError: If ``data_list`` is empty, if the patch/grid sizes cannot
            produce a patch, or if a frame is not 2-D or is smaller than
            ``grid_size`` along either axis.
    """
    # len() instead of truthiness so an ndarray stack is also accepted.
    if len(data_list) == 0:
        raise ValueError("data_list is empty; there is nothing to convert")
    if patch_size <= 0 or grid_size < patch_size:
        raise ValueError(
            f"invalid sizes: patch_size={patch_size}, grid_size={grid_size}"
        )

    # Only offsets that leave room for a complete patch are used, so every
    # patch has the same shape and the stack below is a regular 3-D array.
    offsets = range(0, grid_size - patch_size + 1, patch_size)

    patch_list = []
    for index, frame in enumerate(data_list):
        frame = np.asarray(frame)
        if frame.ndim != 2 or min(frame.shape) < grid_size:
            raise ValueError(
                f"frame {index} has shape {frame.shape}; expected a 2-D "
                f"array of at least {grid_size}x{grid_size}"
            )
        patch_list.extend(
            frame[row:row + patch_size, col:col + patch_size]
            for row in offsets
            for col in offsets
        )

    n_patches = len(patch_list)
    print(n_patches)

    patches_array = np.array(patch_list, dtype=np.uint8)
    # Placeholder metadata: all-zero coordinates, running indices as times,
    # and 0..255 as the scale table.
    coords_array = np.zeros((n_patches, 2), dtype=np.uint16)
    times_array = np.arange(n_patches, dtype=np.int64)
    scale_array = np.arange(256, dtype=np.float32)

    # Each array is wrapped as a dask array consisting of a single chunk.
    patches_da = da.from_array(
        patches_array, chunks=(n_patches, patch_size, patch_size)
    )
    coords_da = da.from_array(coords_array, chunks=(n_patches, 2))
    times_da = da.from_array(times_array, chunks=(n_patches,))
    scale_da = da.from_array(scale_array, chunks=(256,))

    # Every variable gets its own dimension names, so none of them share a
    # dimension in the resulting dataset except ``dim_coord``.
    ds = xr.DataArray(
        patches_da, dims=("dim_patch", "dim_heigh", "dim_width")
    ).to_dataset(name="patches")
    ds["patch_coords"] = xr.DataArray(
        coords_da, dims=("dim_patch1", "dim_coord")
    )
    ds["patch_times"] = xr.DataArray(times_da, dims=("dim_patch2",))
    ds["zero_patch_coords"] = xr.DataArray(
        coords_da, dims=("dim_zero_patch", "dim_coord")
    )
    ds["zero_patch_times"] = xr.DataArray(
        times_da, dims=("dim_zero_patch1",)
    )
    ds["scale"] = xr.DataArray(scale_da, dims=("dim_scale",))
    ds.attrs["zero_value"] = 1

    target_dir = os.path.join(out_dir, "RZC")
    os.makedirs(target_dir, exist_ok=True)
    ds.to_netcdf(os.path.join(target_dir, "patches_RV_202306.nc"))
    return len(data_list)
# Script body: load the test-set frames and convert them into the patch
# NetCDF file, printing the number of frames that were processed.
# NOTE(review): this runs at import time; consider an
# ``if __name__ == "__main__":`` guard if the module is ever imported.
INPUT_DIR = "/data/data_WF/ldcast_precipitation/test"
OUTPUT_DIR = "/data/data_WF/ldcast_precipitation/preprocess_data_test"

# Named ``frames`` rather than ``list`` so the builtin is not shadowed.
frames = load_all_file(data_dir=INPUT_DIR)
print(preprocess_data(frames, out_dir=OUTPUT_DIR))