diff --git a/src/finam/data/esri_tools.py b/src/finam/data/esri_tools.py new file mode 100644 index 0000000000000000000000000000000000000000..245eb8797abcdafb43294422518fa7db42642bd1 --- /dev/null +++ b/src/finam/data/esri_tools.py @@ -0,0 +1,137 @@ +"""Common ESRI ASCII grid routines.""" +import warnings + +import numpy as np + +ESRI_TYPES = { + "ncols": int, + "nrows": int, + "xllcorner": float, + "yllcorner": float, + "xllcenter": float, + "yllcenter": float, + "cellsize": float, + "nodata_value": float, +} +"""types for ESRI ASCII grid header information.""" + +ESRI_REQ = {"ncols", "nrows", "xllcorner", "yllcorner", "cellsize"} +"""Required ESRI ASCII grid header information.""" + + +def _is_number(string): + try: + float(string) + return True + except ValueError: + return False + + +def _extract_header(file): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return np.genfromtxt( + file, dtype=str, max_rows=6, usecols=(0, 1), invalid_raise=False + ) + + +def standardize_header(header): + """ + Standardize an ASCII grid header dictionary. + + Parameters + ---------- + header : :class:`dict` + Raw header as dictionary. + + Returns + ------- + :class:`dict` + Standardized header as dictionary. + + Raises + ------ + ValueError + If the header is missing required information. + See :any:`ESRI_REQ` + """ + header = {n: ESRI_TYPES[n](v) for (n, v) in header.items() if n in ESRI_TYPES} + # convert cell center to corner information + if "xllcenter" in header: + header["xllcorner"] = header["xllcenter"] - 0.5 * header.get("cellsize", 1) + del header["xllcenter"] + if "yllcenter" in header: + header["yllcorner"] = header["yllcenter"] - 0.5 * header.get("cellsize", 1) + del header["yllcenter"] + # check required header items + missing = ESRI_REQ - (set(header) & ESRI_REQ) + if missing: + msg = f"standardize_header: missing header information {missing}" + raise ValueError(msg) + return header + + +def read_header(file): + """ + Read an ASCII grid header from file. + + Parameters + ---------- + file : :class:`~os.PathLike` + File containing the ASCII grid header. + + Returns + ------- + :class:`dict` + Standardized header as dictionary. + + Notes + ----- + "xllcenter" and "yllcenter" will be converted to + "xllcorner" and "yllcorner" resepectively. + """ + header_lines = _extract_header(file) + return standardize_header({n: v for (n, v) in header_lines}) + + +def read_grid(file, dtype=None): + """ + Read an ASCII grid from file. + + Parameters + ---------- + file : :class:`~os.PathLike` + File containing the ASCII grid. + dtype : str/type, optional + Data type. + Needs to be integer or float and compatible with np.dtype + (i.e. "i4", "f4", "f8"), by default None + + Returns + ------- + header : dict + Header describing the grid. + data : numpy.ndarray + Data of the grid. + + Raises + ------ + ValueError + If data shape is not matching the given header. + """ + header_lines = _extract_header(file) + header = standardize_header({n: v for (n, v) in header_lines}) + # last line could already be data if "nodata_value" is missing + numeric_last = _is_number(header_lines[-1][0]) + header_size = len(header_lines) - int(numeric_last) + data = np.loadtxt(file, dtype=dtype, skiprows=header_size, ndmin=2) + nrows, ncols = header["nrows"], header["ncols"] + if data.shape[0] != nrows or data.shape[1] != ncols: + msg = ( + f"read_grid: data shape {data.shape} " + f"not matching given header ({nrows=}, {ncols=})." + ) + raise ValueError(msg) + if "nodata_value" in header and np.issubdtype(data.dtype, np.integer): + header["nodata_value"] = int(header["nodata_value"]) + return header, data diff --git a/src/finam/data/grid_spec.py b/src/finam/data/grid_spec.py index fb497c6f8fdbc58c143fdb427a8008a4fffa9f6b..7ccde86cae73f4a3419049ad3ba3b6c0aa8edcff 100644 --- a/src/finam/data/grid_spec.py +++ b/src/finam/data/grid_spec.py @@ -5,6 +5,7 @@ import numpy as np from pyevtk.hl import imageToVTK from ..tools.enum_helper import get_enum_value +from .esri_tools import read_grid, read_header from .grid_base import Grid, GridBase, StructuredGrid from .grid_tools import ( CellType, @@ -387,7 +388,7 @@ class EsriGrid(UniformGrid): ) @classmethod - def from_file(cls, file, axes_attributes=None, crs=None): + def from_file(cls, file, axes_attributes=None, crs=None, read_mask=False): """ Generate EsriGrid from given file. @@ -399,23 +400,24 @@ class EsriGrid(UniformGrid): Axes attributes following the CF convention (xyz order), by default None crs : str or None, optional The coordinate reference system, by default None + read_mask : bool, optional + Whether to read the mask from the grid data, by default False Returns ------- EsriGrid The grid specified in the file. """ - header = np.loadtxt(file, dtype=str, max_rows=5) - kwargs = {name: (float(v) if "." in v else int(v)) for (name, v) in header} - if "xllcenter" in kwargs: - kwargs["xllcorner"] = kwargs["xllcenter"] - 0.5 * kwargs["cellsize"] - del kwargs["xllcenter"] - if "yllcenter" in kwargs: - kwargs["yllcorner"] = kwargs["yllcenter"] - 0.5 * kwargs["cellsize"] - del kwargs["yllcenter"] - kwargs["crs"] = crs - kwargs["axes_attributes"] = axes_attributes - return cls(**kwargs) + header = read_header(file) + nodata = header.pop("nodata_value", None) + mask = None + if read_mask and nodata is not None: + _, data = read_grid(file) + mask = np.isclose(data, nodata) + header["crs"] = crs + header["axes_attributes"] = axes_attributes + header["mask"] = mask + return cls(**header) class UnstructuredGrid(Grid):