Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ target/
# Jupyter Notebook
.ipynb_checkpoints

# Local scratch / experiments (not part of the tracked docs)
/scratch/

# IPython
profile_default/
ipython_config.py
Expand Down
14 changes: 10 additions & 4 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ def boundary_forcing(use_dask: bool, small_grid: Grid) -> BoundaryForcing:
start_time=datetime(2012, 1, 1),
end_time=datetime(2012, 12, 31),
source={"name": "GLORYS", "path": [fname1, fname2]},
apply_2d_horizontal_fill=False,
prefill=None,
use_dask=use_dask,
)

Expand All @@ -551,7 +551,7 @@ def boundary_forcing_with_2d_fill(use_dask: bool, small_grid: Grid) -> BoundaryF
start_time=datetime(2012, 1, 1),
end_time=datetime(2012, 12, 31),
source={"name": "GLORYS", "path": [fname1, fname2]},
apply_2d_horizontal_fill=True,
prefill="2d_lateral_fill",
use_dask=use_dask,
)

Expand Down Expand Up @@ -583,7 +583,10 @@ def bgc_boundary_forcing_from_climatology(use_dask: bool) -> BoundaryForcing:
end_time=datetime(2021, 6, 30),
source={"path": fname_bgc, "name": "CESM_REGRIDDED", "climatology": True}, # type: ignore[dict-item]
type="bgc",
apply_2d_horizontal_fill=True,
prefill="2d_lateral_fill",
# scipy regrid keeps this fixture byte-identical to the legacy AMG output
# (the new default 'auto' would use xESMF); see test_validation regression.
regrid_method="scipy",
use_dask=use_dask,
)

Expand Down Expand Up @@ -613,7 +616,10 @@ def bgc_boundary_forcing_from_unified_climatology(use_dask: bool) -> BoundaryFor
end_time=datetime(2021, 6, 30),
source={"path": fname_bgc, "name": "UNIFIED", "climatology": True}, # type: ignore[dict-item]
type="bgc",
apply_2d_horizontal_fill=True,
prefill="2d_lateral_fill",
# scipy regrid keeps this fixture byte-identical to the legacy AMG output
# (the new default 'auto' would use xESMF); see test_validation regression.
regrid_method="scipy",
use_dask=use_dask,
)

Expand Down
10,939 changes: 7,775 additions & 3,164 deletions docs/boundary_forcing.ipynb

Large diffs are not rendered by default.

3,419 changes: 1,634 additions & 1,785 deletions docs/end_to_end.ipynb

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions docs/releases.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* The underlying data allowing self attration and loading (SAL) correction to tidal forcing has been updated for accuracy, and was derived from the TPXO10v2a datasets. This is not a functional breaking change, but will change (improve) output from `TidalForcing` based on TPXO data compared to previos versions of ROMS-Tools. See ([#617](https://github.com/CWorthy-ocean/roms-tools/pull/617))
* ERA5 data correction in `SurfaceForcing` now includes longwave radiation, and the shortwave radiation correction multipliers are updated ([#614](https://github.com/CWorthy-ocean/roms-tools/pull/614)). This is a breaking change only in the sense that input radiation fields generated by ROMS-Tools using ERA5 forcing data will be slightly different (but more correct) compared to previous ROMS-Tools versions. Correction factors for previous versions of ROMS-Tools remain functional and available.
* pco2 is no longer read in during `SurfaceForcing` `type='bgc'` ([#608](https://github.com/CWorthy-ocean/roms-tools/pull/608)).
* `BoundaryForcing`'s `apply_2d_horizontal_fill` argument is **deprecated** in favor of the new `prefill` argument (`apply_2d_horizontal_fill=True` → `prefill="2d_lateral_fill"`, `False` → `prefill=None`); it still works but emits a `DeprecationWarning`. Note that `prefill="2d_lateral_fill"` now regrids with the default `"auto"` engine (xESMF when installed), so its output differs slightly from the pre-v4 AMG+scipy path; pass `regrid_method="scipy"` to reproduce the legacy output byte-for-byte.
* `from_file` is now a private function to `Grid`. Files are now loaded as `Grid(filename='grid.nc')` ([#573](https://github.com/CWorthy-ocean/roms-tools/pull/573))
* The `ChildGrid` has been removed. Both a child and parent grid are created using `Grid`, and the functions `align_grids` and `make_edata` are called to adjust bathymetry and do the mapping ([#573](https://github.com/CWorthy-ocean/roms-tools/pull/573)).
* CDR metrics on `ds_cdr` rename efficiency fields: `cdr_efficiency` to `cdr_efficiency_from_flux`, and `cdr_efficiency_from_delta_diff` to `cdr_efficiency_from_DIC_difference`. Saved datasets, ensemble inputs, or downstream code using the old names need updating.
Expand All @@ -26,6 +27,10 @@
* Option to automatically close narrow 1-cell water channels during mask generation via `Grid.update_mask(close_narrow_channels=True)` or `Grid(close_narrow_channels=True)`; integrated into the standard mask workflow
* CDR metrics report CO2 uptake as mass in tonnes of CO2 for both the air-sea flux-difference and DIC-difference constructions (using CO2 molar mass and native flux or inventory units when available).
* The CDR metrics figure from `ROMSOutput.cdr_metrics()` uses twin y-axes: CDR efficiency (dimensionless) and CO2 uptake (tonnes CO2), with title "CO2 uptake and CDR efficiency".
* New `type` is added to SurfaceForcing. `restoring` creates restoring forces files for ROMS ('sss' is the only option) ([#589](https://github.com/CWorthy-ocean/roms-tools/pull/589))
* `CDRForcing` has an option `time_interpolation` to choose step-like or interpolated releases ([#601](https://github.com/CWorthy-ocean/roms-tools/pull/601))
* `BoundaryForcing` now regrids horizontally with a NaN-aware approach by default, removing the need for the slow and memory-hungry 2D Poisson horizontal fill on large multi-year domains. When xESMF is available, masked bilinear regridding (weights renormalized over valid ocean cells) plus inverse-distance-weighted extrapolation (configurable via the `extrap_method` / `extrap_kwargs` arguments) produces NaN-free boundaries directly. When xESMF is unavailable (e.g. Windows/pip installs), a nearest-neighbor pre-fill of the source followed by scipy linear interpolation provides an equivalent NaN-free fallback. **Note:** boundary values shift slightly compared to previous releases for users on the default settings.
* `BoundaryForcing` gains a unified `prefill` argument (with `prefill_kwargs`) to optionally fill the *source* before regridding: `None` (default, no prefill), `"2d_lateral_fill"` (legacy AMG Poisson fill), `"inverse_dist"` / `"nearest_s2d"` (xESMF source-on-source fills), or `"nearest_neighbor"` (cheap scipy distance-transform fill; also the automatic fallback when xESMF is unavailable). The horizontal regrid engine is now selected independently via `regrid_method` (`"auto"` (default; xESMF if installed else scipy), `"xesmf"`, or `"scipy"`).


### Internal Changes
Expand All @@ -39,13 +44,19 @@
* Ensure `regionmask>=0.11.0` in `pyproject.toml` ([#565](https://github.com/CWorthy-ocean/roms-tools/pull/565))
* Integrate narrow-channel closing directly into `Grid.update_mask()` (internal `_close_narrow_channels`), iterating north–south and east–west up to 10 passes
* CDR analysis tests cover tonnes-CO2 uptake and renamed efficiency fields; `compute_cdr_metrics` requires `DIC_source` in the input dataset alongside other CDR variables.
* short and long wave radiation time is shifted 1/2 a timestep sooner and have a dim of `rad_time` ([#586](https://github.com/CWorthy-ocean/roms-tools/pull/586))
* The coarse UNIFIED BGC dataset used for testing was updated to have depths of 0 and 5 m available ([#589](https://github.com/CWorthy-ocean/roms-tools/pull/589))
* 2 checks added for a point source when plotting `CDRForcing.plot_distribution()`. Low hsc is treated as a point source ([#600](https://github.com/CWorthy-ocean/roms-tools/pull/600))

### Documentation

* Nesting notebook is updated to match refactoring of `ChildGrid` to `Grid` objects. ([#573](https://github.com/CWorthy-ocean/roms-tools/pull/573))
* Move "overview of ROMS-Tools functionality" section from paper to docs ([#554](https://github.com/CWorthy-ocean/roms-tools/pull/554))
* Document `close_narrow_channels` option in `Grid` and `update_mask()`; update notebook examples
* CDR analysis notebook: describe CO2 uptake (tonnes CO2) and CDR efficiency together, consistent with the metrics and figure.
* Both the surface forcing and datasets notebooks are updated to reflect `restoring` function and WOA data ([#589](https://github.com/CWorthy-ocean/roms-tools/pull/589))
* The cdr notebook is updated to reflect interpolation option. Default is same as ROMS, no interpolation ([#601](https://github.com/CWorthy-ocean/roms-tools/pull/601))
* The boundary forcing notebook is updated to describe the new NaN-aware horizontal regridding (masked bilinear with inverse-distance extrapolation), replacing the previous "1D versus 2D horizontal fill" discussion, and adds a section comparing the `prefill` source-fill methods (`2d_lateral_fill`, `inverse_dist`, `nearest_s2d`, `nearest_neighbor`) with a strategy table and side-by-side plots of the pre-filled source.

### Bugfixes

Expand Down
201 changes: 200 additions & 1 deletion roms_tools/datasets/lat_lon_datasets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import importlib.util
import logging
import typing
from collections.abc import Callable, Mapping
from dataclasses import dataclass, field
Expand Down Expand Up @@ -29,7 +30,8 @@
select_relevant_times,
validate_start_end_time,
)
from roms_tools.fill import LateralFill
from roms_tools.fill import LateralFill, nearest_neighbor_fill
from roms_tools.regrid import _xesmf_extrap_kwargs
from roms_tools.setup.utils import (
Timed,
assign_dates_to_climatology,
Expand Down Expand Up @@ -658,6 +660,67 @@ def choose_subdomain(
self.ds = subdomain
return None

def apply_prefill(
self,
method: str,
prefill_kwargs: dict | None = None,
prefill_was_user_set: bool = False,
):
"""Fill masked (land/void) cells in the source by the chosen method.

Thin dispatcher over the concrete fills so callers select a strategy by
name. After a prefill the source is NaN-free, so a subsequent regrid can
use plain bilinear (no destination extrapolation needed).

Parameters
----------
method : str
One of:

- ``"2d_lateral_fill"`` -- iterative AMG Poisson fill
(:meth:`apply_lateral_fill`); no xESMF required.
- ``"nearest_neighbor"`` -- cheap distance-transform fill
(:meth:`apply_nearest_neighbor_fill`); no xESMF required.
- ``"inverse_dist"`` / ``"nearest_s2d"`` / ``"creep_fill"`` -- xESMF
source-on-source fill (:meth:`apply_xesmf_source_fill`).
prefill_kwargs : dict, optional
Method-specific kwargs. For the xESMF methods these are forwarded to
:meth:`apply_xesmf_source_fill` (e.g. ``num_src_pnts`` /
``dist_exponent`` for ``"inverse_dist"``, ``num_levels`` for
``"creep_fill"``). Ignored by the AMG and nearest-neighbor methods.
prefill_was_user_set : bool, optional
Whether the caller's ``prefill`` was set explicitly by the user (vs a
class default). Only used to decide whether to emit the
"source already NaN-free; prefill is a no-op" info-log when this
dataset does not need a lateral fill.

Notes
-----
All underlying fills early-return when ``self.needs_lateral_fill`` is
``False`` (the source is already NaN-free, e.g. the unified BGC dataset).
In that case an explicit user ``prefill`` is a no-op; this is surfaced as
an info-log rather than silently ignored.
"""
prefill_kwargs = prefill_kwargs or {}

if not self.needs_lateral_fill:
if prefill_was_user_set:
logging.info(
"Source data is already NaN-free (needs_lateral_fill=False); "
"prefill=%r is a no-op.",
method,
)
return

if method == "2d_lateral_fill":
self.apply_lateral_fill()
elif method == "nearest_neighbor":
self.apply_nearest_neighbor_fill()
elif method in ("inverse_dist", "nearest_s2d", "creep_fill"):
self.apply_xesmf_source_fill(method=method, **prefill_kwargs)
else:
raise ValueError(f"Unknown prefill method: {method!r}")

def apply_lateral_fill(self):
"""Apply lateral fill to variables using the dataset's mask and grid dimensions.

Expand Down Expand Up @@ -707,6 +770,142 @@ def apply_lateral_fill(self):
# Apply standard lateral fill for other variables
self.ds[var_name] = lateral_fill.apply(self.ds[var_name])

def apply_nearest_neighbor_fill(self):
"""Fill masked values in `self.ds` using nearest-neighbor lookup.

This is a cheap alternative to :meth:`apply_lateral_fill` (the iterative
Poisson solver). It mirrors that method's mask handling -- a separate
mask (`mask_vel`) is used for velocity variables (e.g. `u`, `v`) if
available -- but fills each masked cell with the value of the nearest
valid (ocean) cell via :func:`roms_tools.fill.nearest_neighbor_fill`.

Like :meth:`apply_lateral_fill`, this assumes a ('latitude', 'longitude')
dimension ordering and loops over `self.ds.data_vars` so each variable is
filled only once.
"""
if self.needs_lateral_fill:
dims = (self.dim_names["latitude"], self.dim_names["longitude"])

separate_fill_for_velocities = "mask_vel" in self.ds.data_vars

for var_name in self.ds.data_vars:
if var_name.startswith("mask"):
# Skip variables that are mask types
continue
elif (
separate_fill_for_velocities
and "u" in self.var_names
and "v" in self.var_names
and var_name in [self.var_names["u"], self.var_names["v"]]
):
self.ds[var_name] = nearest_neighbor_fill(
self.ds[var_name], self.ds["mask_vel"], dims
)
else:
self.ds[var_name] = nearest_neighbor_fill(
self.ds[var_name], self.ds["mask"], dims
)

def apply_xesmf_source_fill(
self,
method: str = "inverse_dist",
num_levels: int = 100,
num_src_pnts: int | None = None,
dist_exponent: float | None = None,
):
"""Fill masked values by regridding the source onto its own grid (xESMF).

Unlike destination-side extrapolation during the boundary regrid, this
fills the 2D *source* field itself, where the valid ocean cells act as
seeds spread across the whole grid. Velocity variables use ``mask_vel``
when present, mirroring the other fills. The ``apply_xesmf_`` prefix
distinguishes this from the non-xESMF :meth:`apply_lateral_fill` (AMG) and
:meth:`apply_nearest_neighbor_fill` (scipy) fills.

Parameters
----------
method : str
xESMF extrapolation method used for the fill (default
``"inverse_dist"``):

- ``"inverse_dist"`` -- inverse-distance-weighted average of the
nearest source points; tuned by ``num_src_pnts`` / ``dist_exponent``.
- ``"nearest_s2d"`` -- single nearest source point.
- ``"creep_fill"`` -- truncated Laplace-style diffusion; smooth but
only reaches ``num_levels`` cells from ocean. **Not available in
current released xESMF** (requires a newer/unreleased xESMF + ESMF);
included for forward compatibility and not exposed through
``BoundaryForcing``'s ``prefill`` argument.
num_levels : int
Number of creep iterations (``creep_fill`` only).
num_src_pnts : int, optional
Number of nearest source points to average (``inverse_dist`` only;
xESMF default is 8).
dist_exponent : float, optional
Inverse-distance weighting exponent (``inverse_dist`` only; xESMF
default is 2.0).
"""
if not self.needs_lateral_fill:
return
import warnings

import xesmf as xe

lat = self.dim_names["latitude"]
lon = self.dim_names["longitude"]
lat_vals = self.ds[lat].values
lon_vals = self.ds[lon].values

def _build(mask_da):
ds_in = xr.Dataset(
{
"mask": (
("lat", "lon"),
mask_da.transpose(lat, lon).values.astype("int32"),
)
},
coords={"lat": lat_vals, "lon": lon_vals},
)
ds_out = xr.Dataset(coords={"lat": lat_vals, "lon": lon_vals})
kwargs = dict(method="bilinear", unmapped_to_nan=True, extrap_method=method)
kwargs.update(
_xesmf_extrap_kwargs(
method,
{
"num_levels": num_levels,
"num_src_pnts": num_src_pnts,
"dist_exponent": dist_exponent,
},
)
)
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=UserWarning, module="xesmf")
return xe.Regridder(ds_in, ds_out, **kwargs)

def _fill(da, rg):
renamed = da.rename({lat: "lat", lon: "lon"})
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=UserWarning, module="xesmf")
out = rg(renamed, keep_attrs=True)
return out.rename({"lat": lat, "lon": lon})

regridder = _build(self.ds["mask"])
separate = "mask_vel" in self.ds.data_vars
regridder_vel = _build(self.ds["mask_vel"]) if separate else None

for var_name in self.ds.data_vars:
if var_name.startswith("mask"):
continue
elif (
separate
and "u" in self.var_names
and "v" in self.var_names
and var_name in [self.var_names["u"], self.var_names["v"]]
):
self.ds[var_name] = _fill(self.ds[var_name], regridder_vel)
else:
self.ds[var_name] = _fill(self.ds[var_name], regridder)

def extrapolate_deepest_to_bottom(self):
"""Extrapolate deepest non-NaN values to fill bottom NaNs along the depth
dimension.
Expand Down
Loading