Source code for salishsea_tools.timeseries_tools

# Copyright 2013 – present by the SalishSeaCast contributors
# and The University of British Columbia

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#    https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""A library of Python functions for loading SalishSeaCast timeseries while
conserving memory.
"""

from salishsea_tools import viz_tools
from dateutil.parser import parse
from datetime import timedelta
from tqdm import tqdm
import xarray as xr
import numpy as np
import os


def load_NEMO_timeseries(
    filenames, mask, field, dim, index=0, spacing=1, shape="grid", unstagger_dim=None
):
    """ """

    # Reshape mask, grid, and depth
    tmask, coords, ngrid, ngrid_water = reshape_coords(
        mask, dim, index=index, spacing=spacing
    )

    # Initialize output array
    date = np.empty(0, dtype="datetime64[ns]")
    data = np.empty((0, ngrid_water))

    # Loop through filenames
    for filename in tqdm(filenames):
        # Open NEMO results and flatten (depth averages added here)
        data_grid = xr.open_dataset(filename)[field].isel(**{dim: index})

        # Unstagger if velocity field
        if unstagger_dim is not None:
            data_grid = viz_tools.unstagger_xarray(data_grid, unstagger_dim)

        # Reshape field
        data_trim = reshape_to_ts(
            data_grid.values, tmask, ngrid, ngrid_water, spacing=spacing
        )

        # Store trimmed arrays
        date = np.concatenate([date, data_grid.time_counter.values])
        data = np.concatenate([data, data_trim], axis=0)

    # Reshape to grid
    if shape == "grid":

        # Correct for depth dimension name
        if dim.find("depth") != -1:
            dim1, dim2, dimslice = "gridY", "gridX", "z"
        elif dim.find("y") != -1:
            dim1, dim2, dimslice = "gridZ", "gridX", "y"
        elif dim.find("x") != -1:
            dim1, dim2, dimslice = "gridZ", "gridY", "x"

        # Reshape data to grid
        data = reshape_to_grid(
            data,
            [coords[dim1], coords[dim2]],
            mask.gdept_0.isel(**{"t": 0, dimslice: 0}).shape,
        )

        # Redefine coords for grid
        coords = {
            "depth": mask.gdept_1d.isel(t=0).values,
            "gridZ": mask.z.values,
            "gridY": mask.y.values,
            "gridX": mask.x.values,
        }

    # Coords dict
    coords["date"] = date

    return data, coords


[docs] def make_filename_list( timerange, qty, model="nowcast", resolution="h", path="/results/SalishSea" ): """Return a sequential list of Nowcast results filenames to be passed into `xarray.open_mfdataset` or `timeseries_tools.load_NEMO_timeseries`. :arg timerange: list or tuple of date strings (e.g., ['2017 Jan 1 00:00', '2017 Jan 31 23:00']) :type timerange: list or tuple of str :arg qty: quantity type ('U' for zonal velocity, 'V' for meridional velocity, 'W' for vertical velocity, 'T' for tracers) :type qty: str :arg model: forecast type (e.g., 'nowcast', 'nowcast-green', 'forecast') :type model: str :arg resolution: time resolution ('h' for hourly, 'd', for daily) :type resolution: str :arg path: path to results archive :type path: str :returns: Sequential list of Nowcast results filenames :rtype: list of str """ date, enddate = map(parse, timerange) filenames = [] while date < enddate: datestr1 = date.strftime("%d%b%y").lower() datestr2 = date.strftime("%Y%m%d") filename = "SalishSea_1{}_{}_{}_grid_{}.nc".format( resolution, datestr2, datestr2, qty ) filenames.append(os.path.join(path, model, datestr1, filename)) date = date + timedelta(days=1) return filenames
[docs] def reshape_coords(mask_in, dim_in, index=0, spacing=1): """Prepare the mask and grid for the selected timeseries slice, and reshape into 1 spatial dimension """ # Correct for depth dimension name if dim_in.find("depth") != -1: dim = "deptht" else: dim = dim_in # Create full gridded mask, grid and depth Numpy ndarrays gridZ, gridY, gridX = np.meshgrid(mask_in.z, mask_in.y, mask_in.x, indexing="ij") gridmask = xr.Dataset( { "tmask": ( ["deptht", "y", "x"], mask_in.tmask.isel(t=0).values.astype(bool), ), "depth": (["deptht", "y", "x"], mask_in.gdept_0.isel(t=0).values), "gridZ": (["deptht", "y", "x"], gridZ), "gridY": (["deptht", "y", "x"], gridY), "gridX": (["deptht", "y", "x"], gridX), }, coords={ "deptht": mask_in.gdept_1d.isel(t=0).values, "y": mask_in.y, "x": mask_in.x, }, ) # Slice and subsample mask mask = gridmask.tmask.isel(**{dim: index}).values[::spacing, ::spacing] # Slice and subsample grid and depth into dict coords = { "depth": gridmask.depth.isel(**{dim: index}).values[::spacing, ::spacing], "gridZ": gridmask.gridZ.isel(**{dim: index}).values[::spacing, ::spacing], "gridY": gridmask.gridY.isel(**{dim: index}).values[::spacing, ::spacing], "gridX": gridmask.gridX.isel(**{dim: index}).values[::spacing, ::spacing], } # Number of grid points ngrid = mask.shape[0] * mask.shape[1] ngrid_water = mask.sum() # Reshape mask, grid, and depth mask = mask.reshape(ngrid) coords["depth"] = coords["depth"].reshape(ngrid)[mask] coords["gridZ"] = coords["gridZ"].reshape(ngrid)[mask] coords["gridY"] = coords["gridY"].reshape(ngrid)[mask] coords["gridX"] = coords["gridX"].reshape(ngrid)[mask] return mask, coords, ngrid, ngrid_water
def reshape_coords_GEM(grid, mask_in): """ """ coords = {} # Create full gridded mask, grid and depth Numpy ndarrays coords["gridY"], coords["gridX"] = np.meshgrid(grid.y, grid.x, indexing="ij") # Number of grid points ngrid = mask_in.shape[0] * mask_in.shape[1] ngrid_water = mask_in.sum() # Reshape mask, grid, and depth mask = mask_in.reshape(ngrid) coords["gridY"] = coords["gridY"].reshape(ngrid)[mask.astype(bool)] coords["gridX"] = coords["gridX"].reshape(ngrid)[mask.astype(bool)] return mask, coords, ngrid, ngrid_water def reshape_to_ts(data_grid, mask, ngrid, ngrid_water, spacing=1): """ """ # Convert to Numpy ndarray, subsample, and reshape data_flat = data_grid[:, ::spacing, ::spacing].reshape((-1, ngrid)) # Preallocate trimmed array data_trim = np.zeros((data_flat.shape[0], ngrid_water)) # Trim land points for tindex, data_t in enumerate(data_flat): data_trim[tindex, :] = data_t[mask] return data_trim
[docs] def reshape_to_grid(data_flat, coords, shape): """Given a flattened array of data with the corresponding Y and X coordinates and the desired grid shape, return the grid of desired shape with the data given. Assumes flattened array has a time dimension as first dimension. :arg data_flat: 2d array of data. First dimension is assumed to be time. :arg coords: List of form [Ycoords, Xcoords] for each data point given. :arg shape: 2d tuple corresponding to desired grid shape. For Salish Sea model, shape would be (898,398). :returns: Array of with dimensions corresponding to shape given with data in coordinates given. """ # Preallocate gridded array data_grid = np.zeros((data_flat.shape[0],) + shape) # Reshape flattened data to grid for coord1, coord2, data_xyz in zip(*(coords + [data_flat.T])): data_grid[:, coord1, coord2] = data_xyz return data_grid