Source code for salishsea_tools.loadDataFRP

# Copyright 2013 – present by the SalishSeaCast contributors
# and The University of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Load CTD cast data from Fraser River plume region that Elise Olson collected to ground-truth
her turbidity model."""

import os
import re
import string

import gsw
import netCDF4 as nc
import numpy as np
import pandas as pd
from scipy import signal as ssig

from salishsea_tools import geo_tools

# list CTD cnv files associated with cast numbers
cnvlist19 = {
    1: "fraser2017101.cnv",
    2: "fraser2017102.cnv",
    3: "fraser2017103.cnv",
    4: "fraser2017104.cnv",
    5: "fraser2017105.cnv",
    6: "fraser2017106.cnv",
    7: "fraser2017107.cnv",
    8: "fraser2017108.cnv",
    9: "fraser2017109.cnv",
    10: "fraser2017110.cnv",
    11: "fraser2017111.cnv",
    12: "fraser2017112.cnv",
    13: "fraser2017113.cnv",
    14.1: "fraser2017114.cnv",
    14.2: "fraser2017114.cnv",
    15: "fraser2017115.cnv",
    16: "fraser2017116.cnv",
    17: "fraser2017117.cnv",
    18: "fraser2017118.cnv",
    19: "fraser2017119.cnv",
    20: "fraser2017120.cnv",
    21: "fraser2017121.cnv",
    22: "fraser2017122.cnv",
    23: "fraser2017123.cnv",
    24: "fraser2017124.cnv",
}

cnvlist25 = {
    1: "fraser2017001.cnv",
    2: "fraser2017002.cnv",
    3: "fraser2017003.cnv",
    4: "fraser2017004.cnv",
    5: "fraser2017005.cnv",
    6: "fraser2017006.cnv",
    7: "fraser2017007.cnv",
    8: "fraser2017008.cnv",
    9: "fraser2017009.cnv",
    10: "fraser2017010.cnv",
    11: "fraser2017011.cnv",
    12: "fraser2017012.cnv",
    13: "fraser2017013.cnv",
    14.1: "fraser2017014.cnv",
    14.2: "fraser2017014.cnv",
    15: "fraser2017015.cnv",
    16: "fraser2017016.cnv",
    17: "fraser2017017.cnv",
    18: "fraser2017018.cnv",
    19: "fraser2017019.cnv",
    20: "fraser2017020.cnv",
    21: "fraser2017021.cnv",
    22: "fraser2017022.cnv",
    23: "fraser2017023.cnv",
    24: "fraser2017024.cnv",
}


class Cast:
    def __init__(self, fpath):
        mSta, mLat, mLon, df = readcnv(fpath)
        self.sta = mSta
        self.lat = mLat
        self.lon = mLon
        self.df = df
        self.source = fpath


class zCast:
    def __init__(self, updf, downdf):
        self.uCast = updf
        self.dCast = downdf


class rawCast:
    def __init__(self):
        self.uCast = dict()
        self.dCast = dict()


class dataPair:
    def __init__(self, zval, varval):
        self.z = zval
        self.val = varval


[docs] def fmtVarName(strx): """transform string into one that meets python naming conventions""" vName = re.sub(r"[^a-zA-Z0-9_\-\s/]", "", strx.strip()) vName = re.sub(r"[\s/]", "_", vName) vName = re.sub("-", "_", vName) if re.match("[0-9]", vName): vName = "_" + vName return vName
# def rolling_window(a, window): # # source: https://www.rigtorp.se/2011/01/01/rolling-statistics-numpy.html # # use example: np.mean(rolling_window(x, 3), -1) # shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) # strides = a.strides + (a.strides[-1],) # return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) # # def rolling_window_padded(a,window): # # extend rolling window to be same lenth as input array by duplicating first and last values # # even values not symmetric # test=rolling_window(a,window) # while window>1: # if window%2==0: # test=np.concatenate(([test[0,:]],test),axis=0) # else: # test=np.concatenate((test,[test[-1,:]]),axis=0) # window+=-1 # return test def slidingWindowEval(x, func, window, axis=0): # x is input array # func is function to carry out over window # window is window size # axis is axis to act along, in case of multiple # if window is even, results will be shifted left by 1/2 unit x1 = np.lib.stride_tricks.sliding_window_view(x, window, axis) b = func(x1, -1) # the rest of the code pads the front and back to return an array of the same shape as the original nfront = np.floor((window - 1) / 2) nback = np.floor((window - 1) / 2) + (window - 1) % 2 inxf = [slice(None)] * np.ndim(b) inxf[axis] = slice(0, 1, 1) inxb = [slice(None)] * np.ndim(b) inxb[axis] = slice(np.shape(b)[axis] - 1, np.shape(b)[axis], 1) repsf = np.ones(np.ndim(b), dtype=int) repsf[axis] = int(nfront) repsb = np.ones(np.ndim(b), dtype=int) repsb[axis] = int(nback) x2 = np.concatenate( (np.tile(b[tuple(inxf)], repsf), b, np.tile(b[tuple(inxb)], repsb)), axis=axis ) return x2 def amp(var, dim=0): return np.nanmax(var, dim) - np.nanmin(var, dim) def turbQC(x): # turbidity sensor produced erroneous zero readings interspersed with real data when too close to surface # remove suspect values from analysis # - median filter alone was not enough # remove a point if the max-min of the surrounding 5 point window # is greater than 1/3 the maximum turbidity value of the cast # (remove data within 5 points of a large jump) # ii1=amp(rolling_window_padded(x,5),-1)>.33*np.nanmax(x) ii1 = slidingWindowEval(x, amp, 5) > 0.33 * np.nanmax(x) # was .5 # remove data within 5 points of a near-zero turbidity value # ii2=np.nanmin(rolling_window_padded(x,5),-1)<.3 ii2 = slidingWindowEval(x, np.nanmin, 5) < 0.3 y = np.copy(x) y[ np.logical_or( ii1, ii2, ) ] = np.nan y = ssig.medfilt(y, 3) return y def readcnv(fpath): alphnumlist = list(string.ascii_letters) + list(string.digits) # define regexes for reading headers: reSta = re.compile( r"(?<=\*\*\sStation:)\s?([0-9])+\s?" ) # assumes numeric station identifiers reLat = re.compile(r"(?<=\*\*\sLatitude\s=)\s?([\-0-9\.]+)\s([\-\.0-9]+)\s?([NS])") reLon = re.compile(r"(?<=\*\*\sLongitude\s=)\s?([\-0-9\.]+)\s([\-\.0-9]+)\s?([EW])") # start_time = May 08 2002 09:39:10 reST = re.compile(r"(?<=\#\sstart_time\s=).*") # reTZ=re.compile('(?<=\*\*\s...\s\(Time\)\s=).*') # reCr=re.compile('(?<=\*\*\sCruise:).*') reNam = re.compile(r"(?<=\#\sname\s)([0-9]+)\s=\s(.*)\:\s?(.*)\s?") # define regex for finding searching: spStart = re.compile(r"^\s*[0-9]") # starts with space characters followed by digit headers = list() # lineno=0 mSta = None mLat = None mLon = None with open(fpath, "rt", encoding="ISO-8859-1") as f: for fline in f: if fline.startswith("**"): if reSta.search(fline): mSta = reSta.search(fline).groups() if reLat.search(fline): mLat = reLat.search(fline).groups() if reLon.search(fline): mLon = reLon.search(fline).groups() elif reNam.search(fline): headers.append(fmtVarName(reNam.search(fline).groups(1)[1])) elif fline.startswith("*END*"): break # lineno+=1 # still in with file open df = pd.read_csv(f, delim_whitespace=True, names=headers) # file closed return mSta, mLat, mLon, df def bindepth(inP, inV, edges, targets=[], prebin=False): # calculate depth-associated variables # 1st calculate bin averages of depth and variable # then use np interp to estimate at-grid-point values # edges must be monotonically increasing if prebin == True: newP, newV = bindepth( inP, inV, np.arange(edges[0], edges[-1], 0.05), prebin=False ) inP = newP inV = newV inP = inP[~np.isnan(inV)] inV = inV[~np.isnan(inV)] binned = np.digitize(inP, edges) Pa = np.empty(len(edges) - 1) Va = np.empty(len(edges) - 1) if len(targets) == 0: Pi = 0.5 * (edges[:-1] + edges[1:]) else: Pi = targets[: (len(edges) - 1)] Vi = np.empty(len(edges) - 1) for jj in range(1, len(edges)): ll = binned == jj # &(~np.isnan(inV)) if np.sum(ll) > 0: Pa[jj - 1] = np.mean(inP[ll]) Va[jj - 1] = np.mean(inV[ll]) else: Pa[jj - 1] = np.nan Va[jj - 1] = np.nan # linearly extrapolate some values, but not beyond range of original data pnew = Pa[0] - (Pa[1] - Pa[0]) vnew = Va[0] - (Va[1] - Va[0]) Pa = np.concatenate(([pnew], Pa)) Va = np.concatenate(([vnew], Va)) Vi = np.interp(Pi, Pa[~np.isnan(Va)], Va[~np.isnan(Va)], right=np.nan, left=np.nan) Vi[Pi > np.max(inP)] = np.nan Vi[Pi < np.min(inP)] = np.nan return Pi, Vi def cXfromX(X): X = np.array(X) X[np.isnan(X)] = -5 Y = np.nan * X iii = (X > 0) & (X < 100) Y[iii] = -np.log(X[iii] / 100.0) / 0.25 return Y def turbReg(m, Cx, fl): return np.maximum(0.0, m[0] * Cx - m[1] * fl - m[2]) def turbFit(df0): # calculate conversion factor for sb19 ctd turbidity to ALS bottle turbidity # force through (0,0) x = df0.loc[(df0.ALS_Turb_NTU > 0) & (df0.sb19Turb_uncorrected > 0)][ "sb19Turb_uncorrected" ].values x = x[:, np.newaxis] y = df0.loc[(df0.ALS_Turb_NTU > 0) & (df0.sb19Turb_uncorrected > 0)]["ALS_Turb_NTU"] tinv = np.linalg.lstsq(x, y, rcond=None)[0] tcor = 1.0 / tinv return tcor def loadDataFRP_init(exp="all"): if exp not in {"exp1", "exp2", "exp3", "all"}: print("option exp=" + exp + " is not defined.") raise with open( "/ocean/shared/SalishSeaCastData/FRPlume/stationsDigitizedFinal.csv", "r" ) as fa: df0_a = pd.read_csv(fa, header=0, na_values="None") with open( "/ocean/shared/SalishSeaCastData/FRPlume/util/stationsDigitized_ancillary.csv", "r", ) as fb: df0_b = pd.read_csv(fb, header=0, na_values="None") df0 = pd.merge(df0_a, df0_b, how="left", on=["Station", "Date"]) # if values present, calculate correction factor for sb19 turbidity (divide sb19 turbidity by tcor) # fit true turb to observed turb # calculate here while all values present if np.sum(df0.sb19Turb_uncorrected > 0) > 0: tcor = turbFit(df0) else: tcor = np.nan if exp == "exp1": df0 = df0.drop(df0.index[df0.Date != 20170410]) elif exp == "exp2": df0 = df0.drop(df0.index[df0.Date != 20170531]) elif exp == "exp3": df0 = df0.drop(df0.index[df0.Date != 20171101]) basedir1 = "/ocean/shared/SalishSeaCastData/FRPlume/ctd/20170410/" basedir2 = "/ocean/shared/SalishSeaCastData/FRPlume/ctd/20170531/" basedir3 = "/ocean/shared/SalishSeaCastData/FRPlume/ctd/20171101/" dir19 = "19-4561/4_derive" dir25 = "25-0363/4_derive" dir19T10 = "19-4561/4b_deriveTEOS10" dir25T10 = "25-0363/4a_deriveTEOS10" clist = [] if exp == "exp1" or exp == "all": clist = clist + list(range(1, 10)) if exp == "exp2" or exp == "all": clist = clist + [10, 11, 12, 13, 14.1, 14.2, 15, 16, 17, 18] if exp == "exp3" or exp == "all": clist = clist + list(range(19, 25)) fpath19 = dict() fpath25 = dict() for ii in clist: if ii < 10: fpath19[ii] = os.path.join(basedir1, dir19T10, cnvlist19[ii]) fpath25[ii] = os.path.join(basedir1, dir25T10, cnvlist25[ii]) elif ii < 19: fpath19[ii] = os.path.join(basedir2, dir19T10, cnvlist19[ii]) fpath25[ii] = os.path.join(basedir2, dir25T10, cnvlist25[ii]) else: fpath19[ii] = os.path.join(basedir3, dir19T10, cnvlist19[ii]) fpath25[ii] = os.path.join(basedir3, dir25T10, cnvlist25[ii]) cast19 = dict() cast25 = dict() for ii in clist: cast19[ii] = Cast(fpath19[ii]) cast25[ii] = Cast(fpath25[ii]) return df0, clist, tcor, cast19, cast25 def loadDataFRP( exp="all", sel="narrow", dp=1.0, form="binned", vert="P", meshPath="/data/eolson/results/MEOPAR/NEMO-forcing-new/grid/mesh_mask201702.nc", ): # exp determines which sampling date to load (or all) # sel determines whether to use narrow data selection, which is a more conservative estimate, # or 'wide' data selection, which includes more near-surface data but can include # some time the intstruments were parked near-surface # dp determines bin interval in terms of depth variable vert (only if form='binned') # form can be 'binned','raw' or 'SSCgrid' and determines if binned and how # vert determines vertical variable: 'P' or 'Z' # meshPath is SSC mesh file location for binning to model grid (form='SSCgrid') if exp not in {"exp1", "exp2", "exp3", "all"}: print("option exp=" + exp + " is not defined.") raise if sel == "narrow": prebin = False elif sel == "wide": prebin = True else: print("option sel=" + sel + " is not defined.") raise df0, clist, tcor, cast19, cast25 = loadDataFRP_init(exp=exp) parDZ = 0.78 xmisDZ = 0.36 turbDZ = 0.67 pshiftdict = { "gsw_ctA0": 0.0, "gsw_srA0": 0.0, "xmiss": xmisDZ, "seaTurbMtr": turbDZ, "par": parDZ, "wetStar": 0.0, "sbeox0ML_L": 0.0, "seaTurbMtrnoQC": turbDZ, "turb_uncor": turbDZ, "turb": turbDZ, } # for SSC grid version, load model grid variables if form == "SSCgrid": with nc.Dataset(meshPath, "r") as mesh: tmask = mesh.variables["tmask"][0, :, :, :] gdept = mesh.variables["gdept_0"][0, :, :, :] gdepw = mesh.variables["gdepw_0"][0, :, :, :] nav_lat = mesh.variables["nav_lat"][:, :] nav_lon = mesh.variables["nav_lon"][:, :] zCasts = dict() for nn in clist: ip = np.argmax(cast25[nn].df["prSM"].values) ilag = df0.loc[df0.Station == nn, "ishift_sub19"].values[0] pS_pr = df0.loc[df0.Station == nn, "pS_pr"].values[0] pE_pr = df0.loc[df0.Station == nn, "pE_pr"].values[0] pS_tur = df0.loc[df0.Station == nn, "pStart25"].values[0] pE_tur = df0.loc[df0.Station == nn, "pEnd25"].values[0] lat = df0.loc[df0.Station == nn]["LatDecDeg"].values[0] lon = df0.loc[df0.Station == nn]["LonDecDeg"].values[0] if sel == "narrow": pS = pS_tur pE = pE_tur elif sel == "wide": pS = pS_pr pE = pE_pr cast19[nn].df["seaTurbMtrnoQC"] = cast19[nn].df["seaTurbMtr"] cast19[nn].df["turb_uncor"] = turbQC(cast19[nn].df["seaTurbMtr"]) cast19[nn].df["turb"] = cast19[nn].df["turb_uncor"] * 1.0 / tcor if vert == "Z": # calc Z from p cast25[nn].df["Z"] = -1 * gsw.z_from_p(cast25[nn].df["prSM"].values, lat) cast19[nn].df["Z"] = -1 * gsw.z_from_p(cast19[nn].df["prdM"].values, lat) zvar25 = "Z" else: zvar25 = "prSM" pmax = cast25[nn].df.loc[ip, zvar25] if form == "binned" or form == "raw": edges = np.arange(dp / 2, pmax + dp, dp) targets = [] # use default value of 1/2-way between edges elif form == "SSCgrid": jj, ii = geo_tools.find_closest_model_point(lon, lat, nav_lon, nav_lat) edges = gdepw[:, jj, ii] # model w grid targets = gdept[:, jj, ii] # model T grid edges = edges[edges < pmax] targets = targets[: (len(edges) - 1)] if form == "binned" or form == "SSCgrid": dCast = pd.DataFrame() uCast = pd.DataFrame() elif form == "raw": zCasts[nn] = rawCast() dCast = zCasts[nn].dCast uCast = zCasts[nn].uCast def makeVCol(incast_p, outcast, var, i0, i1, incast_v=None): # add in zvar25 # incast_p: [cast25[nn]] sb25 cast to get pressure from; also used for var unless incast_v specified # outcast: [dCast or uCast] cast to save resulting aligned data to # i0, i1: [pS:ip/ip:pE] starting and ending indices for data to include relative to incast_p # var: variable name to extract # incast_v [cast19[nn]] if variables are from sbe19, specify that cast; otherwise get variable from incast_p (25) # vplag: ilag to align sbe19 to sbe25 in case where var is from sbe19; defaults to 0 for sbe25 # NOTE: dataframes are mutable; changes to outcast propagate to its input df if incast_v is None: incast_v = incast_p vplag = 0 else: vplag = ilag inP = incast_p.df.loc[i0:i1][zvar25].values - pshiftdict[var] # p inV = incast_v.df.loc[(i0 + vplag) : (i1 + vplag)][var].values # var if sel == "wide": inV[inP < 0.1] = np.nan if form == "binned" or form == "SSCgrid": p, out = bindepth(inP, inV, edges, targets=targets, prebin=prebin) if len(outcast) == 0: # depth var not added yet outcast[zvar25] = p if form == "SSCgrid": outcast["indk"] = np.arange(0, len(p)) outcast[var] = out elif form == "raw": outcast[var] = dataPair(inP, inV) return # fix first 2 casts for which sb25 pump did not turn on. use sb19 if nn == 1 or nn == 2: ##xmiss: xmis25=1.14099414691*xmis19+-1.6910134322 cast19[nn].df["xmiss"] = ( 1.14099414691 * cast19[nn].df["CStarTr0"] - 1.6910134322 ) # down var for var in ("gsw_ctA0", "gsw_srA0", "xmiss"): # downcast makeVCol(cast25[nn], dCast, var, pS, ip, incast_v=cast19[nn]) # upcast makeVCol(cast25[nn], uCast, var, ip, pE, incast_v=cast19[nn]) makeVCol(cast25[nn], dCast, "par", pS, ip) makeVCol(cast25[nn], uCast, "par", ip, pE) uCast["wetStar"] = np.nan dCast["wetStar"] = np.nan uCast["sbeox0ML_L"] = np.nan dCast["sbeox0ML_L"] = np.nan else: for var in ( "gsw_ctA0", "gsw_srA0", "xmiss", "par", "wetStar", "sbeox0ML_L", ): if not nn == 14.2: # downcast makeVCol(cast25[nn], dCast, var, pS, ip) else: # special case where there is no downcast if len(dCast) == 0: # depth var not added yet dCast = pd.DataFrame(np.nan * np.ones(10), columns=[zvar25]) dCast[var] = np.nan * np.ones(10) if not nn == 14.1: # upcast makeVCol(cast25[nn], uCast, var, ip, pE) else: # special case where there is no upcast if len(uCast) == 0: # depth var not added yet uCast = pd.DataFrame(np.nan * np.ones(10), columns=[zvar25]) uCast[var] = np.nan * np.ones(10) for var in ("seaTurbMtrnoQC", "turb_uncor", "turb"): if not nn == 14.2: # turbidity downcast makeVCol(cast25[nn], dCast, var, pS, ip, incast_v=cast19[nn]) else: # special case where there is no downcast dCast[var] = np.nan * np.ones(10) if not nn == 14.1: # turbidity upcast makeVCol(cast25[nn], uCast, var, ip, pE, incast_v=cast19[nn]) else: # special case where there is no upcast uCast[var] = np.nan * np.ones(10) if form == "binned" or form == "SSCgrid": zCasts[nn] = zCast(uCast, dCast) return df0, zCasts def loadDataFRP_raw(exp="all", sel="narrow"): df0, zCasts = loadDataFRP(exp=exp, sel=sel, vert="Z", form="raw") return df0, zCasts def loadDataFRP_SSGrid( exp="all", sel="narrow", meshPath="/ocean/eolson/MEOPAR/NEMO-forcing/grid/mesh_mask201702.nc", ): df0, zCasts = loadDataFRP( exp=exp, sel=sel, vert="Z", form="SSCgrid", meshPath=meshPath ) for ic in zCasts.values(): ic.dCast["depth_m"] = ic.dCast["Z"] ic.uCast["depth_m"] = ic.uCast["Z"] return df0, zCasts