# Copyright 2013 – present by the SalishSeaCast contributors
# and The University of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Load CTD cast data from Fraser River plume region that Elise Olson collected to ground-truth
her turbidity model."""
import os
import re
import string
import gsw
import netCDF4 as nc
import numpy as np
import pandas as pd
from scipy import signal as ssig
from salishsea_tools import geo_tools
# list CTD cnv files associated with cast numbers
cnvlist19 = {
1: "fraser2017101.cnv",
2: "fraser2017102.cnv",
3: "fraser2017103.cnv",
4: "fraser2017104.cnv",
5: "fraser2017105.cnv",
6: "fraser2017106.cnv",
7: "fraser2017107.cnv",
8: "fraser2017108.cnv",
9: "fraser2017109.cnv",
10: "fraser2017110.cnv",
11: "fraser2017111.cnv",
12: "fraser2017112.cnv",
13: "fraser2017113.cnv",
14.1: "fraser2017114.cnv",
14.2: "fraser2017114.cnv",
15: "fraser2017115.cnv",
16: "fraser2017116.cnv",
17: "fraser2017117.cnv",
18: "fraser2017118.cnv",
19: "fraser2017119.cnv",
20: "fraser2017120.cnv",
21: "fraser2017121.cnv",
22: "fraser2017122.cnv",
23: "fraser2017123.cnv",
24: "fraser2017124.cnv",
}
cnvlist25 = {
1: "fraser2017001.cnv",
2: "fraser2017002.cnv",
3: "fraser2017003.cnv",
4: "fraser2017004.cnv",
5: "fraser2017005.cnv",
6: "fraser2017006.cnv",
7: "fraser2017007.cnv",
8: "fraser2017008.cnv",
9: "fraser2017009.cnv",
10: "fraser2017010.cnv",
11: "fraser2017011.cnv",
12: "fraser2017012.cnv",
13: "fraser2017013.cnv",
14.1: "fraser2017014.cnv",
14.2: "fraser2017014.cnv",
15: "fraser2017015.cnv",
16: "fraser2017016.cnv",
17: "fraser2017017.cnv",
18: "fraser2017018.cnv",
19: "fraser2017019.cnv",
20: "fraser2017020.cnv",
21: "fraser2017021.cnv",
22: "fraser2017022.cnv",
23: "fraser2017023.cnv",
24: "fraser2017024.cnv",
}
class Cast:
def __init__(self, fpath):
mSta, mLat, mLon, df = readcnv(fpath)
self.sta = mSta
self.lat = mLat
self.lon = mLon
self.df = df
self.source = fpath
class zCast:
def __init__(self, updf, downdf):
self.uCast = updf
self.dCast = downdf
class rawCast:
def __init__(self):
self.uCast = dict()
self.dCast = dict()
class dataPair:
def __init__(self, zval, varval):
self.z = zval
self.val = varval
[docs]
def fmtVarName(strx):
"""transform string into one that meets python naming conventions"""
vName = re.sub(r"[^a-zA-Z0-9_\-\s/]", "", strx.strip())
vName = re.sub(r"[\s/]", "_", vName)
vName = re.sub("-", "_", vName)
if re.match("[0-9]", vName):
vName = "_" + vName
return vName
# def rolling_window(a, window):
# # source: https://www.rigtorp.se/2011/01/01/rolling-statistics-numpy.html
# # use example: np.mean(rolling_window(x, 3), -1)
# shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
# strides = a.strides + (a.strides[-1],)
# return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
#
# def rolling_window_padded(a,window):
# # extend rolling window to be same lenth as input array by duplicating first and last values
# # even values not symmetric
# test=rolling_window(a,window)
# while window>1:
# if window%2==0:
# test=np.concatenate(([test[0,:]],test),axis=0)
# else:
# test=np.concatenate((test,[test[-1,:]]),axis=0)
# window+=-1
# return test
def slidingWindowEval(x, func, window, axis=0):
# x is input array
# func is function to carry out over window
# window is window size
# axis is axis to act along, in case of multiple
# if window is even, results will be shifted left by 1/2 unit
x1 = np.lib.stride_tricks.sliding_window_view(x, window, axis)
b = func(x1, -1)
# the rest of the code pads the front and back to return an array of the same shape as the original
nfront = np.floor((window - 1) / 2)
nback = np.floor((window - 1) / 2) + (window - 1) % 2
inxf = [slice(None)] * np.ndim(b)
inxf[axis] = slice(0, 1, 1)
inxb = [slice(None)] * np.ndim(b)
inxb[axis] = slice(np.shape(b)[axis] - 1, np.shape(b)[axis], 1)
repsf = np.ones(np.ndim(b), dtype=int)
repsf[axis] = int(nfront)
repsb = np.ones(np.ndim(b), dtype=int)
repsb[axis] = int(nback)
x2 = np.concatenate(
(np.tile(b[tuple(inxf)], repsf), b, np.tile(b[tuple(inxb)], repsb)), axis=axis
)
return x2
def amp(var, dim=0):
return np.nanmax(var, dim) - np.nanmin(var, dim)
def turbQC(x):
# turbidity sensor produced erroneous zero readings interspersed with real data when too close to surface
# remove suspect values from analysis
# - median filter alone was not enough
# remove a point if the max-min of the surrounding 5 point window
# is greater than 1/3 the maximum turbidity value of the cast
# (remove data within 5 points of a large jump)
# ii1=amp(rolling_window_padded(x,5),-1)>.33*np.nanmax(x)
ii1 = slidingWindowEval(x, amp, 5) > 0.33 * np.nanmax(x) # was .5
# remove data within 5 points of a near-zero turbidity value
# ii2=np.nanmin(rolling_window_padded(x,5),-1)<.3
ii2 = slidingWindowEval(x, np.nanmin, 5) < 0.3
y = np.copy(x)
y[
np.logical_or(
ii1,
ii2,
)
] = np.nan
y = ssig.medfilt(y, 3)
return y
def readcnv(fpath):
alphnumlist = list(string.ascii_letters) + list(string.digits)
# define regexes for reading headers:
reSta = re.compile(
r"(?<=\*\*\sStation:)\s?([0-9])+\s?"
) # assumes numeric station identifiers
reLat = re.compile(r"(?<=\*\*\sLatitude\s=)\s?([\-0-9\.]+)\s([\-\.0-9]+)\s?([NS])")
reLon = re.compile(r"(?<=\*\*\sLongitude\s=)\s?([\-0-9\.]+)\s([\-\.0-9]+)\s?([EW])")
# start_time = May 08 2002 09:39:10
reST = re.compile(r"(?<=\#\sstart_time\s=).*")
# reTZ=re.compile('(?<=\*\*\s...\s\(Time\)\s=).*')
# reCr=re.compile('(?<=\*\*\sCruise:).*')
reNam = re.compile(r"(?<=\#\sname\s)([0-9]+)\s=\s(.*)\:\s?(.*)\s?")
# define regex for finding searching:
spStart = re.compile(r"^\s*[0-9]") # starts with space characters followed by digit
headers = list()
# lineno=0
mSta = None
mLat = None
mLon = None
with open(fpath, "rt", encoding="ISO-8859-1") as f:
for fline in f:
if fline.startswith("**"):
if reSta.search(fline):
mSta = reSta.search(fline).groups()
if reLat.search(fline):
mLat = reLat.search(fline).groups()
if reLon.search(fline):
mLon = reLon.search(fline).groups()
elif reNam.search(fline):
headers.append(fmtVarName(reNam.search(fline).groups(1)[1]))
elif fline.startswith("*END*"):
break
# lineno+=1
# still in with file open
df = pd.read_csv(f, delim_whitespace=True, names=headers)
# file closed
return mSta, mLat, mLon, df
def bindepth(inP, inV, edges, targets=[], prebin=False):
# calculate depth-associated variables
# 1st calculate bin averages of depth and variable
# then use np interp to estimate at-grid-point values
# edges must be monotonically increasing
if prebin == True:
newP, newV = bindepth(
inP, inV, np.arange(edges[0], edges[-1], 0.05), prebin=False
)
inP = newP
inV = newV
inP = inP[~np.isnan(inV)]
inV = inV[~np.isnan(inV)]
binned = np.digitize(inP, edges)
Pa = np.empty(len(edges) - 1)
Va = np.empty(len(edges) - 1)
if len(targets) == 0:
Pi = 0.5 * (edges[:-1] + edges[1:])
else:
Pi = targets[: (len(edges) - 1)]
Vi = np.empty(len(edges) - 1)
for jj in range(1, len(edges)):
ll = binned == jj # &(~np.isnan(inV))
if np.sum(ll) > 0:
Pa[jj - 1] = np.mean(inP[ll])
Va[jj - 1] = np.mean(inV[ll])
else:
Pa[jj - 1] = np.nan
Va[jj - 1] = np.nan
# linearly extrapolate some values, but not beyond range of original data
pnew = Pa[0] - (Pa[1] - Pa[0])
vnew = Va[0] - (Va[1] - Va[0])
Pa = np.concatenate(([pnew], Pa))
Va = np.concatenate(([vnew], Va))
Vi = np.interp(Pi, Pa[~np.isnan(Va)], Va[~np.isnan(Va)], right=np.nan, left=np.nan)
Vi[Pi > np.max(inP)] = np.nan
Vi[Pi < np.min(inP)] = np.nan
return Pi, Vi
def cXfromX(X):
X = np.array(X)
X[np.isnan(X)] = -5
Y = np.nan * X
iii = (X > 0) & (X < 100)
Y[iii] = -np.log(X[iii] / 100.0) / 0.25
return Y
def turbReg(m, Cx, fl):
return np.maximum(0.0, m[0] * Cx - m[1] * fl - m[2])
def turbFit(df0):
# calculate conversion factor for sb19 ctd turbidity to ALS bottle turbidity
# force through (0,0)
x = df0.loc[(df0.ALS_Turb_NTU > 0) & (df0.sb19Turb_uncorrected > 0)][
"sb19Turb_uncorrected"
].values
x = x[:, np.newaxis]
y = df0.loc[(df0.ALS_Turb_NTU > 0) & (df0.sb19Turb_uncorrected > 0)]["ALS_Turb_NTU"]
tinv = np.linalg.lstsq(x, y, rcond=None)[0]
tcor = 1.0 / tinv
return tcor
def loadDataFRP_init(exp="all"):
if exp not in {"exp1", "exp2", "exp3", "all"}:
print("option exp=" + exp + " is not defined.")
raise
with open(
"/ocean/shared/SalishSeaCastData/FRPlume/stationsDigitizedFinal.csv", "r"
) as fa:
df0_a = pd.read_csv(fa, header=0, na_values="None")
with open(
"/ocean/shared/SalishSeaCastData/FRPlume/util/stationsDigitized_ancillary.csv",
"r",
) as fb:
df0_b = pd.read_csv(fb, header=0, na_values="None")
df0 = pd.merge(df0_a, df0_b, how="left", on=["Station", "Date"])
# if values present, calculate correction factor for sb19 turbidity (divide sb19 turbidity by tcor)
# fit true turb to observed turb
# calculate here while all values present
if np.sum(df0.sb19Turb_uncorrected > 0) > 0:
tcor = turbFit(df0)
else:
tcor = np.nan
if exp == "exp1":
df0 = df0.drop(df0.index[df0.Date != 20170410])
elif exp == "exp2":
df0 = df0.drop(df0.index[df0.Date != 20170531])
elif exp == "exp3":
df0 = df0.drop(df0.index[df0.Date != 20171101])
basedir1 = "/ocean/shared/SalishSeaCastData/FRPlume/ctd/20170410/"
basedir2 = "/ocean/shared/SalishSeaCastData/FRPlume/ctd/20170531/"
basedir3 = "/ocean/shared/SalishSeaCastData/FRPlume/ctd/20171101/"
dir19 = "19-4561/4_derive"
dir25 = "25-0363/4_derive"
dir19T10 = "19-4561/4b_deriveTEOS10"
dir25T10 = "25-0363/4a_deriveTEOS10"
clist = []
if exp == "exp1" or exp == "all":
clist = clist + list(range(1, 10))
if exp == "exp2" or exp == "all":
clist = clist + [10, 11, 12, 13, 14.1, 14.2, 15, 16, 17, 18]
if exp == "exp3" or exp == "all":
clist = clist + list(range(19, 25))
fpath19 = dict()
fpath25 = dict()
for ii in clist:
if ii < 10:
fpath19[ii] = os.path.join(basedir1, dir19T10, cnvlist19[ii])
fpath25[ii] = os.path.join(basedir1, dir25T10, cnvlist25[ii])
elif ii < 19:
fpath19[ii] = os.path.join(basedir2, dir19T10, cnvlist19[ii])
fpath25[ii] = os.path.join(basedir2, dir25T10, cnvlist25[ii])
else:
fpath19[ii] = os.path.join(basedir3, dir19T10, cnvlist19[ii])
fpath25[ii] = os.path.join(basedir3, dir25T10, cnvlist25[ii])
cast19 = dict()
cast25 = dict()
for ii in clist:
cast19[ii] = Cast(fpath19[ii])
cast25[ii] = Cast(fpath25[ii])
return df0, clist, tcor, cast19, cast25
def loadDataFRP(
exp="all",
sel="narrow",
dp=1.0,
form="binned",
vert="P",
meshPath="/data/eolson/results/MEOPAR/NEMO-forcing-new/grid/mesh_mask201702.nc",
):
# exp determines which sampling date to load (or all)
# sel determines whether to use narrow data selection, which is a more conservative estimate,
# or 'wide' data selection, which includes more near-surface data but can include
# some time the intstruments were parked near-surface
# dp determines bin interval in terms of depth variable vert (only if form='binned')
# form can be 'binned','raw' or 'SSCgrid' and determines if binned and how
# vert determines vertical variable: 'P' or 'Z'
# meshPath is SSC mesh file location for binning to model grid (form='SSCgrid')
if exp not in {"exp1", "exp2", "exp3", "all"}:
print("option exp=" + exp + " is not defined.")
raise
if sel == "narrow":
prebin = False
elif sel == "wide":
prebin = True
else:
print("option sel=" + sel + " is not defined.")
raise
df0, clist, tcor, cast19, cast25 = loadDataFRP_init(exp=exp)
parDZ = 0.78
xmisDZ = 0.36
turbDZ = 0.67
pshiftdict = {
"gsw_ctA0": 0.0,
"gsw_srA0": 0.0,
"xmiss": xmisDZ,
"seaTurbMtr": turbDZ,
"par": parDZ,
"wetStar": 0.0,
"sbeox0ML_L": 0.0,
"seaTurbMtrnoQC": turbDZ,
"turb_uncor": turbDZ,
"turb": turbDZ,
}
# for SSC grid version, load model grid variables
if form == "SSCgrid":
with nc.Dataset(meshPath, "r") as mesh:
tmask = mesh.variables["tmask"][0, :, :, :]
gdept = mesh.variables["gdept_0"][0, :, :, :]
gdepw = mesh.variables["gdepw_0"][0, :, :, :]
nav_lat = mesh.variables["nav_lat"][:, :]
nav_lon = mesh.variables["nav_lon"][:, :]
zCasts = dict()
for nn in clist:
ip = np.argmax(cast25[nn].df["prSM"].values)
ilag = df0.loc[df0.Station == nn, "ishift_sub19"].values[0]
pS_pr = df0.loc[df0.Station == nn, "pS_pr"].values[0]
pE_pr = df0.loc[df0.Station == nn, "pE_pr"].values[0]
pS_tur = df0.loc[df0.Station == nn, "pStart25"].values[0]
pE_tur = df0.loc[df0.Station == nn, "pEnd25"].values[0]
lat = df0.loc[df0.Station == nn]["LatDecDeg"].values[0]
lon = df0.loc[df0.Station == nn]["LonDecDeg"].values[0]
if sel == "narrow":
pS = pS_tur
pE = pE_tur
elif sel == "wide":
pS = pS_pr
pE = pE_pr
cast19[nn].df["seaTurbMtrnoQC"] = cast19[nn].df["seaTurbMtr"]
cast19[nn].df["turb_uncor"] = turbQC(cast19[nn].df["seaTurbMtr"])
cast19[nn].df["turb"] = cast19[nn].df["turb_uncor"] * 1.0 / tcor
if vert == "Z":
# calc Z from p
cast25[nn].df["Z"] = -1 * gsw.z_from_p(cast25[nn].df["prSM"].values, lat)
cast19[nn].df["Z"] = -1 * gsw.z_from_p(cast19[nn].df["prdM"].values, lat)
zvar25 = "Z"
else:
zvar25 = "prSM"
pmax = cast25[nn].df.loc[ip, zvar25]
if form == "binned" or form == "raw":
edges = np.arange(dp / 2, pmax + dp, dp)
targets = [] # use default value of 1/2-way between edges
elif form == "SSCgrid":
jj, ii = geo_tools.find_closest_model_point(lon, lat, nav_lon, nav_lat)
edges = gdepw[:, jj, ii] # model w grid
targets = gdept[:, jj, ii] # model T grid
edges = edges[edges < pmax]
targets = targets[: (len(edges) - 1)]
if form == "binned" or form == "SSCgrid":
dCast = pd.DataFrame()
uCast = pd.DataFrame()
elif form == "raw":
zCasts[nn] = rawCast()
dCast = zCasts[nn].dCast
uCast = zCasts[nn].uCast
def makeVCol(incast_p, outcast, var, i0, i1, incast_v=None): # add in zvar25
# incast_p: [cast25[nn]] sb25 cast to get pressure from; also used for var unless incast_v specified
# outcast: [dCast or uCast] cast to save resulting aligned data to
# i0, i1: [pS:ip/ip:pE] starting and ending indices for data to include relative to incast_p
# var: variable name to extract
# incast_v [cast19[nn]] if variables are from sbe19, specify that cast; otherwise get variable from incast_p (25)
# vplag: ilag to align sbe19 to sbe25 in case where var is from sbe19; defaults to 0 for sbe25
# NOTE: dataframes are mutable; changes to outcast propagate to its input df
if incast_v is None:
incast_v = incast_p
vplag = 0
else:
vplag = ilag
inP = incast_p.df.loc[i0:i1][zvar25].values - pshiftdict[var] # p
inV = incast_v.df.loc[(i0 + vplag) : (i1 + vplag)][var].values # var
if sel == "wide":
inV[inP < 0.1] = np.nan
if form == "binned" or form == "SSCgrid":
p, out = bindepth(inP, inV, edges, targets=targets, prebin=prebin)
if len(outcast) == 0: # depth var not added yet
outcast[zvar25] = p
if form == "SSCgrid":
outcast["indk"] = np.arange(0, len(p))
outcast[var] = out
elif form == "raw":
outcast[var] = dataPair(inP, inV)
return
# fix first 2 casts for which sb25 pump did not turn on. use sb19
if nn == 1 or nn == 2:
##xmiss: xmis25=1.14099414691*xmis19+-1.6910134322
cast19[nn].df["xmiss"] = (
1.14099414691 * cast19[nn].df["CStarTr0"] - 1.6910134322
) # down var
for var in ("gsw_ctA0", "gsw_srA0", "xmiss"):
# downcast
makeVCol(cast25[nn], dCast, var, pS, ip, incast_v=cast19[nn])
# upcast
makeVCol(cast25[nn], uCast, var, ip, pE, incast_v=cast19[nn])
makeVCol(cast25[nn], dCast, "par", pS, ip)
makeVCol(cast25[nn], uCast, "par", ip, pE)
uCast["wetStar"] = np.nan
dCast["wetStar"] = np.nan
uCast["sbeox0ML_L"] = np.nan
dCast["sbeox0ML_L"] = np.nan
else:
for var in (
"gsw_ctA0",
"gsw_srA0",
"xmiss",
"par",
"wetStar",
"sbeox0ML_L",
):
if not nn == 14.2:
# downcast
makeVCol(cast25[nn], dCast, var, pS, ip)
else: # special case where there is no downcast
if len(dCast) == 0: # depth var not added yet
dCast = pd.DataFrame(np.nan * np.ones(10), columns=[zvar25])
dCast[var] = np.nan * np.ones(10)
if not nn == 14.1:
# upcast
makeVCol(cast25[nn], uCast, var, ip, pE)
else: # special case where there is no upcast
if len(uCast) == 0: # depth var not added yet
uCast = pd.DataFrame(np.nan * np.ones(10), columns=[zvar25])
uCast[var] = np.nan * np.ones(10)
for var in ("seaTurbMtrnoQC", "turb_uncor", "turb"):
if not nn == 14.2:
# turbidity downcast
makeVCol(cast25[nn], dCast, var, pS, ip, incast_v=cast19[nn])
else: # special case where there is no downcast
dCast[var] = np.nan * np.ones(10)
if not nn == 14.1:
# turbidity upcast
makeVCol(cast25[nn], uCast, var, ip, pE, incast_v=cast19[nn])
else: # special case where there is no upcast
uCast[var] = np.nan * np.ones(10)
if form == "binned" or form == "SSCgrid":
zCasts[nn] = zCast(uCast, dCast)
return df0, zCasts
def loadDataFRP_raw(exp="all", sel="narrow"):
df0, zCasts = loadDataFRP(exp=exp, sel=sel, vert="Z", form="raw")
return df0, zCasts
def loadDataFRP_SSGrid(
exp="all",
sel="narrow",
meshPath="/ocean/eolson/MEOPAR/NEMO-forcing/grid/mesh_mask201702.nc",
):
df0, zCasts = loadDataFRP(
exp=exp, sel=sel, vert="Z", form="SSCgrid", meshPath=meshPath
)
for ic in zCasts.values():
ic.dCast["depth_m"] = ic.dCast["Z"]
ic.uCast["depth_m"] = ic.uCast["Z"]
return df0, zCasts