Source code for salishsea_tools.loadDataFRP

import numpy as np
import pandas as pd
from scipy import signal as ssig
from scipy import stats as spst
import os
import re
import string
from salishsea_tools import geo_tools
import netCDF4 as nc
import gsw

# map cast numbers to the CTD .cnv files for each instrument (cnvlist19 for the
# SBE19, cnvlist25 for the SBE25); casts 14.1 and 14.2 share a single file
cnvlist19={1:'fraser2017101.cnv',
        2:'fraser2017102.cnv',
        3:'fraser2017103.cnv',
        4:'fraser2017104.cnv',
        5:'fraser2017105.cnv',
        6:'fraser2017106.cnv',
        7:'fraser2017107.cnv',
        8:'fraser2017108.cnv',
        9:'fraser2017109.cnv',
        10:'fraser2017110.cnv',
        11:'fraser2017111.cnv',
        12:'fraser2017112.cnv',
        13:'fraser2017113.cnv',
        14.1:'fraser2017114.cnv',
        14.2:'fraser2017114.cnv',
        15:'fraser2017115.cnv',
        16:'fraser2017116.cnv',
        17:'fraser2017117.cnv',
        18:'fraser2017118.cnv',
        19:'fraser2017119.cnv',
        20:'fraser2017120.cnv',
        21:'fraser2017121.cnv',
        22:'fraser2017122.cnv',
        23:'fraser2017123.cnv',
        24:'fraser2017124.cnv'}

cnvlist25={1:'fraser2017001.cnv',
        2:'fraser2017002.cnv',
        3:'fraser2017003.cnv',
        4:'fraser2017004.cnv',
        5:'fraser2017005.cnv',
        6:'fraser2017006.cnv',
        7:'fraser2017007.cnv',
        8:'fraser2017008.cnv',
        9:'fraser2017009.cnv',
        10:'fraser2017010.cnv',
        11:'fraser2017011.cnv',
        12:'fraser2017012.cnv',
        13:'fraser2017013.cnv',
        14.1:'fraser2017014.cnv',
        14.2:'fraser2017014.cnv',
        15:'fraser2017015.cnv',
        16:'fraser2017016.cnv',
        17:'fraser2017017.cnv',
        18:'fraser2017018.cnv',
        19:'fraser2017019.cnv',
        20:'fraser2017020.cnv',
        21:'fraser2017021.cnv',
        22:'fraser2017022.cnv',
        23:'fraser2017023.cnv',
        24:'fraser2017024.cnv'}

class Cast:
    def __init__(self,fpath):
        mSta,mLat,mLon,df=readcnv(fpath)
        self.sta=mSta
        self.lat=mLat
        self.lon=mLon
        self.df=df
        self.source=fpath

class zCast:
    def __init__(self,updf,downdf):
        self.uCast=updf
        self.dCast=downdf

class rawCast:
    def __init__(self):
        self.uCast=dict()
        self.dCast=dict()

class dataPair:
    def __init__(self,zval,varval):
        self.z=zval
        self.val=varval

def fmtVarName(strx):
    """ transform string into one that meets python naming conventions"""
    vName=re.sub(r'[^a-zA-Z0-9_\-\s/]','',strx.strip())
    vName=re.sub(r'[\s/]','_',vName)
    vName=re.sub('-','_',vName)
    if re.match('[0-9]',vName):
        vName='_'+vName
    return vName
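# Illustrative examples of fmtVarName (not part of the module); the expected
# outputs follow directly from the substitutions above:
#   >>> fmtVarName('sigma-t00')
#   'sigma_t00'
#   >>> fmtVarName('par/sat/log')
#   'par_sat_log'
#   >>> fmtVarName('0 weird name!')
#   '_0_weird_name'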
#def rolling_window(a, window):
#    # source: http://www.rigtorp.se/2011/01/01/rolling-statistics-numpy.html
#    # use example: np.mean(rolling_window(x, 3), -1)
#    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
#    strides = a.strides + (a.strides[-1],)
#    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
#
#def rolling_window_padded(a,window):
#    # extend rolling window to be same length as input array by duplicating first and last values
#    # even window sizes are not symmetric
#    test=rolling_window(a,window)
#    while window>1:
#        if window%2==0:
#            test=np.concatenate(([test[0,:]],test),axis=0)
#        else:
#            test=np.concatenate((test,[test[-1,:]]),axis=0)
#        window+=-1
#    return test

def slidingWindowEval(x,func,window,axis=0):
    # x is the input array
    # func is the function to apply over each window
    # window is the window size
    # axis is the axis to act along, in case there are several
    # if window is even, results will be shifted left by 1/2 unit
    x1=np.lib.stride_tricks.sliding_window_view(x, window, axis)
    b=func(x1,-1)
    # the rest of the code pads the front and back by repeating the first and
    # last values, so the result has the same shape as the original array
    nfront=np.floor((window-1)/2)
    nback=np.floor((window-1)/2)+(window-1)%2
    inxf=[slice(None)]*np.ndim(b)
    inxf[axis]=slice(0,1,1)
    inxb=[slice(None)]*np.ndim(b)
    inxb[axis]=slice(np.shape(b)[axis]-1,np.shape(b)[axis],1)
    repsf=np.ones(np.ndim(b),dtype=int)
    repsf[axis]=int(nfront)
    repsb=np.ones(np.ndim(b),dtype=int)
    repsb[axis]=int(nback)
    x2=np.concatenate((np.tile(b[tuple(inxf)],repsf),b,np.tile(b[tuple(inxb)],repsb)),axis=axis)
    return x2

def amp(var,dim=0):
    # range (max minus min) of var along dimension dim, ignoring NaNs
    return np.nanmax(var,dim)-np.nanmin(var,dim)
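# A quick illustration of slidingWindowEval (assumed inputs, not module code):
# the interior values are 3-point window means, and the endpoints are padded
# by repetition so the output length matches the input:
#   >>> slidingWindowEval(np.array([1.,2.,3.,4.,5.]), np.mean, 3)
#   array([2., 2., 3., 4., 4.])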
def turbQC(x):
    # the turbidity sensor produced erroneous zero readings interspersed with
    # real data when too close to the surface; remove suspect values from analysis
    #   - a median filter alone was not enough
    # remove a point if the max-min of the surrounding 5 point window
    # is greater than 1/3 the maximum turbidity value of the cast
    # (remove data within 5 points of a large jump)
    #ii1=amp(rolling_window_padded(x,5),-1)>.33*np.nanmax(x)
    ii1=slidingWindowEval(x,amp,5)>.33*np.nanmax(x) # was .5
    # remove data within 5 points of a near-zero turbidity value
    #ii2=np.nanmin(rolling_window_padded(x,5),-1)<.3
    ii2=slidingWindowEval(x,np.nanmin,5)<.3
    y=np.copy(x)
    y[np.logical_or(ii1,ii2)]=np.nan
    y=ssig.medfilt(y,3)
    return y

def readcnv(fpath):
    alphnumlist=list(string.ascii_letters)+list(string.digits)
    # define regexes for reading headers:
    reSta=re.compile(r'(?<=\*\*\sStation:)\s?([0-9])+\s?') # assumes numeric station identifiers
    reLat=re.compile(r'(?<=\*\*\sLatitude\s=)\s?([\-0-9\.]+)\s([\-\.0-9]+)\s?([NS])')
    reLon=re.compile(r'(?<=\*\*\sLongitude\s=)\s?([\-0-9\.]+)\s([\-\.0-9]+)\s?([EW])')
    # example header line: # start_time = May 08 2002 09:39:10
    reST=re.compile(r'(?<=\#\sstart_time\s=).*')
    #reTZ=re.compile(r'(?<=\*\*\s...\s\(Time\)\s=).*')
    #reCr=re.compile(r'(?<=\*\*\sCruise:).*')
    reNam=re.compile(r'(?<=\#\sname\s)([0-9]+)\s=\s(.*)\:\s?(.*)\s?')
    # define regex for identifying data lines:
    spStart=re.compile(r'^\s*[0-9]') # starts with whitespace characters followed by a digit
    headers=list()
    #lineno=0
    mSta=None
    mLat=None
    mLon=None
    with open(fpath, 'rt', encoding="ISO-8859-1") as f:
        for fline in f:
            if fline.startswith('**'):
                if reSta.search(fline):
                    mSta=reSta.search(fline).groups()
                if reLat.search(fline):
                    mLat=reLat.search(fline).groups()
                if reLon.search(fline):
                    mLon=reLon.search(fline).groups()
            elif reNam.search(fline):
                headers.append(fmtVarName(reNam.search(fline).groups(1)[1]))
            elif fline.startswith('*END*'):
                break
            #lineno+=1
        # still in with block, file open; the remaining lines are the data table
        df=pd.read_csv(f,delim_whitespace=True,names=headers)
    # file closed
    return mSta,mLat,mLon,df

def bindepth(inP,inV,edges,targets=[],prebin=False):
    # calculate depth-associated variables:
    # 1st calculate bin averages of depth and variable,
    # then use np.interp to estimate at-grid-point values
    # edges must be monotonically increasing
    if prebin:
        newP,newV=bindepth(inP,inV,np.arange(edges[0],edges[-1],.05),prebin=False)
        inP=newP
        inV=newV
    inP=inP[~np.isnan(inV)]
    inV=inV[~np.isnan(inV)]
    binned=np.digitize(inP,edges)
    Pa=np.empty(len(edges)-1)
    Va=np.empty(len(edges)-1)
    if len(targets) == 0:
        Pi=.5*(edges[:-1]+edges[1:])
    else:
        Pi=targets[:(len(edges)-1)]
    Vi=np.empty(len(edges)-1)
    for jj in range(1,len(edges)):
        ll=(binned==jj) #&(~np.isnan(inV))
        if np.sum(ll)>0:
            Pa[jj-1]=np.mean(inP[ll])
            Va[jj-1]=np.mean(inV[ll])
        else:
            Pa[jj-1]=np.nan
            Va[jj-1]=np.nan
    # linearly extrapolate some values, but not beyond the range of the original data
    pnew=Pa[0]-(Pa[1]-Pa[0])
    vnew=Va[0]-(Va[1]-Va[0])
    Pa=np.concatenate(([pnew],Pa))
    Va=np.concatenate(([vnew],Va))
    Vi=np.interp(Pi,Pa[~np.isnan(Va)],Va[~np.isnan(Va)],right=np.nan,left=np.nan)
    Vi[Pi>np.max(inP)]=np.nan
    Vi[Pi<np.min(inP)]=np.nan
    return Pi, Vi

def cXfromX(X):
    # convert percent beam transmission X to beam attenuation, -ln(X/100)/0.25
    # (0.25 presumably being the transmissometer path length in m);
    # values outside (0,100) are returned as NaN
    X=np.array(X)
    X[np.isnan(X)]=-5
    Y=np.nan*X
    iii=(X>0)&(X<100)
    Y[iii]=-np.log(X[iii]/100.0)/.25
    return Y

def turbReg(m,Cx,fl):
    # regression model for turbidity from beam attenuation (Cx) and
    # fluorescence (fl), floored at zero
    return np.maximum(0.0,m[0]*Cx-m[1]*fl-m[2])

def turbFit(df0):
    # calculate conversion factor for sb19 ctd turbidity to ALS bottle turbidity
    # force through (0,0)
    x=df0.loc[(df0.ALS_Turb_NTU>0)&(df0.sb19Turb_uncorrected>0)]['sb19Turb_uncorrected'].values
    x=x[:,np.newaxis]
    y=df0.loc[(df0.ALS_Turb_NTU>0)&(df0.sb19Turb_uncorrected>0)]['ALS_Turb_NTU']
    tinv=np.linalg.lstsq(x,y,rcond=None)[0]
    tcor=1.0/tinv
    return tcor
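# A small worked example of bindepth (synthetic inputs, not module code): with
# a linear profile V = 2*P + 0.5 sampled at P = 0.25, 0.75, 1.25, 1.75 and bin
# edges at 0, 1, and 2, the bin-centre estimates recover the line exactly:
#   >>> bindepth(np.array([.25,.75,1.25,1.75]), np.array([1.,2.,3.,4.]),
#   ...          np.array([0.,1.,2.]))
#   (array([0.5, 1.5]), array([1.5, 3.5]))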
def loadDataFRP_init(exp='all'):
    if exp not in {'exp1', 'exp2', 'exp3', 'all'}:
        raise ValueError('option exp='+exp+' is not defined.')
    with open('/ocean/shared/SalishSeaCastData/FRPlume/stationsDigitizedFinal.csv','r') as fa:
        df0_a=pd.read_csv(fa,header=0,na_values='None')
    with open('/ocean/shared/SalishSeaCastData/FRPlume/util/stationsDigitized_ancillary.csv','r') as fb:
        df0_b=pd.read_csv(fb,header=0,na_values='None')
    df0=pd.merge(df0_a,df0_b,how='left',on=['Station','Date'])
    # if values are present, calculate the correction factor for sb19 turbidity
    # (sb19 turbidity is divided by tcor); fit true turbidity to observed
    # turbidity here, while all values are still present
    if np.sum(df0.sb19Turb_uncorrected>0)>0:
        tcor=turbFit(df0)
    else:
        tcor=np.nan
    if exp=='exp1':
        df0=df0.drop(df0.index[df0.Date != 20170410])
    elif exp=='exp2':
        df0=df0.drop(df0.index[df0.Date != 20170531])
    elif exp=='exp3':
        df0=df0.drop(df0.index[df0.Date != 20171101])
    basedir1='/ocean/shared/SalishSeaCastData/FRPlume/ctd/20170410/'
    basedir2='/ocean/shared/SalishSeaCastData/FRPlume/ctd/20170531/'
    basedir3='/ocean/shared/SalishSeaCastData/FRPlume/ctd/20171101/'
    dir19='19-4561/4_derive'
    dir25='25-0363/4_derive'
    dir19T10='19-4561/4b_deriveTEOS10'
    dir25T10='25-0363/4a_deriveTEOS10'
    clist=[]
    if (exp=='exp1' or exp=='all'):
        clist = clist + list(range(1,10))
    if (exp=='exp2' or exp=='all'):
        clist = clist + [10,11,12,13,14.1,14.2,15,16,17,18]
    if (exp=='exp3' or exp=='all'):
        clist = clist + list(range(19,25))
    fpath19=dict()
    fpath25=dict()
    for ii in clist:
        if ii<10:
            fpath19[ii]=os.path.join(basedir1,dir19T10,cnvlist19[ii])
            fpath25[ii]=os.path.join(basedir1,dir25T10,cnvlist25[ii])
        elif ii<19:
            fpath19[ii]=os.path.join(basedir2,dir19T10,cnvlist19[ii])
            fpath25[ii]=os.path.join(basedir2,dir25T10,cnvlist25[ii])
        else:
            fpath19[ii]=os.path.join(basedir3,dir19T10,cnvlist19[ii])
            fpath25[ii]=os.path.join(basedir3,dir25T10,cnvlist25[ii])
    cast19=dict()
    cast25=dict()
    for ii in clist:
        cast19[ii]=Cast(fpath19[ii])
        cast25[ii]=Cast(fpath25[ii])
    return df0, clist, tcor, cast19, cast25
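# Typical use of loadDataFRP_init (requires access to the /ocean data paths
# hard-coded above, so this is an illustrative sketch only):
#   >>> df0, clist, tcor, cast19, cast25 = loadDataFRP_init(exp='exp2')
#   >>> cast25[10].df.columns   # derived TEOS-10 variables from the sbe25
#   >>> cast19[14.1].source     # path of the .cnv file the cast was read from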
def loadDataFRP(exp='all',sel='narrow',dp=1.0,form='binned',vert='P',
                meshPath='/data/eolson/results/MEOPAR/NEMO-forcing-new/grid/mesh_mask201702.nc'):
    # exp determines which sampling date to load (or all)
    # sel determines whether to use the 'narrow' data selection, which is more conservative,
    #     or the 'wide' data selection, which includes more near-surface data but can include
    #     some time when the instruments were parked near the surface
    # dp determines the bin interval in terms of the depth variable vert (only if form='binned')
    # form can be 'binned', 'raw', or 'SSCgrid' and determines whether and how data are binned
    # vert determines the vertical variable: 'P' or 'Z'
    # meshPath is the SSC mesh file location for binning to the model grid (form='SSCgrid')
    if exp not in {'exp1', 'exp2', 'exp3', 'all'}:
        raise ValueError('option exp='+exp+' is not defined.')
    if sel == 'narrow':
        prebin=False
    elif sel == 'wide':
        prebin=True
    else:
        raise ValueError('option sel='+sel+' is not defined.')
    df0, clist, tcor, cast19, cast25 = loadDataFRP_init(exp=exp)
    # vertical offsets of sensors relative to the pressure sensor:
    parDZ=.78
    xmisDZ=.36
    turbDZ=.67
    pshiftdict={'gsw_ctA0':0.0,'gsw_srA0':0.0,'xmiss':xmisDZ,'seaTurbMtr':turbDZ,'par':parDZ,
                'wetStar':0.0,'sbeox0ML_L':0.0,'seaTurbMtrnoQC':turbDZ,'turb_uncor':turbDZ,'turb':turbDZ}
    # for the SSC grid version, load model grid variables
    if form=='SSCgrid':
        with nc.Dataset(meshPath,'r') as mesh:
            tmask=mesh.variables['tmask'][0,:,:,:]
            gdept=mesh.variables['gdept_0'][0,:,:,:]
            gdepw=mesh.variables['gdepw_0'][0,:,:,:]
            nav_lat=mesh.variables['nav_lat'][:,:]
            nav_lon=mesh.variables['nav_lon'][:,:]
    zCasts=dict()
    for nn in clist:
        ip=np.argmax(cast25[nn].df['prSM'].values)
        ilag=df0.loc[df0.Station==nn,'ishift_sub19'].values[0]
        pS_pr=df0.loc[df0.Station==nn,'pS_pr'].values[0]
        pE_pr=df0.loc[df0.Station==nn,'pE_pr'].values[0]
        pS_tur=df0.loc[df0.Station==nn,'pStart25'].values[0]
        pE_tur=df0.loc[df0.Station==nn,'pEnd25'].values[0]
        lat=df0.loc[df0.Station==nn]['LatDecDeg'].values[0]
        lon=df0.loc[df0.Station==nn]['LonDecDeg'].values[0]
        if sel=='narrow':
            pS=pS_tur
            pE=pE_tur
        elif sel=='wide':
            pS=pS_pr
            pE=pE_pr
        cast19[nn].df['seaTurbMtrnoQC']=cast19[nn].df['seaTurbMtr']
        cast19[nn].df['turb_uncor']=turbQC(cast19[nn].df['seaTurbMtr'])
        cast19[nn].df['turb']=cast19[nn].df['turb_uncor']*1.0/tcor
        if vert=='Z':
            # calculate Z from p
            cast25[nn].df['Z']=-1*gsw.z_from_p(cast25[nn].df['prSM'].values,lat)
            cast19[nn].df['Z']=-1*gsw.z_from_p(cast19[nn].df['prdM'].values,lat)
            zvar25='Z'
        else:
            zvar25='prSM'
        pmax=cast25[nn].df.loc[ip,zvar25]
        if form=='binned' or form=='raw':
            edges=np.arange(dp/2,pmax+dp,dp)
            targets=[] # use the default values 1/2-way between edges
        elif form=='SSCgrid':
            jj, ii=geo_tools.find_closest_model_point(lon,lat,nav_lon,nav_lat)
            edges=gdepw[:,jj,ii] # model w grid
            targets=gdept[:,jj,ii] # model T grid
            edges=edges[edges<pmax]
            targets=targets[:(len(edges)-1)]
        if form=='binned' or form=='SSCgrid':
            dCast=pd.DataFrame()
            uCast=pd.DataFrame()
        elif form=='raw':
            zCasts[nn]=rawCast()
            dCast=zCasts[nn].dCast
            uCast=zCasts[nn].uCast
        def makeVCol(incast_p,outcast,var,i0,i1,incast_v=None): # add in zvar25
            # incast_p: [cast25[nn]] sb25 cast to get pressure from; also used for var unless incast_v is specified
            # outcast: [dCast or uCast] cast to save the resulting aligned data to
            # var: variable name to extract
            # i0, i1: [pS:ip / ip:pE] starting and ending indices of the data to include, relative to incast_p
            # incast_v: [cast19[nn]] if the variable is from the sbe19, specify that cast; otherwise it comes from incast_p (sbe25)
            # vplag: ilag aligns the sbe19 to the sbe25 when var is from the sbe19; defaults to 0 for the sbe25
            # NOTE: dataframes are mutable; changes to outcast propagate to its input df
            if incast_v is None:
                incast_v=incast_p
                vplag=0
            else:
                vplag=ilag
            inP=incast_p.df.loc[i0:i1][zvar25].values-pshiftdict[var] # p
            inV=incast_v.df.loc[(i0+vplag):(i1+vplag)][var].values # var
            if sel=='wide':
                inV[inP<.1]=np.nan
            if form=='binned' or form=='SSCgrid':
                p, out=bindepth(inP,inV,edges,targets=targets,prebin=prebin)
                if len(outcast) == 0: # depth var not added yet
                    outcast[zvar25]=p
                    if form=='SSCgrid':
                        outcast['indk']=np.arange(0,len(p))
                outcast[var]=out
            elif form=='raw':
                outcast[var]=dataPair(inP,inV)
            return
        # fix the first 2 casts, for which the sb25 pump did not turn on: use the sb19
        if nn==1 or nn==2:
            ##xmiss: xmis25=1.14099414691*xmis19-1.6910134322
            cast19[nn].df['xmiss']=1.14099414691*cast19[nn].df['CStarTr0']-1.6910134322
            for var in ('gsw_ctA0','gsw_srA0','xmiss'):
                #downcast
                makeVCol(cast25[nn],dCast,var,pS,ip,incast_v=cast19[nn])
                #upcast
                makeVCol(cast25[nn],uCast,var,ip,pE,incast_v=cast19[nn])
            makeVCol(cast25[nn],dCast,'par',pS,ip)
            makeVCol(cast25[nn],uCast,'par',ip,pE)
            uCast['wetStar']=np.nan
            dCast['wetStar']=np.nan
            uCast['sbeox0ML_L']=np.nan
            dCast['sbeox0ML_L']=np.nan
        else:
            for var in ('gsw_ctA0','gsw_srA0','xmiss','par','wetStar','sbeox0ML_L'):
                if not nn==14.2:
                    #downcast
                    makeVCol(cast25[nn],dCast,var,pS,ip)
                else: # special case where there is no downcast
                    if len(dCast)==0: # depth var not added yet
                        dCast=pd.DataFrame(np.nan*np.ones(10),columns=[zvar25])
                    dCast[var]=np.nan*np.ones(10)
                if not nn==14.1:
                    #upcast
                    makeVCol(cast25[nn],uCast,var,ip,pE)
                else: # special case where there is no upcast
                    if len(uCast)==0: # depth var not added yet
                        uCast=pd.DataFrame(np.nan*np.ones(10),columns=[zvar25])
                    uCast[var]=np.nan*np.ones(10)
        for var in ('seaTurbMtrnoQC','turb_uncor','turb'):
            if not nn==14.2:
                #turbidity downcast
                makeVCol(cast25[nn],dCast,var,pS,ip,incast_v=cast19[nn])
            else: # special case where there is no downcast
                dCast[var]=np.nan*np.ones(10)
            if not nn==14.1:
                #turbidity upcast
                makeVCol(cast25[nn],uCast,var,ip,pE,incast_v=cast19[nn])
            else: # special case where there is no upcast
                uCast[var]=np.nan*np.ones(10)
        if form=='binned' or form=='SSCgrid':
            zCasts[nn]=zCast(uCast,dCast)
    return df0, zCasts

def loadDataFRP_raw(exp='all',sel='narrow'):
    # convenience wrapper: return unbinned (raw) casts with depth in m
    df0,zCasts=loadDataFRP(exp=exp,sel=sel,vert='Z',form='raw')
    return df0, zCasts

def loadDataFRP_SSGrid(exp='all',sel='narrow',
                       meshPath='/ocean/eolson/MEOPAR/NEMO-forcing/grid/mesh_mask201702.nc'):
    # convenience wrapper: return casts binned to the SalishSeaCast model grid
    df0,zCasts=loadDataFRP(exp=exp,sel=sel,vert='Z',form='SSCgrid',meshPath=meshPath)
    for ic in zCasts.values():
        ic.dCast['depth_m']=ic.dCast['Z']
        ic.uCast['depth_m']=ic.uCast['Z']
    return df0, zCasts
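# Illustrative use of the loader functions (requires the /ocean data files, so
# treat this as a sketch rather than a runnable test):
#   >>> df0, zCasts = loadDataFRP(exp='exp1', sel='narrow', form='binned')
#   >>> zCasts[1].dCast['gsw_ctA0']     # binned downcast conservative temperature
#   >>> df0raw, rawCasts = loadDataFRP_raw(exp='exp1')
#   >>> rawCasts[1].dCast['turb'].z     # depths (m) paired with...
#   >>> rawCasts[1].dCast['turb'].val   # ...QC'd, corrected turbidity values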