Commit 6ffad722 authored by Thomas Shatwell's avatar Thomas Shatwell
Browse files

Initial commit

parents
^seefo\.Rproj$
^\.Rproj\.user$
^LICENSE\.md$
.Rproj.user
.Rhistory
.Rdata
.httr-oauth
.DS_Store
Package: seefo
Title: Useful Tools for The Lake Research Department at UFZ
Version: 0.0.0.9000
Authors@R:
person(given = "Tom",
family = "Shatwell",
role = c("aut", "cre"),
email = "tom.shatwell@ufz.de",
comment = c(ORCID = "0000-0002-4520-7916"))
Maintainer: Tom Shatwell <tom.shatwell@ufz.de>
Description: A collection of useful tools for lake research
License: GPL (>= 3)
Depends: R (>= 3.5)
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1
Imports:
data.table,
plot3D
Suggests:
MASS,
testthat (>= 3.0.0)
Config/testthat/edition: 3
This diff is collapsed.
# Generated by roxygen2: do not edit by hand
export(batch_read)
export(clean_profile)
export(delta_rho)
export(get_profile_info)
export(h2mix)
export(hmix)
export(ice_phenology)
export(leap)
export(read_bbe)
export(read_ctm)
export(read_ixx)
export(rho_water)
export(strat_phenology)
# seefo 0.0.0.9000
* Added a `NEWS.md` file to track changes to the package.
#' @title Reads raw sonde profile data from several files and compiles the results
#'
#' @description Reads raw sonde profile data from several files and compiles them into a data.frame.
#' By default, it searches a given directory and all subdirectories for matching files for a given sonde type,
#' cleans up the data, and returns it sorted and labelled.
#'
#' @param sonde Type of sonde. One of `"ctm"`,`"bbe"`,`"ixx"`.
#' @param path The path to a directory in which to search for matching files. Used only if `fnames`== `"autosearch"`
#' @param fnames The names of the files to read. Defaults to `"autosearch"`,
#' in which case all filenames with an extension matching the sonde type will be read.
#' If `fnames` != `"autosearch"`, then `path` and `recursive` are not used.
#' @param keep names of the columns to retain in the output. Defaults to `"all"`
#' @param recursive Should subdirectories in `path` also be searched? (logical, only if `fnames`== `"autosearch"`)
#' @param clean Should the profiles be cleaned, by removing measurements in air, near the sediment, and on the upward pull? (logical)
#' @param TZ time zone given to the output datetimes, defaults to `"UTC"` (recommended)
#'
#' @details
#' To be added later.
#'
#' @return A `data.frame` containing the profile data
#'
#' @author Tom Shatwell
#'
#' @seealso \code{\link{get_profile_info}}, \code{\link{read_bbe}}, \code{\link{read_ctm}}
#'
#' @examples
#' \dontrun{
#' out <- batch_read(sonde="ctm", path="../data/")
#' }
#'
#' @export
batch_read <- function(sonde, path, fnames = "autosearch", keep = "all",
                       recursive = TRUE, clean = TRUE, TZ = "UTC") {
  # Read one file with the given reader, converting any warning or error into
  # its message string so that a single bad file does not abort the batch.
  wrapper <- function(p, fn, ...) {
    out <- NULL
    tryCatch({
      out <- fn(p, ...)
    }, warning = function(x) {
      out <<- as.character(x)
    }, error = function(x) {
      out <<- as.character(x)
    })
    return(out)
  }
  # Successful reads yield data.frames; failures yield character messages.
  isValid <- function(x) is.data.frame(x)

  filenames <- list.files(path = path,
                          full.names = TRUE, recursive = recursive)

  # Choose the filename pattern and reader function for the sonde type.
  if (sonde == "ctm") {
    pat <- ".CTM"
    func <- read_ctm
  } else if (sonde == "bbe") {
    pat <- ".FP"
    func <- read_bbe
  } else if (sonde == "ixx") {
    pat <- ".i[*0-9]"
    func <- read_ixx
  } else {
    stop("Sonde name must be one of 'ctm', 'bbe', 'ixx'.")
  }

  if (fnames[1] == "autosearch") fnames <- filenames[grep(pattern = pat, x = filenames)]

  out <- lapply(X = fnames, FUN = wrapper, fn = func,
                keep = keep, clean = clean, TZ = TZ) # read all the files
  retain <- which(vapply(out, isValid, logical(1))) # indices of files read successfully
  FILL <- keep[1] == "all" # allow differing columns when keeping everything
  out <- data.table::rbindlist(out[retain], fill = FILL) # stack individual tables
  data.table::setkeyv(out, "pr_dt") # sort chronologically by profile datetime
  cat("Read", length(retain), "of", length(fnames), "files\n")
  if (length(retain) < length(fnames)) {
    # Report the files that failed. `retain` holds integer indices, so the
    # failed files are selected by position (the original compared names
    # against indices with %in%, which always reported every file).
    failed <- fnames[setdiff(seq_along(fnames), retain)]
    warning(c("Could not read the following files:\n", paste(failed, "\n")))
  }
  return(as.data.frame(out))
}
#' @title Clean up a profile
#'
#' @description Strips unwanted values from a profile, usually read from raw data.
#'
#' @param data The profile data (`data.frame`) containing a column named either "depth" or "press"
#' and a column named "dt" containing the time stamp of the measurement
#' @param remove_lower Numeric value of the thickness of the bottom layer to truncate
#'
#' @details
#' This function removes rows in a profile that:
#' * have depth less than zero (ie in air),
#' * were taken when the sonde was pulled back up
#' * are within a certain distance of the maximum depth, given by `remove_lower`.
#' The idea of `remove_lower` is to remove the lowest values when the sonde hits the sediment.
#'
#' @return The cleaned profile data
#'
#' @author Tom Shatwell
#'
#' @seealso \code{\link{get_profile_info}}
#'
#' @examples
#' \dontrun{
#' cleaned <- clean_profile(data = df, remove_lower = 0.5)
#' }
#'
#' @export
clean_profile <- function(data, remove_lower = 0.5) {
  # Strips unwanted rows from a raw sonde profile: measurements taken in air
  # (negative depth), on the upward pull (depth less than the running maximum),
  # and within `remove_lower` of the deepest point (sonde hitting sediment).
  #
  # data:         data.frame with a "depth" or "press" column and a "dt"
  #               timestamp column
  # remove_lower: thickness (m) of the bottom layer to truncate
  # Returns the cleaned data.frame.

  # Logical mask selecting the depth column(s): named "depth" or "press".
  depthcol <- names(data) %in% c("depth", "press")
  # Ensure the profile is ordered chronologically.
  data <- data[order(data$dt), ]
  # Running maximum depth reached so far. Rows shallower than this were
  # recorded while the sonde was being pulled back up. cummax() replaces the
  # original O(n^2) loop of max(data[1:i, ...]).
  depth_vals <- data[, depthcol]
  if (is.data.frame(depth_vals)) depth_vals <- do.call(pmax, depth_vals)
  deepest <- cummax(depth_vals)
  keep <- data[, depthcol] >= 0 &
    data[, depthcol] >= deepest &
    data[, depthcol] <= (max(data[, depthcol]) - remove_lower)
  return(data[keep, ])
}
#' @title Fast calculation of mixed layer depth (depth of density threshold)
#'
#' @description Fast calculation of the depth of a set density threshold
#'
#' @param T Temperature data. Must be a mxn matrix with n temperature profiles in the columns
#' @param z The depths corresponding to the temperature profiles. Numeric vector of length m.
#' @param drho The density difference threshold (kg m^-3) relative to the surface that defines the mixed layer depth. Defaults to `0.1`.
#'
#' @details
#' Calculates mixed layer depths as the depth at which the given density difference `drho` relative to the surface is first exceeded.
#' Designed to be as fast as possible for large data sets. Does very minimal checks to remove `Inf`s and `NaN`s, so be careful.
#' Choose the right vertical resolution in the temperatures to ensure a more robust result.
#' See Wilson et al. 2020, HESS XXXX
#'
#' @return A vector of mixed layer depths
#'
#' @author Tom Shatwell
#'
#' @seealso \code{\link{h2mix}}, \code{\link{delta_rho}}
#'
#' @examples
#' \dontrun{
#' z <- c(0,2,4,6,10)
#' T <- matrix(c(15,14,10,8,7,
#' 16,13,12,11,11,
#' 22,19,18,14,13),
#' nrow=length(z))
#' delta_rho(T,z)
#' }
#'
#' @export
delta_rho <- function(T, z, drho = 0.1) {
  # Mixed layer depth as the depth where each profile's density first exceeds
  # its surface density + drho, linearly interpolated between grid depths.
  #
  # T:    m x n matrix, n temperature profiles in the columns
  # z:    depths (length m) corresponding to the rows of T
  # drho: density threshold (kg m^-3) above surface density
  # Returns a length-n vector of mixed layer depths (NA where undefined).
  rho <- rho_water(T) # density profiles, m x n
  m <- nrow(rho)
  # Per-profile absolute threshold, expanded to a full matrix so element-wise
  # comparisons align per column. (A bare length-n vector would recycle down
  # the columns in column-major order and misalign whenever n > 1.)
  rho_t <- matrix(rho[1, ] + drho, nrow = m, ncol = ncol(rho), byrow = TRUE)
  # Interpolated crossing depth in every layer; (diff(...) == 1) masks the
  # layer in which the threshold is actually crossed.
  h <- (z[-m] + (rho_t[-m, , drop = FALSE] - rho[-m, , drop = FALSE]) /
          (diff(rho) / diff(z))) *
    (diff(rho > rho_t) == 1)
  h[!is.finite(h)] <- NA # get rid of Infs and NaNs (zero density gradients)
  h1 <- suppressWarnings(apply(h, 2, max, na.rm = TRUE)) # deepest crossing
  h1[!is.finite(h1)] <- NA # profiles with no crossing
  h1[h1 > max(z)] <- NA # interpolated depth beyond the profile
  h1[h1 == 0] <- NA # zero product means no crossing was found
  return(h1)
}
#' @title Extracts information from a sonde filename
#'
#' @description Extracts information from a sonde raw data filename, like the measuring location, date, and sonde number.
#'
#' @param filename Filename to examine (character), including the file extension.
#'
#' @details
#' This function parses the filename and extracts information according to the limnophysics naming convention.
#' It assumes that the filename has the structure `"LOC_DATE_PROFILENUMBER_OPTIONALSTUFF.SONDENUM"`.
#' It separates the name into chunks separated by `"_"`. Whole paths can be given too; in this case the path is discarded from the filename.
#'
#' @return A character vector with elements containing the location, date, profile number, file extension, and possibly comments.
#' Returns `NA` if any of these are missing.
#'
#' @author Tom Shatwell
#'
#'
#' @examples
#' \dontrun{
#' get_profile_info("../mydata/lakes/YH1_20210101_1.CTM644")
#' }
#'
#' @export
# get info from profile filenames
get_profile_info <- function(filename) {
  # Parse a sonde filename of the form "LOC_DATE_PRNO_COMMENT.EXT" and return
  # a named character vector c(loc, date, pr_no, ext, comment), with NA for
  # any missing component. A leading path, if present, is discarded first.
  parts <- strsplit(x = filename, split = "/")[[1]]
  filename <- parts[length(parts)]
  ext <- loc <- date <- pr_no <- comment <- NA
  parts <- strsplit(x = filename, split = "\\.")[[1]]
  l <- length(parts)
  if (l > 1 && sum(parts[1:(l - 1)] != "")) {
    # The extension must be the suffix of a non-empty name.
    ext <- parts[l]
    # Rejoin the remaining chunks with collapse (not sep, which leaves a
    # vector) so base names containing dots survive intact.
    filename <- paste(parts[1:(l - 1)], collapse = ".")
    parts <- strsplit(x = filename, split = "_")[[1]]
    l <- length(parts)
    loc <- parts[1]
    if (l > 1) date <- parts[2]
    if (l > 2) pr_no <- parts[3]
    # Anything after the third chunk is treated as one underscore-joined comment.
    if (l > 3) comment <- paste(parts[4:l], collapse = "_")
  }
  return(c(loc = loc, date = date, pr_no = pr_no, ext = ext, comment = comment))
}
#' @title Fast calculation of mixed layer depth (lower thermocline boundary)
#'
#' @description Fast calculation of lower thermocline boundary
#'
#' @param T Temperature data. Must be a mxn matrix with n temperature profiles in the columns
#' @param z The depths corresponding to the temperature profiles. Numeric vector of length m.
#'
#' @details
#' Calculates mixed layer depths as the depth of the maximum curvature of the temperature profile (d2T/dz2).
#' Designed to be as fast as possible for large data sets, so doesn't do any checks at all (be careful).
#' Choose the right vertical resolution in the temperatures to ensure a more robust result.
#'
#' @return A vector of mixed layer depths
#'
#' @author Georgiy Kirillin and Tom Shatwell
#'
#' @seealso \code{\link{hmix}}, \code{\link{delta_rho}}
#'
#' @examples
#' \dontrun{
#' z <- c(0,2,4,6,10)
#' T <- matrix(c(15,14,10,8,7,
#' 16,13,12,11,11,
#' 22,19,18,14,13),
#' nrow=length(z))
#' h2mix(T,z)
#' }
#'
#' @export
h2mix <- function(T, z) {
  # Lower thermocline boundary: the depth of maximum curvature (d2T/dz2) of
  # each temperature profile.
  #
  # T: m x n matrix, n temperature profiles in the columns
  # z: depths (length m) corresponding to the rows of T
  # Returns a length-n vector of depths. No input checks (by design, for speed).
  curvature <- diff(T, differences = 2) # second differences, (m-2) x n
  sq_steps <- diff(z)^2
  scaled <- curvature / sq_steps[-length(sq_steps)]
  # +1 shifts the second-difference row index back onto the depth grid.
  peak_rows <- apply(scaled, 2, which.max)
  z[peak_rows + 1]
}
#' @title Fast calculation of mixed layer depth (upper thermocline boundary)
#'
#' @description Fast calculation of the upper thermocline boundary
#'
#' @param T Temperature data. Must be a mxn matrix with n temperature profiles in the columns
#' @param z The depths corresponding to the temperature profiles. Numeric vector of length m.
#'
#' @details
#' Calculates mixed layer depths as the depth of the maximum temperature gradient (dT/dz).
#' Designed to be as fast as possible for large data sets. It doesn't do any checks at all,
#' and only works with regular data, so be careful.
#' Choose the right vertical resolution in the temperatures to ensure a more robust result.
#' Use the rLakeAnalyzer functions for more robust solutions which are
#' orders of magnitude slower.
#'
#' @return A vector of mixed layer depths
#'
#' @author Georgiy Kirillin and Tom Shatwell
#'
#' @seealso \code{\link{h2mix}}, \code{\link{delta_rho}}
#'
#' @examples
#' \dontrun{
#' z <- c(0,2,4,6,10)
#' T <- matrix(c(15,14,10,8,7,
#' 16,13,12,11,11,
#' 22,19,18,14,13),
#' nrow=length(z))
#' hmix(T,z)
#' }
#'
#' @export
hmix <- function(T,z) z[apply(abs(diff(T)/diff(z)),2,which.max)]
#' @title Calculates ice phenology
#'
#' @description Calculates seasonal ice phenology including annual mean,
#' max and total duration of ice cover.
#'
#' @param H_ice A vector indicating the presence of ice cover. Can be numerical or logical, where `TRUE` or any number > `0` (e.g. ice thickness) indicates ice.
#' `FALSE` or `0` indicates no ice cover.
#' @param dates The dates corresponding to the ice observations in POSIX style and same length as `H_ice`.
#' @param NH Is the lake in the northern hemisphere? (logical)
#'
#' @details
#' To be added later.
#'
#' @return A `data.frame` containing ice stratification phenology
#'
#' @author Tom Shatwell
#'
#'
#' @examples
#' \dontrun{
#' add later
#' }
#'
#' @export
# ice statistics -----------------------------------------------
# this function returns ice phenology statstics: annual mean, max and total
# this is a subset of the function above analyse_strat, but just for ice.
# NOTE: summer strat periods are allocated to the year in which the period
# starts. Winter stratification and ice periods are allocated to the year in
# which they end.
# H_ice: ice thickness time series, set to NULL if analysis not required
# dates: POSIX style date vector corresponding to rows of Ts and Tb.
# not used: dT: temperature difference between top and bottom indicating stratification.
# drho: density difference between top and bottom indicating stratificaiton [kg m^-3]
# NH: northern hemisphere? TRUE or FALSE
# Computes annual ice phenology statistics from an ice presence/thickness
# series: mean, maximum and total ice-cover duration, ice-on/ice-off days,
# first freeze, last thaw, and the annual maximum ice thickness.
# In the northern hemisphere a half-year-shifted "ice year" is used so that
# winters spanning the calendar year boundary are grouped into one season.
ice_phenology <- function(H_ice, dates, NH=TRUE) {
the_years <- as.POSIXlt(dates)$year+1900 # calendar year of each observation
yrs <- unique(the_years)
doys <- as.POSIXlt(dates)$yday # day of the year [0..364]
alt_doys <- doys # alternative counting from [-182 .. 182] (July 2/3) for ice in northern hemisphere or strat in southern hemisphere
alt_doys[doys>182] <- doys[doys>182] - (365 + leap(the_years[doys>182])) # Jan 1 is day 0, correct for leap years
alt_years <- the_years
alt_years[alt_doys<0] <- the_years[alt_doys<0] +1 # alternative counting of years (shifted forward by half a year)
if(NH) { # NH ice and SH stratification use alternative doy and year counts to adjust for ice and stratification events that span more than one calendar year
ice_yrs <- alt_years
ice_doys <- alt_doys
} else {
ice_yrs <- the_years
ice_doys <- doys
}
ice <- H_ice > 0 # logical: is ice present at each time step?
# NOTE(review): i_i_st / i_i_en are logical masks (diff transitions +1/-1),
# not integer indices as the comments below say. Prepending/appending NA to a
# logical mask makes the corresponding subset element NA, which flags an
# unknown start/end date -- but the mask then has length n+1; presumably
# intended, verify against real data.
i_i_st <- diff(c(ice[1],ice))==1 # indices of ice cover onset
i_i_en <- diff(c(ice[1],ice))==-1 # # indices of ice cover end
if(ice[1]) i_i_st <- c(NA, i_i_st) # if initially frozen, set first start date to NA
if(ice[length(ice)]) i_i_en <- c(i_i_en, NA) # if frozen at end, set last thaw date to NA
ice_st <- dates[i_i_st] # ice start dates
ice_en <- dates[i_i_en] # ice end dates
# if(sum(ice)==0) # if there is no ice at all, set start and end to time=0
# maximum ice thickness, one row per year of the series
IceMax <- NULL
for(ii in unique(the_years)) {
Hice_maxi <- which.max(H_ice[ice_yrs == ii]) # position of the annual maximum
IceMaxOut <- data.frame(year=ii,
HiceMax = H_ice[ice_yrs == ii][Hice_maxi],
HiceMaxDay = ice_doys[ice_yrs==ii][Hice_maxi],
HiceMaxDate = dates[ice_yrs == ii][Hice_maxi])
if(sum(H_ice[ice_yrs == ii])==0) IceMaxOut[1,c("HiceMaxDay","HiceMaxDate")] <- NA # ice-free year: no max day/date
IceMax <- rbind(IceMax, IceMaxOut)
}
ice_start_doys <- ice_doys[i_i_st] # day of year of start of ice cover events
ice_end_doys <- ice_doys[i_i_en] # day of year of end of ice cover events
ice_event_yrs <- ice_yrs[i_i_st] # the years assigned to each ice event
# if there is no ice, set values to NA ...
if(sum(ice)==0) {
ice_start_doys <- ice_end_doys <- ice_event_yrs <- ice_st <- ice_en <- NA
}
ice_dur <- as.double(difftime(ice_en, ice_st, units="days")) # duration of ice periods
# summary of ice cover events (one row per freeze-thaw event)
ice.summary <- data.frame(year = ice_event_yrs,
start = ice_st,
end = ice_en,
dur = ice_dur,
startday = ice_start_doys,
endday = ice_end_doys)
ice_out <- NULL
# aggregate the per-event summary into annual statistics; ice_on/ice_off are
# taken from the longest event of the season
for(mm in unique(ice.summary$year[!is.na(ice.summary$year)])) {
ice2 <- subset(ice.summary, year==mm)
ice2_on <- ice2[which.max(ice2$dur),"startday"]
ice2_off <- ice2[which.max(ice2$dur),"endday"]
if(anyNA(ice2$dur)) ice2_on <- ice2_off <- NA # season truncated by the series: unknown
ice_out <- rbind(ice_out,
data.frame(year=mm,
MeanIceDur=mean(ice2$dur),
MaxIceDur=max(ice2$dur),
TotIceDur=sum(ice2$dur),
ice_on=ice2_on,
ice_off=ice2_off,
firstfreeze=min(ice2$startday),
lastthaw=max(ice2$endday)))
}
ice_out <- ice_out[ice_out$year %in% yrs,] # trim years outside the simulation range (eg ice that forms at the end of the last year, which should be assigned to the following year outside the simulation period)
# assemble the output: one row per calendar year, NA where no ice occurred
ice_out1 <- data.frame(year=yrs, MeanIceDur=NA, MaxIceDur=NA,
TotIceDur=NA, IceOn=NA, IceOff=NA, FirstFreeze=NA,
LastThaw=NA, HiceMax=NA, HiceMaxDay=NA)
ice_out1[match(ice_out$year, yrs),
c("MeanIceDur","MaxIceDur","TotIceDur",
"IceOn","IceOff","FirstFreeze","LastThaw")] <- ice_out[,-1]
ice_out1[,c("HiceMax","HiceMaxDay")] <- IceMax[,c("HiceMax","HiceMaxDay")] # assumes IceMax rows align with yrs (both built from unique(the_years))
# repair seasons wrapping the alternative-year boundary, where ice-off
# appears to precede ice-on: recompute ice-off from onset + max duration
i8 <- ice_out1$IceOff < ice_out1$IceOn
i8[is.na(i8)]<-FALSE # this gets rid of any NAs
if(sum(i8, na.rm=TRUE)>0) {
ice_out1[i8, "IceOff"] <- ice_out1[i8,"IceOn"] + ice_out1[i8,"MaxIceDur"]
}
# similarly, shift the last thaw forward by one (leap-corrected) year of days
# when it precedes the ice onset
i9 <- ice_out1$LastThaw < ice_out1$IceOn & ice_out1$TotIceDur < 366
i9[is.na(i9)]<-FALSE # this gets rid of any NAs
if(sum(i9, na.rm=TRUE)>0) {
ice_out1[i9, "LastThaw"] <- ice_out1[i9,"LastThaw"] + 365 +
leap(ice_out1[i9,"year"])
}
return(ice_out1)
}
#' @title Calculation of leap years
#'
#' @description Determines whether a year is a leap year
#'
#' @param yr Year (numeric)
#'
#' @return Returns a vector the same length as `yr` containing `0` for non-leap years, and `1` for leap years.
#'
#' @author Tom Shatwell
#'
#' @examples
#' \dontrun{
#' leap(c(2000,2002,2004,2006,2008,2010))
#' }
#'
#' @export
leap <- function(yr) ((yr%%4)==0) - ((yr%%100)==0) + ((yr%%400)==0)
#' @title Reads profile data from raw output file from a CTM CTD
#'
#' @description Reads a data from an output file generated by a CTM CTD.
#'
#' @param filename Name of the file to read, including the file extension
#' @param keep names of the columns to retain in the output. Defaults to `"all"`
#' @param clean Should the profile be cleaned? (logical)
#' @param TZ time zone given to the output datetimes, defaults to `"UTC"` (recommended)
#'
#' @details
#' This function first collects information from the filename using `get_profile_info()`,
#' including the sampling location from the beginning of the filename,
#' and the sonde number from the file extension.
#' It begins to read data in the file 2 lines after the keyword "Datasets".
#' It assigns the names in `header` to the columns. If `header` is set to `"auto"`,
#' it will assign hard coded names according to the sonde number, hoping that one sonde always has the same sensors attached.
#' It converts the date and time columns to a POSIX datetime with the timezone `TZ`.
#' The function finally drops any columns that are not named in `keep`.
#' If `keep` contains names not found in the header, it will add a column of `NA`s for each unfound name.
#' The `header` can include `c("no","press","temp","cond","cond_hi","salin","DO_sat","DO","pH","turb","chla","BGAPE","BGAPC","sv","intD","intT")`
#'
#' @return A `data.frame` containing the profile data
#'
#' @author Tom Shatwell
#'
#' @seealso \code{\link{get_profile_info}}, \code{\link{read_ctm}}
#'
#' @examples
#' \dontrun{
#' bbe <- read_bbe("../data/YH1_20210101_1.FP101")
#' }
#'
#' @export
read_bbe <- function(filename, keep = c("depth","sample_temperature","Green_Algae","Bluegreen",
"Diatoms","Cryptophyta","Yellow_substances","total_conc"),
clean = TRUE, TZ="UTC") {
info <- get_profile_info(filename) # extracts info from the filename
ext <- info["ext"] # sonde name comes from the file extension
if(!grepl(pattern = "FP", x = ext)) {
warning("File does not have FPxxxx extension, attempting anyway...")
}
# parse column names and convert into R-friendly names
top <- readLines(con = filename,n=5) # read the top 5 lines of the file
header <- unlist(strsplit(x = top[1], split = "\t")) # assume line 1 is header
for(i in 1:length(header)) {
header[i] <- sub(pattern = "#[0-9]\\(", replacement = "", x = header[i]) # remove silly hash headings
header[i] <- sub(pattern = "\\)", replacement = "", x = header[i]) # remove silly parentheses
header[i] <- sub(pattern = "\\.", replacement = "", x = header[i]) # remove silly dots
header[i] <- sub(pattern = " ", replacement = "_", x = header[i]) # replace spaces between words with "_"
header[i] <- sub(pattern = " ", replacement = "_", x = header[i]) # replace spaces between words with "_"
}
dat <- utils::read.table(file = filename, header = FALSE, dec = ",",
skip = 2, col.names = header)
dt <- as.POSIXct(paste(strptime(dat$date,
format="%d.%m.%Y", tz=TZ),
dat$time),tz=TZ)
pr_dt = as.POSIXct(cut(dt[1],"min"),tz=TZ)
if(keep[1]=="all") {
outvars <- header[!header %in% c("date","time")]
} else outvars <- keep
for(j in outvars) dat[dat[,j] == "invalid",j] <- NA
for(j in outvars) dat[,j] <- as.numeric(dat[,j])
if(sum(is.na(match(outvars,header)))>0) { # fill any missing columns with NA
newcols <- outvars[!outvars %in% header]
for(j in 1:length(newcols)) {
dat <- cbind(dat,NA)
}
names(dat) <- c(header,newcols)
}
dat <- data.frame(dt=dt,
pr_dt = pr_dt,
sonde=ext,
location=info["loc"],
dat[,outvars], #, comment=info["comment"]
row.names=NULL
)
if(clean) dat <- clean_profile(dat)
return(dat)
# return(data.frame(dt=dt,
# pr_dt = pr_dt,
# sonde=ext,
# location=info["loc"],
# dat[,outvars], #, comment=info["comment"]
# row.names=NULL