This tutorial demonstrates how to fit the TTR model to time series data using the R package TTR.PGM. It assumes the user is familiar with R. The tutorial is designed to accompany the manuscript “TTR.PGM: A R package for modelling the distributions and dynamics of plants using the Thornley transport resistance plant growth model”. This manuscript and the TTR.PGM help provide additional information.
If necessary install the TTR.PGM package using e.g.
install.packages("TTR.PGM_1.0.0.tar.gz",repos=NULL, type="source")
setwd(tempdir())
Load the required packages.
library(LaplacesDemon)
library(DEoptim)
library(TTR.PGM)
The model is written using LaplacesDemon style, which helps provide a predictable structure. In this case the model is setup as a state space model.
model_ssm <- function(parm, Data)
{
### parameters
ttr_parm <- TTR.PGM::get_parms(parm, Data)
parm <- TTR.PGM::interval_parms(parm,Data)
names(parm)<-Data$parm.names
### Process model
x <- TTR.PGM::run_ttr(ttr_parm,Data)
### Log-likelihood
steps <- Data$options$steps
Uc <- x["Uc",,steps,,drop=T]
mu_sif <- Uc*parm["b1_sif"] + parm["b0_sif"]
oe_sif <- parm["oe_sif_0"] + parm["oe_sif_1"]*Data$ymat[,"q"]
LL_sif <- sum( dnorm(Data$y[,"sif"], mu_sif, oe_sif, log = TRUE),na.rm=T)
LL <- LL_sif
yhat <- c(rnorm(Data$n.time,mu_sif,oe_sif) )
### Log-Prior
lo <- Data$bounds[,"lower"]
up <- Data$bounds[,"upper"]
bound.mu <- (lo+up)/2
bound.sd <- abs(lo-up)/50
prior_iM <- dunif( parm["iM"],lo["iM"],up["iM"],log=TRUE)
prior_oe <- dnorm(parm["oe_sif_0"],0,1,log=TRUE)+
dnorm(parm["oe_sif_1"],0,1,log=TRUE)
prior_pe <- sum(dgamma(parm[Data$pos.pe],3,10,log=TRUE))
prior_scale <- dnorm(parm["b0_sif"],0,10,log=TRUE) +
dnorm(parm["b1_sif"],0,100,log=TRUE)
prior_ttr_env <- sum(dnorm( parm[Data$pos.env],
mean = bound.mu[Data$pos.env],
sd = bound.sd[Data$pos.env],log=TRUE) )
priors <- prior_iM +
prior_oe +
prior_pe +
prior_scale +
prior_ttr_env
### Log-Posterior
LP <- LL + priors
### Return
if (Data$options$optim == "DEoptim")
return(-1 * LP)
if (Data$options$optim == "LaplacesDemon")
return(list(LP = LP, Dev = -2 * LL, Monitor = LP, yhat = yhat, parm = as.numeric(parm)))
if (Data$options$optim == "no")
return(list(sif = yhat))
}
Load a data.frame that contains the observational and environmental data for the time series. A prepared data set for a savanna site in Burkino Faso is used in this example (see help(data_input_time_SA_BFA_BNP)). The manuscript contains a brief description of how this data set was assembled.
site.name <- "SA_BFA_BNP"
data(data_input_time_SA_BFA_BNP)
ttr.time.in <- data_input_time_SA_BFA_BNP
Now use the get_input function to format ttr.time.in for the TTR.PGM package. For more detailed information on options see help(get_input).
seconds.week = 60*60*24*7
seconds.day = 60*60*24
input <- with(ttr.time.in, TTR.PGM::get_input(observations = data.frame(sif=sif*100),
lon = 1,
lat = 1,
tnur = tsoil,
tcur = tair-273.15,
tgrowth = tair-273.15,
tloss = tair-273.15,
seconds = seconds.week,
p3=p3, p4=p4,
catm = CA,
rsds = srad/seconds.day,
swc = msoil*100))
Specify some options (see help(standard_options)). In this case the “oak” variant is specified, and it is specified that process error and initial plant size should be estimated.
time_options <- standard_options
time_options$initial_mass_par <- TRUE
time_options$pe <- TRUE
time_options$ve <- "oak"
time_options$steps <- 1:dim(input$timeseries)[2]
time_options$optim <- "DEoptim"
Set the bounds; if these are not set the defaults are used. See help(bounds) for more information. If the statistical model requires additional parameters that are not in the defaults, they need to be defined here with their bounds. In this example, the oak model is simplified by removing some environmental factors by setting their bounds to NA.
max.swc <- max(input$timeseries["swc",,1])
bounds = list(alpha = list(
oe_sif_0 = c(0,100),
oe_sif_1 = c(0,100),
b1_sif = c(0,5000),
b0_sif = c(-1000,1000)
),
beta = list(
CU_Ns_1 = c(NA,NA),
CU_Ns_2 = c(NA,NA),
CU_swc_1 = c(0,max.swc),
CU_swc_2 = c(0,100),
g_swc_1 = c(0,max.swc),
g_swc_2 = c(0,100),
g_tgrowth_1 = c(0,50),
g_tgrowth_2 = c(0,50),
g_tgrowth_3 = c(0,50),
g_tgrowth_4 = c(0,50),
L_mix_1 = c(50,100),
L_mix_2 = c(0,100),
L_mix_3 = c(0,100),
L_tloss_1 = c(-20,50),
L_tloss_2 = c(0,50),
L_swc_1 = c(0,max.swc),
L_swc_2 = c(0,100),
NU_Nsoil_1 = c(NA,NA),
NU_Nsoil_2 = c(NA,NA),
NU_swc_1 = c(0,max.swc),
NU_swc_2 = c(0,100),
NU_swc_3 = c(0,100),
NU_swc_4 = c(0,100),
NU_tnur_1 = c(0,50),
NU_tnur_2 = c(0,50),
pe_Ms=c(0,1),
pe_Mr=c(0,1),
pe_Cs=c(0,1),
pe_Cr=c(0,1),
pe_Ns=c(0,1),
pe_Nr=c(0,1),
iM=c(1,100)
)
)
Set some of the global parameters. See help(standard_globals).
time_globals <- standard_globals
time_globals["gmax"] = 50
time_globals["mmax"] = 0.010
time_globals["KM"] = 0.5
time_globals["KA"] = 50
Using input, time_options, bounds and time_globals make the object needed to run and fit the model. See help(make_data).
MyData <- TTR.PGM::make_data(input,options=time_options,bounds=bounds, globals=time_globals)
Add an extra field (MODIS data quality) to the data structure, this data field is needed by model_ssm.
MyData$ymat <- data.frame( y = MyData$y,q =ttr.time.in$q)
Add some other parameters that are needed by model_ssm.
MyData$pos.pe <- grep("^pe",MyData$parm.names)
MyData$pos.env <- c(grep("^CU_",MyData$parm.names),
grep("^NU_",MyData$parm.names),
grep("^g_",MyData$parm.names),
grep("^L_",MyData$parm.names))
Fit the model using DEoptim. See help(DEoptim) for how to use DEoptim. This will take several minutes depending on the DEoptim settings selected and the size of MyData. For production analyses the DEoptim documentation should be followed to make sensible decisions on which values to use for deoptim_pop and deoptim_iter.
set.seed(2024)
MyData$options$optim <- "DEoptim"
deoptim_iter = 1000
deoptim_trace = deoptim_iter
deoptim_trace = 50
deoptim_pop = 50
ft.DE<-DEoptim(model_ssm, upper=MyData$bounds[,"upper"],
lower=MyData$bounds[,"lower"],
control=DEoptim.control(
NP = deoptim_pop,
itermax = deoptim_iter,
trace = deoptim_trace,
CR=0.9, F=0.8, c=0.0,
initialpop = NULL,
strategy = 2,
parallelType=0,
parVar=c("MyData"),
packages = list("LaplacesDemon","TTR.PGM")),
Data=MyData)
## Warning in DEoptim(model_ssm, upper = MyData$bounds[, "upper"], lower = MyData$bounds[, : For many problems it is best to set 'NP' (in 'control') to be at least ten times the length of the parameter vector.
If required you can save ft.DE and MyData.
saveRDS(ft.DE,paste0("ft.DE_",site.name,".rds"))
saveRDS(MyData,paste0("MyData_",site.name,".rds"))
Fit the model using LaplacesDemon. See help(LaplacesDemon) for how to use LaplacesDemon. This will take several minutes depending on the LaplacesDemon settings selected and the size of MyData. In this example we use the DEoptim solution to define the Initial.Values used by LaplacesDemon. We then run multiple chains in serial, using the covariance matrix from previous chains in subsequent chains.
ft.DE <- readRDS(paste0("ft.DE_",site.name,".rds"))
MyData <- readRDS(paste0("MyData_",site.name,".rds"))
iter<-2e4
status<-iter/100
thin<-10
MyData$options$optim <- "LaplacesDemon"
Initial.Values <- as.numeric(ft.DE$optim$bestmem)
ft.LD.DRAM <- LaplacesDemon(model_ssm, Data=MyData, Initial.Values,
Covar=NULL, Iterations=iter, Status=status, Thinning=thin,
Algorithm="DRAM", Specs=list(Adaptive=iter/10, Periodicity=10))
Initial.Values <- as.initial.values(ft.LD.DRAM)
ft.LD.DRAM <- LaplacesDemon(model_ssm, Data=MyData, Initial.Values,
Covar=ft.LD.DRAM$covar, Iterations=iter, Status=status, Thinning=thin,
Algorithm="DRAM", Specs=list(Adaptive=iter/10, Periodicity=10))
Use the LaplacesDemon plot method to view the MCMC chains. The chains can be useful diagnostic tools to assess model convergence. Save the output.
## png
## 2
Functions for calculating the Maximum a Posteriori.
myMAPmean <- function(ft,MyData,q=0.9) {
LP<-ft$Monitor[,"LP"]
n <- length(LP)
LP <- LP[(n/2):n] # discard first half of chain
wLP <- which(LP > quantile(LP,q) )
MAPmeans <- apply(ft$Posterior1[wLP,MyData$parm.names],2,mean)
return(MAPmeans)
}
myMAPquantile <- function(ft,MyData,q=c(0.25,0.75)) {
LP<-ft$Monitor[,"LP"]
n <- length(LP)
LP <- LP[(n/2):n] # discard first half of chain
wLP <- which(LP > quantile(LP,q[1]) & LP < quantile(LP,q[2]))
inQ <- ft$Posterior1[wLP,MyData$parm.names]
return(inQ)
}
Functions for calculating the RMSE and R² values.
calc_rmse <- function(observed, modelled) {
rmse <- sqrt(mean((observed - modelled)^2))
return(rmse)
}
calc_r_squared <- function(observed, modelled) {
ss_total <- sum((observed - mean(observed))^2)
ss_residual <- sum((observed - modelled)^2)
rsq <- 1 - (ss_residual / ss_total)
return(rsq)
}
parm.m <-myMAPquantile(ft.LD.DRAM,MyData,q=c(0.01,0.99))
sif.mat <- t(apply(parm.m, 1, function(x) {predict_ttr(x, model_ssm, MyData)$sif}))
pred.lo <- apply(sif.mat,2,quantile,probs=0.025)
pred.up <- apply(sif.mat,2,quantile,probs=0.975)
pred.mean <- apply(sif.mat,2,mean)
First plot the data and model predictions as a time series.
oldpar <- par(mar=c(4,4,2,4))
mycol<-c("#92e2ff","#ff1f11","#377eb8")
x<-ttr.time.in$date
yup <- 100
plot(x,MyData$y[,"sif"],type="p",ylim=c(0,yup),ylab="",xlab="",main=site.name,
axes=F, pch=19, cex=0.5)
axis(2,at=seq(0,yup,length=5),labels=seq(0,yup,length=5))
polygon(c(x,rev(x)),c(pred.lo,rev(pred.up)),
col= adjustcolor( mycol[1], alpha.f = 0.6), border=mycol[1])
points(x,MyData$y[,"sif"],pch=19,cex=0.5)
lines(x,pred.mean,col=mycol[2],lwd=2.0,lty=1)
axis.Date(1,at = seq(as.Date("2000/01/1"),as.Date("2021/12/1"), "years"), line=0)
mtext("SIF", side=2, line=3, adj=0.5, cex=1.0, col="black", outer=FALSE)
par(oldpar)
Now plot the data versus model predictions.
plot(MyData$y[,"sif"],pred.mean,pch=19,col=mycol[1],
ylab="Predicted SIF",xlab="Observed SIF")
abline(a=0,b=1,lwd=2)
abline(lm(pred.mean~MyData$y[,"sif"]),lty=2,lwd=2)
legend("topleft",bty="n",lty=c(1,2),legend=c("1:1 line","regression line"))
calc_r_squared(MyData$y[,"sif"],pred.mean)
## [1] 0.9038121
calc_rmse(MyData$y[,"sif"],pred.mean)
## [1] 6.924631
## png
## 2
## png
## 2