functions {
// this function computes the adjustment to the variance
// as a dependence of the intrinsiv time scale and the
// interval between locations
real getphilag(real phi, real lag) {
real philag;
int i;
philag = 0;
i = 0;
while (i <= lag) {
philag = philag + pow(phi, lag * 2.0);
i = i + 1;
}
return philag;
}
// this function (at beta, say, > 10) approximates a
// cut function that rises from -delta to delta with
// slope 1/(2*delta)
real squash(real x, real delta, real beta){
real x_squashed;
x_squashed = (1.0 /(2.0 * delta*beta)) *
log( (1.0 + exp(beta*(x + delta))) /
(1.0 + exp(beta*(x - delta))));
return x_squashed;
}
}
data {
int<lower=0> n; // number of total observations
int k; // n. of individuals
real y[n]; // x-coordinate
real x[n]; // y-coordinate
real yday[n]; // day of year
real dday[n]; // yday[i] - yday[i-1] for each individual
int<lower=1,upper=k> id[n]; // ID of caribou
}
parameters {
vector<lower = min(x), upper = max(x)>[2] mux_mean;
vector<lower = min(y), upper = max(y)>[2] muy_mean;
real<lower = min(x), upper = max(x)> mux1[k];
real<lower = min(y), upper = max(y)> muy1[k];
real<lower = min(x), upper = max(x)> mux2[k];
real<lower = min(y), upper = max(y)> muy2[k];
// migration timing parameters
real<lower = min(yday), upper = max(yday)> t_mean;
real<lower = 0, upper = 30> t_sd;
real<lower = min(yday), upper = max(yday)> t[k]; //
real<lower = 0, upper = 100> dt_mean;
real<lower = 0, upper = 10> dt_sd;
real<lower = 0, upper = 100> dt[k]; //
// components of variance-covariance matrix
real<lower = 0> mux1_sigma;
real<lower = 0> muy1_sigma;
real<lower=-1, upper=1> rho1;
real<lower = 0> mux2_sigma;
real<lower = 0> muy2_sigma;
real<lower=-1, upper=1> rho2;
real<lower = 0> sigma_ar;
real<lower = 0> tau;
}
transformed parameters {
// population level covariance
cov_matrix[2] mu1_Sigma;
cov_matrix[2] mu2_Sigma;
// OUP process parameters
real phi_ar;
real sigma2;
real A;
mu1_Sigma[1,1] = mux1_sigma;
mu1_Sigma[2,2] = muy1_sigma;
mu1_Sigma[1,2] = sqrt(mux1_sigma*muy1_sigma) * rho1;
mu1_Sigma[2,1] = sqrt(mux1_sigma*muy1_sigma) * rho1;
mu2_Sigma[1,1] = mux2_sigma;
mu2_Sigma[2,2] = muy2_sigma;
mu2_Sigma[1,2] = sqrt(mux2_sigma*muy2_sigma) * rho2;
mu2_Sigma[2,1] = sqrt(mux2_sigma*muy2_sigma) * rho2;
phi_ar = exp(-1/tau);
sigma2 = 2 * sigma_ar^2 / (tau * (1 - exp(-2/tau))^2);
A = sigma2 * (-2*log(0.05)*pi());
}
model {
// ancillary variables
vector[n] x_hat;
vector[n] y_hat;
vector[n] x_res;
vector[n] y_res;
vector[2] mu1_mean;
vector[2] mu2_mean;
vector[2] mu1[k]; // mean locations of each individual
vector[2] mu2[k]; //
t_mean ~ normal(mean(yday), 20.0) T[min(yday),max(yday)];
t_sd ~ exponential(1/50.0);
dt_mean ~ normal(20.0, 20.0) T[0,max(yday)];
dt_sd ~ exponential(1/50.0);
mu1_mean[1] = mux_mean[1];
mu1_mean[2] = muy_mean[1];
mu2_mean[1] = mux_mean[2];
mu2_mean[2] = muy_mean[2];
t ~ normal(t_mean, t_sd);
dt ~ normal(dt_mean, dt_sd);
for (i in 1:k){
mu1[i][1] = mux1[i];
mu1[i][2] = muy1[i];
mu2[i][1] = mux2[i];
mu2[i][2] = muy2[i];
mu1[i] ~ multi_normal(mu1_mean, mu1_Sigma);
mu2[i] ~ multi_normal(mu2_mean, mu2_Sigma);
}
sigma_ar ~ exponential(1/(max(x) - min(x)));
tau ~ lognormal(7.0, 1);
x_res[1] = 0;
y_res[1] = 0;
for (i in 2:n){
x_hat[i] = mu1[id[i]][1] +
(mu2[id[i]][1] - mu1[id[i]][1]) *
squash(yday[i] - t[id[i]] - dt[id[i]]/2,
dt[id[i]]/2, 10.0);
y_hat[i] = mu1[id[i]][2] +
(mu2[id[i]][2] - mu1[id[i]][2]) *
squash(yday[i] - t[id[i]] - dt[id[i]]/2,
dt[id[i]]/2, 10.0);
if(id[i] == id[i-1]){
x_res[i] = x[i] - x_hat[i];
y_res[i] = y[i] - y_hat[i];
x_res[i] ~ normal(phi_ar^dday[i] * x_res[i-1],
sigma_ar * sqrt(getphilag(phi_ar, dday[i])));
y_res[i] ~ normal(phi_ar^dday[i] * y_res[i-1],
sigma_ar * sqrt(getphilag(phi_ar, dday[i])));
} else {
x_res[i] = 0;
y_res[i] = 0;
}
}
}
If the STAN code is saved as a file (e.g. hierarchicalSpringMigration.stan
) or text string, it is converted into a compiled portion of STAN code with R using the stan_model
function in the rstan
package:
library(rstan)
Consider movement data of the form:
str(move_data)
## 'data.frame': 40211 obs. of 5 variables:
## $ id : Factor w/ 50 levels "c1007","c1008",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ yday: num 251 252 252 252 253 253 253 254 254 254 ...
## $ year: num 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
## $ x : num -2304020 -2309370 -2311138 -2312638 -2312083 ...
## $ y : num 4188984 4183651 4181711 4179816 4179756 ...
Where id
refers to unique individuals, and x
and y
are projected coordinates. These data can be readily visualized:
To first prepare the data for fitting, we need to:
move_data <- subset(move_data, yday >= 90 & yday < 170)
We can readily view these data:
require(ggplot2); require(gridExtra); require(ggthemes)
xy <- ggplot(move_data, aes(x, y, col = id)) + geom_path() + guides(color = FALSE) + coord_fixed() + theme_few()
tx <- ggplot(move_data, aes(yday, x, col = id)) + geom_path() + guides(color = FALSE) + theme_few()
ty <- ggplot(move_data, aes(yday, y, col = id)) + geom_path() + guides(color = FALSE) + theme_few()
grid.arrange(xy, tx, ty, layout_matrix = rbind(c(1,1,2),c(1,1,3)))
require(plyr); require(magrittr)
move_data <- ddply(move_data, "id",
function(df) if(nrow(df) < 10) return(NULL) else return(df)) %>%
ddply("id", mutate, dday = c(0, diff(yday))) %>%
mutate(id = droplevels(id))
data
block of the STAN model:data_stan <- with(move_data,
list( n = nrow(move_data),
k = length(unique(id)),
x = x/1e3, y = y/1e3, yday = yday,
dday = dday,
id = as.integer(as.factor(droplevels(id)))))
Each chain of the eventual MCMC requires a set of initial parameters, ideally randomized. The following function generates these for data in the appropriate data format:
getInits <- function(my.data, t_mean = 110, dt_mean = 20,
chains = 4, plotme = TRUE){
start.days <- which(with(my.data, yday <= (min(yday) + 15)))
end.days <- which(with(my.data, yday >= (max(yday) - 15)))
inits.raw <-
with(my.data,
list(mux_mean = c(median(x[start.days]), median(x[end.days])),
muy_mean = c(median(y[start.days]), median(y[end.days])),
mux1_sigma = sd(x[start.days]),
muy1_sigma = sd(y[start.days]),
mux2_sigma = sd(x[end.days]),
muy2_sigma = sd(y[end.days]),
t_mean = t_mean, t_sd = 10,
dt_mean = dt_mean, dt_sd = 5,
tau = 10))
inits <- list()
for(i in 1:chains){
inits[[i]] <- lapply(inits.raw, jitter, factor = 1)
inits[[i]]$mux_mean <- pmin(pmax(min(my.data$x), inits[[i]]$mux_mean), max(my.data$x))
inits[[i]]$muy_mean <- pmin(pmax(min(my.data$y), inits[[i]]$muy_mean), max(my.data$y))
inits[[i]]$t_mean <- pmin(pmax(min(my.data$yday), inits[[i]]$t_mean), max(my.data$yday) - 30)
inits[[i]]$dt_mean <- dt_mean
}
if(plotme){
layout(cbind(c(1,1), 2:3))
with(my.data, plot(x, y, pch = 19, col = rgb(0,0,0,.4), cex= 0.5, asp = 1))
with(inits.raw, points(mux_mean, muy_mean, col = 2:3, cex = 2, lwd = 4, lty = 2))
lapply(inits, function(l) with(l, points(mux_mean, muy_mean, col = 2:3, cex = 2, lwd = 2, pch = 4)))
with(my.data, plot(yday, x, pch = 19, col = rgb(0,0,0,.4), cex= 0.5))
with(inits.raw, abline(v = c(t_mean, t_mean+dt_mean), col=2:3, lwd = 2))
lapply(inits, function(l) with(l, abline(v = c(t_mean, t_mean+dt_mean), col = 2:3, lty = 3)))
with(my.data, plot(yday, y, pch = 19, col = rgb(0,0,0,.4), cex= 0.5))
with(inits.raw, abline(v = c(t_mean, t_mean+dt_mean), col=2:3, lwd = 2))
lapply(inits, function(l) with(l, abline(v = c(t_mean, t_mean+dt_mean), col = 2:3, lty = 3)))
}
return(inits)
}
The initial valyes are drawn simply from taking, e.g, the mean of the positions near the beginning of the time series and near the end of the time series for \(\mu_1\) and \(\mu_2\), the standard deviation for \(\sigma_1\) and \(\sigma_2\), etc. A ballpark estimate for the mean start and duration of migration are useful to provide. There is a plotting feature which shows in red and green some of those initial values:
stan_inits <- getInits(data_stan, t_mean = 120, dt_mean = 30, chains = 2)
We are now ready to fit the model. In the following example there are only 400 iterations and a relatively short warmup to speed the process:
## Loading required package: rstan
## Loading required package: StanHeaders
## rstan (Version 2.17.3, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
##
## Attaching package: 'rstan'
## The following object is masked from 'package:magrittr':
##
## extract
migration.fit <- sampling(springmigration_stan, data = data_stan,
init = stan_inits, chains = 2,
iter = 400)
##
## SAMPLING FOR MODEL 'hierarchicalSpringMigration' NOW (CHAIN 1).
##
## Gradient evaluation took 0.047 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 470 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 400 [ 0%] (Warmup)
## Iteration: 40 / 400 [ 10%] (Warmup)
## Iteration: 80 / 400 [ 20%] (Warmup)
## Iteration: 120 / 400 [ 30%] (Warmup)
## Iteration: 160 / 400 [ 40%] (Warmup)
## Iteration: 200 / 400 [ 50%] (Warmup)
## Iteration: 201 / 400 [ 50%] (Sampling)
## Iteration: 240 / 400 [ 60%] (Sampling)
## Iteration: 280 / 400 [ 70%] (Sampling)
## Iteration: 320 / 400 [ 80%] (Sampling)
## Iteration: 360 / 400 [ 90%] (Sampling)
## Iteration: 400 / 400 [100%] (Sampling)
##
## Elapsed Time: 2920.05 seconds (Warm-up)
## 363.453 seconds (Sampling)
## 3283.5 seconds (Total)
##
##
## SAMPLING FOR MODEL 'hierarchicalSpringMigration' NOW (CHAIN 2).
##
## Gradient evaluation took 0.029 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 290 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 400 [ 0%] (Warmup)
## Iteration: 40 / 400 [ 10%] (Warmup)
## Iteration: 80 / 400 [ 20%] (Warmup)
## Iteration: 120 / 400 [ 30%] (Warmup)
## Iteration: 160 / 400 [ 40%] (Warmup)
## Iteration: 200 / 400 [ 50%] (Warmup)
## Iteration: 201 / 400 [ 50%] (Sampling)
## Iteration: 240 / 400 [ 60%] (Sampling)
## Iteration: 280 / 400 [ 70%] (Sampling)
## Iteration: 320 / 400 [ 80%] (Sampling)
## Iteration: 360 / 400 [ 90%] (Sampling)
## Iteration: 400 / 400 [100%] (Sampling)
##
## Elapsed Time: 1266.83 seconds (Warm-up)
## 41.582 seconds (Sampling)
## 1308.41 seconds (Total)
save(migration.fit, file = "migrationfit.rda")