require(flexsurv)
In the simulation before, the hazard function is periodic (sinusoudal) annually between 0 and 0.5.
simPeriodicMorts <- function(n, A = .5, peak = .5, period = 1, dt = 0.01,
max.periods = 10, plotme = TRUE){
t <- seq(0, max.periods * period, dt)
hazard <- A*sin( (t - peak) * pi/period)^2
cum.prob.survival <- cumprod(1-hazard*dt)
cum.mortality <- 1 - cum.prob.survival
pdf.mortality <- c(0,diff(cum.mortality)*dt)
if(plotme){
par(mfrow = c(2,2), bty = "l", mar = c(2,4,4,2), oma = c(2,0,0,0), tck = 0.02, mgp = c(1.5,.25,0), xpd = NA)
plot(t, hazard, type = "l", main = "hazard function")
plot(t, cum.prob.survival, type = "l", ylim = c(0,1), main = "survival curve")
plot(t, cum.mortality, type = "l", ylim = c(0,1), main = "cumulative mortality: F(t)")
plot(t, pdf.mortality, type = "l", main = "pdf of mortality: f(t)")
}
pdf <- cbind(t, pdf.mortality)
sampleFromPdf <- function(n, pdf){
pdf[,2] <- pdf[,2]/max(pdf[,2])
xlim <- range(pdf[,1])
XY.scatter <- cbind(sample(pdf[,1], 1e4, replace = TRUE),
runif(1e4))
sample <- apply(XY.scatter, 1, function(v) if(v[2] < pdf[pdf[,1] == v[1],2]) return(v[1]))
sample <- unlist(sample)
return(sample[1:n])
}
morts <- sampleFromPdf(n, pdf)
return(morts)
}
Sampling from this process:
morts <- simPeriodicMorts(300, period = 1, peak = 0.3, dt = .01, A = 1)
par(mfrow = c(1,1))
hist(morts, breaks = 60, col = "grey", bor = "darkgrey")
Using a spline fit from the flexsurv
package is not really useful … you need lots of knots and you lose information as you get out into the tails:
require(flexsurv); require(magrittr); require(plyr)
morts.df <- data.frame(morts, year = floor(morts)) %>% mutate(doy = morts - year)
with(morts.df, hist(morts - year, breaks = 60, col = "grey", bor = "darkgrey"))
sim.sp.fit <- flexsurvspline(Surv(morts) ~ 1, k = 8, data = morts.df)
par(mfrow = c(1,2))
plot(summary(sim.sp.fit, type="survival", ci=FALSE)[[1]], type = "l")
plot(summary(sim.sp.fit, type="hazard", ci=FALSE)[[1]], type = "l")
Basically, we’d like to estimate the parameters of a periodic hazard function, i.e.: \(h(t|\theta) = h(t \pm T_P | \theta)\), where \(T_P\) is the period of the periodic function. The simple sinusoid example above fills that requirement, where the parameters are the amplitude and mean of the hazard function.
The pdf of mortality given a hazard function is: \[f(t|\theta) = h(t|\theta) \exp(-\int_{t=0}^t h(t'|\theta) dt') \] The likelihood of a set of \(n\) observations \(T_i\) that entered the study at times \(T_{0,i}\): \[L(\theta | T_i, T_{0,i}) = \prod_{i = 1}^n h(T_i | \theta) \exp\left(-\int_{T_{0,i}}^{T_i} h(t'|\theta) dt'\right)\] and the log-likelihood is given by: \[{\cal l}(\theta | T_i) = -\sum_{i = 1}^n \left( \log(h(T_i | \theta)) - \int_{T_{0,i}}^{T_i} h(t'|\theta) dt' \right)\] or, assuming some discretization (e.g. daily), and scalar observations \(T_i\): \[{\cal l}(\theta | T_i) = \sum_{i = 1}^n \left( \log(h(T_i | \theta)) - \sum_{j = T_{0,i}}^{T_i} h(T_j|\theta) \Delta t \right) \]
loglike <- function(T, a, p, dt, period){
hazard <- function(t, A, peak, period)
A * sin( (t - peak) * pi/period)^2
logcumhaz <- sapply(T, function(t){
t.total <- seq(0, t, dt)
sum(hazard(t.total, A = a, peak = p, period)) * dt
})
loghaz <- log(hazard(T, A = a, peak = p, period) + 1e-40)
sum(loghaz - logcumhaz)
}
T.morts1 <- simPeriodicMorts(300, period = 1, peak = 0.3, dt = .01,
A = 1, plotme = FALSE)
T.morts2 <- simPeriodicMorts(300, period = 1, peak = 0.3, dt = .01,
A = 0.5, plotme = FALSE)
period <- 1
dt <- 0.01
As <- seq(0.1, 2, length=30)
peaks <- seq(0.01, .99, length = 30)
loglike.vector <- Vectorize(loglike, c("a","p"))
ll.matrix1 <- outer(As, peaks, function(a,p)
loglike.vector(T = T.morts1, a, p, dt = 0.01, period = 1))
ll.matrix2 <- outer(As, peaks, function(a,p)
loglike.vector(T = T.morts2, a, p, dt = 0.01, period = 1))
ll.matrix1[ll.matrix1 == -Inf] <- NA
require(fields)
par(mfrow = c(1,2))
image.plot(As, peaks, ll.matrix1, main = "True values: A = 1, peak = 0.3")
image.plot(As, peaks, ll.matrix2, main = "True values: A = 0.5, peak = 0.3")
loglike.optim <- function(par, T.morts, dt, period)
-loglike(T = T.morts, a = par["A"], p = par["peak"], dt, period)
par0 <- c(A = 1, peak = 0.5)
fit1 <- optim(par0, loglike.optim, T.morts = T.morts1, dt = .01, period = 1, hessian = TRUE,
method = "L-BFGS-B", lower = c(A = 0, peak = 0), upper = c(A = 10, peak = 1))
fit2 <- optim(par0, loglike.optim, T.morts = T.morts2, dt = .01, period = 1, hessian = TRUE,
method = "L-BFGS-B", lower = c(A = 0, peak = 0), upper = c(A = 10, peak = 1))
getCIs <- function(fit){
p <- fit$par
CIs <- sqrt(diag(solve(fit$hessian)))
cbind(estimate = p, CI.low = p - 2*CIs, CI.high = p + 2*CIs)
}
getCIs(fit1)
## estimate CI.low CI.high
## A 1.0357281 0.9161314 1.1553248
## peak 0.3010799 0.2834600 0.3186998
getCIs(fit2)
## estimate CI.low CI.high
## A 0.6468245 0.5721347 0.7215143
## peak 0.2991174 0.2814350 0.3167998
Good fits … the smaller A is somewhat underestimated, but the real focus is on the peak anyway, for which the precision is very high.
Period (wrapped) function, with controlled means and widths … AND … as a mixture. The basic distribution can be, for exmaple, a wrapped Cauchy, which has two parameters - a mean and a concentration parameter between 0 and 1 (separate question … how to convert that to something “standard deviation”-ish?)
require(circular)
loglike <- function(T, a, p, r, dt, period){
hazard <- function(t, A, peak, rho, period, dt){
tt <- t/period * 2*pi
mu <- p/period * 2*pi
A * DwrappedCauchy(tt, mu, rho) / dt
}
logcumhaz <- sapply(T, function(t){
t.total <- seq(0, t, dt)
sum(hazard(t.total, A = a, peak = p, rho = r, period, dt)) * dt
})
loghaz <- log(hazard(T, A = a, peak = p, rho = r, period, dt) + 1e-40)
sum(loghaz - logcumhaz)
}
rhos <- seq(0.01, .99, length=30)
peaks <- seq(0.01, .99, length = 30)
loglike.vector <- Vectorize(loglike, c("p","r"))
ll.matrix1 <- outer(rhos, peaks, function(rho, peak)
loglike.vector(T = T.morts1, a = .02, p = peak, r = rho, dt = 0.01, period = 1))
ll.matrix2 <- outer(rhos, peaks, function(rho, peak)
loglike.vector(T = T.morts2, a = .04, p = peak, r = rho, dt = 0.01, period = 1))
par(mfrow = c(1,2))
image.plot(rhos, peaks, ll.matrix1, main = "True values: peak = 0.3, rho = 0.3")
image.plot(rhos, peaks, ll.matrix2, main = "True values: peak = 0.7, rho = 0.6")
Looks great! Now use optim to fit:
loglike.optim <- function(par, T.morts, dt, period)
-loglike(T = T.morts, a = par["A"], p = par["peak"], r = par["rho"], dt, period)
par0 <- c(A = 0.01, peak = 0.5, rho = 0.5)
fit1 <- optim(par0, loglike.optim, T.morts = T.morts1, dt = .01, period = 1, hessian = TRUE,
method = "L-BFGS-B", lower = c(A = 0, peak = 0, rho = 0), upper = c(A = 10, peak = 1, rho = 1))
fit2 <- optim(par0, loglike.optim, T.morts = T.morts2, dt = .01, period = 1, hessian = TRUE,
method = "L-BFGS-B", lower = c(A = 0, peak = 0, rho = 0), upper = c(A = 10, peak = 1, rho = 1))
fit1 %>% getCIs
## estimate CI.low CI.high
## A 0.02232164 0.01974097 0.02490231
## peak 0.31598134 0.28238458 0.34957811
## rho 0.33889692 0.26560164 0.41219220
fit2 %>% getCIs
## estimate CI.low CI.high
## A 0.03979944 0.03516273 0.04443615
## peak 0.69511823 0.68219500 0.70804147
## rho 0.61763881 0.56618908 0.66908854
Awesome fits!
simPeriodicMorts <- function(n, A = .1, peak = .5, rho = 0.6, period = 1, dt = 0.01,
max.periods = 10, plotme = TRUE){
t <- seq(0, max.periods * period, dt)
getHazard <- function(t, A, peak, rho, period, dt){
tt <- t/period*2*pi
mu <- peak/period*2*pi
A * DwrappedCauchy(tt, mu, rho) / dt
}
hazard <- getHazard(t, A, peak, rho, period, dt)
cum.prob.survival <- cumprod(1-hazard*dt)
cum.mortality <- 1 - cum.prob.survival
pdf.mortality <- c(0,diff(cum.mortality)*dt)
if(plotme){
par(mfrow = c(2,2), bty = "l", mar = c(2,4,4,2), tck = 0.02, mgp = c(1.5,.25,0), xpd = NA)
plot(t, hazard, type = "l", main = "hazard function")
plot(t, cum.prob.survival, type = "l", ylim = c(0,1), main = "survival curve")
plot(t, cum.mortality, type = "l", ylim = c(0,1), main = "cumulative mortality: F(t)")
plot(t, pdf.mortality, type = "l", main = "pdf of mortality: f(t)")
}
pdf <- cbind(t, pdf.mortality)
sampleFromPdf <- function(n, pdf){
pdf[,2] <- pdf[,2]/max(pdf[,2])
xlim <- range(pdf[,1])
XY.scatter <- cbind(sample(pdf[,1], 1e4, replace = TRUE),
runif(1e4))
sample <- apply(XY.scatter, 1, function(v) if(v[2] < pdf[pdf[,1] == v[1],2]) return(v[1]))
sample <- unlist(sample)
return(sample[1:n])
}
morts <- sampleFromPdf(n, pdf)
return(morts)
}