Periodic Hazard Functions

require(flexsurv)

Simulated periodic survival data

In the simulation before, the hazard function is periodic (sinusoudal) annually between 0 and 0.5.

simPeriodicMorts <- function(n, A = .5, peak = .5, period = 1, dt = 0.01, 
                                                         max.periods = 10, plotme = TRUE){
    
    t <- seq(0, max.periods * period, dt)
    hazard <- A*sin( (t - peak) * pi/period)^2
    cum.prob.survival <- cumprod(1-hazard*dt)
    cum.mortality <- 1 - cum.prob.survival
    pdf.mortality <- c(0,diff(cum.mortality)*dt)
    
    if(plotme){
        par(mfrow = c(2,2), bty = "l", mar = c(2,4,4,2), oma = c(2,0,0,0), tck = 0.02, mgp = c(1.5,.25,0), xpd = NA)
        plot(t, hazard, type = "l", main = "hazard function")
        plot(t, cum.prob.survival, type = "l", ylim = c(0,1), main = "survival curve")
        plot(t, cum.mortality, type = "l", ylim = c(0,1), main = "cumulative mortality: F(t)")
        plot(t, pdf.mortality, type = "l", main = "pdf of mortality: f(t)")
    }
    
    pdf <- cbind(t, pdf.mortality)
    
    sampleFromPdf <- function(n, pdf){
      pdf[,2] <- pdf[,2]/max(pdf[,2])
      xlim <- range(pdf[,1])
      XY.scatter <- cbind(sample(pdf[,1], 1e4, replace = TRUE),
                          runif(1e4))
      sample <- apply(XY.scatter, 1, function(v) if(v[2] < pdf[pdf[,1] == v[1],2]) return(v[1])) 
      sample <- unlist(sample)
      return(sample[1:n])
    }
    
    morts <- sampleFromPdf(n, pdf)
    return(morts)
}

Sampling from this process:

morts <- simPeriodicMorts(300, period = 1, peak = 0.3, dt = .01, A = 1)

par(mfrow = c(1,1))
hist(morts, breaks = 60, col = "grey", bor = "darkgrey")

Using a spline fit from the flexsurv package is not really useful … you need lots of knots and you lose information as you get out into the tails:

require(flexsurv); require(magrittr); require(plyr)
morts.df <- data.frame(morts, year = floor(morts)) %>% mutate(doy = morts - year)
with(morts.df, hist(morts - year, breaks = 60, col = "grey", bor = "darkgrey"))

sim.sp.fit <- flexsurvspline(Surv(morts) ~ 1, k = 8,  data = morts.df)

par(mfrow = c(1,2))
plot(summary(sim.sp.fit, type="survival", ci=FALSE)[[1]], type = "l")
plot(summary(sim.sp.fit, type="hazard", ci=FALSE)[[1]], type = "l")

likelihood

Basically, we’d like to estimate the parameters of a periodic hazard function, i.e.: \(h(t|\theta) = h(t \pm T_P | \theta)\), where \(T_P\) is the period of the periodic function. The simple sinusoid example above fills that requirement, where the parameters are the amplitude and mean of the hazard function.

The pdf of mortality given a hazard function is: \[f(t|\theta) = h(t|\theta) \exp(-\int_{t=0}^t h(t'|\theta) dt') \] The likelihood of a set of \(n\) observations \(T_i\) that entered the study at times \(T_{0,i}\): \[L(\theta | T_i, T_{0,i}) = \prod_{i = 1}^n h(T_i | \theta) \exp\left(-\int_{T_{0,i}}^{T_i} h(t'|\theta) dt'\right)\] and the log-likelihood is given by: \[{\cal l}(\theta | T_i) = -\sum_{i = 1}^n \left( \log(h(T_i | \theta)) - \int_{T_{0,i}}^{T_i} h(t'|\theta) dt' \right)\] or, assuming some discretization (e.g. daily), and scalar observations \(T_i\): \[{\cal l}(\theta | T_i) = \sum_{i = 1}^n \left( \log(h(T_i | \theta)) - \sum_{j = T_{0,i}}^{T_i} h(T_j|\theta) \Delta t \right) \]

loglike <- function(T, a, p, dt, period){
    hazard <- function(t, A, peak, period)
     A * sin( (t - peak) * pi/period)^2

    logcumhaz <- sapply(T, function(t){
            t.total <- seq(0, t, dt)
            sum(hazard(t.total, A = a, peak = p, period)) * dt
        })
    
    loghaz <- log(hazard(T, A = a, peak = p, period) + 1e-40)
    sum(loghaz - logcumhaz)
}

Likelihood surfaces

T.morts1 <- simPeriodicMorts(300, period = 1, peak = 0.3, dt = .01, 
                                                        A = 1, plotme = FALSE)

T.morts2 <- simPeriodicMorts(300, period = 1, peak = 0.3, dt = .01, 
                                                        A = 0.5, plotme = FALSE)

period <- 1
dt <- 0.01

As <- seq(0.1, 2, length=30)
peaks <- seq(0.01, .99, length = 30)
loglike.vector <- Vectorize(loglike, c("a","p"))
ll.matrix1 <- outer(As, peaks, function(a,p)
    loglike.vector(T = T.morts1, a, p, dt = 0.01, period = 1))
ll.matrix2 <- outer(As, peaks, function(a,p)
    loglike.vector(T = T.morts2, a, p, dt = 0.01, period = 1))

ll.matrix1[ll.matrix1 == -Inf] <- NA

require(fields)
par(mfrow = c(1,2))
image.plot(As, peaks, ll.matrix1, main = "True values: A = 1, peak = 0.3")
image.plot(As, peaks, ll.matrix2, main = "True values: A = 0.5, peak = 0.3")

Likelihood fits:

loglike.optim <- function(par, T.morts, dt, period)
    -loglike(T = T.morts, a = par["A"], p = par["peak"], dt, period)

par0 <- c(A = 1, peak = 0.5)
fit1 <- optim(par0, loglike.optim, T.morts = T.morts1, dt = .01, period = 1, hessian = TRUE, 
                            method = "L-BFGS-B", lower = c(A = 0, peak = 0), upper = c(A = 10, peak = 1))

fit2 <- optim(par0, loglike.optim, T.morts = T.morts2, dt = .01, period = 1, hessian = TRUE,
                            method = "L-BFGS-B", lower = c(A = 0, peak = 0), upper = c(A = 10, peak = 1))

getCIs <- function(fit){
  p <- fit$par
  CIs <- sqrt(diag(solve(fit$hessian)))
  cbind(estimate = p, CI.low = p - 2*CIs, CI.high = p + 2*CIs)
}

getCIs(fit1)

##       estimate    CI.low   CI.high
## A    1.0357281 0.9161314 1.1553248
## peak 0.3010799 0.2834600 0.3186998

getCIs(fit2)

##       estimate    CI.low   CI.high
## A    0.6468245 0.5721347 0.7215143
## peak 0.2991174 0.2814350 0.3167998

Good fits … the smaller A is somewhat underestimated, but the real focus is on the peak anyway, for which the precision is very high.

Better Likelihood (with multiple pulses)

Period (wrapped) function, with controlled means and widths … AND … as a mixture. The basic distribution can be, for exmaple, a wrapped Cauchy, which has two parameters - a mean and a concentration parameter between 0 and 1 (separate question … how to convert that to something “standard deviation”-ish?)

require(circular)
loglike <- function(T, a, p, r, dt, period){
  hazard <- function(t, A, peak, rho, period, dt){
     tt <- t/period * 2*pi
     mu <- p/period * 2*pi
     A * DwrappedCauchy(tt, mu, rho) / dt
  }
    logcumhaz <- sapply(T, function(t){
            t.total <- seq(0, t, dt)
            sum(hazard(t.total, A = a, peak = p, rho = r, period, dt)) * dt
        })
    loghaz <- log(hazard(T, A = a, peak = p, rho = r, period, dt) + 1e-40)
    sum(loghaz - logcumhaz)
}

rhos <- seq(0.01, .99, length=30)
peaks <- seq(0.01, .99, length = 30)

loglike.vector <- Vectorize(loglike, c("p","r"))

ll.matrix1 <- outer(rhos, peaks, function(rho, peak)
    loglike.vector(T = T.morts1, a = .02, p = peak, r = rho, dt = 0.01, period = 1))

ll.matrix2 <- outer(rhos, peaks, function(rho, peak)
    loglike.vector(T = T.morts2, a = .04, p = peak, r = rho, dt = 0.01, period = 1))

par(mfrow = c(1,2))
image.plot(rhos, peaks, ll.matrix1, main = "True values: peak = 0.3, rho = 0.3")
image.plot(rhos, peaks, ll.matrix2, main = "True values: peak = 0.7, rho = 0.6")

Looks great! Now use optim to fit:

loglike.optim <- function(par, T.morts, dt, period)
    -loglike(T = T.morts, a = par["A"], p = par["peak"], r = par["rho"], dt, period)

par0 <- c(A = 0.01, peak = 0.5, rho = 0.5)

fit1 <- optim(par0, loglike.optim, T.morts = T.morts1, dt = .01, period = 1, hessian = TRUE, 
                            method = "L-BFGS-B", lower = c(A = 0, peak = 0, rho = 0), upper = c(A = 10, peak = 1, rho = 1))

fit2 <- optim(par0, loglike.optim, T.morts = T.morts2, dt = .01, period = 1, hessian = TRUE,
                            method = "L-BFGS-B", lower = c(A = 0, peak = 0, rho = 0), upper = c(A = 10, peak = 1, rho = 1))

fit1 %>% getCIs

##        estimate     CI.low    CI.high
## A    0.02232164 0.01974097 0.02490231
## peak 0.31598134 0.28238458 0.34957811
## rho  0.33889692 0.26560164 0.41219220

fit2 %>% getCIs

##        estimate     CI.low    CI.high
## A    0.03979944 0.03516273 0.04443615
## peak 0.69511823 0.68219500 0.70804147
## rho  0.61763881 0.56618908 0.66908854

Awesome fits!

Make flexible for random time STARTS

simPeriodicMorts <- function(n, A = .1, peak = .5, rho = 0.6, period = 1, dt = 0.01, 
                            max.periods = 10, plotme = TRUE){
    
    t <- seq(0, max.periods * period, dt)
    
    getHazard <- function(t, A, peak, rho, period, dt){
     tt <- t/period*2*pi
     mu <- peak/period*2*pi
     A * DwrappedCauchy(tt, mu, rho) / dt
    }
    
    hazard <- getHazard(t, A, peak, rho, period, dt)
    
    cum.prob.survival <- cumprod(1-hazard*dt)
    cum.mortality <- 1 - cum.prob.survival
    pdf.mortality <- c(0,diff(cum.mortality)*dt)
    
    if(plotme){
        par(mfrow = c(2,2), bty = "l", mar = c(2,4,4,2), tck = 0.02, mgp = c(1.5,.25,0), xpd = NA)
        plot(t, hazard, type = "l", main = "hazard function")
        plot(t, cum.prob.survival, type = "l", ylim = c(0,1), main = "survival curve")
        plot(t, cum.mortality, type = "l", ylim = c(0,1), main = "cumulative mortality: F(t)")
        plot(t, pdf.mortality, type = "l", main = "pdf of mortality: f(t)")
    }
    
    pdf <- cbind(t, pdf.mortality)
    
    sampleFromPdf <- function(n, pdf){
      pdf[,2] <- pdf[,2]/max(pdf[,2])
      xlim <- range(pdf[,1])
      XY.scatter <- cbind(sample(pdf[,1], 1e4, replace = TRUE),
                          runif(1e4))
      sample <- apply(XY.scatter, 1, function(v) if(v[2] < pdf[pdf[,1] == v[1],2]) return(v[1])) 
      sample <- unlist(sample)
      return(sample[1:n])
    }
    
    morts <- sampleFromPdf(n, pdf)
    return(morts)
}