# A Riemannian Penalty Method for Exploratory Item Factor Analysis
#
# Reference:
# Liu, Y. (2020). A Riemannian Optimization Algorithm for Joint Maximum
# Likelihood Estimation of High-Dimensional Exploratory Item Factor Analysis.
# Psychometrika. doi: 10.1007/s11336-020-09711-8
#
# Author: Yang Liu
#
# Last modified: 06/25/2020

# eval.par: evaluate the parameter matrix from the quadruplet (U, w, V, R)
# eval.tvec: evaluate the tangent vector from the quadruplet (Va, B, UC, VD)
#
# args:
#   par: parameter quadruplet (list)
#     $U: column basis, orthonormal, w/o 1st column (numeric, dim = d1 x k)
#     $w: intercepts (numeric, dim = d2)
#     $V: row basis, orthonormal, w/o 1st column (numeric, dim = d2 x k)
#     $R: k x k matrix s.t. Vbar = cbind(w, V %*% R) (numeric, dim = k x k)
#   tvec: tangent vector quadruplet (list)
#     $Va: Vperp %*% a (numeric, dim = d2)
#     $B: square matrix (numeric, dim = k x k)
#     $UC: Uperp %*% C (numeric, dim = d1 x k)
#     $VD: Vperp %*% D (numeric, dim = d2 x k)

eval.par <- function(par) {
  d1 <- nrow(par$U)
  U <- cbind(d1^-0.5, par$U)
  V <- cbind(par$w, par$V %*% par$R)
  tcrossprod(U, V)
}

eval.tvec <- function(par, tvec) {
  d1 <- nrow(par$U)
  T1 <- cbind(d1^-0.5, par$U %*% tvec$B, tvec$UC, par$U)
  T2 <- cbind(tvec$Va, par$V, par$V, tvec$VD)
  tcrossprod(T1, T2)
}

# data.gen: data generation
#
# args:
#   Par: true parameter matrix (numeric, dim = d1 x d2)
#   n: expected number of observed entries (integer, dim = 1)
#
# returns: item response matrix (integer, dim = d1 x d2)

data.gen <- function(Par, n) {
  d <- dim(Par)
  d1d2 <- prod(d)
  # each entry is observed independently with probability n / (d1 * d2)
  obs <- which(rbinom(d1d2, 1L, n / d1d2) == 1L)
  Dat <- matrix(NA_integer_, d[1L], d[2L])
  Dat[obs] <- rbinom(length(obs), 1L, plogis(Par[obs]))
  Dat
}

# loglik: log-likelihood
# pen: linear-quadratic penalty
# dpen: derivatives of pen
# objf: objective function (penalized log-likelihood)
# dobjf: derivatives of objf
#
# args:
#   Par: parameter matrix (numeric, dim = d1 x d2)
#   par: out-of-bound parameters (numeric)
#   M: bound for infinity norm (numeric, dim = 1)
#   Dat: data matrix (integer, dim = d1 x d2)
#   mu: smoothing constant (numeric, dim = 1)
#   lmbd: penalizing constant (numeric, dim = 1)
#
# returns:
#   loglik: log-likelihood (numeric, dim = 1)
#   pen: linear-quadratic penalty (numeric, dim = 1)
#   dpen: derivatives of pen (numeric, same length as par)
#   objf: objective function (numeric, dim = 1)
#   dobjf: derivatives of objf (numeric, dim = d1 x d2)

loglik <- function(Par, Dat) {
  obs <- !is.na(Dat)
  sum(Dat[obs] * Par[obs] - log1p(exp(Par[obs])))
}

pen <- function(par, M, mu) {
  if (length(par) == 0L) return(0)
  x <- abs(par) - M
  # linear beyond mu, quadratic on [0, mu): continuously differentiable
  sum(x[x >= mu] - 0.5 * mu) + sum(x[x < mu]^2L) / (2 * mu)
}

objf <- function(Par, Dat, M, mu, lmbd) {
  indx <- which(abs(Par) > M)
  loglik(Par, Dat) - lmbd * pen(Par[indx], M, mu)
}

dpen <- function(par, M, mu) {
  if (length(par) == 0L) return(0)
  x <- abs(par) - M
  sign(par) * ifelse(x >= mu, 1, x / mu)
}

dobjf <- function(Par, Dat, M, mu, lmbd) {
  d <- dim(Par)
  G <- matrix(0, d[1L], d[2L])
  indx <- which(abs(Par) > M)
  G[indx] <- -lmbd * dpen(Par[indx], M, mu)
  obs <- !is.na(Dat)
  G[obs] <- G[obs] + Dat[obs] - plogis(Par[obs])
  G
}
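# The check below is illustrative and not part of the original program: it
# compares dobjf() against a central finite-difference approximation of objf(),
# entry by entry. All dimensions and tuning constants are arbitrary choices for
# the check; since the linear-quadratic penalty is continuously differentiable,
# the two should agree up to finite-difference error.
if (FALSE) {
  set.seed(1)
  Par.chk <- matrix(rnorm(20L, sd = 2), 4L, 5L)
  Dat.chk <- data.gen(Par.chk, 15L)
  G.chk <- dobjf(Par.chk, Dat.chk, M = 1, mu = 0.1, lmbd = 2)
  h <- 1e-6
  fd <- sapply(seq_along(Par.chk), function(i) {
    Pp <- Pm <- Par.chk
    Pp[i] <- Pp[i] + h
    Pm[i] <- Pm[i] - h
    (objf(Pp, Dat.chk, 1, 0.1, 2) - objf(Pm, Dat.chk, 1, 0.1, 2)) / (2 * h)
  })
  max(abs(fd - c(G.chk)))  # should be near zero (~1e-6 or smaller)
}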
# grad.re: compute Riemannian gradient (i.e., project onto the tangent space)
#
# args:
#   par: parameter quadruplet (list)
#   G: Euclidean gradient matrix (numeric, dim = d1 x d2)
#
# returns:
#   gr: Riemannian gradient quadruplet (list)

grad.re <- function(par, G) {
  GV <- G %*% par$V
  GtU <- crossprod(G, par$U)
  gr <- list()
  gr$Va <- nrow(G)^-0.5 * (colSums(G) - par$V %*% colSums(GV))  # colSums(G) = t(G) %*% 1
  gr$B <- crossprod(par$U, GV)
  gr$UC <- GV - par$U %*% gr$B
  gr$VD <- GtU - tcrossprod(par$V, gr$B)
  gr
}

# vec.trans: vector transport
#
# args:
#   par0: 1st parameter quadruplet (list)
#   par1: 2nd parameter quadruplet (list)
#   tvec0: tangent vector quadruplet at par0 (list)
#
# returns: transported tangent vector quadruplet

vec.trans <- function(par0, par1, tvec0) {
  U0tU1 <- crossprod(par0$U, par1$U)
  V0tV1 <- crossprod(par0$V, par1$V)
  UC0tU1 <- crossprod(tvec0$UC, par1$U)
  VD0tV1 <- crossprod(tvec0$VD, par1$V)
  B1 <- crossprod(U0tU1, tvec0$B %*% V0tV1)
  B2 <- crossprod(UC0tU1, V0tV1)
  B3 <- crossprod(U0tU1, VD0tV1)
  UC1 <- par0$U %*% (tvec0$B %*% V0tV1)
  UC2 <- tvec0$UC %*% V0tV1
  UC3 <- par0$U %*% VD0tV1
  VD1 <- par0$V %*% crossprod(tvec0$B, U0tU1)
  VD2 <- par0$V %*% UC0tU1
  VD3 <- tvec0$VD %*% U0tU1
  tvec1 <- list()
  dU <- dim(par0$U)
  r <- dU[1L]^-0.5
  tvec1$Va <- tvec0$Va + r * par0$V %*% colSums(tvec0$UC)
  tvec1$B <- B1 + B2 + B3
  tvec1$UC <- r * matrix(crossprod(tvec0$Va, par1$V), dU[1L], dU[2L], byrow = TRUE) +
    UC1 + UC2 + UC3
  tvec1$VD <- VD1 + VD2 + VD3
  # re-project onto the tangent space at par1
  tvec1$Va <- tvec1$Va - par1$V %*% crossprod(par1$V, tvec1$Va)
  tvec1$UC <- tvec1$UC - par1$U %*% crossprod(par1$U, tvec1$UC)
  tvec1$VD <- tvec1$VD - par1$V %*% crossprod(par1$V, tvec1$VD)
  tvec1
}

# retract: pull a tangent vector back to the low-rank manifold
#
# args:
#   par: parameter quadruplet (list)
#   tvec: tangent vector quadruplet (list)
#   s: step size (numeric, dim = 1)
#
# returns: "projected" parameter quadruplet

retract <- function(par, tvec, s) {
  d1 <- nrow(par$U)
  # column basis
  U_ <- cbind(d1^-0.5, par$U %*% (t(par$R) + s * tvec$B) + s * tvec$UC)
  qrU_ <- qr(U_)
  Ru_ <- qr.R(qrU_)
  # row basis
  V_ <- cbind(par$w + s * tvec$Va, par$V + s * tvec$VD %*% solve(par$R))
  qrV <- qr(V_[, -1L, drop = FALSE])
  ret <- list(
    w = V_[, 1L] - V_[, -1L, drop = FALSE] %*% Ru_[1L, -1L],
    U = qr.Q(qrU_)[, -1L, drop = FALSE],
    V = qr.Q(qrV),
    R = tcrossprod(qr.R(qrV), Ru_[-1L, -1L])
  )
  ret
}

# binary operators for tangent vectors
#   %+%: sum of two tangent vectors
#   %-%: difference of two tangent vectors
#   %:%: element-wise product
#   %.%: product of a scalar and a tangent vector
#   %r%: Riemannian metric (inner product)
#
# args:
#   tvec: tangent vector quadruplet (list)
#   num: numeric vector/scalar (numeric)

`%+%` <- function(tvec1, tvec2) mapply(`+`, tvec1, tvec2, SIMPLIFY = FALSE)
`%-%` <- function(tvec1, tvec2) mapply(`-`, tvec1, tvec2, SIMPLIFY = FALSE)
`%:%` <- function(tvec1, tvec2) mapply(`*`, tvec1, tvec2, SIMPLIFY = FALSE)
`%.%` <- function(tvec, num) lapply(tvec, `*`, e2 = num)
`%r%` <- function(tvec1, tvec2) sum(sapply(tvec1 %:% tvec2, sum))

# conj.dir: conjugate direction
#
# args:
#   iter: current iterate (list)
#   cos.min: minimum cosine similarity (numeric, dim = 1)
#
# returns:
#   iter: updated iterate (list)

conj.dir <- function(iter, cos.min) {
  if (!is.null(iter$par0)) {
    # conjugate direction (Polak-Ribiere+ with transported vectors)
    gr_ <- vec.trans(iter$par0, iter$par, iter$gr0)
    dir_ <- vec.trans(iter$par0, iter$par, iter$dir)
    delta <- iter$gr %-% gr_
    beta <- max(0, (delta %r% iter$gr) * iter$ng0^-2)
    iter$dir <- iter$gr %+% (dir_ %.% beta)
    # check cosine similarity; otherwise fall back to the gradient
    iter$inprod <- iter$dir %r% iter$gr
    cs <- iter$inprod * ((iter$dir %r% iter$dir) * iter$ng * iter$ng)^-0.5
    if (cs >= cos.min) return(iter)
  }
  iter$dir <- iter$gr
  iter$inprod <- iter$ng * iter$ng
  iter
}
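# Illustrative check of the tangent-space projection (not part of the original
# program): for an orthogonal projection P, <G, P(G)>_F = ||P(G)||^2, so the
# Frobenius inner product of G with the evaluated Riemannian gradient should
# match the squared Riemannian norm. All dimensions below are arbitrary.
if (FALSE) {
  set.seed(2)
  d1 <- 40L; d2 <- 30L; k <- 3L
  chk <- list(
    U = qr.Q(qr(cbind(d1^-0.5, matrix(rnorm(d1 * k), d1, k))))[, -1L, drop = FALSE],
    w = rnorm(d2),
    V = qr.Q(qr(matrix(rnorm(d2 * k), d2, k))),
    R = qr.R(qr(matrix(rnorm(d2 * k), d2, k)))
  )
  G.chk <- matrix(rnorm(d1 * d2), d1, d2)
  gr.chk <- grad.re(chk, G.chk)
  sum(G.chk * eval.tvec(chk, gr.chk)) - gr.chk %r% gr.chk  # should be ~0
}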
# lnsrch: backtracking line search
#
# args:
#   iter: current iterate (list)
#   Dat: response data matrix (integer, dim = d1 x d2)
#   M: bound for infinity norm (numeric, dim = 1)
#   mu: smoothing constant (numeric, dim = 1)
#   lmbd: penalizing constant (numeric, dim = 1)
#   ss.min: minimum initial step size (numeric, dim = 1)
#   ss.fac: amplifying factor for initial step size (numeric, dim = 1)
#   max.bk: maximum number of backtracking steps (integer, dim = 1)
#   const.bk: Armijo constants for backtracking (numeric, dim = 2)
#
# returns:
#   iter: updated iterate (list)

lnsrch <- function(iter, Dat, M, mu, lmbd, ss.min, ss.fac, max.bk, const.bk) {
  # initial step size
  iter$ss <- max(iter$ss * ss.fac, ss.min)
  # backtrack
  obj0 <- iter$obj
  for (b in seq_len(max.bk)) {
    iter$par <- retract(iter$par0, iter$dir, iter$ss)
    iter$Par <- eval.par(iter$par)
    iter$obj <- objf(iter$Par, Dat, M, mu, lmbd)
    dobj <- iter$obj - obj0
    # Armijo (sufficient increase) condition
    if (dobj > const.bk[2L] * iter$ss * iter$inprod) break
    # otherwise, shrink
    iter$ss <- iter$ss * const.bk[1L]
  }
  if (b >= max.bk) warning("Maximum number of backtracking steps reached.")
  iter
}

# efa.sub: solve sub-problem of exploratory item factor analysis (with fixed
#   penalty)
#
# args:
#   iter: current iterate (list)
#     $par: current parameter quadruplet (list)
#     $Par: current parameter matrix (numeric, dim = d1 x d2)
#     $par0: previous parameter (list)
#     $gr: current Riemannian gradient (list)
#     $gr0: previous Riemannian gradient (list)
#     $dir: conjugate direction (list)
#     $obj: objective function (numeric, dim = 1)
#     $ng: norm of gradient (numeric, dim = 1)
#     $ng0: previous norm of gradient (numeric, dim = 1)
#     $inprod: inner product between dir and gr (numeric, dim = 1)
#     $ss: step size (numeric, dim = 1)
#     $niter: number of iterations (numeric, dim = 1)
#   Dat: response data matrix (integer, dim = d1 x d2)
#   M: bound for infinity norm (numeric, dim = 1)
#   mu: smoothing constant (numeric, dim = 1)
#   lmbd: penalizing constant (numeric, dim = 1)
#   eps: tolerance for Riemannian gradient (numeric, dim = 1)
#   ss.min: minimum initial step size (numeric, dim = 1)
#   ss.fac: amplifying factor for initial step size (numeric, dim = 1)
#   max.bk: maximum number of backtracking steps (integer, dim = 1)
#   const.bk: Armijo constants for backtracking (numeric, dim = 2)
#   cos.min: minimum cosine similarity (numeric, dim = 1)
#
# returns:
#   iter: updated iterate (list)

efa.sub <- function(iter, Dat, M, mu, lmbd, eps = 0.1, ss.min = 1e-4,
                    ss.fac = 4, max.bk = 50L, const.bk = c(0.5, 0.0001),
                    cos.min = 0.1) {
  # main loop
  iter$ss <- 1
  r <- 1L
  repeat {
    # update components
    #iter$Par <- eval.par(iter$par)
    iter$gr <- grad.re(iter$par, dobjf(iter$Par, Dat, M, mu, lmbd))
    iter$obj <- objf(iter$Par, Dat, M, mu, lmbd)
    iter$ng <- sqrt(iter$gr %r% iter$gr)
    # print info
    cat("inner ", r, ": obj = ", format(iter$obj, nsmall = 3, digits = 3),
        ", grad = ", format(iter$ng, nsmall = 3, digits = 3),
        ", ss = ", format(iter$ss, nsmall = 3, digits = 3), '\r', sep = '')
    # check convergence
    if (iter$ng < eps) break
    # conjugate direction
    iter <- conj.dir(iter, cos.min)
    # line search and update parameters
    iter$par0 <- iter$par
    iter$gr0 <- iter$gr
    iter$ng0 <- iter$ng
    iter <- lnsrch(iter, Dat, M, mu, lmbd, ss.min, ss.fac, max.bk, const.bk)
    r <- r + 1L
  }
  cat('\n')
  # drop work variables and record the number of inner iterations
  iter$par0 <- iter$gr0 <- iter$ng0 <- iter$inprod <- iter$dir <- NULL
  iter$niter <- iter$niter + r
  iter
}
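# Illustrative stand-alone call to the subproblem solver with a fixed penalty,
# mirroring what efa() does internally (see below); it is not part of the
# original program, and all dimensions and constants are arbitrary.
if (FALSE) {
  set.seed(3)
  d1 <- 100L; d2 <- 20L; k <- 2L
  p0 <- list(
    U = qr.Q(qr(cbind(d1^-0.5, matrix(rnorm(d1 * k), d1, k))))[, -1L, drop = FALSE],
    w = rnorm(d2),
    V = qr.Q(qr(matrix(rnorm(d2 * k), d2, k))),
    R = qr.R(qr(matrix(rnorm(d2 * k), d2, k)))
  )
  toy <- data.gen(eval.par(p0), d1 * d2 / 2)  # about half the entries observed
  it <- list(par = p0, Par = eval.par(p0), niter = 0L)
  it <- efa.sub(it, toy, M = 10, mu = 0.1, lmbd = 1)
  loglik(it$Par, toy)  # log-likelihood at the subproblem solution
}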
# efa: fit exploratory item factor model
#
# args:
#   par: parameter quadruplet (list)
#   Dat: response data matrix (integer, dim = d1 x d2)
#   M: bound for infinity norm (numeric, dim = 1)
#   mu: initial smoothing constant (numeric, dim = 1)
#   mu.min: minimum smoothing constant (numeric, dim = 1)
#   mu.fac: multiplier for smoothing constant (numeric, dim = 1)
#   eps: initial subproblem tolerance (numeric, dim = 1)
#   eps.min: minimum subproblem tolerance (numeric, dim = 1)
#   eps.fac: multiplier for subproblem tolerance (numeric, dim = 1)
#   lmbd: initial penalizing constant (numeric, dim = 1)
#   lmbd.fac: multiplier for penalizing constant (numeric, dim = 1)
#   tol.par: tolerance for parameter change (numeric, dim = 1)
#   tol.feas: tolerance for feasibility (numeric, dim = 1)
#   ss.min: minimum initial step size (numeric, dim = 1)
#   ss.fac: amplifying factor for initial step size (numeric, dim = 1)
#   max.bk: maximum number of backtracking steps (integer, dim = 1)
#   const.bk: Armijo constants for backtracking (numeric, dim = 2)
#   cos.min: minimum cosine similarity (numeric, dim = 1)
#
# returns:
#   iter: updated iterate (list)

efa <- function(par, Dat, M, mu = 0.1, mu.min = 1e-4,
                mu.fac = (mu.min / mu)^0.1, eps = 0.1, eps.min = 1e-4,
                eps.fac = (eps.min / eps)^0.1, lmbd = 1, lmbd.fac = 2.5,
                tol.par = 1e-4, tol.feas = 1e-4, ss.min = 1e-4, ss.fac = 2.5,
                max.bk = 50L, const.bk = c(0.5, 0.0001), cos.min = 0.1) {
  # initialization
  iter <- list(par = par)
  iter$Par <- eval.par(iter$par)
  iter$ll <- loglik(iter$Par, Dat)
  iter$niter <- 0L
  iter$ss <- 1
  dpar <- Inf
  feas <- norm(iter$Par, type = 'M') - M
  # outer loop
  r <- 1L
  repeat {
    # print info
    cat("outer ", r, ": eps = ", format(eps, nsmall = 3, digits = 3),
        ", mu = ", format(mu, nsmall = 3, digits = 3),
        ", lmbd = ", format(lmbd, digits = 4),
        ", ll = ", format(iter$ll, nsmall = 3, digits = 3),
        ", dpar = ", format(dpar, nsmall = 3, digits = 3),
        ", feas = ", format(feas, nsmall = 3, digits = 3), '\n', sep = '')
    # solve subproblem
    Par0 <- iter$Par
    iter <- efa.sub(iter, Dat, M, mu, lmbd, eps, ss.min, ss.fac, max.bk,
                    const.bk, cos.min)
    # update quantities
    iter$ll <- loglik(iter$Par, Dat)
    dpar <- norm(iter$Par - Par0, type = 'M')
    feas <- norm(iter$Par, type = 'M') - M
    # check convergence
    if (dpar < tol.par && eps - eps.min < .Machine$double.eps &&
        mu - mu.min < .Machine$double.eps) break
    # update tuning parameters
    eps <- max(eps.min, eps.fac * eps)
    mu <- max(mu.min, mu.fac * mu)
    lmbd <- ifelse(feas < tol.feas, lmbd, lmbd * lmbd.fac)
    r <- r + 1L
  }
  iter
}

# example call

# dimensions
set.seed(12345)
np <- 5000L  # number of persons
ni <- 500L   # number of items
rk <- 5L     # true dimension of LV

# data generation
U1 <- cbind(1, matrix(rnorm(np * rk), np, rk))  # person parameters
if (rk > 1) {
  # sample non-zero loading patterns (row 1 of the grid is all zeros)
  Q <- as.matrix(expand.grid(rep(list(0:1), rk)))[sample(2:2^rk, ni, replace = TRUE), ]
} else if (rk == 1) Q <- matrix(1, ni, rk)
V1 <- cbind(runif(ni, -2, 2), Q * matrix(runif(ni * rk, -2, 2), ni, rk))  # item parameters
dat <- data.gen(tcrossprod(U1, V1), ni * np)  # simulate data (fully observed)

# random starting values (U orthonormal and orthogonal to the constant column)
start <- list(
  U = qr.Q(qr(cbind(np^-0.5, matrix(rnorm(np * rk), np, rk))))[, -1L, drop = FALSE],
  w = rnorm(ni),
  V = qr.Q(qr(matrix(rnorm(ni * rk), ni, rk))),
  R = qr.R(qr(matrix(rnorm(ni * rk), ni, rk)))
)

# fitting
bnd <- 50
result <- efa(par = start, Dat = dat, M = bnd)
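# After fitting, the estimated parameter matrix can be reassembled from the
# returned quadruplet (result$Par holds the same matrix). The factor solution
# is identified only up to rotation, so comparisons with the generating item
# parameters should use rotation-invariant summaries; the sketch below (an
# illustration, not part of the original program) simply compares the fitted
# and true logit matrices.
if (FALSE) {
  Par.hat <- eval.par(result$par)          # np x ni matrix of logits
  norm(Par.hat, type = 'M')                # feasibility: should be about <= bnd
  cor(c(Par.hat), c(tcrossprod(U1, V1)))   # recovery of the generating logits
}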