# Pseudo-likelihood approach - Robust Poisson-Tweedie regression models -
# Author: Wagner Hugo Bonat LEG/IMADA -----------------------------------
# Date: 18/05/2016 -------------------------------------------------------

# Dependencies ------------------------------------------------------------
library(tweedie)   # rtweedie() and dtweedie()
library(statmod)   # rinvgauss() and gauss.quad()
library(lattice)   # panel.xyplot(), used by panel.beeswarm()

# Simulating from Poisson-Tweedie models -----------------------------------
rptweedie <- function(n, mu, phi, power) {
  if (power != 3) {
    x <- rtweedie(n = n, mu = mu, phi = phi, power = power)
  }
  if (power == 3) {
    # Tweedie with power = 3 is the inverse Gaussian; simulate it directly.
    x <- rinvgauss(n = n, mean = mu, dispersion = phi)
  }
  y <- rpois(n = n, lambda = x)
  return(y)
}

# Simulating from Poisson-Tweedie regression models --------------------------
rptweedie_reg <- function(beta, phi, power, X, offset = NULL) {
  eta <- X %*% beta
  if (!is.null(offset)) { eta <- eta + offset }
  mu <- as.numeric(exp(eta))
  if (power != 3) {
    y <- rptweedie(n = length(mu), mu = mu, phi = phi, power = power)
  }
  if (power == 3) {
    x <- rinvgauss(n = length(mu), mean = mu, dispersion = phi)
    y <- rpois(length(mu), lambda = x)
  }
  return(y)
}

# Pseudo-likelihood function --------------------------------------------------
pll.ptw <- function(par, y, X, offset = NULL) {
  n_beta <- dim(X)[2]
  beta <- par[1:n_beta]
  # Parameter order matches pscore(): beta first, then phi, then power.
  phi <- par[n_beta + 1]
  power <- par[n_beta + 2]
  eta <- X %*% beta
  if (!is.null(offset)) { eta <- eta + offset }
  mu <- exp(eta)
  sd <- sqrt(mu + phi * (mu^power))
  output <- sum(dnorm(x = y, mean = mu, sd = sd, log = TRUE))
  return(output)
}

# Pseudo-score and pseudo-sensitivity functions --------------------------------
pscore <- function(par, y, X, offset = NULL, sensitivity = FALSE) {
  output <- list()
  n_beta <- dim(X)[2]
  beta <- par[1:n_beta]
  phi <- par[n_beta + 1]
  power <- par[n_beta + 2]
  eta <- as.numeric(X %*% beta)
  if (!is.null(offset)) { eta <- eta + offset }
  mu <- as.numeric(exp(eta))
  # Quantities used more than once
  mu.p <- mu^power
  phi.mu.p <- phi * mu.p
  res <- y - mu
  res2 <- res^2
  mu.p1 <- mu^(power - 1)
  den <- mu + phi.mu.p
  term1 <- mu.p1 * phi * power + 1
  dpll.mu1 <- -0.5 * (term1 / den)
  dpll.mu2 <- res / den
  dpll.mu3 <- 0.5 * ((res2 * term1) / (den^2))
  dpll.mu <- dpll.mu1 * mu + dpll.mu2 * mu + dpll.mu3 * mu
  # Pseudo-score beta
  pesc.beta <- colSums(dpll.mu * X)
  # Pseudo-score phi
  pesc.phi <- sum(-0.5 * (mu.p / den) + 0.5 * (res2 * mu.p / (den^2)))
  # Pseudo-score power
  pesc.power <- sum(-0.5 * (mu.p * phi * eta / den) +
                      0.5 * (res2 * eta * mu.p * phi / (den^2)))
  # Pseudo-score function
  output$pesc <- c(pesc.beta, pesc.phi, pesc.power)
  if (sensitivity == TRUE) {
    # Sensitivity for beta
    Sbb1 <- 1 / den
    Sbb2 <- 0.5 * ((term1^2) / (den^2))
    Sbb <- Sbb1 + Sbb2
    mu2 <- mu^2
    Sbeta <- matrix(NA, ncol = n_beta, nrow = n_beta)
    for (i in 1:n_beta) {
      for (j in i:n_beta) {
        temp <- X[, i] * X[, j]
        Sbeta[i, j] <- Sbeta[j, i] <- -sum(Sbb * mu2 * temp)
      }
    }
    # Sensitivity phi
    mu.2p <- mu^(2 * power)
    Sphi <- -0.5 * sum(mu.2p / den^2)
    # Sensitivity power
    Spower <- -0.5 * sum((mu.2p * (log(mu)^2) * (phi^2)) / (den^2))
    # Cross-sensitivity phi and power
    Sphi.power <- -0.5 * sum((mu.2p * eta * phi) / (den^2))
    # Cross-sensitivity beta and phi / beta and power
    Sbeta.phi <- colSums(-0.5 * (mu.p * term1 / (den^2)) * mu * X)
    Sbeta.power <- colSums(-(0.5 * (mu.p * eta * phi * term1) / (den^2)) * mu * X)
    b2 <- cbind(Sbeta.phi, Sbeta.power)
    b3 <- t(b2)
    b4 <- matrix(c(Sphi, Sphi.power, Sphi.power, Spower), 2, 2)
    tt1 <- cbind(Sbeta, b2)
    tt2 <- cbind(b3, b4)
    SS <- rbind(tt1, tt2)
    output$sensitivity <- SS
  }
  if (sensitivity == FALSE) { output <- as.numeric(output$pesc) }
  return(output)
}

# Pseudo-variability function ---------------------------------------------------
pvariability <- function(par, y, X, offset = NULL) {
  n_par <- length(par)
  n_beta <- dim(X)[2]
  temp <- matrix(0, ncol = n_par, nrow = n_par)
  # Sum of outer products of the individual pseudo-score contributions
  for (i in 1:length(y)) {
    temp1 <- pscore(par = par, y = y[i], X = matrix(X[i, ], 1, n_beta),
                    offset = offset[i], sensitivity = FALSE)
    temp <- temp + temp1 %*% t(temp1)
  }
  return(temp)
}
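# Usage sketch (added illustration, not part of the original analysis):
# simulate from a Poisson-Tweedie regression and evaluate the pseudo-score at
# the true parameters; all values below are hypothetical. Wrapped in
# if (FALSE) so that sourcing this file stays free of side effects.
if (FALSE) {
  set.seed(123)
  dat <- data.frame(x1 = seq(0, 1, length.out = 200))
  X <- model.matrix(~ x1, data = dat)
  y <- rptweedie_reg(beta = c(2, 0.8), phi = 1, power = 2, X = X)
  # Pseudo-score at the true parameters; order is (beta, phi, power)
  pscore(par = c(2, 0.8, 1, 2), y = y, X = X)
  # Robust variability matrix of the individual score contributions
  pvariability(par = c(2, 0.8, 1, 2), y = y, X = X)
}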
# Newton scoring -----------------------------------------------------------------
newton_scoring <- function(ini, y, X, offset = NULL, tol = 0.0001,
                           max_iter = 20, verbose = FALSE, alpha = 1) {
  sol <- matrix(NA, max_iter, length(ini))
  sol[1, ] <- ini
  for (i in 2:max_iter) {
    TEMP <- pscore(par = ini, y = y, X = X, offset = offset,
                   sensitivity = TRUE)
    P.SCORE <- TEMP$pesc
    P.SENSITIVITY <- TEMP$sensitivity
    sol[i, ] <- as.numeric(ini - alpha * solve(P.SENSITIVITY, P.SCORE))
    ini <- sol[i, ]
    if (verbose == TRUE) { print(round(ini, 4)) }
    tolera <- abs(c(sol[i, ]) - c(sol[i - 1, ]))
    if (all(tolera < tol) == TRUE) break
  }
  output <- list("Iteration" = sol, "Estimates" = ini,
                 "Sensitivity" = P.SENSITIVITY)
  return(output)
}

# GLM style interface for pseudo-likelihood estimation -----------------------------
glm.ptw <- function(formula, initial, data, offset = NULL, approach = "PMLE",
                    method = "Newton", method.optim = "BFGS", tol = 1e-04,
                    max_iter = 20, verbose = FALSE, alpha = 1) {
  # Starting values for beta from a quasi-Poisson fit
  ini_beta <- coef(glm(formula, data = data,
                       family = quasi(link = "log", variance = "mu")))
  mf <- model.frame(formula, data = data)
  Y <- model.response(mf)
  X <- model.matrix(formula, data = data)
  if (approach == "PMLE") {
    initial <- c(ini_beta, initial)
    if (method == "Newton") {
      output <- newton_scoring(ini = initial, y = Y, X = X, offset = offset,
                               tol = tol, max_iter = max_iter,
                               verbose = verbose, alpha = alpha)
      Estimates <- output$Estimates
      Sensitivity <- output$Sensitivity
    }
    if (method == "optim") {
      output <- optim(par = initial, fn = pll.ptw, gr = pscore, y = Y, X = X,
                      offset = offset, method = method.optim, hessian = TRUE,
                      control = list(fnscale = -1))
      Estimates <- output$par
      Sensitivity <- output$hessian
    }
    VARIABILITY <- pvariability(par = Estimates, y = Y, X = X, offset = offset)
    # Sandwich (robust) variance estimator
    inv_S <- solve(-Sensitivity)
    VCOV <- inv_S %*% VARIABILITY %*% inv_S
    output <- list("Estimates" = Estimates, "VCOV" = VCOV)
  }
  if (approach == "QMLE") {
    output <- glm.qtw(formula = formula, initial = initial, data = data,
                      tol = tol, max_iter = max_iter, alpha = alpha,
                      verbose = verbose)
  }
  return(output)
}

# Quasi-likelihood estimation --------------------------------------------------
quasi_score_beta <- function(beta, lambda, y, X, offset = NULL,
                             sensitivity = FALSE) {
  output <- list()
  n_beta <- dim(X)[2]
  phi <- lambda[1]
  power <- lambda[2]
  eta <- as.numeric(X %*% beta)
  if (!is.null(offset)) { eta <- eta + offset }
  mu <- as.numeric(exp(eta))
  mu.p <- mu^power
  den <- mu + phi * mu.p
  res <- y - mu
  inv_C <- 1 / den
  output$esc_beta <- colSums(mu * X * inv_C * res)
  if (sensitivity == TRUE) {
    II <- matrix(NA, ncol = n_beta, nrow = n_beta)
    for (i in 1:n_beta) {
      for (j in 1:n_beta) {
        II[i, j] <- -sum(mu * X[, i] * inv_C * mu * X[, j])
      }
    }
    output$S_beta <- II
  }
  return(output)
}

# Pearson estimating function for lambda ----------------------------------------
pearson_lambda <- function(lambda, beta, y, X, offset = NULL,
                           sensitivity = FALSE) {
  output <- list()
  phi <- lambda[1]
  power <- lambda[2]
  eta <- as.numeric(X %*% beta)
  if (!is.null(offset)) { eta <- eta + offset }
  mu <- as.numeric(exp(eta))
  mu.p <- mu^power
  den <- mu + phi * mu.p
  res2 <- (y - mu)^2
  W_phi <- mu.p / (den^2)
  W_power <- mu.p * eta * phi / (den^2)
  esc_phi <- sum(W_phi * (res2 - den))
  esc_power <- sum(W_power * (res2 - den))
  output$esc_lambda <- c(esc_phi, esc_power)
  if (sensitivity == TRUE) {
    S_phi <- -sum((mu.p / den)^2)
    S_phi_power <- -sum(((mu^(2 * power)) * eta * phi) / den^2)
    S_power <- -sum(((mu.p * eta * phi)^2) / (den^2))
    SS <- matrix(c(S_phi, S_phi_power, S_phi_power, S_power), 2, 2)
    output$S_lambda <- SS
  }
  return(output)
}
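# Usage sketch for the fitting interface (added illustration): pseudo-maximum
# likelihood via Newton scoring on data simulated as in the sketch above;
# c(1, 2) are hypothetical starting values for (phi, power).
if (FALSE) {
  dat$y <- rptweedie_reg(beta = c(2, 0.8), phi = 1, power = 2,
                         X = model.matrix(~ x1, data = dat))
  fit_pmle <- glm.ptw(y ~ x1, initial = c(1, 2), data = dat)
  cbind(Estimate = fit_pmle$Estimates,
        Std.Error = sqrt(diag(fit_pmle$VCOV)))
}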
# Chaser algorithm ----------------------------------------------------------------
chaser_quasi <- function(reg_ini, disp_ini, Y, X, offset = NULL, tol = 0.0001,
                         max_iter = 20, alpha = 1, verbose = FALSE) {
  sol_beta <- matrix(NA, max_iter, length(reg_ini))
  sol_disp <- matrix(NA, max_iter, length(disp_ini))
  sol_beta[1, ] <- reg_ini
  sol_disp[1, ] <- disp_ini
  for (i in 2:max_iter) {
    # Update the regression parameters given the current dispersion values
    reg_temp <- quasi_score_beta(beta = reg_ini, lambda = disp_ini, y = Y,
                                 X = X, offset = offset, sensitivity = TRUE)
    sol_beta[i, ] <- as.numeric(reg_ini - solve(reg_temp$S_beta,
                                                reg_temp$esc_beta))
    reg_ini <- sol_beta[i, ]
    # Update the dispersion parameters given the new regression values
    disp_temp <- pearson_lambda(lambda = disp_ini, beta = reg_ini, y = Y,
                                X = X, offset = offset, sensitivity = TRUE)
    sol_disp[i, ] <- as.numeric(disp_ini - alpha * solve(disp_temp$S_lambda,
                                                         disp_temp$esc_lambda))
    disp_ini <- sol_disp[i, ]
    if (verbose == TRUE) print(round(disp_ini, 4))
    tolera <- abs(c(sol_beta[i, ], sol_disp[i, ]) -
                    c(sol_beta[i - 1, ], sol_disp[i - 1, ]))
    if (all(tolera < tol) == TRUE) break
  }
  saida <- list("IterationRegression" = sol_beta,
                "IterationCovariance" = sol_disp,
                "Regression" = reg_ini, "Covariance" = disp_ini,
                "S_beta" = reg_temp, "S_lambda" = disp_temp)
  return(saida)
}

# GLM style interface for quasi-likelihood estimation -------------------------------
glm.qtw <- function(formula, initial, data, tol = 1e-04, max_iter = 100,
                    alpha = 1, verbose = FALSE) {
  ini_beta <- coef(glm(formula, data = data,
                       family = quasi(link = "log", variance = "mu")))
  initial <- c(ini_beta, initial)
  mf <- model.frame(formula, data = data)
  Y <- model.response(mf)
  X <- model.matrix(formula, data = data)
  n_beta <- dim(X)[2]
  reg_ini <- initial[1:n_beta]
  disp_ini <- initial[c(n_beta + 1, n_beta + 2)]
  output <- chaser_quasi(reg_ini = reg_ini, disp_ini = disp_ini, Y = Y, X = X,
                         tol = tol, verbose = verbose, max_iter = max_iter,
                         alpha = alpha)
  Estimates <- c(output$Regression, output$Covariance)
  eta <- as.numeric(X %*% output$Regression)
  mu <- exp(eta)
  phi <- output$Covariance[1]
  power <- output$Covariance[2]
  mu.p <- mu^power
  den <- mu + phi * mu.p
  # Cross-sensitivity between beta and (phi, power)
  S_beta_phi <- colSums(((mu.p * phi * X + mu * X) * mu.p) / (den^2))
  S_beta_power <- colSums(((mu.p * phi * power * X + mu * X) *
                             (mu.p * eta * phi)) / (den^2))
  S_cross <- rbind(S_beta_phi, S_beta_power)
  dd <- dim(S_cross)
  S_0 <- matrix(0, ncol = dd[1], nrow = dd[2])
  S1 <- cbind(output$S_beta$S_beta, S_0)
  S2 <- cbind(S_cross, output$S_lambda$S_lambda)
  # Full joint sensitivity matrix (not used by the std. errors below)
  Sensitivity <- rbind(S1, S2)
  # Variability of the Pearson estimating function for (phi, power)
  temp <- matrix(0, ncol = 2, nrow = 2)
  for (i in 1:length(Y)) {
    temp2 <- pearson_lambda(lambda = output$Covariance,
                            beta = output$Regression, y = Y[i],
                            X = matrix(X[i, ], 1, n_beta))
    escore <- c(temp2$esc_lambda)
    temp <- temp + escore %*% t(escore)
  }
  std.beta <- sqrt(diag(-solve(output$S_beta$S_beta)))
  std.lambda <- sqrt(diag(solve(-output$S_lambda$S_lambda) %*% temp %*%
                            solve(-output$S_lambda$S_lambda)))
  output <- list("Estimates" = Estimates,
                 "std.error" = c(std.beta, std.lambda))
  return(output)
}

# MLE ------------------------------------------------------------------------------
# Integrand --------------------------------------------------------------------------
integrand <- function(x, y, mu, phi, power) {
  int <- dpois(y, lambda = x) * dtweedie(x, mu = mu, phi = phi, power = power)
  return(int)
}

# Numerical integration using Gauss-Laguerre method -----------------------------------
gauss_laguerre <- function(integrand, n_pts, y, mu, phi, power) {
  pts <- gauss.quad(n_pts, kind = "laguerre")
  integral <- sum(pts$weights * integrand(pts$nodes, y = y, mu = mu,
                                          phi = phi, power = power) /
                    exp(-pts$nodes))
  return(integral)
}
gauss_laguerre_vec <- Vectorize(FUN = gauss_laguerre, vectorize.args = "y")
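# Sanity check for the quadrature (added illustration, not from the original
# script): for power = 2 the Poisson-Tweedie reduces to the negative binomial
# with size = 1/phi and mean mu, so dnbinom() gives an exact benchmark; the
# parameter values below are hypothetical.
if (FALSE) {
  gauss_laguerre(integrand = integrand, n_pts = 100, y = 3,
                 mu = 2, phi = 1, power = 2)
  dnbinom(3, mu = 2, size = 1)  # exact negative binomial value
}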
# Numerical integration using Monte Carlo method --------------------------------------
monte_carlo <- function(integrand, n_pts, y, mu, phi, power) {
  # Importance sampling with the Tweedie density as the proposal
  pts <- rtweedie(n_pts, mu = mu, phi = phi, power = power)
  norma <- dtweedie(pts, mu = mu, phi = phi, power = power)
  integral <- mean(integrand(pts, y = y, mu = mu, phi = phi,
                             power = power) / norma)
  return(integral)
}

# Probability mass function Poisson-Tweedie --------------------------------------------
dptweedie_aux <- function(y, mu, phi, power, n_pts, method) {
  if (method == "laguerre" & y > 0) {
    pmf <- gauss_laguerre(integrand = integrand, n_pts = n_pts, y = y,
                          mu = mu, phi = phi, power = power)
  }
  if (method == "laguerre" & y == 0) {
    # P(Y = 0) is computed as one minus the pmf summed over a grid of
    # positive values, which is more stable than direct quadrature at zero.
    v.y <- round(10 * sqrt(mu + phi * mu^power), 0)
    if (v.y > 1000) { v.y <- 1000 }
    y.grid <- 0:v.y
    temp <- gauss_laguerre_vec(integrand = integrand, n_pts = n_pts,
                               y = y.grid, mu = mu, phi = phi, power = power)
    pmf <- 1 - sum(temp) + temp[1]
  }
  if (method == "MC") {
    pmf <- monte_carlo(integrand = integrand, n_pts = n_pts, y = y,
                       mu = mu, phi = phi, power = power)
  }
  return(pmf)
}
# Vectorized version
dptweedie <- Vectorize(FUN = dptweedie_aux, vectorize.args = c("y", "mu"))

# Plot: empirical versus fitted Poisson-Tweedie mass function
plot_ptweedie <- function(mu, phi, power, n_sample, grid_y, n_pts,
                          method, main) {
  obs <- rptweedie(n = n_sample, mu = mu, phi = phi, power = power)
  dens <- dptweedie(y = grid_y, mu = mu, phi = phi, power = power,
                    method = method, n_pts = n_pts)
  plot(table(obs) / n_sample, ylab = "Mass function", xlab = "y",
       col = "gray", main = main)
  nn <- length(grid_y) - 1
  lines(c(0:nn), dens, lty = 2, lwd = 2, col = "black", type = "l")
}
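# Quick checks for the pmf machinery above (added illustration; settings are
# hypothetical): the probabilities should sum to approximately one, and the
# fitted pmf should track a simulated sample.
if (FALSE) {
  sum(dptweedie(y = 0:50, mu = 2, phi = 1, power = 2,
                n_pts = 100, method = "laguerre"))  # approximately 1
  plot_ptweedie(mu = 5, phi = 1, power = 2, n_sample = 5000, grid_y = 0:30,
                n_pts = 100, method = "laguerre", main = "PTw(5, 1, 2)")
}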
# Log-likelihood function ----------------------------------------------------------
ll.ptw <- function(param, X, Y, method.integral, n_pts, offset) {
  n_beta <- dim(X)[2]
  beta <- param[1:n_beta]
  phi <- param[n_beta + 1]
  power <- param[n_beta + 2]
  if (power < 1) { power <- 1.01 }
  mu <- exp(X %*% beta + offset)
  ll <- log(dptweedie(y = Y, mu = mu, phi = phi, power = power,
                      n_pts = n_pts, method = method.integral))
  ll[which(ll == -Inf)] <- NA
  ll <- sum(ll, na.rm = TRUE)
  print(ll)
  print(round(param, 2))
  return(ll)
}

# Chaser for beta --------------------------------------------------------------------
chaser_beta <- function(reg_ini, disp_ini, Y, X, offset = NULL, tol = 0.0001,
                        max_iter = 20, alpha = 1, verbose = FALSE) {
  sol_beta <- matrix(NA, max_iter, length(reg_ini))
  sol_beta[1, ] <- reg_ini
  for (i in 2:max_iter) {
    reg_temp <- quasi_score_beta(beta = reg_ini, lambda = disp_ini, y = Y,
                                 X = X, offset = offset, sensitivity = TRUE)
    sol_beta[i, ] <- as.numeric(reg_ini - solve(reg_temp$S_beta,
                                                reg_temp$esc_beta))
    reg_ini <- sol_beta[i, ]
    tolera <- abs(c(sol_beta[i, ]) - c(sol_beta[i - 1, ]))
    if (all(tolera < tol) == TRUE) break
  }
  saida <- list("IterationRegression" = sol_beta, "Regression" = reg_ini,
                "S_beta" = reg_temp$S_beta)
  return(saida)
}

# Profile log-likelihood function ----------------------------------------------------
profile.ll.ptw <- function(lambda, beta.ini, X, Y, offset = NULL,
                           method.integral, n_pts) {
  phi <- lambda[1]
  power <- lambda[2]
  if (power < 1) { power <- 1.01 }
  if (phi < 0) { phi <- 1e-04 }
  # Profile out beta: solve the quasi-score equations at the current (phi, power)
  beta_temp <- chaser_beta(reg_ini = beta.ini, disp_ini = c(phi, power),
                           Y = Y, X = X, offset = offset)
  mu <- exp(X %*% beta_temp$Regression)
  ll <- log(dptweedie(y = Y, mu = mu, phi = phi, power = power,
                      n_pts = n_pts, method = method.integral))
  ll[which(ll == -Inf)] <- NA
  ll <- sum(ll, na.rm = TRUE)
  print(round(c(phi, power), 4))
  print(ll)
  return(-ll)
}

# GLM style fitting function for Poisson-Tweedie regression models ---------------------
glm.mpt <- function(formula, data, offset = NULL, method.integral,
                    method.optim, n_pts, initial) {
  mf <- model.frame(formula, data)
  Y <- model.response(mf)
  X <- model.matrix(formula, data)
  n_beta <- dim(X)[2]
  beta.ini <- initial[1:n_beta]
  ini <- c(initial[n_beta + 1], initial[n_beta + 2])
  # Maximize the profile log-likelihood in (phi, power)
  tt <- optim(par = ini, fn = profile.ll.ptw, method = method.optim,
              beta.ini = beta.ini, X = X, Y = Y,
              method.integral = method.integral, n_pts = n_pts,
              control = list(fnscale = 1), hessian = TRUE)
  beta_temp <- chaser_beta(reg_ini = beta.ini, disp_ini = tt$par, Y = Y,
                           X = X, offset = offset)
  Regression <- beta_temp$Regression
  VCOV.beta <- solve(-beta_temp$S_beta)
  Dispersion <- tt$par
  VCOV.disp <- solve(tt$hessian)
  output <- list("Regression" = Regression, "VCOV.beta" = VCOV.beta,
                 "Dispersion" = Dispersion, "VCOV.disp" = VCOV.disp,
                 "logLik" = tt$value)
  return(output)
}

## Summary simulation -------------------------------------------------------------
# Coverage rate
coverage_rate <- function(pt, std, true_value) {
  ic.min <- pt - qnorm(0.975) * std
  ic.max <- pt + qnorm(0.975) * std
  fora <- sum(ic.min > true_value) + sum(ic.max < true_value)
  coverage <- 1 - fora / length(pt)
  return(coverage)
}

# Summary of simulation study
summary_simulation <- function(results, true_values) {
  NN <- dim(results)[1]
  results <- na.exclude(results)
  FAIL <- NN - dim(results)[1]
  pontual <- results[, 1:length(true_values)]
  std_error <- results[, (length(true_values) + 1):dim(results)[2]]
  pt_average <- colMeans(pontual, na.rm = TRUE)
  bias <- pt_average - true_values
  std_error_average <- colMeans(std_error, na.rm = TRUE)
  empirical_std_error <- apply(pontual, 2, sd)
  rate_emp_sample <- std_error_average / empirical_std_error
  coverage <- c()
  for (i in 1:length(true_values)) {
    coverage[i] <- coverage_rate(pontual[, i], std_error[, i], true_values[i])
  }
  output <- data.frame("Expectation" = pt_average, "Bias" = bias,
                       "Expected Std Error" = std_error_average,
                       "Ic Min" = bias - std_error_average,
                       "Ic Max" = bias + std_error_average,
                       "Empirical Std Error" = empirical_std_error,
                       "Coverage" = coverage, "FAIL" = FAIL)
  return(output)
}

## Dispersion index
disp_index <- function(mu, phi, power) {
  vv <- (mu + phi * mu^power) / mu
  return(mean(vv))
}
disp_index <- Vectorize(FUN = disp_index, vectorize.args = c("mu"))

# Zero inflation index
zero_inflation <- function(mu, phi, power, n_pts = 50, method = "laguerre") {
  PX0 <- dptweedie(y = 0, mu = mu, phi = phi, power = power,
                   n_pts = n_pts, method = method)
  ZI <- 1 + log(PX0) / mu
  return(ZI)
}
zero_inflation <- Vectorize(FUN = zero_inflation, vectorize.args = c("mu"))

# Long-tail behavior
long_tail <- function(x, mu, phi, power, n_pts = 50, method = "laguerre") {
  Px1 <- dptweedie(y = c(x, x + 1), mu = mu, phi = phi, power = power,
                   method = method, n_pts = n_pts)
  LTI <- Px1[2] / Px1[1]
  return(LTI)
}
long_tail <- Vectorize(FUN = long_tail, vectorize.args = c("x"))

# Extract pll and pAIC for Poisson regression models ----------------------------------
gof.poisson <- function(obj) {
  mu <- fitted(obj)
  ll <- sum(dnorm(obj$y, mean = mu, sd = sqrt(mu), log = TRUE))
  pAIC <- 2 * length(coef(obj)) - 2 * ll
  return(data.frame("pll" = ll, "Df" = length(coef(obj)), "pAIC" = pAIC))
}

# Extract pll and pAIC for pseudo log-likelihood Poisson-Tweedie models ----------------
gof.ptw <- function(object, form, data) {
  X <- model.matrix(form, data = data)
  mf <- model.frame(form, data = data)
  Y <- model.response(mf)
  n_beta <- dim(X)[2]
  mu <- exp(X %*% object$Estimates[1:n_beta])
  VAR.mu <- mu + object$Estimates[n_beta + 1] * (mu^object$Estimates[n_beta + 2])
  pLL <- sum(dnorm(Y, mean = mu, sd = sqrt(VAR.mu), log = TRUE))
  PPAIC <- 2 * length(object$Estimates) - 2 * pLL
  Df <- length(object$Estimates)
  output <- data.frame("plogLik" = pLL, "Df" = Df, "pAIC" = PPAIC)
  return(output)
}
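# Usage sketch for the summary indexes (added illustration; parameter values
# are hypothetical). The dispersion index equals 1 + phi * mu^(power - 1);
# the zero-inflation index is positive under excess zeros relative to the
# Poisson; the long-tail index is the ratio P(Y = x + 1) / P(Y = x).
if (FALSE) {
  disp_index(mu = c(1, 5, 10), phi = 1, power = 2)
  zero_inflation(mu = 5, phi = 1, power = 2)
  long_tail(x = 10, mu = 5, phi = 1, power = 2)
}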
# Panel plot settings for lattice objects used in Figure 2 -----------------------------
ac <- list(pad1 = 0.5, pad2 = 0.5, tck = 0.5)
mycol <- gray.colors(n = 5)
ps <- list(box.rectangle = list(col = 1, fill = c("gray70")),
           box.umbrella = list(col = 1, lty = 1),
           dot.symbol = list(col = 1),
           dot.line = list(col = 1, lty = 3),
           plot.symbol = list(col = "gray", cex = 0.7, pch = 20),
           plot.line = list(col = 1),
           plot.polygon = list(col = "gray80"),
           superpose.line = list(col = mycol),
           superpose.symbol = list(col = mycol),
           superpose.polygon = list(col = mycol),
           strip.background = list(col = c("gray90", "gray50")),
           layout.widths = list(left.padding = 0.25, right.padding = -1,
                                ylab.axis.padding = 0),
           layout.heights = list(bottom.padding = 0.25, top.padding = 0,
                                 axis.xlab.padding = 0, xlab.top = 0),
           axis.components = list(bottom = ac, top = ac, left = ac, right = ac))

# Log-likelihood for counts summarized in a frequency table (dimensions are
# hard-coded: y in c(0:5, 7) by five covariate levels) ---------------------------------
ll.ptw.table <- function(param, tab, X) {
  beta <- param[1:3]
  phi <- param[4]
  p <- param[5]
  if (phi < 1e-04) phi <- 1e-04
  power <- p
  if (power < 1) power <- 1.01
  pred <- exp(X %*% beta)
  pred.mat.ptw <- matrix(NA, ncol = 5, nrow = 7)
  for (i in 1:5) {
    temp <- dptweedie(c(0:5, 7), mu = pred[i], phi = phi, power = power,
                      n_pts = 180, method = "laguerre")
    pred.mat.ptw[, i] <- temp
  }
  ll <- sum(log(pred.mat.ptw) * tab)
  print(ll)
  return(-ll)
}

# Same log-likelihood with the parameters as separate scalar arguments
ll.ptw.table2 <- function(b0, b1, b2, phi, p, tab, X) {
  beta <- c(b0, b1, b2)
  if (phi < 1e-04) phi <- 1e-04
  power <- p
  if (power < 1) power <- 1.01
  pred <- exp(X %*% beta)
  pred.mat.ptw <- matrix(NA, ncol = 5, nrow = 7)
  for (i in 1:5) {
    temp <- dptweedie(c(0:5, 7), mu = pred[i], phi = phi, power = power,
                      n_pts = 190, method = "laguerre")
    pred.mat.ptw[, i] <- temp
  }
  ll <- sum(log(pred.mat.ptw) * tab)
  print(ll)
  return(-ll)
}

# Extra function for the Figure: beeswarm-style jitter for lattice panels
panel.beeswarm <- function(x, y, subscripts, r, ...) {
  xx <- x; yy <- y
  aux <- by(cbind(yy, xx, subscripts), xx, function(i) {
    or <- order(i[, 1])
    ys <- i[or, 1]
    yt <- table(ys)
    # Symmetric horizontal displacements for tied y values
    dv <- sapply(unlist(yt), function(j) {
      seq(1, j, l = j) - (j + 1) / 2
    })
    if (!is.list(dv)) { dv <- as.list(dv) }
    xs <- i[or, 2] + r * do.call(c, dv)
    cbind(x = xs, y = ys, subscripts[or])
  })
  aux <- do.call(rbind, aux)
  panel.xyplot(aux[, 1], aux[, 2], subscripts = aux[, 3], ...)
}
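# Usage sketch for the lattice helpers (added illustration): apply the custom
# theme 'ps' and the beeswarm panel to simulated counts; the grouping and the
# spread factor r are hypothetical.
if (FALSE) {
  dd <- data.frame(grp = rep(1:3, each = 50),
                   y = rptweedie(n = 150, mu = 5, phi = 1, power = 2))
  xyplot(y ~ grp, data = dd, par.settings = ps,
         panel = panel.beeswarm, r = 0.08)
}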