# Geometric and Tweedie regression models ---------------------------- # Date: 14/10/2019 --------------------------------------------------- rm(list=ls()) ## Loading extra packages #require(devtools) #install_github("wbonat/mcglm", force = TRUE) # install version from github to use variance = "geom_tweedie" require(mcglm) require(splines) require(bbmle) require(tweedie) require(Matrix) require(statmod) # Data set 1: Smoothing time series of rainfall in Curitiba ---------- data1 <- read.table("wire_dataset1.txt", sep = ",", header = TRUE) head(data1) str(data1) data1$IDX <- 1:2191 levels(data1$SEASON) <- c("AUTUMN","SPRING","SPRING","SUMMER","WINTER") data1$SEASON <- factor(data1$SEASON, c("SUMMER","AUTUMN","WINTER","SPRING")) data1$MONTH <- factor(data1$MONTH, c("JAN","FEB","MAR","APR","MAY","JUN", "JUL","AUG","SET","OCT","NOV","DEC")) ## Loading extra functions source("gtw.r") ## Smooth component K = 50 D <- diff(diff(diag(K))) Xs <- bs(data1$IDX, df = K) ## QR decomposition of design matrix qrx <- qr(Xs) RD <- forwardsolve(t(qr.R(qrx)), t(D)) es <- eigen(RD%*%t(RD), symmetric = TRUE) Xs <- qr.qy(qrx, rbind(es$vector, matrix(0, nrow(Xs) - nrow(RD), nrow(RD)))) # Linear predictor form1 <- PREC ~ Xs-1 # GTRM - QMLE ex1_gtrm_qmle <- mcglm(c(form1), list(mc_id(data1)), link = "log", variance = 'geom_tweedie', covariance = "expm", power_fixed = FALSE, data = data1) # GTRM - MLE pts <- gauss.quad(50, kind = "laguerre") ini <- coef(ex1_gtrm_qmle)$Estimates[1:50] ex1_gtrm_mle <- glm_geom_tweedie(form1, points = pts, integration = "GL", initial = c(ini, 1.41,log(10.16)), data = data1) ## Tweedie regression models source("tweedieregfunctions.r") ex1_trm_qmle <- mcglm(c(form1), list(mc_id(data1)), link = "log", variance = 'tweedie', power_fixed = FALSE, covariance = "expm", data = data1) ex1_trm_mle <- glm.tw_mle(form1, initial = c(log(20),1.5), data = data1, method = "profile", method.optim = "Nelder-Mead") ## Table --------------------------------------------------------------------------------- round(coef(ex1_gtrm_qmle, type = c("power",'tau'), std.error = TRUE)[,1:2],2) round(coef(ex1_trm_qmle, type = c("power",'tau'), std.error = TRUE)[,1:2],2) round(cbind(ex1_trm_mle$Estimates[52:51], sqrt(diag(ex1_trm_mle$VCOV))[52:51]),2) round(cbind(ex1_gtrm_mle$Summary$Estimates,ex1_gtrm_mle$Summary$Std_error),2) cbind(plogLik(ex1_gtrm_qmle, verbose = F)[[1]], plogLik(ex1_trm_qmle, verbose =F)[[1]], ex1_gtrm_mle$logLik ,ex1_trm_mle$logLik) ## Penalized fit Z <- ex1_trm_mle$Estimates[1:50]/sqrt(diag(ex1_trm_mle$VCOV))[1:50] COLL <- which(abs(Z) > 1.96) form <- PREC ~ Xs[,COLL]-1 ex1_trm_mle_pen <- glm.tw_mle(form, initial = c(log(20),1.5), data = data1, method = "profile", method.optim = "Nelder-Mead") ex1_trm_mle_pen pts <- gauss.quad(50, kind = "laguerre") ini <- ex1_trm_mle_pen$Estimates[1:15] ex1_gtrm_mle_pen <- glm_geom_tweedie(form, points = pts, integration = "GL", initial = c(ini, 1.41,log(10.16)), data = data1) ini <- c(ini, ex1_gtrm_mle_pen$Summary$Estimates) ini <- c(ex1_gtrm_mle_pen_full$Summary$Estimates, ex1_gtrm_mle_pen$Summary$Estimates) ex1_gtrm_mle_pen_full <- glm_geom_tweedie(form, points = pts, integration = "GL", method = "Nelder-Mead", initial = ini, data = data1, fit = "beta") ## Fitted values TweedieMLE TRM_MLE <- ex1_trm_mle_pen$Estimates[1:15] TRM_MLE_VCOV <- ex1_trm_mle_pen$VCOV[1:15,1:15] X <- model.matrix(form, data = data1) eta.mle <- X%*%TRM_MLE erro.mle = sqrt(diag(X%*%TRM_MLE_VCOV%*%t(X))) mle.pt <- exp(eta.mle) mle.min <- exp(eta.mle - qnorm(0.975)*erro.mle) mle.max <- exp(eta.mle + qnorm(0.975)*erro.mle) ## Fitted values Geometric Tweedie MLE GTRM_MLE <- ex1_gtrm_mle_pen_full$Summary$Estimates GTRM_MLE_VCOV <- ex1_gtrm_mle_pen_full$VCOV X <- model.matrix(form, data = data1) eta.geom_mle <- X%*%GTRM_MLE erro.geom_mle = sqrt(diag(X%*%GTRM_MLE_VCOV%*%t(X))) mle.geom_pt <- exp(eta.geom_mle) mle.geom_min <- exp(eta.geom_mle - qnorm(0.975)*erro.geom_mle) mle.geom_max <- exp(eta.geom_mle + qnorm(0.975)*erro.geom_mle) # Figure --------------------------------------------------------------------------------- par(mar=c(2.6, 2.5, 1, 0.1), mgp = c(1.6, 0.6, 0)) plot(data1$PREC ~ data1$IDX, type = "l", ylab = "Rainfall", xlab = "Days", main = "", col = "gray", ylim = c(0,60)) abline(v = c(365,365*2, 365*3,365*4,365*5), pch = 2, col = "black") lines(data1$IDX, mle.pt, col = "black", lwd = 2, lty = 1) lines(data1$IDX, mle.min, col = "black", lwd = 1, lty = 2) lines(data1$IDX, mle.max, col = "black", lwd = 1, lty = 2) lines(data1$IDX, mle.geom_pt, col = "red", lwd = 2, lty = 1) lines(data1$IDX, mle.geom_min, col = "red", lwd = 1, lty = 2) lines(data1$IDX, mle.geom_max, col = "red", lwd = 1, lty = 2) legend("topright", legend = c("TRM","GTRM"), lty = 1, col = c("black","red")) require(ggplot2) data_graph <- data.frame("Point" = c(mle.pt, mle.min, mle.max), "x" = rep(data1$IDX, 3), "Estimates" = c(rep("Mean", length(mle.pt)), rep("2.5%", length(mle.min)), rep("97.5%", length(mle.max)))) data_graph_g <- data.frame("Point" = c(mle.geom_pt, mle.geom_min, mle.geom_max), "x" = rep(data1$IDX, 3), "Estimates" = c(rep("Mean", length(mle.pt)), rep("2.5%", length(mle.min)), rep("97.5%", length(mle.max)))) ggplot(data = data1) + geom_line(mapping = aes(x = IDX, y = PREC), col = "gray70") + ylim(0,60) + ylab("Rainfall") + xlab("Days") + geom_vline(xintercept = c(365,365*2, 365*3,365*4,365*5), pch = 2, col = "black")+ geom_line(mapping = aes(x = x, y = Point, lty = Estimates), data = data_graph) + geom_line(mapping = aes(x = x, y = Point, lty = Estimates), col = "red", data = data_graph_g)