The Gamma-count distribution in the analysis of experimental underdispersed data

Journal of applied statistics

Supplementary material

Walmes Marques Zeviani (I),
Paulo Justiniano Ribeiro Jr (I),
Wagner Hugo Bonat (I),
Silvia Shimakura (I),
Joel Augusto Muniz (II),

(I) LEG: Statistics and Geoinformation Lab), DEST: Department of Statistics, UFPR: Federal University of Paraná

(II) DEX: Department of Exact Sciences, UFLA: Federal University of Lavras,

Table of contents


Loading packages and functions

##-----------------------------------------------------------------------------
##
##             The Gamma-count distribution in the analysis of
##                     experimental underdispersed data
## 
##                          Walmes Marques Zeviani
##                       Paulo Justiniano Ribeiro Jr
##                            Wagner Hugo Bonat
##                             Silvia Shimakura
##                            Joel Augusto Muniz
## 
##-----------------------------------------------------------------------------
## Required packages.

require(lattice)
require(latticeExtra)
require(plyr)
require(ellipse)
require(rootSolve)

##-----------------------------------------------------------------------------
## Package versions.

sessionInfo()

##-----------------------------------------------------------------------------
## Log-lokelihodd function for the gamma count regression model.

ll <- function(theta, y, X){
    ## theta: parameter vector;
    ## y: response vector (counts);
    ## X: model matrix;
    eXb <- exp(X%*%theta[-1])
    ## returns the log-likelihood.
    sum(log(pgamma(1, theta[1]*y, theta[1]*eXb)-
            pgamma(1, theta[1]*y+theta[1], theta[1]*eXb)))
}

##-----------------------------------------------------------------------------
## Profile log-likelihood function to alpha parameter.

ll.alpha <- function(theta, alpha, y, X){
    ## theta: parameter vector;
    ## alpha: scalar;
    ## y: response vector (counts);
    ## X: model matrix;
    eXb <- exp(X%*%theta)
    sum(log(pgamma(1, alpha*y, alpha*eXb)-
            pgamma(1, alpha*y+alpha, alpha*eXb)))
}

ll.alpha.b0 <- function(theta, alpha, b0, y, X){
    ## theta: parameter vector;
    ## alpha: scalar;
    ## b0: scalar intercpet;
    ## y: response vector (counts);
    ## X: model matrix;
    eXb <- exp(X%*%c(b0,theta)) #*theta[1]
    sum(log(pgamma(1, alpha*y, alpha*eXb)-
            pgamma(1, alpha*y+alpha, alpha*eXb)))
}

##-----------------------------------------------------------------------------
## Function to estimate gamma count using estimates based on Poisson as
## guess values.

poi2cg <- function(m0){
    ## m0: an aobject of class glm;
    form <- formula(m0)
    form.split <- strsplit(as.character(form), "~")
    yobs <- m0$data[,form.split[[2]]]
    X <- model.matrix(m0)
    alpha <- 1; gamma <- coef(m0)
    op <- optim(c(alpha, gamma), ll, y=yobs, X=X,
                method="L-BFGS-B", hessian=TRUE,
                lower=c(0,rep(-Inf,length(gamma))),
                upper=c(Inf,rep(Inf,length(gamma))),
                control=list(fnscale=-1))
    return(op)
}

##-----------------------------------------------------------------------------
## Likelihood ratio test for a nested sequence of gamma count models.

anova.cg <- function(...){
    cg.list <- list(...)
    lls <- sapply(cg.list, function(x) x$value)
    nps <- sapply(cg.list, function(x) length(x$par))
    cst <- 2*diff(lls)
    pvs <- pchisq(cst, df=diff(nps), lower.tail=FALSE)
    data.frame(ll=lls, npar=nps, two.ll.dif=c(NA,cst),
               npar.dif=c(NA,diff(nps)), pvalue=c(NA,pvs))
}

##-----------------------------------------------------------------------------
## Table of estimatimates, standard error, z and p values. 

summary.cg <- function(m0, conf.int=0.95){
    est <- m0$par
    sdt <- sqrt(diag(-solve(m0$hessian)))
    zval <- est/sdt
    pval <- 2*pnorm(abs(zval), lower=FALSE)
    ci <- est+outer(sdt, c(1,-1)*qnorm((1-conf.int)/2), "*")
    cbind(Estimate=est, Std.Error=sdt, "z value"=zval, "P(>|z|)"=pval,
          lwr=ci[,1], upr=ci[,2])
}

##-----------------------------------------------------------------------------
## Trellis panel function to get a beewarm plot.

panel.beeswarm <- function(x, y, subscripts, r, ...){
    xx <- x; yy <- y
    aux <- by(cbind(yy, xx, subscripts), xx,
              function(i){
                  or <- order(i[,1])
                  ys <- i[or,1]
                  yt <- table(ys)
                  dv <- sapply(unlist(yt),
                               function(j){
                                   seq(1,j,l=j)-(j+1)/2
                               })
                  if(!is.list(dv)){ dv <- as.list(dv) }
                  xs <- i[or,2]+r*do.call(c, dv)
                  cbind(x=xs, y=ys, subscripts[or])
              })
    aux <- do.call(rbind, aux)
    panel.xyplot(aux[,1], aux[,2], subscripts=aux[,3], ...)
}
## R version 3.1.0 (2014-04-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=pt_BR.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  base     
## 
## other attached packages:
## [1] rootSolve_1.6.5     ellipse_0.3-8       plyr_1.8.1         
## [4] latticeExtra_0.6-26 RColorBrewer_1.0-5  lattice_0.20-29    
## [7] knitr_1.6          
## 
## loaded via a namespace (and not attached):
## [1] digest_0.6.4   evaluate_0.5.5 formatR_0.10   grid_3.1.0    
## [5] methods_3.1.0  Rcpp_0.11.2    stringr_0.6.2  tools_3.1.0

Back to table of contents

Loading and exploring the data

##-----------------------------------------------------------------------------
## Dataset.

est.ing <- c("vegetative","flower bud","blossom","fig","cotton boll")
cap <- expand.grid(rept=1:5,
                   des=seq(0,100,l=5)/100,
                   est=factor(est.ing, levels=est.ing))

cap$nc <- c(10, 9, 8, 8, 10, 11, 9, 10, 10, 10, 8, 8, 10, 8, 9, 9, 7, 7,
            8, 9, 8, 6, 6, 5, 6, 7, 8, 8, 9, 10, 9, 12, 7, 10, 9, 8, 9,
            9, 10, 8, 11, 10, 7, 8, 8, 7, 7, 7, 7, 8, 10, 9, 8, 12, 8,
            7, 5, 5, 7, 5, 6, 5, 7, 4, 7, 8, 5, 7, 6, 4, 5, 5, 4, 4, 5,
            8, 10, 7, 8, 10, 9, 6, 6, 8, 6, 9, 7, 11, 8, 9,6, 6, 6, 6,
            7, 3, 3, 2, 4, 3, 11, 7, 9, 12 , 11, 9, 13, 8, 10, 10, 9, 7,
            7, 9, 9, 8, 8, 10, 8, 10, 9, 8, 10, 8, 10)
## str(cap)
## $ rept: unit experimental indice into each factor combination cell;
## $ des : artificial defoliation level;
## $ est : growth stage;
## $ nc  : number of bolls produced by two cotton plants at harvest;
##-----------------------------------------------------------------------------

## Scatter plot with a beewarm taste.
p1 <- xyplot(nc~des|est, data=cap, layout=c(2,3), as.table=1, col=1,
             xlim=extendrange(c(0:1),f=0.15),
             xlab="Artificial defoliation level",
             ylab="Number of bolls produced", type=c("p","smooth"),
             col.line="gray50",
             strip=strip.custom(bg="gray90", factor.levels=est.ing),
             r=0.05, panel=panel.beeswarm)

aux1 <- ddply(cap, .(des, est), summarise, m=mean(nc), v=var(nc))

## Sample variance vs sample mean.
p2 <- 
    xyplot(v~m, aux1, aspect="iso", col=1, type=c("p","g","r"), lty=1,
           col.line="gray50", 
           jitter.x=TRUE,
           xlab=expression(Sample~mean~(bar(y))),
           ylab=expression(Sample~variance~(s^2)), 
           xlim=c(0,11), ylim=c(0,11),
           panel=function(...){
               panel.xyplot(...)
               panel.abline(a=0, b=1, lty=2)
           })

print(p1, split=c(1,1,2,1), more=TRUE)
print(p2, split=c(2,1,2,1))

plot of chunk unnamed-chunk-3 Back to table of contents

Model estimation

##-----------------------------------------------------------------------------
## Poisson model.

cpP0 <- glm(nc~1, data=cap, family=poisson)
cpP1 <- glm(nc~des+I(des^2), data=cap, family=poisson)
cpP2 <- glm(nc~est:des+I(des^2), data=cap, family=poisson)
cpP3 <- glm(nc~est:(des+I(des^2)), data=cap, family=poisson)
par(mfrow=c(2,2)); plot(cpP3); layout(1)
anova(cpP0, cpP1, cpP2, cpP3, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: nc ~ 1
## Model 2: nc ~ des + I(des^2)
## Model 3: nc ~ est:des + I(des^2)
## Model 4: nc ~ est:(des + I(des^2))
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)    
## 1       124       75.5                         
## 2       122       58.4  2    17.16  0.00019 ***
## 3       118       33.0  4    25.36  4.3e-05 ***
## 4       114       27.3  4     5.74  0.21926    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot of chunk unnamed-chunk-4

##-----------------------------------------------------------------------------
## Quasi poisson model.

cpQ0 <- glm(formula(cpP0), data=cap, family=quasipoisson)
cpQ1 <- glm(formula(cpP1), data=cap, family=quasipoisson)
cpQ2 <- glm(formula(cpP2), data=cap, family=quasipoisson)
cpQ3 <- glm(formula(cpP3), data=cap, family=quasipoisson)
anova(cpQ0, cpQ1, cpQ2, cpQ3, test="F")
## Analysis of Deviance Table
## 
## Model 1: nc ~ 1
## Model 2: nc ~ des + I(des^2)
## Model 3: nc ~ est:des + I(des^2)
## Model 4: nc ~ est:(des + I(des^2))
##   Resid. Df Resid. Dev Df Deviance     F  Pr(>F)    
## 1       124       75.5                              
## 2       122       58.4  2    17.16 35.59 9.8e-13 ***
## 3       118       33.0  4    25.36 26.30 1.8e-15 ***
## 4       114       27.3  4     5.74  5.96 0.00022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##-----------------------------------------------------------------------------
## Gamma count model.

cpG0 <- poi2cg(cpP0)
cpG1 <- poi2cg(cpP1)
cpG2 <- poi2cg(cpP2)
cpG3 <- poi2cg(cpP3)
anova.cg(cpG0, cpG1, cpG2, cpG3)
##       ll npar two.ll.dif npar.dif    pvalue
## 1 -272.4    2         NA       NA        NA
## 2 -256.0    4      32.83        2 7.431e-08
## 3 -220.1    8      71.67        4 1.007e-14
## 4 -208.4   12      23.52        4 9.976e-05

Back to table of contents

Prediction

##-----------------------------------------------------------------------------
## Predicted values.

pred <- expand.grid(est=levels(cap$est), des=seq(0,1,l=20))
form <- as.formula(do.call(paste,
                           strsplit(as.character(formula(cpP3)), "nc")))
Xf <- model.matrix(form, pred)

##-----------------------------------------------------------------------------
## By gamma count.

tau <- cpG3$par[-1]; delta <- cpG3$par[1]
vcov <- -solve(cpG3$hessian)
pred$Geta <- Xf%*%tau
pred$Gmu <- sapply(pred$Geta,
                   function(m){
                       sum(pgamma(1,
                                  shape=delta*(1:20),
                                  rate=delta*exp(m)))
                   })

## Conditional vcov.
Vc <- vcov[-1,-1]-vcov[-1,1]%*%solve(vcov[1,1])%*%vcov[1,-1]

## Marginal vcov.
V <- vcov[-1,-1]

## sum(abs(vcov[-1,-1]-Vc)) ## almost ortogonal, so conditional = marginal.

U <- chol(Vc)
se <- sqrt(apply(Xf%*%t(U), 1, function(x) sum(x^2))) # erro padrão
## head(se)
## cbind(se, sqrt(diag(Xf%*%V%*%t(Xf))))

aux <- c(pred$Geta)+outer(se, qnorm(0.975)*c(lwr=-1, fit=0, upr=1), "*")
aux <- c(pred$Geta)+
    outer(se, qt(0.975, df=df.residual(cpP3))*
          c(lwr=-1, fit=0, upr=1), "*")
n <- 1:40 ## summation from 0 to 40 is enaugh.
aux <- matrix(sapply(aux,
                     function(m){
                         sum(pgamma(1, shape=delta*(n),
                                    rate=delta*exp(m)))
                     }),
              ncol=3, dimnames=list(NULL, c("Glwr","Gfit","Gupr")))
pred <- cbind(pred, aux)

##-----------------------------------------------------------------------------
## By Poisson.

aux <- predict(cpP3, newdata=pred, se.fit=TRUE)
aux <- exp(aux$fit+
           outer(aux$se.fit, qnorm(0.975)*
                 c(lwr=-1, fit=0, upr=1), "*"))
colnames(aux) <- c("Plwr","Pfit","Pupr")
pred <- cbind(pred, aux)

##-----------------------------------------------------------------------------
## By quasi-Poisson

aux <- predict(cpQ3, newdata=pred, se.fit=TRUE)
aux <- exp(aux$fit+
           outer(aux$se.fit, qnorm(0.975)*
                 c(lwr=-1, fit=0, upr=1), "*"))
colnames(aux) <- c("Qlwr","Qfit","Qupr")
pred <- cbind(pred, aux)
str(pred)
## 'data.frame':    100 obs. of  13 variables:
##  $ est : Factor w/ 5 levels "vegetative","flower bud",..: 1 2 3 4 5 1 2 3 4 5 ...
##  $ des : num  0 0 0 0 0 ...
##  $ Geta: num [1:100, 1] 2.23 2.23 2.23 2.23 2.23 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr  "1" "2" "3" "4" ...
##   .. ..$ : NULL
##  $ Gmu : num  8.94 8.94 8.94 8.94 8.94 ...
##  $ Glwr: num  8.43 8.43 8.43 8.43 8.43 ...
##  $ Gfit: num  8.94 8.94 8.94 8.94 8.94 ...
##  $ Gupr: num  9.47 9.47 9.47 9.47 9.47 ...
##  $ Plwr: num  7.89 7.89 7.89 7.89 7.89 ...
##  $ Pfit: num  8.93 8.93 8.93 8.93 8.93 ...
##  $ Pupr: num  10.1 10.1 10.1 10.1 10.1 ...
##  $ Qlwr: num  8.4 8.4 8.4 8.4 8.4 ...
##  $ Qfit: num  8.93 8.93 8.93 8.93 8.93 ...
##  $ Qupr: num  9.49 9.49 9.49 9.49 9.49 ...
##-----------------------------------------------------------------------------
## Prediction with 95% confidence bands.

xyplot(nc~des|est, data=cap, layout=c(5,1), col=1,
       xlim=extendrange(c(0:1),f=0.15), 
       ## xlab="Nível de desfolha artificial",
       ## ylab="Número de capulhos produzidos", 
       xlab="Artificial defoliation level",
       ylab="Number of bolls produced",
       strip=strip.custom(bg="gray90", factor.levels=est.ing),
       key=list(columns=3,
           text=list(c("Poisson","Quasi-Poisson","Gamma-count")),
           lines=list(lty=c(2,3,1), lwd=2, col=1)),
       scales=list(x=list(at=seq(0,1,0.25))),
       r=0.05, panel=panel.beeswarm)+
    as.layer(xyplot(Gfit+Glwr+Gupr~des|est, data=pred,
                    type="l", col=1, lty=1, lwd=c(2,1,1)))+
    as.layer(xyplot(Pfit+Plwr+Pupr~des|est, data=pred,
                    type="l", col=1, lty=2, lwd=c(2,1,1)))+
    as.layer(xyplot(Qfit+Qlwr+Qupr~des|est, data=pred,
                    type="l", col=1, lty=3, lwd=c(2,1,1)))

plot of chunk unnamed-chunk-8

Back to table of contents

Count distribution for a zero defoliation level

##-----------------------------------------------------------------------------
## Density of a gamma count random variable.

dgammacount <- function(n, alpha, beta){
  pgamma(1, n*alpha, beta)-pgamma(1, n*alpha+alpha, beta)
}

alpha <- cpG3$par[1]
beta <- alpha*exp(cpG3$par[2])
lambda <- exp(coef(cpP3)[1])
n <- 1:20

## Probability distribution.
gcp <- dgammacount(n, alpha, beta)
pp <- dpois(n, lambda=lambda)

cols <- c("gray50",1)
xyplot(gcp~c(n-0.1), type="h", lwd=3, col=cols[1],
       xlab="n", ylab=expression(P(N==n)),
       key=list(corner=c(0.9,0.9),
           lines=list(lty=1, lwd=2, col=cols),
           ## text=list(c("contagem-Gama","Poisson"))),
           text=list(c("Count-gamma","Poisson"))),
       panel=function(...){
           panel.xyplot(...)
           panel.points(pp~c(n+0.1), type="h", lwd=3, col=cols[2])
           panel.abline(v=lambda, lty=2)
       })

plot of chunk unnamed-chunk-9

Back to table of contents

Single and joint profile likelihood for alpha

##-----------------------------------------------------------------------------
## Single profile.

alpha <- seq(3,8,l=30)
perfil <- sapply(alpha,
                 function(a){
                     op <- optim(cpG3$par[-1], ll.alpha, alpha=a,
                                 y=cap$nc, X=model.matrix(cpP3),
                                 method="BFGS", hessian=TRUE,
                                 ## lower=c(rep(-Inf,length(gamma))),
                                 ## upper=c(rep(Inf,length(gamma))),
                                 control=list(fnscale=-1))
                     c(op$value, op$par[1])
                 })

## plot(perfil[1,]~alpha)

## Exact profile likelihood and its quadratic approximation.
cp.a1 <- cpG3$value; cp.a2 <- cpG3$par[1];
cp.a3 <- summary.cg(cpG3)[1,2]
dev.perf <- 2*(cp.a1-perfil[1,])
dev.quad <- ((cp.a2-alpha)/cp.a3)^2

qchi <- qchisq(0.95, df=1)
fperf <- approxfun(alpha, dev.perf-qchi)
lim <- uniroot.all(fperf, c(0, 10))
lim2 <- cp.a2+c(-1,1)*1.96*cp.a3

## Range of confidence intervals.
diff(lim)
diff(lim2)
mean(lim-lim2)

keytext <- c("profile log-likelihood","quadratic approximation")
xyplot(dev.perf~alpha, type="l", lty=1, col=1,
       xlab=expression(alpha),
       ylab=expression(2*(L(hat(alpha))-L(alpha))),
       key=list(columns=1,
           lines=list(lty=1:2, col=1),
           text=list(keytext))
       )+
    as.layer(xyplot(dev.quad~alpha, type="l", lty=2, col=1))
trellis.focus("panel", 1,1, highlight=FALSE)
panel.abline(h=qchi, lty=3)
panel.arrows(lim, qchi, lim, 0, lty=1, length=0.1)
panel.arrows(lim2, ((cp.a2-lim2)/cp.a3)^2, lim2, 0, lty=2, length=0.1)
panel.abline(v=cp.a2)
trellis.unfocus()
## [1] 2.707
## [1] 2.7
## [1] 0.1297

plot of chunk unnamed-chunk-10

##-----------------------------------------------------------------------------
## Joint profile likelihood.

n <- 50
## cpG3$par[2]+c(-2,2)*summary.cg(cpG3)[2,2]
chut <- cpG3$par[-c(1:2)]
grid <- expand.grid(alpha=seq(3, 8, l=n), b0=seq(2.13, 2.33, l=n))
vcov <- -solve(cpG3$hessian)

## This step is very time consuming.
perfil2 <- apply(grid, 1,
                 function(x){
                     op <- optim(chut, ll.alpha.b0, alpha=x[1], b0=x[2],
                                 y=cap$nc, X=model.matrix(cpP3),
                                 method="BFGS", #hessian=TRUE,
                                 control=list(fnscale=-1))
                     op$value
                 })

grid$dev <- 2*(cp.a1-perfil2)
niveis <- c(0.9,0.95,0.99)
cortes <- qchisq(niveis, df=2)

el <- lapply(niveis,
             function(l){
                 ellipse(x=vcov[1:2,1:2], level=l)
             })

keytext <- c("profile log-likelihood", "quadratic approximation",
             expression(alpha~"profile log-likelihood"))
cuts <- 50
levelplot(dev~alpha+b0, data=grid,
          cuts=cuts, col.regions=gray.colors(cuts+1, 0, 0.9),
          xlab=expression(alpha), ylab=expression(gamma[0]),
          key=list(columns=1,
              lines=list(lty=c(1,2,1), col=c(1,1,1), lwd=c(1,1,2)),
              text=list(keytext)),
          panel=function(..., at, contour, region){
              panel.levelplot(..., at=at)
              panel.contourplot(..., at=cortes, contour=TRUE, region=FALSE)
              panel.lines(alpha, perfil[2,], col=1, lwd=2)
              panel.abline(v=cp.a2, h=cpG3$par[2], lty=3)
              lapply(el,
                     function(x){
                         panel.points(x[,1]+cp.a2, x[,2]+cpG3$par[2],
                                      type="l", col=1, lty=2)
                     })
          })

plot of chunk unnamed-chunk-11

Back to table of contents

Illustration of the gamma count process

##-----------------------------------------------------------------------------

mycol <- c("#2E8B57","#551A8B","#FF4500","#00688B","#B22222")
ps <- list(
    box.rectangle=list(col=1, fill=c("gray70")),
    box.umbrella=list(col=1, lty=1),
    dot.symbol=list(col=1),
    dot.line=list(col=1, lty=3),
    plot.symbol=list(col=1, cex=0.7),
    plot.line=list(col=1, lty=1.5),
    plot.polygon=list(col="gray80"),
    superpose.line=list(col=mycol),
    superpose.symbol=list(col=mycol),
    superpose.polygon=list(col=mycol),
    strip.background=list(col=c("gray90","gray50"))
    )

##-----------------------------------------------------------------------------
## Hazard function.

harz <- function(t, beta, alpha){
  f <- dgamma(t, shape=alpha, rate=beta)
  F <- pgamma(t, shape=alpha, rate=beta)
  f/(1-F)
}

##-----------------------------------------------------------------------------
## Densities for time to event random variables and its hazard functions.

x <- seq(0,3, l=200)
da <- expand.grid(x=x, fun=c("f(x)","h(x)"), case=c("under","equal","over"))
L <- list(dgamma(x,5,5), harz(x,5,5),
          dgamma(x,1,1), harz(x,1,1),
          dgamma(x,0.2,0.2), harz(x,0.2,0.2))
da$y <- do.call(c, L)

ylim <- list(c(0,1.2), c(0,4))
ylim <- rep(ylim, e=3)
yat <- rep(list(NA,NULL,NULL), 2)

fl1 <- c(expression(G(alpha==5, beta==5)*": underdipersion"),
         expression(G(alpha==1, beta==1)*": equidispersion"),
         expression(G(alpha==0.2, beta==0.2)*": overdispersion"))

xy <-
    xyplot(y~x|case+fun, da, type="l",
           ylim=ylim, #xlim=c(0,NA),
           as.table=TRUE,
           ylab=c("Hazard","Density"),
           xlab=expression(tau),
           scales=list(y=list(relation="free",
                           at=yat),
               x=list(alternating=1)),
           between=list(x=-1.5,y=0.3),
           par.settings=ps
           )+
    layer(panel.abline(v=1, lty=2))+
    layer(panel.abline(h=0, v=0, lty=3))
p1 <- useOuterStrips(xy, strip=strip.custom(factor.levels=fl1))
## print(p1)

##-----------------------------------------------------------------------------
## Simulation of under, equi and over dispersed count data.

n <- 100000; set.seed(54321)
r1 <- rgamma(n, shape=1, rate=1)     # equi
r2 <- rgamma(n, shape=5, rate=5)     # sub
r3 <- rgamma(n, shape=0.2, rate=0.2) # super

db <- data.frame(caso=gl(3, k=n, labels=c("under","equi","over")))
db$tau <- c(r2, r1, r3)
db$y <- do.call(c, tapply(db$tau, db$caso, cumsum))

m <- 60
dc <- subset(db, y<m)
dc$caso <- factor(dc$caso, levels=rev(levels(dc$caso)))
df <- do.call(rbind, by(dc, dc$caso,
                        function(y){
                            tb <- table(cut(y$y, seq(0,m,1)))
                            tb <- unclass(tb)
                            tb <- data.frame(caso=y$caso[1],
                                             x=seq_along(tb)-0.5, f=tb)
                            tb
                        }))

p2 <- 
    dotplot(caso~y, data=dc, pch=1, ylim=c(0.5,4),
            xlim=extendrange(c(0,m),f=0.025),
            scales=list(y=list(at=1:3, labels=levels(dc$caso))),
            xlab=expression(vartheta),
            par.settings=ps,
            function(x, y, subscripts, ...){
                panel.abline(v=seq(0,m,1), col="gray50")
                panel.dotplot(x, y, subscripts=subscripts, ...)
                with(df,
                     panel.text(y=as.numeric(caso), x=x, f, pos=3, cex=0.8))
            })
## print(p2)

##-----------------------------------------------------------------------------
## Frequency distribution of occurrences.

dg <- do.call(rbind, by(db, db$caso,
                        function(y){
                            tb <- table(cut(y$y, seq(0,max(y$y),20)))
                            tb <- unclass(tb)
                            tb <- data.frame(caso=y$caso[1], f=tb)
                            tb
                        }))

p3 <- histogram(~f|caso, data=dg, layout=c(NA,1),
              strip=strip.custom(
                  factor.levels=c("underdispersion",
                      "equidispersion","overdispersion")),
              xlab="Number of events in each interval of length 20",
              type="density", breaks=seq(0,max(dg$f)+1,1),
              between=list(x=0.2,y=0.3),
              scales=list(x=list(alternating=1)),
              par.settings=ps)+
      layer(panel.abline(v=20, lty=2))
## print(p3)

##-----------------------------------------------------------------------------
## The plots together.

f <- c(4,3,5); f <- cumsum(f)/sum(f)
print(plot(p3, position=c(0.00, 0.00, 1.02, f[1]), more=TRUE))
print(plot(p2, position=c(0.01, f[1], 1.01, f[2]), more=TRUE))
print(plot(p1, position=c(0.01, f[2], 1.00, f[3]), more=FALSE))

plot of chunk unnamed-chunk-12

Back to table of contents

How to cite this article

Zeviani, W. M., Ribeiro Jr, P. J., Bonat, W. H., Shimakura, S. E., & Muniz, J. A. (2014). The Gamma-count distribution in the analysis of experimental underdispersed data. Journal of Applied Statistics, n.??, v.??, p.??-??.

Available online at: article online.

@article{zeviani-etal-2014,
    title   = {The Gamma-count distribution in the analysis of
               experimental underdispersed data},
    author  = {Walmes Marques Zeviani and
               Paulo Justiniano {Ribeiro Jr} and  Wagner Hugo Bonat and
               Silvia Emiko Shimakura and Joel Augusto Muniz},
    journal = {Journal of Applied Statistics},
    volume  = {to appear},
    number  = {to appear},
    pages   = {to appear},
    year    = {2014},
    url     = {http://www.tandfonline.com/doi/full/10.1080/02664763.2014.922168},
    doi     = {10.1080/02664763.2014.922168},
}

Back to table of contents