Universidade Federal do Paraná
Curso de Estatística
CE 089 - Estatística Computacional II - 2014/2
Prof. Dr. Walmes Marques Zeviani

Aula 11

Tabela de conteúdo

Nível de cobertura para intervalos de confiança

##-----------------------------------------------------------------------------
## Funções que representam a v.a. X.

Fx <- function(x, th1) x/(th1+x)

D(expression(x/(th1+x)), "x")

## 1/(th1 + x) - x/(th1 + x)^2

fx <- function(x, th1) 1/(th1+x)-x/(th1+x)^2

iFx <- function(u, th1) -th1*u/(u-1)

set.seed(1234)
u <- runif(20)
x <- iFx(u, th1=2)
x

##  [1]  0.25658  3.29520  3.11869  3.31038 12.37974  3.56035  0.01917  0.60603  3.98953
## [10]  2.11735  4.52723  2.39536  0.78836 24.12108  0.82612 10.29223  0.80200  0.72785
## [19]  0.45919  0.60493

par(mfrow=c(1,2))
plot(ecdf(x))
curve(Fx(x, th1=2), add=TRUE, col=2)
plot(density(x, from=0)); rug(x)
curve(fx(x, th1=2), add=TRUE, col=2); layout(1)

plot of chunk unnamed-chunk-2

##-----------------------------------------------------------------------------
## Funções para estimação por máxima verossimilhança.

## Função de log-verossimilhança.
ll <- function(th1, y){
    sum(log(1/(th1+y)-y/(th1+y)^2))
}

## Versão vetorizada em th1.
llv <- Vectorize(ll, "th1")

## Função deviance*.
dev <- function(th1, y, llmax, qtil=3.84){
    ll <- sum(log(1/(th1+y)-y/(th1+y)^2))
    dev <- -2*(ll-llmax)
    return(dev-qtil)
}

## Versão vetorizada.
Dev <- Vectorize(FUN=dev, "th1")

## Sequência de valores de th1 e suas log-verossimilhanças.
th1.seq <- seq(0.1, 5, by=0.1) ## Lembrar que th1>0.
ll.seq <- llv(th1.seq, y=x)
dev.seq <- Dev(th1.seq, y=x, llmax=opt$objective)

## Error: object 'opt' not found

## Gráfico da log-verossimilhança em th1.
plot(ll.seq~th1.seq, type="l",
     ylab="log-verossimilhaça", xlab=expression(theta[1]))

##-----------------------------------------------------------------------------
## Encontrando o máximo da log-verossimilhança.

opt <- optimize(f=llv, interval=c(0, 10), y=x, maximum=TRUE)
opt

## $maximum
## [1] 1.713
## 
## $objective
## [1] -46.28

plot(ll.seq~th1.seq, type="l",
     ylab="log-verossimilhaça", xlab=expression(theta[1]))
abline(v=opt$maximum, h=opt$objective, lty=2)

##-----------------------------------------------------------------------------
## Encontrando os extremos do IC.

require(rootSolve)

## Loading required package: rootSolve

ic <- uniroot.all(f=Dev, y=x, llmax=opt$objective, interval=c(0.1, 10))

plot(dev.seq~th1.seq, type="l",
     ylab="log-verossimilhaça", xlab=expression(theta[1]))

## Error: object 'dev.seq' not found

abline(v=opt$maximum, h=0, lty=2)
abline(v=ic, lty=2, col=2)

##-----------------------------------------------------------------------------
## Uso da aproximação quadrática da verossimilhança. A
## log-verossimilhança é complicada de derivar. Veja a derivada do log
## da função densidade.

D(expression(log(1/(th1+y)-y/(th1+y)^2)), "th1")

## -((1/(th1 + y)^2 - y * (2 * (th1 + y))/((th1 + y)^2)^2)/(1/(th1 + 
##     y) - y/(th1 + y)^2))

D(D(expression(log(1/(th1+y)-y/(th1+y)^2)), "th1"), "th1")

## (2 * (th1 + y)/((th1 + y)^2)^2 + (y * 2/((th1 + y)^2)^2 - y * 
##     (2 * (th1 + y)) * (2 * (2 * (th1 + y) * ((th1 + y)^2)))/(((th1 + 
##     y)^2)^2)^2))/(1/(th1 + y) - y/(th1 + y)^2) - (1/(th1 + y)^2 - 
##     y * (2 * (th1 + y))/((th1 + y)^2)^2) * (1/(th1 + y)^2 - y * 
##     (2 * (th1 + y))/((th1 + y)^2)^2)/(1/(th1 + y) - y/(th1 + 
##     y)^2)^2

## Função gradiente: ll'(th1).
grad <- function(th1, y){
    sum(
-((1/(th1 + y)^2 - y * (2 * (th1 + y))/((th1 + y)^2)^2)/(1/(th1 + 
    y) - y/(th1 + y)^2))
        )
}

## Gradiente na estimativa (se é máximo, então vale ~zero).
grad(th1=opt$max, y=x)

## [1] 2.433e-05

## Função hessiano: ll''(th1).
hes <- function(th1, y){
sum(
(2 * (th1 + y)/((th1 + y)^2)^2 + (y * 2/((th1 + y)^2)^2 - y * 
    (2 * (th1 + y)) * (2 * (2 * (th1 + y) * ((th1 + y)^2)))/(((th1 + 
    y)^2)^2)^2))/(1/(th1 + y) - y/(th1 + y)^2) - (1/(th1 + y)^2 - 
    y * (2 * (th1 + y))/((th1 + y)^2)^2) * (1/(th1 + y)^2 - y * 
    (2 * (th1 + y))/((th1 + y)^2)^2)/(1/(th1 + y) - y/(th1 + 
    y)^2)^2
    )
}

## Valor da l''(th1) avaliada na estimativa de forma analítica.
hes(th1=opt$max, y=x)

## [1] -2.471

## Otimizando com a optim() que retorna o hessiano numérico como
## subproduto da otimização.
optm <- optim(par=1.5, fn=llv, y=x, method="BFGS",
              control=list(fnscale=-1), hessian=TRUE)
str(optm)

## List of 6
##  $ par        : num 1.71
##  $ value      : num -46.3
##  $ counts     : Named int [1:2] 15 5
##   ..- attr(*, "names")= chr [1:2] "function" "gradient"
##  $ convergence: int 0
##  $ message    : NULL
##  $ hessian    : num [1, 1] -2.47

str(opt)

## List of 2
##  $ maximum  : num 1.71
##  $ objective: num -46.3

## O valor avaliado de f''(th1).
optm$hessian

##        [,1]
## [1,] -2.471

## IC de Wald.
icw <- optm$par+c(-1,1)*sqrt(-3.84/optm$hessian)

## Os extremos por cada tipo de intervalo.
cbind(ic, icw)

##          ic    icw
## [1,] 0.8237 0.4668
## [2,] 3.5545 2.9598

## Os intervalos de confiança.
plot(dev.seq~th1.seq, type="l", col=4,
     ylab="log-verossimilhaça", xlab=expression(theta[1]))

## Error: object 'dev.seq' not found

with(optm, curve(-3.84-hessian*(x-par)^2, col=2, add=TRUE))
abline(v=opt$maximum, h=0, lty=2)
abline(v=ic, lty=2, col=4)
abline(v=icw, lty=2, col=2)

plot of chunk unnamed-chunk-2

## legend(x="topright", lty=1, col=c(4,2), bty="n",
##        legend=c("Deviance", "Wald"))

##-----------------------------------------------------------------------------
## Considerar uma reparametrização na qual th1 = exp(kap1).

## Função de log-verossimilhança.
llr <- function(kap1, y){
    sum(log(1/(exp(kap1)+y)-y/(exp(kap1)+y)^2))
}
llrv <- Vectorize(llr, "kap1")

## Função deviance*.
devr <- function(kap1, y, llmax, qtil=3.84){
    ll <- sum(log(1/(exp(kap1)+y)-y/(exp(kap1)+y)^2))
    dev <- -2*(ll-llmax)
    return(dev-qtil)
}
Devr <- Vectorize(FUN=devr, "kap1")

## log(c(0.1, 5))
kap1.seq <- seq(-2.3, 1.6, by=0.1) 
llr.seq <- llrv(kap1.seq, y=x)
devr.seq <- Devr(th1.seq, y=x, llmax=opt$objective)

## Gráfico da log-verossimilhança em th1.
plot(llr.seq~kap1.seq, type="l",
     ylab="log-verossimilhaça", xlab=expression(kappa[1]))

optr <- optimize(f=llrv, interval=c(-5, 5), y=x, maximum=TRUE)
optr

## $maximum
## [1] 0.5384
## 
## $objective
## [1] -46.28

plot(llr.seq~kap1.seq, type="l",
     ylab="log-verossimilhaça", xlab=expression(kappa[1]))
abline(v=optr$maximum, h=optr$objective, lty=2)

plot of chunk unnamed-chunk-2

##-----------------------------------------------------------------------------
## Como se comportam os níveis de cobertura dos intervalos de Wald?

icWald <- function(th1, n){
    u <- runif(20)
    x <- iFx(u, th1=2)
    optm <- optim(par=th1, fn=llv, y=x, method="BFGS",
                  control=list(fnscale=-1), hessian=TRUE)
    icw <- optm$par+c(-1,1)*sqrt(-3.84/optm$hessian)
    optmr <- optim(par=log(th1), fn=llrv, y=x, method="BFGS",
                  control=list(fnscale=-1), hessian=TRUE)
    icwr <- optmr$par+c(-1,1)*sqrt(-3.84/optmr$hessian)
    ## return(c(icw, icwr))
    inside <- c(sum(icw<th1)==1, sum(icwr<log(th1))==1)
    return(inside)
}

icWald(th1=2, n=20)

## [1] TRUE TRUE

##-----------------------------------------------------------------------------
## Correndo o estudo de simulação.

N <- 1000 ## Use 5000.
res <- replicate(N, icWald(th1=2, n=20))

str(res)

##  logi [1:2, 1:1000] TRUE TRUE FALSE FALSE TRUE TRUE ...

## Taxa nominal de cobetura dos intervalos de confiança de Wald.
apply(res, 1, sum)/N

## [1] 0.928 0.956