Ajuste de Modelos Não Lineares

Carregando Pacotes
Ganho de peso em perus em função da metionina na dieta
Consumo de energia em função da temperatura
Ajuste de modelos de forma interativa
Informações da Sessão

Carregando Pacotes

#-----------------------------------------------------------------------
# Load packages.

library(lattice)
library(latticeExtra)
library(car)
library(alr3)
library(nlstools)
library(nls2)
library(rootSolve)
library(wzRfun)

# url <- "http://nls2.googlecode.com/svn-history/r4/trunk/R/as.lm.R"
# download.file(url, dest = basename(url))
# path <- ifelse(Sys.info()["user"] == "walmes", basename(url), url)
# source(path)
source("~/Dropbox/CursoR/GeneticaEsalq/as.lm.R")

Ganho de peso em perus em função da metionina na dieta

#-----------------------------------------------------------------------
# Ajuste de modelo de regressão não linear.

# turk0
str(turk0)

## 'data.frame':    35 obs. of  2 variables:
##  $ A   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Gain: int  644 631 661 624 633 610 615 605 608 599 ...

xyplot(Gain ~ A, data = turk0, type = c("p", "smooth"))

#-----------------------------------------------------------------------
# Valores iniciais baseados na interpretação gráfica.
# Modelo: th0 + th1 * x/(th2 + x);

start <- list(th0 = 625, th1 = 800 - 625, th2 = 0.1)

xyplot(Gain ~ A, data = turk0) +
    layer(panel.curve(th0 + th1 * x/(th2 + x)),
          data = start)

#-----------------------------------------------------------------------
# Ajuste.

n0 <- nls(Gain ~ th0 + th1 * A/(th2 + A),
          data = turk0,
          start = start)
summary(n0)

## 
## Formula: Gain ~ th0 + th1 * A/(th2 + A)
## 
## Parameters:
##      Estimate Std. Error t value Pr(>|t|)    
## th0 622.24167    6.20283 100.316  < 2e-16 ***
## th1 235.21690   22.56028  10.426 8.09e-12 ***
## th2   0.15476    0.04059   3.812 0.000591 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.14 on 32 degrees of freedom
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 1.151e-06

xyplot(Gain ~ A, data = turk0)+
    layer(panel.curve(th0 + th1 * x/(th2 + x), col = 2),
          data = as.list(coef(n0)))

#-----------------------------------------------------------------------
# Intervalos de confiança.

# Baseado na log-verossimilhança.
confint(n0)

## Waiting for profiling to be done...

##           2.5%       97.5%
## th0 609.656524 634.7143666
## th1 196.536953 290.9141161
## th2   0.094081   0.2665297

# Baseado na aproximação quadrática da verossimilhança, conhecido como
# intervalos de Wald ou assintóticos. São simétricos por construção.
confint.default(n0)

##            2.5 %      97.5 %
## th0 610.08434930 634.3989826
## th1 190.99955017 279.4342411
## th2   0.07519662   0.2343179

#-----------------------------------------------------------------------
# Colocar bandas de confiança.

# Modelo escrito como função dos parâmetros (theta).
f <- function(theta, xx){
    with(as.list(theta),
         th0 + th1 * xx/(th2 + xx))
}

# Matriz de derivadas parciais em theta (n x p).
gradient(f, x = coef(n0), xx = c(0, 0.2, 0.4))

##      th0       th1      th2
## [1,]   1 0.0000000    0.000
## [2,]   1 0.5637657 -373.797
## [3,]   1 0.7210361 -305.719

pred <- data.frame(A = seq(0, 0.5, l = 20))
pred$fit <- predict(n0, newdata = pred)
der <- gradient(f, x = coef(n0), xx = pred$A)
str(der)

##  num [1:20, 1:3] 1 1 1 1 1 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:3] "th0" "th1" "th2"

# Etapas até o IC passando pelo erro padrão e margem de erro.
F <- der
U <- chol(vcov(n0))
pred$se <- sqrt(apply(F %*% t(U), 1, function(x) sum(x^2)))
tval <- qt(p = c(lwr = 0.025, upr = 0.975), df = df.residual(n0))
me <- outer(pred$se, tval, "*")
pred <- cbind(pred, sweep(me, 1, pred$fit, "+"))
str(pred)

## 'data.frame':    20 obs. of  5 variables:
##  $ A  : num  0 0.0263 0.0526 0.0789 0.1053 ...
##  $ fit: num  622 656 682 702 717 ...
##  $ se : num  6.2 4.8 5.51 5.79 5.66 ...
##  $ lwr: num  610 647 671 690 706 ...
##  $ upr: num  635 666 693 713 729 ...

# Equação do modelo ajustado.
coef(n0)

##         th0         th1         th2 
## 622.2416659 235.2168957   0.1547573

formula(n0)

## Gain ~ th0 + th1 * A/(th2 + A)

# Observados, preditos e a banda de confiança.
xyplot(Gain ~ A, data = turk0) +
    as.layer(xyplot(fit ~ A, data = pred, type = "l",
                    prepanel = prepanel.cbH, cty = "bands",
                    ly = pred$lwr, uy = pred$upr, panel = panel.cbH))

#-----------------------------------------------------------------------
# Região de confiança para os parâmetros.

apropos("contour")

## [1] "contour"           "contour.default"   "contourLines"     
## [4] "contourplot"       ".filled.contour"   "filled.contour"   
## [7] "nlsContourRSS"     "panel.3d.contour"  "panel.contourplot"

ncp0 <- nlsContourRSS(n0)

## 0%33%66%100%
##  RSS contour surface array returned

plot(ncp0)

ncr0 <- nlsConfRegions(n0)

##   0%1%2%3%4%5%6%7%8%9%10%11%12%13%14%15%16%17%18%19%20%21%22%23%24%25%26%27%28%29%30%31%32%33%34%35%36%37%38%39%40%41%42%43%44%45%46%47%48%49%50%51%52%53%54%55%56%57%58%59%60%61%62%63%64%65%66%67%68%69%70%71%72%73%74%75%76%77%78%79%80%81%82%83%84%85%86%87%88%89%90%91%92%93%94%95%96%97%98%99%100%
##  Confidence regions array returned

plot(ncr0)

Consumo de energia em função da temperatura

#-----------------------------------------------------------------------
# Consumo de energia (KWH/dia) em função da temperatura (F).

str(segreg)

## 'data.frame':    39 obs. of  2 variables:
##  $ Temp: num  8.55 10.66 12.45 16.9 20.52 ...
##  $ C   : num  86.2 72.4 74.1 68.2 72.4 ...

xyplot(C ~ Temp, data = segreg, type = c("p", "smooth"))

#-----------------------------------------------------------------------
# Ajuste do modelo platô linear.
# f(x) = th0 + th1 * (x - th2) * (x >= th2) + 0 * (x < th2)

start <- list(th0 = 75, th1 = 0.5, th2 = 50)
xyplot(C ~ Temp, data = segreg) +
    layer(panel.curve(th0 + th1 * (x - th2) * (x >= th2) +
                      0 * (x < th2)), data = start)

# Ajuste.
n2 <- nls(C ~ th0 + th1 * (Temp - th2) * (Temp >= th2) +
              0 * (Temp < th2),
          data = segreg, start = start)

# Estimativas e medidas de ajuste.
summary(n2)

## 
## Formula: C ~ th0 + th1 * (Temp - th2) * (Temp >= th2) + 0 * (Temp < th2)
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)    
## th0  74.6953     1.3433  55.607  < 2e-16 ***
## th1   0.5674     0.1006   5.641 2.10e-06 ***
## th2  41.9512     4.6583   9.006 9.43e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.373 on 36 degrees of freedom
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 1.141e-08

# Valor de F e R².
R2nls(n2)

## $anova
##              Df   Sum Sq   Mean Sq  F value       Pr(>F)
## 1 regression  2 2098.134 1049.0670 36.33724 2.307536e-09
## 2  residuals 36 1039.331   28.8703       NA           NA
## 
## $R2
## [1] 0.6687355

# Intervalos de confiança.
# confint(n2)
confint.default(n2)

##          2.5 %     97.5 %
## th0 72.0625625 77.3281125
## th1  0.3702916  0.7645672
## th2 32.8212184 51.0812375

# Observados e preditos.
xyplot(C ~ Temp, data = segreg) +
    layer(panel.curve(th0 + th1 * (x - th2) * (x >= th2) +
                      0 * (x < th2), col = 4), data = as.list(coef(n2)))

#-----------------------------------------------------------------------
# Análise dos resíduos.

m2 <- as.lm(n2)
par(mfrow = c(2, 2))
plot(m2)

layout(1)

#-----------------------------------------------------------------------
# Colocar bandas de confiança.

f <- function(theta, xx) {
    with(as.list(theta),
         th0 + th1 * (xx - th2) * (xx >= th2) +
         0 * (xx < th2))
}

pred <- data.frame(Temp = sort(c(seq(10, 80, l = 100),
                                 coef(n2)["th2"] +
                                 c(-0.001, 0, 0.001))))

pred$fit <- predict(n2, newdata = pred)
der <- gradient(f, x = coef(n2), xx = pred$Temp)
str(der)

##  num [1:103, 1:3] 1 1 1 1 1 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:3] "th0" "th1" "th2"

F <- der
U <- chol(vcov(n2))
pred$se <- sqrt(apply(F %*% t(U), 1, function(x) sum(x^2)))
tval <- qt(p = c(lwr = 0.025, upr = 0.975), df = df.residual(n2))
me <- outer(pred$se, tval, "*")
pred <- cbind(pred, sweep(me, 1, pred$fit, "+"))
str(pred)

## 'data.frame':    103 obs. of  5 variables:
##  $ Temp: num  10 10.7 11.4 12.1 12.8 ...
##  $ fit : num  74.7 74.7 74.7 74.7 74.7 ...
##  $ se  : num  1.34 1.34 1.34 1.34 1.34 ...
##  $ lwr : num  72 72 72 72 72 ...
##  $ upr : num  77.4 77.4 77.4 77.4 77.4 ...

# Equação do modelo ajustado.
coef(n2)

##        th0        th1        th2 
## 74.6953375  0.5674294 41.9512279

formula(n2)

## C ~ th0 + th1 * (Temp - th2) * (Temp >= th2) + 0 * (Temp < th2)

# Arredonda as estimativas.
theta <- mapply(round,
                x = coef(n2),
                digits = c(2, 4, 2),
                SIMPLIFY = FALSE)
theta

## $th0
## [1] 74.7
## 
## $th1
## [1] 0.5674
## 
## $th2
## [1] 41.95

# Equação.
formula(n2)

## C ~ th0 + th1 * (Temp - th2) * (Temp >= th2) + 0 * (Temp < th2)

eq <- substitute(
    expr = c(
        expression(C==th0~", se"~Temp < th2),
        expression(C==th0 + th1 * (Temp - th2)~", se"~Temp >= th2)),
    env = theta)
eval(eq)

## expression(C == 74.7 ~ ", se" ~ Temp < 41.95, C == 74.7 + 0.5674 * 
##     (Temp - 41.95) ~ ", se" ~ Temp >= 41.95)

# Observados, preditos e a banda de confiança.
xyplot(C ~ Temp, data = segreg) +
    as.layer(xyplot(fit ~ Temp, data = pred, type = "l",
                    prepanel = prepanel.cbH, cty = "bands",
                    ly = pred$lwr, uy = pred$upr,
                    panel = panel.cbH)) +
    layer(panel.key(points = FALSE, text = eval(eq),
                    corner = c(0.05, 0.95)))

Ajuste de modelos de forma interativa

#-----------------------------------------------------------------------
# Exemplos da documentação.

# library(wzRfun)
# help(rp.nls, h = "html")

# Se não possuir o pacote wzRfun, carregue a função da fonte.
source(paste0("https://raw.githubusercontent.com/walmes/wzRfun/",
              "master/R/rp.nls.R"))

#--------------------------------------------
# A non linear regression.

library(rpanel)

data(turk0, package = "alr3")
str(turk0)

plot(Gain ~ A, data = turk0,
     xlab = "Metionine", ylab = "Weight gain")

turk.fit <- rp.nls(model = Gain ~ Int + (Asy - Int) * A/(Half + A),
                   data = turk0,
                   start = list(Int = c(600, 650),
                                Asy = c(750, 850),
                                Half = c(0, 0.2)),
                   extra = function(Int, Asy, Half) {
                       abline(h = c(Asy, Int), v = Half,
                              col = "green")
                   },
                   xlab = "Metionine", ylab = "Weight gain")

summary(turk.fit)
confint(turk.fit)

f <- formula(turk.fit)

plot(Gain ~ A, data = turk0,
     xlab = "Metionine", ylab = "Weight gain")
with(as.list(coef(turk.fit)), {
    eval(call("curve", f[[3]], add = TRUE,
              xname = intersect(all.vars(f[-2]), names(turk0))))
})

#--------------------------------------------
# A more interesting example.

library(lattice)

xyplot(rate ~ conc, groups = state, data = Puromycin,
       type = c("p", "smooth"), auto.key = TRUE)

Puro.fit <- rp.nls(model = rate ~
                       Int + (Top - Int) * conc/(Half + conc),
                   data = Puromycin,
                   start = list(Int = c(20, 70),
                                Top = c(100, 200),
                                Half = c(0, 0.6)),
                   subset = "state",
                   gpar = list(try = list(col = 2, lty = 2),
                               fit = list(col = "blue", lwd = 1.5)),
                   xlab = "Concentration", ylab = "Rate",
                   xlim = c(0, 1.2), ylim = c(40, 220))

length(Puro.fit)
sapply(Puro.fit, coef)
sapply(Puro.fit, logLik)
sapply(Puro.fit, deviance)

#-----------------------------------------------------------------------
# 1. Ajuste de curvas de retenção de água do solo.

cra <- read.table("http://www.leg.ufpr.br/~walmes/data/cra_manejo.txt",
                  header = TRUE, sep = "\t")

cra$tens[cra$tens == 0] <- 0.1
cras <- subset(cra, condi == "LVA3,5")
cras <- aggregate(umid ~ posi + prof + tens, data = cras, FUN = mean)
cras$caso <- with(cras, interaction(posi, prof))
cras$ltens <- log(cras$tens)

xyplot(umid ~ ltens | posi, groups = prof, data = cras,
       type = c("p", "a"))

# modelo : van Genuchten com retrição de Mualem.
# ltens  : representado por ltens (log da tensão matricial, psi).
# umid   : representado por umid, conteúdo de água do solo ( % ).
# Us     : assíntota inferior, mínimo da função, quando x -> +infinito.
# Ur     : assíntota superior, máximo da função, quando x -> -infinito.
# al     : locação, translada o ponto de inflexão.
# n      : forma, altera a taxa ao redor da inflexão.

model <- umid ~ Ur + (Us - Ur)/(1 + exp(n * (al + ltens)))^(1 - 1/n)

start <- list(Ur = c(init = 0.2, from = 0, to = 0.5),
              Us = c(init = 0.6, from = 0.4, to = 0.8),
              al = c(1, -2, 4),
              n = c(1.5, 1, 4))

cra.fit <- rp.nls(model = model,
                  data = cras,
                  start = start,
                  subset = "caso")

sapply(cra.fit, coef)

#-----------------------------------------------------------------------
# 2. Curva de produção em função da desfolha do algodão.

cap <- read.table("http://www.leg.ufpr.br/~walmes/data/algodão.txt",
                  header = TRUE, sep = "\t", encoding = "latin1")

cap$desf <- cap$desf/100
cap <- subset(cap, select = c(estag, desf, pcapu))
cap$estag <- factor(cap$estag, labels = c("vegetativo", "botão floral",
                                          "florescimento", "maçã",
                                          "capulho"))
str(cap)

xyplot(pcapu ~ desf | estag, data = cap, layout = c(5, 1),
       xlab = "Nível de desfolha artificial", ylab = "Peso de capulhos")

# modelo: potência.
# desf  : representado por desf (nível de desfolha artifical).
# pcapu : representado por pcapu (peso de capulhos), produto do algodão.
# f0    : intercepto, valor da função quando x=0 (situação ótima).
# delta : diferença no valor da função para x=0 e x=1 (situação
#         extrema).
# curv  : forma, indica como a função decresce, se th3=0 então função
#         linear.

model <- pcapu ~ f0 - delta * desf^exp(curv)
start <- list(f0 = c(30, 25, 35),
              delta = c(8, 0, 16),
              curv = c(0, -2, 4))

cap.fit <- rp.nls(model = model,
                  data = cap,
                  start = start,
                  subset = "estag")

length(cap.fit)
sapply(cap.fit, coef)
lapply(cap.fit, summary)

#-----------------------------------------------------------------------
# 3. Curva de produção em função do nível de potássio no solo.

soja <- read.table("http://www.leg.ufpr.br/~walmes/data/soja.txt",
                   header = TRUE, sep = "\t", encoding = "latin1",
                   dec = ",")

soja$agua <- factor(soja$agua)
str(soja)

xyplot(rengrao ~ potassio|agua, data=soja)

# modelo: linear-plato.
# x: representado por potássio, conteúdo de potássio do solo.
# y: representado por rengrao, redimento de grãos por parcela.
# f0: intercepto, valor da função quando x=0.
# tx: taxa de incremento no rendimento por unidade de x.
# brk: valor acima do qual a função é constante.

model <- rengrao ~ f0 + tx * potassio * (potassio < brk) +
    tx * brk * (potassio >= brk)
start <- list(f0 = c(15, 5, 25),
              tx = c(0.2, 0, 1),
              brk = c(50, 0, 180))

pot.fit <- rp.nls(model = model, data = soja, start = start,
                  subset = "agua")

sapply(pot.fit, coef)

#-----------------------------------------------------------------------
# 4. Curva de lactação.

lac <- read.table("http://www.leg.ufpr.br/~walmes/data/saxton_lactacao1.txt",
                  header = TRUE, sep = "\t", encoding = "latin1")

lac$vaca <- factor(lac$vaca)
str(lac)

xyplot(leite ~ dia | vaca, data = lac)

# modelo: de Wood (nucleo da densidade gama).
# x: representado por dia, dia após parto.
# y: representado por leite, quantidade produzida.
# th1: escala, desprovido de interpretação direta.
# th2: forma, desprovido de interpretação direta.
# th3: forma, desprovido de interpretação direta.

model <- leite ~ th1 * dia^th2 * exp(-th3 * dia)
start <- list(th1 = c(15, 10, 20),
              th2 = c(0.2, 0.05, 0.5),
              th3 = c(0.0025, 0.001, 0.008))

lac.fit <- rp.nls(model = model,
                  data = lac,
                  start = start,
                  subset = "vaca",
                  xlim = c(0, 310))

sapply(lac.fit, coef)

#-----------------------------------------------------------------------
# 5. Curvas de crescimento em placa de petri.

cre <- read.table("http://www.leg.ufpr.br/~walmes/data/cresmicelial.txt",
                  header = TRUE, sep = "\t", encoding = "latin1")

cre$isolado <- factor(cre$isolado)
cre$mmdia <- sqrt(cre$mmdia)
str(cre)

xyplot(mmdia ~ temp | isolado, data = cre)

# modelo: quadrático na forma canônica.
# x: representado por temp, temperatura da câmara de crescimento.
# y: representado por mmdia, taxa média de crescimento.
# thy: valor da função no ponto de máximo.
# thc: curvatura ou grau de especificidade à condição ótima.
# thx: ponto de máximo, temperatura de crescimento mais rápido.

model <- mmdia ~ thy + thc * (temp - thx)^2
start <- list(thy = c(4, 0, 7),
              thc = c(-0.05, 0, -0.5),
              thx = c(23, 18, 30))

mic.fit <- rp.nls(model = model,
                  data = cre,
                  start = start,
                  subset = "isolado",
                  xlim = c(17, 31),
                  ylim = c(0, 6))

t(sapply(mic.fit, coef))

#-----------------------------------------------------------------------
# 6. Curva de secagem do solo em microondas.

sec <- read.table("http://www.leg.ufpr.br/~walmes/data/emr11.txt",
                  header = TRUE, sep = "\t", encoding = "latin1")
str(sec)

xyplot(umrel ~ tempo|nome, data=sec)

# modelo: logístico.
# x: representado por tempo, período da amostra dentro do microondas.
# y: representado por umrel, umidade relativa o conteúdo total de água.
# th1: assíntota superior.
# th2: tempo para evaporar metade do conteúdo total de água.
# th3: proporcional à taxa máxima do processo.

model <- umrel ~ th1/(1 + exp(-(tempo - th2)/th3))
start <- list(th1 = c(1, 0.8, 1.2),
              th2 = c(15, 0, 40),
              th3 = c(8, 2, 14))

sec.fit <- rp.nls(model = model,
                  data = sec,
                  start = start,
                  subset = "nome")

sapply(sec.fit, coef)
lapply(sec.fit, summary)

Informações da Sessão

## Updated in 2016-10-10.
## 
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.1 LTS
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=pt_BR.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  base     
## 
## other attached packages:
##  [1] wzRfun_0.70         rootSolve_1.6.6     nls2_0.2           
##  [4] proto_0.3-10        nlstools_1.0-2      alr3_2.0.5         
##  [7] car_2.1-2           latticeExtra_0.6-28 RColorBrewer_1.1-2 
## [10] lattice_0.20-33     rmarkdown_1.0       knitr_1.14         
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.7        formatR_1.4        nloptr_1.0.4      
##  [4] methods_3.3.1      tools_3.3.1        digest_0.6.10     
##  [7] lme4_1.1-12        evaluate_0.9       nlme_3.1-128      
## [10] mgcv_1.8-13        Matrix_1.2-6       rpanel_1.1-3      
## [13] yaml_2.1.13        parallel_3.3.1     mvtnorm_1.0-5     
## [16] SparseM_1.7        stringr_1.0.0      MatrixModels_0.4-1
## [19] grid_3.3.1         nnet_7.3-12        survival_2.39-4   
## [22] multcomp_1.4-6     TH.data_1.0-7      minqa_1.2.4       
## [25] magrittr_1.5       codetools_0.2-14   htmltools_0.3.5   
## [28] MASS_7.3-45        splines_3.3.1      pbkrtest_0.4-6    
## [31] quantreg_5.26      sandwich_2.3-4     stringi_1.1.1     
## [34] doBy_4.5-15        zoo_1.7-13

Ajuste de Modelos Não Lineares

Walmes Marques Zeviani

Carregando Pacotes

Ganho de peso em perus em função da metionina na dieta

Consumo de energia em função da temperatura

Ajuste de modelos de forma interativa

Informações da Sessão