Regressão não linear

##=============================================================================
## Aplicação de modelos de regressão linear e
##   não linear em ciências agrárias
##
##   09 à 11 de Dezembro de 2014 - Goiânia/GO
##   Embrapa Arroz e Feijão
## 
##                                                  Prof. Dr. Walmes M. Zeviani
##                                                                LEG/DEST/UFPR
##=============================================================================

##-----------------------------------------------------------------------------
## Definições da sessão.

pkg <- c("lattice", "latticeExtra", "reshape",
         "car", "alr3", "nls2", "nlstools", "rootSolve",
         "plyr", "wzRfun")
sapply(pkg, library, character.only=TRUE, logical.return=TRUE)

##      lattice latticeExtra      reshape          car         alr3         nls2 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##     nlstools    rootSolve         plyr       wzRfun 
##         TRUE         TRUE         TRUE         TRUE

trellis.device(color=FALSE)

## url <- "http://nls2.googlecode.com/svn-history/r4/trunk/R/as.lm.R"
## download.file(url, dest=basename(url))
## path <- ifelse(Sys.info()["user"]=="walmes", basename(url), url)
## source(path)
source("~/Dropbox/CursoR/GeneticaEsalq/as.lm.R")

Ganho de peso em perus em função da metionina na dieta

##-----------------------------------------------------------------------------
## Ajuste de modelo de regressão não linear.

## turk0
str(turk0)

## 'data.frame':    35 obs. of  2 variables:
##  $ A   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Gain: int  644 631 661 624 633 610 615 605 608 599 ...

xyplot(Gain~A, data=turk0, type=c("p","smooth"))

##-----------------------------------------------------------------------------
## Valores iniciais baseados na interpretação gráfica.
## Modelo: th0+th1*x/(th2+x);

start <- list(th0=625, th1=800-625, th2=0.1)

xyplot(Gain~A, data=turk0)+
    layer(with(start, panel.curve(th0+th1*x/(th2+x))))

##-----------------------------------------------------------------------------
## Ajuste.

n0 <- nls(Gain~th0+th1*A/(th2+A), data=turk0,
          start=start)
summary(n0)

## 
## Formula: Gain ~ th0 + th1 * A/(th2 + A)
## 
## Parameters:
##      Estimate Std. Error t value Pr(>|t|)    
## th0 622.24167    6.20283 100.316  < 2e-16 ***
## th1 235.21690   22.56029  10.426 8.09e-12 ***
## th2   0.15476    0.04059   3.812 0.000591 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.14 on 32 degrees of freedom
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 1.176e-06

xyplot(Gain~A, data=turk0)+
    layer(with(as.list(coef(n0)),
               panel.curve(th0+th1*x/(th2+x), col=2)))

##-----------------------------------------------------------------------------
## Intervalos de confiança.

## Baseado na log-verossimilhança.
confint(n0)

## Waiting for profiling to be done...

##           2.5%       97.5%
## th0 609.656524 634.7143666
## th1 196.536953 290.9141162
## th2   0.094081   0.2665297

## Baseado na aproximação quadrática da verossimilhança, conhecido como
## intervalos de Wald ou assintóticos. São simétricos por construção.
confint.default(n0)

##            2.5 %     97.5 %
## th0 610.08435035 634.398984
## th1 190.99955337 279.434253
## th2   0.07519662   0.234318

##-----------------------------------------------------------------------------
## Valores iniciais baseados na interpretação gráfica.
## Modelo: th0+th1*(1-2^(-x/th2))

start <- list(th0=625, th1=800-625, th2=0.1)

xyplot(Gain~A, data=turk0)+
    layer(with(start, panel.curve(th0+th1*(1-2^(-x/th2)))))

n1 <- nls(Gain~th0+th1*(1-2^(-A/th2)), data=turk0,
          start=start)
summary(n1)

## 
## Formula: Gain ~ th0 + th1 * (1 - 2^(-A/th2))
## 
## Parameters:
##      Estimate Std. Error t value Pr(>|t|)    
## th0 622.95806    5.90089  105.57  < 2e-16 ***
## th1 178.25193   11.63559   15.32 2.74e-16 ***
## th2   0.09732    0.01647    5.91 1.41e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.66 on 32 degrees of freedom
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 2.339e-06

xyplot(Gain~A, data=turk0)+
    layer(with(as.list(coef(n1)),
               panel.curve(th0+th1*(1-2^(-x/th2)), col=2)))

confint(n1)

## Waiting for profiling to be done...

##             2.5%       97.5%
## th0 610.97091738 634.8551958
## th1 156.32267475 204.4767104
## th2   0.07089817   0.1395906

confint.default(n1)

##            2.5 %      97.5 %
## th0 611.39252556 634.5235962
## th1 155.44658106 201.0572728
## th2   0.06504651   0.1295974

##-----------------------------------------------------------------------------
## Explorando o conteúdo dentro da estrutura do objeto.

## Estrutura do objeto.
str(n1)

## List of 6
##  $ m          :List of 16
##   ..$ resid     :function ()  
##   ..$ fitted    :function ()  
##   ..$ formula   :function ()  
##   ..$ deviance  :function ()  
##   ..$ lhs       :function ()  
##   ..$ gradient  :function ()  
##   ..$ conv      :function ()  
##   ..$ incr      :function ()  
##   ..$ setVarying:function (vary = rep_len(TRUE, length(useParams)))  
##   ..$ setPars   :function (newPars)  
##   ..$ getPars   :function ()  
##   ..$ getAllPars:function ()  
##   ..$ getEnv    :function ()  
##   ..$ trace     :function ()  
##   ..$ Rmat      :function ()  
##   ..$ predict   :function (newdata = list(), qr = FALSE)  
##   ..- attr(*, "class")= chr "nlsModel"
##  $ convInfo   :List of 5
##   ..$ isConv     : logi TRUE
##   ..$ finIter    : int 4
##   ..$ finTol     : num 2.34e-06
##   ..$ stopCode   : int 0
##   ..$ stopMessage: chr "converged"
##  $ data       : symbol turk0
##  $ call       : language nls(formula = Gain ~ th0 + th1 * (1 - 2^(-A/th2)), data = turk0, start = start, algorithm = "default",      control = structure(list(maxiter = 50, tol = 1e-05, minFactor = 0.0009765625,  ...
##  $ dataClasses: Named chr "numeric"
##   ..- attr(*, "names")= chr "A"
##  $ control    :List of 5
##   ..$ maxiter  : num 50
##   ..$ tol      : num 1e-05
##   ..$ minFactor: num 0.000977
##   ..$ printEval: logi FALSE
##   ..$ warnOnly : logi FALSE
##  - attr(*, "class")= chr "nls"

n1

## Nonlinear regression model
##   model: Gain ~ th0 + th1 * (1 - 2^(-A/th2))
##    data: turk0
##       th0       th1       th2 
## 622.95806 178.25193   0.09732 
##  residual sum-of-squares: 12367
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 2.339e-06

## Estrutura do summary do objeto.
sn1 <- summary(n1)
str(sn1)

## List of 11
##  $ formula     :Class 'formula' length 3 Gain ~ th0 + th1 * (1 - 2^(-A/th2))
##   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##  $ residuals   : num [1:35] 21.04 8.04 38.04 1.04 10.04 ...
##  $ sigma       : num 19.7
##  $ df          : int [1:2] 3 32
##  $ cov.unscaled: num [1:3, 1:3] 0.090096 -0.05547 0.000102 -0.05547 0.350306 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:3] "th0" "th1" "th2"
##   .. ..$ : chr [1:3] "th0" "th1" "th2"
##  $ call        : language nls(formula = Gain ~ th0 + th1 * (1 - 2^(-A/th2)), data = turk0, start = start, algorithm = "default",      control = structure(list(maxiter = 50, tol = 1e-05, minFactor = 0.0009765625,  ...
##  $ convInfo    :List of 5
##   ..$ isConv     : logi TRUE
##   ..$ finIter    : int 4
##   ..$ finTol     : num 2.34e-06
##   ..$ stopCode   : int 0
##   ..$ stopMessage: chr "converged"
##  $ control     :List of 5
##   ..$ maxiter  : num 50
##   ..$ tol      : num 1e-05
##   ..$ minFactor: num 0.000977
##   ..$ printEval: logi FALSE
##   ..$ warnOnly : logi FALSE
##  $ na.action   : NULL
##  $ coefficients: num [1:3, 1:4] 622.9581 178.2519 0.0973 5.9009 11.6356 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:3] "th0" "th1" "th2"
##   .. ..$ : chr [1:4] "Estimate" "Std. Error" "t value" "Pr(>|t|)"
##  $ parameters  : num [1:3, 1:4] 622.9581 178.2519 0.0973 5.9009 11.6356 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:3] "th0" "th1" "th2"
##   .. ..$ : chr [1:4] "Estimate" "Std. Error" "t value" "Pr(>|t|)"
##  - attr(*, "class")= chr "summary.nls"

## Erro padrão residual.
sn1$sigma

## [1] 19.65914

cbind(sn1$parameters, confint.default(n1))

##         Estimate  Std. Error    t value     Pr(>|t|)        2.5 %      97.5 %
## th0 622.95806090  5.90089177 105.570155 2.855738e-42 611.39252556 634.5235962
## th1 178.25192693 11.63559435  15.319538 2.743729e-16 155.44658106 201.0572728
## th2   0.09732197  0.01646738   5.909986 1.408734e-06   0.06504651   0.1295974

##-----------------------------------------------------------------------------
## Colocando as curvas no mesmo gráfico.

xyplot(Gain~A, data=turk0)+
    layer(with(as.list(coef(n0)),
               panel.curve(th0+th1*x/(th2+x), col=2)))+
    layer(with(as.list(coef(n1)),
               panel.curve(th0+th1*(1-2^(-x/th2)), col=4)))

## Soma de quadrados dos resíduos (menor melhor).
deviance(n0)

## [1] 12982.31

deviance(n1)

## [1] 12367.42

## R² (maior melhor).
cor(turk0$Gain, fitted(n0))^2

## [1] 0.9187919

cor(turk0$Gain, fitted(n1))^2

## [1] 0.9226382

## R² com quadro de anova.
R2nls(n0)

## $anova
##              Df    Sum Sq    Mean Sq  F value Pr(>F)
## 1 regression  2 146882.44 73441.2181 181.0248      0
## 2  residuals 32  12982.31   405.6971       NA     NA
## 
## $R2
## [1] 0.9187919

R2nls(n1)

## $anova
##              Df    Sum Sq    Mean Sq  F value Pr(>F)
## 1 regression  2 147497.33 73748.6628 190.8205      0
## 2  residuals 32  12367.42   386.4818       NA     NA
## 
## $R2
## [1] 0.9226382

##-----------------------------------------------------------------------------
## Colocar bandas de confiança.

## Modelo escrito como função dos parâmetros (theta).
f <- function(theta, xx){
  with(as.list(theta), th0+th1*(1-2^(-xx/th2)))
}

## Matriz de derivadas parciais em theta (n x p).
gradient(f, x=coef(n1), xx=c(0.0, 0.2, 0.4))

##      th0       th1       th2
## [1,]   1 0.0000000    0.0000
## [2,]   1 0.7593572 -627.8283
## [3,]   1 0.9420910 -302.1648

pred <- data.frame(A=seq(0, 0.5, l=20))
## pred$fit <- f(theta=coef(n1), xx=pred$A)
pred$fit <- predict(n1, newdata=pred)
der <- gradient(f, x=coef(n1), xx=pred$A)
str(der)

##  num [1:20, 1:3] 1 1 1 1 1 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:3] "th0" "th1" "th2"

## Etapas até o IC passando pelo erro padrão e margem de erro.
F <- der
U <- chol(vcov(n1))
pred$se <- sqrt(apply(F%*%t(U), 1, function(x) sum(x^2)))
tval <- qt(p=c(lwr=0.025, upr=0.975), df=df.residual(n1))
me <- outer(pred$se, tval, "*")
pred <- cbind(pred, sweep(me, 1, pred$fit, "+"))
str(pred)

## 'data.frame':    20 obs. of  5 variables:
##  $ A  : num  0 0.0263 0.0526 0.0789 0.1053 ...
##  $ fit: num  623 653 679 700 717 ...
##  $ se : num  5.9 4.4 4.76 5.34 5.61 ...
##  $ lwr: num  611 644 669 689 706 ...
##  $ upr: num  635 662 688 710 728 ...

## Equação do modelo ajustado.
coef(n1)

##          th0          th1          th2 
## 622.95806090 178.25192693   0.09732197

formula(n1)

## Gain ~ th0 + th1 * (1 - 2^(-A/th2))

## Arredonda as estimativas.
theta <- mapply(round,
                x=coef(n1), digits=c(2,2,4),
                SIMPLIFY=FALSE)
theta

## $th0
## [1] 622.96
## 
## $th1
## [1] 178.25
## 
## $th2
## [1] 0.0973

## Equação.
formula(n1)

## Gain ~ th0 + th1 * (1 - 2^(-A/th2))

eq <- substitute(
    expr=expression(Gain==th0+th1*(1-2^(-A/th2))),
    env=theta)
eval(eq)

## expression(Gain == 622.96 + 178.25 * (1 - 2^(-A/0.0973)))

## Observados, preditos e a banda de confiança.
xyplot(Gain~A, data=turk0)+
    as.layer(xyplot(fit~A, data=pred, type="l",
                    prepanel=prepanel.cbH, cty="bands",
                    ly=pred$lwr, uy=pred$upr,
                    panel=panel.cbH))+
    layer(panel.key(points=FALSE, text=eval(eq),
                    corner=c(0.95,0.05)))

##-----------------------------------------------------------------------------
## Região de confiança para os parâmetros.

apropos("contour")
ncp0 <- nlsContourRSS(n0)
ncp1 <- nlsContourRSS(n1)

plot(ncp0)
plot(ncp1)

ncr0 <- nlsConfRegions(n0)
ncr1 <- nlsConfRegions(n1)

plot(ncr0)
plot(ncr1)

Consumo de energia em função da temperatura

##-----------------------------------------------------------------------------
## Consumo de energia (KWH/dia) em função da temperatura (F).

str(segreg)

## 'data.frame':    39 obs. of  2 variables:
##  $ Temp: num  8.55 10.66 12.45 16.9 20.52 ...
##  $ C   : num  86.2 72.4 74.1 68.2 72.4 ...

xyplot(C~Temp, data=segreg, type=c("p","smooth"))

##-----------------------------------------------------------------------------
## Ajuste do modelo platô linear.
## f(x) = th0+th1*(x-th2)*(x>=th2)+0*(x<th2)

start <- list(th0=75, th1=0.5, th2=50)
xyplot(C~Temp, data=segreg)+
    layer(with(start,
               panel.curve(th0+th1*(x-th2)*(x>=th2)+0*(x<th2))))

## Ajuste.
n2 <- nls(C~th0+th1*(Temp-th2)*(Temp>=th2)+0*(Temp<th2),
          data=segreg, start=start)

## Estimativas e medidas de ajuste.
summary(n2)

## 
## Formula: C ~ th0 + th1 * (Temp - th2) * (Temp >= th2) + 0 * (Temp < th2)
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)    
## th0  74.6953     1.3433  55.607  < 2e-16 ***
## th1   0.5674     0.1006   5.641 2.10e-06 ***
## th2  41.9512     4.6583   9.006 9.44e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.373 on 36 degrees of freedom
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 9.323e-09

## Valor de F e R².
R2nls(n2)

## $anova
##              Df   Sum Sq   Mean Sq  F value       Pr(>F)
## 1 regression  2 2098.134 1049.0670 36.33724 2.307536e-09
## 2  residuals 36 1039.331   28.8703       NA           NA
## 
## $R2
## [1] 0.6687355

## Intervalos de confiança.
## confint(n2)
confint.default(n2)

##          2.5 %     97.5 %
## th0 72.0625625 77.3281125
## th1  0.3702916  0.7645672
## th2 32.8212170 51.0812374

## Observados e preditos.
xyplot(C~Temp, data=segreg)+
    layer(with(as.list(coef(n2)),
               panel.curve(th0+th1*(x-th2)*(x>=th2)+0*(x<th2), col=2)))

##-----------------------------------------------------------------------------
## Análise dos resíduos.

m2 <- as.lm(n2)
par(mfrow=c(2,2)); plot(m2); layout(1)

##-----------------------------------------------------------------------------
## Colocar bandas de confiança.

f <- function(theta, xx){
  with(as.list(theta), th0+th1*(xx-th2)*(xx>=th2)+0*(xx<th2))
}

## gradient(f, x=coef(n1), xx=c(0.0, 0.2, 0.4))

pred <- data.frame(Temp=sort(c(seq(10, 80, l=100),
                       coef(n2)["th2"]+c(-0.001,0,0.001))))
pred$fit <- predict(n2, newdata=pred)
der <- gradient(f, x=coef(n2), xx=pred$Temp)
str(der)

##  num [1:103, 1:3] 1 1 1 1 1 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:3] "th0" "th1" "th2"

F <- der
U <- chol(vcov(n2))
pred$se <- sqrt(apply(F%*%t(U), 1, function(x) sum(x^2)))
tval <- qt(p=c(lwr=0.025, upr=0.975), df=df.residual(n2))
me <- outer(pred$se, tval, "*")
pred <- cbind(pred, sweep(me, 1, pred$fit, "+"))
str(pred)

## 'data.frame':    103 obs. of  5 variables:
##  $ Temp: num  10 10.7 11.4 12.1 12.8 ...
##  $ fit : num  74.7 74.7 74.7 74.7 74.7 ...
##  $ se  : num  1.34 1.34 1.34 1.34 1.34 ...
##  $ lwr : num  72 72 72 72 72 ...
##  $ upr : num  77.4 77.4 77.4 77.4 77.4 ...

## Equação do modelo ajustado.
coef(n2)

##        th0        th1        th2 
## 74.6953375  0.5674294 41.9512272

formula(n2)

## C ~ th0 + th1 * (Temp - th2) * (Temp >= th2) + 0 * (Temp < th2)

## Arredonda as estimativas.
theta <- mapply(round,
                x=coef(n2), digits=c(2,4,2),
                SIMPLIFY=FALSE)
theta

## $th0
## [1] 74.7
## 
## $th1
## [1] 0.5674
## 
## $th2
## [1] 41.95

## Equação.
formula(n2)

## C ~ th0 + th1 * (Temp - th2) * (Temp >= th2) + 0 * (Temp < th2)

eq <- substitute(
    expr=c(
        expression(C==th0~", se"~Temp<th2),
        expression(C==th0+th1*(Temp-th2)~", se"~Temp>=th2)),
    env=theta)
eval(eq)

## expression(C == 74.7 ~ ", se" ~ Temp < 41.95, C == 74.7 + 0.5674 * 
##     (Temp - 41.95) ~ ", se" ~ Temp >= 41.95)

## Observados, preditos e a banda de confiança.
xyplot(C~Temp, data=segreg)+
    as.layer(xyplot(fit~Temp, data=pred, type="l",
                    prepanel=prepanel.cbH, cty="bands",
                    ly=pred$lwr, uy=pred$upr,
                    panel=panel.cbH))+
    layer(panel.key(points=FALSE, text=eval(eq),
                    corner=c(0.05,0.95)))

Tamanho em função da idade para peixes

##-----------------------------------------------------------------------------
## Tamanho em função da idade dos peixes.

str(lakemary)

## 'data.frame':    78 obs. of  2 variables:
##  $ Age   : int  1 1 2 2 2 2 3 3 3 3 ...
##  $ Length: int  67 62 109 83 91 88 137 131 122 122 ...

xyplot(Length~Age, data=lakemary, xlim=c(0,NA), ylim=c(0,NA))

## Ajustar o modelo von Bertalanffy.
## Original: tha*(1-exp(-thk*(x-thi)));
## Alternativo: th0+(tha-th0)*(1-2^(-x/thv));

start <- list(th0=30, tha=250, thv=3)
xyplot(Length~Age, data=lakemary)+
    layer(with(start,
               panel.curve(th0+(tha-th0)*(1-2^(-x/thv)), col=2)))

n3 <- nls(Length~th0+(tha-th0)*(1-2^(-Age/thv)),
          data=lakemary, start=start)
summary(n3)

## 
## Formula: Length ~ th0 + (tha - th0) * (1 - 2^(-Age/thv))
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)    
## th0  -6.4397    20.3820  -0.316    0.753    
## tha 192.8109    13.0804  14.740  < 2e-16 ***
## thv   1.7061     0.3714   4.593 1.73e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.96 on 75 degrees of freedom
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 6.047e-06

## confint(n3)
confint.default(n3)

##           2.5 %     97.5 %
## th0 -46.3876668  33.508328
## tha 167.1738835 218.447950
## thv   0.9781027   2.434099

xyplot(Length~Age, data=lakemary, xlim=c(0,NA), ylim=c(0,NA))+
    layer(with(as.list(coef(n3)),
               panel.curve(th0+(tha-th0)*(1-2^(-x/thv)), col=2)))

Abundância de peixes

##-----------------------------------------------------------------------------

str(swan96)

## 'data.frame':    27 obs. of  2 variables:
##  $ Day  : int  0 1 2 34 35 36 37 55 56 57 ...
##  $ LCPUE: num  0.368 0.636 0 1.358 0.847 ...

xyplot(LCPUE~Day, data=swan96)

## Swan Lake, Minnesota, 1996.
## LCPUE: log da quantidade de peixes por unidade de esforço.
## Day: dia de pesça.

m0 <- lm(LCPUE~Day+I(Day^2), swan96)
c0 <- coef(m0)
names(c0) <- sprintf("b%i", 0:2)

xyplot(LCPUE~Day, data=swan96)+
    layer(with(as.list(c0),
               panel.curve(b0+b1*x+b2*x^2, col=2)))

## Estimar o dia correspondente ao máximo.
with(as.list(c0), -0.5*b1/b2)

## [1] 183.1104

## E o intervalo de confiança? Método delta?

n4 <- nls(LCPUE~thy+thc*(Day-thx)^2, data=swan96,
          start=list(thy=4, thx=180, thc=c0["b2"]))
summary(n4)

## 
## Formula: LCPUE ~ thy + thc * (Day - thx)^2
## 
## Parameters:
##       Estimate Std. Error t value Pr(>|t|)    
## thy  4.359e+00  2.062e-01  21.142  < 2e-16 ***
## thx  1.831e+02  5.961e+00  30.716  < 2e-16 ***
## thc -1.273e-04  1.678e-05  -7.583 8.03e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5716 on 24 degrees of freedom
## 
## Number of iterations to convergence: 2 
## Achieved convergence tolerance: 3.301e-09

## confint(n4)
confint.default(n4)

##             2.5 %        97.5 %
## thy  3.954959e+00  4.763178e+00
## thx  1.714262e+02  1.947947e+02
## thc -1.601526e-04 -9.436730e-05

##-----------------------------------------------------------------------------
## Ajuste do modelo linear com dois segmentos.

## Modelo:
## f(x) = th0+          th1*x,       se x<th2
##      = (th0+th1*th2)+th3*(x-th2), se x>=th2

start <- list(th0=0.3, th1=3/100, th2=150, th3=-1/100)

xyplot(LCPUE~Day, data=swan96)+
    layer(with(start,
               panel.curve(th0+th1*x*(x<th2)+
                           (th1*th2+th3*(x-th2))*(x>=th2), col=2)))

n5 <- nls(LCPUE~th0+th1*Day*(Day<th2)+(th1*th2+th3*(Day-th2))*(Day>=th2),
          data=swan96, start=start)
summary(n5)

## 
## Formula: LCPUE ~ th0 + th1 * Day * (Day < th2) + (th1 * th2 + th3 * (Day - 
##     th2)) * (Day >= th2)
## 
## Parameters:
##       Estimate Std. Error t value Pr(>|t|)    
## th0   0.082586   0.230142   0.359    0.723    
## th1   0.037893   0.004151   9.128 4.15e-09 ***
## th2 103.620897   8.706149  11.902 2.60e-11 ***
## th3  -0.003786   0.001813  -2.088    0.048 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4689 on 23 degrees of freedom
## 
## Number of iterations to convergence: 4 
## Achieved convergence tolerance: 2.496e-07

## confint(n5)
confint.default(n5)

##            2.5 %        97.5 %
## th0 -0.368483065  5.336560e-01
## th1  0.029756680  4.602965e-02
## th2 86.557159102  1.206846e+02
## th3 -0.007339894 -2.327923e-04

xyplot(LCPUE~Day, data=swan96)+
    layer(with(as.list(coef(n5)),
               panel.curve(th0+th1*x*(x<th2)+
                           (th1*th2+th3*(x-th2))*(x>=th2), col=2)))

##-----------------------------------------------------------------------------
## Regiões de confiança conjunta por simulação: aceitação/rejeição.

rc <- nlsConfRegions(n5, exp=1.5, length=500)
plot(rc)

formula(n4)
rc <- nlsConfRegions(n4, exp=2.5, length=500)
plot(rc)

rc <- nlsConfRegions(n2, exp=2.5, length=500)
plot(rc)

Ajuste de modelos de forma interativa

##-----------------------------------------------------------------------------
## Exemplos da documentação.

help(rp.nls, h="html")

##--------------------------------------------
## A simple linear regression.

rp.nls(model=dist~b0+b1*speed,
       data=cars,
       start=list(
           b0=c(init=0, from=-20, to=20),
           b1=c(init=2, from=0, to=10)),
       assignTo="cars.fit")

cars.fit

##--------------------------------------------
## A more interesting example.

xyplot(rate~conc, groups=state, data=Puromycin)

rp.nls(model=rate~Int+(Top-Int)*conc/(Half+conc),
       data=Puromycin,
       start=list(
           Int=c(init=50, from=20, to=70),
           Top=c(init=150, from=100, to=200),
           Half=c(init=0.1, from=0, to=0.6)),
       subset="state",
       assignTo="Puro.fit",
       startCurve=list(col=3, lty=3, lwd=1),
       fittedCurve=list(col=4, lty=1, lwd=1.5),
       extraplot=function(Int, Top, Half){
           abline(h=c(Int, Top), v=Half, col=2, lty=2)
       },
       finalplot=function(Int, Top, Half){
           abline(h=c(Int, Top), v=Half, col=3, lty=1)
       },
       xlab="Concentration",
       ylab="Rate",
       xlim=c(0, 1.2),
       ylim=c(40, 220))

length(Puro.fit)
sapply(Puro.fit, coef)
sapply(Puro.fit, logLik)
sapply(Puro.fit, deviance)

##-----------------------------------------------------------------------------
## 1. Ajuste de curvas de retenção de água do solo.

cra <- read.table("http://www.leg.ufpr.br/~walmes/data/cra_manejo.txt",
                  header=TRUE, sep="\t")

cra$tens[cra$tens==0] <- 0.1
cras <- subset(cra, condi=="LVA3,5")
cras <- aggregate(umid~posi+prof+tens, data=cras, FUN=mean)
cras$caso <- with(cras, interaction(posi, prof))
cras$ltens <- log(cras$tens)

xyplot(umid~ltens|posi, groups=prof, data=cras, type=c("p","a"))

## modelo: van Genuchten com retrição de Mualem.
## x: representado por ltens (log da tensão matricial, psi).
## y: representado por umid, conteúdo de água do solo (%).
## th1: assíntota inferior, mínimo da função, quando x -> +infinito.
## th2: assíntota superior, máximo da função, quando x -> -infinito.
## th3: locação, translada o ponto de inflexão.
## th4: forma, altera a taxa ao redor da inflexão.

model <- umid~Ur+(Us-Ur)/(1+exp(n*(al+ltens)))^(1-1/n)
start <- list(Ur=c(init=0.2, from=0, to=0.5),
              Us=c(init=0.6, from=0.4, to=0.8),
              al=c(1, -2, 4),
              n=c(1.5, 1, 4))

rp.nls(model=model, data=cras,
       start=start, subset="caso",
       assignTo="cra.fit")

sapply(cra.fit, coef)

##-----------------------------------------------------------------------------
## 2. Curva de produção em função da desfolha do algodão.

cap <- read.table("http://www.leg.ufpr.br/~walmes/data/algodão.txt",
                  header=TRUE, sep="\t", encoding="latin1")

cap$desf <- cap$desf/100
cap <- subset(cap, select=c(estag, desf, pcapu))
cap$estag <- factor(cap$estag, labels=c("vegetativo","botão floral",
                                 "florescimento","maçã","capulho"))
str(cap)

xyplot(pcapu~desf|estag, data=cap, layout=c(5,1),
       xlab="Nível de desfolha artificial", ylab="Peso de capulhos")

## modelo: potência.
## x: representado por desf (nível de desfolha artifical).
## y: representado por pcapu (peso de capulhos), produto do algodão.
## th1: intercepto, valor da função quando x=0 (situação ótima).
## th2: diferença no valor da função para x=0 e x=1 (situação extrema).
## th3: forma, indica como a função decresce, se th3=0 então função linear.

model <- pcapu~f0-delta*desf^exp(curv)
start <- list(f0=c(30,25,35), delta=c(8,0,16), curv=c(0,-2,4))

rp.nls(model=model, data=cap,
       start=start, subset="estag",
       assignTo="cap.fit")

model <- pcapu~f0-f1*desf^((log(5)-log(f1))/log(xde))
start <- list(f0=c(30,25,35), f1=c(8,0,16), xde=c(0.5,0,1))

x11()
rp.nls(model=model, data=cap,
       start=start, subset="estag",
       assignTo="cap.fit",
       extraplot=function(f0,f1,xde){
           abline(v=xde, h=c(f0, f0-f1), lty=2, col=2)
       })

length(cap.fit)
sapply(cap.fit, coef)
lapply(cap.fit, summary)

##-----------------------------------------------------------------------------
## 3. Curva de produção em função do nível de potássio no solo.

soja <- read.table("http://www.leg.ufpr.br/~walmes/data/soja.txt",
                   header=TRUE, sep="\t", encoding="latin1", dec=",")

soja$agua <- factor(soja$agua)
str(soja)

xyplot(rengrao~potassio|agua, data=soja)

## modelo: linear-plato.
## x: representado por potássio, conteúdo de potássio do solo.
## y: representado por rengrao, redimento de grãos por parcela.
## f0: intercepto, valor da função quando x=0.
## tx: taxa de incremento no rendimento por unidade de x.
## brk: valor acima do qual a função é constante.

model <- rengrao~f0+tx*potassio*(potassio<brk)+tx*brk*(potassio>=brk)
start <- list(f0=c(15,5,25), tx=c(0.2,0,1), brk=c(50,0,180))

rp.nls(model=model, data=soja,
       start=start, subset="agua",
       assignTo="pot.fit")

sapply(pot.fit, coef)

##-----------------------------------------------------------------------------
## 4. Curva de lactação.

lac <- read.table("http://www.leg.ufpr.br/~walmes/data/saxton_lactacao1.txt",
                  header=TRUE, sep="\t", encoding="latin1")

lac$vaca <- factor(lac$vaca)
str(lac)

xyplot(leite~dia|vaca, data=lac)

## modelo: de Wood (nucleo da densidade gama).
## x: representado por dia, dia após parto.
## y: representado por leite, quantidade produzida.
## th1: escala, desprovido de interpretação direta.
## th2: forma, desprovido de interpretação direta.
## th3: forma, desprovido de interpretação direta.

model <- leite~th1*dia^th2*exp(-th3*dia)
start <- list(th1=c(15,10,20), th2=c(0.2,0.05,0.5),
              th3=c(0.0025,0.0010,0.0080))

rp.nls(model=model, data=lac,
       start=start, subset="vaca",
       assignTo="lac.fit", xlim=c(0,310))

sapply(lac.fit, coef)

##-----------------------------------------------------------------------------
## 5. Curvas de crescimento em placa de petri.

cre <- read.table("http://www.leg.ufpr.br/~walmes/data/cresmicelial.txt",
                  header=TRUE, sep="\t", encoding="latin1")

cre$isolado <- factor(cre$isolado)
cre$mmdia <- sqrt(cre$mmdia)
str(cre)

xyplot(mmdia~temp|isolado, data=cre)

## modelo: quadrático na forma canônica.
## x: representado por temp, temperatura da câmara de crescimento.
## y: representado por mmdia, taxa média de crescimento.
## thy: valor da função no ponto de máximo.
## thc: curvatura ou grau de especificidade à condição ótima.
## thx: ponto de máximo, temperatura de crescimento mais rápido.

model <- mmdia~thy+thc*(temp-thx)^2
start <- list(thy=c(4,0,7), thc=c(-0.05,0,-0.5), thx=c(23,18,30))

rp.nls(model=model, data=cre,
       start=start, subset="isolado",
       assignTo="mic.fit",
       extraplot=function(thy, thx, thc){
           abline(v=thx, h=thy, lty=2, col=2)
       },
       xlim=c(17,31), ylim=c(0,6))

t(sapply(mic.fit, coef))

##-----------------------------------------------------------------------------
## 6. Curva de secagem do solo em microondas.

sec <- read.table("http://www.leg.ufpr.br/~walmes/data/emr11.txt",
                  header=TRUE, sep="\t", encoding="latin1")
str(sec)

xyplot(umrel~tempo|nome, data=sec)

## modelo: logístico.
## x: representado por tempo, período da amostra dentro do microondas.
## y: representado por umrel, umidade relativa o conteúdo total de água.
## th1: assíntota superior.
## th2: tempo para evaporar metade do conteúdo total de água.
## th3: proporcional à taxa máxima do processo.

model <- umrel~th1/(1+exp(-(tempo-th2)/th3))
start <- list(th1=c(1,0.8,1.2), th2=c(15,0,40), th3=c(8,2,14))

rp.nls(model=model, data=sec,
       start=start, subset="nome",
       assignTo="sec.fit",
       extraplot=function(th1, th2, th3){
           abline(v=th2, h=th1/(1:2), lty=2, col=2)
       })

sapply(sec.fit, coef)
lapply(sec.fit, summary)

##-----------------------------------------------------------------------------
## Informações da sessão.

Sys.time()

## [1] "2014-12-11 20:59:59 BRST"

sessionInfo()

## R version 3.1.2 (2014-10-31)
## Platform: i686-pc-linux-gnu (32-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=pt_BR.UTF-8       
##  [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
## [10] LC_TELEPHONE=C             LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  base     
## 
## other attached packages:
##  [1] wzRfun_0.4          plyr_1.8.1          rootSolve_1.6.5.1   nlstools_1.0-0     
##  [5] nls2_0.2            proto_0.3-10        alr3_2.0.5          car_2.0-22         
##  [9] reshape_0.8.5       latticeExtra_0.6-26 RColorBrewer_1.0-5  lattice_0.20-29    
## [13] rmarkdown_0.3.3     knitr_1.8          
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.4    doBy_4.5-12     evaluate_0.5.5  formatR_1.0     grid_3.1.2     
##  [6] htmltools_0.2.6 MASS_7.3-35     Matrix_1.1-4    methods_3.1.2   multcomp_1.3-7 
## [11] mvtnorm_1.0-1   nnet_7.3-8      Rcpp_0.11.3     sandwich_2.3-2  splines_3.1.2  
## [16] stringr_0.6.2   survival_2.37-7 TH.data_1.0-5   tools_3.1.2     yaml_2.1.13    
## [21] zoo_1.7-11

Aplicação de modelos de regressão linear e não linear em ciências agrárias
09 à 11 de Dezembro de 2014 - Goiânia - GO	Prof. Dr. Walmes M. Zeviani
Embrapa Arroz e Feijão	Lab. de Estatística e Geoinformação - LEG
	Departamento de Estatística - UFPR

Aplicação de modelos de regressão linear e não linear em ciências agrárias

Regressão não linear

Ganho de peso em perus em função da metionina na dieta

Consumo de energia em função da temperatura

Tamanho em função da idade para peixes

Abundância de peixes

Ajuste de modelos de forma interativa