Modelo de regressão não linear
##=============================================================================
## Aplicação de modelos de regressão linear e
## não linear em ciências agrárias
##
## 09 à 11 de Dezembro de 2014 - Goiânia/GO
## Embrapa Arroz e Feijão
##
## Prof. Dr. Walmes M. Zeviani
## LEG/DEST/UFPR
##=============================================================================
##-----------------------------------------------------------------------------
## Definições da sessão.
pkg <- c("lattice", "latticeExtra", "gridExtra", "car", "alr3",
"plyr", "reshape", "nlme", "segmented", "wzRfun")
sapply(pkg, require, character.only=TRUE)
## lattice latticeExtra gridExtra car alr3 plyr
## TRUE TRUE TRUE TRUE TRUE TRUE
## reshape nlme segmented wzRfun
## TRUE TRUE TRUE TRUE
trellis.device(color=FALSE)
##-----------------------------------------------------------------------------
## Dados de crescimento de goiaba.
goi <- read.table("http://www.leg.ufpr.br/~walmes/data/goiaba.txt",
header=TRUE, sep="\t")
goi$daa2 <- goi$daa^2.5
str(goi)
## 'data.frame': 174 obs. of 8 variables:
## $ daa : int 13 13 13 13 13 13 13 13 13 13 ...
## $ coleta: int 1 1 1 1 1 1 1 1 1 1 ...
## $ rep : int 1 2 3 4 5 6 7 8 9 10 ...
## $ long : num 29.1 32.9 36.3 34.7 33.3 ...
## $ trans : num 20.5 22.8 22.9 22.7 21.4 ...
## $ peso : num 6.66 8.51 9.93 9.14 7.6 7.54 6.27 9.94 7.95 7.79 ...
## $ volume: int 6 9 11 10 7 8 6 9 8 8 ...
## $ daa2 : num 609 609 609 609 609 ...
## Calcula média e desvio padrão em cada data.
aux <- ddply(goi, .(daa), summarise, m=mean(peso), s=sd(peso))
p1 <- xyplot(peso~daa, data=goi, type=c("p","a"),
ylab="Peso do fruto (g)", xlab="Dias após antese")
p2 <- xyplot(s~m, aux, type=c("p","r"),
xlab="Média de peso", ylab="Desvio-padrão de peso")
grid.arrange(p1, p2, nrow=1)

## Na escala log para o peso.
aux <- ddply(goi, .(daa), summarise, m=mean(log(peso)), s=sd(log(peso)))
p1 <- xyplot(log(peso)~daa, data=goi, type=c("p","a"),
ylab="Peso do fruto (g)", xlab="Dias após antese")
p2 <- xyplot(s~m, aux, type=c("p","r"),
xlab="Média de peso", ylab="Desvio-padrão de peso")
grid.arrange(p1, p2, nrow=1)

##-----------------------------------------------------------------------------
## Ajustes.
## Modelo tradicional = suposição de variância constante.
n0 <- gnls(peso~A-(A-D)*exp(-exp(C*(daa-B))), data=goi,
start=c(A=200, C=0.09, B=105, D=7))
summary(n0)
## Generalized nonlinear least squares fit
## Model: peso ~ A - (A - D) * exp(-exp(C * (daa - B)))
## Data: goi
## AIC BIC logLik
## 1619.093 1634.888 -804.5465
##
## Coefficients:
## Value Std.Error t-value p-value
## A 205.76239 4.196193 49.03549 0
## C 0.09367 0.010148 9.22980 0
## B 107.35182 0.874391 122.77326 0
## D 18.70608 3.179082 5.88411 0
##
## Correlation:
## A C B
## C -0.357
## B 0.590 -0.131
## D -0.103 0.467 0.219
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.17237358 -0.42834311 -0.07757784 0.35077905 3.94396837
##
## Residual standard error: 24.94118
## Degrees of freedom: 174 total; 170 residual
## Modelo com função de variâcia: var(e)=sigma^2*exp(2*delta*daa).
n1 <- update(n0,
## weights=varExp(0.03, form=~daa)
weights=varPower(0.1, form=~log(daa))
)
summary(n1)
## Generalized nonlinear least squares fit
## Model: peso ~ A - (A - D) * exp(-exp(C * (daa - B)))
## Data: goi
## AIC BIC logLik
## 1432.407 1451.362 -710.2036
##
## Variance function:
## Structure: Power of variance covariate
## Formula: ~log(daa)
## Parameter estimates:
## power
## 5.764348
##
## Coefficients:
## Value Std.Error t-value p-value
## A 217.15613 8.784140 24.72139 0
## C 0.05520 0.003531 15.63409 0
## B 108.05936 1.880553 57.46148 0
## D 7.49496 0.426361 17.57893 0
##
## Correlation:
## A C B
## C -0.566
## B 0.879 -0.624
## D -0.390 0.837 -0.386
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.66220159 -0.53129856 0.09399151 0.63400182 2.80361091
##
## Residual standard error: 0.003829197
## Degrees of freedom: 174 total; 170 residual
logLik(n0)
## 'log Lik.' -804.5465 (df=5)
logLik(n1)
## 'log Lik.' -710.2036 (df=6)
anova(n1, n0)
## Model df AIC BIC logLik Test L.Ratio p-value
## n1 1 6 1432.407 1451.362 -710.2036
## n0 2 5 1619.093 1634.888 -804.5465 1 vs 2 188.6858 <.0001
##-----------------------------------------------------------------------------
## Diagnóstico.
da <- rbind(data.frame(model="trad",
r=residuals(n0, type="pearson"),
f=fitted(n0)),
data.frame(model="var",
r=residuals(n1, type="pearson"),
f=fitted(n1)))
p1 <- qqmath(~r|model, data=da,
xlab="Quantis teóricos",
ylab="Resíduos padronizados")
p2 <- xyplot(sqrt(abs(r))~f|model, data=da, type=c("p","smooth"),
xlab="Valor ajustado",
ylab=expression(sqrt("|Resíduos padronizados|")))
grid.arrange(p1, p2)

##-----------------------------------------------------------------------------
## Predição a partir dos modelos.
model <- deriv3(~ A-(A-D)*exp(-exp(C*(daa-B))),
c("A","C","B","D"),
function(daa, A, C, B, D){NULL})
l <- list(n0, n1)
names(l) <- levels(da$model)
L <- l
for(i in seq_along(L)){
m4 <- l[[i]]
U <- chol(vcov(m4))
pred <- data.frame(daa=seq(1,140,1))
m <- model(daa=pred$daa, A=coef(m4)["A"], C=coef(m4)["C"],
B=coef(m4)["B"], D=coef(m4)["D"])
F0 <- attr(m, "gradient")
pred$y <- c(m)
pred$se <- sqrt(apply(F0%*%t(U), 1, function(x) sum(x^2)))
z <- qnorm(0.975)
pred <- transform(pred, lwr=y-z*se, upr=y+z*se)
L[[i]] <- pred
}
str(L)
## List of 2
## $ trad:'data.frame': 140 obs. of 5 variables:
## ..$ daa: num [1:140] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ y : num [1:140] 18.7 18.7 18.7 18.7 18.7 ...
## ..$ se : num [1:140] 3.17 3.17 3.17 3.17 3.17 ...
## ..$ lwr: num [1:140] 12.5 12.5 12.5 12.5 12.5 ...
## ..$ upr: num [1:140] 24.9 24.9 24.9 24.9 24.9 ...
## $ var :'data.frame': 140 obs. of 5 variables:
## ..$ daa: num [1:140] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ y : num [1:140] 8.06 8.1 8.13 8.16 8.2 ...
## ..$ se : num [1:140] 0.278 0.273 0.267 0.262 0.256 ...
## ..$ lwr: num [1:140] 7.52 7.56 7.61 7.65 7.7 ...
## ..$ upr: num [1:140] 8.61 8.63 8.65 8.68 8.7 ...
L <- ldply(L)
names(L)[1] <- "model"
L$model <- factor(L$model, levels=levels(da$model))
str(L)
## 'data.frame': 280 obs. of 6 variables:
## $ model: Factor w/ 2 levels "trad","var": 1 1 1 1 1 1 1 1 1 1 ...
## $ daa : num 1 2 3 4 5 6 7 8 9 10 ...
## $ y : num 18.7 18.7 18.7 18.7 18.7 ...
## $ se : num 3.17 3.17 3.17 3.17 3.17 ...
## $ lwr : num 12.5 12.5 12.5 12.5 12.5 ...
## $ upr : num 24.9 24.9 24.9 24.9 24.9 ...
icm <- ddply(goi, .(daa), summarise,
m=mean(peso),
lwr=mean(peso)-qt(0.975, length(peso)-1)*sd(peso)*
sqrt(1/length(peso)),
upr=mean(peso)+qt(0.975, length(peso)-1)*sd(peso)*
sqrt(1/length(peso)))
##-----------------------------------------------------------------------------
## Gráfico com as bandas de confiança.
ylim <- range(goi$peso)
xyplot(y~daa|model, data=L, type="l", col=1,
ylab="Massa fresca dos frutos (g)",
xlab="Dias após a antese",
strip=strip.custom(bg="gray90"),
ly=L$lwr, uy=L$upr, cty="bands",
prepanel=prepanel.cbH, panel=panel.cbH)+
layer(panel.arrows(icm$daa, icm$lwr, icm$daa, icm$upr,
code=3, length=0.05, angle=90))

Modelo segmentado para variável transformada
##-----------------------------------------------------------------------------
## Ajuste na escala log da resposta.
## log base 10 da resposta.
goi$lpeso <- log10(goi$peso)
## Semanas após antese.
goi$saa <- goi$daa/7
xyplot(log10(peso)~saa, data=goi, type=c("p","a"),
ylab="log 10 do peso do fruto (g)", xlab="Dias após antese")

##-----------------------------------------------------------------------------
## Ajustar modelo linear segmentado.
## Modelo linear simples.
m0 <- lm(lpeso~saa, data=goi)
## 1 ponto de quebra.
s1 <- segmented(m0, seg.Z=~saa,
psi=list(saa=c(17)),
control=seg.control(display=FALSE))
summary(s1)
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = m0, seg.Z = ~saa, psi = list(saa = c(17)),
## control = seg.control(display = FALSE))
##
## Estimated Break-Point(s):
## Est. St.Err
## 17.7200 0.6508
##
## t value for the gap-variable(s) V: 0
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.693875 0.018733 37.040 <2e-16 ***
## saa 0.090500 0.001556 58.172 <2e-16 ***
## U1.saa -0.096918 0.042570 -2.277 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1009 on 170 degrees of freedom
## Multiple R-Squared: 0.9607, Adjusted R-squared: 0.96
##
## Convergence attained in 15 iterations with relative change 0.0008556697
confint(s1)
## $saa
## Est. CI(95%).l CI(95%).u
## 17.72 16.44 19.01
class(s1) <- c("segmented","lm")
plot(lpeso~saa, data=goi)
plot(s1, add=TRUE, col=2)

class(s1) <- c("lm","segmented")
par(mfrow=c(2,2)); plot(s1); layout(1)

pureErrorAnova(s1)
## Analysis of Variance Table
##
## Response: lpeso
## Df Sum Sq Mean Sq F value Pr(>F)
## saa 1 41.997 41.997 6004.8198 < 2.2e-16 ***
## U1.saa 1 0.319 0.319 45.6508 2.417e-10 ***
## psi1.saa 1 0.002 0.002 0.3227 0.5708
## Residuals 170 1.731 0.010
## Lack of fit 8 0.598 0.075 10.6813 5.276e-12 ***
## Pure Error 162 1.133 0.007
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 pontos de quebra.
s2 <- update(s1, psi=list(saa=c(10,17)))
summary(s2)
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = m0, seg.Z = ~saa, psi = list(saa = c(10, 17)),
## control = seg.control(display = FALSE))
##
## Estimated Break-Point(s):
## Est. St.Err
## psi1.saa 12.96 0.6145
## psi2.saa 16.78 0.2425
##
## t value for the gap-variable(s) V: 0 0
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.790262 0.019286 40.976 <2e-16 ***
## saa 0.073870 0.002382 31.010 <2e-16 ***
## U1.saa 0.075574 0.015347 4.924 NA
## U2.saa -0.161626 0.022992 -7.030 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08304 on 168 degrees of freedom
## Multiple R-Squared: 0.9737, Adjusted R-squared: 0.9729
##
## Convergence attained in 2 iterations with relative change -1.915515e-16
slope(s2)
## $saa
## Est. St.Err. t value CI(95%).l CI(95%).u
## slope1 0.07387 0.002382 31.0100 0.06917 0.07857
## slope2 0.14940 0.015160 9.8570 0.11950 0.17940
## slope3 -0.01218 0.017290 -0.7048 -0.04631 0.02194
confint(s2)
## $saa
## Est. CI(95%).l CI(95%).u
## psi1.saa 12.96 11.75 14.18
## psi2.saa 16.78 16.30 17.26
## class(s1) <- c("segmented","lm")
plot(lpeso~saa, data=goi)
plot(s2, add=TRUE, col=2)

class(s2) <- c("lm","segmented")
par(mfrow=c(2,2)); plot(s2); layout(1)

pureErrorAnova(s2)
## Analysis of Variance Table
##
## Response: lpeso
## Df Sum Sq Mean Sq F value Pr(>F)
## saa 1 41.997 41.997 6004.8198 < 2e-16 ***
## U1.saa 1 0.021 0.021 2.9787 0.08627 .
## U2.saa 1 0.871 0.871 124.4906 < 2e-16 ***
## psi1.saa 1 0.000 0.000 0.0289 0.86519
## psi2.saa 1 0.000 0.000 0.0023 0.96175
## Residuals 168 1.158 0.007
## Lack of fit 6 0.025 0.004 0.6053 0.72580
## Pure Error 162 1.133 0.007
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Requer um ponto de quebra a mais?
anova(s1, s2)
## Analysis of Variance Table
##
## Model 1: lpeso ~ saa + U1.saa + psi1.saa
## Model 2: lpeso ~ saa + U1.saa + U2.saa + psi1.saa + psi2.saa
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 170 1.7306
## 2 168 1.1584 2 0.57223 41.494 2.265e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##-----------------------------------------------------------------------------
## Predição.
## er <- extendrange(goi$saa, f=0.1)
## pred <- data.frame(saa=seq(er[1], er[2], l=50))
## pred <- cbind(pred, predict(s2, newdata=pred, interval="confidence"))
## Declarar o modelo com a nls para fazer bandas de predição.
slope(s2)
## $saa
## Est. St.Err. t value CI(95%).l CI(95%).u
## slope1 0.07387 0.002382 31.0100 0.06917 0.07857
## slope2 0.14940 0.015160 9.8570 0.11950 0.17940
## slope3 -0.01218 0.017290 -0.7048 -0.04631 0.02194
str(goi)
## 'data.frame': 174 obs. of 10 variables:
## $ daa : int 13 13 13 13 13 13 13 13 13 13 ...
## $ coleta: int 1 1 1 1 1 1 1 1 1 1 ...
## $ rep : int 1 2 3 4 5 6 7 8 9 10 ...
## $ long : num 29.1 32.9 36.3 34.7 33.3 ...
## $ trans : num 20.5 22.8 22.9 22.7 21.4 ...
## $ peso : num 6.66 8.51 9.93 9.14 7.6 7.54 6.27 9.94 7.95 7.79 ...
## $ volume: int 6 9 11 10 7 8 6 9 8 8 ...
## $ daa2 : num 609 609 609 609 609 ...
## $ lpeso : num 0.823 0.93 0.997 0.961 0.881 ...
## $ saa : num 1.86 1.86 1.86 1.86 1.86 ...
model <- lpeso~b0+b1*saa+b2*(saa-xb1)*(saa>xb1)+b3*(saa-xb2)*(saa>xb2)
start <- list(b0=c(0.9, 0.5, 1.2), b1=c(0.07, 0.05, 0.2),
b2=c(0.1, -0.2, 0.2), b3=c(0.1, -0.2, 0.2),
xb1=c(10, 5, 15), xb2=c(17, 15, 20))
## rp.nls(model=model, start=start, data=goi)
## summary(rp.fit)
## dput(as.list(round(coef(rp.fit),3)))
start <- list(b0 = 0.793, b1 = 0.073, b2 = 0.067, xb1 = 12.496,
b3 = -0.154, xb2 = 16.856)
n0 <- nls(model, data=goi, start=start)
##-----------------------------------------------------------------------------
## Predição.
f <- function(th, saa){
y <- th["b0"]+th["b1"]*saa+
th["b2"]*(saa-th["xb1"])*(saa>th["xb1"])+
th["b3"]*(saa-th["xb2"])*(saa>th["xb2"])
return(y)
}
er <- extendrange(goi$saa, f=0.1)
pred <- data.frame(saa=seq(er[1], er[2], l=100))
pred$y <- f(coef(n0), saa=pred$saa)
F <- rootSolve::gradient(f=f, x=coef(n0),
saa=pred$saa)
U <- chol(vcov(n0))
pred$se <- sqrt(apply(F%*%t(U), 1, function(x) sum(x^2)))
aux <- outer(pred$se, qnorm(0.975)*c(lwr=-1, fit=0, upr=1), FUN="*")
pred <- cbind(pred, sweep(aux, MARGIN=1, STATS=pred$y, FUN="+"))
str(pred)
## 'data.frame': 100 obs. of 6 variables:
## $ saa: num 0.0714 0.2879 0.5043 0.7208 0.9372 ...
## $ y : num 0.799 0.814 0.83 0.846 0.862 ...
## $ se : num 0.0214 0.0208 0.0202 0.0196 0.019 ...
## $ lwr: num 0.757 0.774 0.791 0.808 0.825 ...
## $ fit: num 0.799 0.814 0.83 0.846 0.862 ...
## $ upr: num 0.84 0.855 0.87 0.884 0.899 ...
p1 <- xyplot(log10(peso)~saa, data=goi,
ylab="log 10 do peso do fruto (g)",
xlab="Dias após antese")
p2 <- xyplot(fit~saa, data=pred, type="l",
ly=pred$lwr, uy=pred$upr, cty="bands",
prepanel=prepanel.cbH, panel=panel.cbH)
p1+as.layer(p2, under=TRUE)+
layer(panel.abline(v=coef(n0)[c("xb1","xb2")], lty=2))

##-----------------------------------------------------------------------------
## Informações da sessão.
Sys.time()
## [1] "2014-12-11 21:06:13 BRST"
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: i686-pc-linux-gnu (32-bit)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=pt_BR.UTF-8
## [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=pt_BR.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=pt_BR.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets base
##
## other attached packages:
## [1] wzRfun_0.4 segmented_0.5-0.0 nlme_3.1-118 reshape_0.8.5
## [5] plyr_1.8.1 alr3_2.0.5 car_2.0-22 gridExtra_0.9.1
## [9] latticeExtra_0.6-26 RColorBrewer_1.0-5 lattice_0.20-29 rmarkdown_0.3.3
## [13] knitr_1.8
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.4 doBy_4.5-12 evaluate_0.5.5 formatR_1.0
## [5] htmltools_0.2.6 MASS_7.3-35 Matrix_1.1-4 methods_3.1.2
## [9] multcomp_1.3-7 mvtnorm_1.0-1 nnet_7.3-8 Rcpp_0.11.3
## [13] rootSolve_1.6.5.1 sandwich_2.3-2 splines_3.1.2 stringr_0.6.2
## [17] survival_2.37-7 TH.data_1.0-5 tools_3.1.2 yaml_2.1.13
## [21] zoo_1.7-11