Course trailer
Simple linear regression
##=============================================================================
## Application of linear and nonlinear regression
## models in agricultural sciences
##
## December 9 to 11, 2014 - Goiânia/GO
## Embrapa Arroz e Feijão
##
## Prof. Dr. Walmes M. Zeviani
## LEG/DEST/UFPR
##=============================================================================
##-----------------------------------------------------------------------------
## Session setup.
pkg <- c("lattice", "latticeExtra", "gridExtra", "car", "ellipse",
"alr3", "plyr", "doBy", "multcomp", "aod", "nlme", "rsm",
"segmented", "splines", "mgcv", "rootSolve", "wzRfun")
sapply(pkg, require, character.only=TRUE)
## lattice latticeExtra gridExtra car ellipse alr3
## TRUE TRUE TRUE TRUE TRUE TRUE
## plyr doBy multcomp aod nlme rsm
## TRUE TRUE TRUE TRUE TRUE TRUE
## segmented splines mgcv rootSolve wzRfun
## TRUE TRUE TRUE TRUE TRUE
trellis.device(color=FALSE)
##-----------------------------------------------------------------------------
## Analysis of real data.
## Diagonal length of cell phones (cm).
x <- c(12.5, 13.2, 14.4, 11.3, 11.2, 11.2, 11.4, 11.0, 10.6, 12.4, 12.4)
## Weight of the phones (g).
y <- c(136.7, 144.8, 152.6, 120.9, 113.6, 126.9, 91.3, 107.9, 103.6,
116.9, 117.3)
data.frame(diagonal=x, peso=y)
## diagonal peso
## 1 12.5 136.7
## 2 13.2 144.8
## 3 14.4 152.6
## 4 11.3 120.9
## 5 11.2 113.6
## 6 11.2 126.9
## 7 11.4 91.3
## 8 11.0 107.9
## 9 10.6 103.6
## 10 12.4 116.9
## 11 12.4 117.3
##-----------------------------------------------------------------------------
## Visualize the data.
xyplot(y~x,
xlab="Comprimento diagonal (cm)",
ylab="Peso do aparelho (g)")

##-----------------------------------------------------------------------------
## Fit of the simple linear regression model.
m0 <- lm(y~x)
summary(m0)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.5642 -5.1349 0.0575 8.0189 15.6162
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -33.221 37.878 -0.877 0.40326
## x 12.902 3.153 4.092 0.00271 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.3 on 9 degrees of freedom
## Multiple R-squared: 0.6504, Adjusted R-squared: 0.6115
## F-statistic: 16.74 on 1 and 9 DF, p-value: 0.00271
confint(m0)
## 2.5 % 97.5 %
## (Intercept) -118.906100 52.46451
## x 5.769088 20.03530
vcov(m0)
## (Intercept) x
## (Intercept) 1434.7200 -118.95289
## x -118.9529 9.94287
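##-----------------------------------------------------------------------------
## Sketch (added for illustration): reproducing the lm() results by hand via
## the normal equations, assuming the x and y vectors defined above.
X <- cbind(1, x)                                  ## design matrix.
beta <- solve(crossprod(X), crossprod(X, y))      ## (X'X)^{-1} X'y = coef(m0).
s2 <- sum((y-X%*%beta)^2)/(length(y)-ncol(X))     ## residual variance.
s2*solve(crossprod(X))                            ## should match vcov(m0).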
##-----------------------------------------------------------------------------
## Checking the fitted model against the data.
plot(y~x, xlab="Comprimento diagonal (cm)", ylab="Peso (g)")
abline(m0)
legend("bottomright", legend=c("Observado","Ajustado"),
pch=c(1,NA), lty=c(NA,1), bty="n")

##-----------------------------------------------------------------------------
## Assessment of the model assumptions.
par(mfrow=c(2, 2)); plot(m0); layout(1)

##-----------------------------------------------------------------------------
## Influence measures.
cbind(r=residuals(m0), r.pad=rstandard(m0), r.stu=rstudent(m0))
## r r.pad r.stu
## 1 8.64336872 0.812204307 0.795460862
## 2 7.71183298 0.767694013 0.748718343
## 3 0.02920028 0.003864617 0.003643599
## 4 8.32600142 0.787680565 0.769634511
## 5 2.31622081 0.220530830 0.208482518
## 6 15.61622081 1.486843622 1.613979130
## 7 -22.56421798 -2.123123371 -2.833241337
## 8 -0.80334041 -0.077706591 -0.073287066
## 9 0.05753715 0.005823405 0.005490369
## 10 -9.86641189 -0.923198066 -0.914791714
## 11 -9.46641189 -0.885770151 -0.874080397
influence.measures(m0)
## Influence measures of
## lm(formula = y ~ x) :
##
## dfb.1_ dfb.x dffit cov.r cook.d hat inf
## 1 -0.10299 0.12642 0.28435 1.226 4.21e-02 0.113
## 2 -0.26654 0.29057 0.38592 1.399 7.83e-02 0.210
## 3 -0.00354 0.00370 0.00405 2.831 9.24e-06 0.553 *
## 4 0.17407 -0.15236 0.29115 1.254 4.44e-02 0.125
## 5 0.05369 -0.04780 0.08282 1.450 3.84e-03 0.136
## 6 0.41563 -0.37002 0.64117 0.834 1.74e-01 0.136
## 7 -0.55360 0.47380 -1.02453 0.357 2.95e-01 0.116
## 8 -0.02363 0.02154 -0.03236 1.510 5.89e-04 0.163
## 9 0.00255 -0.00239 0.00305 1.656 5.23e-06 0.236
## 10 0.09106 -0.11778 -0.31455 1.160 5.04e-02 0.106
## 11 0.08701 -0.11254 -0.30055 1.179 4.64e-02 0.106
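##-----------------------------------------------------------------------------
## Sketch (added for illustration): sensitivity of the fit to observation 7,
## which stands out in the residuals and influence measures above.
m0b <- update(m0, subset=-7)
rbind(full=coef(m0), drop7=coef(m0b))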
##-----------------------------------------------------------------------------
## Predictions with confidence and prediction bands.
pred <- data.frame(x=seq(min(x), max(x), l=20))
bc <- predict(m0, newdata=pred, interval="confidence")
bp <- predict(m0, newdata=pred, interval="prediction")
plot(y~x, xlab="Comprimento diagonal (cm)", ylab="Peso (g)")
matlines(x=pred$x, bc, col=1, lty=c(1,2,2))
matlines(x=pred$x, bp, col=1, lty=c(1,3,3))
legend("bottomright",
legend=c("Observado","Ajustado",
"Banda de confiança","Banda de predição"),
pch=c(1,NA,NA,NA), lty=c(NA,1,2,3),
bty="n")
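##-----------------------------------------------------------------------------
## Sketch (added for illustration): the same confidence band computed from
## first principles, X b ± t * sqrt(diag(X V X')), using m0 and pred above.
X <- model.matrix(~x, data=pred)
se <- sqrt(diag(X%*%vcov(m0)%*%t(X)))
tc <- qt(0.975, df=df.residual(m0))
head(cbind(fit=X%*%coef(m0), lwr=X%*%coef(m0)-tc*se, upr=X%*%coef(m0)+tc*se))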

Multiple linear regression
##-----------------------------------------------------------------------------
## Data.
da <- read.table(
"http://www.leg.ufpr.br/~walmes/data/MontgomeryASPE5th/Example12.14.txt",
header=TRUE, sep="\t")
names(da) <- tolower(names(da))
str(da)
## 'data.frame': 38 obs. of 6 variables:
## $ clarity : num 1 1 1 1 1 1 1 1 1 1 ...
## $ aroma : num 3.3 4.4 3.9 3.9 5.6 4.6 4.8 5.3 4.3 4.3 ...
## $ body : num 2.8 4.9 5.3 2.6 5.1 4.7 4.8 4.5 4.3 3.9 ...
## $ flavor : num 3.1 3.5 4.8 3.1 5.5 5 4.8 4.3 3.9 4.7 ...
## $ oakiness: num 4.1 3.9 4.7 3.6 5.1 4.1 3.3 5.2 2.9 3.9 ...
## $ quality : num 9.8 12.6 11.9 11.1 13.3 12.8 12.8 12 13.6 13.9 ...
scatterplotMatrix(da, span=0.8)

##-----------------------------------------------------------------------------
## Fit.
m0 <- lm(quality~., data=da)
summary(m0)
##
## Call:
## lm(formula = quality ~ ., data = da)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.85552 -0.57448 -0.07092 0.67275 1.68093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.9969 2.2318 1.791 0.082775 .
## clarity 2.3395 1.7348 1.349 0.186958
## aroma 0.4826 0.2724 1.771 0.086058 .
## body 0.2732 0.3326 0.821 0.417503
## flavor 1.1683 0.3045 3.837 0.000552 ***
## oakiness -0.6840 0.2712 -2.522 0.016833 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.163 on 32 degrees of freedom
## Multiple R-squared: 0.7206, Adjusted R-squared: 0.6769
## F-statistic: 16.51 on 5 and 32 DF, p-value: 4.703e-08
##-----------------------------------------------------------------------------
## Assessment of the model assumptions.
par(mfrow=c(2, 2)); plot(m0); layout(1)

residualPlots(m0)

## Test stat Pr(>|t|)
## clarity 1.140 0.263
## aroma 0.419 0.678
## body 0.976 0.337
## flavor -0.258 0.798
## oakiness -0.353 0.727
## Tukey test 0.137 0.891
##-----------------------------------------------------------------------------
## Influence measures.
head(cbind(r=residuals(m0), r.pad=rstandard(m0), r.stu=rstudent(m0)))
## r r.pad r.stu
## 1 0.28905165 0.27289988 0.26891509
## 2 1.38047606 1.29611865 1.31057072
## 3 -0.15912591 -0.15005811 -0.14774683
## 4 1.01214849 0.98944643 0.98911153
## 5 -0.06905214 -0.06179415 -0.06082458
## 6 -0.07708542 -0.06829302 -0.06722238
im <- influence.measures(m0)
summary(im)
## Potentially influential observations of
## lm(formula = quality ~ ., data = da) :
##
## dfb.1_ dfb.clrt dfb.arom dfb.body dfb.flvr dfb.okns dffit cov.r cook.d hat
## 12 0.83 -0.97 -0.40 0.23 -0.01 -0.04 1.38_* 1.16 0.30 0.39
## 14 -0.03 0.11 -0.16 -0.05 0.05 0.08 -0.28 1.84_* 0.01 0.36
## 20 -0.04 -0.38 0.46 -1.06_* 0.65 0.60 -1.54_* 0.30_* 0.31 0.20
## 32 -0.04 0.04 -0.03 0.02 0.04 -0.03 -0.07 1.57_* 0.00 0.23
## 37 -0.06 -0.15 0.04 0.28 -0.37 0.39 0.61 1.72_* 0.06 0.38
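##-----------------------------------------------------------------------------
## Sketch (added for illustration): index plots of the same diagnostics via
## the car package (interface assumed from its documentation), plus the
## observations exceeding a common Cook's distance cutoff of 4/n.
influenceIndexPlot(m0)
which(cooks.distance(m0) > 4/nrow(da))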
##-----------------------------------------------------------------------------
## Stepwise selection.
## step(m0, k=2) ## AIC.
m1 <- step(m0, k=log(nrow(da))) ## BIC.
## Start: AIC=26.74
## quality ~ clarity + aroma + body + flavor + oakiness
##
## Df Sum of Sq RSS AIC
## - body 1 0.9118 44.160 23.897
## - clarity 1 2.4577 45.706 25.204
## - aroma 1 4.2397 47.488 26.658
## <none> 43.248 26.741
## - oakiness 1 8.5978 51.846 29.994
## - flavor 1 19.8986 63.147 37.487
##
## Step: AIC=23.9
## quality ~ clarity + aroma + flavor + oakiness
##
## Df Sum of Sq RSS AIC
## - clarity 1 1.6936 45.853 21.689
## <none> 44.160 23.897
## - aroma 1 5.3545 49.514 24.608
## - oakiness 1 8.0807 52.241 26.645
## - flavor 1 27.3280 71.488 38.564
##
## Step: AIC=21.69
## quality ~ aroma + flavor + oakiness
##
## Df Sum of Sq RSS AIC
## <none> 45.853 21.689
## - aroma 1 6.6026 52.456 23.164
## - oakiness 1 6.9989 52.852 23.450
## - flavor 1 25.6888 71.542 34.955
summary(m1)
##
## Call:
## lm(formula = quality ~ aroma + flavor + oakiness, data = da)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5707 -0.6256 0.1521 0.6467 1.7741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.4672 1.3328 4.852 2.67e-05 ***
## aroma 0.5801 0.2622 2.213 0.033740 *
## flavor 1.1997 0.2749 4.364 0.000113 ***
## oakiness -0.6023 0.2644 -2.278 0.029127 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.161 on 34 degrees of freedom
## Multiple R-squared: 0.7038, Adjusted R-squared: 0.6776
## F-statistic: 26.92 on 3 and 34 DF, p-value: 4.203e-09
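##-----------------------------------------------------------------------------
## Sketch (added for illustration): an explicit F-test of the two terms
## dropped by step() (clarity and body), comparing m1 with the full model m0.
anova(m1, m0)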
##-----------------------------------------------------------------------------
## R², adjusted R², and PRESS.
smr <- summary(m1)
smr$r.squared
## [1] 0.7037672
smr$adj.r.squared
## [1] 0.677629
sum(residuals(m1)^2)
## [1] 45.85341
sum((residuals(m1)/(1-hatvalues(m1)))^2)
## [1] 56.05239
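##-----------------------------------------------------------------------------
## Sketch (added for illustration): the predictive R² implied by PRESS,
## using the usual definition 1 - PRESS/SYY.
press <- sum((residuals(m1)/(1-hatvalues(m1)))^2)
1-press/sum((da$quality-mean(da$quality))^2)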
Polynomial regression
##-----------------------------------------------------------------------------
## Weight gain of turkeys as a function of dietary methionine.
xyplot(Gain~A, data=turk0, type=c("p","smooth"),
xlab="Metionina (% da dieta)",
ylab="Ganho de peso (g)")

##-----------------------------------------------------------------------------
## Fit of the saturated model (treating A as a factor).
turk0$Afat <- factor(turk0$A)
m1 <- lm(Gain~Afat, turk0)
anova(m1)
## Analysis of Variance Table
##
## Response: Gain
## Df Sum Sq Mean Sq F value Pr(>F)
## Afat 5 150041 30008.2 88.587 < 2.2e-16 ***
## Residuals 29 9824 338.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##-----------------------------------------------------------------------------
## Fit of the second-degree model: b0+b1*x+b2*x^2.
## m2 <- lm(Gain~poly(A, 2), turk0)
m2 <- lm(Gain~A+I(A^2), turk0)
##-----------------------------------------------------------------------------
## Lack-of-fit test for model m2.
anova(m1, m2)
## Analysis of Variance Table
##
## Model 1: Gain ~ Afat
## Model 2: Gain ~ A + I(A^2)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 29 9823.6
## 2 32 11339.9 -3 -1516.2 1.492 0.2374
##-----------------------------------------------------------------------------
## Plot of the predicted values with confidence bands.
pred2 <- data.frame(A=seq(0, 0.45, by=0.025))
a <- predict(m2, newdata=pred2, interval="confidence")
pred2 <- cbind(pred2, a)
## Overlaying the mean estimates from model m1.
pred1 <- data.frame(Afat=levels(turk0$Afat))
a <- predict(m1, newdata=pred1, interval="confidence")
pred1 <- cbind(pred1, a)
pred1$A <- as.numeric(as.character(pred1$Afat))
## pred1
xyplot(Gain~A, data=turk0,
xlab="Metionina (% da dieta)",
ylab="Ganho de peso (g)")+
as.layer(xyplot(fit~A, data=pred2, type="l",
ly=pred2$lwr, uy=pred2$upr, cty="bands",
prepanel=prepanel.cbH, panel=panel.cbH))+
as.layer(xyplot(fit~(A+0.005), data=pred1, pch=19,
ly=pred1$lwr, uy=pred1$upr, cty="bars",
prepanel=prepanel.cbH, panel=panel.cbH))

##-----------------------------------------------------------------------------
## Using weights when fitting the regression model.
tu <- ddply(turk0, .(A), summarise,
mGain=mean(Gain), nGain=length(Gain))
tu
## A mGain nGain
## 1 0.00 623.0 10
## 2 0.04 668.4 5
## 3 0.10 715.6 5
## 4 0.16 732.0 5
## 5 0.28 794.0 5
## 6 0.44 785.4 5
m3 <- lm(mGain~A+I(A^2), data=tu, weights=nGain)
summary(m3)
##
## Call:
## lm(formula = mGain ~ A + I(A^2), data = tu, weights = nGain)
##
## Weighted Residuals:
## 1 2 3 4 5 6
## -8.037 14.442 16.171 -29.042 11.611 -1.816
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 625.542 6.243 100.201 2.19e-06 ***
## A 964.471 86.763 11.116 0.00156 **
## I(A^2) -1362.069 198.338 -6.867 0.00632 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.48 on 3 degrees of freedom
## Multiple R-squared: 0.9899, Adjusted R-squared: 0.9832
## F-statistic: 146.9 on 2 and 3 DF, p-value: 0.001016
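##-----------------------------------------------------------------------------
## Sketch (added for illustration): with the group sizes as weights, the fit
## to the treatment means reproduces the point estimates of the raw-data fit m2.
rbind(raw=coef(m2), means=coef(m3))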
##-----------------------------------------------------------------------------
## Overlaying the predicted values and confidence bands obtained from the
## fit to the means.
pred3 <- data.frame(A=seq(0, 0.45, by=0.025))
a <- predict(m3, newdata=pred3, interval="confidence")
pred3 <- cbind(pred3, a)
xyplot(Gain~A, data=turk0,
xlab="Metionina (% da dieta)",
ylab="Ganho de peso (g)")+
as.layer(xyplot(fit~A, data=pred2, type="l",
ly=pred2$lwr, uy=pred2$upr, cty="bands",
prepanel=prepanel.cbH, panel=panel.cbH))+
as.layer(xyplot(fit~A, data=pred3, type="l",
ly=pred3$lwr, uy=pred3$upr, cty="bands",
prepanel=prepanel.cbH, panel=panel.cbH))

Response surface
##-----------------------------------------------------------------------------
## Data.
da <- read.table(
"http://www.leg.ufpr.br/~walmes/data/MontgomeryASPE5th/Example14.12.txt",
header=TRUE, sep="\t")
names(da) <- substr(tolower(names(da)), 1, 4)
str(da)
## 'data.frame': 13 obs. of 6 variables:
## $ time: int 50 60 50 60 48 62 55 55 55 55 ...
## $ temp: int 160 160 170 170 165 165 158 172 165 165 ...
## $ x1 : num -1 1 -1 1 -1.41 ...
## $ x2 : num -1 -1 1 1 0 ...
## $ conv: num 65.3 68.2 66 69.8 64.5 69 64 68.5 68.9 69.7 ...
## $ visc: int 35 39 36 43 30 44 31 45 37 34 ...
## Central composite design.
xyplot(x1~x2, da, jitter.x=TRUE, jitter.y=TRUE)

##-----------------------------------------------------------------------------
## Model fit.
m0 <- lm(conv~x1+x2+x1:x2+I(x1^2)+I(x2^2), data=da)
summary(m0)
##
## Call:
## lm(formula = conv ~ x1 + x2 + x1:x2 + I(x1^2) + I(x2^2), data = da)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1315 -0.3537 -0.0999 0.3057 0.9627
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 69.0999 0.3506 197.082 2.29e-14 ***
## x1 1.6331 0.2772 5.891 0.000605 ***
## x2 1.0830 0.2772 3.907 0.005846 **
## I(x1^2) -0.9688 0.2973 -3.259 0.013893 *
## I(x2^2) -1.2189 0.2973 -4.100 0.004575 **
## x1:x2 0.2250 0.3920 0.574 0.583946
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.784 on 7 degrees of freedom
## Multiple R-squared: 0.9143, Adjusted R-squared: 0.853
## F-statistic: 14.93 on 5 and 7 DF, p-value: 0.001291
anova(m0)
## Analysis of Variance Table
##
## Response: conv
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 21.3335 21.3335 34.7079 0.0006047 ***
## x2 1 9.3824 9.3824 15.2644 0.0058462 **
## I(x1^2) 1 4.6409 4.6409 7.5504 0.0285946 *
## I(x2^2) 1 10.3305 10.3305 16.8069 0.0045752 **
## x1:x2 1 0.2025 0.2025 0.3295 0.5839456
## Residuals 7 4.3026 0.6147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##-----------------------------------------------------------------------------
## Using the rsm functions.
m1 <- rsm(conv~SO(x1, x2), data=da)
summary(m1)
##
## Call:
## rsm(formula = conv ~ SO(x1, x2), data = da)
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 69.09990 0.35062 197.0815 2.286e-14 ***
## x1 1.63312 0.27721 5.8913 0.0006047 ***
## x2 1.08304 0.27721 3.9070 0.0058462 **
## x1:x2 0.22500 0.39200 0.5740 0.5839456
## x1^2 -0.96880 0.29731 -3.2585 0.0138931 *
## x2^2 -1.21887 0.29731 -4.0996 0.0045752 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Multiple R-squared: 0.9143, Adjusted R-squared: 0.853
## F-statistic: 14.93 on 5 and 7 DF, p-value: 0.001291
##
## Analysis of Variance Table
##
## Response: conv
## Df Sum Sq Mean Sq F value Pr(>F)
## FO(x1, x2) 2 30.7158 15.3579 24.9861 0.0006502
## TWI(x1, x2) 1 0.2025 0.2025 0.3295 0.5839456
## PQ(x1, x2) 2 14.9714 7.4857 12.1786 0.0052560
## Residuals 7 4.3026 0.6147
## Lack of fit 3 3.4426 1.1475 5.3374 0.0697146
## Pure error 4 0.8600 0.2150
##
## Stationary point of response surface:
## x1 x2
## 0.9041430 0.5277301
##
## Eigenanalysis:
## $values
## [1] -0.9256357 -1.2620325
##
## $vectors
## [,1] [,2]
## x1 -0.9336473 -0.3581937
## x2 -0.3581937 0.9336473
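##-----------------------------------------------------------------------------
## Sketch (added for illustration): recovering the stationary point from the
## m0 coefficients by solving the gradient equations of the quadratic surface.
b <- coef(m0)[c("x1", "x2")]
B <- matrix(c(coef(m0)["I(x1^2)"], coef(m0)["x1:x2"]/2,
              coef(m0)["x1:x2"]/2, coef(m0)["I(x2^2)"]), nrow=2)
-0.5*solve(B, b)      ## should match rsm's stationary point.
eigen(B)$values       ## both negative => a maximum.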
##-----------------------------------------------------------------------------
## Prediction.
pred <- expand.grid(
x1=seq(-sqrt(2), sqrt(2), l=20),
x2=seq(-sqrt(2), sqrt(2), l=20))
pred$y <- predict(m0, newdata=pred)
pred <- transform(pred,
                  time=55+x1*5,
                  temp=165+x2*5) ## temperature decodes from x2, not x1.
colr <- brewer.pal(11, "Spectral")
colr <- colorRampPalette(colr, space="rgb")
wireframe(y~time+temp,
data=pred,
xlab=list("Tempo (min)", rot=22),
ylab=list("Temperatura (°F)", rot=-36),
zlab=list("Conversão (%)", rot=90),
scales=list(arrows=FALSE),
zlim=c(56, max(pred$y)),
panel.3d.wireframe=panel.3d.contour,
nlevels=18,# col="gray30",
type=c("on", "bottom"),
col.regions=colr(101), drape=TRUE,
screen=list(x=-75, z=10, y=35))

Regression in the presence of categorical factors
##-----------------------------------------------------------------------------
## Data.
da <- read.table("http://www.leg.ufpr.br/~walmes/data/soja.txt",
header=TRUE, sep="\t", dec=",")
da <- transform(da, a=agua, A=factor(agua),
k=potassio, K=factor(potassio))
str(da)
## 'data.frame': 75 obs. of 14 variables:
## $ potassio: int 0 30 60 120 180 0 30 60 120 180 ...
## $ agua : num 37.5 37.5 37.5 37.5 37.5 50 50 50 50 50 ...
## $ bloco : Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ rengrao : num 14.6 21.5 24.6 21.9 28.1 ...
## $ pesograo: num 10.7 13.5 15.8 12.8 14.8 ...
## $ kgrao : num 15.1 17.1 19.1 18.1 19.1 ...
## $ pgrao : num 1.18 0.99 0.82 0.85 0.88 1.05 1.08 0.74 1.01 1.01 ...
## $ ts : int 136 159 156 171 190 140 193 200 208 237 ...
## $ nvi : int 22 2 0 2 0 20 6 6 7 10 ...
## $ nv : int 56 62 66 68 82 63 86 94 86 97 ...
## $ a : num 37.5 37.5 37.5 37.5 37.5 50 50 50 50 50 ...
## $ A : Factor w/ 3 levels "37.5","50","62.5": 1 1 1 1 1 2 2 2 2 2 ...
## $ k : int 0 30 60 120 180 0 30 60 120 180 ...
## $ K : Factor w/ 5 levels "0","30","60",..: 1 2 3 4 5 1 2 3 4 5 ...
xyplot(pesograo~k|A, data=da, type=c("p","a"))

##-----------------------------------------------------------------------------
## Fit.
m0 <- aov(kgrao~bloco+A*poly(k, degree=2), data=da,
contrasts=list(bloco=contr.sum))
anova(m0)
## Analysis of Variance Table
##
## Response: kgrao
## Df Sum Sq Mean Sq F value Pr(>F)
## bloco 4 16.32 4.079 1.0970 0.3660
## A 2 5.49 2.747 0.7387 0.4819
## poly(k, degree = 2) 2 423.70 211.850 56.9735 9.064e-15 ***
## A:poly(k, degree = 2) 4 6.92 1.731 0.4656 0.7607
## Residuals 62 230.54 3.718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m1 <- update(m0, .~bloco+A/poly(k, degree=2))
anova(m1)
## Analysis of Variance Table
##
## Response: kgrao
## Df Sum Sq Mean Sq F value Pr(>F)
## bloco 4 16.32 4.079 1.0970 0.3660
## A 2 5.49 2.747 0.7387 0.4819
## A:poly(k, degree = 2) 6 430.63 71.771 19.3016 1.515e-12 ***
## Residuals 62 230.54 3.718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.lm(m1)
##
## Call:
## aov(formula = kgrao ~ bloco + A + A:poly(k, degree = 2), data = da,
## contrasts = list(bloco = contr.sum))
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7011 -1.0098 0.0263 0.8936 5.4240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.0568 0.3857 46.820 < 2e-16 ***
## bloco1 -0.7364 0.4453 -1.654 0.1033
## bloco2 0.1263 0.4453 0.284 0.7777
## bloco3 -0.0404 0.4453 -0.091 0.9280
## bloco4 -0.0724 0.4453 -0.163 0.8714
## A50 -0.1616 0.5454 -0.296 0.7680
## A62.5 -0.6376 0.5454 -1.169 0.2469
## A37.5:poly(k, degree = 2)1 17.3599 3.3399 5.198 2.39e-06 ***
## A50:poly(k, degree = 2)1 20.6219 3.3399 6.174 5.64e-08 ***
## A62.5:poly(k, degree = 2)1 21.3761 3.3399 6.400 2.32e-08 ***
## A37.5:poly(k, degree = 2)2 -2.9066 3.3399 -0.870 0.3875
## A50:poly(k, degree = 2)2 -7.3413 3.3399 -2.198 0.0317 *
## A62.5:poly(k, degree = 2)2 -6.7801 3.3399 -2.030 0.0467 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.928 on 62 degrees of freedom
## Multiple R-squared: 0.6624, Adjusted R-squared: 0.5971
## F-statistic: 10.14 on 12 and 62 DF, p-value: 1.247e-10
L <- list(A37.5lin=1, A37.5qua=2,
A50.0lin=3, A50.0qua=4,
A62.5lin=5, A62.5qua=6)
summary(m1, split=list("A:poly(k, degree = 2)"=L))
## Df Sum Sq Mean Sq F value Pr(>F)
## bloco 4 16.3 4.08 1.097 0.3660
## A 2 5.5 2.75 0.739 0.4819
## A:poly(k, degree = 2) 6 430.6 71.77 19.302 1.51e-12 ***
## A:poly(k, degree = 2): A37.5lin 1 100.5 100.45 27.016 2.39e-06 ***
## A:poly(k, degree = 2): A37.5qua 1 141.8 141.75 38.122 5.64e-08 ***
## A:poly(k, degree = 2): A50.0lin 1 152.3 152.31 40.962 2.32e-08 ***
## A:poly(k, degree = 2): A50.0qua 1 2.8 2.82 0.757 0.3875
## A:poly(k, degree = 2): A62.5lin 1 18.0 17.96 4.831 0.0317 *
## A:poly(k, degree = 2): A62.5qua 1 15.3 15.32 4.121 0.0467 *
## Residuals 62 230.5 3.72
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m2 <- update(m0, .~bloco+A*poly(k, degree=1))
anova(m0, m2)
## Analysis of Variance Table
##
## Model 1: kgrao ~ bloco + A * poly(k, degree = 2)
## Model 2: kgrao ~ bloco + A + poly(k, degree = 1) + A:poly(k, degree = 1)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 62 230.54
## 2 65 266.64 -3 -36.104 3.2365 0.0281 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
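##-----------------------------------------------------------------------------
## Sketch (added for illustration, taking the quadratic vertex as the optimum,
## which is an assumption): refitting the nested model with raw powers of k so
## that the per-level maximum, -b1/(2*b2), can be read off the coefficients.
m1raw <- update(m0, .~bloco+A/(k+I(k^2)))
b <- coef(m1raw)
b1 <- b[grep(":k$", names(b))]          ## linear terms, one per A level.
b2 <- b[grep("I\\(k\\^2\\)", names(b))] ## quadratic terms, one per A level.
-b1/(2*b2)                              ## potassium level at the vertex.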
##-----------------------------------------------------------------------------
## Prediction.
pred <- expand.grid(bloco="I", A=levels(da$A), k=seq(0,185,5))
ci <- predict(m0, newdata=pred, interval="confidence")-coef(m0)["bloco1"]
pred <- cbind(pred, ci)
mycol <- brewer.pal(6, "Set1")
p1 <-
xyplot(kgrao~k, groups=A, data=da,
scales="free", jitter.x=TRUE,
xlab=expression("Potássio no solo"~(mg~dm^{-3})),
ylab="Peso de 100 grãos (g)",
auto.key=list(title="Níveis de água (%)", cex.title=1.2,
columns=3, points=FALSE, lines=TRUE),
par.settings=list(superpose.symbol=list(col=mycol),
superpose.line=list(col=mycol),
strip.background=list(col="gray90")))
p2 <-
xyplot(fit~k, groups=A, data=pred,
type="l", lwd=1.5,
ly=pred$lwr, uy=pred$upr,
cty="bands", alpha=0.25,
panel.groups=panel.cbH,
panel=panel.superpose)
p1+as.layer(under=TRUE, p2)

Nonlinear regression
##-----------------------------------------------------------------------------
## Data.
da <- read.table("http://www.leg.ufpr.br/~walmes/data/algodão.txt",
header=TRUE, sep="\t", encoding="latin1")
da$desf <- da$desf/100
levels(da$estag) <- c("Vegetativo", "Botão floral",
"Florescimento", "Maçã", "Capulho")
str(da)
## 'data.frame': 125 obs. of 8 variables:
## $ estag : Factor w/ 5 levels "Vegetativo","Botão floral",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ desf : num 0 0 0 0 0 0.25 0.25 0.25 0.25 0.25 ...
## $ rep : int 1 2 3 4 5 1 2 3 4 5 ...
## $ pcapu : num 33.2 28.7 31.5 28.9 36.4 ...
## $ nnos : int 32 29 29 27 28 30 33 23 29 26 ...
## $ alt : num 151 146 140 155 150 ...
## $ ncapu : int 10 9 8 8 10 11 9 10 10 10 ...
## $ nestru: int 10 9 8 9 10 11 9 11 10 10 ...
p0 <-
xyplot(pcapu~desf|estag, data=da, layout=c(5,1),
xlab="Nível de desfolha artificial",
ylab="Peso de capulhos produzidos (g)")
## p0
##-----------------------------------------------------------------------------
## Fit.
n0 <- lapply(split(da, da$estag), nls,
formula=pcapu~f0-f1*(desf+0.02)^exp(C),
start=list(f0=30, f1=15, C=0))
n1 <- lapply(split(da, da$estag), nls,
formula=pcapu~f0-f1*(desf+0.02)^((log(5)-log(f1))/log(xde)),
start=list(f0=30, f1=10, xde=0.7))
chute1 <- sapply(n1, coef)
chute2 <- cbind(chute1[,1], sweep(chute1[,2:5], 1, chute1[,1], "-"))
c(t(chute2))
## [1] 31.15716578 -1.89166251 -1.01579242 -2.88803986 3.63178792 9.29140391 -3.59069095
## [8] 5.24697027 10.42379324 -0.50404109 0.87337687 0.09069715 -0.73457062 -0.12914993
## [15] -0.44398962
nn0 <- gnls(pcapu~f0-f1*desf^((log(5)-log(f1))/log(xde)), data=da,
params=f0+f1+xde~estag,
start=c(t(chute2)))
## summary(nn0)
anova(nn0, type="marginal")
## Denom. DF: 110
## numDF F-value p-value
## f0.(Intercept) 1 894.8326 <.0001
## f0.estag 4 3.0698 0.0193
## f1.(Intercept) 1 29.5203 <.0001
## f1.estag 4 10.1658 <.0001
## xde.(Intercept) 1 111.3543 <.0001
## xde.estag 4 5.9569 0.0002
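##-----------------------------------------------------------------------------
## Sketch (added for illustration): approximate (Wald) 95% intervals for the
## gnls parameters, from nlme's intervals() method.
intervals(nn0)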
##-----------------------------------------------------------------------------
## Prediction.
nn0 <- gnls(pcapu~f0-f1*desf^((log(5)-log(f1))/log(xde)), data=da,
params=f0+f1+xde~-1+estag,
start=c(t(chute1)))
th <- coef(nn0); th
## f0.estagVegetativo f0.estagBotão floral f0.estagFlorescimento
## 31.1492629 29.2628830 28.5011919
## f0.estagMaçã f0.estagCapulho f1.estagVegetativo
## 28.2634222 34.1531615 10.1662666
## f1.estagBotão floral f1.estagFlorescimento f1.estagMaçã
## 6.1173914 13.0146648 21.6085661
## f1.estagCapulho xde.estagVegetativo xde.estagBotão floral
## 8.2777934 0.8538359 0.9440167
## xde.estagFlorescimento xde.estagMaçã xde.estagCapulho
## 0.2021509 0.7243590 0.4954611
xde <- th[grepl("^xde", names(th))]
pred <- with(da,
expand.grid(estag=levels(estag),
desf=seq(0, 1, l=20)))
pred$y <- predict(nn0, newdata=pred)
f <- function(th, desf, estag){
X <- model.matrix(~0+estag)
p <- gsub("\\..*$", "", names(th))
ths <- split(th, f=p)
ths <- lapply(ths, matrix, ncol=1)
f0 <- X%*%ths$f0
f1 <- X%*%ths$f1
xde <- X%*%ths$xde
y <- f0-f1*desf^((log(5)-log(f1))/log(xde))
return(y)
}
## f(th=coef(nn0), desf=pred$desf, estag=pred$estag)
F <- rootSolve::gradient(f=f, x=coef(nn0),
desf=pred$desf, estag=pred$estag)
U <- chol(vcov(nn0))
pred$se <- sqrt(apply(F%*%t(U), 1, function(x) sum(x^2)))
aux <- outer(pred$se, qnorm(0.975)*c(lwr=-1, fit=0, upr=1), FUN="*")
pred <- cbind(pred, sweep(aux, MARGIN=1, STATS=pred$y, FUN="+"))
str(pred)
## 'data.frame': 100 obs. of 7 variables:
## $ estag: Factor w/ 5 levels "Vegetativo","Botão floral",..: 1 2 3 4 5 1 2 3 4 5 ...
## $ desf : num 0 0 0 0 0 ...
## $ y : atomic 31.1 29.3 28.5 28.3 34.2 ...
## ..- attr(*, "label")= chr "Predicted values"
## $ se : num 1.04 1.12 1.58 1.04 1.58 ...
## $ lwr : num 29.1 27.1 25.4 26.2 31.1 ...
## $ fit : num 31.1 29.3 28.5 28.3 34.2 ...
## $ upr : num 33.2 31.4 31.6 30.3 37.2 ...
xyplot(pcapu~desf|estag, data=da, col=1, layout=c(5,1),
xlim=c(-0.2,1.2),
xlab="Desfolha artifical", ylab="Peso de capulhos (g)",
panel=function(...){
panel.xyplot(...)
panel.abline(v=xde[which.packet()], col=2)
})+
as.layer(xyplot(fit+lwr+upr~desf|estag, data=pred,
type="l", col=1, lty=c(1,2,2)))

Extensions of regression models
##-----------------------------------------------------------------------------
## Data.
da <- read.table("http://www.leg.ufpr.br/~walmes/ridiculas/osmo.txt",
header=TRUE, sep="\t")
names(da) <- substr(tolower(names(da)), 1, 3)
str(da)
## 'data.frame': 213 obs. of 3 variables:
## $ esp: Factor w/ 4 levels "A. reticulatus",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ sal: int 0 0 0 0 0 0 10 10 10 10 ...
## $ osm: int 592 601 485 497 596 587 530 328 339 516 ...
xyplot(osm~sal|esp, data=da, type=c("p","smooth"),
xlab="Salinidade", ylab="Osmolalidade")

##-----------------------------------------------------------------------------
## Segmented regression.
db <- droplevels(subset(da, esp=="F. citerosum"))
m0 <- lm(osm~sal, data=db)
## One breakpoint.
s1 <- segmented(m0, seg.Z=~sal,
psi=list(sal=c(5)),
control=seg.control(display=FALSE))
## Two breakpoints.
s2 <- update(s1, psi=list(sal=c(10,30)))
summary(s2)
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.lm(obj = m0, seg.Z = ~sal, psi = list(sal = c(10, 30)),
## control = seg.control(display = FALSE))
##
## Estimated Break-Point(s):
## Est. St.Err
## psi1.sal 10.18 1.317
## psi2.sal 34.87 1.117
##
## t value for the gap-variable(s) V: 0 0
##
## Meaningful coefficients of the linear terms:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 523.492 31.409 16.667 < 2e-16 ***
## sal -19.493 4.890 -3.986 0.000187 ***
## U1.sal 41.143 5.784 7.113 NA
## U2.sal -48.197 10.262 -4.697 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 94.54 on 59 degrees of freedom
## Multiple R-Squared: 0.7712, Adjusted R-squared: 0.7518
##
## Convergence attained in 2 iterations with relative change 0
slope(s2)
## $sal
## Est. St.Err. t value CI(95%).l CI(95%).u
## slope1 -19.49 4.890 -3.986 -29.28 -9.708
## slope2 21.65 3.089 7.009 15.47 27.830
## slope3 -26.55 9.786 -2.713 -46.13 -6.965
confint(s2)
## $sal
## Est. CI(95%).l CI(95%).u
## psi1.sal 10.18 7.549 12.82
## psi2.sal 34.87 32.630 37.10
plot(osm~sal, data=db)
abline(m0)
plot(s1, add=TRUE, col=2)
plot(s2, add=TRUE, col=4)
lines(s1)
lines(s2)
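##-----------------------------------------------------------------------------
## Sketch (added for illustration): a Davies test for the existence of a
## breakpoint in the effect of sal (interface assumed from the segmented
## package documentation).
davies.test(m0, seg.Z=~sal)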

##-----------------------------------------------------------------------------
## Regression with local polynomials (loess).
l0 <- loess(osm~sal, data=db)
summary(l0)
## Call:
## loess(formula = osm ~ sal, data = db)
##
## Number of Observations: 65
## Equivalent Number of Parameters: 4.51
## Residual Standard Error: 101.2
## Trace of smoother matrix: 4.93
##
## Control settings:
## normalize: TRUE
## span : 0.75
## degree : 2
## family : gaussian
## surface : interpolate cell = 0.2
class(l0)
## [1] "loess"
methods(class="loess")
## [1] anova.loess* predict.loess* print.loess* summary.loess*
##
## Non-visible functions are asterisked
pred <- with(da, data.frame(sal=seq(min(sal), max(sal), l=100)))
aux <- predict(l0, newdata=pred, se=TRUE)
me <- outer(aux$se.fit, c(lwr=-1, fit=0, upr=1)*qnorm(0.975), "*")
aux <- sweep(me, MARGIN=1, STATS=aux$fit, FUN="+")
pred <- cbind(pred, aux)
plot(osm~sal, data=db)
matlines(x=pred$sal, y=pred[,c("fit","lwr","upr")],
lty=c(1,2,2), col=1)
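##-----------------------------------------------------------------------------
## Sketch (added for illustration): sensitivity of the local fit to the span
## argument (smaller span, more local behaviour).
l1 <- update(l0, span=0.4)
lines(pred$sal, predict(l1, newdata=pred), lty=3)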

##-----------------------------------------------------------------------------
## Splines.
## p0 <- lm(osm~bs(sal, df=4), data=da)
p0 <- lm(osm~ns(sal, df=4), data=da)
summary(p0)
##
## Call:
## lm(formula = osm ~ ns(sal, df = 4), data = da)
##
## Residuals:
## Min 1Q Median 3Q Max
## -317.74 -83.72 -11.85 84.15 364.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 524.024 25.142 20.842 < 2e-16 ***
## ns(sal, df = 4)1 1.777 47.245 0.038 0.97
## ns(sal, df = 4)2 275.654 43.342 6.360 1.26e-09 ***
## ns(sal, df = 4)3 284.906 69.243 4.115 5.59e-05 ***
## ns(sal, df = 4)4 268.793 35.445 7.583 1.10e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 150.3 on 208 degrees of freedom
## Multiple R-squared: 0.3763, Adjusted R-squared: 0.3643
## F-statistic: 31.37 on 4 and 208 DF, p-value: < 2.2e-16
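##-----------------------------------------------------------------------------
## Sketch (added for illustration): comparing the flexibility of the natural
## spline basis by AIC over a small grid of degrees of freedom.
sapply(3:8, function(k) AIC(lm(osm~ns(sal, df=k), data=da)))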
pred <- with(da, data.frame(sal=seq(min(sal), max(sal), l=100)))
aux <- predict(p0, newdata=pred, interval="confidence")
pred <- cbind(pred, aux)
plot(osm~sal, data=db)
matlines(x=pred$sal, y=pred[,c("fit","lwr","upr")],
lty=c(1,2,2), col=1)

##-----------------------------------------------------------------------------
## GAM.
rock$lperm <- log(rock$perm)
pairs(rock)

g0 <- gam(log(perm)~s(peri)+s(shape), data=rock)
summary(g0)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log(perm) ~ s(peri) + s(shape)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.1075 0.1446 35.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(peri) 2.892 3.587 11.645 2.64e-06 ***
## s(shape) 1.940 2.421 3.842 0.0231 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.628 Deviance explained = 66.7%
## GCV = 1.1426 Scale est. = 1.0038 n = 48
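##-----------------------------------------------------------------------------
## Sketch (added for illustration): visualizing the estimated smooth terms and
## running mgcv's basic adequacy checks.
plot(g0, pages=1)
gam.check(g0)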
cloud(perm~peri+shape, data=rock)

pred <- with(rock,
expand.grid(
peri=seq(min(peri), max(peri), l=20),
shape=seq(min(shape), max(shape), l=20))
)
pred$perm <- predict(g0, newdata=pred)
colr <- brewer.pal(11, "Spectral")
colr <- colorRampPalette(colr, space="rgb")
colr <- rgb(t(col2rgb(colr(101)))/255,
alpha=0.5)
wireframe(perm~peri+shape,
data=pred,
zlim=range(rock$lperm),
xlab=list("Perimetro", rot=22),
ylab=list("Forma", rot=-36),
zlab=list("Permeabilidade", rot=90),
scales=list(arrows=FALSE),
panel.3d.wireframe=panel.3d.contour,
nlevels=18,# col="gray30",
type=c("on", "bottom"),
col.regions=colr, drape=TRUE,
screen=list(x=-75, z=10, y=35),
## screen=list(x=-80, z=-5, y=-25),
panel=function(...){
L <- list(...)
## print(str(L))
L$x <- rock$peri
L$y <- rock$shape
L$z <- rep(L$zlim[1], nrow(rock))
L$type <- "p";
L$pch <- 4;
L$col <- 1;
L$scales.3d$x.scales$draw <- FALSE
L$scales.3d$y.scales$draw <- FALSE
L$scales.3d$z.scales$draw <- FALSE
L$xlab <- ""; L$ylab <- ""; L$zlab <- "";
L$par.box <- list(col=rep(NA, 9))
L$box.3d$lty <- 0
L$panel.3d.wireframe <- NULL
do.call(panel.cloud, L)
L$pch <- 1;
L$z <- rock$lperm
do.call(panel.cloud, L)
apply(rock[,c("peri","shape","lperm")], 1,
function(line){
L$x <- rep(line[1], 2)
L$y <- rep(line[2], 2)
L$z <- c(L$zlim[1], line[3])
L$type <- "l"
do.call(panel.cloud, L)
})
panel.abline(v=0.3)
panel.wireframe(...)
})

Joint modeling of the variance
##-----------------------------------------------------------------------------
goi <- read.table("http://www.leg.ufpr.br/~walmes/data/goiaba.txt",
header=TRUE, sep="\t")
goi$daa2 <- goi$daa^2.5
str(goi)
## 'data.frame': 174 obs. of 8 variables:
## $ daa : int 13 13 13 13 13 13 13 13 13 13 ...
## $ coleta: int 1 1 1 1 1 1 1 1 1 1 ...
## $ rep : int 1 2 3 4 5 6 7 8 9 10 ...
## $ long : num 29.1 32.9 36.3 34.7 33.3 ...
## $ trans : num 20.5 22.8 22.9 22.7 21.4 ...
## $ peso : num 6.66 8.51 9.93 9.14 7.6 7.54 6.27 9.94 7.95 7.79 ...
## $ volume: int 6 9 11 10 7 8 6 9 8 8 ...
## $ daa2 : num 609 609 609 609 609 ...
aux <- ddply(goi, .(daa), summarise, m=mean(peso), s=sd(peso))
p1 <- xyplot(peso~daa, data=goi, type=c("p","a"),
ylab="Peso do fruto (g)", xlab="Dias após antese")
p2 <- xyplot(s~m, aux, type=c("p","r"),
xlab="Média de peso", ylab="Desvio-padrão de peso")
grid.arrange(p1, p2)

##-----------------------------------------------------------------------------
## Fits.
## Traditional model.
n0 <- gnls(peso~A-(A-D)*exp(-exp(C*(daa-B))), data=goi,
start=c(A=200, C=0.09, B=105, D=7))
summary(n0)
## Generalized nonlinear least squares fit
## Model: peso ~ A - (A - D) * exp(-exp(C * (daa - B)))
## Data: goi
## AIC BIC logLik
## 1619.093 1634.888 -804.5465
##
## Coefficients:
## Value Std.Error t-value p-value
## A 205.76239 4.196193 49.03549 0
## C 0.09367 0.010148 9.22980 0
## B 107.35182 0.874391 122.77326 0
## D 18.70608 3.179082 5.88411 0
##
## Correlation:
## A C B
## C -0.357
## B 0.590 -0.131
## D -0.103 0.467 0.219
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -3.17237358 -0.42834311 -0.07757784 0.35077905 3.94396837
##
## Residual standard error: 24.94118
## Degrees of freedom: 174 total; 170 residual
## Model with a variance function. varExp would give var(e)=sigma^2*exp(2*delta*daa);
## the fit below uses varPower on log(daa): var(e)=sigma^2*|log(daa)|^(2*delta).
n1 <- update(n0,
## weights=varExp(0.03, form=~daa)
weights=varPower(0.1, form=~log(daa))
)
summary(n1)
## Generalized nonlinear least squares fit
## Model: peso ~ A - (A - D) * exp(-exp(C * (daa - B)))
## Data: goi
## AIC BIC logLik
## 1432.407 1451.362 -710.2036
##
## Variance function:
## Structure: Power of variance covariate
## Formula: ~log(daa)
## Parameter estimates:
## power
## 5.764348
##
## Coefficients:
## Value Std.Error t-value p-value
## A 217.15613 8.784140 24.72139 0
## C 0.05520 0.003531 15.63409 0
## B 108.05936 1.880553 57.46148 0
## D 7.49496 0.426361 17.57893 0
##
## Correlation:
## A C B
## C -0.566
## B 0.879 -0.624
## D -0.390 0.837 -0.386
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.66220159 -0.53129856 0.09399151 0.63400182 2.80361091
##
## Residual standard error: 0.003829197
## Degrees of freedom: 174 total; 170 residual
anova(n1, n0)
## Model df AIC BIC logLik Test L.Ratio p-value
## n1 1 6 1432.407 1451.362 -710.2036
## n0 2 5 1619.093 1634.888 -804.5465 1 vs 2 188.6858 <.0001
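##-----------------------------------------------------------------------------
## Sketch (added for illustration): the exponential-of-covariate variance
## function mentioned above, compared with the other two fits by AIC.
n2 <- update(n0, weights=varExp(0.03, form=~daa))
AIC(n0, n1, n2)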
##-----------------------------------------------------------------------------
## Diagnostics.
da <- rbind(data.frame(model="trad",
r=residuals(n0, type="pearson"),
f=fitted(n0)),
data.frame(model="var",
r=residuals(n1, type="pearson"),
f=fitted(n1)))
p1 <- qqmath(~r|model, data=da,
xlab="Quantis teóricos",
ylab="Resíduos padronizados")
p2 <- xyplot(sqrt(abs(r))~f|model, data=da, type=c("p","smooth"),
xlab="Valor ajustado",
ylab=expression(sqrt("|Resíduos padronizados|")))
grid.arrange(p1, p2)

##-----------------------------------------------------------------------------
## Prediction.
model <- deriv3(~ A-(A-D)*exp(-exp(C*(daa-B))),
c("A","C","B","D"),
function(daa, A, C, B, D){NULL})
l <- list(n0, n1)
names(l) <- levels(da$model)
L <- l
for(i in seq_along(L)){
m4 <- l[[i]]
U <- chol(vcov(m4))
pred <- data.frame(daa=seq(1,140,1))
m <- model(daa=pred$daa, A=coef(m4)["A"], C=coef(m4)["C"],
B=coef(m4)["B"], D=coef(m4)["D"])
F0 <- attr(m, "gradient")
pred$y <- c(m)
pred$se <- sqrt(apply(F0%*%t(U), 1, function(x) sum(x^2)))
z <- qnorm(0.975)
pred <- transform(pred, lwr=y-z*se, upr=y+z*se)
L[[i]] <- pred
}
str(L)
## List of 2
## $ trad:'data.frame': 140 obs. of 5 variables:
## ..$ daa: num [1:140] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ y : num [1:140] 18.7 18.7 18.7 18.7 18.7 ...
## ..$ se : num [1:140] 3.17 3.17 3.17 3.17 3.17 ...
## ..$ lwr: num [1:140] 12.5 12.5 12.5 12.5 12.5 ...
## ..$ upr: num [1:140] 24.9 24.9 24.9 24.9 24.9 ...
## $ var :'data.frame': 140 obs. of 5 variables:
## ..$ daa: num [1:140] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ y : num [1:140] 8.06 8.1 8.13 8.16 8.2 ...
## ..$ se : num [1:140] 0.278 0.273 0.267 0.262 0.256 ...
## ..$ lwr: num [1:140] 7.52 7.56 7.61 7.65 7.7 ...
## ..$ upr: num [1:140] 8.61 8.63 8.65 8.68 8.7 ...
L <- ldply(L)
names(L)[1] <- "model"
L$model <- factor(L$model, levels=levels(da$model))
str(L)
## 'data.frame': 280 obs. of 6 variables:
## $ model: Factor w/ 2 levels "trad","var": 1 1 1 1 1 1 1 1 1 1 ...
## $ daa : num 1 2 3 4 5 6 7 8 9 10 ...
## $ y : num 18.7 18.7 18.7 18.7 18.7 ...
## $ se : num 3.17 3.17 3.17 3.17 3.17 ...
## $ lwr : num 12.5 12.5 12.5 12.5 12.5 ...
## $ upr : num 24.9 24.9 24.9 24.9 24.9 ...
icm <- ddply(goi, .(daa), summarise,
m=mean(peso),
lwr=mean(peso)-qt(0.975, length(peso)-1)*sd(peso)*
sqrt(1/length(peso)),
upr=mean(peso)+qt(0.975, length(peso)-1)*sd(peso)*
sqrt(1/length(peso)))
##-----------------------------------------------------------------------------
## Plot with the confidence bands.
ylim <- range(goi$peso)
xyplot(y~daa|model, data=L, type="l", col=1,
ylab="Massa fresca dos frutos (g)",
xlab="Dias após a antese",
strip=strip.custom(bg="gray90"),
ly=L$lwr, uy=L$upr, cty="bands",
prepanel=prepanel.cbH, panel=panel.cbH)+
layer(panel.arrows(icm$daa, icm$lwr, icm$daa, icm$upr,
code=3, length=0.05, angle=90))

Random effects model
##-----------------------------------------------------------------------------
## Models.
str(BodyWeight)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 176 obs. of 4 variables:
## $ weight: num 240 250 255 260 262 258 266 266 265 272 ...
## $ Time : num 1 8 15 22 29 36 43 44 50 57 ...
## $ Rat : Ord.factor w/ 16 levels "2"<"3"<"4"<"1"<..: 4 4 4 4 4 4 4 4 4 4 ...
## $ Diet : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "outer")=Class 'formula' length 2 ~Diet
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## - attr(*, "formula")=Class 'formula' length 3 weight ~ Time | Rat
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## - attr(*, "labels")=List of 2
## ..$ x: chr "Time"
## ..$ y: chr "Body weight"
## - attr(*, "units")=List of 2
## ..$ x: chr "(days)"
## ..$ y: chr "(g)"
## - attr(*, "FUN")=function (x)
## ..- attr(*, "source")= chr "function(x) max(x, na.rm = TRUE)"
## - attr(*, "order.groups")= logi TRUE
plot(BodyWeight)

mm0 <- lme(weight~Diet*Time, random=~1+Time, data=BodyWeight,
correlation=corCAR1())
summary(mm0)
## Linear mixed-effects model fit by REML
## Data: BodyWeight
## AIC BIC logLik
## 1154.544 1189.038 -566.2721
##
## Random effects:
## Formula: ~1 + Time | Rat
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 36.9056825 (Intr)
## Time 0.2333653 -0.152
## Residual 5.4600807
##
## Correlation Structure: Continuous AR(1)
## Formula: ~1 | Rat
## Parameter estimate(s):
## Phi
## 0.5316409
## Fixed effects: weight ~ Diet * Time
## Value Std.Error DF t-value p-value
## (Intercept) 251.30348 13.152622 157 19.106721 0.0000
## Diet2 200.68117 22.781009 13 8.809143 0.0000
## Diet3 253.67433 22.781009 13 11.135342 0.0000
## Time 0.36417 0.091878 157 3.963552 0.0001
## Diet2:Time 0.62374 0.159138 157 3.919461 0.0001
## Diet3:Time 0.27874 0.159138 157 1.751532 0.0818
## Correlation:
## (Intr) Diet2 Diet3 Time Dt2:Tm
## Diet2 -0.577
## Diet3 -0.577 0.333
## Time -0.181 0.104 0.104
## Diet2:Time 0.104 -0.181 -0.060 -0.577
## Diet3:Time 0.104 -0.060 -0.181 -0.577 0.333
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.07548837 -0.43553702 0.06177724 0.45498272 1.75954253
##
## Number of Observations: 176
## Number of Groups: 16
plot(augPred(mm0))

anova(mm0)
## numDF denDF F-value p-value
## (Intercept) 1 157 1727.0904 <.0001
## Diet 2 13 86.6759 <.0001
## Time 1 157 82.4113 <.0001
## Diet:Time 2 157 7.7925 6e-04
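##-----------------------------------------------------------------------------
## Sketch (added for illustration): checking whether the random slope for Time
## is needed, by refitting with a random intercept only and comparing the two
## REML fits (same fixed effects, so the likelihood ratio is legitimate, apart
## from the usual boundary caveat for variance components).
mm1 <- update(mm0, random=~1)
anova(mm0, mm1)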
##-----------------------------------------------------------------------------
## Prediction of the fixed and random effects.
pred0 <- with(BodyWeight,
expand.grid(Diet=levels(Diet),
Time=seq(min(Time), max(Time), l=20)))
aux <- unique(BodyWeight[,c("Diet","Rat")])
pred1 <- merge(pred0, aux, all=TRUE)
pred0$y <- predict(mm0, newdata=pred0, level=0)
pred1$y <- predict(mm0, newdata=pred1, level=1)
U <- chol(vcov(mm0))
F <- model.matrix(formula(mm0)[-2], data=pred0)
pred0$se <- sqrt(apply(F%*%t(U), 1, function(x) sum(x^2)))
z <- qnorm(0.975)
aux <- outer(pred0$se, z*c(lwr=-1, fit=0, upr=1), FUN="*")
pred0 <- cbind(pred0, sweep(aux, 1, STATS=pred0$y, FUN="+"))
p0 <-
xyplot(weight~Time|Diet, BodyWeight, groups=Rat,
layout=c(3,1), xlab="Tempo", ylab="Peso (g)")
p1 <-
xyplot(y~Time|Diet, pred0, type="l", lwd=2, layout=c(3,1),
ly=pred0$lwr, uy=pred0$upr, cty="bands",
panel=panel.cbH)
p2 <-
xyplot(y~Time|Diet, groups=Rat, data=pred1,
type="l", col="blue", lty=2)
p0+as.layer(p1)+as.layer(p2)

##-----------------------------------------------------------------------------
## Session information.
Sys.time()
## [1] "2014-12-11 20:51:21 BRST"
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: i686-pc-linux-gnu (32-bit)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=pt_BR.UTF-8
## [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=pt_BR.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=pt_BR.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] methods splines grid stats graphics grDevices utils datasets
## [9] base
##
## other attached packages:
## [1] wzRfun_0.4 rootSolve_1.6.5.1 mgcv_1.8-3 segmented_0.5-0.0
## [5] rsm_2.07 nlme_3.1-118 aod_1.3 multcomp_1.3-7
## [9] TH.data_1.0-5 mvtnorm_1.0-1 doBy_4.5-12 survival_2.37-7
## [13] plyr_1.8.1 alr3_2.0.5 ellipse_0.3-8 car_2.0-22
## [17] gridExtra_0.9.1 latticeExtra_0.6-26 RColorBrewer_1.0-5 lattice_0.20-29
## [21] rmarkdown_0.3.3 knitr_1.8
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.4 evaluate_0.5.5 formatR_1.0 htmltools_0.2.6 MASS_7.3-35
## [6] Matrix_1.1-4 nnet_7.3-8 Rcpp_0.11.3 sandwich_2.3-2 stringr_0.6.2
## [11] tools_3.1.2 yaml_2.1.13 zoo_1.7-11