Application of linear and nonlinear regression models in agricultural sciences

December 9-11, 2014 - Goiânia - GO
Prof. Dr. Walmes M. Zeviani
Embrapa Arroz e Feijão
Lab. de Estatística e Geoinformação - LEG
Departamento de Estatística - UFPR


Course trailer


Simple linear regression

##=============================================================================
## Application of linear and nonlinear regression
##   models in agricultural sciences
##
##   December 9-11, 2014 - Goiânia/GO
##   Embrapa Arroz e Feijão
##
##                                                  Prof. Dr. Walmes M. Zeviani
##                                                                LEG/DEST/UFPR
##=============================================================================

##-----------------------------------------------------------------------------
## Session settings.

pkg <- c("lattice", "latticeExtra", "gridExtra", "car", "ellipse",
         "alr3", "plyr", "doBy", "multcomp", "aod", "nlme", "rsm",
         "segmented", "splines", "mgcv", "rootSolve", "wzRfun")

sapply(pkg, require, character.only=TRUE)
##      lattice latticeExtra    gridExtra          car      ellipse         alr3 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##         plyr         doBy     multcomp          aod         nlme          rsm 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##    segmented      splines         mgcv    rootSolve       wzRfun 
##         TRUE         TRUE         TRUE         TRUE         TRUE
trellis.device(color=FALSE)
##-----------------------------------------------------------------------------
## Analysis of real data.

## Diagonal length of cell phones (cm).
x <- c(12.5, 13.2, 14.4, 11.3, 11.2, 11.2, 11.4, 11.0, 10.6, 12.4, 12.4)

## Weight of the phones (g).
y <- c(136.7, 144.8, 152.6, 120.9, 113.6, 126.9, 91.3, 107.9, 103.6,
       116.9, 117.3)

data.frame(diagonal=x, peso=y)
##    diagonal  peso
## 1      12.5 136.7
## 2      13.2 144.8
## 3      14.4 152.6
## 4      11.3 120.9
## 5      11.2 113.6
## 6      11.2 126.9
## 7      11.4  91.3
## 8      11.0 107.9
## 9      10.6 103.6
## 10     12.4 116.9
## 11     12.4 117.3
##-----------------------------------------------------------------------------
## Visualize the data.

xyplot(y~x,
       xlab="Comprimento diagonal (cm)",
       ylab="Peso do aparelho (g)")

##-----------------------------------------------------------------------------
## Fit of the simple linear regression model.

m0 <- lm(y~x)
summary(m0)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.5642  -5.1349   0.0575   8.0189  15.6162 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -33.221     37.878  -0.877  0.40326   
## x             12.902      3.153   4.092  0.00271 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.3 on 9 degrees of freedom
## Multiple R-squared:  0.6504, Adjusted R-squared:  0.6115 
## F-statistic: 16.74 on 1 and 9 DF,  p-value: 0.00271
confint(m0)
##                   2.5 %   97.5 %
## (Intercept) -118.906100 52.46451
## x              5.769088 20.03530
vcov(m0)
##             (Intercept)          x
## (Intercept)   1434.7200 -118.95289
## x             -118.9529    9.94287
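
## A minimal check (sketch): confint(m0) is the usual t-based interval,
## estimate +/- qt(0.975, df) * standard error, reproduced here by hand.
est <- coef(m0)
ep <- sqrt(diag(vcov(m0)))
tc <- qt(0.975, df=df.residual(m0))
cbind(lwr=est-tc*ep, upr=est+tc*ep)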
##-----------------------------------------------------------------------------
## Checking the fitted model against the data.

plot(y~x, xlab="Comprimento diagonal (cm)", ylab="Peso (g)")
abline(m0)
legend("bottomright", legend=c("Observado","Ajustado"),
       pch=c(1,NA), lty=c(NA,1), bty="n")

##-----------------------------------------------------------------------------
## Checking the model assumptions.

par(mfrow=c(2, 2)); plot(m0); layout(1)

##-----------------------------------------------------------------------------
## Influence measures.

cbind(r=residuals(m0), r.pad=rstandard(m0), r.stu=rstudent(m0))
##               r        r.pad        r.stu
## 1    8.64336872  0.812204307  0.795460862
## 2    7.71183298  0.767694013  0.748718343
## 3    0.02920028  0.003864617  0.003643599
## 4    8.32600142  0.787680565  0.769634511
## 5    2.31622081  0.220530830  0.208482518
## 6   15.61622081  1.486843622  1.613979130
## 7  -22.56421798 -2.123123371 -2.833241337
## 8   -0.80334041 -0.077706591 -0.073287066
## 9    0.05753715  0.005823405  0.005490369
## 10  -9.86641189 -0.923198066 -0.914791714
## 11  -9.46641189 -0.885770151 -0.874080397
influence.measures(m0)
## Influence measures of
##   lm(formula = y ~ x) :
## 
##      dfb.1_    dfb.x    dffit cov.r   cook.d   hat inf
## 1  -0.10299  0.12642  0.28435 1.226 4.21e-02 0.113    
## 2  -0.26654  0.29057  0.38592 1.399 7.83e-02 0.210    
## 3  -0.00354  0.00370  0.00405 2.831 9.24e-06 0.553   *
## 4   0.17407 -0.15236  0.29115 1.254 4.44e-02 0.125    
## 5   0.05369 -0.04780  0.08282 1.450 3.84e-03 0.136    
## 6   0.41563 -0.37002  0.64117 0.834 1.74e-01 0.136    
## 7  -0.55360  0.47380 -1.02453 0.357 2.95e-01 0.116    
## 8  -0.02363  0.02154 -0.03236 1.510 5.89e-04 0.163    
## 9   0.00255 -0.00239  0.00305 1.656 5.23e-06 0.236    
## 10  0.09106 -0.11778 -0.31455 1.160 5.04e-02 0.106    
## 11  0.08701 -0.11254 -0.30055 1.179 4.64e-02 0.106
##-----------------------------------------------------------------------------
## Predictions with confidence and prediction bands.

pred <- data.frame(x=seq(min(x), max(x), l=20))
bc <- predict(m0, newdata=pred, interval="confidence")
bp <- predict(m0, newdata=pred, interval="prediction")

plot(y~x, xlab="Comprimento diagonal (cm)", ylab="Peso (g)")
matlines(x=pred$x, bc, col=1, lty=c(1,2,2))
matlines(x=pred$x, bp, col=1, lty=c(1,3,3))
legend("bottomright",
       legend=c("Observado","Ajustado",
           "Banda de confiança","Banda de predição"),
       pch=c(1,NA,NA,NA), lty=c(NA,1,2,3),
       bty="n")
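
## Sketch of what predict() does for the confidence band: the standard error
## of the fitted mean at x0 is sqrt(x0' V x0), with V = vcov(m0).
X <- model.matrix(~x, data=pred)
se.fit <- sqrt(diag(X%*%vcov(m0)%*%t(X)))
tc <- qt(0.975, df=df.residual(m0))
head(cbind(fit=X%*%coef(m0),
           lwr=X%*%coef(m0)-tc*se.fit,
           upr=X%*%coef(m0)+tc*se.fit))
head(bc)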


Multiple linear regression

##-----------------------------------------------------------------------------
## Data.

da <- read.table(
    "http://www.leg.ufpr.br/~walmes/data/MontgomeryASPE5th/Example12.14.txt",
    header=TRUE, sep="\t")
names(da) <- tolower(names(da))
str(da)
## 'data.frame':    38 obs. of  6 variables:
##  $ clarity : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ aroma   : num  3.3 4.4 3.9 3.9 5.6 4.6 4.8 5.3 4.3 4.3 ...
##  $ body    : num  2.8 4.9 5.3 2.6 5.1 4.7 4.8 4.5 4.3 3.9 ...
##  $ flavor  : num  3.1 3.5 4.8 3.1 5.5 5 4.8 4.3 3.9 4.7 ...
##  $ oakiness: num  4.1 3.9 4.7 3.6 5.1 4.1 3.3 5.2 2.9 3.9 ...
##  $ quality : num  9.8 12.6 11.9 11.1 13.3 12.8 12.8 12 13.6 13.9 ...
scatterplotMatrix(da, span=0.8)

##-----------------------------------------------------------------------------
## Fit.

m0 <- lm(quality~., data=da)
summary(m0)
## 
## Call:
## lm(formula = quality ~ ., data = da)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.85552 -0.57448 -0.07092  0.67275  1.68093 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.9969     2.2318   1.791 0.082775 .  
## clarity       2.3395     1.7348   1.349 0.186958    
## aroma         0.4826     0.2724   1.771 0.086058 .  
## body          0.2732     0.3326   0.821 0.417503    
## flavor        1.1683     0.3045   3.837 0.000552 ***
## oakiness     -0.6840     0.2712  -2.522 0.016833 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.163 on 32 degrees of freedom
## Multiple R-squared:  0.7206, Adjusted R-squared:  0.6769 
## F-statistic: 16.51 on 5 and 32 DF,  p-value: 4.703e-08
##-----------------------------------------------------------------------------
## Checking the model assumptions.

par(mfrow=c(2, 2)); plot(m0); layout(1)

residualPlots(m0)

##            Test stat Pr(>|t|)
## clarity        1.140    0.263
## aroma          0.419    0.678
## body           0.976    0.337
## flavor        -0.258    0.798
## oakiness      -0.353    0.727
## Tukey test     0.137    0.891
##-----------------------------------------------------------------------------
## Influence measures.

head(cbind(r=residuals(m0), r.pad=rstandard(m0), r.stu=rstudent(m0)))
##             r       r.pad       r.stu
## 1  0.28905165  0.27289988  0.26891509
## 2  1.38047606  1.29611865  1.31057072
## 3 -0.15912591 -0.15005811 -0.14774683
## 4  1.01214849  0.98944643  0.98911153
## 5 -0.06905214 -0.06179415 -0.06082458
## 6 -0.07708542 -0.06829302 -0.06722238
im <- influence.measures(m0)
summary(im)
## Potentially influential observations of
##   lm(formula = quality ~ ., data = da) :
## 
##    dfb.1_ dfb.clrt dfb.arom dfb.body dfb.flvr dfb.okns dffit   cov.r   cook.d hat  
## 12  0.83  -0.97    -0.40     0.23    -0.01    -0.04     1.38_*  1.16    0.30   0.39
## 14 -0.03   0.11    -0.16    -0.05     0.05     0.08    -0.28    1.84_*  0.01   0.36
## 20 -0.04  -0.38     0.46    -1.06_*   0.65     0.60    -1.54_*  0.30_*  0.31   0.20
## 32 -0.04   0.04    -0.03     0.02     0.04    -0.03    -0.07    1.57_*  0.00   0.23
## 37 -0.06  -0.15     0.04     0.28    -0.37     0.39     0.61    1.72_*  0.06   0.38
##-----------------------------------------------------------------------------
## Stepwise selection.

## step(m0, k=2)              ## AIC.
m1 <- step(m0, k=log(nrow(da)))  ## BIC.
## Start:  AIC=26.74
## quality ~ clarity + aroma + body + flavor + oakiness
## 
##            Df Sum of Sq    RSS    AIC
## - body      1    0.9118 44.160 23.897
## - clarity   1    2.4577 45.706 25.204
## - aroma     1    4.2397 47.488 26.658
## <none>                  43.248 26.741
## - oakiness  1    8.5978 51.846 29.994
## - flavor    1   19.8986 63.147 37.487
## 
## Step:  AIC=23.9
## quality ~ clarity + aroma + flavor + oakiness
## 
##            Df Sum of Sq    RSS    AIC
## - clarity   1    1.6936 45.853 21.689
## <none>                  44.160 23.897
## - aroma     1    5.3545 49.514 24.608
## - oakiness  1    8.0807 52.241 26.645
## - flavor    1   27.3280 71.488 38.564
## 
## Step:  AIC=21.69
## quality ~ aroma + flavor + oakiness
## 
##            Df Sum of Sq    RSS    AIC
## <none>                  45.853 21.689
## - aroma     1    6.6026 52.456 23.164
## - oakiness  1    6.9989 52.852 23.450
## - flavor    1   25.6888 71.542 34.955
summary(m1)
## 
## Call:
## lm(formula = quality ~ aroma + flavor + oakiness, data = da)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5707 -0.6256  0.1521  0.6467  1.7741 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.4672     1.3328   4.852 2.67e-05 ***
## aroma         0.5801     0.2622   2.213 0.033740 *  
## flavor        1.1997     0.2749   4.364 0.000113 ***
## oakiness     -0.6023     0.2644  -2.278 0.029127 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.161 on 34 degrees of freedom
## Multiple R-squared:  0.7038, Adjusted R-squared:  0.6776 
## F-statistic: 26.92 on 3 and 34 DF,  p-value: 4.203e-09
##-----------------------------------------------------------------------------
## R², adjusted R², and PRESS.

smr <- summary(m1)
smr$r.squared
## [1] 0.7037672
smr$adj.r.squared
## [1] 0.677629
sum(residuals(m1)^2)
## [1] 45.85341
sum((residuals(m1)/(1-hatvalues(m1)))^2)
## [1] 56.05239
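
## Sketch: the PRESS shortcut above, sum((r/(1-h))^2), can be checked against
## explicit leave-one-out refits (slower, but the same quantity).
sum(sapply(1:nrow(da), function(i){
    fi <- predict(update(m1, data=da[-i, ]), newdata=da[i, ])
    (da$quality[i]-fi)^2
}))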

Polynomial regression

##-----------------------------------------------------------------------------
## Weight gain of turkeys as a function of dietary methionine.

xyplot(Gain~A, data=turk0, type=c("p","smooth"),
       xlab="Metionina (% da dieta)",
       ylab="Ganho de peso (g)")

##-----------------------------------------------------------------------------
## Fit of the saturated model (A treated as a factor).

turk0$Afat <- factor(turk0$A)
m1 <- lm(Gain~Afat, turk0)
anova(m1)
## Analysis of Variance Table
## 
## Response: Gain
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Afat       5 150041 30008.2  88.587 < 2.2e-16 ***
## Residuals 29   9824   338.7                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##-----------------------------------------------------------------------------
## Fit of the second-degree model: b0 + b1*x + b2*x^2.

## m2 <- lm(Gain~poly(A, 2), turk0)
m2 <- lm(Gain~A+I(A^2), turk0)

##-----------------------------------------------------------------------------
## Lack-of-fit test for model m2.

anova(m1, m2)
## Analysis of Variance Table
## 
## Model 1: Gain ~ Afat
## Model 2: Gain ~ A + I(A^2)
##   Res.Df     RSS Df Sum of Sq     F Pr(>F)
## 1     29  9823.6                          
## 2     32 11339.9 -3   -1516.2 1.492 0.2374
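
## Sketch: the lack-of-fit F above compares the extra residual sum of squares
## of m2 with the pure-error mean square of the saturated model m1.
((deviance(m2)-deviance(m1))/(df.residual(m2)-df.residual(m1)))/
    (deviance(m1)/df.residual(m1))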
##-----------------------------------------------------------------------------
## Plot of the predicted values with confidence bands.

pred2 <- data.frame(A=seq(0, 0.45, by=0.025))
a <- predict(m2, newdata=pred2, interval="confidence")
pred2 <- cbind(pred2, a)

## Overlaying the mean estimates from model m1.
pred1 <- data.frame(Afat=levels(turk0$Afat))
a <- predict(m1, newdata=pred1, interval="confidence")
pred1 <- cbind(pred1, a)
pred1$A <- as.numeric(as.character(pred1$Afat))
## pred1

xyplot(Gain~A, data=turk0,
     xlab="Metionina (% da dieta)",
     ylab="Ganho de peso (g)")+
    as.layer(xyplot(fit~A, data=pred2, type="l",
                    ly=pred2$lwr, uy=pred2$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))+
    as.layer(xyplot(fit~(A+0.005), data=pred1, pch=19,
                    ly=pred1$lwr, uy=pred1$upr, cty="bars",
                    prepanel=prepanel.cbH, panel=panel.cbH))

##-----------------------------------------------------------------------------
## Using weights in the regression fit.

tu <- ddply(turk0, .(A), summarise,
            mGain=mean(Gain), nGain=length(Gain))
tu
##      A mGain nGain
## 1 0.00 623.0    10
## 2 0.04 668.4     5
## 3 0.10 715.6     5
## 4 0.16 732.0     5
## 5 0.28 794.0     5
## 6 0.44 785.4     5
m3 <- lm(mGain~A+I(A^2), data=tu, weights=nGain)
summary(m3)
## 
## Call:
## lm(formula = mGain ~ A + I(A^2), data = tu, weights = nGain)
## 
## Weighted Residuals:
##       1       2       3       4       5       6 
##  -8.037  14.442  16.171 -29.042  11.611  -1.816 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   625.542      6.243 100.201 2.19e-06 ***
## A             964.471     86.763  11.116  0.00156 ** 
## I(A^2)      -1362.069    198.338  -6.867  0.00632 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.48 on 3 degrees of freedom
## Multiple R-squared:  0.9899, Adjusted R-squared:  0.9832 
## F-statistic: 146.9 on 2 and 3 DF,  p-value: 0.001016
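
## Sketch: weighted least squares on the treatment means, with the group
## sizes as weights, should reproduce the coefficients of the fit to the raw
## data (m2).
cbind(raw=coef(m2), means=coef(m3))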
##-----------------------------------------------------------------------------
## Overlaying the predicted values and confidence bands obtained from the
## fit to the means.

pred3 <- data.frame(A=seq(0, 0.45, by=0.025))
a <- predict(m3, newdata=pred3, interval="confidence")
pred3 <- cbind(pred3, a)

xyplot(Gain~A, data=turk0,
     xlab="Metionina (% da dieta)",
     ylab="Ganho de peso (g)")+
    as.layer(xyplot(fit~A, data=pred2, type="l",
                    ly=pred2$lwr, uy=pred2$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))+
    as.layer(xyplot(fit~A, data=pred3, type="l",
                    ly=pred3$lwr, uy=pred3$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))


Response surface

##-----------------------------------------------------------------------------
## Data.

da <- read.table(
    "http://www.leg.ufpr.br/~walmes/data/MontgomeryASPE5th/Example14.12.txt",
    header=TRUE, sep="\t")
names(da) <- substr(tolower(names(da)), 1, 4)
str(da)
## 'data.frame':    13 obs. of  6 variables:
##  $ time: int  50 60 50 60 48 62 55 55 55 55 ...
##  $ temp: int  160 160 170 170 165 165 158 172 165 165 ...
##  $ x1  : num  -1 1 -1 1 -1.41 ...
##  $ x2  : num  -1 -1 1 1 0 ...
##  $ conv: num  65.3 68.2 66 69.8 64.5 69 64 68.5 68.9 69.7 ...
##  $ visc: int  35 39 36 43 30 44 31 45 37 34 ...
## Central composite design.
xyplot(x1~x2, da, jitter.x=TRUE, jitter.y=TRUE)

##-----------------------------------------------------------------------------
## Model fit.

m0 <- lm(conv~x1+x2+x1:x2+I(x1^2)+I(x2^2), data=da)
summary(m0)
## 
## Call:
## lm(formula = conv ~ x1 + x2 + x1:x2 + I(x1^2) + I(x2^2), data = da)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1315 -0.3537 -0.0999  0.3057  0.9627 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  69.0999     0.3506 197.082 2.29e-14 ***
## x1            1.6331     0.2772   5.891 0.000605 ***
## x2            1.0830     0.2772   3.907 0.005846 ** 
## I(x1^2)      -0.9688     0.2973  -3.259 0.013893 *  
## I(x2^2)      -1.2189     0.2973  -4.100 0.004575 ** 
## x1:x2         0.2250     0.3920   0.574 0.583946    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.784 on 7 degrees of freedom
## Multiple R-squared:  0.9143, Adjusted R-squared:  0.853 
## F-statistic: 14.93 on 5 and 7 DF,  p-value: 0.001291
anova(m0)
## Analysis of Variance Table
## 
## Response: conv
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## x1         1 21.3335 21.3335 34.7079 0.0006047 ***
## x2         1  9.3824  9.3824 15.2644 0.0058462 ** 
## I(x1^2)    1  4.6409  4.6409  7.5504 0.0285946 *  
## I(x2^2)    1 10.3305 10.3305 16.8069 0.0045752 ** 
## x1:x2      1  0.2025  0.2025  0.3295 0.5839456    
## Residuals  7  4.3026  0.6147                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##-----------------------------------------------------------------------------
## Using functions from the rsm package.

m1 <- rsm(conv~SO(x1, x2), data=da) 
summary(m1)
## 
## Call:
## rsm(formula = conv ~ SO(x1, x2), data = da)
## 
##             Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept) 69.09990    0.35062 197.0815 2.286e-14 ***
## x1           1.63312    0.27721   5.8913 0.0006047 ***
## x2           1.08304    0.27721   3.9070 0.0058462 ** 
## x1:x2        0.22500    0.39200   0.5740 0.5839456    
## x1^2        -0.96880    0.29731  -3.2585 0.0138931 *  
## x2^2        -1.21887    0.29731  -4.0996 0.0045752 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Multiple R-squared:  0.9143, Adjusted R-squared:  0.853 
## F-statistic: 14.93 on 5 and 7 DF,  p-value: 0.001291
## 
## Analysis of Variance Table
## 
## Response: conv
##             Df  Sum Sq Mean Sq F value    Pr(>F)
## FO(x1, x2)   2 30.7158 15.3579 24.9861 0.0006502
## TWI(x1, x2)  1  0.2025  0.2025  0.3295 0.5839456
## PQ(x1, x2)   2 14.9714  7.4857 12.1786 0.0052560
## Residuals    7  4.3026  0.6147                  
## Lack of fit  3  3.4426  1.1475  5.3374 0.0697146
## Pure error   4  0.8600  0.2150                  
## 
## Stationary point of response surface:
##        x1        x2 
## 0.9041430 0.5277301 
## 
## Eigenanalysis:
## $values
## [1] -0.9256357 -1.2620325
## 
## $vectors
##          [,1]       [,2]
## x1 -0.9336473 -0.3581937
## x2 -0.3581937  0.9336473
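
## Sketch: the stationary point reported by rsm is -0.5 * B^{-1} b, where b
## holds the first-order coefficients and B the symmetric matrix of the
## second-order coefficients; eigen(B) gives the canonical analysis.
b <- coef(m0)[c("x1", "x2")]
B <- matrix(c(coef(m0)["I(x1^2)"], coef(m0)["x1:x2"]/2,
              coef(m0)["x1:x2"]/2, coef(m0)["I(x2^2)"]), nrow=2)
-0.5*solve(B)%*%b
eigen(B)$values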
##-----------------------------------------------------------------------------
## Prediction.

pred <- expand.grid(
    x1=seq(-sqrt(2), sqrt(2), l=20),
    x2=seq(-sqrt(2), sqrt(2), l=20))
pred$y <- predict(m0, newdata=pred)

pred <- transform(pred,
                  time=55+x1*5,
                  temp=165+x2*5)

colr <- brewer.pal(11, "Spectral")
colr <- colorRampPalette(colr, space="rgb")

wireframe(y~time+temp,
          data=pred,
          xlab=list("Tempo (min)", rot=22),
          ylab=list("Temperatura (°F)", rot=-36),
          zlab=list("Conversão (%)", rot=90),
          scales=list(arrows=FALSE),
          zlim=c(56, max(pred$y)),
          panel.3d.wireframe=panel.3d.contour,
          nlevels=18,# col="gray30",
          type=c("on", "bottom"),
          col.regions=colr(101),  drape=TRUE,
          screen=list(x=-75, z=10, y=35))


Regression with categorical factors

##-----------------------------------------------------------------------------
## Data.

da <- read.table("http://www.leg.ufpr.br/~walmes/data/soja.txt",
                 header=TRUE, sep="\t", dec=",")
da <- transform(da, a=agua, A=factor(agua),
                k=potassio, K=factor(potassio))
str(da)
## 'data.frame':    75 obs. of  14 variables:
##  $ potassio: int  0 30 60 120 180 0 30 60 120 180 ...
##  $ agua    : num  37.5 37.5 37.5 37.5 37.5 50 50 50 50 50 ...
##  $ bloco   : Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ rengrao : num  14.6 21.5 24.6 21.9 28.1 ...
##  $ pesograo: num  10.7 13.5 15.8 12.8 14.8 ...
##  $ kgrao   : num  15.1 17.1 19.1 18.1 19.1 ...
##  $ pgrao   : num  1.18 0.99 0.82 0.85 0.88 1.05 1.08 0.74 1.01 1.01 ...
##  $ ts      : int  136 159 156 171 190 140 193 200 208 237 ...
##  $ nvi     : int  22 2 0 2 0 20 6 6 7 10 ...
##  $ nv      : int  56 62 66 68 82 63 86 94 86 97 ...
##  $ a       : num  37.5 37.5 37.5 37.5 37.5 50 50 50 50 50 ...
##  $ A       : Factor w/ 3 levels "37.5","50","62.5": 1 1 1 1 1 2 2 2 2 2 ...
##  $ k       : int  0 30 60 120 180 0 30 60 120 180 ...
##  $ K       : Factor w/ 5 levels "0","30","60",..: 1 2 3 4 5 1 2 3 4 5 ...
xyplot(pesograo~k|A, data=da, type=c("p","a")) 

##-----------------------------------------------------------------------------
## Fit.

m0 <- aov(kgrao~bloco+A*poly(k, degree=2), data=da,
          contrasts=list(bloco=contr.sum))
anova(m0)
## Analysis of Variance Table
## 
## Response: kgrao
##                       Df Sum Sq Mean Sq F value    Pr(>F)    
## bloco                  4  16.32   4.079  1.0970    0.3660    
## A                      2   5.49   2.747  0.7387    0.4819    
## poly(k, degree = 2)    2 423.70 211.850 56.9735 9.064e-15 ***
## A:poly(k, degree = 2)  4   6.92   1.731  0.4656    0.7607    
## Residuals             62 230.54   3.718                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m1 <- update(m0, .~bloco+A/poly(k, degree=2))
anova(m1)
## Analysis of Variance Table
## 
## Response: kgrao
##                       Df Sum Sq Mean Sq F value    Pr(>F)    
## bloco                  4  16.32   4.079  1.0970    0.3660    
## A                      2   5.49   2.747  0.7387    0.4819    
## A:poly(k, degree = 2)  6 430.63  71.771 19.3016 1.515e-12 ***
## Residuals             62 230.54   3.718                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.lm(m1)
## 
## Call:
## aov(formula = kgrao ~ bloco + A + A:poly(k, degree = 2), data = da, 
##     contrasts = list(bloco = contr.sum))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7011 -1.0098  0.0263  0.8936  5.4240 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 18.0568     0.3857  46.820  < 2e-16 ***
## bloco1                      -0.7364     0.4453  -1.654   0.1033    
## bloco2                       0.1263     0.4453   0.284   0.7777    
## bloco3                      -0.0404     0.4453  -0.091   0.9280    
## bloco4                      -0.0724     0.4453  -0.163   0.8714    
## A50                         -0.1616     0.5454  -0.296   0.7680    
## A62.5                       -0.6376     0.5454  -1.169   0.2469    
## A37.5:poly(k, degree = 2)1  17.3599     3.3399   5.198 2.39e-06 ***
## A50:poly(k, degree = 2)1    20.6219     3.3399   6.174 5.64e-08 ***
## A62.5:poly(k, degree = 2)1  21.3761     3.3399   6.400 2.32e-08 ***
## A37.5:poly(k, degree = 2)2  -2.9066     3.3399  -0.870   0.3875    
## A50:poly(k, degree = 2)2    -7.3413     3.3399  -2.198   0.0317 *  
## A62.5:poly(k, degree = 2)2  -6.7801     3.3399  -2.030   0.0467 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.928 on 62 degrees of freedom
## Multiple R-squared:  0.6624, Adjusted R-squared:  0.5971 
## F-statistic: 10.14 on 12 and 62 DF,  p-value: 1.247e-10
## Positions follow the coefficient order shown in summary.lm(m1):
## the three linear terms first, then the three quadratic terms.
L <- list(A37.5lin=1, A50.0lin=2, A62.5lin=3,
          A37.5qua=4, A50.0qua=5, A62.5qua=6)
summary(m1, split=list("A:poly(k, degree = 2)"=L))
##                                   Df Sum Sq Mean Sq F value   Pr(>F)    
## bloco                              4   16.3    4.08   1.097   0.3660    
## A                                  2    5.5    2.75   0.739   0.4819    
## A:poly(k, degree = 2)              6  430.6   71.77  19.302 1.51e-12 ***
##   A:poly(k, degree = 2): A37.5lin  1  100.5  100.45  27.016 2.39e-06 ***
##   A:poly(k, degree = 2): A50.0lin  1  141.8  141.75  38.122 5.64e-08 ***
##   A:poly(k, degree = 2): A62.5lin  1  152.3  152.31  40.962 2.32e-08 ***
##   A:poly(k, degree = 2): A37.5qua  1    2.8    2.82   0.757   0.3875    
##   A:poly(k, degree = 2): A50.0qua  1   18.0   17.96   4.831   0.0317 *  
##   A:poly(k, degree = 2): A62.5qua  1   15.3   15.32   4.121   0.0467 *  
## Residuals                         62  230.5    3.72                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m2 <- update(m0, .~bloco+A*poly(k, degree=1))
anova(m0, m2)
## Analysis of Variance Table
## 
## Model 1: kgrao ~ bloco + A * poly(k, degree = 2)
## Model 2: kgrao ~ bloco + A + poly(k, degree = 1) + A:poly(k, degree = 1)
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)  
## 1     62 230.54                             
## 2     65 266.64 -3   -36.104 3.2365 0.0281 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##-----------------------------------------------------------------------------
## Prediction. Subtracting coef(m0)["bloco1"] removes the block I effect, so
## the curves refer to the average block (bloco uses sum-to-zero contrasts).
 
pred <- expand.grid(bloco="I", A=levels(da$A), k=seq(0,185,5))
ci <- predict(m0, newdata=pred, interval="confidence")-coef(m0)["bloco1"]
pred <- cbind(pred, ci)
 
mycol <- brewer.pal(6, "Set1")

p1 <- 
    xyplot(kgrao~k, groups=A, data=da,
           scales="free", jitter.x=TRUE,
           xlab=expression("Potássio no solo"~(mg~dm^{-3})),
           ylab="Peso de 100 grãos (g)",
           auto.key=list(title="Níveis de água (%)", cex.title=1.2,
               columns=3, points=FALSE, lines=TRUE),
           par.settings=list(superpose.symbol=list(col=mycol),
               superpose.line=list(col=mycol),
               strip.background=list(col="gray90")))

p2 <- 
    xyplot(fit~k, groups=A, data=pred,
           type="l", lwd=1.5,
           ly=pred$lwr, uy=pred$upr,
           cty="bands", alpha=0.25,
           panel.groups=panel.cbH,
           panel=panel.superpose)

p1+as.layer(under=TRUE, p2)
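
## Sketch: the potassium level that maximizes the fitted quadratic within
## each water level, read off the prediction grid built above (pred).
ddply(pred, .(A), function(d) d[which.max(d$fit), c("k", "fit")])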


Nonlinear regression

##-----------------------------------------------------------------------------
## Data.

da <- read.table("http://www.leg.ufpr.br/~walmes/data/algodão.txt",
                 header=TRUE, sep="\t", encoding="latin1")
da$desf <- da$desf/100
levels(da$estag) <- c("Vegetativo", "Botão floral",
                      "Florescimento", "Maçã", "Capulho")
str(da)
## 'data.frame':    125 obs. of  8 variables:
##  $ estag : Factor w/ 5 levels "Vegetativo","Botão floral",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ desf  : num  0 0 0 0 0 0.25 0.25 0.25 0.25 0.25 ...
##  $ rep   : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ pcapu : num  33.2 28.7 31.5 28.9 36.4 ...
##  $ nnos  : int  32 29 29 27 28 30 33 23 29 26 ...
##  $ alt   : num  151 146 140 155 150 ...
##  $ ncapu : int  10 9 8 8 10 11 9 10 10 10 ...
##  $ nestru: int  10 9 8 9 10 11 9 11 10 10 ...
p0 <-
    xyplot(pcapu~desf|estag, data=da, layout=c(5,1),
           xlab="Nível de desfolha artificial",
           ylab="Peso de capulhos produzidos (g)")
## p0

##-----------------------------------------------------------------------------
## Fit.

n0 <- lapply(split(da, da$estag), nls,
             formula=pcapu~f0-f1*(desf+0.02)^exp(C),
             start=list(f0=30, f1=15, C=0))
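## In n1 the exponent is reparameterized so that xde has a direct reading:
## since xde^((log(5)-log(f1))/log(xde)) = 5/f1, xde is (apart from the 0.02
## offset) the defoliation level at which the expected loss below f0 is 5 g.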
n1 <- lapply(split(da, da$estag), nls,
             formula=pcapu~f0-f1*(desf+0.02)^((log(5)-log(f1))/log(xde)),
             start=list(f0=30, f1=10, xde=0.7))

chute1 <- sapply(n1, coef)
chute2 <- cbind(chute1[,1], sweep(chute1[,2:5], 1, chute1[,1], "-"))
c(t(chute2))
##  [1] 31.15716578 -1.89166251 -1.01579242 -2.88803986  3.63178792  9.29140391 -3.59069095
##  [8]  5.24697027 10.42379324 -0.50404109  0.87337687  0.09069715 -0.73457062 -0.12914993
## [15] -0.44398962
nn0 <- gnls(pcapu~f0-f1*desf^((log(5)-log(f1))/log(xde)), data=da,
            params=f0+f1+xde~estag,
            start=c(t(chute2)))

## summary(nn0)
anova(nn0, type="marginal")
## Denom. DF: 110 
##                 numDF  F-value p-value
## f0.(Intercept)      1 894.8326  <.0001
## f0.estag            4   3.0698  0.0193
## f1.(Intercept)      1  29.5203  <.0001
## f1.estag            4  10.1658  <.0001
## xde.(Intercept)     1 111.3543  <.0001
## xde.estag           4   5.9569  0.0002
##-----------------------------------------------------------------------------
## Prediction.

nn0 <- gnls(pcapu~f0-f1*desf^((log(5)-log(f1))/log(xde)), data=da,
            params=f0+f1+xde~-1+estag,
            start=c(t(chute1)))
th <- coef(nn0); th
##     f0.estagVegetativo   f0.estagBotão floral  f0.estagFlorescimento 
##             31.1492629             29.2628830             28.5011919 
##           f0.estagMaçã        f0.estagCapulho     f1.estagVegetativo 
##             28.2634222             34.1531615             10.1662666 
##   f1.estagBotão floral  f1.estagFlorescimento           f1.estagMaçã 
##              6.1173914             13.0146648             21.6085661 
##        f1.estagCapulho    xde.estagVegetativo  xde.estagBotão floral 
##              8.2777934              0.8538359              0.9440167 
## xde.estagFlorescimento          xde.estagMaçã       xde.estagCapulho 
##              0.2021509              0.7243590              0.4954611
xde <- th[grepl("^xde", names(th))]

pred <- with(da,
             expand.grid(estag=levels(estag),
                         desf=seq(0, 1, l=20)))
pred$y <- predict(nn0, newdata=pred)

f <- function(th, desf, estag){
    X <- model.matrix(~0+estag)
    p <- gsub("\\..*$", "", names(th))
    ths <- split(th, f=p)
    ths <- lapply(ths, matrix, ncol=1)
    f0 <- X%*%ths$f0
    f1 <- X%*%ths$f1
    xde <- X%*%ths$xde
    y <- f0-f1*desf^((log(5)-log(f1))/log(xde))
    return(y)
}

## f(th=coef(nn0), desf=pred$desf, estag=pred$estag)
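## Delta method for the band: F holds the gradient of the mean function with
## respect to the parameters over the prediction grid; with U the Cholesky
## factor of vcov(nn0) (t(U) %*% U = V), the squared row norms of F %*% t(U)
## equal diag(F V F'), the variances of the fitted values.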
F <- rootSolve::gradient(f=f, x=coef(nn0),
                         desf=pred$desf, estag=pred$estag)
U <- chol(vcov(nn0))
pred$se <- sqrt(apply(F%*%t(U), 1, function(x) sum(x^2)))
aux <- outer(pred$se, qnorm(0.975)*c(lwr=-1, fit=0, upr=1), FUN="*")
pred <- cbind(pred, sweep(aux, MARGIN=1, STATS=pred$y, FUN="+"))
str(pred)
## 'data.frame':    100 obs. of  7 variables:
##  $ estag: Factor w/ 5 levels "Vegetativo","Botão floral",..: 1 2 3 4 5 1 2 3 4 5 ...
##  $ desf : num  0 0 0 0 0 ...
##  $ y    : atomic  31.1 29.3 28.5 28.3 34.2 ...
##   ..- attr(*, "label")= chr "Predicted values"
##  $ se   : num  1.04 1.12 1.58 1.04 1.58 ...
##  $ lwr  : num  29.1 27.1 25.4 26.2 31.1 ...
##  $ fit  : num  31.1 29.3 28.5 28.3 34.2 ...
##  $ upr  : num  33.2 31.4 31.6 30.3 37.2 ...
xyplot(pcapu~desf|estag, data=da, col=1, layout=c(5,1),
       xlim=c(-0.2,1.2),
       xlab="Desfolha artificial", ylab="Peso de capulhos (g)",
       panel=function(...){
           panel.xyplot(...)
           panel.abline(v=xde[which.packet()], col=2)
       })+
    as.layer(xyplot(fit+lwr+upr~desf|estag, data=pred,
                    type="l", col=1, lty=c(1,2,2)))


Extensions of regression models

##-----------------------------------------------------------------------------
## Data.
 
da <- read.table("http://www.leg.ufpr.br/~walmes/ridiculas/osmo.txt",
                   header=TRUE, sep="\t")
names(da) <- substr(tolower(names(da)), 1, 3)
str(da)
## 'data.frame':    213 obs. of  3 variables:
##  $ esp: Factor w/ 4 levels "A. reticulatus",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ sal: int  0 0 0 0 0 0 10 10 10 10 ...
##  $ osm: int  592 601 485 497 596 587 530 328 339 516 ...
xyplot(osm~sal|esp, data=da, type=c("p","smooth"),
       xlab="Salinidade", ylab="Osmolalidade")

##-----------------------------------------------------------------------------
## Segmented regression.

db <- droplevels(subset(da, esp=="F. citerosum"))

m0 <- lm(osm~sal, data=db)

## One breakpoint.
s1 <- segmented(m0, seg.Z=~sal,
                psi=list(sal=c(5)),
                control=seg.control(display=FALSE))

## Two breakpoints.
s2 <- update(s1, psi=list(sal=c(10,30)))
summary(s2)
## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = m0, seg.Z = ~sal, psi = list(sal = c(10, 30)), 
##     control = seg.control(display = FALSE))
## 
## Estimated Break-Point(s):
##            Est. St.Err
## psi1.sal 10.18  1.317
## psi2.sal 34.87  1.117
## 
## t value for the gap-variable(s) V:  0 0 
## 
## Meaningful coefficients of the linear terms:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  523.492     31.409  16.667  < 2e-16 ***
## sal          -19.493      4.890  -3.986 0.000187 ***
## U1.sal        41.143      5.784   7.113       NA    
## U2.sal       -48.197     10.262  -4.697       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 94.54 on 59 degrees of freedom
## Multiple R-Squared: 0.7712,  Adjusted R-squared: 0.7518 
## 
## Convergence attained in 2 iterations with relative change 0
slope(s2)
## $sal
##          Est. St.Err. t value CI(95%).l CI(95%).u
## slope1 -19.49   4.890  -3.986    -29.28    -9.708
## slope2  21.65   3.089   7.009     15.47    27.830
## slope3 -26.55   9.786  -2.713    -46.13    -6.965
confint(s2)
## $sal
##           Est. CI(95%).l CI(95%).u
## psi1.sal 10.18     7.549     12.82
## psi2.sal 34.87    32.630     37.10
plot(osm~sal, data=db)
abline(m0)
plot(s1, add=TRUE, col=2)
plot(s2, add=TRUE, col=4)
lines(s1)
lines(s2)
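
## Sketch: the slopes reported by slope(s2) are cumulative sums of the 'sal'
## coefficient and the U terms (the changes in slope at each breakpoint).
cumsum(coef(s2)[c("sal", "U1.sal", "U2.sal")])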

##-----------------------------------------------------------------------------
## Local polynomial regression (loess).

l0 <- loess(osm~sal, data=db)
summary(l0)
## Call:
## loess(formula = osm ~ sal, data = db)
## 
## Number of Observations: 65 
## Equivalent Number of Parameters: 4.51 
## Residual Standard Error: 101.2 
## Trace of smoother matrix: 4.93 
## 
## Control settings:
##   normalize:  TRUE 
##   span       :  0.75 
##   degree   :  2 
##   family   :  gaussian
##   surface  :  interpolate      cell = 0.2
class(l0)
## [1] "loess"
methods(class="loess")
## [1] anova.loess*   predict.loess* print.loess*   summary.loess*
## 
##    Non-visible functions are asterisked
pred <- with(da, data.frame(sal=seq(min(sal), max(sal), l=100)))
aux <- predict(l0, newdata=pred, se=TRUE)
me <- outer(aux$se.fit, c(lwr=-1, fit=0, upr=1)*qnorm(0.975), "*")
aux <- sweep(me, MARGIN=1, STATS=aux$fit, FUN="+")
pred <- cbind(pred, aux)

plot(osm~sal, data=db)
matlines(x=pred$sal, y=pred[,c("fit","lwr","upr")],
         lty=c(1,2,2), col=1)
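
## Sketch: effect of the span argument (default 0.75); a smaller span gives a
## more local, wigglier fit.
l1 <- update(l0, span=0.4)
plot(osm~sal, data=db)
lines(pred$sal, predict(l0, newdata=pred), lty=1)
lines(pred$sal, predict(l1, newdata=pred), lty=2)
legend("topleft", legend=c("span = 0.75", "span = 0.4"), lty=c(1, 2), bty="n")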

##-----------------------------------------------------------------------------
## Splines.

## p0 <- lm(osm~bs(sal, df=4), data=da)
p0 <- lm(osm~ns(sal, df=4), data=da)
summary(p0) 
## 
## Call:
## lm(formula = osm ~ ns(sal, df = 4), data = da)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -317.74  -83.72  -11.85   84.15  364.67 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       524.024     25.142  20.842  < 2e-16 ***
## ns(sal, df = 4)1    1.777     47.245   0.038     0.97    
## ns(sal, df = 4)2  275.654     43.342   6.360 1.26e-09 ***
## ns(sal, df = 4)3  284.906     69.243   4.115 5.59e-05 ***
## ns(sal, df = 4)4  268.793     35.445   7.583 1.10e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 150.3 on 208 degrees of freedom
## Multiple R-squared:  0.3763, Adjusted R-squared:  0.3643 
## F-statistic: 31.37 on 4 and 208 DF,  p-value: < 2.2e-16
pred <- with(da, data.frame(sal=seq(min(sal), max(sal), l=100)))
aux <- predict(p0, newdata=pred, interval="confidence")
pred <- cbind(pred, aux)

plot(osm~sal, data=db)
matlines(x=pred$sal, y=pred[,c("fit","lwr","upr")],
         lty=c(1,2,2), col=1)

##-----------------------------------------------------------------------------
## GAM.

rock$lperm <- log(rock$perm)
pairs(rock)

g0 <- gam(log(perm)~s(peri)+s(shape), data=rock)
summary(g0)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log(perm) ~ s(peri) + s(shape)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.1075     0.1446   35.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##            edf Ref.df      F  p-value    
## s(peri)  2.892  3.587 11.645 2.64e-06 ***
## s(shape) 1.940  2.421  3.842   0.0231 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.628   Deviance explained = 66.7%
## GCV = 1.1426  Scale est. = 1.0038    n = 48
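
## Sketch: visualizing the estimated smooth terms.
plot(g0, pages=1, residuals=TRUE)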
cloud(perm~peri+shape, data=rock)

pred <- with(rock,
             expand.grid(
                 peri=seq(min(peri), max(peri), l=20),
                 shape=seq(min(shape), max(shape), l=20))
             )
pred$perm <- predict(g0, newdata=pred)

colr <- brewer.pal(11, "Spectral")
colr <- colorRampPalette(colr, space="rgb")
colr <- rgb(t(col2rgb(colr(101)))/255,
            alpha=0.5)

wireframe(perm~peri+shape,
          data=pred,
          zlim=range(rock$lperm),
          xlab=list("Perimetro", rot=22),
          ylab=list("Forma", rot=-36),
          zlab=list("Permeabilidade", rot=90),
          scales=list(arrows=FALSE),
          panel.3d.wireframe=panel.3d.contour,
          nlevels=18,# col="gray30",
          type=c("on", "bottom"),
          col.regions=colr,  drape=TRUE,
          screen=list(x=-75, z=10, y=35),
          ## screen=list(x=-80, z=-5, y=-25),
          panel=function(...){
              L <- list(...)
              ## print(str(L))
              L$x <- rock$peri
              L$y <- rock$shape
              L$z <- rep(L$zlim[1], nrow(rock))
              L$type <- "p";
              L$pch <- 4;
              L$col <- 1;
              L$scales.3d$x.scales$draw <- FALSE
              L$scales.3d$y.scales$draw <- FALSE
              L$scales.3d$z.scales$draw <- FALSE
              L$xlab <- ""; L$ylab <- ""; L$zlab <- "";
              L$par.box <- list(col=rep(NA, 9))
              L$box.3d$lty <- 0
              L$panel.3d.wireframe <- NULL
              do.call(panel.cloud, L)
              L$pch <- 1;
              L$z <- rock$lperm
              do.call(panel.cloud, L)
              apply(rock[,c("peri","shape","lperm")], 1,
                    function(line){
                        L$x <- rep(line[1], 2)
                        L$y <- rep(line[2], 2)
                        L$z <- c(L$zlim[1], line[3])
                        L$type <- "l"
                        do.call(panel.cloud, L)
                    })
              panel.abline(v=0.3)
              panel.wireframe(...)
          })


Joint modeling of the variance

##-----------------------------------------------------------------------------

goi <- read.table("http://www.leg.ufpr.br/~walmes/data/goiaba.txt",
                  header=TRUE, sep="\t")
goi$daa2 <- goi$daa^2.5
str(goi)
## 'data.frame':    174 obs. of  8 variables:
##  $ daa   : int  13 13 13 13 13 13 13 13 13 13 ...
##  $ coleta: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ rep   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ long  : num  29.1 32.9 36.3 34.7 33.3 ...
##  $ trans : num  20.5 22.8 22.9 22.7 21.4 ...
##  $ peso  : num  6.66 8.51 9.93 9.14 7.6 7.54 6.27 9.94 7.95 7.79 ...
##  $ volume: int  6 9 11 10 7 8 6 9 8 8 ...
##  $ daa2  : num  609 609 609 609 609 ...
aux <- ddply(goi, .(daa), summarise, m=mean(peso), s=sd(peso))

p1 <- xyplot(peso~daa, data=goi, type=c("p","a"),
             ylab="Peso do fruto (g)", xlab="Dias após antese")
p2 <- xyplot(s~m, aux, type=c("p","r"),
             xlab="Média de peso", ylab="Desvio-padrão de peso")
grid.arrange(p1, p2)

##-----------------------------------------------------------------------------
## Fits.

## Traditional model (constant residual variance).
n0 <- gnls(peso~A-(A-D)*exp(-exp(C*(daa-B))), data=goi,
           start=c(A=200, C=0.09, B=105, D=7))
summary(n0)
## Generalized nonlinear least squares fit
##   Model: peso ~ A - (A - D) * exp(-exp(C * (daa - B))) 
##   Data: goi 
##        AIC      BIC    logLik
##   1619.093 1634.888 -804.5465
## 
## Coefficients:
##       Value Std.Error   t-value p-value
## A 205.76239  4.196193  49.03549       0
## C   0.09367  0.010148   9.22980       0
## B 107.35182  0.874391 122.77326       0
## D  18.70608  3.179082   5.88411       0
## 
##  Correlation: 
##   A      C      B     
## C -0.357              
## B  0.590 -0.131       
## D -0.103  0.467  0.219
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -3.17237358 -0.42834311 -0.07757784  0.35077905  3.94396837 
## 
## Residual standard error: 24.94118 
## Degrees of freedom: 174 total; 170 residual
## Model with a variance function. The commented varExp alternative gives
## var(e) = sigma^2*exp(2*delta*daa); the fit below uses varPower on log(daa),
## i.e. var(e) = sigma^2*log(daa)^(2*delta).
n1 <- update(n0,
             ## weights=varExp(0.03, form=~daa)
             weights=varPower(0.1, form=~log(daa))             
             )
summary(n1)
## Generalized nonlinear least squares fit
##   Model: peso ~ A - (A - D) * exp(-exp(C * (daa - B))) 
##   Data: goi 
##        AIC      BIC    logLik
##   1432.407 1451.362 -710.2036
## 
## Variance function:
##  Structure: Power of variance covariate
##  Formula: ~log(daa) 
##  Parameter estimates:
##    power 
## 5.764348 
## 
## Coefficients:
##       Value Std.Error  t-value p-value
## A 217.15613  8.784140 24.72139       0
## C   0.05520  0.003531 15.63409       0
## B 108.05936  1.880553 57.46148       0
## D   7.49496  0.426361 17.57893       0
## 
##  Correlation: 
##   A      C      B     
## C -0.566              
## B  0.879 -0.624       
## D -0.390  0.837 -0.386
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.66220159 -0.53129856  0.09399151  0.63400182  2.80361091 
## 
## Residual standard error: 0.003829197 
## Degrees of freedom: 174 total; 170 residual
anova(n1, n0)
##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## n1     1  6 1432.407 1451.362 -710.2036                        
## n0     2  5 1619.093 1634.888 -804.5465 1 vs 2 188.6858  <.0001
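
## Sketch: approximate interval estimates for the mean parameters, the power
## of the variance function, and the residual scale (nlme::intervals).
intervals(n1)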
##-----------------------------------------------------------------------------
## Diagnostics.

da <- rbind(data.frame(model="trad",
                       r=residuals(n0, type="pearson"),
                       f=fitted(n0)),
            data.frame(model="var",
                       r=residuals(n1, type="pearson"),
                       f=fitted(n1)))

p1 <- qqmath(~r|model, data=da,
             xlab="Quantis teóricos",
             ylab="Resíduos padronizados")
p2 <- xyplot(sqrt(abs(r))~f|model, data=da, type=c("p","smooth"),
             xlab="Valor ajustado",
             ylab=expression(sqrt("|Resíduos padronizados|")))
grid.arrange(p1, p2)

##-----------------------------------------------------------------------------
## Prediction.

model <- deriv3(~ A-(A-D)*exp(-exp(C*(daa-B))),
                c("A","C","B","D"),
                function(daa, A, C, B, D){NULL})

l <- list(n0, n1)
names(l) <- levels(da$model)
L <- l
for(i in seq_along(L)){
  m4 <- l[[i]]
  U <- chol(vcov(m4))
  pred <- data.frame(daa=seq(1,140,1))
  m <- model(daa=pred$daa, A=coef(m4)["A"], C=coef(m4)["C"],
             B=coef(m4)["B"], D=coef(m4)["D"])
  F0 <- attr(m, "gradient")
  pred$y <- c(m)
  pred$se <- sqrt(apply(F0%*%t(U), 1, function(x) sum(x^2)))
  z <- qnorm(0.975)
  pred <- transform(pred, lwr=y-z*se, upr=y+z*se)
  L[[i]] <- pred
}

str(L)
## List of 2
##  $ trad:'data.frame':    140 obs. of  5 variables:
##   ..$ daa: num [1:140] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ y  : num [1:140] 18.7 18.7 18.7 18.7 18.7 ...
##   ..$ se : num [1:140] 3.17 3.17 3.17 3.17 3.17 ...
##   ..$ lwr: num [1:140] 12.5 12.5 12.5 12.5 12.5 ...
##   ..$ upr: num [1:140] 24.9 24.9 24.9 24.9 24.9 ...
##  $ var :'data.frame':    140 obs. of  5 variables:
##   ..$ daa: num [1:140] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ y  : num [1:140] 8.06 8.1 8.13 8.16 8.2 ...
##   ..$ se : num [1:140] 0.278 0.273 0.267 0.262 0.256 ...
##   ..$ lwr: num [1:140] 7.52 7.56 7.61 7.65 7.7 ...
##   ..$ upr: num [1:140] 8.61 8.63 8.65 8.68 8.7 ...
L <- ldply(L)
names(L)[1] <- "model"
L$model <- factor(L$model, levels=levels(da$model))
str(L)
## 'data.frame':    280 obs. of  6 variables:
##  $ model: Factor w/ 2 levels "trad","var": 1 1 1 1 1 1 1 1 1 1 ...
##  $ daa  : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ y    : num  18.7 18.7 18.7 18.7 18.7 ...
##  $ se   : num  3.17 3.17 3.17 3.17 3.17 ...
##  $ lwr  : num  12.5 12.5 12.5 12.5 12.5 ...
##  $ upr  : num  24.9 24.9 24.9 24.9 24.9 ...
icm <- ddply(goi, .(daa), summarise,
             m=mean(peso),
             lwr=mean(peso)-qt(0.975, length(peso)-1)*sd(peso)*
             sqrt(1/length(peso)),
             upr=mean(peso)+qt(0.975, length(peso)-1)*sd(peso)*
             sqrt(1/length(peso)))

##-----------------------------------------------------------------------------
## Plot with the confidence bands.

ylim <- range(goi$peso)
xyplot(y~daa|model, data=L, type="l", col=1,
       ylab="Massa fresca dos frutos (g)",
       xlab="Dias após a antese",
       strip=strip.custom(bg="gray90"),
       ly=L$lwr, uy=L$upr, cty="bands",
       prepanel=prepanel.cbH, panel=panel.cbH)+
  layer(panel.arrows(icm$daa, icm$lwr, icm$daa, icm$upr,
                     code=3, length=0.05, angle=90))


Random effects model

##-----------------------------------------------------------------------------
## Models.

str(BodyWeight)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':   176 obs. of  4 variables:
##  $ weight: num  240 250 255 260 262 258 266 266 265 272 ...
##  $ Time  : num  1 8 15 22 29 36 43 44 50 57 ...
##  $ Rat   : Ord.factor w/ 16 levels "2"<"3"<"4"<"1"<..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Diet  : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "outer")=Class 'formula' length 2 ~Diet
##   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##  - attr(*, "formula")=Class 'formula' length 3 weight ~ Time | Rat
##   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##  - attr(*, "labels")=List of 2
##   ..$ x: chr "Time"
##   ..$ y: chr "Body weight"
##  - attr(*, "units")=List of 2
##   ..$ x: chr "(days)"
##   ..$ y: chr "(g)"
##  - attr(*, "FUN")=function (x)  
##   ..- attr(*, "source")= chr "function(x) max(x, na.rm = TRUE)"
##  - attr(*, "order.groups")= logi TRUE
plot(BodyWeight)

mm0 <- lme(weight~Diet*Time, random=~1+Time, data=BodyWeight,
           correlation=corCAR1())
summary(mm0)
## Linear mixed-effects model fit by REML
##  Data: BodyWeight 
##        AIC      BIC    logLik
##   1154.544 1189.038 -566.2721
## 
## Random effects:
##  Formula: ~1 + Time | Rat
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev     Corr  
## (Intercept) 36.9056825 (Intr)
## Time         0.2333653 -0.152
## Residual     5.4600807       
## 
## Correlation Structure: Continuous AR(1)
##  Formula: ~1 | Rat 
##  Parameter estimate(s):
##       Phi 
## 0.5316409 
## Fixed effects: weight ~ Diet * Time 
##                 Value Std.Error  DF   t-value p-value
## (Intercept) 251.30348 13.152622 157 19.106721  0.0000
## Diet2       200.68117 22.781009  13  8.809143  0.0000
## Diet3       253.67433 22.781009  13 11.135342  0.0000
## Time          0.36417  0.091878 157  3.963552  0.0001
## Diet2:Time    0.62374  0.159138 157  3.919461  0.0001
## Diet3:Time    0.27874  0.159138 157  1.751532  0.0818
##  Correlation: 
##            (Intr) Diet2  Diet3  Time   Dt2:Tm
## Diet2      -0.577                            
## Diet3      -0.577  0.333                     
## Time       -0.181  0.104  0.104              
## Diet2:Time  0.104 -0.181 -0.060 -0.577       
## Diet3:Time  0.104 -0.060 -0.181 -0.577  0.333
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -3.07548837 -0.43553702  0.06177724  0.45498272  1.75954253 
## 
## Number of Observations: 176
## Number of Groups: 16
plot(augPred(mm0))

anova(mm0)
##             numDF denDF   F-value p-value
## (Intercept)     1   157 1727.0904  <.0001
## Diet            2    13   86.6759  <.0001
## Time            1   157   82.4113  <.0001
## Diet:Time       2   157    7.7925   6e-04
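
## Sketch: predicted random effects (BLUPs) per rat.
head(ranef(mm0))
plot(ranef(mm0))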
##-----------------------------------------------------------------------------
## Predictions at the population level (level=0) and for each rat (level=1);
## the band for the population curves uses vcov(mm0), the covariance matrix
## of the fixed effects.

pred0 <- with(BodyWeight,
              expand.grid(Diet=levels(Diet),
                          Time=seq(min(Time), max(Time), l=20)))
aux <- unique(BodyWeight[,c("Diet","Rat")])
pred1 <- merge(pred0, aux, all=TRUE)

pred0$y <- predict(mm0, newdata=pred0, level=0)
pred1$y <- predict(mm0, newdata=pred1, level=1)

U <- chol(vcov(mm0))
F <- model.matrix(formula(mm0)[-2], data=pred0)
pred0$se <- sqrt(apply(F%*%t(U), 1, function(x) sum(x^2)))
z <- qnorm(0.975)
aux <- outer(pred0$se, z*c(lwr=-1, fit=0, upr=1), FUN="*")
pred0 <- cbind(pred0, sweep(aux, 1, STATS=pred0$y, FUN="+"))

p0 <-
    xyplot(weight~Time|Diet, BodyWeight, groups=Rat,
           layout=c(3,1), xlab="Tempo", ylab="Peso (g)")

p1 <-
    xyplot(y~Time|Diet, pred0, type="l", lwd=2, layout=c(3,1),
           ly=pred0$lwr, uy=pred0$upr, cty="bands",
           panel=panel.cbH)

p2 <-
    xyplot(y~Time|Diet, groups=Rat, data=pred1,
           type="l", col="blue", lty=2)

p0+as.layer(p1)+as.layer(p2)


##-----------------------------------------------------------------------------
## Session information.

Sys.time()
## [1] "2014-12-11 20:51:21 BRST"
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: i686-pc-linux-gnu (32-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=pt_BR.UTF-8       
##  [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
## [10] LC_TELEPHONE=C             LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] methods   splines   grid      stats     graphics  grDevices utils     datasets 
## [9] base     
## 
## other attached packages:
##  [1] wzRfun_0.4          rootSolve_1.6.5.1   mgcv_1.8-3          segmented_0.5-0.0  
##  [5] rsm_2.07            nlme_3.1-118        aod_1.3             multcomp_1.3-7     
##  [9] TH.data_1.0-5       mvtnorm_1.0-1       doBy_4.5-12         survival_2.37-7    
## [13] plyr_1.8.1          alr3_2.0.5          ellipse_0.3-8       car_2.0-22         
## [17] gridExtra_0.9.1     latticeExtra_0.6-26 RColorBrewer_1.0-5  lattice_0.20-29    
## [21] rmarkdown_0.3.3     knitr_1.8          
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.4    evaluate_0.5.5  formatR_1.0     htmltools_0.2.6 MASS_7.3-35    
##  [6] Matrix_1.1-4    nnet_7.3-8      Rcpp_0.11.3     sandwich_2.3-2  stringr_0.6.2  
## [11] tools_3.1.2     yaml_2.1.13     zoo_1.7-11