Application of linear and nonlinear regression models in agricultural sciences

December 9-11, 2014 - Goiânia - GO
Prof. Dr. Walmes M. Zeviani
Embrapa Arroz e Feijão
Lab. de Estatística e Geoinformação - LEG
Departamento de Estatística - UFPR


Course trailer


Simple linear regression

##=============================================================================
## Application of linear and nonlinear regression
##   models in agricultural sciences
##
##   December 9-11, 2014 - Goiânia/GO
##   Embrapa Arroz e Feijão
##
##                                                  Prof. Dr. Walmes M. Zeviani
##                                                                LEG/DEST/UFPR
##=============================================================================

##-----------------------------------------------------------------------------
## Session settings.

pkg <- c("lattice", "latticeExtra", "gridExtra", "car", "ellipse",
         "alr3", "plyr", "doBy", "multcomp", "aod", "nlme", "rsm",
         "segmented", "splines", "mgcv", "rootSolve", "wzRfun")

sapply(pkg, require, character.only=TRUE)
##      lattice latticeExtra    gridExtra          car      ellipse         alr3 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##         plyr         doBy     multcomp          aod         nlme          rsm 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##    segmented      splines         mgcv    rootSolve       wzRfun 
##         TRUE         TRUE         TRUE         TRUE         TRUE
trellis.device(color=FALSE)
##-----------------------------------------------------------------------------
## Analysis of real data.

## Diagonal length of cell phones (cm).
x <- c(12.5, 13.2, 14.4, 11.3, 11.2, 11.2, 11.4, 11.0, 10.6, 12.4, 12.4)

## Weight of the phones (g).
y <- c(136.7, 144.8, 152.6, 120.9, 113.6, 126.9, 91.3, 107.9, 103.6,
       116.9, 117.3)

data.frame(diagonal=x, peso=y)
##    diagonal  peso
## 1      12.5 136.7
## 2      13.2 144.8
## 3      14.4 152.6
## 4      11.3 120.9
## 5      11.2 113.6
## 6      11.2 126.9
## 7      11.4  91.3
## 8      11.0 107.9
## 9      10.6 103.6
## 10     12.4 116.9
## 11     12.4 117.3
##-----------------------------------------------------------------------------
## Visualize the data.

xyplot(y~x,
       xlab="Comprimento diagonal (cm)",
       ylab="Peso do aparelho (g)")

##-----------------------------------------------------------------------------
## Fit of the simple linear regression model.

m0 <- lm(y~x)
summary(m0)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.5642  -5.1349   0.0575   8.0189  15.6162 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -33.221     37.878  -0.877  0.40326   
## x             12.902      3.153   4.092  0.00271 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.3 on 9 degrees of freedom
## Multiple R-squared:  0.6504, Adjusted R-squared:  0.6115 
## F-statistic: 16.74 on 1 and 9 DF,  p-value: 0.00271
confint(m0)
##                   2.5 %   97.5 %
## (Intercept) -118.906100 52.46451
## x              5.769088 20.03530
vcov(m0)
##             (Intercept)          x
## (Intercept)   1434.7200 -118.95289
## x             -118.9529    9.94287
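
## A minimal check (sketch): confint(m0) is the usual t-based interval,
## estimate +/- qt(0.975, df) * standard error, reproduced here by hand.
est <- coef(m0)
ep <- sqrt(diag(vcov(m0)))
tc <- qt(0.975, df=df.residual(m0))
cbind(lwr=est-tc*ep, upr=est+tc*ep)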
##-----------------------------------------------------------------------------
## Checking the fitted model against the data.

plot(y~x, xlab="Comprimento diagonal (cm)", ylab="Peso (g)")
abline(m0)
legend("bottomright", legend=c("Observado","Ajustado"),
       pch=c(1,NA), lty=c(NA,1), bty="n")

##-----------------------------------------------------------------------------
## Checking the model assumptions.

par(mfrow=c(2, 2)); plot(m0); layout(1)

##-----------------------------------------------------------------------------
## Influence measures.

cbind(r=residuals(m0), r.pad=rstandard(m0), r.stu=rstudent(m0))
##               r        r.pad        r.stu
## 1    8.64336872  0.812204307  0.795460862
## 2    7.71183298  0.767694013  0.748718343
## 3    0.02920028  0.003864617  0.003643599
## 4    8.32600142  0.787680565  0.769634511
## 5    2.31622081  0.220530830  0.208482518
## 6   15.61622081  1.486843622  1.613979130
## 7  -22.56421798 -2.123123371 -2.833241337
## 8   -0.80334041 -0.077706591 -0.073287066
## 9    0.05753715  0.005823405  0.005490369
## 10  -9.86641189 -0.923198066 -0.914791714
## 11  -9.46641189 -0.885770151 -0.874080397
influence.measures(m0)
## Influence measures of
##   lm(formula = y ~ x) :
## 
##      dfb.1_    dfb.x    dffit cov.r   cook.d   hat inf
## 1  -0.10299  0.12642  0.28435 1.226 4.21e-02 0.113    
## 2  -0.26654  0.29057  0.38592 1.399 7.83e-02 0.210    
## 3  -0.00354  0.00370  0.00405 2.831 9.24e-06 0.553   *
## 4   0.17407 -0.15236  0.29115 1.254 4.44e-02 0.125    
## 5   0.05369 -0.04780  0.08282 1.450 3.84e-03 0.136    
## 6   0.41563 -0.37002  0.64117 0.834 1.74e-01 0.136    
## 7  -0.55360  0.47380 -1.02453 0.357 2.95e-01 0.116    
## 8  -0.02363  0.02154 -0.03236 1.510 5.89e-04 0.163    
## 9   0.00255 -0.00239  0.00305 1.656 5.23e-06 0.236    
## 10  0.09106 -0.11778 -0.31455 1.160 5.04e-02 0.106    
## 11  0.08701 -0.11254 -0.30055 1.179 4.64e-02 0.106
##-----------------------------------------------------------------------------
## Predictions with confidence and prediction bands.

pred <- data.frame(x=seq(min(x), max(x), l=20))
bc <- predict(m0, newdata=pred, interval="confidence")
bp <- predict(m0, newdata=pred, interval="prediction")

plot(y~x, xlab="Comprimento diagonal (cm)", ylab="Peso (g)")
matlines(x=pred$x, bc, col=1, lty=c(1,2,2))
matlines(x=pred$x, bp, col=1, lty=c(1,3,3))
legend("bottomright",
       legend=c("Observado","Ajustado",
           "Banda de confiança","Banda de predição"),
       pch=c(1,NA,NA,NA), lty=c(NA,1,2,3),
       bty="n")
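
## Sketch of what predict() does for the confidence band: the standard error
## of the fitted mean at x0 is sqrt(x0' V x0), with V = vcov(m0).
X <- model.matrix(~x, data=pred)
se.fit <- sqrt(diag(X%*%vcov(m0)%*%t(X)))
tc <- qt(0.975, df=df.residual(m0))
head(cbind(fit=X%*%coef(m0),
           lwr=X%*%coef(m0)-tc*se.fit,
           upr=X%*%coef(m0)+tc*se.fit))
head(bc)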


Multiple linear regression

##-----------------------------------------------------------------------------
## Data.

da <- read.table(
    "http://www.leg.ufpr.br/~walmes/data/MontgomeryASPE5th/Example12.14.txt",
    header=TRUE, sep="\t")
names(da) <- tolower(names(da))
str(da)
## 'data.frame':    38 obs. of  6 variables:
##  $ clarity : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ aroma   : num  3.3 4.4 3.9 3.9 5.6 4.6 4.8 5.3 4.3 4.3 ...
##  $ body    : num  2.8 4.9 5.3 2.6 5.1 4.7 4.8 4.5 4.3 3.9 ...
##  $ flavor  : num  3.1 3.5 4.8 3.1 5.5 5 4.8 4.3 3.9 4.7 ...
##  $ oakiness: num  4.1 3.9 4.7 3.6 5.1 4.1 3.3 5.2 2.9 3.9 ...
##  $ quality : num  9.8 12.6 11.9 11.1 13.3 12.8 12.8 12 13.6 13.9 ...
scatterplotMatrix(da, span=0.8)

##-----------------------------------------------------------------------------
## Fit.

m0 <- lm(quality~., data=da)
summary(m0)
## 
## Call:
## lm(formula = quality ~ ., data = da)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.85552 -0.57448 -0.07092  0.67275  1.68093 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.9969     2.2318   1.791 0.082775 .  
## clarity       2.3395     1.7348   1.349 0.186958    
## aroma         0.4826     0.2724   1.771 0.086058 .  
## body          0.2732     0.3326   0.821 0.417503    
## flavor        1.1683     0.3045   3.837 0.000552 ***
## oakiness     -0.6840     0.2712  -2.522 0.016833 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.163 on 32 degrees of freedom
## Multiple R-squared:  0.7206, Adjusted R-squared:  0.6769 
## F-statistic: 16.51 on 5 and 32 DF,  p-value: 4.703e-08
##-----------------------------------------------------------------------------
## Checking the model assumptions.

par(mfrow=c(2, 2)); plot(m0); layout(1)

residualPlots(m0)

##            Test stat Pr(>|t|)
## clarity        1.140    0.263
## aroma          0.419    0.678
## body           0.976    0.337
## flavor        -0.258    0.798
## oakiness      -0.353    0.727
## Tukey test     0.137    0.891
##-----------------------------------------------------------------------------
## Influence measures.

head(cbind(r=residuals(m0), r.pad=rstandard(m0), r.stu=rstudent(m0)))
##             r       r.pad       r.stu
## 1  0.28905165  0.27289988  0.26891509
## 2  1.38047606  1.29611865  1.31057072
## 3 -0.15912591 -0.15005811 -0.14774683
## 4  1.01214849  0.98944643  0.98911153
## 5 -0.06905214 -0.06179415 -0.06082458
## 6 -0.07708542 -0.06829302 -0.06722238
im <- influence.measures(m0)
summary(im)
## Potentially influential observations of
##   lm(formula = quality ~ ., data = da) :
## 
##    dfb.1_ dfb.clrt dfb.arom dfb.body dfb.flvr dfb.okns dffit   cov.r   cook.d hat  
## 12  0.83  -0.97    -0.40     0.23    -0.01    -0.04     1.38_*  1.16    0.30   0.39
## 14 -0.03   0.11    -0.16    -0.05     0.05     0.08    -0.28    1.84_*  0.01   0.36
## 20 -0.04  -0.38     0.46    -1.06_*   0.65     0.60    -1.54_*  0.30_*  0.31   0.20
## 32 -0.04   0.04    -0.03     0.02     0.04    -0.03    -0.07    1.57_*  0.00   0.23
## 37 -0.06  -0.15     0.04     0.28    -0.37     0.39     0.61    1.72_*  0.06   0.38
##-----------------------------------------------------------------------------
## Stepwise selection.

## step(m0, k=2)              ## AIC.
m1 <- step(m0, k=log(nrow(da)))  ## BIC.
## Start:  AIC=26.74
## quality ~ clarity + aroma + body + flavor + oakiness
## 
##            Df Sum of Sq    RSS    AIC
## - body      1    0.9118 44.160 23.897
## - clarity   1    2.4577 45.706 25.204
## - aroma     1    4.2397 47.488 26.658
## <none>                  43.248 26.741
## - oakiness  1    8.5978 51.846 29.994
## - flavor    1   19.8986 63.147 37.487
## 
## Step:  AIC=23.9
## quality ~ clarity + aroma + flavor + oakiness
## 
##            Df Sum of Sq    RSS    AIC
## - clarity   1    1.6936 45.853 21.689
## <none>                  44.160 23.897
## - aroma     1    5.3545 49.514 24.608
## - oakiness  1    8.0807 52.241 26.645
## - flavor    1   27.3280 71.488 38.564
## 
## Step:  AIC=21.69
## quality ~ aroma + flavor + oakiness
## 
##            Df Sum of Sq    RSS    AIC
## <none>                  45.853 21.689
## - aroma     1    6.6026 52.456 23.164
## - oakiness  1    6.9989 52.852 23.450
## - flavor    1   25.6888 71.542 34.955
summary(m1)
## 
## Call:
## lm(formula = quality ~ aroma + flavor + oakiness, data = da)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5707 -0.6256  0.1521  0.6467  1.7741 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.4672     1.3328   4.852 2.67e-05 ***
## aroma         0.5801     0.2622   2.213 0.033740 *  
## flavor        1.1997     0.2749   4.364 0.000113 ***
## oakiness     -0.6023     0.2644  -2.278 0.029127 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.161 on 34 degrees of freedom
## Multiple R-squared:  0.7038, Adjusted R-squared:  0.6776 
## F-statistic: 26.92 on 3 and 34 DF,  p-value: 4.203e-09
##-----------------------------------------------------------------------------
## R², adjusted R², and PRESS.

smr <- summary(m1)
smr$r.squared
## [1] 0.7037672
smr$adj.r.squared
## [1] 0.677629
sum(residuals(m1)^2)
## [1] 45.85341
sum((residuals(m1)/(1-hatvalues(m1)))^2)
## [1] 56.05239
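
## Sketch: the PRESS shortcut above, sum((r/(1-h))^2), can be checked against
## explicit leave-one-out refits (slower, but the same quantity).
sum(sapply(1:nrow(da), function(i){
    fi <- predict(update(m1, data=da[-i, ]), newdata=da[i, ])
    (da$quality[i]-fi)^2
}))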

Polynomial regression

##-----------------------------------------------------------------------------
## Weight gain of turkeys as a function of dietary methionine.

xyplot(Gain~A, data=turk0, type=c("p","smooth"),
       xlab="Metionina (% da dieta)",
       ylab="Ganho de peso (g)")

##-----------------------------------------------------------------------------
## Fit of the saturated model (A treated as a factor).

turk0$Afat <- factor(turk0$A)
m1 <- lm(Gain~Afat, turk0)
anova(m1)
## Analysis of Variance Table
## 
## Response: Gain
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Afat       5 150041 30008.2  88.587 < 2.2e-16 ***
## Residuals 29   9824   338.7                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##-----------------------------------------------------------------------------
## Fit of the second-degree model: b0 + b1*x + b2*x^2.

## m2 <- lm(Gain~poly(A, 2), turk0)
m2 <- lm(Gain~A+I(A^2), turk0)

##-----------------------------------------------------------------------------
## Lack-of-fit test for model m2.

anova(m1, m2)
## Analysis of Variance Table
## 
## Model 1: Gain ~ Afat
## Model 2: Gain ~ A + I(A^2)
##   Res.Df     RSS Df Sum of Sq     F Pr(>F)
## 1     29  9823.6                          
## 2     32 11339.9 -3   -1516.2 1.492 0.2374
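
## Sketch: the lack-of-fit F above compares the extra residual sum of squares
## of m2 with the pure-error mean square of the saturated model m1.
((deviance(m2)-deviance(m1))/(df.residual(m2)-df.residual(m1)))/
    (deviance(m1)/df.residual(m1))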
##-----------------------------------------------------------------------------
## Plot of the predicted values with confidence bands.

pred2 <- data.frame(A=seq(0, 0.45, by=0.025))
a <- predict(m2, newdata=pred2, interval="confidence")
pred2 <- cbind(pred2, a)

## Overlaying the mean estimates from model m1.
pred1 <- data.frame(Afat=levels(turk0$Afat))
a <- predict(m1, newdata=pred1, interval="confidence")
pred1 <- cbind(pred1, a)
pred1$A <- as.numeric(as.character(pred1$Afat))
## pred1

xyplot(Gain~A, data=turk0,
     xlab="Metionina (% da dieta)",
     ylab="Ganho de peso (g)")+
    as.layer(xyplot(fit~A, data=pred2, type="l",
                    ly=pred2$lwr, uy=pred2$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))+
    as.layer(xyplot(fit~(A+0.005), data=pred1, pch=19,
                    ly=pred1$lwr, uy=pred1$upr, cty="bars",
                    prepanel=prepanel.cbH, panel=panel.cbH))

##-----------------------------------------------------------------------------
## Using weights in the regression fit.

tu <- ddply(turk0, .(A), summarise,
            mGain=mean(Gain), nGain=length(Gain))
tu
##      A mGain nGain
## 1 0.00 623.0    10
## 2 0.04 668.4     5
## 3 0.10 715.6     5
## 4 0.16 732.0     5
## 5 0.28 794.0     5
## 6 0.44 785.4     5
m3 <- lm(mGain~A+I(A^2), data=tu, weights=nGain)
summary(m3)
## 
## Call:
## lm(formula = mGain ~ A + I(A^2), data = tu, weights = nGain)
## 
## Weighted Residuals:
##       1       2       3       4       5       6 
##  -8.037  14.442  16.171 -29.042  11.611  -1.816 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   625.542      6.243 100.201 2.19e-06 ***
## A             964.471     86.763  11.116  0.00156 ** 
## I(A^2)      -1362.069    198.338  -6.867  0.00632 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.48 on 3 degrees of freedom
## Multiple R-squared:  0.9899, Adjusted R-squared:  0.9832 
## F-statistic: 146.9 on 2 and 3 DF,  p-value: 0.001016
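
## Sketch: weighted least squares on the treatment means, with the group
## sizes as weights, should reproduce the coefficients of the fit to the raw
## data (m2).
cbind(raw=coef(m2), means=coef(m3))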
##-----------------------------------------------------------------------------
## Overlaying the predicted values and confidence bands obtained from the
## fit to the means.

pred3 <- data.frame(A=seq(0, 0.45, by=0.025))
a <- predict(m3, newdata=pred3, interval="confidence")
pred3 <- cbind(pred3, a)

xyplot(Gain~A, data=turk0,
     xlab="Metionina (% da dieta)",
     ylab="Ganho de peso (g)")+
    as.layer(xyplot(fit~A, data=pred2, type="l",
                    ly=pred2$lwr, uy=pred2$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))+
    as.layer(xyplot(fit~A, data=pred3, type="l",
                    ly=pred3$lwr, uy=pred3$upr, cty="bands",
                    prepanel=prepanel.cbH, panel=panel.cbH))


Response surface

##-----------------------------------------------------------------------------
## Data.

da <- read.table(
    "http://www.leg.ufpr.br/~walmes/data/MontgomeryASPE5th/Example14.12.txt",
    header=TRUE, sep="\t")
names(da) <- substr(tolower(names(da)), 1, 4)
str(da)
## 'data.frame':    13 obs. of  6 variables:
##  $ time: int  50 60 50 60 48 62 55 55 55 55 ...
##  $ temp: int  160 160 170 170 165 165 158 172 165 165 ...
##  $ x1  : num  -1 1 -1 1 -1.41 ...
##  $ x2  : num  -1 -1 1 1 0 ...
##  $ conv: num  65.3 68.2 66 69.8 64.5 69 64 68.5 68.9 69.7 ...
##  $ visc: int  35 39 36 43 30 44 31 45 37 34 ...
## Central composite design.
xyplot(x1~x2, da, jitter.x=TRUE, jitter.y=TRUE)

##-----------------------------------------------------------------------------
## Model fit.

m0 <- lm(conv~x1+x2+x1:x2+I(x1^2)+I(x2^2), data=da)
summary(m0)
## 
## Call:
## lm(formula = conv ~ x1 + x2 + x1:x2 + I(x1^2) + I(x2^2), data = da)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1315 -0.3537 -0.0999  0.3057  0.9627 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  69.0999     0.3506 197.082 2.29e-14 ***
## x1            1.6331     0.2772   5.891 0.000605 ***
## x2            1.0830     0.2772   3.907 0.005846 ** 
## I(x1^2)      -0.9688     0.2973  -3.259 0.013893 *  
## I(x2^2)      -1.2189     0.2973  -4.100 0.004575 ** 
## x1:x2         0.2250     0.3920   0.574 0.583946    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.784 on 7 degrees of freedom
## Multiple R-squared:  0.9143, Adjusted R-squared:  0.853 
## F-statistic: 14.93 on 5 and 7 DF,  p-value: 0.001291
anova(m0)
## Analysis of Variance Table
## 
## Response: conv
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## x1         1 21.3335 21.3335 34.7079 0.0006047 ***
## x2         1  9.3824  9.3824 15.2644 0.0058462 ** 
## I(x1^2)    1  4.6409  4.6409  7.5504 0.0285946 *  
## I(x2^2)    1 10.3305 10.3305 16.8069 0.0045752 ** 
## x1:x2      1  0.2025  0.2025  0.3295 0.5839456    
## Residuals  7  4.3026  0.6147                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##-----------------------------------------------------------------------------
## Using functions from the rsm package.

m1 <- rsm(conv~SO(x1, x2), data=da) 
summary(m1)
## 
## Call:
## rsm(formula = conv ~ SO(x1, x2), data = da)
## 
##             Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept) 69.09990    0.35062 197.0815 2.286e-14 ***
## x1           1.63312    0.27721   5.8913 0.0006047 ***
## x2           1.08304    0.27721   3.9070 0.0058462 ** 
## x1:x2        0.22500    0.39200   0.5740 0.5839456    
## x1^2        -0.96880    0.29731  -3.2585 0.0138931 *  
## x2^2        -1.21887    0.29731  -4.0996 0.0045752 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Multiple R-squared:  0.9143, Adjusted R-squared:  0.853 
## F-statistic: 14.93 on 5 and 7 DF,  p-value: 0.001291
## 
## Analysis of Variance Table
## 
## Response: conv
##             Df  Sum Sq Mean Sq F value    Pr(>F)
## FO(x1, x2)   2 30.7158 15.3579 24.9861 0.0006502
## TWI(x1, x2)  1  0.2025  0.2025  0.3295 0.5839456
## PQ(x1, x2)   2 14.9714  7.4857 12.1786 0.0052560
## Residuals    7  4.3026  0.6147                  
## Lack of fit  3  3.4426  1.1475  5.3374 0.0697146
## Pure error   4  0.8600  0.2150                  
## 
## Stationary point of response surface:
##        x1        x2 
## 0.9041430 0.5277301 
## 
## Eigenanalysis:
## $values
## [1] -0.9256357 -1.2620325
## 
## $vectors
##          [,1]       [,2]
## x1 -0.9336473 -0.3581937
## x2 -0.3581937  0.9336473
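
## Sketch: the stationary point reported by rsm is -0.5 * B^{-1} b, where b
## holds the first-order coefficients and B the symmetric matrix of the
## second-order coefficients; eigen(B) gives the canonical analysis.
b <- coef(m0)[c("x1", "x2")]
B <- matrix(c(coef(m0)["I(x1^2)"], coef(m0)["x1:x2"]/2,
              coef(m0)["x1:x2"]/2, coef(m0)["I(x2^2)"]), nrow=2)
-0.5*solve(B)%*%b
eigen(B)$values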
##-----------------------------------------------------------------------------
## Prediction.

pred <- expand.grid(
    x1=seq(-sqrt(2), sqrt(2), l=20),
    x2=seq(-sqrt(2), sqrt(2), l=20))
pred$y <- predict(m0, newdata=pred)

pred <- transform(pred,
                  time=55+x1*5,
                  temp=165+x2*5)

colr <- brewer.pal(11, "Spectral")
colr <- colorRampPalette(colr, space="rgb")

wireframe(y~time+temp,
          data=pred,
          xlab=list("Tempo (min)", rot=22),
          ylab=list("Temperatura (°F)", rot=-36),
          zlab=list("Conversão (%)", rot=90),
          scales=list(arrows=FALSE),
          zlim=c(56, max(pred$y)),
          panel.3d.wireframe=panel.3d.contour,
          nlevels=18,# col="gray30",
          type=c("on", "bottom"),
          col.regions=colr(101),  drape=TRUE,
          screen=list(x=-75, z=10, y=35))


Regression with categorical factors

##-----------------------------------------------------------------------------
## Data.

da <- read.table("http://www.leg.ufpr.br/~walmes/data/soja.txt",
                 header=TRUE, sep="\t", dec=",")
da <- transform(da, a=agua, A=factor(agua),
                k=potassio, K=factor(potassio))
str(da)
## 'data.frame':    75 obs. of  14 variables:
##  $ potassio: int  0 30 60 120 180 0 30 60 120 180 ...
##  $ agua    : num  37.5 37.5 37.5 37.5 37.5 50 50 50 50 50 ...
##  $ bloco   : Factor w/ 5 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ rengrao : num  14.6 21.5 24.6 21.9 28.1 ...
##  $ pesograo: num  10.7 13.5 15.8 12.8 14.8 ...
##  $ kgrao   : num  15.1 17.1 19.1 18.1 19.1 ...
##  $ pgrao   : num  1.18 0.99 0.82 0.85 0.88 1.05 1.08 0.74 1.01 1.01 ...
##  $ ts      : int  136 159 156 171 190 140 193 200 208 237 ...
##  $ nvi     : int  22 2 0 2 0 20 6 6 7 10 ...
##  $ nv      : int  56 62 66 68 82 63 86 94 86 97 ...
##  $ a       : num  37.5 37.5 37.5 37.5 37.5 50 50 50 50 50 ...
##  $ A       : Factor w/ 3 levels "37.5","50","62.5": 1 1 1 1 1 2 2 2 2 2 ...
##  $ k       : int  0 30 60 120 180 0 30 60 120 180 ...
##  $ K       : Factor w/ 5 levels "0","30","60",..: 1 2 3 4 5 1 2 3 4 5 ...
xyplot(pesograo~k|A, data=da, type=c("p","a")) 

##-----------------------------------------------------------------------------
## Fit.

m0 <- aov(kgrao~bloco+A*poly(k, degree=2), data=da,
          contrasts=list(bloco=contr.sum))
anova(m0)
## Analysis of Variance Table
## 
## Response: kgrao
##                       Df Sum Sq Mean Sq F value    Pr(>F)    
## bloco                  4  16.32   4.079  1.0970    0.3660    
## A                      2   5.49   2.747  0.7387    0.4819    
## poly(k, degree = 2)    2 423.70 211.850 56.9735 9.064e-15 ***
## A:poly(k, degree = 2)  4   6.92   1.731  0.4656    0.7607    
## Residuals             62 230.54   3.718                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m1 <- update(m0, .~bloco+A/poly(k, degree=2))
anova(m1)
## Analysis of Variance Table
## 
## Response: kgrao
##                       Df Sum Sq Mean Sq F value    Pr(>F)    
## bloco                  4  16.32   4.079  1.0970    0.3660    
## A                      2   5.49   2.747  0.7387    0.4819    
## A:poly(k, degree = 2)  6 430.63  71.771 19.3016 1.515e-12 ***
## Residuals             62 230.54   3.718                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.lm(m1)
## 
## Call:
## aov(formula = kgrao ~ bloco + A + A:poly(k, degree = 2), data = da, 
##     contrasts = list(bloco = contr.sum))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7011 -1.0098  0.0263  0.8936  5.4240 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 18.0568     0.3857  46.820  < 2e-16 ***
## bloco1                      -0.7364     0.4453  -1.654   0.1033    
## bloco2                       0.1263     0.4453   0.284   0.7777    
## bloco3                      -0.0404     0.4453  -0.091   0.9280    
## bloco4                      -0.0724     0.4453  -0.163   0.8714    
## A50                         -0.1616     0.5454  -0.296   0.7680    
## A62.5                       -0.6376     0.5454  -1.169   0.2469    
## A37.5:poly(k, degree = 2)1  17.3599     3.3399   5.198 2.39e-06 ***
## A50:poly(k, degree = 2)1    20.6219     3.3399   6.174 5.64e-08 ***
## A62.5:poly(k, degree = 2)1  21.3761     3.3399   6.400 2.32e-08 ***
## A37.5:poly(k, degree = 2)2  -2.9066     3.3399  -0.870   0.3875    
## A50:poly(k, degree = 2)2    -7.3413     3.3399  -2.198   0.0317 *  
## A62.5:poly(k, degree = 2)2  -6.7801     3.3399  -2.030   0.0467 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.928 on 62 degrees of freedom
## Multiple R-squared:  0.6624, Adjusted R-squared:  0.5971 
## F-statistic: 10.14 on 12 and 62 DF,  p-value: 1.247e-10
## Positions follow the coefficient order shown in summary.lm(m1):
## the three linear terms first, then the three quadratic terms.
L <- list(A37.5lin=1, A50.0lin=2, A62.5lin=3,
          A37.5qua=4, A50.0qua=5, A62.5qua=6)
summary(m1, split=list("A:poly(k, degree = 2)"=L))
##                                   Df Sum Sq Mean Sq F value   Pr(>F)    
## bloco                              4   16.3    4.08   1.097   0.3660    
## A                                  2    5.5    2.75   0.739   0.4819    
## A:poly(k, degree = 2)              6  430.6   71.77  19.302 1.51e-12 ***
##   A:poly(k, degree = 2): A37.5lin  1  100.5  100.45  27.016 2.39e-06 ***
##   A:poly(k, degree = 2): A50.0lin  1  141.8  141.75  38.122 5.64e-08 ***
##   A:poly(k, degree = 2): A62.5lin  1  152.3  152.31  40.962 2.32e-08 ***
##   A:poly(k, degree = 2): A37.5qua  1    2.8    2.82   0.757   0.3875    
##   A:poly(k, degree = 2): A50.0qua  1   18.0   17.96   4.831   0.0317 *  
##   A:poly(k, degree = 2): A62.5qua  1   15.3   15.32   4.121   0.0467 *  
## Residuals                         62  230.5    3.72                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m2 <- update(m0, .~bloco+A*poly(k, degree=1))
anova(m0, m2)
## Analysis of Variance Table
## 
## Model 1: kgrao ~ bloco + A * poly(k, degree = 2)
## Model 2: kgrao ~ bloco + A + poly(k, degree = 1) + A:poly(k, degree = 1)
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)  
## 1     62 230.54                             
## 2     65 266.64 -3   -36.104 3.2365 0.0281 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##-----------------------------------------------------------------------------
## Prediction. Subtracting coef(m0)["bloco1"] removes the block I effect, so
## the curves refer to the average block (bloco uses sum-to-zero contrasts).
 
pred <- expand.grid(bloco="I", A=levels(da$A), k=seq(0,185,5))
ci <- predict(m0, newdata=pred, interval="confidence")-coef(m0)["bloco1"]
pred <- cbind(pred, ci)
 
mycol <- brewer.pal(6, "Set1")

p1 <- 
    xyplot(kgrao~k, groups=A, data=da,
           scales="free", jitter.x=TRUE,
           xlab=expression("Potássio no solo"~(mg~dm^{-3})),
           ylab="Peso de 100 grãos (g)",
           auto.key=list(title="Níveis de água (%)", cex.title=1.2,
               columns=3, points=FALSE, lines=TRUE),
           par.settings=list(superpose.symbol=list(col=mycol),
               superpose.line=list(col=mycol),
               strip.background=list(col="gray90")))

p2 <- 
    xyplot(fit~k, groups=A, data=pred,
           type="l", lwd=1.5,
           ly=pred$lwr, uy=pred$upr,
           cty="bands", alpha=0.25,
           panel.groups=panel.cbH,
           panel=panel.superpose)

p1+as.layer(under=TRUE, p2)
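
## Sketch: the potassium level that maximizes the fitted quadratic within
## each water level, read off the prediction grid built above (pred).
ddply(pred, .(A), function(d) d[which.max(d$fit), c("k", "fit")])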


Nonlinear regression

##-----------------------------------------------------------------------------
## Data.

da <- read.table("http://www.leg.ufpr.br/~walmes/data/algodão.txt",
                 header=TRUE, sep="\t", encoding="latin1")
da$desf <- da$desf/100
levels(da$estag) <- c("Vegetativo", "Botão floral",
                      "Florescimento", "Maçã", "Capulho")
str(da)
## 'data.frame':    125 obs. of  8 variables:
##  $ estag : Factor w/ 5 levels "Vegetativo","Botão floral",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ desf  : num  0 0 0 0 0 0.25 0.25 0.25 0.25 0.25 ...
##  $ rep   : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ pcapu : num  33.2 28.7 31.5 28.9 36.4 ...
##  $ nnos  : int  32 29 29 27 28 30 33 23 29 26 ...
##  $ alt   : num  151 146 140 155 150 ...
##  $ ncapu : int  10 9 8 8 10 11 9 10 10 10 ...
##  $ nestru: int  10 9 8 9 10 11 9 11 10 10 ...
p0 <-
    xyplot(pcapu~desf|estag, data=da, layout=c(5,1),
           xlab="Nível de desfolha artificial",
           ylab="Peso de capulhos produzidos (g)")
## p0

##-----------------------------------------------------------------------------
## Fit.

n0 <- lapply(split(da, da$estag), nls,
             formula=pcapu~f0-f1*(desf+0.02)^exp(C),
             start=list(f0=30, f1=15, C=0))
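## In n1 the exponent is reparameterized so that xde has a direct reading:
## since xde^((log(5)-log(f1))/log(xde)) = 5/f1, xde is (apart from the 0.02
## offset) the defoliation level at which the expected loss below f0 is 5 g.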
n1 <- lapply(split(da, da$estag), nls,
             formula=pcapu~f0-f1*(desf+0.02)^((log(5)-log(f1))/log(xde)),
             start=list(f0=30, f1=10, xde=0.7))

chute1 <- sapply(n1, coef)
chute2 <- cbind(chute1[,1], sweep(chute1[,2:5], 1, chute1[,1], "-"))
c(t(chute2))
##  [1] 31.15716578 -1.89166251 -1.01579242 -2.88803986  3.63178792  9.29140391 -3.59069095
##  [8]  5.24697027 10.42379324 -0.50404109  0.87337687  0.09069715 -0.73457062 -0.12914993
## [15] -0.44398962
nn0 <- gnls(pcapu~f0-f1*desf^((log(5)-log(f1))/log(xde)), data=da,
            params=f0+f1+xde~estag,
            start=c(t(chute2)))

## summary(nn0)
anova(nn0, type="marginal")
## Denom. DF: 110 
##                 numDF  F-value p-value
## f0.(Intercept)      1 894.8326  <.0001
## f0.estag            4   3.0698  0.0193
## f1.(Intercept)      1  29.5203  <.0001
## f1.estag            4  10.1658  <.0001
## xde.(Intercept)     1 111.3543  <.0001
## xde.estag           4   5.9569  0.0002
##-----------------------------------------------------------------------------
## Prediction.

nn0 <- gnls(pcapu~f0-f1*desf^((log(5)-log(f1))/log(xde)), data=da,
            params=f0+f1+xde~-1+estag,
            start=c(t(chute1)))
th <- coef(nn0); th
##     f0.estagVegetativo   f0.estagBotão floral  f0.estagFlorescimento 
##             31.1492629             29.2628830             28.5011919 
##           f0.estagMaçã        f0.estagCapulho     f1.estagVegetativo 
##             28.2634222             34.1531615             10.1662666 
##   f1.estagBotão floral  f1.estagFlorescimento           f1.estagMaçã 
##              6.1173914             13.0146648             21.6085661 
##        f1.estagCapulho    xde.estagVegetativo  xde.estagBotão floral 
##              8.2777934              0.8538359              0.9440167 
## xde.estagFlorescimento          xde.estagMaçã       xde.estagCapulho 
##              0.2021509              0.7243590              0.4954611
xde <- th[grepl("^xde", names(th))]

pred <- with(da,
             expand.grid(estag=levels(estag),
                         desf=seq(0, 1, l=20)))
pred$y <- predict(nn0, newdata=pred)

f <- function(th, desf, estag){
    X <- model.matrix(~0+estag)
    p <- gsub("\\..*$", "", names(th))
    ths <- split(th, f=p)
    ths <- lapply(ths, matrix, ncol=1)
    f0 <- X%*%ths$f0
    f1 <- X%*%ths$f1
    xde <- X%*%ths$xde
    y <- f0-f1*desf^((log(5)-log(f1))/log(xde))
    return(y)
}

## f(th=coef(nn0), desf=pred$desf, estag=pred$estag)
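## Delta method for the band: F holds the gradient of the mean function with
## respect to the parameters over the prediction grid; with U the Cholesky
## factor of vcov(nn0) (t(U) %*% U = V), the squared row norms of F %*% t(U)
## equal diag(F V F'), the variances of the fitted values.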
F <- rootSolve::gradient(f=f, x=coef(nn0),
                         desf=pred$desf, estag=pred$estag)
U <- chol(vcov(nn0))
pred$se <- sqrt(apply(F%*%t(U), 1, function(x) sum(x^2)))
aux <- outer(pred$se, qnorm(0.975)*c(lwr=-1, fit=0, upr=1), FUN="*")
pred <- cbind(pred, sweep(aux, MARGIN=1, STATS=pred$y, FUN="+"))
str(pred)
## 'data.frame':    100 obs. of  7 variables:
##  $ estag: Factor w/ 5 levels "Vegetativo","Botão floral",..: 1 2 3 4 5 1 2 3 4 5 ...
##  $ desf : num  0 0 0 0 0 ...
##  $ y    : atomic  31.1 29.3 28.5 28.3 34.2 ...
##   ..- attr(*, "label")= chr "Predicted values"
##  $ se   : num  1.04 1.12 1.58 1.04 1.58 ...
##  $ lwr  : num  29.1 27.1 25.4 26.2 31.1 ...
##  $ fit  : num  31.1 29.3 28.5 28.3 34.2 ...
##  $ upr  : num  33.2 31.4 31.6 30.3 37.2 ...
xyplot(pcapu~desf|estag, data=da, col=1, layout=c(5,1),
       xlim=c(-0.2,1.2),
       xlab="Desfolha artificial", ylab="Peso de capulhos (g)",
       panel=function(...){
           panel.xyplot(...)
           panel.abline(v=xde[which.packet()], col=2)
       })+
    as.layer(xyplot(fit+lwr+upr~desf|estag, data=pred,
                    type="l", col=1, lty=c(1,2,2)))


Extensions of regression models

##-----------------------------------------------------------------------------
## Data.
 
da <- read.table("http://www.leg.ufpr.br/~walmes/ridiculas/osmo.txt",
                   header=TRUE, sep="\t")
names(da) <- substr(tolower(names(da)), 1, 3)
str(da)
## 'data.frame':    213 obs. of  3 variables:
##  $ esp: Factor w/ 4 levels "A. reticulatus",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ sal: int  0 0 0 0 0 0 10 10 10 10 ...
##  $ osm: int  592 601 485 497 596 587 530 328 339 516 ...
xyplot(osm~sal|esp, data=da, type=c("p","smooth"),
       xlab="Salinidade", ylab="Osmolalidade")

##-----------------------------------------------------------------------------
## Segmented regression.

db <- droplevels(subset(da, esp=="F. citerosum"))

m0 <- lm(osm~sal, data=db)

## One breakpoint.
s1 <- segmented(m0, seg.Z=~sal,
                psi=list(sal=c(5)),
                control=seg.control(display=FALSE))

## Two breakpoints.
s2 <- update(s1, psi=list(sal=c(10,30)))
summary(s2)
## 
##  ***Regression Model with Segmented Relationship(s)***
## 
## Call: 
## segmented.lm(obj = m0, seg.Z = ~sal, psi = list(sal = c(10, 30)), 
##     control = seg.control(display = FALSE))
## 
## Estimated Break-Point(s):
##            Est. St.Err
## psi1.sal 10.18  1.317
## psi2.sal 34.87  1.117
## 
## t value for the gap-variable(s) V:  0 0 
## 
## Meaningful coefficients of the linear terms:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  523.492     31.409  16.667  < 2e-16 ***
## sal          -19.493      4.890  -3.986 0.000187 ***
## U1.sal        41.143      5.784   7.113       NA    
## U2.sal       -48.197     10.262  -4.697       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 94.54 on 59 degrees of freedom
## Multiple R-Squared: 0.7712,  Adjusted R-squared: 0.7518 
## 
## Convergence attained in 2 iterations with relative change 0
slope(s2)
## $sal
##          Est. St.Err. t value CI(95%).l CI(95%).u
## slope1 -19.49   4.890  -3.986    -29.28    -9.708
## slope2  21.65   3.089   7.009     15.47    27.830
## slope3 -26.55   9.786  -2.713    -46.13    -6.965
confint(s2)
## $sal
##           Est. CI(95%).l CI(95%).u
## psi1.sal 10.18     7.549     12.82
## psi2.sal 34.87    32.630     37.10
plot(osm~sal, data=db)
abline(m0)
plot(s1, add=TRUE, col=2)
plot(s2, add=TRUE, col=4)
lines(s1)
lines(s2)
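
## Sketch: the slopes reported by slope(s2) are cumulative sums of the 'sal'
## coefficient and the U terms (the changes in slope at each breakpoint).
cumsum(coef(s2)[c("sal", "U1.sal", "U2.sal")])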

##-----------------------------------------------------------------------------
## Local polynomial regression (loess).

l0 <- loess(osm~sal, data=db)
summary(l0)
## Call:
## loess(formula = osm ~ sal, data = db)
## 
## Number of Observations: 65 
## Equivalent Number of Parameters: 4.51 
## Residual Standard Error: 101.2 
## Trace of smoother matrix: 4.93 
## 
## Control settings:
##   normalize:  TRUE 
##   span       :  0.75 
##   degree   :  2 
##   family   :  gaussian
##   surface  :  interpolate      cell = 0.2
class(l0)
## [1] "loess"
methods(class="loess")
## [1] anova.loess*   predict.loess* print.loess*   summary.loess*
## 
##    Non-visible functions are asterisked
pred <- with(da, data.frame(sal=seq(min(sal), max(sal), l=100)))
aux <- predict(l0, newdata=pred, se=TRUE)
me <- outer(aux$se.fit, c(lwr=-1, fit=0, upr=1)*qnorm(0.975), "*")
aux <- sweep(me, MARGIN=1, STATS=aux$fit, FUN="+")
pred <- cbind(pred, aux)

plot(osm~sal, data=db)
matlines(x=pred$sal, y=pred[,c("fit","lwr","upr")],
         lty=c(1,2,2), col=1)
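
## Sketch: effect of the span argument (default 0.75); a smaller span gives a
## more local, wigglier fit.
l1 <- update(l0, span=0.4)
plot(osm~sal, data=db)
lines(pred$sal, predict(l0, newdata=pred), lty=1)
lines(pred$sal, predict(l1, newdata=pred), lty=2)
legend("topleft", legend=c("span = 0.75", "span = 0.4"), lty=c(1, 2), bty="n")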

##-----------------------------------------------------------------------------
## Splines.

## p0 <- lm(osm~bs(sal, df=4), data=da)
p0 <- lm(osm~ns(sal, df=4), data=da)
summary(p0) 
## 
## Call:
## lm(formula = osm ~ ns(sal, df = 4), data = da)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -317.74  -83.72  -11.85   84.15  364.67 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       524.024     25.142  20.842  < 2e-16 ***
## ns(sal, df = 4)1    1.777     47.245   0.038     0.97    
## ns(sal, df = 4)2  275.654     43.342   6.360 1.26e-09 ***
## ns(sal, df = 4)3  284.906     69.243   4.115 5.59e-05 ***
## ns(sal, df = 4)4  268.793     35.445   7.583 1.10e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 150.3 on 208 degrees of freedom
## Multiple R-squared:  0.3763, Adjusted R-squared:  0.3643 
## F-statistic: 31.37 on 4 and 208 DF,  p-value: < 2.2e-16
pred <- with(da, data.frame(sal=seq(min(sal), max(sal), l=100)))
aux <- predict(p0, newdata=pred, interval="confidence")
pred <- cbind(pred, aux)

plot(osm~sal, data=db)
matlines(x=pred$sal, y=pred[,c("fit","lwr","upr")],
         lty=c(1,2,2), col=1)

##-----------------------------------------------------------------------------
## GAM.

rock$lperm <- log(rock$perm)
pairs(rock)

g0 <- gam(log(perm)~s(peri)+s(shape), data=rock)
summary(g0)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## log(perm) ~ s(peri) + s(shape)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.1075     0.1446   35.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##            edf Ref.df      F  p-value    
## s(peri)  2.892  3.587 11.645 2.64e-06 ***
## s(shape) 1.940  2.421  3.842   0.0231 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.628   Deviance explained = 66.7%
## GCV = 1.1426  Scale est. = 1.0038    n = 48
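
## Sketch: visualizing the estimated smooth terms.
plot(g0, pages=1, residuals=TRUE)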
cloud(perm~peri+shape, data=rock)

pred <- with(rock,
             expand.grid(
                 peri=seq(min(peri), max(peri), l=20),
                 shape=seq(min(shape), max(shape), l=20))
             )
pred$perm <- predict(g0, newdata=pred)

colr <- brewer.pal(11, "Spectral")
colr <- colorRampPalette(colr, space="rgb")
colr <- rgb(t(col2rgb(colr(101)))/255,
            alpha=0.5)

wireframe(perm~peri+shape,
          data=pred,
          zlim=range(rock$lperm),
          xlab=list("Perimetro", rot=22),
          ylab=list("Forma", rot=-36),
          zlab=list("Permeabilidade", rot=90),
          scales=list(arrows=FALSE),
          panel.3d.wireframe=panel.3d.contour,
          nlevels=18,# col="gray30",
          type=c("on", "bottom"),
          col.regions=colr,  drape=TRUE,
          screen=list(x=-75, z=10, y=35),
          ## screen=list(x=-80, z=-5, y=-25),
          panel=function(...){
              L <- list(...)
              ## print(str(L))
              L$x <- rock$peri
              L$y <- rock$shape
              L$z <- rep(L$zlim[1], nrow(rock))
              L$type <- "p";
              L$pch <- 4;
              L$col <- 1;
              L$scales.3d$x.scales$draw <- FALSE
              L$scales.3d$y.scales$draw <- FALSE
              L$scales.3d$z.scales$draw <- FALSE
              L$xlab <- ""; L$ylab <- ""; L$zlab <- "";
              L$par.box <- list(col=rep(NA, 9))
              L$box.3d$lty <- 0
              L$panel.3d.wireframe <- NULL
              do.call(panel.cloud, L)
              L$pch <- 1;
              L$z <- rock$lperm
              do.call(panel.cloud, L)
              apply(rock[,c("peri","shape","lperm")], 1,
                    function(line){
                        L$x <- rep(line[1], 2)
                        L$y <- rep(line[2], 2)
                        L$z <- c(L$zlim[1], line[3])
                        L$type <- "l"
                        do.call(panel.cloud, L)
                    })
              panel.abline(v=0.3)
              panel.wireframe(...)
          })


Joint modeling of the variance

##-----------------------------------------------------------------------------

goi <- read.table("http://www.leg.ufpr.br/~walmes/data/goiaba.txt",
                  header=TRUE, sep="\t")
goi$daa2 <- goi$daa^2.5
str(goi)
## 'data.frame':    174 obs. of  8 variables:
##  $ daa   : int  13 13 13 13 13 13 13 13 13 13 ...
##  $ coleta: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ rep   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ long  : num  29.1 32.9 36.3 34.7 33.3 ...
##  $ trans : num  20.5 22.8 22.9 22.7 21.4 ...
##  $ peso  : num  6.66 8.51 9.93 9.14 7.6 7.54 6.27 9.94 7.95 7.79 ...
##  $ volume: int  6 9 11 10 7 8 6 9 8 8 ...
##  $ daa2  : num  609 609 609 609 609 ...
aux <- ddply(goi, .(daa), summarise, m=mean(peso), s=sd(peso))

p1 <- xyplot(peso~daa, data=goi, type=c("p","a"),
             ylab="Peso do fruto (g)", xlab="Dias após antese")
p2 <- xyplot(s~m, aux, type=c("p","r"),
             xlab="Média de peso", ylab="Desvio-padrão de peso")
grid.arrange(p1, p2)

##-----------------------------------------------------------------------------
## Fits.

## Traditional model (constant residual variance).
n0 <- gnls(peso~A-(A-D)*exp(-exp(C*(daa-B))), data=goi,
           start=c(A=200, C=0.09, B=105, D=7))
summary(n0)
## Generalized nonlinear least squares fit
##   Model: peso ~ A - (A - D) * exp(-exp(C * (daa - B))) 
##   Data: goi 
##        AIC      BIC    logLik
##   1619.093 1634.888 -804.5465
## 
## Coefficients:
##       Value Std.Error   t-value p-value
## A 205.76239  4.196193  49.03549       0
## C   0.09367  0.010148   9.22980       0
## B 107.35182  0.874391 122.77326       0
## D  18.70608  3.179082   5.88411       0
## 
##  Correlation: 
##   A      C      B     
## C -0.357              
## B  0.590 -0.131       
## D -0.103  0.467  0.219
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -3.17237358 -0.42834311 -0.07757784  0.35077905  3.94396837 
## 
## Residual standard error: 24.94118 
## Degrees of freedom: 174 total; 170 residual
## Model with a variance function. The commented varExp alternative gives
## var(e) = sigma^2*exp(2*delta*daa); the fit below uses varPower on log(daa),
## i.e. var(e) = sigma^2*log(daa)^(2*delta).
n1 <- update(n0,
             ## weights=varExp(0.03, form=~daa)
             weights=varPower(0.1, form=~log(daa))             
             )
summary(n1)
## Generalized nonlinear least squares fit
##   Model: peso ~ A - (A - D) * exp(-exp(C * (daa - B))) 
##   Data: goi 
##        AIC      BIC    logLik
##   1432.407 1451.362 -710.2036
## 
## Variance function:
##  Structure: Power of variance covariate
##  Formula: ~log(daa) 
##  Parameter estimates:
##    power 
## 5.764348 
## 
## Coefficients:
##       Value Std.Error  t-value p-value
## A 217.15613  8.784140 24.72139       0
## C   0.05520  0.003531 15.63409       0
## B 108.05936  1.880553 57.46148       0
## D   7.49496  0.426361 17.57893       0
## 
##  Correlation: 
##   A      C      B     
## C -0.566              
## B  0.879 -0.624       
## D -0.390  0.837 -0.386
## 
## Standardized residuals:
##         Min          Q1         Med          Q3         Max 
## -2.66220159 -0.53129856  0.09399151  0.63400182  2.80361091 
## 
## Residual standard error: 0.003829197 
## Degrees of freedom: 174 total; 170 residual
anova(n1, n0)
##    Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## n1     1  6 1432.407 1451.362 -710.2036                        
## n0     2  5 1619.093 1634.888 -804.5465 1 vs 2 188.6858  <.0001
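
## Sketch: approximate interval estimates for the mean parameters, the power
## of the variance function, and the residual scale (nlme::intervals).
intervals(n1)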
##-----------------------------------------------------------------------------
## Diagnostics.

da <- rbind(data.frame(model="trad",
                       r=residuals(n0, type="pearson"),
                       f=fitted(n0)),
            data.frame(model="var",
                       r=residuals(n1, type="pearson"),
                       f=fitted(n1)))

p1 <- qqmath(~r|model, data=da,
             xlab="Quantis teóricos",
             ylab="Resíduos padronizados")
p2 <- xyplot(sqrt(abs(r))~f|model, data=da, type=c("p","smooth"),
             xlab="Valor ajustado",
             ylab=expression(sqrt("|Resíduos padronizados|")))
grid.arrange(p1, p2)

##-----------------------------------------------------------------------------
## Prediction.

model <- deriv3(~ A-(A-D)*exp(-exp(C*(daa-B))),
                c("A","C","B","D"),
                function(daa, A, C, B, D){NULL})

l <- list(n0, n1)
names(l) <- levels(da$model)
L <- l
for(i in seq_along(L)){
  m4 <- l[[i]]
  U <- chol(vcov(m4))
  pred <- data.frame(daa=seq(1,140,1))
  m <- model(daa=pred$daa, A=coef(m4)["A"], C=coef(m4)["C"],
             B=coef(m4)["B"], D=coef(m4)["D"])
  F0 <- attr(m, "gradient")
  pred$y <- c(m)
  pred$se <- sqrt(apply(F0%*%t(U), 1, function(x) sum(x^2)))
  z <- qnorm(0.975)
  pred <- transform(pred, lwr=y-z*se, upr=y+z*se)
  L[[i]] <- pred
}

str(L)
## List of 2
##  $ trad:'data.frame':    140 obs. of  5 variables:
##   ..$ daa: num [1:140] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ y  : num [1:140] 18.7 18.7 18.7 18.7 18.7 ...
##   ..$ se : num [1:140] 3.17 3.17 3.17 3.17 3.17 ...
##   ..$ lwr: num [1:140] 12.5 12.5 12.5 12.5 12.5 ...
##   ..$ upr: num [1:140] 24.9 24.9 24.9 24.9 24.9 ...
##  $ var :'data.frame':    140 obs. of  5 variables:
##   ..$ daa: num [1:140] 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ y  : num [1:140] 8.06 8.1 8.13 8.16 8.2 ...
##   ..$ se : num [1:140] 0.278 0.273 0.267 0.262 0.256 ...
##   ..$ lwr: num [1:140] 7.52 7.56 7.61 7.65 7.7 ...
##   ..$ upr: num [1:140] 8.61 8.63 8.65 8.68 8.7 ...
L <- ldply(L)
names(L)[1] <- "model"
L$model <- factor(L$model, levels=levels(da$model))
str(L)
## 'data.frame':    280 obs. of  6 variables:
##  $ model: Factor w/ 2 levels "trad","var": 1 1 1 1 1 1 1 1 1 1 ...
##  $ daa  : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ y    : num  18.7 18.7 18.7 18.7 18.7 ...
##  $ se   : num  3.17 3.17 3.17 3.17 3.17 ...
##  $ lwr  : num  12.5 12.5 12.5 12.5 12.5 ...
##  $ upr  : num  24.9 24.9 24.9 24.9 24.9 ...
icm <- ddply(goi, .(daa), summarise,
             m=mean(peso),
             lwr=mean(peso)-qt(0.975, length(peso)-1)*sd(peso)*
             sqrt(1/length(peso)),
             upr=mean(peso)+qt(0.975, length(peso)-1)*sd(peso)*
             sqrt(1/length(peso)))

##-----------------------------------------------------------------------------
## Plot with the confidence bands.

ylim <- range(goi$peso)
xyplot(y~daa|model, data=L, type="l", col=1,
       ylab="Massa fresca dos frutos (g)",
       xlab="Dias após a antese",
       strip=strip.custom(bg="gray90"),
       ly=L$lwr, uy=L$upr, cty="bands",
       prepanel=prepanel.cbH, panel=panel.cbH)+
  layer(panel.arrows(icm$daa, icm$lwr, icm$daa, icm$upr,
                     code=3, length=0.05, angle=90))


Random effects model

##-----------------------------------------------------------------------------
## Models.

str(BodyWeight)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':   176 obs. of  4 variables:
##  $ weight: num  240 250 255 260 262 258 266 266 265 272 ...
##  $ Time  : num  1 8 15 22 29 36 43 44 50 57 ...
##  $ Rat   : Ord.factor w/ 16 levels "2"<"3"<"4"<"1"<..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ Diet  : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
##  - attr(*, "outer")=Class 'formula' length 2 ~Diet
##   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##  - attr(*, "formula")=Class 'formula' length 3 weight ~ Time | Rat
##   .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
##  - attr(*, "labels")=List of 2
##   ..$ x: chr "Time"
##   ..$ y: chr "Body weight"
##  - attr(*, "units")=List of 2
##   ..$ x: chr "(days)"
##   ..$ y: chr "(g)"
##  - attr(*, "FUN")=function (x)  
##   ..- attr(*, "source")= chr "function(x) max(x, na.rm = TRUE)"
##  - attr(*, "order.groups")= logi TRUE
plot(BodyWeight)

mm0 <- lme(weight~Diet*Time, random=~1+Time, data=BodyWeight,
           correlation=corCAR1())
summary(mm0)
## Linear mixed-effects model fit by REML
##  Data: BodyWeight 
##        AIC      BIC    logLik
##   1154.544 1189.038 -566.2721
## 
## Random effects:
##  Formula: ~1 + Time | Rat
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev     Corr  
## (Intercept) 36.9056825 (Intr)
## Time         0.2333653 -0.152
## Residual     5.4600807       
## 
## Correlation Structure: Continuous AR(1)
##  Formula: ~1 | Rat 
##  Parameter estimate(s):
##       Phi 
## 0.5316409 
## Fixed effects: weight ~ Diet * Time 
##                 Value Std.Error  DF   t-value p-value
## (Intercept) 251.30348 13.152622 157 19.106721  0.0000
## Diet2       200.68117 22.781009  13  8.809143  0.0000
## Diet3       253.67433 22.781009  13 11.135342  0.0000
## Time          0.36417  0.091878 157  3.963552  0.0001
## Diet2:Time    0.62374  0.159138 157  3.919461  0.0001
## Diet3:Time    0.27874  0.159138 157  1.751532  0.0818
##  Correlation: 
##            (Intr) Diet2  Diet3  Time   Dt2:Tm
## Diet2      -0.577                            
## Diet3      -0.577  0.333                     
## Time       -0.181  0.104  0.104              
## Diet2:Time  0.104 -0.181 -0.060 -0.577       
## Diet3:Time  0.104 -0.060 -0.181 -0.577  0.333
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -3.07548837 -0.43553702  0.06177724  0.45498272  1.75954253 
## 
## Number of Observations: 176
## Number of Groups: 16
plot(augPred(mm0))

anova(mm0)
##             numDF denDF   F-value p-value
## (Intercept)     1   157 1727.0904  <.0001
## Diet            2    13   86.6759  <.0001
## Time            1   157   82.4113  <.0001
## Diet:Time       2   157    7.7925   6e-04
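
## Sketch: predicted random effects (BLUPs) per rat.
head(ranef(mm0))
plot(ranef(mm0))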
##-----------------------------------------------------------------------------
## Predictions at the population level (level=0) and for each rat (level=1);
## the band for the population curves uses vcov(mm0), the covariance matrix
## of the fixed effects.

pred0 <- with(BodyWeight,
              expand.grid(Diet=levels(Diet),
                          Time=seq(min(Time), max(Time), l=20)))
aux <- unique(BodyWeight[,c("Diet","Rat")])
pred1 <- merge(pred0, aux, all=TRUE)

pred0$y <- predict(mm0, newdata=pred0, level=0)
pred1$y <- predict(mm0, newdata=pred1, level=1)

U <- chol(vcov(mm0))
F <- model.matrix(formula(mm0)[-2], data=pred0)
pred0$se <- sqrt(apply(F%*%t(U), 1, function(x) sum(x^2)))
z <- qnorm(0.975)
aux <- outer(pred0$se, z*c(lwr=-1, fit=0, upr=1), FUN="*")
pred0 <- cbind(pred0, sweep(aux, 1, STATS=pred0$y, FUN="+"))

p0 <-
    xyplot(weight~Time|Diet, BodyWeight, groups=Rat,
           layout=c(3,1), xlab="Tempo", ylab="Peso (g)")

p1 <-
    xyplot(y~Time|Diet, pred0, type="l", lwd=2, layout=c(3,1),
           ly=pred0$lwr, uy=pred0$upr, cty="bands",
           panel=panel.cbH)

p2 <-
    xyplot(y~Time|Diet, groups=Rat, data=pred1,
           type="l", col="blue", lty=2)

p0+as.layer(p1)+as.layer(p2)


##-----------------------------------------------------------------------------
## Session information.

Sys.time()
## [1] "2014-12-11 20:51:21 BRST"
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: i686-pc-linux-gnu (32-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=pt_BR.UTF-8       
##  [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
## [10] LC_TELEPHONE=C             LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] methods   splines   grid      stats     graphics  grDevices utils     datasets 
## [9] base     
## 
## other attached packages:
##  [1] wzRfun_0.4          rootSolve_1.6.5.1   mgcv_1.8-3          segmented_0.5-0.0  
##  [5] rsm_2.07            nlme_3.1-118        aod_1.3             multcomp_1.3-7     
##  [9] TH.data_1.0-5       mvtnorm_1.0-1       doBy_4.5-12         survival_2.37-7    
## [13] plyr_1.8.1          alr3_2.0.5          ellipse_0.3-8       car_2.0-22         
## [17] gridExtra_0.9.1     latticeExtra_0.6-26 RColorBrewer_1.0-5  lattice_0.20-29    
## [21] rmarkdown_0.3.3     knitr_1.8          
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.4    evaluate_0.5.5  formatR_1.0     htmltools_0.2.6 MASS_7.3-35    
##  [6] Matrix_1.1-4    nnet_7.3-8      Rcpp_0.11.3     sandwich_2.3-2  stringr_0.6.2  
## [11] tools_3.1.2     yaml_2.1.13     zoo_1.7-11