Análise de severidade em maça

#///////////////////////////////////////////////////////////////////////
# Pacotes --------------------------------------------------------------

library(readxl)
library(ggbeeswarm)
library(emmeans)
library(plotly)
library(latticeExtra)
library(tidyverse)

#///////////////////////////////////////////////////////////////////////
# Importação -----------------------------------------------------------

# Lista para armazenar os dados.
tbs <- list()

tbs[[length(tbs) + 1]] <-
    read_excel("experimento cv eva.xlsx",
               sheet = "experimento 1",
               na = c("x"),
               skip = 2) |>
    suppressMessages() |>
    select(1:`70`) |>
    pivot_longer(cols = -c(tratamento, planta, folha),
                 names_to = "daa",
                 values_to = "severidade") |>
    mutate(daa = as.integer(daa)) |>
    add_column(cultivar = "eva", experimento = 1, .before = 1)

tbs[[length(tbs) + 1]] <-
    read_excel("experimento cv eva.xlsx",
               sheet = "experimento 2",
               na = c("x"),
               skip = 2) |>
    suppressMessages() |>
    select(1:`71`) |>
    pivot_longer(cols = -c(tratamento, planta, folha),
                 names_to = "daa",
                 values_to = "severidade") |>
    mutate(daa = as.integer(daa)) |>
    add_column(cultivar = "eva", experimento = 2, .before = 1)

tbs[[length(tbs) + 1]] <-
    read_excel("experimento cv gala.xlsx",
               sheet = "experimento 1",
               na = c("x"),
               skip = 2) |>
    suppressMessages() |>
    select(1:`70`) |>
    pivot_longer(cols = -c(tratamento, planta, folha),
                 names_to = "daa",
                 values_to = "severidade") |>
    mutate(daa = as.integer(daa)) |>
    add_column(cultivar = "gala", experimento = 1, .before = 1)

tbs[[length(tbs) + 1]] <-
    read_excel("experimento cv gala.xlsx",
               sheet = "experimento 2",
               na = c("x"),
               skip = 2) |>
    suppressMessages() |>
    select(1:`71`) |>
    pivot_longer(cols = -c(tratamento, planta, folha),
                 names_to = "daa",
                 values_to = "severidade") |>
    mutate(daa = as.integer(daa)) |>
    add_column(cultivar = "gala", experimento = 2, .before = 1)

str(tbs)

## List of 4
##  $ : tibble [5,040 × 7] (S3: tbl_df/tbl/data.frame)
##   ..$ cultivar   : chr [1:5040] "eva" "eva" "eva" "eva" ...
##   ..$ experimento: num [1:5040] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ tratamento : chr [1:5040] "testemunha" "testemunha" "testemunha" "testemunha" ...
##   ..$ planta     : num [1:5040] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ folha      : num [1:5040] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ daa        : int [1:5040] 0 1 5 8 12 19 22 26 29 33 ...
##   ..$ severidade : num [1:5040] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : tibble [5,040 × 7] (S3: tbl_df/tbl/data.frame)
##   ..$ cultivar   : chr [1:5040] "eva" "eva" "eva" "eva" ...
##   ..$ experimento: num [1:5040] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ tratamento : chr [1:5040] "testemunha" "testemunha" "testemunha" "testemunha" ...
##   ..$ planta     : num [1:5040] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ folha      : num [1:5040] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ daa        : int [1:5040] 0 1 6 9 12 15 19 22 26 29 ...
##   ..$ severidade : num [1:5040] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : tibble [5,040 × 7] (S3: tbl_df/tbl/data.frame)
##   ..$ cultivar   : chr [1:5040] "gala" "gala" "gala" "gala" ...
##   ..$ experimento: num [1:5040] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ tratamento : chr [1:5040] "testemunha" "testemunha" "testemunha" "testemunha" ...
##   ..$ planta     : num [1:5040] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ folha      : num [1:5040] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ daa        : int [1:5040] 0 1 5 8 12 19 22 26 29 33 ...
##   ..$ severidade : num [1:5040] 0 0 0 0 0 0 0 0 0 0 ...
##  $ : tibble [5,061 × 7] (S3: tbl_df/tbl/data.frame)
##   ..$ cultivar   : chr [1:5061] "gala" "gala" "gala" "gala" ...
##   ..$ experimento: num [1:5061] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ tratamento : chr [1:5061] "testemunha" "testemunha" "testemunha" "testemunha" ...
##   ..$ planta     : num [1:5061] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ folha      : num [1:5061] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ daa        : int [1:5061] 0 1 6 9 12 15 19 22 26 29 ...
##   ..$ severidade : num [1:5061] 0 0 0 0 0 0 0 0 0 0 ...

# Coloca em uma única tabela.
tb <- bind_rows(tbs)
rm(tbs)

# Converte as variáveis para fatores e remove linhas com NA.
tb <-
    tb |>
    mutate(
        cultivar = factor(cultivar),
        experimento = factor(experimento),
        tratamento = factor(tratamento, levels = c("testemunha", "cc", "cn", "mistura")),
        planta = factor(planta),
        folha = factor(folha)
    ) |>
    # arrange(cultivar, experimento, tratamento, planta, folha, daa) |>
    drop_na(cultivar, experimento, tratamento, planta, folha, daa)

#///////////////////////////////////////////////////////////////////////
# Análise exploratória -------------------------------------------------

str(tb)

## tibble [20,160 × 7] (S3: tbl_df/tbl/data.frame)
##  $ cultivar   : Factor w/ 2 levels "eva","gala": 1 1 1 1 1 1 1 1 1 1 ...
##  $ experimento: Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ tratamento : Factor w/ 4 levels "testemunha","cc",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ planta     : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ folha      : Factor w/ 10 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ daa        : int [1:20160] 0 1 5 8 12 19 22 26 29 33 ...
##  $ severidade : num [1:20160] 0 0 0 0 0 0 0 0 0 0 ...

# Frequência das combinações.
xtabs(~tratamento + cultivar, data = tb)

##             cultivar
## tratamento    eva gala
##   testemunha 2520 2520
##   cc         2520 2520
##   cn         2520 2520
##   mistura    2520 2520

xtabs(~planta + folha, data = tb)

##       folha
## planta   1   2   3   4   5   6   7   8   9  10
##      1 336 336 336 336 336 336 336 336 336 336
##      2 336 336 336 336 336 336 336 336 336 336
##      3 336 336 336 336 336 336 336 336 336 336
##      4 336 336 336 336 336 336 336 336 336 336
##      5 336 336 336 336 336 336 336 336 336 336
##      6 336 336 336 336 336 336 336 336 336 336

# Gráfico da severidade média no tempo por planta.
tb |>
    filter(!tratamento %in% c("testemunha", "cn"),
           cultivar %in% "gala") |>
    group_by(cultivar, experimento, tratamento, planta, daa) |>
    summarise(
        n_valid = sum(!is.na(severidade)),
        severidade = mean(severidade, na.rm = TRUE),
        .groups = "drop"
    ) |>
    ggplot(data = _,
           mapping = aes(x = daa, y = severidade, color = planta)) +
    facet_grid(cultivar + experimento ~ tratamento) +
    geom_point() +
    geom_text(mapping = aes(label = n_valid),
              vjust = -1, size = 3,
              show.legend = FALSE) +
    geom_line()

# ATTENTION: Redução da severidade média no tempo é causada pela queda
# de folhas com alta severidade.

# TODO: Repetir o último valor observado (LOCF - Last Observation
# Carried Forward) para cada planta > folha.
DataExplorer::plot_missing(tb)

# Preencher os valores NA com o último valor observado.
tb_fill <-
    tb |>
    group_by(cultivar, experimento, tratamento, planta, folha) |>
    arrange(daa) |>
    mutate(severidade_real = severidade) |>
    fill(severidade, .direction = "down") |>
    ungroup()

# Confere o gráfico novamente.
tb_fill |>
    filter(!tratamento %in% c("testemunha", "cn"),
           cultivar %in% "gala") |>
    group_by(cultivar, experimento, tratamento, planta, daa) |>
    summarise(
        n_valid = sum(!is.na(severidade_real)),
        severidade = mean(severidade, na.rm = TRUE),
        .groups = "drop"
    ) |>
    ggplot(data = _,
           mapping = aes(x = daa, y = severidade, color = planta)) +
    facet_grid(cultivar + experimento ~ tratamento) +
    geom_point() +
    geom_text(mapping = aes(label = n_valid),
              vjust = -1, size = 3,
              show.legend = FALSE) +
    geom_line()

#///////////////////////////////////////////////////////////////////////
# Análise com área abaixo da curva (AAC) -------------------------------

DescTools::AUC(x = 1:3, y = 1:3, method = “trapezoid”) agricolae::audpc(evaluation = 1:3, dates = 1:3)

# Área abaixo da curva (AAC) da severidade no tempo.
# A AUDPC dividida pelo número de dias totais (máximo - mínimo) é uma
# espécie de média ponderada.
tb_auc <-
    tb_fill |>
    group_by(cultivar, experimento, tratamento, planta, daa) |>
    summarise(
        severidade = mean(severidade, na.rm = TRUE),
        .groups = "drop"
    ) |>
    group_by(cultivar, experimento, tratamento, planta) |>
    arrange(daa) |>
    summarise(
        aac = DescTools::AUC(x = daa,
                             y = severidade,
                             method = "trapezoid"),
        range = max(daa) - min(daa),
        sev_mean = mean(severidade, na.rm = TRUE),
        aacr = aac/range,
        .groups = "drop"
    )
tb_auc

## # A tibble: 96 × 8
##    cultivar experimento tratamento planta   aac range sev_mean   aacr
##    <fct>    <fct>       <fct>      <fct>  <dbl> <int>    <dbl>  <dbl>
##  1 eva      1           testemunha 1       0       70   0      0     
##  2 eva      1           testemunha 2       0       70   0      0     
##  3 eva      1           testemunha 3       0       70   0      0     
##  4 eva      1           testemunha 4       0       70   0      0     
##  5 eva      1           testemunha 5       0       70   0      0     
##  6 eva      1           testemunha 6       0       70   0      0     
##  7 eva      1           cc         1       8.70    70   0.132  0.124 
##  8 eva      1           cc         2       0       70   0      0     
##  9 eva      1           cc         3      16.7     70   0.245  0.238 
## 10 eva      1           cc         4       3.4     70   0.0495 0.0486
## # ℹ 86 more rows

tb_auc[, c("aac", "aacr", "sev_mean")]

## # A tibble: 96 × 3
##      aac   aacr sev_mean
##    <dbl>  <dbl>    <dbl>
##  1  0    0        0     
##  2  0    0        0     
##  3  0    0        0     
##  4  0    0        0     
##  5  0    0        0     
##  6  0    0        0     
##  7  8.70 0.124    0.132 
##  8  0    0        0     
##  9 16.7  0.238    0.245 
## 10  3.4  0.0486   0.0495
## # ℹ 86 more rows

lattice::splom(tb_auc[, c("aac", "aacr", "sev_mean")], as.matrix = TRUE)

ggplot(tb_auc,
       mapping = aes(x = tratamento, y = aac)) +
    facet_grid(cultivar ~ experimento) +
    # geom_point() +
    geom_beeswarm(size = 2, cex = 3)

# NOTE: Muitos zeros por conta da testemunha. Vai ocasionar problemas
# com os pressupostos.

tb_auc |>
    group_by(cultivar, experimento, tratamento) |>
    summarise(
        n = n(),
        mean = mean(aac),
        sd = sd(aac),
        median = median(aac),
        IQR = IQR(aac),
        min = min(aac),
        max = max(aac),
        .groups = "drop"
    )

## # A tibble: 16 × 10
##    cultivar experimento tratamento     n    mean      sd median    IQR   min
##    <fct>    <fct>       <fct>      <int>   <dbl>   <dbl>  <dbl>  <dbl> <dbl>
##  1 eva      1           testemunha     6   0       0       0      0     0   
##  2 eva      1           cc             6   6.79    6.81    6.05  10.3   0   
##  3 eva      1           cn             6   0.373   0.914   0      0     0   
##  4 eva      1           mistura        6   4.35    5.65    2.60   6.34  0   
##  5 eva      2           testemunha     6   0       0       0      0     0   
##  6 eva      2           cc             6  21.4    19.2    20.0   32.5   0   
##  7 eva      2           cn             6   4.91    8.12    2.00   3.70  0   
##  8 eva      2           mistura        6  19.5    15.9    10.4   24.3   7.80
##  9 gala     1           testemunha     6   0       0       0      0     0   
## 10 gala     1           cc             6 182.    119.    161.   112.   72.1 
## 11 gala     1           cn             6  11.9    21.6     3.44   8.67  0   
## 12 gala     1           mistura        6 106.    114.     69.9   99.7   4.8 
## 13 gala     2           testemunha     6   0       0       0      0     0   
## 14 gala     2           cc             6 124.     94.9   128.   116.    9.25
## 15 gala     2           cn             6  48.9    47.0    51.1   58.2   0   
## 16 gala     2           mistura        6  99.4    78.2    82.4  107.    6.96
## # ℹ 1 more variable: max <dbl>

# IMPORTANT: Não tem variabilidade na testemunha. Todos os valores são
# 0. A análise deve ser feita sem a testemunha.

#///////////////////////////////////////////////////////////////////////
# Modelagem ------------------------------------------------------------

m0 <-
    tb_auc |>
    filter(!tratamento %in% c("testemunha")) |>
    lm(aac + 1 ~ experimento + cultivar * tratamento, data = _)

# Exame dos pressupostos.
par(mfrow = c(2, 2))
plot(m0)

graphics::layout(1)

# Transformação de Box-Cox.
MASS::boxcox(m0)
abline(v = 0, col = "red", lty = 2)

# Transformação logarítmica.
m1 <- update(m0, formula = log(.) ~ .)

# Exame dos pressupostos após transformação.
par(mfrow = c(2, 2))
plot(m1)

graphics::layout(1)

# Os dois experimentos funcionam como blocos nos quais as combinações
# entre cultivar e tratamento foram casualizadas com repetições.

# Quadro de análises de variância.
anova(m1)

## Analysis of Variance Table
## 
## Response: log(aac + 1)
##                     Df  Sum Sq Mean Sq F value    Pr(>F)    
## experimento          1  10.156  10.156  5.8203   0.01867 *  
## cultivar             1  80.201  80.201 45.9645 4.243e-09 ***
## tratamento           2  53.769  26.884 15.4079 3.334e-06 ***
## cultivar:tratamento  2   4.915   2.457  1.4083   0.25192    
## Residuals           65 113.414   1.745                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#///////////////////////////////////////////////////////////////////////
# Testes de médias para tratamentos ------------------------------------

# y_trans = log(aac + 1), então aac = exp(y_trans) - 1.

emmeans::emmeans(m1, specs = ~tratamento) |>
    emmeans::contrast(method = "tukey")

## NOTE: Results may be misleading due to involvement in interactions

##  contrast     estimate    SE df t.ratio p.value
##  cc - cn         1.978 0.381 65   5.189  <.0001
##  cc - mistura    0.338 0.381 65   0.885  0.6516
##  cn - mistura   -1.641 0.381 65  -4.303  0.0002
## 
## Results are averaged over the levels of: experimento, cultivar 
## Note: contrasts are still on the log(mu + 1) scale. Consider using
##       regrid() if you want contrasts of back-transformed estimates. 
## P value adjustment: tukey method for comparing a family of 3 estimates

tb_means_trat <-
    emmeans::emmeans(m1, specs = ~ tratamento) |>
    multcomp::cld(Letters = letters, method = "tukey")

## NOTE: Results may be misleading due to involvement in interactions

tb_means_trat

##  tratamento emmean   SE df lower.CL upper.CL .group
##  cn           1.39 0.27 65    0.852     1.93  a    
##  mistura      3.03 0.27 65    2.493     3.57   b   
##  cc           3.37 0.27 65    2.830     3.91   b   
## 
## Results are averaged over the levels of: experimento, cultivar 
## Results are given on the log(mu + 1) (not the response) scale. 
## Confidence level used: 0.95 
## Note: contrasts are still on the log(mu + 1) scale. Consider using
##       regrid() if you want contrasts of back-transformed estimates. 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## significance level used: alpha = 0.05 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.

tb_means_trat |>
    as.data.frame() |>
    mutate(across(c(emmean, lower.CL, upper.CL), ~exp(.) - 1),
           label = sprintf("%0.1f %s", emmean, trimws(.group))) |>
    ggplot(data = _,
           mapping = aes(x = tratamento, y = emmean)) +
    geom_point(size = 4) +
    geom_pointrange(mapping = aes(ymin = lower.CL,
                                  ymax = upper.CL),
                    width = 0.2) +
    geom_text(mapping = aes(label = label),
              vjust = 0.5,
              hjust = 0,
              nudge_x = 0.05) +
    labs(y = "AAC da severidade",
         x = "Tratamento")

## Warning in geom_pointrange(mapping = aes(ymin = lower.CL, ymax = upper.CL), :
## Ignoring unknown parameters: `width`

#///////////////////////////////////////////////////////////////////////
# Testes de médias para cultivares -------------------------------------

emmeans::emmeans(m1, specs = ~cultivar) |>
    emmeans::contrast(method = "tukey")

## NOTE: Results may be misleading due to involvement in interactions

##  contrast   estimate    SE df t.ratio p.value
##  eva - gala    -2.11 0.311 65  -6.780  <.0001
## 
## Results are averaged over the levels of: experimento, tratamento 
## Note: contrasts are still on the log(mu + 1) scale. Consider using
##       regrid() if you want contrasts of back-transformed estimates.

tb_means_cult <-
    emmeans::emmeans(m1, specs = ~cultivar) |>
    multcomp::cld(Letters = letters, method = "tukey")

## NOTE: Results may be misleading due to involvement in interactions

tb_means_cult

##  cultivar emmean   SE df lower.CL upper.CL .group
##  eva        1.54 0.22 65     1.10     1.98  a    
##  gala       3.65 0.22 65     3.21     4.09   b   
## 
## Results are averaged over the levels of: experimento, tratamento 
## Results are given on the log(mu + 1) (not the response) scale. 
## Confidence level used: 0.95 
## Note: contrasts are still on the log(mu + 1) scale. Consider using
##       regrid() if you want contrasts of back-transformed estimates. 
## significance level used: alpha = 0.05 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.

tb_means_cult |>
    as.data.frame() |>
    mutate(across(c(emmean, lower.CL, upper.CL), ~exp(.) - 1),
           label = sprintf("%0.1f %s", emmean, trimws(.group))) |>
    ggplot(data = _,
           mapping = aes(x = cultivar, y = emmean)) +
    geom_point(size = 4) +
    geom_pointrange(mapping = aes(ymin = lower.CL,
                                  ymax = upper.CL),
                    width = 0.2) +
    geom_text(mapping = aes(label = label),
              vjust = 0.5,
              hjust = 0,
              nudge_x = 0.05) +
    labs(y = "AAC da severidade",
         x = "Cultivar")

## Warning in geom_pointrange(mapping = aes(ymin = lower.CL, ymax = upper.CL), :
## Ignoring unknown parameters: `width`

#///////////////////////////////////////////////////////////////////////
# A folha cai pela idade ou pela severidade? ---------------------------

# QUESTION: Modelo para a queda da folha em função do tempo
# (sobreviência) ou da ocorrência de queda (binomial)?

str(tb)

## tibble [20,160 × 7] (S3: tbl_df/tbl/data.frame)
##  $ cultivar   : Factor w/ 2 levels "eva","gala": 1 1 1 1 1 1 1 1 1 1 ...
##  $ experimento: Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ tratamento : Factor w/ 4 levels "testemunha","cc",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ planta     : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ folha      : Factor w/ 10 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ daa        : int [1:20160] 0 1 5 8 12 19 22 26 29 33 ...
##  $ severidade : num [1:20160] 0 0 0 0 0 0 0 0 0 0 ...

tb_aux <-
    tb |>
    filter(cultivar %in% "gala",
           tratamento %in% "cc",
           experimento == 2)

tb_aux |>
    ggplot(data = _,
           mapping = aes(x = daa, y = severidade, color = folha)) +
    facet_wrap(~planta) +
    geom_point() +
    geom_line()

## Warning: Removed 40 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_line()`).

tb_aux_stats <-
    tb_aux |>
    group_by(cultivar, experimento, tratamento, planta, folha) |>
    summarise(
        max_daa = max(daa[!is.na(severidade)], na.rm = TRUE),
        max_sev = max(severidade, na.rm = TRUE),
        ind = if_else(any(is.na(severidade)), 1, 0) |> factor(),
        .groups = "drop"
    )
tb_aux_stats

## # A tibble: 60 × 8
##    cultivar experimento tratamento planta folha max_daa max_sev ind  
##    <fct>    <fct>       <fct>      <fct>  <fct>   <int>   <dbl> <fct>
##  1 gala     2           cc         1      1          71     0.2 0    
##  2 gala     2           cc         1      2          71     2   0    
##  3 gala     2           cc         1      3          71     4   0    
##  4 gala     2           cc         1      4          71     3.5 0    
##  5 gala     2           cc         1      5          71    17.5 0    
##  6 gala     2           cc         1      6          71    20   0    
##  7 gala     2           cc         1      7          71     1   0    
##  8 gala     2           cc         1      8          71     2   0    
##  9 gala     2           cc         1      9          71     2   0    
## 10 gala     2           cc         1      10         61     0.5 1    
## # ℹ 50 more rows

tb_aux |>
    ggplot(data = _,
           mapping = aes(x = daa, y = severidade, color = folha)) +
    facet_wrap(~planta) +
    geom_line() +
    geom_point(data = tb_aux_stats,
               mapping = aes(x = max_daa,
                             y = max_sev,
                             shape = ind),
               inherit.aes = FALSE) +
    scale_shape_manual(breaks = c(Sim = 1, Não = 0),
                       values = c(4, 1),
                       name = "Caiu?")

## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_line()`).

#///////////////////////////////////////////////////////////////////////
# Calcula as estatísticas por folha ------------------------------------

tb_leaf_stats <-
    tb |>
    filter(!tratamento %in% "testemunha") |>
    group_by(cultivar, experimento, tratamento, planta, folha) |>
    summarise(
        max_daa = max(daa[!is.na(severidade)], na.rm = TRUE),
        max_sev = max(severidade, na.rm = TRUE),
        ind = if_else(any(is.na(severidade)), 1, 0),
        .groups = "drop"
    ) |>
    mutate(folha = as.integer(folha),
           ue = interaction(cultivar,
                            experimento,
                            tratamento,
                            planta,
                            drop = TRUE))
tb_leaf_stats

## # A tibble: 720 × 9
##    cultivar experimento tratamento planta folha max_daa max_sev   ind ue        
##    <fct>    <fct>       <fct>      <fct>  <int>   <int>   <dbl> <dbl> <fct>     
##  1 eva      1           cc         1          1      70     2.2     0 eva.1.cc.1
##  2 eva      1           cc         1          2      70     0       0 eva.1.cc.1
##  3 eva      1           cc         1          3      70     0       0 eva.1.cc.1
##  4 eva      1           cc         1          4      70     0.8     0 eva.1.cc.1
##  5 eva      1           cc         1          5      70     0       0 eva.1.cc.1
##  6 eva      1           cc         1          6      70     0       0 eva.1.cc.1
##  7 eva      1           cc         1          7      70     0       0 eva.1.cc.1
##  8 eva      1           cc         1          8      70     0       0 eva.1.cc.1
##  9 eva      1           cc         1          9      70     0       0 eva.1.cc.1
## 10 eva      1           cc         1         10      70     0.3     0 eva.1.cc.1
## # ℹ 710 more rows

# Usa uma transformação na severidade máxima para ter uma variável mais
# simétrica.
tb_leaf_stats <-
    tb_leaf_stats |>
    mutate(#sev_fun = sqrt(max_sev),
        sev_fun = log(max_sev + 1))

tb_leaf_stats |>
    group_by(cultivar, experimento, tratamento) |>
    summarise(
        n = n(),
        prop = mean(ind),
        .groups = "drop"
    )

## # A tibble: 12 × 5
##    cultivar experimento tratamento     n   prop
##    <fct>    <fct>       <fct>      <int>  <dbl>
##  1 eva      1           cc            60 0.0667
##  2 eva      1           cn            60 0.0167
##  3 eva      1           mistura       60 0.0333
##  4 eva      2           cc            60 0.217 
##  5 eva      2           cn            60 0.0333
##  6 eva      2           mistura       60 0.0667
##  7 gala     1           cc            60 0.417 
##  8 gala     1           cn            60 0     
##  9 gala     1           mistura       60 0.0833
## 10 gala     2           cc            60 0.183 
## 11 gala     2           cn            60 0.0333
## 12 gala     2           mistura       60 0.25

# Indica cada planta.
nlevels(tb_leaf_stats$ue)

## [1] 72

ggplot(tb_leaf_stats,
       mapping = aes(x = sev_fun)) +
    facet_wrap(~ind) +
    geom_histogram() +
    geom_rug()

## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

ggplot(tb_leaf_stats,
       mapping = aes(x = sev_fun, color = factor(ind))) +
    stat_ecdf() +
    geom_rug()

#///////////////////////////////////////////////////////////////////////
# Modelo misto generalizado --------------------------------------------

# NOTE: Preciso refletir um pouco mais sobre essa parte da análise.

library(lme4)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

# Fazer um scaterplot 3d com plotly: ind ~ folha + max_sev.
plot_ly(tb_leaf_stats,
        x = ~folha,
        y = ~sev_fun,
        z = ~as.integer(ind) - 1,
        color = ~ind,
        colors = c("blue", "red"),
        type = "scatter3d",
        mode = "markers") |>
    plotly::layout(scene = list(xaxis = list(title = "Folha"),
                                yaxis = list(title = "Severidade máxima"),
                                zaxis = list(title = "Caiu?")))

# Versão modelo misto.
str(tb_leaf_stats)

## tibble [720 × 10] (S3: tbl_df/tbl/data.frame)
##  $ cultivar   : Factor w/ 2 levels "eva","gala": 1 1 1 1 1 1 1 1 1 1 ...
##  $ experimento: Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ tratamento : Factor w/ 4 levels "testemunha","cc",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ planta     : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ folha      : int [1:720] 1 2 3 4 5 6 7 8 9 10 ...
##  $ max_daa    : int [1:720] 70 70 70 70 70 70 70 70 70 70 ...
##  $ max_sev    : num [1:720] 2.2 0 0 0.8 0 0 0 0 0 0.3 ...
##  $ ind        : num [1:720] 0 0 0 0 0 0 0 0 0 0 ...
##  $ ue         : Factor w/ 72 levels "eva.1.cc.1","gala.1.cc.1",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ sev_fun    : num [1:720] 1.163 0 0 0.588 0 ...

summary(tb_leaf_stats)

##  cultivar   experimento      tratamento  planta      folha         max_daa     
##  eva :360   1:360       testemunha:  0   1:120   Min.   : 1.0   Min.   : 9.00  
##  gala:360   2:360       cc        :240   2:120   1st Qu.: 3.0   1st Qu.:70.00  
##                         cn        :240   3:120   Median : 5.5   Median :70.00  
##                         mistura   :240   4:120   Mean   : 5.5   Mean   :67.42  
##                                          5:120   3rd Qu.: 8.0   3rd Qu.:71.00  
##                                          6:120   Max.   :10.0   Max.   :71.00  
##                                                                                
##     max_sev            ind                   ue         sev_fun      
##  Min.   : 0.000   Min.   :0.0000   eva.1.cc.1 : 10   Min.   :0.0000  
##  1st Qu.: 0.000   1st Qu.:0.0000   gala.1.cc.1: 10   1st Qu.:0.0000  
##  Median : 0.000   Median :0.0000   eva.2.cc.1 : 10   Median :0.0000  
##  Mean   : 1.577   Mean   :0.1167   gala.2.cc.1: 10   Mean   :0.4666  
##  3rd Qu.: 1.200   3rd Qu.:0.0000   eva.1.cn.1 : 10   3rd Qu.:0.7885  
##  Max.   :28.000   Max.   :1.0000   gala.1.cn.1: 10   Max.   :3.3673  
##                                    (Other)    :660

# Ajuste do modelo misto.
m1 <-
    tb_leaf_stats |>
    glmer(ind ~
              folha + sev_fun +
              # I(folha^2) +
              # tratamento +
              # cultivar +
              # experimento +
              (1 | ue),
          family = binomial(link = "logit"),
          data = _,
          control = glmerControl(optimizer = "bobyqa",
                                 optCtrl = list(maxfun = 2e5)))

summary(m1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: ind ~ folha + sev_fun + (1 | ue)
##    Data: tb_leaf_stats
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     452.7     471.0    -222.3     444.7       716 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8948 -0.2954 -0.1801 -0.1443  4.9058 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  ue     (Intercept) 2.177    1.475   
## Number of obs: 720, groups:  ue, 72
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.59538    0.37750  -6.875 6.19e-12 ***
## folha       -0.08859    0.04672  -1.896  0.05797 .  
## sev_fun      0.44429    0.16604   2.676  0.00746 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) folha 
## folha   -0.576       
## sev_fun -0.214  0.001

car::Anova(m1, test = "Chisq")

## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: ind
##          Chisq Df Pr(>Chisq)   
## folha   3.5946  1   0.057968 . 
## sev_fun 7.1596  1   0.007456 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Distribuição dos efeitos aleatórios.
qqnorm(ranef(m1)$ue[[1]])

ranef(m1)[["ue"]] |>
    as.data.frame() |>
    rownames_to_column(var = "ue") |>
    separate(ue,
             into = c("cultivar", "experimento", "tratamento", "planta"),
             sep = "\\.") |>
    rename(effect = "(Intercept)") |>
    ggplot(data = _,
           mapping = aes(sample = effect)) +
    facet_grid(cultivar + experimento ~ tratamento) +
    geom_qq() +
    geom_rug()

# Grid para fazer a predição.
grid <- with(tb_leaf_stats,
             list(folha = seq(min(folha), max(folha), length.out = 30),
                  sev_fun = seq(min(sev_fun), max(sev_fun), length.out = 30)))

# Probabilidades preditas pelo modelo misto.
tb_means_mixed <-
    emmeans::emmeans(
        m1,
        specs = ~folha + sev_fun,
        at = grid,
        type = "response") |>
    as.data.frame()
str(tb_means_mixed)

## Classes 'summary_emm' and 'data.frame':  900 obs. of  7 variables:
##  $ folha    : num  1 1.31 1.62 1.93 2.24 ...
##  $ sev_fun  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ prob     : num  0.0639 0.0623 0.0607 0.0592 0.0577 ...
##  $ SE       : num  0.0211 0.0202 0.0194 0.0186 0.0178 ...
##  $ df       : num  Inf Inf Inf Inf Inf ...
##  $ asymp.LCL: num  0.0331 0.0326 0.0322 0.0317 0.0311 ...
##  $ asymp.UCL: num  0.12 0.116 0.112 0.108 0.104 ...
##  - attr(*, "estName")= chr "prob"
##  - attr(*, "clNames")= chr [1:2] "asymp.LCL" "asymp.UCL"
##  - attr(*, "pri.vars")= chr [1:2] "folha" "sev_fun"
##  - attr(*, "adjust")= chr "none"
##  - attr(*, "side")= num 0
##  - attr(*, "delta")= num 0
##  - attr(*, "type")= chr "response"
##  - attr(*, "mesg")= chr [1:2] "Confidence level used: 0.95" "Intervals are back-transformed from the logit scale"
##  - attr(*, "linkname")= chr "logit"
##  - attr(*, "digits")= int 7

# Gráfico de contorno da superfície de predição.
tb_means_mixed |>
    lattice::levelplot(prob ~ folha * sev_fun,
                       data = _,
                       aspect = 1,
                       col.regions = viridis::viridis(100),
                       # col.regions = terrain.colors(100),
                       contour = TRUE)

# Gráfico wireframe da superfície de predição.
tb_means_mixed |>
    lattice::wireframe(prob ~ folha * sev_fun,
                       data = _,
                       col = "transparent",
                       col.regions = viridis::viridis(100),
                       # col.regions = terrain.colors(100)
                       drape = TRUE)

# Juntar os pontos observados com a superfície de predição.
lattice::cloud(
    ind ~ folha * sev_fun,
    xlim = range(tb_means_mixed$folha),
    ylim = range(tb_means_mixed$sev_fun),
    zlim = c(0, 1),
    # screen = list(z = 30, x = -60, y = 0),
    scales = list(arrows = FALSE),
    pch = ifelse(tb_leaf_stats$ind == 1, 4, 1),
    col = ifelse(tb_leaf_stats$ind == 1, "red", "blue"),
    par.settings = list(axis.line = list(col = "transparent")),
    data = tb_leaf_stats
) +
    latticeExtra::as.layer(
        lattice::wireframe(
            prob ~ folha * sev_fun,
            data = as.data.frame(tb_means_mixed),
            xlim = range(tb_means_mixed$folha),
            ylim = range(tb_means_mixed$sev_fun),
            zlim = c(0, 1),
            xlab = "",
            ylab = "",
            zlab = "",
            col = "transparent",
            col.regions = viridis::viridis(100, alpha = 0.4),
            drape = TRUE,
            alpha = 0.25)
    )

# Colocar a superfície no gráfico 3d com plotly que é interativo.
grid$prob <-
    tb_means_mixed |>
    as.data.frame() |>
    pull(prob) |>
    matrix(nrow = length(grid$folha),
           ncol = length(grid$sev_fun))
str(grid)

## List of 3
##  $ folha  : num [1:30] 1 1.31 1.62 1.93 2.24 ...
##  $ sev_fun: num [1:30] 0 0.116 0.232 0.348 0.464 ...
##  $ prob   : num [1:30, 1:30] 0.0639 0.0623 0.0607 0.0592 0.0577 ...

# Usar a escala viridis.
plot_ly() |>
    add_surface(x = grid$folha,
                y = grid$sev_fun,
                z = grid$prob,
                colors = scales::viridis_pal()(12),
                opacity = 0.5) |>
    add_markers(data = tb_leaf_stats,
                x = ~folha,
                y = ~sev_fun,
                z = ~ind,
                marker = list(color = ifelse(tb_leaf_stats$ind == 1, "red", "blue")),
                type = "scatter3d",
                mode = "markers") |>
    plotly::layout(scene = list(xaxis = list(title = "Folha"),
                                yaxis = list(title = "Severidade máxima"),
                                zaxis = list(title = "Caiu?")))

#///////////////////////////////////////////////////////////////////////
# Fim ------------------------------------------------------------------

Análise de severidade em maça

Walmes Zeviani

07 de outubro de 2025