#///////////////////////////////////////////////////////////////////////
# Pacotes --------------------------------------------------------------

library(tidyverse)

#///////////////////////////////////////////////////////////////////////
# Reducing on-the-job stress -------------------------------------------

# Reducing on-the-job stress. Plant therapists believe that plants can
# reduce on-the-job stress. A Kansas State PLANTS University study was
# conducted to investigate this phenomenon. Two weeks prior to final
# exams, 10 undergraduate students took part in an experiment to
# determine what effect the presence of a live plant, a photo of a
# plant, or absence of a plant has on a student's ability to relax while
# isolated in a dimly lit room. Each student participated in three
# sessions-one with a live plant, one with a plant photo, and one with
# no plant (control).* During each session, finger temperature was
# measured at 1-minute intervals for 20 minutes. Because increasing
# finger temperature indicates an increased level of relaxation, the
# maximum temperature (in degrees) was used as the response variable.
# These data (based on data from Elizabeth Schreiber, Department of
# Statistics, Kansas State University, Manhattan, Kansas) are saved in
# the accompanying file. Conduct an ANOVA and make the proper inferences
# at a = .10.

tb_plants <-
    "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/PLANTS.DAT" |>
    read.table(header = FALSE)
str(tb_plants)

tb_plants <-
    tb_plants |>
    setNames(c("student", "treatment", "temperature")) |>
    mutate(student = factor(student),
           treatment = factor(treatment,
                              labels = c("LivePlant", "PlantPhoto", "NoPlant")))

tb_plants |>
    pivot_wider(names_from = treatment, values_from = temperature)

ggplot(tb_plants,
       aes(x = treatment, y = temperature, color = student, group = student)) +
    geom_line() +
    geom_point()

ggplot(tb_plants,
       aes(x = treatment, y = temperature)) +
    geom_boxplot()

ggplot(tb_plants,
       aes(x = student, y = temperature, color = treatment, group = treatment)) +
    geom_line() +
    geom_point()

# y_{ij} = \mu + S_i + T_j + e_{ij}
m0 <- lm(temperature ~ student + treatment, data = tb_plants)

par(mfrow = c(2, 2))
plot(m0)
layout(1)

car::residualPlots(m0)

anova(m0)

tb_means <- 
    emmeans::emmeans(m0, specs = ~treatment) |>
    as.data.frame()

ggplot(tb_means,
       aes(x = treatment, y = emmean,
           ymin = lower.CL, ymax = upper.CL)) +
    geom_pointrange()

ggplot(tb_means,
       aes(y = treatment, x = emmean,
           xmin = lower.CL, xmax = upper.CL)) +
    geom_pointrange() +
    geom_text(aes(label = sprintf("%0.2f", emmean)),
              vjust = 0, nudge_y = 0.05)

coef(m0)

grepl(x = names(coef(m0)), pattern = "student")
m0$assign

# Média estimada marginal.
coef(m0)[1] + 1/10 * sum(coef(m0)[m0$assign == 1]) + coef(m0)["treatmentNoPlant"]
coef(m0)[1] + 1/10 * sum(coef(m0)[m0$assign == 1]) + coef(m0)["treatmentPlantPhoto"]
coef(m0)[1] + 1/10 * sum(coef(m0)[m0$assign == 1])

#///////////////////////////////////////////////////////////////////////
# Absentee rates at a jeans plant --------------------------------------

# Absentee rates at a jeans plant. A plant that manufactures denim jeans
# in the United Kingdom introduced a computerized automated handling
# system. The new system delivers garments to the assembly line
# operators by means of an overhead conveyor. Although the automated
# system minimizes operator handling time, it inhibits operators from
# working ahead and taking breaks from their machine. A study in New
# Technology, Work, and Employment (July 2001) investigated the impact
# of the new handling system on worker absentee rates at the jeans
# plant. One theory is that the mean absentee rate will vary by day of
# the week, as operators decide to indulge in 1-day absences to relieve
# work pressure. Nine weeks were randomly selected, and the absentee
# rate (percentage of workers absent) determined for each day (Monday
# through Friday) of the workweek. The data are listed in the table at
# the top of the next column. Conduct a complete analysis of the data to
# determine whether the mean absentee rate differs across the 5 days of
# the workweek.

tb_jeans <-
    "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/JEANS.DAT" |>
    read.table(header = FALSE)
str(tb_jeans)

tb_jeans <-
    tb_jeans |>
    setNames(c("week", "absence", "wday")) |>
    mutate(week = factor(week),
           wday = factor(wday, levels = c("M", "T", "W", "R", "F")))
tb_jeans

tb_jeans |>
    pivot_wider(names_from = wday, values_from = absence)

# c(tb_jeans$absence/100) |>
#     MASS::fractions()

ggplot(tb_jeans,
       aes(x = wday, y = absence, color = week, group = week)) +
    geom_line() +
    geom_point()

ggplot(tb_jeans,
       aes(x = week, y = absence, color = wday, group = wday)) +
    geom_line() +
    geom_point()

# y_{ij} = \mu + W_i + D_j + e_{ij}
m0 <- lm(absence ~ week + wday, data = tb_jeans)

par(mfrow = c(2, 2))
plot(m0)
layout(1)

anova(m0)

tb_means <- 
    emmeans::emmeans(m0, specs = ~wday) |>
    as.data.frame()

ggplot(tb_means,
       aes(x = wday, y = emmean,
           ymin = lower.CL, ymax = upper.CL)) +
    geom_pointrange()

emmeans::emmeans(m0, specs = ~wday) |>
    emmeans::contrast("trt.vs.ctrl")

emmeans::emmeans(m0, specs = ~wday) |>
    emmeans::contrast("pairwise")

summary(m0)

m1 <- lm(absence ~ week + wday_agg,
         data = mutate(tb_jeans, 
                       wday_agg = fct_collapse(wday, 
                                               "M" = c("M"),
                                               "nM" = c("T", "W", "R", "F"))))
anova(m1)

anova(m0, m1)

emmeans::emmeans(m1, specs = ~wday_agg) |>
    as.data.frame()

#///////////////////////////////////////////////////////////////////////

tb_mosquito <-
    "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/MOSQUITO.DAT" |>
    read.table(header = FALSE)
str(tb_mosquito)

tb_mosquito <-
    tb_mosquito |>
    setNames(c("location", "insecticide", "resistance_ratio")) |>
    mutate(location = factor(location),
           insecticide = factor(insecticide))
tb_mosquito

tb_mosquito |>
    pivot_wider(names_from = insecticide, values_from = resistance_ratio)

#///////////////////////////////////////////////////////////////////////

# Commercial eggs produced from different housing systems. Refer to the
# Food Chemistry (Vol. 106, 2008) study of four different types of egg
# housing systems, Exercise 9.33 (p. 546). Recall that the four housing
# systems were cage, barn, free range, and organic. In addition to
# housing system, the researchers also determined the weight class
# (medium or large) for each sampled egg. The data on whipping capacity
# (percent overrun) for the 28 sampled eggs are shown in the next table.
# The researchers want to investigate the effect of both housing system
# and weight class on the mean whipping capacity of the eggs. In
# particular, they want to know whether the difference between the mean
# whipping capacity of medium and large eggs depends on the housing
# system.

# Do jeito que veio do Assistente do ChatGPT.
# https://chatgpt.com/g/g-AiT7svnDN-table-image-to-csv-converter
tb_eggs <-
"Housing	Wtclass	Overrun (%)
Cage	M	495, 462, 488, 471, 471
Cage	L	502, 472, 474, 492, 479
Free	M	513, 510, 510
Free	L	520, 531, 521
Barn	M	515, 516, 514
Barn	L	526, 501, 508
Organic	M	532, 511, 527
Organic	L	530, 544, 531" |>
    textConnection() |>
    read.table(header = TRUE, sep = "\t", check.names = FALSE) |>
    separate_longer_delim(3, delim = ",") |>
    mutate(`Overrun (%)` = as.integer(`Overrun (%)`),
           Housing = factor(Housing),
           Wtclass = factor(Wtclass))

str(tb_eggs)

names(tb_eggs) <- c("sistema", "tamanho", "crescimento")

ggplot(tb_eggs,
       aes(x = sistema, y = crescimento,
           color = tamanho, group = tamanho)) +
    stat_summary(fun = mean, geom = "line") +
    geom_point()

ggplot(tb_eggs,
       aes(x = tamanho, y = crescimento,
           color = sistema, group = sistema)) +
    stat_summary(fun = mean, geom = "line") +
    geom_point()

# y_{ijk} = \mu + S_i + T_j + (S*T)_{ij} + e_{ijk}
m0 <- lm(crescimento ~ sistema * tamanho, data = tb_eggs)
# sistema * tamanho = sistema + tamanho + sistema:tamanho

options(width = 150)

model.matrix(m0)

par(mfrow = c(2, 2))
plot(m0)
layout(1)

anova(m0)

tb_means <- 
    emmeans::emmeans(m0, specs = ~sistema) |>
    as.data.frame()

ggplot(tb_means,
       aes(x = sistema, y = emmean,
           ymin = lower.CL, ymax = upper.CL)) +
    geom_pointrange()

emmeans::emmeans(m0, specs = ~sistema) |>
    emmeans::contrast("trt.vs.ctrl", ref = "Cage")

tb_means <- 
    emmeans::emmeans(m0, specs = ~sistema + tamanho) |>
    as.data.frame()

ggplot(tb_means,
       aes(x = sistema, y = emmean, color = tamanho,
           ymin = lower.CL, ymax = upper.CL)) +
    geom_pointrange(position = position_dodge(width = 0.15))

#///////////////////////////////////////////////////////////////////////

# On the trail of the cockroach. Knowledge of how cockroaches forage
# for food is valuable for companies that develop and manufacture roach
# bait and traps. Many entomologists believe, however, that the
# navigational behavior of cockroaches scavenging for food is random. D.
# Miller of Virginia Tech University challenged the "random-walk" theory
# by designing an experiment to test a cockroach's ability to follow a
# trail of their fecal material (Explore, Research at the University of
# Florida, Fall 1998). A methanol extract from roach feces-called a
# pheromone-was used to create a chemical trail. German cockroaches
# were released at the beginning of the trail, one at a time, and a
# video surveillance camera was used to monitor the roach's movements.
# In addition to the trail containing the fecal extract (the treatment),
# a trail using methanol only (the control) was created. To determine if
# trail-following ability differed among cockroaches of different age,
# sex, and reproductive status, four roach groups were used in the
# experiment: adult males, adult females, gravid (pregnant) females, and
# nymphs (immatures). Twenty roaches of each type were randomly assigned
# to the treatment trail, and 10 of each type were randomly assigned to
# the control trail. Thus, a total of 120 roaches were used in the
# experiment. The movement pattern of each cockroach was measured (in
# "pixels") as the average trail deviation. The data for the 120
# cockroaches in the study are stored in the accompanying file. (The
# first 5 and last 5 observations in the data set are listed here.)
# Conduct a complete analysis of the data. Determine whether roaches can
# distinguish between the fecal extract and control trail and whether
# trail-following ability differs according to age, sex, and
# reproductive status.

tb_roach <-
    "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/ROACH.DAT" |>
    read.table(header = FALSE)
str(tb_roach)

tb_roach <-
    tb_roach |>
    setNames(c("trail_deviation", "group", "trail")) |>
    mutate(across(c("group", "trail"), factor)) |>
    mutate(trail = factor(trail,
                          levels = c("C", "E"),
                          labels = c("Control", "Extract")),
           group = factor(group,
                          levels = c("M", "F", "G", "N"),
                          labels = c("AdultM", "AdultF", "GravidF", "Nymph")))
tb_roach

tb_roach |>
    group_by(trail, group) |>
    rstatix::get_summary_stats(trail_deviation, type = "mean_sd")

ggplot(tb_roach,
       aes(x = trail, y = trail_deviation,
           color = group, group = group)) +
    stat_summary(fun = mean, geom = "line") +
    geom_point()

ggplot(tb_roach,
       aes(x = trail, y = trail_deviation, group = group)) +
    facet_wrap(~group) +
    geom_boxplot(aes(x = trail, group = trail)) +
    stat_summary(fun = mean, geom = "line") +
    geom_point(color = "red")

m0 <- lm(trail_deviation ~ trail * group, data = tb_roach)

MASS::boxcox(m0)

par(mfrow = c(2, 2))
plot(m0)
layout(1)

anova(m0)

emmeans::emmeans(m0, specs = ~trail) |>
    as.data.frame()

emmeans::emmeans(m0, specs = ~group) |>
    as.data.frame()

#///////////////////////////////////////////////////////////////////////

# Factorial designs are commonly employed in marketing research to
# evaluate the effectiveness of sales strategies. At one supermarket,
# two of the factors were Price level (regular, reduced price, cost to
# supermarket) and Display level (normal display space, normal display
# space plus end-of-aisle display, twice the normal display space). A 3
# x 3 complete factorial design was employed, where each treatment was
# applied three times to a particular product at a particular
# supermarket. The dependent variable of interest was unit sales for the
# week. (To minimize treatment carryover effects, each treatment was
# preceded and followed by a week in which the product was priced at its
# regular price and was displayed in its normal manner.)

tb_supermkt <-
    "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/SUPERMKT.DAT" |>
    read.table(header = FALSE)
str(tb_supermkt)

tb_supermkt <-
    tb_supermkt |>
    setNames(c("display", "price", "sales")) |>
    mutate(display = factor(display,
                            labels = c("Normal", "NormalEnd", "Double")),
           price = factor(price,
                          labels = c("Regular", "Reduced", "Cost")))
tb_supermkt

tb_supermkt |>
    group_by(display, price) |>
    rstatix::get_summary_stats(sales, type = "mean_sd")

#///////////////////////////////////////////////////////////////////////

# Quality Engineering (Vol. 6, 1994) reported the results of an
# experiment that was designed to find ways to improve the output of an
# industrial lathe. lathe is controlled by a computer that automatically
# feeds bar stock, cuts the stock, machines the surface finish, and
# releases the part. As it is machined, the bar stock spins and is held
# in place by a collet. ne lathe operator sets the feed (the rate at
# which bars are machined) and the speed (spin rate). lhe product
# characteristic of interest is surface finish. It is measured on a
# gauge that records the vertical distance a probe travels as it moves
# along a given horizontal distance on the bar. Ihe rougher the surfaces
# the higher the gauge measurement. "Ihe factors that were manipulated
# in the experiment were speed, feed, collet tightness, and tool wear.
# The following table reports the factor-level settings and the
# resulting surface-finish measurements (H High; L = Low).

tb_lathe <-
    "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/LATHE.DAT" |>
    read.table(header = FALSE)
str(tb_lathe)

tb_lathe <-
    tb_lathe |>
    setNames(c("speed", "feed", "collet", "tool_wear", "surface_finish")) |>
    mutate(across(c("speed", "feed", "collet", "tool_wear"),
                  ~factor(.x, levels = c("L", "H"))))
tb_lathe

tb_lathe |>
    count(speed, feed, collet, tool_wear)

#///////////////////////////////////////////////////////////////////////

# 12.164 Comparing mosquito repellents.
#
# Comparing mosquito repellents. Which insect repellents
# protect best against mosquitoes? Consumer Reports
# REPELL (June 2000) tested 14 products that all claim to be an ef-
# fective mosquito repellent. Each product was classified
# as either lotion/cream or aerosol/spray. The cost of the
# product (in dollars) was divided by the amount of the re-
# pellent needed to cover exposed areas of the skin (about
# 1>3 ounce) to obtain a cost-per-use value. Effectiveness
# was measured as the maximum number of hours of
# protection (in half-hour increments) provided when
# ­
# human testers exposed their arms to 200 mosquitoes. The
# data from the report are listed in the table.

tb_repell <-
"Insect Repellent	Type	Cost/Use	Maximum Protection
Amway Hour Guard 12	Lotion/Cream	$2.08	13.5 hours
Avon Skin-So-Soft	Aerosol/Spray	0.67	0.5
Avon BugGuard Plus	Lotion/Cream	1.00	2.0
Ben's Backyard Formula	Lotion/Cream	0.75	7.0
Bite Blocker	Lotion/Cream	0.46	3.0
BugOut	Aerosol/Spray	0.11	6.0
Cutter Skinsations	Aerosol/Spray	0.22	3.0
Cutter Unscented	Aerosol/Spray	0.19	5.5
Muskoll Ultra6Hours	Aerosol/Spray	0.24	6.5
Natrapel	Aerosol/Spray	0.27	1.0
Off! Deep Woods	Aerosol/Spray	1.77	14.0
Off! Skintastic	Lotion/Cream	0.67	3.0
Sawyer Deet Formula	Lotion/Cream	0.36	7.0
Repel Permanone	Aerosol/Spray	2.75	24.0" |>
    textConnection() |>
    read.table(header = TRUE, sep = "\t",
               check.names = FALSE, quote = "") |>
    mutate(across(c("Cost/Use", "Maximum Protection"),
                  ~as.numeric(sub("[^0-9.]+", "", .x))),
           Type = factor(Type))
str(tb_repell)

#///////////////////////////////////////////////////////////////////////

# Household food consumption. The data in the table below were collected
# for a random sample of 26 house-holds in Washington, D.C. An economist
# wants to relate household food consumption, y, to household income,
# x1, and household size, x2, with the first-order model.
# a. Fit the model to the data. Do you detect any signs of
#    multicollinearity in the data? Explain.
# b. Is there visual evidence (from a residual plot) that a
#    second-order model may be more appropriate for predicting
#    household food consumption? Explain.
# c. Comment on the assumption of constant error variance,
#    using a residual plot. Does it appear to be
#    satisfied?
# d. Are there any outliers in the data? If so, identify them.
# e. Based on a graph of the residuals, does the assumption
#    of normal errors appear to be reasonably satisfied? Explain.

"http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/DCFOOD.DAT"