#/////////////////////////////////////////////////////////////////////// # Pacotes -------------------------------------------------------------- library(tidyverse) #/////////////////////////////////////////////////////////////////////// # Reducing on-the-job stress ------------------------------------------- # Reducing on-the-job stress. Plant therapists believe that plants can # reduce on-the-job stress. A Kansas State PLANTS University study was # conducted to investigate this phenomenon. Two weeks prior to final # exams, 10 undergraduate students took part in an experiment to # determine what effect the presence of a live plant, a photo of a # plant, or absence of a plant has on a student's ability to relax while # isolated in a dimly lit room. Each student participated in three # sessions-one with a live plant, one with a plant photo, and one with # no plant (control).* During each session, finger temperature was # measured at 1-minute intervals for 20 minutes. Because increasing # finger temperature indicates an increased level of relaxation, the # maximum temperature (in degrees) was used as the response variable. # These data (based on data from Elizabeth Schreiber, Department of # Statistics, Kansas State University, Manhattan, Kansas) are saved in # the accompanying file. Conduct an ANOVA and make the proper inferences # at a = .10. tb_plants <- "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/PLANTS.DAT" |> read.table(header = FALSE) str(tb_plants) tb_plants <- tb_plants |> setNames(c("student", "treatment", "temperature")) |> mutate(student = factor(student), treatment = factor(treatment, labels = c("LivePlant", "PlantPhoto", "NoPlant"))) tb_plants |> pivot_wider(names_from = treatment, values_from = temperature) ggplot(tb_plants, aes(x = treatment, y = temperature, color = student, group = student)) + geom_line() + geom_point() ggplot(tb_plants, aes(x = treatment, y = temperature)) + geom_boxplot() ggplot(tb_plants, aes(x = student, y = temperature, color = treatment, group = treatment)) + geom_line() + geom_point() # y_{ij} = \mu + S_i + T_j + e_{ij} m0 <- lm(temperature ~ student + treatment, data = tb_plants) par(mfrow = c(2, 2)) plot(m0) layout(1) car::residualPlots(m0) anova(m0) tb_means <- emmeans::emmeans(m0, specs = ~treatment) |> as.data.frame() ggplot(tb_means, aes(x = treatment, y = emmean, ymin = lower.CL, ymax = upper.CL)) + geom_pointrange() ggplot(tb_means, aes(y = treatment, x = emmean, xmin = lower.CL, xmax = upper.CL)) + geom_pointrange() + geom_text(aes(label = sprintf("%0.2f", emmean)), vjust = 0, nudge_y = 0.05) coef(m0) grepl(x = names(coef(m0)), pattern = "student") m0$assign # Média estimada marginal. coef(m0)[1] + 1/10 * sum(coef(m0)[m0$assign == 1]) + coef(m0)["treatmentNoPlant"] coef(m0)[1] + 1/10 * sum(coef(m0)[m0$assign == 1]) + coef(m0)["treatmentPlantPhoto"] coef(m0)[1] + 1/10 * sum(coef(m0)[m0$assign == 1]) #/////////////////////////////////////////////////////////////////////// # Absentee rates at a jeans plant -------------------------------------- # Absentee rates at a jeans plant. A plant that manufactures denim jeans # in the United Kingdom introduced a computerized automated handling # system. The new system delivers garments to the assembly line # operators by means of an overhead conveyor. Although the automated # system minimizes operator handling time, it inhibits operators from # working ahead and taking breaks from their machine. A study in New # Technology, Work, and Employment (July 2001) investigated the impact # of the new handling system on worker absentee rates at the jeans # plant. One theory is that the mean absentee rate will vary by day of # the week, as operators decide to indulge in 1-day absences to relieve # work pressure. Nine weeks were randomly selected, and the absentee # rate (percentage of workers absent) determined for each day (Monday # through Friday) of the workweek. The data are listed in the table at # the top of the next column. Conduct a complete analysis of the data to # determine whether the mean absentee rate differs across the 5 days of # the workweek. tb_jeans <- "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/JEANS.DAT" |> read.table(header = FALSE) str(tb_jeans) tb_jeans <- tb_jeans |> setNames(c("week", "absence", "wday")) |> mutate(week = factor(week), wday = factor(wday, levels = c("M", "T", "W", "R", "F"))) tb_jeans tb_jeans |> pivot_wider(names_from = wday, values_from = absence) # c(tb_jeans$absence/100) |> # MASS::fractions() ggplot(tb_jeans, aes(x = wday, y = absence, color = week, group = week)) + geom_line() + geom_point() ggplot(tb_jeans, aes(x = week, y = absence, color = wday, group = wday)) + geom_line() + geom_point() # y_{ij} = \mu + W_i + D_j + e_{ij} m0 <- lm(absence ~ week + wday, data = tb_jeans) par(mfrow = c(2, 2)) plot(m0) layout(1) anova(m0) tb_means <- emmeans::emmeans(m0, specs = ~wday) |> as.data.frame() ggplot(tb_means, aes(x = wday, y = emmean, ymin = lower.CL, ymax = upper.CL)) + geom_pointrange() emmeans::emmeans(m0, specs = ~wday) |> emmeans::contrast("trt.vs.ctrl") emmeans::emmeans(m0, specs = ~wday) |> emmeans::contrast("pairwise") summary(m0) m1 <- lm(absence ~ week + wday_agg, data = mutate(tb_jeans, wday_agg = fct_collapse(wday, "M" = c("M"), "nM" = c("T", "W", "R", "F")))) anova(m1) anova(m0, m1) emmeans::emmeans(m1, specs = ~wday_agg) |> as.data.frame() #/////////////////////////////////////////////////////////////////////// tb_mosquito <- "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/MOSQUITO.DAT" |> read.table(header = FALSE) str(tb_mosquito) tb_mosquito <- tb_mosquito |> setNames(c("location", "insecticide", "resistance_ratio")) |> mutate(location = factor(location), insecticide = factor(insecticide)) tb_mosquito tb_mosquito |> pivot_wider(names_from = insecticide, values_from = resistance_ratio) #/////////////////////////////////////////////////////////////////////// # Commercial eggs produced from different housing systems. Refer to the # Food Chemistry (Vol. 106, 2008) study of four different types of egg # housing systems, Exercise 9.33 (p. 546). Recall that the four housing # systems were cage, barn, free range, and organic. In addition to # housing system, the researchers also determined the weight class # (medium or large) for each sampled egg. The data on whipping capacity # (percent overrun) for the 28 sampled eggs are shown in the next table. # The researchers want to investigate the effect of both housing system # and weight class on the mean whipping capacity of the eggs. In # particular, they want to know whether the difference between the mean # whipping capacity of medium and large eggs depends on the housing # system. # Do jeito que veio do Assistente do ChatGPT. # https://chatgpt.com/g/g-AiT7svnDN-table-image-to-csv-converter tb_eggs <- "Housing Wtclass Overrun (%) Cage M 495, 462, 488, 471, 471 Cage L 502, 472, 474, 492, 479 Free M 513, 510, 510 Free L 520, 531, 521 Barn M 515, 516, 514 Barn L 526, 501, 508 Organic M 532, 511, 527 Organic L 530, 544, 531" |> textConnection() |> read.table(header = TRUE, sep = "\t", check.names = FALSE) |> separate_longer_delim(3, delim = ",") |> mutate(`Overrun (%)` = as.integer(`Overrun (%)`), Housing = factor(Housing), Wtclass = factor(Wtclass)) str(tb_eggs) names(tb_eggs) <- c("sistema", "tamanho", "crescimento") ggplot(tb_eggs, aes(x = sistema, y = crescimento, color = tamanho, group = tamanho)) + stat_summary(fun = mean, geom = "line") + geom_point() ggplot(tb_eggs, aes(x = tamanho, y = crescimento, color = sistema, group = sistema)) + stat_summary(fun = mean, geom = "line") + geom_point() # y_{ijk} = \mu + S_i + T_j + (S*T)_{ij} + e_{ijk} m0 <- lm(crescimento ~ sistema * tamanho, data = tb_eggs) # sistema * tamanho = sistema + tamanho + sistema:tamanho options(width = 150) model.matrix(m0) par(mfrow = c(2, 2)) plot(m0) layout(1) anova(m0) tb_means <- emmeans::emmeans(m0, specs = ~sistema) |> as.data.frame() ggplot(tb_means, aes(x = sistema, y = emmean, ymin = lower.CL, ymax = upper.CL)) + geom_pointrange() emmeans::emmeans(m0, specs = ~sistema) |> emmeans::contrast("trt.vs.ctrl", ref = "Cage") tb_means <- emmeans::emmeans(m0, specs = ~sistema + tamanho) |> as.data.frame() ggplot(tb_means, aes(x = sistema, y = emmean, color = tamanho, ymin = lower.CL, ymax = upper.CL)) + geom_pointrange(position = position_dodge(width = 0.15)) #/////////////////////////////////////////////////////////////////////// # On the trail of the cockroach. Knowledge of how cockroaches forage # for food is valuable for companies that develop and manufacture roach # bait and traps. Many entomologists believe, however, that the # navigational behavior of cockroaches scavenging for food is random. D. # Miller of Virginia Tech University challenged the "random-walk" theory # by designing an experiment to test a cockroach's ability to follow a # trail of their fecal material (Explore, Research at the University of # Florida, Fall 1998). A methanol extract from roach feces-called a # pheromone-was used to create a chemical trail. German cockroaches # were released at the beginning of the trail, one at a time, and a # video surveillance camera was used to monitor the roach's movements. # In addition to the trail containing the fecal extract (the treatment), # a trail using methanol only (the control) was created. To determine if # trail-following ability differed among cockroaches of different age, # sex, and reproductive status, four roach groups were used in the # experiment: adult males, adult females, gravid (pregnant) females, and # nymphs (immatures). Twenty roaches of each type were randomly assigned # to the treatment trail, and 10 of each type were randomly assigned to # the control trail. Thus, a total of 120 roaches were used in the # experiment. The movement pattern of each cockroach was measured (in # "pixels") as the average trail deviation. The data for the 120 # cockroaches in the study are stored in the accompanying file. (The # first 5 and last 5 observations in the data set are listed here.) # Conduct a complete analysis of the data. Determine whether roaches can # distinguish between the fecal extract and control trail and whether # trail-following ability differs according to age, sex, and # reproductive status. tb_roach <- "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/ROACH.DAT" |> read.table(header = FALSE) str(tb_roach) tb_roach <- tb_roach |> setNames(c("trail_deviation", "group", "trail")) |> mutate(across(c("group", "trail"), factor)) |> mutate(trail = factor(trail, levels = c("C", "E"), labels = c("Control", "Extract")), group = factor(group, levels = c("M", "F", "G", "N"), labels = c("AdultM", "AdultF", "GravidF", "Nymph"))) tb_roach tb_roach |> group_by(trail, group) |> rstatix::get_summary_stats(trail_deviation, type = "mean_sd") ggplot(tb_roach, aes(x = trail, y = trail_deviation, color = group, group = group)) + stat_summary(fun = mean, geom = "line") + geom_point() ggplot(tb_roach, aes(x = trail, y = trail_deviation, group = group)) + facet_wrap(~group) + geom_boxplot(aes(x = trail, group = trail)) + stat_summary(fun = mean, geom = "line") + geom_point(color = "red") m0 <- lm(trail_deviation ~ trail * group, data = tb_roach) MASS::boxcox(m0) par(mfrow = c(2, 2)) plot(m0) layout(1) anova(m0) emmeans::emmeans(m0, specs = ~trail) |> as.data.frame() emmeans::emmeans(m0, specs = ~group) |> as.data.frame() #/////////////////////////////////////////////////////////////////////// # Factorial designs are commonly employed in marketing research to # evaluate the effectiveness of sales strategies. At one supermarket, # two of the factors were Price level (regular, reduced price, cost to # supermarket) and Display level (normal display space, normal display # space plus end-of-aisle display, twice the normal display space). A 3 # x 3 complete factorial design was employed, where each treatment was # applied three times to a particular product at a particular # supermarket. The dependent variable of interest was unit sales for the # week. (To minimize treatment carryover effects, each treatment was # preceded and followed by a week in which the product was priced at its # regular price and was displayed in its normal manner.) tb_supermkt <- "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/SUPERMKT.DAT" |> read.table(header = FALSE) str(tb_supermkt) tb_supermkt <- tb_supermkt |> setNames(c("display", "price", "sales")) |> mutate(display = factor(display, labels = c("Normal", "NormalEnd", "Double")), price = factor(price, labels = c("Regular", "Reduced", "Cost"))) tb_supermkt tb_supermkt |> group_by(display, price) |> rstatix::get_summary_stats(sales, type = "mean_sd") #/////////////////////////////////////////////////////////////////////// # Quality Engineering (Vol. 6, 1994) reported the results of an # experiment that was designed to find ways to improve the output of an # industrial lathe. lathe is controlled by a computer that automatically # feeds bar stock, cuts the stock, machines the surface finish, and # releases the part. As it is machined, the bar stock spins and is held # in place by a collet. ne lathe operator sets the feed (the rate at # which bars are machined) and the speed (spin rate). lhe product # characteristic of interest is surface finish. It is measured on a # gauge that records the vertical distance a probe travels as it moves # along a given horizontal distance on the bar. Ihe rougher the surfaces # the higher the gauge measurement. "Ihe factors that were manipulated # in the experiment were speed, feed, collet tightness, and tool wear. # The following table reports the factor-level settings and the # resulting surface-finish measurements (H High; L = Low). tb_lathe <- "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/LATHE.DAT" |> read.table(header = FALSE) str(tb_lathe) tb_lathe <- tb_lathe |> setNames(c("speed", "feed", "collet", "tool_wear", "surface_finish")) |> mutate(across(c("speed", "feed", "collet", "tool_wear"), ~factor(.x, levels = c("L", "H")))) tb_lathe tb_lathe |> count(speed, feed, collet, tool_wear) #/////////////////////////////////////////////////////////////////////// # 12.164 Comparing mosquito repellents. # # Comparing mosquito repellents. Which insect repellents # protect best against mosquitoes? Consumer Reports # REPELL (June 2000) tested 14 products that all claim to be an ef- # fective mosquito repellent. Each product was classified # as either lotion/cream or aerosol/spray. The cost of the # product (in dollars) was divided by the amount of the re- # pellent needed to cover exposed areas of the skin (about # 1>3 ounce) to obtain a cost-per-use value. Effectiveness # was measured as the maximum number of hours of # protection (in half-hour increments) provided when # ­ # human testers exposed their arms to 200 mosquitoes. The # data from the report are listed in the table. tb_repell <- "Insect Repellent Type Cost/Use Maximum Protection Amway Hour Guard 12 Lotion/Cream $2.08 13.5 hours Avon Skin-So-Soft Aerosol/Spray 0.67 0.5 Avon BugGuard Plus Lotion/Cream 1.00 2.0 Ben's Backyard Formula Lotion/Cream 0.75 7.0 Bite Blocker Lotion/Cream 0.46 3.0 BugOut Aerosol/Spray 0.11 6.0 Cutter Skinsations Aerosol/Spray 0.22 3.0 Cutter Unscented Aerosol/Spray 0.19 5.5 Muskoll Ultra6Hours Aerosol/Spray 0.24 6.5 Natrapel Aerosol/Spray 0.27 1.0 Off! Deep Woods Aerosol/Spray 1.77 14.0 Off! Skintastic Lotion/Cream 0.67 3.0 Sawyer Deet Formula Lotion/Cream 0.36 7.0 Repel Permanone Aerosol/Spray 2.75 24.0" |> textConnection() |> read.table(header = TRUE, sep = "\t", check.names = FALSE, quote = "") |> mutate(across(c("Cost/Use", "Maximum Protection"), ~as.numeric(sub("[^0-9.]+", "", .x))), Type = factor(Type)) str(tb_repell) #/////////////////////////////////////////////////////////////////////// # Household food consumption. The data in the table below were collected # for a random sample of 26 house-holds in Washington, D.C. An economist # wants to relate household food consumption, y, to household income, # x1, and household size, x2, with the first-order model. # a. Fit the model to the data. Do you detect any signs of # multicollinearity in the data? Explain. # b. Is there visual evidence (from a residual plot) that a # second-order model may be more appropriate for predicting # household food consumption? Explain. # c. Comment on the assumption of constant error variance, # using a residual plot. Does it appear to be # satisfied? # d. Are there any outliers in the data? If so, identify them. # e. Based on a graph of the residuals, does the assumption # of normal errors appear to be reasonably satisfied? Explain. "http://leg.ufpr.br/~walmes/data/business_economics_dataset/EXERCISE/DCFOOD.DAT"