#-----------------------------------------------------------------------
# R para Data Science
#
#                Prof. Dr. Walmes M. Zeviani & Prof. Dr. Wagner H. Bonat
#                Department of Statistics · Federal University of Paraná
#                                       2021-mai-11 · Curitiba/PR/Brazil
#-----------------------------------------------------------------------

#-----------------------------------------------------------------------
# Pacotes.

library(tidyverse)

#-----------------------------------------------------------------------
# Importando dados.

# Grades table. read.csv2() parses with ";" as field separator and "," as
# decimal mark.
url_csv <- "https://raw.githubusercontent.com/leg-ufpr/hackathon/master/notas.csv"
tb_nt <- read.csv2(file = url_csv)
str(tb_nt)

# Opinions table: the JSON is simplified by jsonlite and then coerced to a
# data frame.
url_json <- "https://raw.githubusercontent.com/leg-ufpr/hackathon/master/opinioes.json"
tb_op <- jsonlite::read_json(path = url_json, simplifyVector = TRUE) %>%
    as.data.frame()
str(tb_op)

# Columns are renamed by position, so this relies on the field order of the
# JSON file.
names(tb_op) <- c("ID", "title", "model", "usercity", "usage", "pro",
                  "con", "problem", "opinion", "ts")

# Reshape the grades to wide format: one row per ID, one column per value of
# `quesito`, filled with `nota`.
tb_nt <- pivot_wider(tb_nt,
                     id_cols = "ID",
                     names_from = "quesito",
                     values_from = "nota")
str(tb_nt)

# Keep only the IDs present in both tables, bringing along a subset of the
# opinion columns.
tb <- tb_nt %>%
    inner_join(tb_op[, c("ID", "model", "usercity", "usage", "ts")],
               by = "ID")
str(tb)

#-----------------------------------------------------------------------
# Tratamento com regex para extração.

# Derive location, state code, model year and model description in one pass.
tb <- tb %>%
    mutate(
        # Drop everything up to (and including) the last "- " of `usercity`.
        location = str_remove(usercity, "^.*- "),
        # Last two characters of the location; used further down as the key
        # to join against the state abbreviations.
        uf = str_sub(location, start = -2),
        # First year of the "YYYY/YYYY" pair found in `model` (NA when the
        # pattern is absent).
        year = as.integer(str_sub(str_extract(model, "\\d{4}/\\d{4}"),
                                  end = 4)),
        # `model` without the first " YYYY/YYYY" occurrence.
        spec = str_remove(model, " \\d{4}/\\d{4}")
    )

# Time owning the vehicle (years): remove "menos de", then keep the number
# found between "há" and "ano". Entries that do not match the pattern are left
# as the full text, which as.integer() converts to NA with a warning.
tb$period <- as.integer(
    str_replace(str_remove(tb$usage, "menos de"),
                ".*há +(\\d+) +ano.*",
                "\\1"))

# Previous vehicle: the text after "Carro anterior: "; NA when that marker is
# not present in `usage`.
tb$lastcar <- if_else(
    condition = str_detect(tb$usage, "Carro anterior:"),
    true = str_replace(tb$usage, ".*Carro anterior: (.*)$", "\\1"),
    false = NA_character_)

# Distance travelled (km). The captured number may contain "." characters,
# which this code treats as thousands separators. Every dot must be stripped
# before the conversion: removing only the first one would turn "1.234.567"
# into "1234.567", which as.integer() silently truncates to 1234.
tb$km <-
    ifelse(str_detect(tb$usage, "\\d+ km"),
           str_replace(tb$usage, ".* ([0-9.]+) km.*", "\\1"),
           NA_character_) %>%
    str_remove_all("\\.") %>%
    as.integer()

# Review timestamp, manufacturer names and model name.
tb <- tb %>%
    mutate(
        # Review date-time parsed from "dd/mm/YYYY HH:MM:SS" text.
        ts = as.POSIXct(ts, format = "%d/%m/%Y %H:%M:%S"),
        # Manufacturer: first word of `spec` when followed by a space;
        # strings that do not match are kept unchanged.
        brand = str_replace(spec, "^(\\w+) .*", "\\1"),
        # Manufacturer of the previous car, extracted the same way.
        brand_last = str_replace(lastcar, "^(\\w+) .*", "\\1"),
        # Model: second word of `spec` (overwrites the original `model`).
        model = str_replace(spec, "^\\w+ (\\w+) .*", "\\1")
    )

str(tb)

#-----------------------------------------------------------------------
# Análise.

# Number of reviews per manufacturer.
count(tb, brand)

# Number of reviews per manufacturer and model.
count(tb, brand, model)

# Most frequent manufacturers of the previous car.
count(tb, brand_last, sort = TRUE)

# Keep the 4 most frequent previous manufacturers and lump the rest together.
tb$brand_last <- fct_lump_n(tb$brand_last, n = 4)

# Previous manufacturer among current Chevrolet owners.
ggplot(data = filter(tb, brand == "Chevrolet"),
       mapping = aes(y = brand_last)) +
    geom_bar()

#-----------------------------------------------------------------------
# Média das variáveis avaliadas por fabricante.

# For each manufacturer/model pair: the mean of every rating column from
# `Estilo` through `Recomendação`, plus the number of reviews `n`.
# summarise(across()) replaces the superseded do() + summarise_at() idiom and
# yields the same columns in the same order; `.groups = "drop"` returns an
# ungrouped result.
tb_agg <- tb %>%
    group_by(brand, model) %>%
    summarise(across(Estilo:Recomendação, mean),
              n = n(),
              .groups = "drop")
tb_agg

#-----------------------------------------------------------------------
# Mapas.

library(geobr)

# State geometries for 2019, downloaded by geobr. The commented lines
# cache/restore the object locally.
states <- read_state(year = 2019)
# saveRDS(states, file = "geobr_states.rds")
# states <- readRDS(file = "geobr_states.rds")
class(states)

# Number of reviews per state code, printed in full.
tb_count <- count(tb, uf)

print(tb_count, n = Inf)
names(tb)

# Volkswagen only: number of reviews and mean `Recomendação` per state code.
tb_count <- summarise(
    group_by(filter(tb, brand == "Volkswagen"), uf),
    n = n(),
    y = mean(Recomendação))

# Attach the summary to the geometries, matching `abbrev_state` to `uf`.
# States without reviews get NA in `n` and `y`.
states <- states %>%
    left_join(tb_count, by = c("abbrev_state" = "uf"))
names(states)

# Choropleth of the mean score `y` by state.
# NOTE(review): in ggplot2 >= 3.4 polygon borders appear to be controlled by
# `linewidth` rather than `size` -- confirm against the installed version.
ggplot(data = states) +
    geom_sf(mapping = aes(fill = y), color = "black", size = .15) +
    scale_fill_distiller(palette = "Blues", direction = 1, name = "Nota") +
    theme_minimal()

#-----------------------------------------------------------------------
# Uma pequena interface shiny.

library(esquisse)

# Open the esquisse Shiny interface on the iris data.
esquisser(iris)

# Petal width vs. petal length, one panel per species.
# The x axis maps Petal.Width, so its label must name the petal, not the
# sepal. Both labels use cm, the unit stated in the iris dataset
# documentation.
ggplot(iris) +
    aes(x = Petal.Width, y = Petal.Length) +
    geom_point(size = 4.3, colour = "#112446") +
    labs(
        x = "Largura da pétala (cm)",
        y = "Comprimento da pétala (cm)",
        title = "Dados do dataset iris",
        subtitle = "Diagrama dispersão",
        caption = "Feito com amor usando o R"
    ) +
    theme_gray() +
    facet_wrap(vars(Species))

str(states)

# Same interface on the state geometries, then on iris again.
esquisser(states)
esquisser(iris)

# Exploratory-data-analysis packages.
library(DataExplorer)
library(skimr)
# The package is named `visdat`; `vis_dat()` is a function inside it, so
# library(vis_dat) fails with "there is no package called 'vis_dat'".
library(visdat)

#-----------------------------------------------------------------------