#///////////////////////////////////////////////////////////////////////
# Packages -------------------------------------------------------------

library(tidyverse)

#///////////////////////////////////////////////////////////////////////
# Import: business registry (Cadastro Central de Empresas) -------------

# Alternative input file (MS), kept for reference.
# url <- "http://leg.ufpr.br/~walmes/data/Cadastro-Central-de-Empresas-MS.csv"
url <- "http://leg.ufpr.br/~walmes/data/Cadastro-Central-de-Empresas-PR.csv"

# Base R version of the import below, kept for reference.
# tb <- read.csv(url,
#                header = TRUE,
#                comment.char = "#",
#                stringsAsFactors = FALSE,
#                quote = "\"",
#                na.strings = "-")
# str(tb)

# Lines starting with "#" are skipped and "-" is read as a missing value.
tb <- read_csv(url, comment = "#", quote = "\"", na = "-")
str(tb)

# Exploratory overviews of the raw table (structure, summaries, missingness).
skimr::skim(tb)
summarytools::descr(tb)
summarytools::dfSummary(tb)
visdat::vis_dat(tb)
DataExplorer::plot_missing(tb)

# Package installation helpers, kept for reference.
# help(pkg_install, package = "pak", j)
# pak::pkg_install("")
# pak::pkg_sysreqs("summarytools", upgrade = FALSE)
# pak::pkg_deps_tree("summarytools")

# Standardise the variable names.
tb <- janitor::clean_names(tb)
str(tb)

# Keep the rows whose `posicao` is 1 to 6 and stack every column whose
# cleaned name starts with "x" into (ano, resp) pairs.
tb_long <- tb |>
  filter(posicao %in% 1:6) |>
  select(nome, localidade, starts_with("x")) |>
  pivot_longer(
    cols = starts_with("x"),
    names_to = "ano",
    values_to = "resp"
  )

# Spread the values of `nome` into one column each.
tb_wide <- pivot_wider(tb_long, names_from = nome, values_from = resp)
tb_wide

# Clean the new column names, then strip the "x" prefix from `ano` and
# store it as an integer.
tb_wide <- tb_wide |>
  janitor::clean_names() |>
  mutate(ano = as.integer(str_remove(ano, "x")))
tb_wide

visdat::vis_dat(tb_wide)
DataExplorer::plot_missing(tb_wide)

#///////////////////////////////////////////////////////////////////////

tb <- tb_wide
str(tb)

# Short names assigned by column position (columns 3 to 8 of the wide table).
tb <- rename(
  tb,
  unid = 3,
  n_empresas = 4,
  ocup = 5,
  ocup_assal = 6,
  sal_medio = 7,
  outras_remun = 8
)

# Total number of companies in Paraná.
# Yearly aggregates over all rows of `tb`: company count, unweighted mean
# of `sal_medio`, and a mean of `sal_medio` weighted by `ocup`
# (`sal_medio2`).
# NOTE(review): `sal_tot` drops rows where `ocup * sal_medio` is NA, while
# `ocup_tot` still counts `ocup` from rows whose `sal_medio` is NA, so
# `sal_medio2` is pulled down if such rows exist -- confirm against the data.
tb |>
  group_by(ano) |>
  mutate(sal_tot = ocup * sal_medio) |>
  summarise(
    n_empresas = sum(n_empresas, na.rm = TRUE),
    sal_medio = mean(sal_medio, na.rm = TRUE),
    sal_tot = sum(sal_tot, na.rm = TRUE),
    ocup_tot = sum(ocup, na.rm = TRUE),
    sal_medio2 = sal_tot / ocup_tot
  )

# Demonstration of lead() and lag() with one and two positions of shift.
cbind(1:5, lead(1:5), lead(1:5, n = 2), lag(1:5), lag(1:5, n = 2))

# Year-over-year percentage change of `ocup` for Curitiba.
# lag() uses the current row order, so sort by year first; otherwise
# `past` would not be the previous year's value.
tb |>
  filter(localidade == "Curitiba") |>
  arrange(ano) |>
  mutate(
    past = lag(ocup),
    perc = 100 * (ocup - past) / past
  ) |>
  select(ano, ocup, past, perc)

#///////////////////////////////////////////////////////////////////////

# Second data set (tab-separated); `url` and `tb` are reused from here on.
url <- "http://leg.ufpr.br/~walmes/data/hb20_venda_webmotors_280314.txt"
tb <- read_tsv(url)
# Drop the column-spec attribute that readr attaches to the table.
attr(tb, "spec") <- NULL
str(tb)

summarytools::dfSummary(tb)
visdat::vis_dat(tb)

# Absolute and relative (%) frequency of each value of `carro`.
tb |>
  # filter(km == 0) |>
  count(carro) |>
  mutate(freq = 100 * n / sum(n))

# Mean price of the rows with km == 0, per `carro`.
tb |>
  filter(km == 0) |>
  group_by(carro) |>
  summarise(preco_medio = mean(preco, na.rm = TRUE))

tb |>
  count(especificacao)

# Summary statistics of price for km == 0, per `carro` ...
tb |>
  filter(km == 0) |>
  group_by(carro) |>
  rstatix::get_summary_stats(preco)

# ... and per `carro` and `anomod`.
tb |>
  filter(km == 0) |>
  group_by(carro, anomod) |>
  rstatix::get_summary_stats(preco)

# Price against km with a least-squares line, one panel per `carro`.
ggplot(tb, aes(x = km, y = preco)) +
  facet_wrap(~carro) +
  geom_smooth(method = "lm", se = FALSE) +
  geom_point()

# Mean price of the "hb20" rows with km == 0. Missing prices are ignored,
# matching the `preco_medio` summary above.
tb |>
  filter(km == 0, carro == "hb20") |>
  pull(preco) |>
  mean(na.rm = TRUE)

# Linear model of price on km (in thousands) for "hb20" rows with km > 0.
m0 <- lm(
  preco ~ I(km / 1000),
  data = filter(tb, km > 0, carro == "hb20")
)
coef(m0)

#///////////////////////////////////////////////////////////////////////