# Global knitr chunk options applied while the codebook is rendered.
knitr::opts_chunk$set(
  echo = TRUE,     # include the R source of each chunk in the output
  error = TRUE,    # keep rendering when a chunk errors (easier debugging)
  message = FALSE, # hide messages emitted during codebook generation
  warning = FALSE  # hide warnings emitted during codebook generation
)
# Use ggplot2's black-and-white theme as the session default for plots.
ggplot2::theme_set(ggplot2::theme_bw())
# Base directory of the input files (read below as harmo_<year>.csv).
basedir <- "/home/ecco_rais/data/clean/RAIS-homogenized/output/"
# First and last year of the range to document; adjust as necessary.
# In the visible part of the script these are referenced only by the
# commented-out year loop.
startyear <- 2003
endyear <- 2004
# libraries
library(codebook)
library(rio)
# Start the codebook loop (currently disabled: a single year is processed).
# for (year in startyear:endyear) {
year <- "2017"
# Read the CSV for the selected year. fread() is called through its
# namespace because data.table is not attached by the visible library()
# calls (only codebook and rio are).
# NOTE(review): the rendered output reports integer64 columns that skimr
# cannot summarise and strings that are "invalid in this locale"; fread()'s
# `integer64` and `encoding` arguments may address both -- confirm the
# intended column types and the file's encoding before changing them.
codebook_data <- data.table::fread(
  file.path(basedir, paste0("harmo_", year, ".csv"))
)
# Flag values that should be treated as missing.
# Omit this call if your missing values are already properly labelled.
# NOTE(review): the data are read from a CSV, which carries no value labels,
# so with only_labelled = TRUE this step may have little effect -- confirm.
codebook_data <- detect_missing(
  codebook_data,
  # only labelled values are autodetected as missing
  only_labelled = TRUE,
  # FALSE: negative values are NOT treated as missing values
  negative_values_are_missing = FALSE,
  # 99/999 are missing values, if they are more than 5 MAD from the median
  ninety_nine_problems = TRUE
)
# Without formr metadata, the codebook package has to guess which items form
# a scale. detect_scales() finds item aggregates with names like
#   scale = scale_1 + scale_2R + scale_3R
# Identifying these aggregates lets codebook() compute reliabilities
# automatically. Items are not reversed automatically.
codebook_data <- codebook::detect_scales(codebook_data)
## Warning in detect_scales(codebook_data): cbo items found, but no aggregate
## Warning in detect_scales(codebook_data): cnae items found, but no aggregate
## Warning in detect_scales(codebook_data): active items found, but no aggregate
# Coerce the data to a data.table before rendering. as.data.table() is called
# through its namespace because data.table is not attached by the visible
# library() calls (only codebook and rio are).
codebook_data <- data.table::as.data.table(codebook_data)
# Render the codebook for the data set; its output (per-variable summaries
# and JSON-LD metadata) follows.
codebook(codebook_data)
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning in grepl("^\\s+$", x): input string 1 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 2 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 3 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 4 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 5 is invalid in this locale
## Warning in max(f): no non-missing arguments to max; returning -Inf
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning in max(f): no non-missing arguments to max; returning -Inf
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning in grepl("^\\s+$", x): input string 1 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 2 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 3 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 4 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 5 is invalid in this locale
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning: Couldn't find skimmers for class: integer64; No user-defined `sfl` provided. Falling
## back to `character`.
## Warning in grepl("^\\s+$", x): input string 1 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 2 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 3 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 4 is invalid in this locale
## Warning in grepl("^\\s+$", x): input string 5 is invalid in this locale
Dataset name: codebook_data
The dataset has N=65655882 rows and 67 columns. 0 rows have no missing values on any column.
|
#Variables
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
adm_date | numeric | 0 | 1 | 1e+06 | 5072017 | 3.1e+07 | 8776093 | 8510237 | ▇▂▂▂▁ | NA |
## Error in if (stats::median(table(x)) == 1) {: missing value where TRUE/FALSE needed
## No non-missing values to show.
65655882 missing values.
name | data_type | n_missing | complete_rate | count | label |
---|---|---|---|---|---|
cbo94 | logical | 65655882 | 0 | : | NA |
70101 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
cbo02 | numeric | 70101 | 0.9989323 | 10105 | 513435 | 992225 | 492739.3 | 205266.5 | ▁▃▇▅▁ | NA |
## Error in `ggplot2::geom_histogram()`:
## ! Problem while computing position.
## ℹ Error occurred in the 1st layer.
## Caused by error in `if (...) NULL`:
## ! missing value where TRUE/FALSE needed
0 missing values.
name | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace | label |
---|---|---|---|---|---|---|---|---|---|
cei | character | 0 | 1 | 46906 | 0 | 1 | 21 | 0 | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
zip_establishment | numeric | 0 | 1 | 1e+06 | 4.2e+07 | 1e+08 | 46633787 | 30946994 | ▇▅▃▅▅ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
cnae20 | numeric | 0 | 1 | 1113 | 52231 | 99008 | 56984.44 | 25982.18 | ▂▂▇▂▇ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
cnae20sub | numeric | 0 | 1 | 111301 | 5223100 | 9900800 | 5698451 | 2598216 | ▂▂▇▂▇ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
cnae95 | numeric | 0 | 1 | 1112 | 55247 | 99007 | 57084.57 | 22556.15 | ▂▂▆▇▂ | NA |
## Error in `ggplot2::geom_histogram()`:
## ! Problem while computing stat.
## ℹ Error occurred in the 1st layer.
## Caused by error in `seq_len()`:
## ! argument must be coercible to non-negative integer
0 missing values.
name | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace | label |
---|---|---|---|---|---|---|---|---|---|
firmID | character | 0 | 1 | 3845034 | 0 | 15 | 21 | 0 | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
cnpj_root | numeric | 0 | 1 | 0 | 1.4e+07 | 1e+08 | 24372359 | 25816991 | ▇▂▁▁▁ | NA |
## Error in `ggplot2::geom_histogram()`:
## ! Problem while computing position.
## ℹ Error occurred in the 1st layer.
## Caused by error in `if (...) NULL`:
## ! missing value where TRUE/FALSE needed
0 missing values.
name | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace | label |
---|---|---|---|---|---|---|---|---|---|
cpf | character | 0 | 1 | 55488531 | 0 | 1 | 21 | 0 | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
dob | numeric | 0 | 1 | 1e+06 | 1.6e+07 | 3.1e+07 | 15677804 | 8783089 | ▇▇▇▇▇ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace | label |
---|---|---|---|---|---|---|---|---|---|
termination_day | character | 0 | 1 | 32 | 0 | 2 | 2 | 0 | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
schooling | numeric | 0 | 1 | 1 | 7 | 11 | 6.793991 | 1.701364 | ▁▂▇▃▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
ibge_subsetor | numeric | 0 | 1 | 1 | 19 | 25 | 18.12556 | 5.287318 | ▁▁▂▇▇ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
age | numeric | 0 | 1 | 0 | 35 | 100 | 36.69655 | 11.72856 | ▁▇▅▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
cei_avail | numeric | 0 | 1 | 0 | 0 | 1 | 0.0133853 | 0.1149178 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
pat | numeric | 0 | 1 | 0 | 0 | 1 | 0.2888026 | 0.453206 | ▇▁▁▁▃ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
disabled | numeric | 0 | 1 | 0 | 0 | 1 | 0.0086419 | 0.0925594 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
simples | numeric | 0 | 1 | 0 | 0 | 1 | 0.2429833 | 0.4288851 | ▇▁▁▁▂ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
union | numeric | 0 | 1 | 0 | 0 | 1 | 0.0738842 | 0.2615823 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
intermittent_contract | numeric | 0 | 1 | 0 | 0 | 1 | 0.0001243 | 0.0111503 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
partial_job | numeric | 0 | 1 | 0 | 0 | 1 | 0.0030312 | 0.0549724 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
alvara_ind | numeric | 0 | 1 | 0 | 0 | 1 | 4.32e-05 | 0.0065745 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
termination_month | numeric | 0 | 1 | 0 | 0 | 12 | 1.974901 | 3.608127 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
separation_cause | numeric | 0 | 1 | 0 | 0 | 90 | 4.686708 | 8.904724 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
muni_job | numeric | 0 | 1 | 0 | 0 | 530010 | 155566.7 | 180985.4 | ▇▁▂▃▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
muni | numeric | 0 | 1 | 110001 | 351880 | 530010 | 345536.5 | 88460.11 | ▁▂▇▂▂ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
nationality | numeric | 0 | 1 | 10 | 10 | 80 | 10.08543 | 1.784163 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
legal_form | numeric | 0 | 1 | 1015 | 2062 | 5037 | 2047.48 | 680.198 | ▂▇▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace | label |
---|---|---|---|---|---|---|---|---|---|
name | character | 0 | 1 | 40785749 | 0 | 2 | 52 | 0 | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
ctps | numeric | 0 | 1 | 0 | 68175 | 1e+08 | 1448106 | 3795366 | ▇▁▁▁▁ | NA |
## Error in `ggplot2::geom_histogram()`:
## ! Problem while computing stat.
## ℹ Error occurred in the 1st layer.
## Caused by error in `seq_len()`:
## ! argument must be coercible to non-negative integer
0 missing values.
name | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace | label |
---|---|---|---|---|---|---|---|---|---|
pis | character | 0 | 1 | 55561692 | 0 | 13 | 21 | 0 | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
hired_hours | numeric | 0 | 1 | 1 | 44 | 44 | 40.77122 | 6.827322 | ▁▁▁▁▇ | NA |
10319553 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
race_color | numeric | 10319553 | 0.8428236 | 1 | 4 | 9 | 4.900209 | 3.04816 | ▇▁▁▁▇ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace | label |
---|---|---|---|---|---|---|---|---|---|
estab_name | character | 0 | 1 | 3502272 | 0 | 3 | 52 | 0 | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
sex | numeric | 0 | 1 | 0 | 0 | 1 | 0.4326729 | 0.4954463 | ▇▁▁▁▆ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
establishment_size | numeric | 0 | 1 | 1 | 6 | 10 | 6.278815 | 2.852224 | ▃▅▆▅▇ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
tenure | numeric | 0 | 1 | 0 | 24 | 600 | 55.74033 | 80.1713 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
adm_type | numeric | 0 | 1 | 0 | 0 | 14 | 0.6359371 | 1.036817 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
disability_type | numeric | 0 | 1 | 0 | 0 | 6 | 0.0192074 | 0.2521726 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
firmID_type | numeric | 0 | 1 | 0 | 1 | 1 | 0.9716366 | 0.1660087 | ▁▁▁▁▇ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
wage_type | numeric | 0 | 1 | 1 | 1 | 7 | 1.309368 | 1.1056 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
contract_type | numeric | 0 | 1 | 10 | 10 | 97 | 16.71512 | 14.84768 | ▇▂▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
active1231 | numeric | 0 | 1 | 0 | 1 | 1 | 0.7049116 | 0.4560825 | ▃▁▁▁▇ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_april | numeric | 0 | 1 | 0 | 1124 | 140540 | 1726.511 | 3261.811 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_august | numeric | 0 | 1 | 0 | 1155 | 140552 | 1758.658 | 3270.223 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_feb | numeric | 0 | 1 | 0 | 1085 | 140535 | 1691.723 | 3298.412 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_jan | numeric | 0 | 1 | 0 | 1119 | 140484 | 1776.853 | 3469.528 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_july | numeric | 0 | 1 | 0 | 1157 | 140556 | 1776.223 | 3316.557 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_june | numeric | 0 | 1 | 0 | 1142 | 140534 | 1754.728 | 3283.199 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_may | numeric | 0 | 1 | 0 | 1149 | 140526 | 1750.196 | 3254.601 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_march | numeric | 0 | 1 | 0 | 1123 | 140546 | 1726.374 | 3333.645 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_nov | numeric | 0 | 1 | 0 | 1171 | 140461 | 1779.624 | 3277.2 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_oct | numeric | 0 | 1 | 0 | 1178 | 140516 | 1778.049 | 3248.745 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_sept | numeric | 0 | 1 | 0 | 1153 | 140495 | 1752.634 | 3240.089 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_dec_mw | numeric | 0 | 1 | 0 | 1.4 | 150 | 2.206376 | 3.824295 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
earn_dec | numeric | 0 | 1 | 0 | 1323 | 140556 | 2070.504 | 3584.209 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
mean_earn_mw | numeric | 0 | 1 | 0 | 1.7 | 150 | 2.71563 | 3.803707 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
mean_earn | numeric | 0 | 1 | 0 | 1568 | 140550 | 2552.184 | 3564.445 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
hired_wage | numeric | 0 | 1 | 0 | 1027 | 1e+07 | 1411.773 | 5264.697 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
last_wage | numeric | 0 | 1 | 0 | 1450 | 1e+07 | 2417.658 | 6576.111 | ▇▁▁▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
yr | numeric | 0 | 1 | 2017 | 2017 | 2017 | 2017 | 0 | ▁▁▇▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
termination_year | numeric | 0 | 1 | 2017 | 2017 | 2017 | 2017 | 0 | ▁▁▇▁▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
adm_day | numeric | 0 | 1 | 1 | 5 | 31 | 8.711481 | 8.509995 | ▇▂▂▂▁ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
adm_month | numeric | 0 | 1 | 1 | 6 | 12 | 6.259857 | 3.422801 | ▇▅▅▅▆ | NA |
0 missing values.
name | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist | label |
---|---|---|---|---|---|---|---|---|---|---|
adm_year | numeric | 0 | 1 | 1938 | 2015 | 2017 | 2012.744 | 6.627319 | ▁▁▁▁▇ | NA |
The following JSON-LD can be found by search engines, if you share this codebook publicly on the web.
{
"name": "codebook_data",
"datePublished": "2024-06-07",
"description": "The dataset has N=65655882 rows and 67 columns.\n0 rows have no missing values on any column.\n\n\n## Table of variables\nThis table contains variable names, labels, and number of missing values.\nSee the complete codebook for more.\n\n|name |label | n_missing|\n|:---------------------|:-----|---------:|\n|adm_date |NA | 0|\n|cbo94 |NA | 65655882|\n|cbo02 |NA | 70101|\n|cei |NA | 0|\n|zip_establishment |NA | 0|\n|cnae20 |NA | 0|\n|cnae20sub |NA | 0|\n|cnae95 |NA | 0|\n|firmID |NA | 0|\n|cnpj_root |NA | 0|\n|cpf |NA | 0|\n|dob |NA | 0|\n|termination_day |NA | 0|\n|schooling |NA | 0|\n|ibge_subsetor |NA | 0|\n|age |NA | 0|\n|cei_avail |NA | 0|\n|pat |NA | 0|\n|disabled |NA | 0|\n|simples |NA | 0|\n|union |NA | 0|\n|intermittent_contract |NA | 0|\n|partial_job |NA | 0|\n|alvara_ind |NA | 0|\n|termination_month |NA | 0|\n|separation_cause |NA | 0|\n|muni_job |NA | 0|\n|muni |NA | 0|\n|nationality |NA | 0|\n|legal_form |NA | 0|\n|name |NA | 0|\n|ctps |NA | 0|\n|pis |NA | 0|\n|hired_hours |NA | 0|\n|race_color |NA | 10319553|\n|estab_name |NA | 0|\n|sex |NA | 0|\n|establishment_size |NA | 0|\n|tenure |NA | 0|\n|adm_type |NA | 0|\n|disability_type |NA | 0|\n|firmID_type |NA | 0|\n|wage_type |NA | 0|\n|contract_type |NA | 0|\n|active1231 |NA | 0|\n|earn_april |NA | 0|\n|earn_august |NA | 0|\n|earn_feb |NA | 0|\n|earn_jan |NA | 0|\n|earn_july |NA | 0|\n|earn_june |NA | 0|\n|earn_may |NA | 0|\n|earn_march |NA | 0|\n|earn_nov |NA | 0|\n|earn_oct |NA | 0|\n|earn_sept |NA | 0|\n|earn_dec_mw |NA | 0|\n|earn_dec |NA | 0|\n|mean_earn_mw |NA | 0|\n|mean_earn |NA | 0|\n|hired_wage |NA | 0|\n|last_wage |NA | 0|\n|yr |NA | 0|\n|termination_year |NA | 0|\n|adm_day |NA | 0|\n|adm_month |NA | 0|\n|adm_year |NA | 0|\n\n### Note\nThis dataset was automatically described using the [codebook R package](https://rubenarslan.github.io/codebook/) (version 0.9.2).",
"keywords": ["adm_date", "cbo94", "cbo02", "cei", "zip_establishment", "cnae20", "cnae20sub", "cnae95", "firmID", "cnpj_root", "cpf", "dob", "termination_day", "schooling", "ibge_subsetor", "age", "cei_avail", "pat", "disabled", "simples", "union", "intermittent_contract", "partial_job", "alvara_ind", "termination_month", "separation_cause", "muni_job", "muni", "nationality", "legal_form", "name", "ctps", "pis", "hired_hours", "race_color", "estab_name", "sex", "establishment_size", "tenure", "adm_type", "disability_type", "firmID_type", "wage_type", "contract_type", "active1231", "earn_april", "earn_august", "earn_feb", "earn_jan", "earn_july", "earn_june", "earn_may", "earn_march", "earn_nov", "earn_oct", "earn_sept", "earn_dec_mw", "earn_dec", "mean_earn_mw", "mean_earn", "hired_wage", "last_wage", "yr", "termination_year", "adm_day", "adm_month", "adm_year"],
"@context": "http://schema.org/",
"@type": "Dataset",
"variableMeasured": [
{
"name": "adm_date",
"@type": "propertyValue"
},
{
"name": "cbo94",
"@type": "propertyValue"
},
{
"name": "cbo02",
"@type": "propertyValue"
},
{
"name": "cei",
"@type": "propertyValue"
},
{
"name": "zip_establishment",
"@type": "propertyValue"
},
{
"name": "cnae20",
"@type": "propertyValue"
},
{
"name": "cnae20sub",
"@type": "propertyValue"
},
{
"name": "cnae95",
"@type": "propertyValue"
},
{
"name": "firmID",
"@type": "propertyValue"
},
{
"name": "cnpj_root",
"@type": "propertyValue"
},
{
"name": "cpf",
"@type": "propertyValue"
},
{
"name": "dob",
"@type": "propertyValue"
},
{
"name": "termination_day",
"@type": "propertyValue"
},
{
"name": "schooling",
"@type": "propertyValue"
},
{
"name": "ibge_subsetor",
"@type": "propertyValue"
},
{
"name": "age",
"@type": "propertyValue"
},
{
"name": "cei_avail",
"@type": "propertyValue"
},
{
"name": "pat",
"@type": "propertyValue"
},
{
"name": "disabled",
"@type": "propertyValue"
},
{
"name": "simples",
"@type": "propertyValue"
},
{
"name": "union",
"@type": "propertyValue"
},
{
"name": "intermittent_contract",
"@type": "propertyValue"
},
{
"name": "partial_job",
"@type": "propertyValue"
},
{
"name": "alvara_ind",
"@type": "propertyValue"
},
{
"name": "termination_month",
"@type": "propertyValue"
},
{
"name": "separation_cause",
"@type": "propertyValue"
},
{
"name": "muni_job",
"@type": "propertyValue"
},
{
"name": "muni",
"@type": "propertyValue"
},
{
"name": "nationality",
"@type": "propertyValue"
},
{
"name": "legal_form",
"@type": "propertyValue"
},
{
"name": "name",
"@type": "propertyValue"
},
{
"name": "ctps",
"@type": "propertyValue"
},
{
"name": "pis",
"@type": "propertyValue"
},
{
"name": "hired_hours",
"@type": "propertyValue"
},
{
"name": "race_color",
"@type": "propertyValue"
},
{
"name": "estab_name",
"@type": "propertyValue"
},
{
"name": "sex",
"@type": "propertyValue"
},
{
"name": "establishment_size",
"@type": "propertyValue"
},
{
"name": "tenure",
"@type": "propertyValue"
},
{
"name": "adm_type",
"@type": "propertyValue"
},
{
"name": "disability_type",
"@type": "propertyValue"
},
{
"name": "firmID_type",
"@type": "propertyValue"
},
{
"name": "wage_type",
"@type": "propertyValue"
},
{
"name": "contract_type",
"@type": "propertyValue"
},
{
"name": "active1231",
"@type": "propertyValue"
},
{
"name": "earn_april",
"@type": "propertyValue"
},
{
"name": "earn_august",
"@type": "propertyValue"
},
{
"name": "earn_feb",
"@type": "propertyValue"
},
{
"name": "earn_jan",
"@type": "propertyValue"
},
{
"name": "earn_july",
"@type": "propertyValue"
},
{
"name": "earn_june",
"@type": "propertyValue"
},
{
"name": "earn_may",
"@type": "propertyValue"
},
{
"name": "earn_march",
"@type": "propertyValue"
},
{
"name": "earn_nov",
"@type": "propertyValue"
},
{
"name": "earn_oct",
"@type": "propertyValue"
},
{
"name": "earn_sept",
"@type": "propertyValue"
},
{
"name": "earn_dec_mw",
"@type": "propertyValue"
},
{
"name": "earn_dec",
"@type": "propertyValue"
},
{
"name": "mean_earn_mw",
"@type": "propertyValue"
},
{
"name": "mean_earn",
"@type": "propertyValue"
},
{
"name": "hired_wage",
"@type": "propertyValue"
},
{
"name": "last_wage",
"@type": "propertyValue"
},
{
"name": "yr",
"@type": "propertyValue"
},
{
"name": "termination_year",
"@type": "propertyValue"
},
{
"name": "adm_day",
"@type": "propertyValue"
},
{
"name": "adm_month",
"@type": "propertyValue"
},
{
"name": "adm_year",
"@type": "propertyValue"
}
]
}
# } # end year loop