It can be useful in exploratory graphics to impute data outside the range of
the data. `impute_below()` replaces missing values in a numeric variable
with values placed below the range of the observed data (by default, 10
percent of the range below the minimum; see `prop_below`), plus some
jittered noise (see `jitter`) to separate repeated values, so that missing
values can be visualised along with the rest of the data. For character or
factor variables, it instead adds a new string or label (for example,
"missing").
Examples
library(dplyr)
vec <- rnorm(10)
vec[sample(1:10, 3)] <- NA
impute_below(vec)
#> [1] -0.008593142 -0.530161130 -0.561854135 0.509078646 0.115911160
#> [6] -0.751444156 -0.252584949 -0.690342117 0.985024011 -0.742595875
impute_below(vec, prop_below = 0.25)
#> [1] -0.008593142 -0.530161130 -0.561854135 0.509078646 0.115911160
#> [6] -0.983475878 -0.252584949 -0.922373839 0.985024011 -0.974627597
impute_below(vec,
prop_below = 0.25,
jitter = 0.2)
#> [1] -0.008593142 -0.530161130 -0.561854135 0.509078646 0.115911160
#> [6] -1.088182499 -0.252584949 -0.843774343 0.985024011 -1.052789373
dat <- tibble(
num = rnorm(10),
int = as.integer(rpois(10, 5)),
fct = factor(LETTERS[1:10])
) %>%
mutate(
across(
everything(),
\(x) set_prop_miss(x, prop = 0.25)
)
)
dat
#> # A tibble: 10 × 3
#> num int fct
#> <dbl> <int> <fct>
#> 1 0.721 10 A
#> 2 -0.303 5 B
#> 3 -0.730 6 C
#> 4 0.0459 NA D
#> 5 0.271 7 NA
#> 6 -1.74 5 F
#> 7 -0.290 1 NA
#> 8 -0.686 5 H
#> 9 NA NA I
#> 10 NA 3 J
dat %>%
nabular() %>%
mutate(
num = impute_below(num),
int = impute_below(int),
fct = impute_below(fct),
)
#> # A tibble: 10 × 6
#> num int fct num_NA int_NA fct_NA
#> <dbl> <dbl> <fct> <fct> <fct> <fct>
#> 1 0.721 10 A !NA !NA !NA
#> 2 -0.303 5 B !NA !NA !NA
#> 3 -0.730 6 C !NA !NA !NA
#> 4 0.0459 -0.0751 D !NA NA !NA
#> 5 0.271 7 missing !NA !NA NA
#> 6 -1.74 5 F !NA !NA !NA
#> 7 -0.290 1 missing !NA !NA NA
#> 8 -0.686 5 H !NA !NA !NA
#> 9 -2.01 0.0370 I NA NA !NA
#> 10 -2.03 3 J NA !NA !NA
dat %>%
nabular() %>%
mutate(
across(
where(is.numeric),
impute_below
)
)
#> # A tibble: 10 × 6
#> num int fct num_NA int_NA fct_NA
#> <dbl> <dbl> <fct> <fct> <fct> <fct>
#> 1 0.721 10 A !NA !NA !NA
#> 2 -0.303 5 B !NA !NA !NA
#> 3 -0.730 6 C !NA !NA !NA
#> 4 0.0459 -0.0751 D !NA NA !NA
#> 5 0.271 7 NA !NA !NA NA
#> 6 -1.74 5 F !NA !NA !NA
#> 7 -0.290 1 NA !NA !NA NA
#> 8 -0.686 5 H !NA !NA !NA
#> 9 -2.01 0.0370 I NA NA !NA
#> 10 -2.03 3 J NA !NA !NA
dat %>%
nabular() %>%
mutate(
across(
c("num", "int"),
impute_below
)
)
#> # A tibble: 10 × 6
#> num int fct num_NA int_NA fct_NA
#> <dbl> <dbl> <fct> <fct> <fct> <fct>
#> 1 0.721 10 A !NA !NA !NA
#> 2 -0.303 5 B !NA !NA !NA
#> 3 -0.730 6 C !NA !NA !NA
#> 4 0.0459 -0.0751 D !NA NA !NA
#> 5 0.271 7 NA !NA !NA NA
#> 6 -1.74 5 F !NA !NA !NA
#> 7 -0.290 1 NA !NA !NA NA
#> 8 -0.686 5 H !NA !NA !NA
#> 9 -2.01 0.0370 I NA NA !NA
#> 10 -2.03 3 J NA !NA !NA