This can be useful if you are imputing specific values; however, we would
generally recommend imputing with other model-based approaches. See
the simputation package, for example simputation::impute_lm().
Examples
vec <- rnorm(10)
vec[sample(1:10, 3)] <- NA
vec
#> [1] NA -0.25705805 -1.41422789 0.01887104 0.35647301 0.89006961
#> [7] NA NA 0.43744452 -1.65606748
impute_zero(vec)
#> [1] 0.00000000 -0.25705805 -1.41422789 0.01887104 0.35647301 0.89006961
#> [7] 0.00000000 0.00000000 0.43744452 -1.65606748
library(dplyr)
dat <- tibble(
num = rnorm(10),
int = rpois(10, 5),
fct = factor(LETTERS[1:10])
) %>%
mutate(
across(
everything(),
\(x) set_prop_miss(x, prop = 0.25)
)
)
dat
#> # A tibble: 10 × 3
#> num int fct
#> <dbl> <int> <fct>
#> 1 -1.30 5 A
#> 2 2.19 3 B
#> 3 -0.303 NA C
#> 4 1.36 2 NA
#> 5 -0.744 5 E
#> 6 NA 6 NA
#> 7 1.76 7 G
#> 8 0.724 NA H
#> 9 NA 3 I
#> 10 1.38 7 J
dat %>%
nabular() %>%
mutate(
num = impute_fixed(num, -9999),
int = impute_zero(int),
fct = impute_factor(fct, "out")
)
#> # A tibble: 10 × 6
#> num int fct num_NA int_NA fct_NA
#> <dbl> <dbl> <fct> <fct> <fct> <fct>
#> 1 -1.30 5 A !NA !NA !NA
#> 2 2.19 3 B !NA !NA !NA
#> 3 -0.303 0 C !NA NA !NA
#> 4 1.36 2 out !NA !NA NA
#> 5 -0.744 5 E !NA !NA !NA
#> 6 -9999 6 out NA !NA NA
#> 7 1.76 7 G !NA !NA !NA
#> 8 0.724 0 H !NA NA !NA
#> 9 -9999 3 I NA !NA !NA
#> 10 1.38 7 J !NA !NA !NA