Skip to contents

It can be useful in exploratory graphics to impute missing values outside the range of the observed data. impute_below() replaces the missing values in a numeric variable with values 10 percent below the range of the observed values, plus some jittered noise to separate repeated values, so that the missing values can be visualised along with the rest of the data. For character or factor variables, it instead adds a new string or label (for example, "missing" in the factor example below).

Usage

impute_below(x, ...)

Arguments

x

a variable of interest whose missing values will be shifted below the range of the observed data

...

extra arguments to pass, such as prop_below and jitter (see Examples)

Examples

library(dplyr)
vec <- rnorm(10)

vec[sample(1:10, 3)] <- NA

impute_below(vec)
#>  [1] -0.008593142 -0.530161130 -0.561854135  0.509078646  0.115911160
#>  [6] -0.751444156 -0.252584949 -0.690342117  0.985024011 -0.742595875
impute_below(vec, prop_below = 0.25)
#>  [1] -0.008593142 -0.530161130 -0.561854135  0.509078646  0.115911160
#>  [6] -0.983475878 -0.252584949 -0.922373839  0.985024011 -0.974627597
impute_below(vec,
            prop_below = 0.25,
            jitter = 0.2)
#>  [1] -0.008593142 -0.530161130 -0.561854135  0.509078646  0.115911160
#>  [6] -1.088182499 -0.252584949 -0.843774343  0.985024011 -1.052789373

dat <- tibble(
 num = rnorm(10),
 int = as.integer(rpois(10, 5)),
 fct = factor(LETTERS[1:10])
) %>%
 mutate(
   across(
     everything(),
     \(x) set_prop_miss(x, prop = 0.25)
   )
 )

dat
#> # A tibble: 10 × 3
#>        num   int fct  
#>      <dbl> <int> <fct>
#>  1  0.721     10 A    
#>  2 -0.303      5 B    
#>  3 -0.730      6 C    
#>  4  0.0459    NA D    
#>  5  0.271      7 NA   
#>  6 -1.74       5 F    
#>  7 -0.290      1 NA   
#>  8 -0.686      5 H    
#>  9 NA         NA I    
#> 10 NA          3 J    

dat %>%
 nabular() %>%
 mutate(
   num = impute_below(num),
   int = impute_below(int),
   fct = impute_below(fct),
 )
#> # A tibble: 10 × 6
#>        num     int fct     num_NA int_NA fct_NA
#>      <dbl>   <dbl> <fct>   <fct>  <fct>  <fct> 
#>  1  0.721  10      A       !NA    !NA    !NA   
#>  2 -0.303   5      B       !NA    !NA    !NA   
#>  3 -0.730   6      C       !NA    !NA    !NA   
#>  4  0.0459 -0.0751 D       !NA    NA     !NA   
#>  5  0.271   7      missing !NA    !NA    NA    
#>  6 -1.74    5      F       !NA    !NA    !NA   
#>  7 -0.290   1      missing !NA    !NA    NA    
#>  8 -0.686   5      H       !NA    !NA    !NA   
#>  9 -2.01    0.0370 I       NA     NA     !NA   
#> 10 -2.03    3      J       NA     !NA    !NA   

dat %>%
 nabular() %>%
 mutate(
   across(
     where(is.numeric),
     impute_below
   )
 )
#> # A tibble: 10 × 6
#>        num     int fct   num_NA int_NA fct_NA
#>      <dbl>   <dbl> <fct> <fct>  <fct>  <fct> 
#>  1  0.721  10      A     !NA    !NA    !NA   
#>  2 -0.303   5      B     !NA    !NA    !NA   
#>  3 -0.730   6      C     !NA    !NA    !NA   
#>  4  0.0459 -0.0751 D     !NA    NA     !NA   
#>  5  0.271   7      NA    !NA    !NA    NA    
#>  6 -1.74    5      F     !NA    !NA    !NA   
#>  7 -0.290   1      NA    !NA    !NA    NA    
#>  8 -0.686   5      H     !NA    !NA    !NA   
#>  9 -2.01    0.0370 I     NA     NA     !NA   
#> 10 -2.03    3      J     NA     !NA    !NA   

dat %>%
 nabular() %>%
 mutate(
   across(
     c("num", "int"),
     impute_below
   )
 )
#> # A tibble: 10 × 6
#>        num     int fct   num_NA int_NA fct_NA
#>      <dbl>   <dbl> <fct> <fct>  <fct>  <fct> 
#>  1  0.721  10      A     !NA    !NA    !NA   
#>  2 -0.303   5      B     !NA    !NA    !NA   
#>  3 -0.730   6      C     !NA    !NA    !NA   
#>  4  0.0459 -0.0751 D     !NA    NA     !NA   
#>  5  0.271   7      NA    !NA    !NA    NA    
#>  6 -1.74    5      F     !NA    !NA    !NA   
#>  7 -0.290   1      NA    !NA    !NA    NA    
#>  8 -0.686   5      H     !NA    !NA    !NA   
#>  9 -2.01    0.0370 I     NA     NA     !NA   
#> 10 -2.03    3      J     NA     !NA    !NA