Skip to contents

Replace the missing values in a vector with the median of its observed values

Usage

impute_median(x)

# S3 method for default
impute_median(x)

# S3 method for factor
impute_median(x)

Arguments

x

A vector, possibly containing missing values (NA)

Value

vector with missing values replaced by the median

Examples


vec <- rnorm(10)

vec[sample(1:10, 3)] <- NA

impute_median(vec)
#>  [1] -0.7289445 -0.9342655 -1.2804352 -0.3857275 -0.3857275  0.2674186
#>  [7] -0.3857275 -0.1630526  0.2793086 -0.3857275

library(dplyr)

dat <- tibble(
  num = rnorm(10),
  int = as.integer(rpois(10, 5)),
  fct = factor(LETTERS[1:10])
) %>%
  mutate(
    across(
      everything(),
      \(x) set_prop_miss(x, prop = 0.25)
    )
  )

dat
#> # A tibble: 10 × 3
#>        num   int fct  
#>      <dbl> <int> <fct>
#>  1  0.449      8 A    
#>  2 -0.306      6 B    
#>  3 -0.0124    11 C    
#>  4 -1.09       6 D    
#>  5 NA          3 NA   
#>  6 -0.0466     4 F    
#>  7 -1.44      NA G    
#>  8 NA          5 H    
#>  9 -0.397     NA NA   
#> 10  0.664      3 J    

dat %>%
  nabular() %>%
  mutate(
    num = impute_median(num),
    int = impute_median(int),
  )
#> # A tibble: 10 × 6
#>        num   int fct   num_NA int_NA fct_NA
#>      <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1  0.449    8   A     !NA    !NA    !NA   
#>  2 -0.306    6   B     !NA    !NA    !NA   
#>  3 -0.0124  11   C     !NA    !NA    !NA   
#>  4 -1.09     6   D     !NA    !NA    !NA   
#>  5 -0.177    3   NA    NA     !NA    NA    
#>  6 -0.0466   4   F     !NA    !NA    !NA   
#>  7 -1.44     5.5 G     !NA    NA     !NA   
#>  8 -0.177    5   H     NA     !NA    !NA   
#>  9 -0.397    5.5 NA    !NA    NA     NA    
#> 10  0.664    3   J     !NA    !NA    !NA   

dat %>%
  nabular() %>%
  mutate(
    across(
      where(is.numeric),
      impute_median
    )
  )
#> # A tibble: 10 × 6
#>        num   int fct   num_NA int_NA fct_NA
#>      <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1  0.449    8   A     !NA    !NA    !NA   
#>  2 -0.306    6   B     !NA    !NA    !NA   
#>  3 -0.0124  11   C     !NA    !NA    !NA   
#>  4 -1.09     6   D     !NA    !NA    !NA   
#>  5 -0.177    3   NA    NA     !NA    NA    
#>  6 -0.0466   4   F     !NA    !NA    !NA   
#>  7 -1.44     5.5 G     !NA    NA     !NA   
#>  8 -0.177    5   H     NA     !NA    !NA   
#>  9 -0.397    5.5 NA    !NA    NA     NA    
#> 10  0.664    3   J     !NA    !NA    !NA   

dat %>%
  nabular() %>%
  mutate(
    across(
      c("num", "int"),
      impute_median
    )
 )
#> # A tibble: 10 × 6
#>        num   int fct   num_NA int_NA fct_NA
#>      <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1  0.449    8   A     !NA    !NA    !NA   
#>  2 -0.306    6   B     !NA    !NA    !NA   
#>  3 -0.0124  11   C     !NA    !NA    !NA   
#>  4 -1.09     6   D     !NA    !NA    !NA   
#>  5 -0.177    3   NA    NA     !NA    NA    
#>  6 -0.0466   4   F     !NA    !NA    !NA   
#>  7 -1.44     5.5 G     !NA    NA     !NA   
#>  8 -0.177    5   H     NA     !NA    !NA   
#>  9 -0.397    5.5 NA    !NA    NA     NA    
#> 10  0.664    3   J     !NA    !NA    !NA