Skip to contents

Imputing the mean can be useful if you are imputing specific values; however, we generally recommend imputing with model-based approaches instead. See the simputation package, for example simputation::impute_lm().

Usage

impute_mean(x)

# S3 method for default
impute_mean(x)

# S3 method for factor
impute_mean(x)

Arguments

x

vector

Value

vector with missing values replaced by the mean

Examples


library(dplyr)
vec <- rnorm(10)

vec[sample(1:10, 3)] <- NA

impute_mean(vec)
#>  [1]  0.5301633  0.7462801  1.3446716  0.5301633 -0.4860343  0.8088018
#>  [7]  0.3218633  0.0581052  0.5301633  0.9174552

dat <- tibble(
  num = rnorm(10),
  int = as.integer(rpois(10, 5)),
  fct = factor(LETTERS[1:10])
) %>%
  mutate(
    across(
      everything(),
      \(x) set_prop_miss(x, prop = 0.25)
    )
  )

dat
#> # A tibble: 10 × 3
#>       num   int fct  
#>     <dbl> <int> <fct>
#>  1 NA         7 A    
#>  2  1.35     NA B    
#>  3 NA         4 C    
#>  4  0.590     4 NA   
#>  5  1.23      5 E    
#>  6 -1.42     NA F    
#>  7 -1.04      9 NA   
#>  8  1.28      3 H    
#>  9 -1.31      7 I    
#> 10  1.60      6 J    

dat %>%
  nabular() %>%
  mutate(
    num = impute_mean(num),
    int = impute_mean(int),
    fct = impute_mean(fct),
  )
#> # A tibble: 10 × 6
#>       num   int fct   num_NA int_NA fct_NA
#>     <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1  0.285  7    A     NA     !NA    !NA   
#>  2  1.35   5.62 B     !NA    NA     !NA   
#>  3  0.285  4    C     NA     !NA    !NA   
#>  4  0.590  4    J     !NA    !NA    NA    
#>  5  1.23   5    E     !NA    !NA    !NA   
#>  6 -1.42   5.62 F     !NA    NA     !NA   
#>  7 -1.04   9    J     !NA    !NA    NA    
#>  8  1.28   3    H     !NA    !NA    !NA   
#>  9 -1.31   7    I     !NA    !NA    !NA   
#> 10  1.60   6    J     !NA    !NA    !NA   

dat %>%
  nabular() %>%
  mutate(
    across(
      where(is.numeric),
      impute_mean
    )
  )
#> # A tibble: 10 × 6
#>       num   int fct   num_NA int_NA fct_NA
#>     <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1  0.285  7    A     NA     !NA    !NA   
#>  2  1.35   5.62 B     !NA    NA     !NA   
#>  3  0.285  4    C     NA     !NA    !NA   
#>  4  0.590  4    NA    !NA    !NA    NA    
#>  5  1.23   5    E     !NA    !NA    !NA   
#>  6 -1.42   5.62 F     !NA    NA     !NA   
#>  7 -1.04   9    NA    !NA    !NA    NA    
#>  8  1.28   3    H     !NA    !NA    !NA   
#>  9 -1.31   7    I     !NA    !NA    !NA   
#> 10  1.60   6    J     !NA    !NA    !NA   

dat %>%
  nabular() %>%
  mutate(
    across(
      c("num", "int"),
      impute_mean
    )
  )
#> # A tibble: 10 × 6
#>       num   int fct   num_NA int_NA fct_NA
#>     <dbl> <dbl> <fct> <fct>  <fct>  <fct> 
#>  1  0.285  7    A     NA     !NA    !NA   
#>  2  1.35   5.62 B     !NA    NA     !NA   
#>  3  0.285  4    C     NA     !NA    !NA   
#>  4  0.590  4    NA    !NA    !NA    NA    
#>  5  1.23   5    E     !NA    !NA    !NA   
#>  6 -1.42   5.62 F     !NA    NA     !NA   
#>  7 -1.04   9    NA    !NA    !NA    NA    
#>  8  1.28   3    H     !NA    !NA    !NA   
#>  9 -1.31   7    I     !NA    !NA    !NA   
#> 10  1.60   6    J     !NA    !NA    !NA