% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/custom_filter.R
\name{step_custom_filter}
\alias{step_custom_filter}
\alias{tidy.step_custom_filter}
\title{Custom Filter}
\usage{
step_custom_filter(recipe, ..., role = NA, trained = FALSE,
  filter_function = NULL, options = NULL, removals = NULL,
  skip = FALSE, id = rand_id("custom_filter"))

\method{tidy}{step_custom_filter}(x, ...)
}
\arguments{
\item{recipe}{A recipe object. The step will be added to the
sequence of operations for this recipe.}

\item{...}{One or more selector functions to choose which
variables will be evaluated by the filter. See
\code{\link[recipes:selections]{recipes::selections()}} for more details.}

\item{role}{Not used by this step since no new variables are
created.}

\item{trained}{A logical to indicate if the quantities for
preprocessing have been estimated.}

\item{filter_function}{A custom filter function that will
diagnose problematic variables (see Details below).}

\item{options}{A list of options that will be provided to the
filter function as arguments (see Details below).}

\item{removals}{A character string that contains the names of
the columns that should be removed. These values are not determined
until \code{\link[recipes:prep]{recipes::prep.recipe()}} is called.}

\item{skip}{A logical. Should the step be skipped when the
recipe is baked by \code{\link[recipes:bake]{bake.recipe()}}? While all operations are baked
when \code{\link[recipes:prep]{prep.recipe()}} is run, some operations may not be able to be
conducted on new data (e.g. processing the outcome variable(s)).
Care should be taken when using \code{skip = TRUE} as it may affect
the computations for subsequent operations.}

\item{id}{A character string that is unique to this step to identify it.}

\item{x}{A \code{step_custom_filter} object.}
}
\value{
An updated version of \code{recipe} with the new step
 added to the sequence of existing steps (if any). For the
 \code{tidy} method, a tibble with the columns \code{terms} (the names
 of the columns that will be removed) and \code{id} (the step identifier).
}
\description{
\code{step_custom_filter} creates a \emph{specification} of a (higher-order) recipe
step that will potentially remove variables using a custom filter function.
}
\details{
This step diagnoses problematic variables according to
 a custom filter function. The filter function must meet the
 following requirements:
\enumerate{
  \item the function must take at least one argument \code{x}:
  the subset of selected variables from the initial data set.
  \item the function must return a vector with the names of
  the variables diagnosed as problematic.
}

All additional arguments to the custom filter function must be provided
through the \code{options} argument.
}
\examples{
library(magrittr)
library(tidyselect)
library(generics)
library(tibble)
library(purrr)
library(recipes)

# Build a small example data set. The value -999 is used as a placeholder
# for missing observations in some of the columns.
df <- tibble(
  a = c(1, -999, 3, NA, NA),
  b = c(1, 3, NA, NA, NA),
  c = c(1, -999, 3, 4, 5),
  d = rep(1, 5),
  e = c(-999, -999, -999, -999, NA),
  f = rep(NA, 5)
)

# Create custom filter function to identify variables with a proportion of
# missing values at or above some threshold. The function treats values
# provided with the 'other_values' argument as missing.

filter_missings <- function(x, threshold = 0.5, other_values = NULL) {

  # A value counts as missing when it is NA or when it equals one of the
  # placeholder values in other_values. With other_values = NULL the
  # membership test is FALSE everywhere, so only NA values are counted.
  is_missing <- function(column) {
    is.na(column) | column \%in\% other_values
  }

  # Flag every variable whose share of missing values reaches the threshold.
  exceeds_threshold <- map_lgl(x, function(column) {
    mean(is_missing(column)) >= threshold
  })

  # Return the names of the flagged variables.
  names(x)[exceeds_threshold]

}

# Set up the recipe: apply the custom filter to every column and pass the
# filter arguments through the options list.
filter_options <- list(threshold = 0.5, other_values = -999)

rec <- recipe(df) \%>\%
  step_custom_filter(everything(),
                     filter_function = filter_missings,
                     options = filter_options)

# Train the recipe.
rec_prep <- prep(rec)

# Apply the trained recipe to the data.
rec_baked <- bake(rec_prep, df)

# Inspect the step before and after training, then the filtered data.
tidy(rec)
tidy(rec, number = 1)
tidy(rec_prep)
tidy(rec_prep, number = 1)
rec_baked

}
\seealso{
\code{\link[recipes:recipe]{recipes::recipe()}}
  \code{\link[recipes:prep]{recipes::prep.recipe()}} \code{\link[recipes:bake]{recipes::bake.recipe()}}
}
\concept{preprocessing variable_filters}
\keyword{datagen}
