% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/custom_transformation.R
\name{step_custom_transformation}
\alias{step_custom_transformation}
\alias{tidy.step_custom_transformation}
\title{Custom Transformation}
\usage{
step_custom_transformation(recipe, ..., role = "predictor",
  trained = FALSE, prep_function = NULL, prep_options = NULL,
  prep_output = NULL, bake_function = NULL, bake_options = NULL,
  bake_how = "bind_cols", selected_vars = NULL, skip = FALSE,
  id = rand_id("custom_transformation"))

\method{tidy}{step_custom_transformation}(x, ...)
}
\arguments{
\item{recipe}{A recipe object. The step will be added to the sequence of
operations for this recipe.}

\item{...}{One or more selector functions to choose which variables are
affected by the step. See [recipes::selections()] for more details. The
names of the selected variables will be stored in the `selected_vars`
argument.}

\item{role}{For model terms created by this step, what analysis role should
they be assigned? By default, the function assumes that the new columns
will be used as predictors in a model.}

\item{trained}{A logical to indicate if the quantities for preprocessing have
been estimated.}

\item{prep_function}{A function. This is a helper function for the
[recipes::prep.recipe()] method. It will be invoked when the recipe is
'prepped' by [recipes::prep.recipe()]. The function MUST satisfy the
following conditions: (1) the function must take an argument `x`: the
subset of selected variables (`selected_vars`) from the initial data set,
(2) the function MUST return the (required) estimated parameters that can
later be applied to other data sets. This output can be of any appropriate
type and shape. Leave `prep_function` as NULL if the preparation of new
data sets does not depend on parameters learned on the initial data set.}

\item{prep_options}{A list with (any) additional arguments for the prep
helper function call EXCEPT for the `x` argument. Leave as NULL, if no
`prep_function` is given.}

\item{prep_output}{Output from the prep helper function (`prep_function`)
call, consisting of the estimated parameters from the initial data set that
will be applied to other data sets. Results are not computed until
[recipes::prep.recipe()] is called.}

\item{bake_function}{A function. This is a helper function for the 'bake'
method. It will be invoked when the recipe is 'baked' by `bake.recipe()`.
The function MUST satisfy the following conditions: (1) the function must
take an argument `x`: the new data set that the transformation will be
applied to, (2) IF the preparation of new data sets depends on parameters
learned on the initial data set, the function must take the argument
`prep_output`: the output from the prep helper function (`prep_function`),
(3) the output from the function should be the transformed variables. The
output must be of a type and shape that allows it to be bound column-wise
to the new data set after converting it to a `tibble`.}

\item{bake_options}{A list with (any) arguments for the `bake_function`
function call EXCEPT for the `x` and `prep_output` arguments.}

\item{bake_how}{A character. How should the transformed variables be appended
to the new data set? Choose from options (1) `bind_cols`: simply bind the
transformed variables to the new data set or (2) `replace`: replace the
selected variables (`selected_vars`) from the new data set with the
transformed variables.}

\item{selected_vars}{A character string that contains the names of the
selected variables. These values are not determined until
[recipes::prep.recipe()] is called.}

\item{skip}{A logical. Should the step be skipped when the recipe is baked by
[recipes::bake.recipe()]? While all operations are baked when
[recipes::prep.recipe()] is run, some operations may not be able to be
conducted on new data (e.g. processing the outcome variable(s)). Care
should be taken when using `skip = TRUE` as it may affect the computations
for subsequent operations.}

\item{id}{A character string that is unique to this step to identify it.}

\item{x}{A `step_custom_transformation` object.}
}
\value{
An updated version of `recipe` with the new step added to the
  sequence of existing steps (if any). For the `tidy` method, a `tibble` with
  columns `terms` (the selectors or variables selected) as well as the step
  `id`.
}
\description{
`step_custom_transformation` creates a *specification* of a higher order
recipe step that will make a transformation of the input data from (custom)
`prep` and `bake` helper functions.
}
\examples{
library(dplyr)
library(purrr)
library(tibble)
library(recipes)
library(generics)

# split 'mtcars' row-wise: the first 16 rows form the initial data set,
# all remaining rows form the new data set.
cars_initial <- mtcars[seq_len(16), ]
cars_new <- mtcars[-seq_len(16), ]

# prep helper function: for each (numeric) variable in 'x', compute its
# mean and its standard deviation. Works for an arbitrary number of
# variables; the result holds one list(mean, sd) entry per variable.
compute_means_sd <- function(x) {
  map(x, function(column) {
    list(mean = mean(column), sd = sd(column))
  })
}

# bake helper function: centers numeric variables to have a mean of
# 'alpha' and scales them to have a standard deviation of 'beta', using
# the means and standard deviations stored in 'prep_output'.
center_scale <- function(x, prep_output, alpha, beta) {

  # transformation for a single variable: standardize it with the
  # parameters from the prep step, then rescale by 'beta' and shift
  # by 'alpha'.
  rescale <- function(values, params) {
    alpha + (values - params$mean) * beta / params$sd
  }

  # only the variables of the new data set that have an entry in
  # 'prep_output' are transformed, each paired with its own parameters.
  map2(select(x, names(prep_output)), prep_output, rescale)

}

# create recipe: center and scale 'mpg' and 'disp' to a mean of 0 and a
# standard deviation of 1, and replace the original variables with the
# transformed ones.
rec <- recipe(cars_initial) \%>\%
  step_custom_transformation(mpg, disp,
                             prep_function = compute_means_sd,
                             bake_function = center_scale,
                             bake_options = list(alpha = 0, beta = 1),
                             bake_how = "replace")

# prep recipe: the prep helper function estimates the means and standard
# deviations on the initial data set.
rec_prep <- prep(rec)

# bake recipe: the bake helper function applies the transformation to the
# new data set.
rec_baked <- bake(rec_prep, cars_new)

# inspect output.
rec
rec_baked
tidy(rec)
tidy(rec, 1)
tidy(rec_prep)
tidy(rec_prep, 1)
}
\seealso{
[recipes::recipe()] [recipes::prep.recipe()]
  [recipes::bake.recipe()]
}
\concept{preprocessing}
\keyword{datagen}
