% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/standardize.R, R/standardize.data.R,
%   R/standardize.models.R
\name{standardize}
\alias{standardize}
\alias{standardize.numeric}
\alias{standardize.data.frame}
\alias{standardize.default}
\title{Standardization (Z-scoring)}
\usage{
standardize(
  x,
  robust = FALSE,
  two_sd = FALSE,
  weights = NULL,
  verbose = TRUE,
  ...
)

\method{standardize}{numeric}(
  x,
  robust = FALSE,
  two_sd = FALSE,
  weights = NULL,
  verbose = TRUE,
  ...
)

\method{standardize}{data.frame}(
  x,
  robust = FALSE,
  two_sd = FALSE,
  weights = NULL,
  verbose = TRUE,
  select = NULL,
  exclude = NULL,
  remove_na = c("none", "selected", "all"),
  force = FALSE,
  append = FALSE,
  suffix = "_z",
  ...
)

\method{standardize}{default}(
  x,
  robust = FALSE,
  two_sd = FALSE,
  weights = TRUE,
  verbose = TRUE,
  include_response = TRUE,
  ...
)
}
\arguments{
\item{x}{A data frame, a vector or a statistical model.}

\item{robust}{Logical, if \code{TRUE}, centering is done by subtracting the
median from the variables and dividing them by the median absolute deviation
(MAD). If \code{FALSE}, variables are standardized by subtracting the
mean and dividing them by the standard deviation (SD).}

\item{two_sd}{If \code{TRUE}, the variables are scaled by two times the deviation
(SD or MAD depending on \code{robust}). This method can be useful to obtain
model coefficients of continuous parameters comparable to coefficients
related to binary predictors, when applied to \strong{the predictors} (not the
outcome) (Gelman, 2008).}

\item{weights}{Can be \code{NULL} (for no weighting), or:
\itemize{
\item For models: if \code{TRUE} (default), a weighted standardization is carried out.
\item For \code{data.frame}s: a numeric vector of weights, or a character of the name of a column in the \code{data.frame} that contains the weights.
\item For numeric vectors: a numeric vector of weights.
}}

\item{verbose}{Toggle warnings on or off.}

\item{...}{Arguments passed to or from other methods.}

\item{select}{Character vector of column names. If \code{NULL} (the default), all
variables will be selected.}

\item{exclude}{Character vector of column names to be excluded from selection.}

\item{remove_na}{How should missing values (\code{NA}) be treated: if \code{"none"}
(default): each column's standardization is done separately, ignoring
\code{NA}s. Else, rows with \code{NA} in the columns selected with \code{select} /
\code{exclude} (\code{"selected"}) or in all columns (\code{"all"}) are dropped before
standardization, and the resulting data frame does not include these cases.}

\item{force}{Logical, if \code{TRUE}, forces standardization of factors as
well. Factors are converted to numerical values, with the lowest level
being the value \code{1} (unless the factor has numeric levels, which are
converted to the corresponding numeric value).}

\item{append}{Logical, if \code{TRUE} and \code{x} is a data frame, standardized
variables will be added as additional columns; if \code{FALSE},
existing variables are overwritten.}

\item{suffix}{Character value, will be appended to variable (column) names of
\code{x}, if \code{x} is a data frame and \code{append = TRUE}.}

\item{include_response}{For a model, if \code{TRUE} (default), the response value
will also be standardized. If \code{FALSE}, only the predictors will be
standardized. Note that for certain models (logistic regression, count
models, ...), the response value will never be standardized, to make
re-fitting the model work. (For \code{mediate} models, only applies to the y
model; m model's response will always be standardized.)}
}
\value{
The standardized object (either a standardized data frame or a
statistical model fitted on standardized data).
}
\description{
Performs a standardization of data (z-scoring), i.e., centering and scaling,
so that the data is expressed in terms of standard deviation (i.e., mean = 0,
SD = 1) or Median Absolute Deviation (median = 0, MAD = 1). When applied to a
statistical model, this function extracts the dataset, standardizes it, and
refits the model with this standardized version of the dataset. The
\code{\link[=normalize]{normalize()}} function can also be used to scale all numeric variables within
the 0 - 1 range.
}
\note{
When \code{x} is a vector or a data frame with \verb{remove_na = "none"},
missing values are preserved, so the return value has the same length /
number of rows as the original input.
}
\section{Model Standardization}{

If \code{x} is a model object, standardization is done by completely refitting the
model on the standardized data. Hence, this approach is equal to
standardizing the variables \emph{before} fitting the model and will return a new
model object. This method is particularly recommended for complex
models that include interactions or transformations (e.g., polynomial or
spline terms). The \code{robust} (default to \code{FALSE}) argument enables a robust
standardization of data, i.e., based on the \code{median} and \code{MAD} instead of the
\code{mean} and \code{SD}. See \code{\link[=standardize_parameters]{standardize_parameters()}} for other methods of
standardizing model coefficients.
\subsection{Transformed Variables}{

When the model's formula contains transformations (e.g., \code{y ~ exp(X)}), the
transformation effectively takes place after standardization (e.g.,
\code{exp(scale(X))}). Some transformations are undefined for negative values,
such as \code{log()} and \code{sqrt()}. To avoid dropping these values, the
standardized data is shifted by \code{Z - min(Z) + 1} or \code{Z - min(Z)}
(respectively).
}
}

\section{Generalized Linear Models}{
When standardizing coefficients of a generalized linear model (GLM, GLMM, etc.),
only the predictors are standardized, maintaining the interpretability of the
coefficients (e.g., in a binomial model: the exponent of the standardized
parameter is the odds ratio (OR) of a change of 1 SD in the predictor, etc.).
}

\examples{
# Data frames
summary(standardize(iris))

# Models
model <- lm(Sepal.Length ~ Species * Petal.Width, data = iris)
coef(standardize(model))
}
\seealso{
Other transform utilities: 
\code{\link{change_scale}()},
\code{\link{normalize}()},
\code{\link{ranktransform}()}

Other standardize: 
\code{\link{standardize_info}()},
\code{\link{standardize_parameters}()}
}
\concept{standardize}
\concept{transform utilities}
