% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pd_importance.R
\name{pd_importance}
\alias{pd_importance}
\alias{pd_importance.default}
\alias{pd_importance.hstats}
\title{PD-Based Importance (Experimental)}
\usage{
pd_importance(object, ...)

\method{pd_importance}{default}(object, ...)

\method{pd_importance}{hstats}(
  object,
  normalize = TRUE,
  squared = TRUE,
  sort = TRUE,
  top_m = 15L,
  eps = 1e-08,
  plot = FALSE,
  fill = "#2b51a1",
  ...
)
}
\arguments{
\item{object}{Object of class "hstats".}

\item{...}{Further parameters passed to \code{geom_bar()}.}

\item{normalize}{Should statistics be normalized? Default is \code{TRUE}.}

\item{squared}{Should \emph{squared} statistics be returned? Default is \code{TRUE}.}

\item{sort}{Should results be sorted? Default is \code{TRUE}.
(Multioutput is sorted by row means.)}

\item{top_m}{How many rows should be shown? (\code{Inf} to show all.)}

\item{eps}{Threshold below which numerator values are set to 0.}

\item{plot}{Should results be plotted as a barplot? Default is \code{FALSE}.}

\item{fill}{Color of bar (only for univariate statistics).}
}
\value{
A matrix of statistics (one row per variable, one column per prediction dimension),
or a "ggplot" object (if \code{plot = TRUE}).
}
\description{
Experimental variable importance method based on partial dependence functions.
While related to Greenwell et al., our suggestion measures not only main effect
strength but also interaction effects. It is very closely related to \eqn{H^2_j},
see Details. Set \code{plot = TRUE} to plot the results as a barplot.
}
\details{
If \eqn{x_j} has no effects, the (centered) prediction function \eqn{F}
equals the (centered) partial dependence \eqn{F_{\setminus j}} on all other
features \eqn{\mathbf{x}_{\setminus j}}, i.e.,
\deqn{
    F(\mathbf{x}) = F_{\setminus j}(\mathbf{x}_{\setminus j}).
}
Therefore, the following measure of variable importance follows:
\deqn{
  \textrm{Imp}_{j} = \frac{\frac{1}{n} \sum_{i = 1}^n\big[F(\mathbf{x}_i) - 
  \hat F_{\setminus j}(\mathbf{x}_{i\setminus j})\big]^2}{\frac{1}{n} \sum_{i = 1}^n
  \big[F(\mathbf{x}_i)\big]^2}.
}
It differs from \eqn{H^2_j} only by not subtracting the main effect of the \eqn{j}-th
feature in the numerator. It can be read as the proportion of prediction variability
unexplained by all other features. As such, it measures variable importance of
the \eqn{j}-th feature, including its interaction effects (check \code{\link[=partial_dep]{partial_dep()}}
for all definitions).

Remarks 1 to 4 of \code{\link[=h2_overall]{h2_overall()}} also apply here.
}
\section{Methods (by class)}{
\itemize{
\item \code{pd_importance(default)}: Default method of PD-based feature importance.

\item \code{pd_importance(hstats)}: PD-based feature importance from "hstats" object.

}}
\examples{
# MODEL 1: Linear regression
fit <- lm(Sepal.Length ~ . + Petal.Width:Species, data = iris)
s <- hstats(fit, X = iris[-1])
pd_importance(s)
pd_importance(s, plot = TRUE)

# MODEL 2: Multi-response linear regression
fit <- lm(as.matrix(iris[1:2]) ~ Petal.Length + Petal.Width * Species, data = iris)
s <- hstats(fit, X = iris[3:5])
pd_importance(s, plot = TRUE)
}
\references{
Greenwell, Brandon M., Bradley C. Boehmke, and Andrew J. McCarthy.
\emph{A Simple and Effective Model-Based Variable Importance Measure.} arXiv (2018).
}
\seealso{
\code{\link[=hstats]{hstats()}}, \code{\link[=h2_overall]{h2_overall()}}
}
