% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PlotICE.R
\name{plot_ice}
\alias{plot_ice}
\alias{plot_ice,ANY-method}
\alias{plot_ice,familiarCollection-method}
\title{Plot individual conditional expectation plots.}
\usage{
plot_ice(
  object,
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  color_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  ggtheme = NULL,
  discrete_palette = NULL,
  gradient_palette = NULL,
  gradient_palette_range = NULL,
  x_label = waiver(),
  y_label = waiver(),
  legend_label = waiver(),
  plot_title = NULL,
  plot_sub_title = NULL,
  caption = NULL,
  x_range = NULL,
  x_n_breaks = 5,
  x_breaks = NULL,
  y_range = NULL,
  y_n_breaks = 5,
  y_breaks = NULL,
  novelty_range = NULL,
  value_scales = waiver(),
  novelty_scales = waiver(),
  conf_int_style = c("ribbon", "step", "none"),
  conf_int_alpha = 0.4,
  ice_default_alpha = 0.6,
  n_max_samples_shown = 50L,
  show_ice = TRUE,
  show_pd = TRUE,
  show_novelty = TRUE,
  anchor_values = NULL,
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  ...
)

\S4method{plot_ice}{ANY}(
  object,
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  color_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  ggtheme = NULL,
  discrete_palette = NULL,
  gradient_palette = NULL,
  gradient_palette_range = NULL,
  x_label = waiver(),
  y_label = waiver(),
  legend_label = waiver(),
  plot_title = NULL,
  plot_sub_title = NULL,
  caption = NULL,
  x_range = NULL,
  x_n_breaks = 5,
  x_breaks = NULL,
  y_range = NULL,
  y_n_breaks = 5,
  y_breaks = NULL,
  novelty_range = NULL,
  value_scales = waiver(),
  novelty_scales = waiver(),
  conf_int_style = c("ribbon", "step", "none"),
  conf_int_alpha = 0.4,
  ice_default_alpha = 0.6,
  n_max_samples_shown = 50L,
  show_ice = TRUE,
  show_pd = TRUE,
  show_novelty = TRUE,
  anchor_values = NULL,
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  ...
)

\S4method{plot_ice}{familiarCollection}(
  object,
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  color_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  ggtheme = NULL,
  discrete_palette = NULL,
  gradient_palette = NULL,
  gradient_palette_range = NULL,
  x_label = waiver(),
  y_label = waiver(),
  legend_label = waiver(),
  plot_title = waiver(),
  plot_sub_title = waiver(),
  caption = NULL,
  x_range = NULL,
  x_n_breaks = 5,
  x_breaks = NULL,
  y_range = NULL,
  y_n_breaks = 5,
  y_breaks = NULL,
  novelty_range = NULL,
  value_scales = waiver(),
  novelty_scales = waiver(),
  conf_int_style = c("ribbon", "step", "none"),
  conf_int_alpha = 0.4,
  ice_default_alpha = 0.6,
  n_max_samples_shown = 50L,
  show_ice = TRUE,
  show_pd = TRUE,
  show_novelty = TRUE,
  anchor_values = NULL,
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  ...
)
}
\arguments{
\item{object}{\code{familiarCollection} object, or one or more \code{familiarData}
objects, that will be internally converted to a \code{familiarCollection}
object. It is also possible to provide a \code{familiarEnsemble} or one or more
\code{familiarModel} objects together with the data from which the plotted data
are computed prior to export. Paths to such files can also be provided.}

\item{draw}{(\emph{optional}) Draws the plot if TRUE.}

\item{dir_path}{(\emph{optional}) Path to the directory where created individual
conditional expectation plots are saved to. Output is saved in the
\code{explanation} subdirectory. If \code{NULL}, figures are not written to the
folder, but are returned instead.}

\item{split_by}{(\emph{optional}) Splitting variables. This refers to column names
on which datasets are split. A separate figure is created for each split.
See details for available variables.}

\item{color_by}{(\emph{optional}) Variables used to determine fill colour of plot
objects. The variables cannot overlap with those provided to the \code{split_by}
argument, but may overlap with other arguments. See details for available
variables.}

\item{facet_by}{(\emph{optional}) Variables used to determine how and if facets of
each figure appear. In case the \code{facet_wrap_cols} argument is \code{NULL}, the
first variable is used to define columns, and the remaining variables are
used to define rows of facets. The variables cannot overlap with those
provided to the \code{split_by} argument, but may overlap with other arguments.
See details for available variables.}

\item{facet_wrap_cols}{(\emph{optional}) Number of columns to generate when facet
wrapping. If NULL, a facet grid is produced instead.}

\item{ggtheme}{(\emph{optional}) \code{ggplot} theme to use for plotting.}

\item{discrete_palette}{(\emph{optional}) Palette to use to colour the different
plot elements in case a value was provided to the \code{color_by} argument. For
2D individual conditional expectation plots without novelty, the initial
colour determines the colour of the points indicating sample values.}

\item{gradient_palette}{(\emph{optional}) Sequential or divergent palette used to
colour the raster in 2D individual conditional expectation or partial
dependence plots. This argument is not used for 1D plots.}

\item{gradient_palette_range}{(\emph{optional}) Numerical range used to span the
gradient for 2D plots. This should be a range of two values, e.g. \code{c(0, 1)}. By default, values are determined from the data, dependent on the
\code{value_scales} parameter. This parameter is ignored for 1D plots.}

\item{x_label}{(\emph{optional}) Label to provide to the x-axis. If NULL, no label
is shown.}

\item{y_label}{(\emph{optional}) Label to provide to the y-axis. If NULL, no label
is shown.}

\item{legend_label}{(\emph{optional}) Label to provide to the legend. If NULL, the
legend will not have a name.}

\item{plot_title}{(\emph{optional}) Label to provide as figure title. If NULL, no
title is shown.}

\item{plot_sub_title}{(\emph{optional}) Label to provide as figure subtitle. If
NULL, no subtitle is shown.}

\item{caption}{(\emph{optional}) Label to provide as figure caption. If NULL, no
caption is shown.}

\item{x_range}{(\emph{optional}) Value range for the x-axis.}

\item{x_n_breaks}{(\emph{optional}) Number of breaks to show on the x-axis of the
plot. \code{x_n_breaks} is used to determine the \code{x_breaks} argument in case it
is unset.}

\item{x_breaks}{(\emph{optional}) Break points on the x-axis of the plot.}

\item{y_range}{(\emph{optional}) Value range for the y-axis.}

\item{y_n_breaks}{(\emph{optional}) Number of breaks to show on the y-axis of the
plot. \code{y_n_breaks} is used to determine the \code{y_breaks} argument in case it
is unset.}

\item{y_breaks}{(\emph{optional}) Break points on the y-axis of the plot.}

\item{novelty_range}{(\emph{optional}) Numerical range used to span the range of
novelty values. This determines the size of the bubbles in 2D, and
transparency of lines in 1D. This should be a range of two values, e.g.
\code{c(0, 1)}. By default, values are determined from the data, dependent on
the \code{novelty_scales} parameter. This parameter is ignored if
\code{show_novelty=FALSE}.}

\item{value_scales}{(\emph{optional}) Sets scaling of predicted values. This
parameter has several options:
\itemize{
\item \code{fixed} (default): The value axis for all features will have the same
range.
\item \code{feature}: The value axis for each feature will have the same range. This
option is unavailable for 2D plots.
\item \code{figure}: The value axis for all facets in a figure will have the same
range.
\item \code{facet}: Each facet has its own range. This option is unavailable for 2D
plots.
}

For 1D plots, this option is ignored if the \code{y_range} is provided, whereas
for 2D it is ignored if the \code{gradient_palette_range} is provided.}

\item{novelty_scales}{(\emph{optional}) Sets scaling of novelty values, similar to
the \code{value_scales} parameter, but with more limited options:
\itemize{
\item \code{fixed} (default): The novelty will have the same range for all features.
\item \code{figure}: The novelty will have the same range for all facets in a figure.
}}

\item{conf_int_style}{(\emph{optional}) Confidence interval style. See details for
allowed styles.}

\item{conf_int_alpha}{(\emph{optional}) Alpha value to determine transparency of
confidence intervals or, alternatively, other plot elements with which the
confidence interval overlaps. Only values between 0.0 (fully transparent)
and 1.0 (fully opaque) are allowed.}

\item{ice_default_alpha}{(\emph{optional}) Default transparency (value) of sample
lines in a 1D plot. When novelty is shown, this is the transparency
corresponding to the least novel points. The confidence interval alpha
value is scaled by this value.}

\item{n_max_samples_shown}{(\emph{optional}) Maximum number of samples shown in an
individual conditional expectation plot. Defaults to 50. These samples are
randomly picked from the samples present in the ICE data, but the same
samples are consistently picked. Partial dependence is nonetheless computed
from all available samples.}

\item{show_ice}{(\emph{optional}) Sets whether individual conditional expectation
plots should be created.}

\item{show_pd}{(\emph{optional}) Sets whether partial dependence plots should be
created. Note that if an anchor is set for a particular feature, its
partial dependence cannot be shown.}

\item{show_novelty}{(\emph{optional}) Sets whether novelty is shown in plots.}

\item{anchor_values}{(\emph{optional}) A single value or a named list or array of
values that are used to centre the individual conditional expectation plot.
A single value is valid if and only if only a single feature is assessed.
Otherwise, values should be provided as a named list or array. Has no effect if the plot is not shown, i.e.
\code{show_ice=FALSE}. A partial dependence plot cannot be shown for those
features.}

\item{width}{(\emph{optional}) Width of the plot. A default value is derived from
the number of facets.}

\item{height}{(\emph{optional}) Height of the plot. A default value is derived
from the number of features and the number of facets.}

\item{units}{(\emph{optional}) Plot size unit. Either \code{cm} (default), \code{mm} or
\verb{in}.}

\item{export_collection}{(\emph{optional}) Exports the collection if TRUE.}

\item{...}{
  Arguments passed on to \code{\link[=export_ice_data]{export_ice_data}}, \code{\link[ggplot2:ggsave]{ggplot2::ggsave}}, \code{\link[=extract_ice]{extract_ice}}
  \describe{
    \item{\code{aggregate_results}}{Flag that signifies whether results should be
aggregated for export.}
    \item{\code{device}}{Device to use. Can either be a device function
(e.g. \link{png}), or one of "eps", "ps", "tex" (pictex),
"pdf", "jpeg", "tiff", "png", "bmp", "svg" or "wmf" (windows only). If
\code{NULL} (default), the device is guessed based on the \code{filename} extension.}
    \item{\code{scale}}{Multiplicative scaling factor.}
    \item{\code{dpi}}{Plot resolution. Also accepts a string input: "retina" (320),
"print" (300), or "screen" (72). Applies only to raster output types.}
    \item{\code{limitsize}}{When \code{TRUE} (the default), \code{ggsave()} will not
save images larger than 50x50 inches, to prevent the common error of
specifying dimensions in pixels.}
    \item{\code{bg}}{Background colour. If \code{NULL}, uses the \code{plot.background} fill value
from the plot theme.}
    \item{\code{create.dir}}{Whether to create new directories if a non-existing
directory is specified in the \code{filename} or \code{path} (\code{TRUE}) or return an
error (\code{FALSE}, default). If \code{FALSE} and run in an interactive session,
a prompt will appear asking to create a new directory when necessary.}
    \item{\code{features}}{Names of the feature or features (2) assessed simultaneously.
By default \code{NULL}, which means that all features are assessed one-by-one.}
    \item{\code{feature_x_range}}{When one or two features are defined using \code{features},
\code{feature_x_range} can be used to set the range of values for the first
feature. For numeric features, a vector of two values is assumed to indicate
a range from which \code{n_sample_points} are uniformly sampled. A vector of more
than two values is interpreted as is, i.e. these represent the values to be
sampled. For categorical features, values should represent a (sub)set of
available levels.}
    \item{\code{feature_y_range}}{As \code{feature_x_range}, but for the second feature in
case two features are defined.}
    \item{\code{n_sample_points}}{Number of points used to sample continuous features.}
    \item{\code{data}}{A \code{dataObject} object, \code{data.table} or \code{data.frame} that
constitutes the data that are assessed.}
    \item{\code{is_pre_processed}}{Flag that indicates whether the data was already
pre-processed externally, e.g. normalised and clustered. Only used if the
\code{data} argument is a \code{data.table} or \code{data.frame}.}
    \item{\code{cl}}{Cluster created using the \code{parallel} package. This cluster is then
used to speed up computation through parallelisation.}
    \item{\code{evaluation_times}}{One or more time points that are used in the analysis of
survival problems when data has to be assessed at a set time, e.g.
calibration. If not provided explicitly, this parameter is read from
settings used at creation of the underlying \code{familiarModel} objects. Only
used for \code{survival} outcomes.}
    \item{\code{ensemble_method}}{Method for ensembling predictions from models for the
same sample. Available methods are:
\itemize{
\item \code{median} (default): Use the median of the predicted values as the ensemble
value for a sample.
\item \code{mean}: Use the mean of the predicted values as the ensemble value for a
sample.
}}
    \item{\code{verbose}}{Flag to indicate whether feedback should be provided on the
computation and extraction of various data elements.}
    \item{\code{message_indent}}{Number of indentation steps for messages shown during
computation and extraction of various data elements.}
    \item{\code{sample_limit}}{(\emph{optional}) Set the upper limit of the number of samples
that are used during evaluation steps. Cannot be less than 20.

This setting can be specified per data element by providing a parameter
value in a named list with data elements, e.g.
\code{list("sample_similarity"=100, "permutation_vimp"=1000)}.

This parameter can be set for the following data elements:
\code{sample_similarity} and \code{ice_data}.}
    \item{\code{detail_level}}{(\emph{optional}) Sets the level at which results are computed
and aggregated.
\itemize{
\item \code{ensemble}: Results are computed at the ensemble level, i.e. over all
models in the ensemble. This means that, for example, bias-corrected
estimates of model performance are assessed by creating (at least) 20
bootstraps and computing the model performance of the ensemble model for
each bootstrap.
\item \code{hybrid} (default): Results are computed at the level of models in an
ensemble. This means that, for example, bias-corrected estimates of model
performance are directly computed using the models in the ensemble. If there
are at least 20 trained models in the ensemble, performance is computed for
each model, in contrast to \code{ensemble} where performance is computed for the
ensemble of models. If there are less than 20 trained models in the
ensemble, bootstraps are created so that at least 20 point estimates can be
made.
\item \code{model}: Results are computed at the model level. This means that, for
example, bias-corrected estimates of model performance are assessed by
creating (at least) 20 bootstraps and computing the performance of the model
for each bootstrap.
}

Note that each level of detail has a different interpretation for bootstrap
confidence intervals. For \code{ensemble} and \code{model} these are the confidence
intervals for the ensemble and an individual model, respectively. That is,
the confidence interval describes the range where an estimate produced by a
respective ensemble or model trained on a repeat of the experiment may be
found with the probability of the confidence level. For \code{hybrid}, it
represents the range where any single model trained on a repeat of the
experiment may be found with the probability of the confidence level. By
definition, confidence intervals obtained using \code{hybrid} are at least as
wide as those for \code{ensemble}. \code{hybrid} offers the correct interpretation if
the goal of the analysis is to assess the result of a single, unspecified,
model.

\code{hybrid} is generally computationally less expensive than \code{ensemble}, which
in turn is somewhat less expensive than \code{model}.

A non-default \code{detail_level} parameter can be specified for separate
evaluation steps by providing a parameter value in a named list with data
elements, e.g. \code{list("auc_data"="ensemble", "model_performance"="hybrid")}.
This parameter can be set for the following data elements: \code{auc_data},
\code{decision_curve_analyis}, \code{model_performance}, \code{permutation_vimp},
\code{ice_data}, \code{prediction_data} and \code{confusion_matrix}.}
    \item{\code{estimation_type}}{(\emph{optional}) Sets the type of estimation that should be
possible. This has the following options:
\itemize{
\item \code{point}: Point estimates.
\item \code{bias_correction} or \code{bc}: Bias-corrected estimates. A bias-corrected
estimate is computed from (at least) 20 point estimates, and \code{familiar} may
bootstrap the data to create them.
\item \code{bootstrap_confidence_interval} or \code{bci} (default): Bias-corrected
estimates with bootstrap confidence intervals (Efron and Hastie, 2016). The
number of point estimates required depends on the \code{confidence_level}
parameter, and \code{familiar} may bootstrap the data to create them.
}

As with \code{detail_level}, a non-default \code{estimation_type} parameter can be
specified for separate evaluation steps by providing a parameter value in a
named list with data elements, e.g. \code{list("auc_data"="bci", "model_performance"="point")}. This parameter can be set for the following
data elements: \code{auc_data}, \code{decision_curve_analyis}, \code{model_performance},
\code{permutation_vimp}, \code{ice_data}, and \code{prediction_data}.}
    \item{\code{confidence_level}}{(\emph{optional}) Numeric value for the level at which
confidence intervals are determined. In the case bootstraps are used to
determine the confidence intervals, \code{familiar} uses the
rule of thumb \eqn{n = 20 / ci.level} to determine the number of required
bootstraps.

The default value is \code{0.95}.}
    \item{\code{bootstrap_ci_method}}{(\emph{optional}) Method used to determine bootstrap
confidence intervals (Efron and Hastie, 2016). The following methods are
implemented:
\itemize{
\item \code{percentile} (default): Confidence intervals obtained using the percentile
method.
\item \code{bc}: Bias-corrected confidence intervals.
}

Note that the standard method is not implemented because this method is
often not suitable due to non-normal distributions. The bias-corrected and
accelerated (BCa) method is not implemented yet.}
  }}
}
\value{
\code{NULL} or list of plot objects, if \code{dir_path} is \code{NULL}.
}
\description{
This method creates individual conditional expectation plots
based on data in a familiarCollection object.
}
\details{
This function generates individual conditional expectation plots.
These plots come in two varieties, namely 1D and 2D. 1D plots show the
predicted value as function of a single feature, whereas 2D plots show the
predicted value as a function of two features.

Available splitting variables are: \code{feature_x}, \code{feature_y} (2D only),
\code{fs_method}, \code{learner}, \code{data_set} and \code{positive_class} (categorical
outcomes) or \code{evaluation_time} (survival outcomes). By default, for 1D ICE
plots the data are split by \code{feature_x}, \code{fs_method} and \code{learner}, with
faceting by \code{data_set}, \code{positive_class} or \code{evaluation_time}. If only
partial dependence is shown, \code{positive_class} and \code{evaluation_time} are
used to set colours instead. For 2D plots, by default the data are split by
\code{feature_x}, \code{fs_method} and \code{learner}, with faceting by \code{data_set},
\code{positive_class} or \code{evaluation_time}. The \code{color_by} argument cannot be
used with 2D plots, and attempting to do so causes an error. Attempting to
specify \code{feature_x} or \code{feature_y} for \code{color_by} will likewise result in
an error, as multiple features cannot be shown in the same facet.

The splitting variables indicated by \code{color_by} are coloured according to
the \code{discrete_palette} parameter. This parameter is therefore only used for
1D plots. Available palettes for \code{discrete_palette} and \code{gradient_palette}
are those listed by \code{grDevices::palette.pals()} (requires R >= 4.0.0),
\code{grDevices::hcl.pals()} (requires R >= 3.6.0) and \code{rainbow}, \code{heat.colors},
\code{terrain.colors}, \code{topo.colors} and \code{cm.colors}, which correspond to the
palettes of the same name in \code{grDevices}. If not specified, a default
palette based on palettes in Tableau is used. You may also specify your
own palette by using colour names listed by \code{grDevices::colors()} or
through hexadecimal RGB strings.

Bootstrap confidence intervals of the partial dependence plots can be shown
using various styles set by \code{conf_int_style}:
\itemize{
\item \code{ribbon} (default): confidence intervals are shown as a ribbon with an
opacity of \code{conf_int_alpha} around the point estimate of the partial
dependence.
\item \code{step}: confidence intervals are shown as a step function around
the point estimate of the partial dependence.
\item \code{none}: confidence intervals are not shown. The point estimate of the
partial dependence is shown as usual.
}

Note that when bootstrap confidence intervals were computed, they were also
computed for individual samples in individual conditional expectation
plots. To avoid clutter, only point estimates for individual samples are
shown.

Labelling methods such as \code{set_fs_method_names} or \code{set_data_set_names} can
be applied to the \code{familiarCollection} object to update labels, and order
the output in the figure.
}
