% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.R
\name{predict.blockForest}
\alias{predict.blockForest}
\title{Prediction using Random Forest variants for block-structured covariate data}
\usage{
\method{predict}{blockForest}(
  object,
  data = NULL,
  predict.all = FALSE,
  num.trees = object$num.trees,
  type = "response",
  se.method = "infjack",
  quantiles = c(0.1, 0.5, 0.9),
  seed = NULL,
  num.threads = NULL,
  verbose = TRUE,
  ...
)
}
\arguments{
\item{object}{\code{blockForest} object.}

\item{data}{New test data of class \code{data.frame} or \code{gwaa.data} (GenABEL).}

\item{predict.all}{Return individual predictions for each tree instead of aggregated predictions for all trees. Return a matrix (sample x tree) for classification and regression, a 3d array for probability estimation (sample x class x tree) and survival (sample x time x tree).}

\item{num.trees}{Number of trees used for prediction. The first \code{num.trees} in the forest are used.}

\item{type}{Type of prediction. One of 'response', 'se', 'terminalNodes', 'quantiles' with default 'response'. See below for details.}

\item{se.method}{Method to compute standard errors. One of 'jack', 'infjack' with default 'infjack'. Only applicable if type = 'se'. See below for details.}

\item{quantiles}{Vector of quantiles for quantile prediction. Set \code{type = 'quantiles'} to use.}

\item{seed}{Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode.}

\item{num.threads}{Number of threads. Default is number of CPUs available.}

\item{verbose}{Verbose output on or off.}

\item{...}{further arguments passed to or from other methods.}
}
\value{
Object of class \code{blockForest.prediction} with elements
  \tabular{ll}{
      \code{predictions}    \tab Predicted classes/values (only for classification and regression)  \cr
      \code{unique.death.times} \tab Unique death times (only for survival). \cr
      \code{chf} \tab Estimated cumulative hazard function for each sample (only for survival). \cr
      \code{survival} \tab Estimated survival function for each sample (only for survival). \cr
      \code{num.trees}   \tab Number of trees. \cr
      \code{num.independent.variables} \tab Number of independent variables. \cr
      \code{treetype}    \tab Type of forest/tree. Classification, regression or survival. \cr
      \code{num.samples}     \tab Number of samples.
  }
}
\description{
This function is to be applied to the entry 'forest' of the output of
\code{\link{blockfor}}. See the example section for illustration.
}
\details{
For \code{type = 'response'} (the default), the predicted classes (classification), predicted numeric values (regression), predicted probabilities (probability estimation) or survival probabilities (survival) are returned. 
For \code{type = 'se'}, the standard errors of the predictions are returned (regression only). The jackknife-after-bootstrap or infinitesimal jackknife for bagging is used to estimate the standard errors based on out-of-bag predictions. See Wager et al. (2014) for details.
For \code{type = 'terminalNodes'}, the IDs of the terminal nodes in each tree for each observation in the given dataset are returned.
For \code{type = 'quantiles'}, the selected quantiles for each observation are estimated. See Meinshausen (2006) for details.

If \code{type = 'se'} is selected, the method to estimate the variances can be chosen with \code{se.method}. Set \code{se.method = 'jack'} for jackknife-after-bootstrap and \code{se.method = 'infjack'} for the infinitesimal jackknife for bagging.

For classification and \code{predict.all = TRUE}, the factor levels are returned as numerics.
To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the \code{blockForest} object.
}
\examples{
# NOTE: There is no association between covariates and response for the
# simulated data below.
# Moreover, the input parameters of blockfor() are highly unrealistic
# (e.g., nsets = 10 is specified much too small).
# The purpose of the shown examples is merely to illustrate the
# application of predict.blockForest().


# Generate data:
################

# Fix the RNG state so that the simulated data below are reproducible:
set.seed(1234)

# Covariate matrix: 40 rows (observations) and 5 + 30 + 100 = 135 columns,
# built from three sub-matrices that are drawn from different normal
# distributions:
X <- cbind(matrix(nrow=40, ncol=5, data=rnorm(40*5)), 
           matrix(nrow=40, ncol=30, data=rnorm(40*30, mean=1, sd=2)),
           matrix(nrow=40, ncol=100, data=rnorm(40*100, mean=2, sd=3)))
colnames(X) <- paste("X", 1:ncol(X), sep="")

# Block variable (list): first a vector assigning each column of X to one
# of the blocks 1, 2, 3; then a list with one element per block holding the
# column indices that belong to this block:
block <- rep(1:3, times=c(5, 30, 100))
block <- lapply(1:3, function(x) which(block==x))

# Binary outcome: factor with levels 0 and 1, drawn at random:
ybin <- factor(sample(c(0,1), size=40, replace=TRUE), levels=c(0,1))

# Survival outcome: two-column matrix, first column drawn with rnorm(),
# second column a random 0/1 indicator.
# NOTE(review): presumably column 1 is the survival time and column 2 the
# status indicator - confirm against the documentation of blockfor().
# rnorm() can produce negative values in column 1; this is only tolerable
# because the example is a pure syntax illustration.
ysurv <- cbind(rnorm(40), sample(c(0,1), size=40, replace=TRUE))



# Divide in training and test data:
# (rows 1-30 are used for training, rows 31-40 for testing; ybintest and
# ysurvtest are created for completeness but not used further below)

Xtrain <- X[1:30,]
Xtest <- X[31:40,]

ybintrain <- ybin[1:30]
ybintest <- ybin[31:40]

ysurvtrain <- ysurv[1:30,]
ysurvtest <- ysurv[31:40,]




# Binary outcome: Apply algorithm to training data and obtain predictions
# for the test data:
#########################################################################

# Apply a variant to the training data:

blockforobj <- blockfor(Xtrain, ybintrain, num.trees = 100, replace = TRUE, block=block,
                        nsets = 10, num.trees.pre = 50, splitrule="extratrees", 
                        block.method = "SplitWeights")
blockforobj$paramvalues


# Obtain prediction for the test data:
# (predict() is applied to the entry forest of the blockfor() output; the
# enclosing parentheses print the result of the assignment)
# NOTE(review): block.method is not a named argument in the usage of
# predict.blockForest() and is therefore passed via the dots argument -
# confirm that it is actually needed here.

(predres <- predict(blockforobj$forest, data = Xtest, block.method = "SplitWeights"))
predres$predictions



# Survival outcome: Apply algorithm to training data and obtain predictions
# for the test data:
###########################################################################

# Apply a variant to the training data:

blockforobj <- blockfor(Xtrain, ysurvtrain, num.trees = 100, replace = TRUE, block=block,
                        nsets = 10, num.trees.pre = 50, splitrule="extratrees", 
                        block.method = "SplitWeights")
blockforobj$paramvalues


# Obtain prediction for the test data:
# (same call as for the binary outcome; blockforobj now holds the forest
# fitted to the survival outcome)

(predres <- predict(blockforobj$forest, data = Xtest, block.method = "SplitWeights"))
# Row sums of the estimated cumulative hazard function; presumably one row
# per test observation and one column per unique death time:
rowSums(predres$chf)
}
\references{
\itemize{
  \item Wright, M. N. & Ziegler, A. (2017). ranger: A Fast Implementation of Random Forests for High Dimensional Data in C++ and R. J Stat Softw 77:1-17. \doi{10.18637/jss.v077.i01}.
  \item Wager, S., Hastie, T., & Efron, B. (2014). Confidence Intervals for Random Forests: The Jackknife and the Infinitesimal Jackknife. J Mach Learn Res 15:1625-1651. \url{https://jmlr.org/papers/v15/wager14a.html}.
  \item Meinshausen, N. (2006). Quantile Regression Forests. J Mach Learn Res 7:983-999. \url{https://www.jmlr.org/papers/v7/meinshausen06a.html}.
  }
}
\seealso{
\code{\link{blockForest}}
}
\author{
Marvin N. Wright
}
