% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/crossvalidation.R
\name{applyFolds}
\alias{applyFolds}
\alias{cvMa}
\alias{cvLong}
\alias{cvrisk.FDboost}
\title{Cross-Validation and Bootstrapping over Curves}
\usage{
applyFolds(object, folds = cv(rep(1, length(unique(object$id))), type =
  "bootstrap"), grid = 1:mstop(object), fun = NULL, riskFun = NULL,
  numInt = object$numInt, papply = mclapply, mc.preschedule = FALSE,
  showProgress = TRUE, compress = FALSE, ...)

\method{cvrisk}{FDboost}(object, folds = cvLong(id = object$id, weights =
  model.weights(object)), grid = 1:mstop(object), papply = mclapply,
  fun = NULL, corrected = TRUE, mc.preschedule = FALSE, ...)

cvLong(id, weights = rep(1, l = length(id)), type = c("bootstrap", "kfold",
  "subsampling", "curves"), B = ifelse(type == "kfold", 10, 25), prob = 0.5,
  strata = NULL)

cvMa(ydim, weights = rep(1, l = ydim[1] * ydim[2]), type = c("bootstrap",
  "kfold", "subsampling", "curves"), B = ifelse(type == "kfold", 10, 25),
  prob = 0.5, strata = NULL, ...)
}
\arguments{
\item{object}{fitted FDboost-object}

\item{folds}{a weight matrix with number of rows equal to the number of observed trajectories.}

\item{grid}{the grid over which the optimal number of boosting iterations (mstop) is searched.}

\item{fun}{if \code{fun} is \code{NULL}, the out-of-bag risk is returned. 
\code{fun}, as a function of \code{object}, 
may extract any other characteristic of the cross-validated models. These are returned as is.}

\item{riskFun}{only exists in \code{applyFolds}; allows computing risk functions other than the risk 
of the family that was specified in object. 
Must be specified as function of arguments \code{(y, f, w = 1)}, where \code{y} is the 
observed response, \code{f} is the prediction from the model and \code{w} is the weight. 
The risk function must return a scalar numeric value for vector-valued input.}

\item{numInt}{only exists in \code{applyFolds}; the scheme for numerical integration, 
see \code{numInt} in \code{\link{FDboost}}.}

\item{papply}{(parallel) apply function, defaults to \code{\link[parallel]{mclapply}}, 
see \code{\link[mboost]{cvrisk}} for details.}

\item{mc.preschedule}{Defaults to \code{FALSE}. Preschedule tasks if they are parallelized using \code{mclapply}?
For details see \code{\link[parallel]{mclapply}}.}

\item{showProgress}{logical, defaults to \code{TRUE}.}

\item{compress}{logical, defaults to \code{FALSE}. Only used to force a meaningful
behaviour of \code{applyFolds} with hmatrix objects when using nested resampling.}

\item{...}{further arguments passed to \code{\link[parallel]{mclapply}}}

\item{corrected}{see \code{\link[mboost]{cvrisk}}.}

\item{id}{the id-vector as integers 1, 2, ... specifying which observations belong to the same curve, 
deprecated in \code{cvMa()}.}

\item{weights}{a numeric vector of (integration) weights, defaults to 1.}

\item{type}{character argument for specifying the cross-validation 
method. Currently (stratified) bootstrap, k-fold cross-validation, subsampling and 
leaving-one-curve-out cross-validation (i.e., jackknife on curves) are implemented.}

\item{B}{number of folds, per default 25 for \code{bootstrap} and
\code{subsampling} and 10 for \code{kfold}.}

\item{prob}{percentage of observations to be included in the learning samples 
for subsampling.}

\item{strata}{a factor of the same length as \code{weights} for stratification.}

\item{ydim}{dimensions of response-matrix}
}
\value{
\code{cvMa} and \code{cvLong} return a matrix of sampling weights to be used in \code{cvrisk}. 

The functions \code{applyFolds} and \code{cvrisk.FDboost} return a \code{cvrisk}-object, 
which is a matrix of the computed out-of-bag risk. The matrix has the folds in rows and the 
number of boosting iterations in columns. Furthermore, the matrix has attributes including: 
\item{risk}{name of the applied risk function}
\item{call}{model call of the model object}
\item{mstop}{grid of stopping iterations that is used}
\item{type}{name for the type of folds}
}
\description{
Cross-validation and bootstrapping over curves to compute the empirical risk for 
hyper-parameter selection.
}
\details{
The number of boosting iterations is an important hyper-parameter of boosting.   
It can be chosen using the functions \code{applyFolds} or \code{cvrisk.FDboost}. Those functions 
compute honest, i.e., out-of-bag, estimates of the empirical risk for different 
numbers of boosting iterations. 
The weights (zero weights correspond to test cases) are defined via the folds matrix, 
see \code{\link[mboost]{cvrisk}} in package mboost. 

In case of functional response, we recommend using \code{applyFolds}. 
It recomputes the model in each fold using \code{FDboost}. Thus, all parameters are recomputed, 
including the smooth offset (if present) and the identifiability constraints (if present, only 
relevant for \code{bolsc}, \code{brandomc} and \code{bbsc}).  
Note that the function \code{applyFolds} expects folds that give weights
per curve without considering integration weights.  

The function \code{cvrisk.FDboost} is a wrapper for \code{\link[mboost]{cvrisk}} in package mboost. 
It overrides the default for the folds, so that the folds are sampled on the level of curves 
(not on the level of single observations, which does not make sense for functional response).  
Note that the smooth offset and the computation of the identifiability constraints
are not part of the refitting if \code{cvrisk} is used. 
Per default the integration weights of the model fit are used to compute the prediction errors 
(as the integration weights are part of the default folds). 
Note that in \code{cvrisk} the weights are rescaled to sum up to one. 

The functions \code{cvMa} and \code{cvLong} can be used to build an appropriate 
weight matrix for functional response to be used with \code{cvrisk} as sampling 
is done on the level of curves. The probability for each 
curve to enter a fold is equal over all curves.     
The function \code{cvMa} takes the dimensions of the response matrix as input argument and thus
can only be used for regularly observed response. 
The function \code{cvLong} takes the id variable and the weights as arguments and thus can be used
for responses in long format that are potentially observed irregularly. 
 
If \code{strata} is defined, 
sampling is performed in each stratum separately, thus preserving 
the distribution of the \code{strata} variable in each fold.
}
\note{
Use argument \code{mc.cores = 1L} to set the number of cores that is used in 
parallel computation. On Windows only 1 core is possible, \code{mc.cores = 1}, which is the default.
}
\examples{
Ytest <- matrix(rnorm(15), ncol = 3) # 5 trajectories, each with 3 observations 
Ylong <- as.vector(Ytest)
## 4-folds for bootstrap for the response in long format without integration weights
cvMa(ydim = c(5,3), type = "bootstrap", B = 4)  
cvLong(id = rep(1:5, times = 3), type = "bootstrap", B = 4)

if(require(fda)){
 ## load the data
 data("CanadianWeather", package = "fda")
 
 ## use data on a daily basis 
 canada <- with(CanadianWeather, 
                list(temp = t(dailyAv[ , , "Temperature.C"]),
                     l10precip = t(dailyAv[ , , "log10precip"]),
                     l10precip_mean = log(colMeans(dailyAv[ , , "Precipitation.mm"]), base = 10),
                     lat = coordinates[ , "N.latitude"],
                     lon = coordinates[ , "W.longitude"],
                     region = factor(region),
                     place = factor(place),
                     day = 1:365,  ## corresponds to t: evaluation points of the fun. response 
                     day_s = 1:365))  ## corresponds to s: evaluation points of the fun. covariate
 
## center temperature curves per day 
canada$tempRaw <- canada$temp
canada$temp <- scale(canada$temp, scale = FALSE) 
rownames(canada$temp) <- NULL ## delete row-names 
  
## fit the model  
mod <- FDboost(l10precip ~ 1 + bolsc(region, df = 4) + 
                 bsignal(temp, s = day_s, cyclic = TRUE, boundary.knots = c(0.5, 365.5)), 
               timeformula = ~ bbs(day, cyclic = TRUE, boundary.knots = c(0.5, 365.5)), 
               data = canada)
mod <- mod[75]

\dontrun{
  #### create folds for 3-fold bootstrap: one weight for each curve
  set.seed(123)
  folds_bs <- cv(weights = rep(1, mod$ydim[1]), type = "bootstrap", B = 3)

  ## compute out-of-bag risk on the 3 folds for 1 to 75 boosting iterations  
  cvr <- applyFolds(mod, folds = folds_bs, grid = 1:75)

  ## weights per observation point  
  folds_bs_long <- folds_bs[rep(1:nrow(folds_bs), times = mod$ydim[2]), ]
  attr(folds_bs_long, "type") <- "3-fold bootstrap"
  ## compute out-of-bag risk on the 3 folds for 1 to 75 boosting iterations  
  cvr3 <- cvrisk(mod, folds = folds_bs_long, grid = 1:75)
}

\dontrun{
  ## plot the out-of-bag risk
  par(mfrow = c(1,3))
  plot(cvr); legend("topright", lty=2, paste(mstop(cvr)))
  plot(cvr3); legend("topright", lty=2, paste(mstop(cvr3)))
}

}

}
\seealso{
\code{\link[mboost]{cvrisk}} to perform cross-validation with scalar response.
}
