% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/D2MCS.R
\name{D2MCS}
\alias{D2MCS}
\title{Data Driven Multiple Classifier System.}
\description{
The class is responsible for managing the whole process.
Concretely, it builds the M.L. models (optimizing the models' hyperparameters),
selects the best M.L. model for each cluster and executes the classification stage.
}
\examples{

# Fix the seed of the random number generator so the example is reproducible.
set.seed(1234)

## Create the dataset loader object.
loader <- DatasetLoader$new()

## Load the 'hcc-data-complete-balanced.csv' dataset file located in the
## 'examples' directory of the installed D2MCS package.
data <- loader$load(filepath = system.file(file.path("examples",
                                                     "hcc-data-complete-balanced.csv"),
                                           package = "D2MCS"),
                    header = TRUE, normalize.names = TRUE)
## Get the column names of the loaded dataset.
data$getColumnNames()

## Split data into 4 partitions keeping balance ratio of 'Class' column.
data$createPartitions(num.folds = 4, class.balance = "Class")

## Create a subset comprising the first 2 partitions for clustering purposes.
cluster.subset <- data$createSubset(num.folds = c(1, 2), class.index = "Class",
                                    positive.class = "1")

## Create a subset comprising the second and third partitions for training
## purposes (note that partition 2 is also part of the clustering subset).
train.subset <- data$createSubset(num.folds = c(2, 3), class.index = "Class",
                                  positive.class = "1")

## Create a subset comprising the last partition for testing purposes.
test.subset <- data$createSubset(num.folds = 4, class.index = "Class",
                                 positive.class = "1")

## Distribute the features into clusters using MCC heuristic.
distribution <- SimpleStrategy$new(subset = cluster.subset,
                                   heuristic = MCCHeuristic$new())
distribution$execute()

## Get the best achieved distribution.
distribution$getBestClusterDistribution()

## Create a train set from the computed clustering distribution.
train.set <- distribution$createTrain(subset = train.subset)

\dontrun{

## Initialization of D2MCS configuration parameters.
##  - Defining training operation.
##    + 10-fold cross-validation
##    + Parallel execution is allowed; the number of CPU cores is fixed below
##      when the D2MCS object is created.
##    + Seed was set to ensure straightforward reproducibility of experiments.
trFunction <- TwoClass$new(method = "cv", number = 10, savePredictions = "final",
                           classProbs = TRUE, allowParallel = TRUE,
                           verboseIter = FALSE, seed = 1234)

##  - Specify the models to be trained
ex.classifiers <- c("ranger", "lda", "lda2")

## Initialize D2MCS. Trained models are saved in a temporary directory and
## only 1 CPU core is used.
d2mcs <- D2MCS$new(dir.path = tempdir(),
                   num.cores = 1)

## Execute the training stage using 'MCC' and 'PPV' measures to optimize
## model hyperparameters.
trained.models <- d2mcs$train(train.set = train.set,
                              train.function = trFunction,
                              ex.classifiers = ex.classifiers,
                              metrics = c("MCC", "PPV"))

## Execute classification stage using two different voting schemes
predictions <- d2mcs$classify(train.output = trained.models,
                              subset = test.subset,
                              voting.types = c(
                                SingleVoting$new(
                                  voting.schemes = c(ClassMajorityVoting$new(),
                                                     ClassWeightedVoting$new()),
                                  metrics = c("MCC", "PPV"))))

## Compute the performance of each voting scheme using PPV and MCC measures.
predictions$getPerformances(test.subset, measures = list(MCC$new(), PPV$new()))

## Execute classification stage using multiple voting schemes (simple and combined)
predictions <- d2mcs$classify(train.output = trained.models,
                              subset = test.subset,
                              voting.types = c(
                                SingleVoting$new(
                                  voting.schemes = c(ClassMajorityVoting$new(),
                                                     ClassWeightedVoting$new()),
                                  metrics = c("MCC", "PPV")),
                                CombinedVoting$new(
                                  voting.schemes = ClassMajorityVoting$new(),
                                  combined.metrics = MinimizeFP$new(),
                                  methodology = ProbBasedMethodology$new(),
                                  metrics = c("MCC", "PPV"))))

## Compute the performance of each voting scheme using PPV and MCC measures.
predictions$getPerformances(test.subset, measures = list(MCC$new(), PPV$new()))
}


}
\seealso{
\code{\link{Dataset}}, \code{\link{Subset}}, \code{\link{Trainset}}
}
\keyword{classif}
\keyword{methods}
\keyword{programming}
\keyword{utilities}
\section{Methods}{
\subsection{Public methods}{
\itemize{
\item \href{#method-new}{\code{D2MCS$new()}}
\item \href{#method-train}{\code{D2MCS$train()}}
\item \href{#method-classify}{\code{D2MCS$classify()}}
\item \href{#method-getAvailableModels}{\code{D2MCS$getAvailableModels()}}
\item \href{#method-clone}{\code{D2MCS$clone()}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-new"></a>}}
\if{latex}{\out{\hypertarget{method-new}{}}}
\subsection{Method \code{new()}}{
The function is used to initialize all parameters needed
to build a Multiple Classifier System.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{D2MCS$new(
  dir.path,
  num.cores = NULL,
  socket.type = "PSOCK",
  outfile = NULL,
  serialize = FALSE
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{dir.path}}{A \link{character} defining the location where the
trained models should be saved.}

\item{\code{num.cores}}{An optional \link{numeric} value specifying
the number of CPU cores used for training the models (only if
parallelization is allowed). If not defined (num.cores - 2) cores will be
used.}

\item{\code{socket.type}}{A \link{character} value defining the type of socket
used to communicate with the workers. The default type, \code{"PSOCK"}, calls
makePSOCKcluster. Type \code{"FORK"} calls makeForkCluster. For more
information see \code{\link{makeCluster}}.}

\item{\code{outfile}}{Where to direct the stdout and stderr connection output
from the workers. "" indicates no redirection (which may only be useful
for workers on the local machine). Defaults to '/dev/null'}

\item{\code{serialize}}{A \code{\link{logical}} value. If \link{TRUE},
serialization will use XDR; where large amounts of data are to be
transferred and all the nodes are little-endian, communication may be
substantially faster if this is set to \link{FALSE} (the default).}
}
\if{html}{\out{</div>}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-train"></a>}}
\if{latex}{\out{\hypertarget{method-train}{}}}
\subsection{Method \code{train()}}{
The function is responsible for performing the M.L. model
training stage.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{D2MCS$train(
  train.set,
  train.function,
  num.clusters = NULL,
  model.recipe = DefaultModelFit$new(),
  ex.classifiers = c(),
  ig.classifiers = c(),
  metrics = NULL,
  saveAllModels = FALSE
)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{train.set}}{A \code{\link{Trainset}} object used as training input
for the M.L. models}

\item{\code{train.function}}{A \code{\link{TrainFunction}} defining the training
configuration options.}

\item{\code{num.clusters}}{A \link{numeric} value used to define the number of
clusters from the \code{\link{Trainset}} that should be utilized during
the training stage. If not defined all clusters will be taken into
account for training.}

\item{\code{model.recipe}}{An unprepared recipe object inherited from
\code{\link{GenericModelFit}} class.}

\item{\code{ex.classifiers}}{A \link{character} vector containing the name of
the M.L. models used in training stage. See
\code{\link{getModelInfo}} and
\url{https://topepo.github.io/caret/available-models.html} for more
information about all the available models.}

\item{\code{ig.classifiers}}{A \link{character} vector containing the name of
the M.L. models that should be ignored when performing the training stage. See
\code{\link{getModelInfo}} and
\url{https://topepo.github.io/caret/available-models.html} for more
information about all the available models.}

\item{\code{metrics}}{A \link{character} vector containing the metrics used to
perform the M.L. model hyperparameter optimization during the training
stage. See \code{\link{SummaryFunction}}, \code{\link{UseProbability}}
and \code{\link{NoProbability}} for more information.}

\item{\code{saveAllModels}}{A \link{logical} parameter. \link{TRUE} saves all
trained models while \link{FALSE} saves only the M.L. model achieving
the best performance on each cluster.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
A \code{\link{TrainOutput}} object containing all the information
computed during the training stage.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-classify"></a>}}
\if{latex}{\out{\hypertarget{method-classify}{}}}
\subsection{Method \code{classify()}}{
The function is responsible for executing the classification
stage.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{D2MCS$classify(train.output, subset, voting.types, positive.class = NULL)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{train.output}}{The \code{\link{TrainOutput}} object computed in the
train stage.}

\item{\code{subset}}{A \code{\link{Subset}} containing the data to be classified.}

\item{\code{voting.types}}{A \link{list} containing \code{\link{SingleVoting}}
or \code{\link{CombinedVoting}} objects.}

\item{\code{positive.class}}{An optional \link{character} parameter used
to define the positive class value.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns}{
A \code{\link{ClassificationOutput}} with all the values computed
during classification stage.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-getAvailableModels"></a>}}
\if{latex}{\out{\hypertarget{method-getAvailableModels}{}}}
\subsection{Method \code{getAvailableModels()}}{
The function obtains all the available M.L. models.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{D2MCS$getAvailableModels()}\if{html}{\out{</div>}}
}

\subsection{Returns}{
A \link{data.frame} containing the information of the available
M.L. models.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-clone"></a>}}
\if{latex}{\out{\hypertarget{method-clone}{}}}
\subsection{Method \code{clone()}}{
The objects of this class are cloneable with this method.
\subsection{Usage}{
\if{html}{\out{<div class="r">}}\preformatted{D2MCS$clone(deep = FALSE)}\if{html}{\out{</div>}}
}

\subsection{Arguments}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{deep}}{Whether to make a deep clone.}
}
\if{html}{\out{</div>}}
}
}
}
