% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kmeans.R
\name{h2o.kmeans}
\alias{h2o.kmeans}
\title{Performs k-means clustering on an H2O dataset}
\usage{
h2o.kmeans(training_frame, x, model_id = NULL, validation_frame = NULL,
  nfolds = 0, keep_cross_validation_models = TRUE,
  keep_cross_validation_predictions = FALSE,
  keep_cross_validation_fold_assignment = FALSE,
  fold_assignment = c("AUTO", "Random", "Modulo", "Stratified"),
  fold_column = NULL, ignore_const_cols = TRUE,
  score_each_iteration = FALSE, k = 1, estimate_k = FALSE,
  user_points = NULL, max_iterations = 10, standardize = TRUE,
  seed = -1, init = c("Random", "PlusPlus", "Furthest", "User"),
  max_runtime_secs = 0, categorical_encoding = c("AUTO", "Enum",
  "OneHotInternal", "OneHotExplicit", "Binary", "Eigen", "LabelEncoder",
  "SortByResponse", "EnumLimited"), export_checkpoints_dir = NULL)
}
\arguments{
\item{training_frame}{Id of the training data frame.}

\item{x}{A vector containing the \code{character} names of the predictors in the model.}

\item{model_id}{Destination id for this model; auto-generated if not specified.}

\item{validation_frame}{Id of the validation data frame.}

\item{nfolds}{Number of folds for K-fold cross-validation (0 to disable or >= 2). Defaults to 0.}

\item{keep_cross_validation_models}{\code{Logical}. Whether to keep the cross-validation models. Defaults to TRUE.}

\item{keep_cross_validation_predictions}{\code{Logical}. Whether to keep the predictions of the cross-validation models. Defaults to FALSE.}

\item{keep_cross_validation_fold_assignment}{\code{Logical}. Whether to keep the cross-validation fold assignment. Defaults to FALSE.}

\item{fold_assignment}{Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will
stratify the folds based on the response variable, for classification problems. Must be one of: "AUTO",
"Random", "Modulo", "Stratified". Defaults to AUTO.}

\item{fold_column}{Column with cross-validation fold index assignment per observation.}

\item{ignore_const_cols}{\code{Logical}. Ignore constant columns. Defaults to TRUE.}

\item{score_each_iteration}{\code{Logical}. Whether to score during each iteration of model training. Defaults to FALSE.}

\item{k}{The max. number of clusters. If estimate_k is disabled, the model will find k centroids, otherwise it will
find up to k centroids. Defaults to 1.}

\item{estimate_k}{\code{Logical}. Whether to estimate the number of clusters (<=k) iteratively and deterministically. Defaults
to FALSE.}

\item{user_points}{This option allows you to specify a dataframe, where each row represents an initial cluster center. The
user-specified points must have the same number of columns as the training observations. The number of rows must
equal the number of clusters.}

\item{max_iterations}{Maximum training iterations (if estimate_k is enabled, then this is for each inner Lloyd's iteration). Defaults
to 10.}

\item{standardize}{\code{Logical}. Standardize columns before computing distances. Defaults to TRUE.}

\item{seed}{Seed for random numbers (affects certain parts of the algo that are stochastic and those might or might not be enabled by default).
Defaults to -1 (time-based random number).}

\item{init}{Initialization mode. Must be one of: "Random", "PlusPlus", "Furthest", "User". Defaults to Furthest.}

\item{max_runtime_secs}{Maximum allowed runtime in seconds for model training. Use 0 to disable. Defaults to 0.}

\item{categorical_encoding}{Encoding scheme for categorical features. Must be one of: "AUTO", "Enum", "OneHotInternal", "OneHotExplicit",
"Binary", "Eigen", "LabelEncoder", "SortByResponse", "EnumLimited". Defaults to AUTO.}

\item{export_checkpoints_dir}{Automatically export generated models to this directory.}
}
\value{
Returns an object of class \linkS4class{H2OClusteringModel}.
}
\description{
Performs k-means clustering on an H2O dataset
}
\examples{
\dontrun{
library(h2o)
h2o.init()
prostate_path <- system.file("extdata", "prostate.csv", package = "h2o")
prostate <- h2o.uploadFile(path = prostate_path)
h2o.kmeans(training_frame = prostate, k = 10, x = c("AGE", "RACE", "VOL", "GLEASON"))
}
}
\seealso{
\code{\link{h2o.cluster_sizes}}, \code{\link{h2o.totss}}, \code{\link{h2o.num_iterations}},
         \code{\link{h2o.betweenss}}, \code{\link{h2o.tot_withinss}}, \code{\link{h2o.withinss}},
         \code{\link{h2o.centersSTD}}, \code{\link{h2o.centers}}
}
