% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/TGL_kmeans.R
\name{TGL_kmeans_tidy}
\alias{TGL_kmeans_tidy}
\title{TGL kmeans with 'tidy' output}
\usage{
TGL_kmeans_tidy(
  df,
  k,
  metric = "euclid",
  max_iter = 40,
  min_delta = 0.0001,
  verbose = FALSE,
  keep_log = FALSE,
  id_column = FALSE,
  reorder_func = "hclust",
  add_to_data = FALSE,
  hclust_intra_clusters = FALSE,
  seed = NULL,
  parallel = getOption("tglkmeans.parallel"),
  use_cpp_random = FALSE
)
}
\arguments{
\item{df}{a data frame or a matrix. Each row is a single observation and each column is a dimension.
The first column can contain an id for each observation (if id_column is TRUE),
otherwise the rownames are used.}

\item{k}{number of clusters. Note that in some cases the algorithm might return fewer clusters than k.}

\item{metric}{distance metric for kmeans++ seeding. can be 'euclid', 'pearson' or 'spearman'}

\item{max_iter}{maximal number of iterations}

\item{min_delta}{minimal change in assignments (fraction out of all observations) to continue iterating}

\item{verbose}{display algorithm messages}

\item{keep_log}{keep algorithm messages in 'log' field}

\item{id_column}{\code{df}'s first column contains the observation id}

\item{reorder_func}{function to reorder the clusters. Operates on each center and orders by the result, e.g. \code{reorder_func = mean} would calculate the mean of each center and then reorder the clusters accordingly. If \code{reorder_func = hclust}, the centers would be ordered by hclust of the euclidean distance of the correlation matrix, i.e. \code{hclust(dist(cor(t(centers))))}.
If NULL, no reordering would be done.}

\item{add_to_data}{return also the original data frame with an extra 'clust' column with the cluster ids ('id' is the first column)}

\item{hclust_intra_clusters}{run hierarchical clustering within each cluster and return an ordering of the observations.}

\item{seed}{seed for the c++ random number generator}

\item{parallel}{cluster each cluster in parallel (only relevant if \code{hclust_intra_clusters} is TRUE)}

\item{use_cpp_random}{use c++ random number generator instead of R's. This should be used only for
backwards compatibility, as from version 0.4.0 onwards the default random number generator was changed to R.}
}
\value{
list with the following components:
\describe{
  \item{cluster:}{tibble with `id` column with the observation id (`1:n` if no id column was supplied), and `clust` column with the observation assigned cluster.}
  \item{centers:}{tibble with `clust` column and the cluster centers.}
  \item{size:}{tibble with `clust` column and `n` column with the number of points in each cluster.}
  \item{data:}{tibble with `clust` column and the original data frame (only if \code{add_to_data = TRUE}).}
  \item{log:}{messages from the algorithm run (only if \code{keep_log = TRUE}).}
  \item{order:}{tibble with 'id' column, 'clust' column, 'order' column with a new ordering of the observations and 'intra_clust_order' column with the order within each cluster (only if hclust_intra_clusters = TRUE).}
}
}
\description{
TGL kmeans with 'tidy' output
}
\examples{
\dontshow{
# this line is only for CRAN checks
tglkmeans.set_parallel(1)
}

# create 5 clusters normally distributed around 1:5
d <- simulate_data(
    n = 100,
    sd = 0.3,
    nclust = 5,
    dims = 2,
    add_true_clust = FALSE,
    id_column = FALSE
)

head(d)

# cluster
km <- TGL_kmeans_tidy(d, k = 5, "euclid", verbose = TRUE)
km
}
\seealso{
\code{\link{TGL_kmeans}}
}
