% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ui.R
\name{HarmonyMatrix}
\alias{HarmonyMatrix}
\title{Main Harmony interface}
\usage{
HarmonyMatrix(
  data_mat,
  meta_data,
  vars_use,
  do_pca = TRUE,
  npcs = 20,
  theta = NULL,
  lambda = NULL,
  sigma = 0.1,
  nclust = NULL,
  tau = 0,
  block.size = 0.05,
  max.iter.harmony = 10,
  max.iter.cluster = 200,
  epsilon.cluster = 1e-05,
  epsilon.harmony = 1e-04,
  plot_convergence = FALSE,
  return_object = FALSE,
  verbose = TRUE,
  reference_values = NULL,
  cluster_prior = NULL
)
}
\arguments{
\item{data_mat}{Matrix of normalized gene expression (default) or PCA 
embeddings (see do_pca). 
Cells can be rows or columns.}

\item{meta_data}{Either (1) Dataframe with variables to integrate or (2) 
vector with labels.}

\item{vars_use}{If meta_data is dataframe, this defines which variable(s) 
to remove (character vector).}

\item{do_pca}{Whether to perform PCA on input matrix.}

\item{npcs}{If doing PCA on input matrix, number of PCs to compute.}

\item{theta}{Diversity clustering penalty parameter. Specify for each
variable in vars_use. Default theta=2. theta=0 does not encourage any 
diversity. Larger values of theta result in more diverse clusters.}

\item{lambda}{Ridge regression penalty parameter. Specify for each variable
 in vars_use. 
Default lambda=1. Lambda must be strictly positive. Smaller values result 
in more aggressive correction.}

\item{sigma}{Width of soft kmeans clusters. Default sigma=0.1. Sigma scales
the distance from a cell to cluster centroids. Larger values of sigma 
result in cells assigned to more clusters. Smaller values of sigma make 
soft kmeans clustering approach hard clustering.}

\item{nclust}{Number of clusters in model. nclust=1 is equivalent to simple 
linear regression.}

\item{tau}{Protection against overclustering small datasets with large ones.
tau is the expected number of cells per cluster.}

\item{block.size}{What proportion of cells to update during clustering.
Between 0 and 1, default 0.05. Larger values may be faster but less accurate.}

\item{max.iter.harmony}{Maximum number of rounds to run Harmony. One round
of Harmony involves one clustering and one correction step.}

\item{max.iter.cluster}{Maximum number of rounds to run clustering at each 
round of Harmony.}

\item{epsilon.cluster}{Convergence tolerance for clustering round of 
Harmony. Set to -Inf to never stop early.}

\item{epsilon.harmony}{Convergence tolerance for Harmony. Set to -Inf to
never stop early.}

\item{plot_convergence}{Whether to print the convergence plot of the 
clustering objective function. TRUE to plot, FALSE to suppress. This can be
 useful for debugging.}

\item{return_object}{(Advanced Usage) Whether to return the Harmony object 
or only the corrected PCA embeddings.}

\item{verbose}{Whether to print progress messages. TRUE to print, 
FALSE to suppress.}

\item{reference_values}{(Advanced Usage) Defines reference dataset(s). 
Cells that have batch variable values matching reference_values will not 
be moved.}

\item{cluster_prior}{(Advanced Usage) Provides user defined clusters for 
cluster initialization. If the number of provided clusters C is less than K, 
Harmony will initialize K-C clusters with kmeans. C cannot exceed K.}
}
\value{
By default, matrix with corrected PCA embeddings. If return_object 
is TRUE, returns the full Harmony object (R6 reference class type).
}
\description{
Use this to run the Harmony algorithm on a gene expression or PCA matrix.
}
\examples{


## By default, Harmony inputs a normalized gene expression matrix
\dontrun{
harmony_embeddings <- HarmonyMatrix(exprs_matrix, meta_data, 'dataset')
}

## Harmony can also take a PCA embeddings matrix
data(cell_lines_small)
pca_matrix <- cell_lines_small$scaled_pcs
meta_data <- cell_lines_small$meta_data
harmony_embeddings <- HarmonyMatrix(pca_matrix, meta_data, 'dataset', 
                                    do_pca=FALSE)

## Output is a matrix of corrected PC embeddings
dim(harmony_embeddings)
harmony_embeddings[seq_len(5), seq_len(5)]

## Finally, we can return an object with all the underlying data structures
harmony_object <- HarmonyMatrix(pca_matrix, meta_data, 'dataset', 
                                    do_pca=FALSE, return_object=TRUE)
dim(harmony_object$Y) ## cluster centroids
dim(harmony_object$R) ## soft cluster assignment
dim(harmony_object$Z_corr) ## corrected PCA embeddings
head(harmony_object$O) ## batch by cluster co-occurrence matrix

}
