% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{divergence}
\alias{divergence}
\title{Optimize the number of topics}
\usage{
divergence(x, min_size = 0.01, select = NULL, regularize = TRUE)
}
\arguments{
\item{x}{an LDA model fitted by \code{\link[=textmodel_seededlda]{textmodel_seededlda()}} or \code{\link[=textmodel_lda]{textmodel_lda()}}.}

\item{min_size}{the minimum size of topics for regularized topic divergence.
Ignored when \code{regularize = FALSE}.}

\item{select}{names of topics for which the divergence is computed.}

\item{regularize}{if \code{TRUE}, returns the regularized divergence.}
}
\description{
\code{divergence()} computes the regularized topic divergence to find the optimal
number of topics for LDA.
}
\details{
\code{divergence()} computes the average Jensen-Shannon divergence
between all the pairs of topic vectors in \code{x$phi}. The divergence score
is maximized when the chosen number of topics \code{k} is optimal (Deveaud et al.,
2014). The regularized divergence penalizes topics smaller than \code{min_size}
to avoid fragmentation (Watanabe & Baturo, 2023).
}
\references{
Deveaud, Romain et al. (2014). "Accurate and Effective Latent
Concept Modeling for Ad Hoc Information Retrieval".
doi:10.3166/DN.17.1.61-84. \emph{Document Numérique}.

Watanabe, Kohei & Baturo, Alexander. (2023). "Seeded Sequential LDA:
A Semi-supervised Algorithm for Topic-specific Analysis of Sentences".
doi:10.1177/08944393231178605. \emph{Social Science Computer Review}.
}
\seealso{
\link{sizes}
}
