% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{calc_prob_coherence}
\alias{calc_prob_coherence}
\title{Probabilistic coherence of topics}
\usage{
calc_prob_coherence(beta, data, m = 5)
}
\arguments{
\item{beta}{A numeric matrix or a numeric vector. The vector, or each row of
the matrix, represents the numeric relationship between topic(s) and terms.
For example, this relationship may be p(word|topic) or p(topic|word).}

\item{data}{A document term matrix or term co-occurrence matrix. The preferred
class is a \code{\link[Matrix]{dgCMatrix-class}}. However, there is support
for any \code{\link[Matrix]{Matrix-class}} object as well as several other
commonly used classes such as \code{\link[base]{matrix}},
\code{\link[quanteda]{dfm}}, \code{\link[tm]{DocumentTermMatrix}}, and
\code{\link[slam]{simple_triplet_matrix}}.}

\item{m}{An integer for the number of top words in each topic to be used in
the calculation. Defaults to 5.}
}
\value{
Returns an object of class \code{numeric} corresponding to the 
probabilistic coherence of the input topic(s).
}
\description{
Calculates the probabilistic coherence of a topic or topics. 
This approximates semantic coherence or human understandability of a topic.
}
\details{
For each pair of words \{a, b\} in the top \code{m} words in a topic,
  probabilistic coherence calculates P(b|a) - P(b), where a is more probable
  than b in the topic. For example, suppose the top 4 words in a topic are
  \{a, b, c, d\}, ordered from most to least probable. Then, we calculate
  \enumerate{
    \item P(b|a) - P(b), P(c|a) - P(c), P(d|a) - P(d)
    \item P(c|b) - P(c), P(d|b) - P(d)
    \item P(d|c) - P(d)
  }
  All 6 differences are averaged together.
}
\examples{
# Load a pre-formatted document term matrix (dtm)
data(nih_sample_dtm)

# fit a model
set.seed(12345)
model <- tidylda(
  data = nih_sample_dtm[1:20, ], k = 5,
  iterations = 100, burnin = 50
)

calc_prob_coherence(beta = model$beta, data = nih_sample_dtm, m = 5)
}
