% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/flxregbetabinom.R
\name{FLXMCregbetabinom}
\alias{FLXMCregbetabinom}
\title{FlexMix Driver for Regularized Beta-Binomial Mixtures}
\usage{
FLXMCregbetabinom(
  formula = . ~ .,
  size = NULL,
  alpha = 0,
  eps = sqrt(.Machine$double.eps)
)
}
\arguments{
\item{formula}{A formula which is interpreted relative to the
formula specified in the call to \code{\link[flexmix:flexmix]{flexmix::flexmix()}} using
\code{\link[stats:update.formula]{stats::update.formula()}}. Only the left-hand side (response)
of the formula is used. Default is to use the original model
formula specified in \code{\link[flexmix:flexmix]{flexmix::flexmix()}}.}

\item{size}{Number of trials (one or more). Default \code{NULL} implies
that the number of trials is inferred columnwise by the
maximum value observed.}

\item{alpha}{A non-negative scalar acting as regularization
parameter. Can be regarded as adding \code{alpha} observations
equal to the population mean to each component.}

\item{eps}{Lower threshold for the shape parameters \eqn{a} and \eqn{b}.}
}
\value{
An object of class \code{"FLXC"}.
}
\description{
This model driver can be used to cluster data using the beta-binomial
distribution.
}
\details{
Using a regularization parameter \code{alpha} greater than zero can be
viewed as adding \code{alpha} observations equal to the population mean
to each component. This can be used to avoid degenerate solutions
(i.e., probabilities of 0 or 1). It also has the effect that
clusters become more similar to each other the larger \code{alpha} is
chosen. For small values this effect is, however, mostly
negligible.
}
\examples{
library("flexmix")
library("flexord")
library("flexclust")

# Simulation settings
k <- 4     # nr of clusters
size <- 4  # nr of trials
N <- 100   # obs. per cluster

set.seed(0xdeaf)

# random success probabilities per component (10 variables each)
probs <- lapply(seq_len(k), \(ki) runif(10, 0.01, 0.99))

# sample data: one block of N rows and 10 columns of binomial draws per
# component; the blocks are stacked row-wise, component after component
dat <- lapply(probs, \(p) {
    lapply(p, \(p_i) {
        rbinom(N, size, p_i)
    }) |> do.call(cbind, args=_)
}) |> do.call(rbind, args=_)

# true component label for every row of dat; derived from k and N so the
# labels stay in sync with the simulation settings above
true_clusters <- rep(seq_len(k), each = N)

# Sample data is drawn from a binomial distribution but we fit
# beta-binomial which is a slight mis-specification but the
# beta-binomial can be seen as a generalized binomial.
# Reference fit: the true labels are passed as initial cluster assignment.
m <- flexmix(dat~1, model=FLXMCregbetabinom(size=size, alpha=0),
             cluster = true_clusters)

# Cluster without regularization
m1 <- stepFlexmix(dat~1, model=FLXMCregbetabinom(size=size, alpha=0), k=k)

# Cluster with regularization, initialized with the posterior of m1
m2 <- flexmix(dat~1, model=FLXMCregbetabinom(size=size, alpha=1), k=k,
              cluster = posterior(m1))

# Both models (m1 and m2) are mostly able to reconstruct the true clusters
# (ARI ~ 0.95); this is a very easy clustering problem.
# Small values for the regularization do not seem to affect the ARI (much)
randIndex(clusters(m1), true_clusters)
randIndex(clusters(m2), true_clusters)
}
\references{
Ernst, D, Ortega Menjivar, L, Scharl, T, Grün, B (2025).  \emph{Ordinal
Clustering with the flex-Scheme.} Austrian Journal of
Statistics. \emph{Submitted manuscript}.

Kondofersky, I (2008). \emph{Modellbasiertes Clustern mit der
Beta-Binomialverteilung.} Bachelor's thesis,
Ludwig-Maximilians-Universität München.
}
