% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bayes2S_v6.r
\name{bayes.2S}
\alias{bayes.2S}
\title{Fitting Bayesian Prevalence-Incidence Mixture Model}
\usage{
bayes.2S(
  Vobs,
  Z.X = NULL,
  Z.W = NULL,
  r = NULL,
  dist.X = "weibull",
  kappa = 0.5,
  update.kappa = FALSE,
  kappa.prior = NULL,
  ndraws = 1000,
  prop.sd.X = NULL,
  chains = 3,
  thining = 1,
  parallel = TRUE,
  update.till.converge = FALSE,
  maxit = Inf,
  conv.crit = "upper",
  min_effss = chains * 10,
  beta.prior = "norm",
  beta.prior.X = 1,
  sig.prior.X = 1,
  tau.w = 1,
  fix.sigma.X = FALSE,
  prev.run = NULL,
  update.burnin = TRUE,
  ndraws.update = NULL,
  prev = TRUE,
  vanilla = FALSE,
  ndraws.naive = 10000,
  naive.run.prop.sd.X = prop.sd.X,
  par.exp = FALSE,
  collapsed.g = TRUE,
  k.prior = 1,
  fix.k = FALSE
)
}
\arguments{
\item{Vobs}{A list of length \eqn{n} of numeric vectors representing screening times. The first element of each vector should always be \code{0} and the last element \code{Inf} in the case of right censoring.}

\item{Z.X}{A numeric matrix of dimension \eqn{n \times p_x} containing covariates for the AFT incidence model. Missing values are not allowed.}

\item{Z.W}{A numeric matrix of dimension \eqn{n \times p_w} containing covariates for the probit prevalence model. Missing values are not allowed.}

\item{r}{A binary vector of length \eqn{n} indicating whether a baseline test was conducted (\code{1} for yes, \code{0} for no / missing baseline test).}

\item{dist.X}{Character. Specifies the distribution for the time-to-incidence variable; choices are \code{'weibull'}, \code{'lognormal'}, or \code{'loglog'} (log-logistic).}

\item{kappa}{Numeric. The test sensitivity value to be used if \code{update.kappa = FALSE}; otherwise, the starting value for estimating \eqn{\kappa}.}

\item{update.kappa}{Logical. If \code{TRUE}, the test sensitivity (\eqn{\kappa}) is updated during the Gibbs sampler.}

\item{kappa.prior}{A numeric vector of length 2. When specified, a Beta distribution prior is used for \eqn{\kappa}, centered at \code{kappa.prior[1]} with standard deviation \code{kappa.prior[2]}. If \code{NULL}, a uniform prior (Beta(1,1)) is used.}

\item{ndraws}{Integer. The total number of MCMC draws for the main Gibbs sampler.}

\item{prop.sd.X}{Numeric. The standard deviation for the proposal (jumping) distribution in the Metropolis sampler used for updating \eqn{\beta_{xj}}. Can be searched for automatically using \link{search.prop.sd}.}

\item{chains}{Integer. The number of MCMC chains to run.}

\item{thining}{Integer. The thinning interval for the MCMC sampler.}

\item{parallel}{Logical. If \code{TRUE}, parallel processing is enabled for the Gibbs sampler. Each chain is assigned to one core (via the \code{foreach} package). Alternatively, use \link{bayes.2S_seq} which employs a \code{for} loop over the chains.}

\item{update.till.converge}{Logical. If \code{TRUE}, the model is updated iteratively until convergence criteria are met. Convergence is assessed using the Gelman-Rubin diagnostic (\eqn{R<1.1}) and a minimum effective sample size (\code{min_effss}); both criteria must be met for every parameter.}

\item{maxit}{Numeric. The maximum number of MCMC draws allowed before interrupting the update process when \code{update.till.converge} is enabled. Default is \code{Inf} (i.e., no automatic interruption).}

\item{conv.crit}{Character. Specifies whether the convergence check uses the point estimate (\code{'point'}) or the upper bound (\code{'upper'}) of the Gelman-Rubin diagnostic estimate (\eqn{\hat{R}}).}

\item{min_effss}{Integer. The minimum effective sample size required for each parameter before convergence is accepted during iterative updating.}

\item{beta.prior}{Character. Specifies the type of prior for the incidence regression coefficients (\eqn{\beta_{xj}}); options are \code{'norm'} for normal and \code{'t'} for student-t.}

\item{beta.prior.X}{Numeric. The hyperparameter for the prior distribution of the regression coefficients (\eqn{\beta_{xj}}) in the AFT incidence model. For a normal prior, this is the standard deviation; for a student-t prior, it represents the degrees of freedom. The default produces a standard-normal prior.}

\item{sig.prior.X}{Numeric. The hyperparameter (standard deviation) for a half-normal prior on the scale parameter (\eqn{\sigma}) of the AFT incidence model.}

\item{tau.w}{Numeric. The hyperparameter (standard deviation) for the normal prior distribution of the regression coefficients (\eqn{\beta_{wj}}) in the probit prevalence model. The default produces a standard-normal prior.}

\item{fix.sigma.X}{Logical. If \code{TRUE}, the scale parameter (\eqn{\sigma}) in the AFT incidence model is fixed at the value provided in \code{sig.prior.X}; if \code{FALSE}, it is updated.}

\item{prev.run}{Optional. An object of class \code{BayesPIM} containing results from a previous run. When provided, data and prior settings are adopted from the previous run, and the MCMC chain continues from the last draw.}

\item{update.burnin}{Logical. If \code{TRUE} (default) and \code{prev.run} is provided, the burn-in period is updated to half of the total draws (sum of previous and current runs); otherwise, the burn-in is maintained as half of the draws from the initial run.}

\item{ndraws.update}{Integer. The number of MCMC draws for updating a previous run or for convergence updates. If unspecified, \code{ndraws} is used.}

\item{prev}{Logical. If \code{TRUE}, prevalence adjustment is applied; if \code{FALSE}, prevalence is assumed to be zero.}

\item{vanilla}{Logical. If \code{TRUE}, a vanilla run is performed that assumes no prevalence adjustment and fixes \eqn{\kappa = 1}, equivalent to a standard Bayesian interval-censored survival regression.}

\item{ndraws.naive}{Integer. The number of MCMC draws for a preliminary vanilla run used to obtain starting values. Increase if initial values lead to issues (e.g., an infinite posterior).}

\item{naive.run.prop.sd.X}{Numeric. The standard deviation for the proposal distribution used in the vanilla run. Adjust only if the acceptance rate is significantly off target, as indicated by an interruption message.}

\item{par.exp}{Logical. If \code{TRUE}, the parameter expansion technique of Liu & Wu (1999) with a Haar prior is employed for updating the regression coefficients (\eqn{\beta_{wj}}) in the prevalence model. Experimental: tests suggest that it does not improve convergence or reduce autocorrelation.}

\item{collapsed.g}{Logical. If \code{TRUE}, the latent prevalence class membership update in the Gibbs sampler is integrated (collapsed) over the latent incidence time variable. This setting is recommended to improve convergence.}

\item{k.prior}{Experimental prior parameter for generalized gamma; currently not used.}

\item{fix.k}{Experimental fixing of prior parameter for generalized gamma; currently not used.}
}
\value{
A list containing the following elements:

\item{par.X.all}{An \code{mcmc.list} of MCMC samples for the incidence and prevalence model parameters.}
\item{par.X.bi}{An \code{mcmc.list} of MCMC samples for the incidence and prevalence model parameters after burn-in removal.}
\item{X}{A matrix of posterior draws for the latent event times \eqn{x_i}, with one column per chain.}
\item{C}{A matrix of posterior draws for prevalence class membership \eqn{g_i}, with one column per chain.}
\item{ac.X}{A matrix with MCMC draws in rows and chains in columns, where each row indicates whether the Metropolis sampler accepted (1) or rejected (0) a sample.}
\item{ac.X.cur}{Same as \code{ac.X}, but only for the last update.}
\item{dat}{A data frame containing the last observed interval.}
\item{priors}{A list of prior specifications for the model parameters, including \code{beta.prior.X} (incidence regression coefficients) and \code{sig.prior.X} (scale parameter for the AFT model).}
\item{runtime}{The total runtime of the MCMC sampler.}

Additionally, most input arguments are returned as part of the output for reference.
}
\description{
Estimates the prevalence-incidence mixture (PIM) model of Klausch et al. (2024) using a Bayesian Gibbs sampler. The model is formulated as an interval-censored survival model over successive intervals, with the possibility of missed events due to imperfect test sensitivity. In addition, baseline tests at time zero may fail to detect pre-study events (prevalence).
}
\details{
This Bayesian prevalence-incidence mixture model (PIM) characterizes the time to incidence using an accelerated failure time (AFT) model of the form:

\deqn{\log(x_i) = \bm{z}_{xi}' \bm{\beta}_x + \sigma \epsilon_i}{log(x_i) = z_{xi}' beta_x + sigma * epsilon_i}

where \eqn{\epsilon_i} is chosen such that \eqn{x_i} follows a \code{weibull}, \code{lognormal}, or \code{loglog} (log-logistic) distribution, as specified by the \code{dist.X} argument. The covariate vector \eqn{\bm{z}_{xi}} for individual \eqn{i} is provided in the \code{Z.X} matrix.

Baseline prevalence is modeled using a probit formulation \eqn{Pr(g_i=1 | \bm{z}_{wi}) = Pr(w_i > 0 | \bm{z}_{wi})} with

\deqn{w_i = \bm{z}_{wi}' \bm{\beta}_w + \psi_i}{w_i = z_{wi}' beta_w + psi_i}

where \eqn{\psi_i} follows a standard normal distribution, and the covariate vector \eqn{\bm{z}_{wi}} is given in the \code{Z.W} matrix. The latent variable \eqn{w_i} determines prevalence status, such that \eqn{g_i = 1} if \eqn{w_i > 0} and \eqn{g_i = 0} otherwise.

The argument \code{Vobs} provides the observed testing times for all individuals. It is a list of numeric vectors, where each vector starts with \code{0} (representing the baseline time) and is followed by one or more screening times. The final entry is \code{Inf} in the case of right censoring or indicates the time of a positive test if an event is observed. Specifically:
\itemize{
\item If the baseline test is positive, the vector consists solely of \code{c(0)}.
\item If the baseline test is negative and right censoring occurs before the first regular screening, the vector is \code{c(0, Inf)}.
\item Otherwise, the vector ends with \code{Inf} in the case of right censoring (e.g., \code{c(0, 1, 3, 6, Inf)}) or ends at the event time (e.g., \code{c(0, 1, 3, 6)} for an event detected at time \code{6}).
}

By convention, every vector in \code{Vobs} starts with \code{0}. However, the binary vector \code{r} of length \eqn{n} indicates whether the baseline test was conducted (\code{r[i] = 1}) or missing (\code{r[i] = 0}) for each individual \code{i} in \code{Vobs}. For further details on coding, see Section 2 of the main paper.

Test sensitivity can be fixed to a value \code{kappa} by setting \code{update.kappa = FALSE}, or it can be estimated if \code{update.kappa = TRUE}. When estimated, a Beta prior is used, centered on the first element of \code{kappa.prior}, with a standard deviation equal to its second element. An internal optimization process finds the Beta prior hyperparameters that best match this choice. If the chosen prior is not feasible, unexpected behavior may occur. If \code{kappa.prior} is not specified (the default), an uninformative uniform(0,1) prior is used. In general, we advise against using an uninformative prior, but this default avoids favoring any specific informative prior.

The Gibbs sampler runs for \code{ndraws} iterations for each of \code{chains} total chains. The Metropolis step used for sampling the parameters of the incidence model applies a normal proposal (jumping) distribution with a standard deviation \code{prop.sd.X}, which must be selected by trial and error. An optimal acceptance rate is approximately 23\%; the realized acceptance rate can be computed per chain from the model output (element \code{ac.X}). Alternatively, the function \link{search.prop.sd} provides a heuristic for selecting an effective proposal distribution standard deviation.

If \code{parallel = TRUE}, the Gibbs sampler runs in parallel with one chain per CPU (if possible), using the \code{foreach} package. If this package causes issues on some operating systems, set \code{parallel = FALSE} or use the \link{bayes.2S_seq} function, which iterates over \code{1:chains} using a \code{for} loop. This sequential function may also be useful in Monte Carlo simulations that parallelize experimental replications using \code{foreach}.

We recommend running at least two chains in parallel, and preferably more, to facilitate standard MCMC diagnostics such as the Gelman-Rubin \eqn{R} statistic. Additionally, we suggest first running the sampler for a moderate number of iterations to assess its behavior before using the updating functionality in \code{prev.run} to extend sampling (see below).

The option \code{update.till.converge = TRUE} allows \code{bayes.2S} to run until convergence. Convergence is achieved when \eqn{R < 1.1} for all parameters and the minimum effective sample size \code{min_effss} is reached. The sampler continues updating until convergence is attained or \code{maxit} is reached.

The priors for the regression coefficients in the prevalence and incidence models can be controlled using \code{beta.prior}, \code{beta.prior.X}, \code{sig.prior.X}, and \code{tau.w}. Specifically:
\itemize{
\item \code{beta.prior} determines the prior type for \eqn{\beta_{xj}} (either normal, \code{'norm'}, or Student-\eqn{t}, \code{'t'}).
\item \code{beta.prior.X} specifies either the standard deviation (for normal priors) or degrees of freedom (for Student-\eqn{t} priors). The default is a standard normal prior.
\item A half-normal prior is used for \eqn{\sigma}, with \code{sig.prior.X} controlling the standard deviation.
\item A zero-centered normal prior is assigned to \eqn{\beta_{wj}}, with \code{tau.w} controlling its standard deviation (default: standard normal).
}

Sometimes model fitting can be improved by fixing the \eqn{\sigma} parameter to a value, which is achieved through setting \code{fix.sigma.X = TRUE}. Then, the value specified as \code{sig.prior.X} is regarded as the correct value for \eqn{\sigma}, akin to a point prior on this value. The functionality can also be used to obtain the exponential distribution, akin to a Markov model. For this choose \code{dist.X = 'weibull'}, \code{sig.prior.X = 1}, and \code{fix.sigma.X = TRUE}.

The \code{prev.run} argument allows updating a previous run with additional MCMC draws. The MCMC chain resumes from the last draws, continues, and merges with the original run. If an initial model was fit using \code{mod <- bayes.2S(...)}, it can be updated using \code{mod_update <- bayes.2S(prev.run = mod)}. By default, \code{ndraws} additional iterations are added unless otherwise specified via \code{ndraws.update}. When updating, the number of discarded burn-in draws can be adjusted to half of the total draws (\code{update.burnin = TRUE}) or remain at the initial number (\code{update.burnin = FALSE}).

The Gibbs sampler requires starting values, which are obtained from an initial Bayesian interval-censored survival model using the specified \code{dist.X} distribution. The number of MCMC draws and the standard deviation of the proposal (jumping) distribution for this initialization are controlled via \code{ndraws.naive} and \code{naive.run.prop.sd.X}, respectively. The default values typically suffice but may need adjustment if initialization fails (e.g., increasing \code{ndraws.naive} or tuning \code{naive.run.prop.sd.X}). If starting values are found but still lead to an infinite posterior at initialization, the error "Bad starting values" is returned. Then it usually suffices to re-run \code{bayes.2S} with an increased \code{ndraws.naive} value.
}
\examples{
\donttest{
library(BayesPIM)

# Generate data according to the Klausch et al. (2024) PIM
set.seed(2025)
dat <- gen.dat(kappa = 0.7, n = 1e3, theta = 0.2,
               p = 1, p.discrete = 1,
               beta.X = c(0.2, 0.2), beta.W = c(0.2, 0.2),
               v.min = 20, v.max = 30, mean.rc = 80,
               sigma.X = 0.2, mu.X = 5, dist.X = "weibull",
               prob.r = 1)

# Initial model fit with fixed test sensitivity kappa (approx. 1-3 minutes runtime)
mod <- bayes.2S(Vobs = dat$Vobs,
                Z.X = dat$Z,
                Z.W = dat$Z,
                r = dat$r,
                kappa = 0.7,
                update.kappa = FALSE,
                ndraws = 1e4,
                chains = 2,
                prop.sd.X = 0.008,
                parallel = TRUE,
                dist.X = "weibull")


# Inspect results
mod$runtime  # Runtime of the Gibbs sampler
plot(trim.mcmc(mod$par.X.all, thining = 10))  # MCMC chains including burn-in, also see ?trim.mcmc
plot(trim.mcmc(mod$par.X.bi, thining = 10))   # MCMC chains excluding burn-in
apply(mod$ac.X, 2, mean)  # Acceptance rates per chain
gelman.diag(mod$par.X.bi)  # Gelman convergence diagnostics

# Model updating
mod_update <- bayes.2S(prev.run = mod)      # Adds ndraws additional MCMC draws
mod_update <- bayes.2S(prev.run = mod, 
                       ndraws.update = 1e3) # Adds ndraws.update additional MCMC draws

# Example with kappa estimated/updated
mod2 <- bayes.2S(Vobs = dat$Vobs,
                 Z.X = dat$Z,
                 Z.W = dat$Z,
                 r = dat$r,
                 kappa = 0.7,
                 update.kappa = TRUE,
                 kappa.prior = c(0.7, 0.1), # Beta prior, mean = 0.7, s.d. = 0.1
                 ndraws = 1e4,
                 chains = 2,
                 prop.sd.X = 0.008,
                 parallel = TRUE,
                 dist.X = "weibull")

# Inspect results
mod2$runtime # runtime of Gibbs sampler
plot( trim.mcmc( mod2$par.X.all, thining = 10) ) # kappa returned as part of the mcmc.list
}

}
\references{
T. Klausch, B. I. Lissenberg-Witte, and V. M. Coupe (2024). "A Bayesian prevalence-incidence mixture model for screening outcomes with misclassification." \doi{10.48550/arXiv.2412.16065}.

J. S. Liu and Y. N. Wu (1999). "Parameter Expansion for Data Augmentation." Journal of the American Statistical Association, vol. 94, no. 448, pp. 1264--1274, \doi{10.2307/2669940}.
}
