% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_mvgam_priors.R
\name{get_mvgam_priors}
\alias{get_mvgam_priors}
\title{Extract information on default prior distributions for an \pkg{mvgam} model}
\usage{
get_mvgam_priors(
  formula,
  trend_formula,
  factor_formula,
  knots,
  trend_knots,
  trend_model = "None",
  family = poisson(),
  data,
  unit = time,
  species = series,
  use_lv = FALSE,
  n_lv,
  trend_map,
  ...
)
}
\arguments{
\item{formula}{A \code{formula} object specifying the GAM observation model formula.
These are exactly like the formula for a GLM except that smooth terms, \code{s()},
\code{te()}, \code{ti()}, \code{t2()}, as well as time-varying \code{dynamic()} terms,
nonparametric \code{gp()} terms and offsets using \code{offset()}, can be added to the
right hand side to specify that the linear predictor depends on smooth
functions of predictors (or linear functionals of these).

In \code{nmix()} family models, the \code{formula} is used to set up a linear predictor
for the detection probability. Details of the formula syntax used by
\pkg{mvgam} can be found in \code{\link{mvgam_formulae}}}

\item{trend_formula}{An optional \code{formula} object specifying the GAM process
model formula. If supplied, a linear predictor will be modelled for the
latent trends to capture process model evolution separately from the
observation model.

\strong{Important notes:}
\itemize{
\item Should not have a response variable specified on the left-hand side
(e.g., \code{~ season + s(year)})
\item Use \code{trend} instead of \code{series} for effects that vary across time series
\item Only available for \code{RW()}, \code{AR()} and \code{VAR()} trend models
\item In \code{nmix()} family models, sets up linear predictor for latent abundance
\item Consider dropping one intercept using \code{- 1} convention to avoid
estimation challenges
}}

\item{factor_formula}{Can be supplied instead of \code{trend_formula} to match
syntax from \link{jsdgam}}

\item{knots}{An optional \code{list} containing user specified knot values for
basis construction. For most bases the user simply supplies the knots to be
used, which must match up with the \code{k} value supplied. Different terms can
use different numbers of knots, unless they share a covariate.}

\item{trend_knots}{As for \code{knots} above, this is an optional \code{list} of knot
values for smooth functions within the \code{trend_formula}.}

\item{trend_model}{\code{character} or \code{function} specifying the time series dynamics
for the latent trend.

\strong{Available options:}
\itemize{
\item \code{'None'}: No latent trend component (GAM component only, like \code{\link[mgcv]{gam}})
\item \code{'ZMVN'} or \code{ZMVN()}: Zero-Mean Multivariate Normal (Stan only)
\item \code{'RW'} or \code{RW()}: Random Walk
\item \code{'AR1'}, \code{'AR2'}, \code{'AR3'} or \code{AR(p = 1, 2, 3)}: Autoregressive models
\item \code{'CAR1'} or \code{CAR(p = 1)}: Continuous-time AR (Ornstein–Uhlenbeck process)
\item \code{'VAR1'} or \code{VAR()}: Vector Autoregressive (Stan only)
\item \code{'PWlogistic'}, \code{'PWlinear'} or \code{PW()}: Piecewise trends (Stan only)
\item \code{'GP'} or \code{GP()}: Gaussian Process with squared exponential kernel (Stan only)
}

\strong{Additional features:}
\itemize{
\item Moving average and/or correlated process error terms available for most types
(e.g., \code{RW(cor = TRUE)} for multivariate Random Walk)
\item Hierarchical correlations possible for structured data
\item See \link{mvgam_trends} for details and \code{\link[=ZMVN]{ZMVN()}} for examples
}}

\item{family}{\code{family} specifying the exponential observation family for the series.

\strong{Supported families:}
\itemize{
\item \code{gaussian()}: Real-valued data
\item \code{betar()}: Proportional data on \verb{(0,1)}
\item \code{lognormal()}: Non-negative real-valued data
\item \code{student_t()}: Real-valued data
\item \code{Gamma()}: Non-negative real-valued data
\item \code{bernoulli()}: Binary data
\item \code{poisson()}: Count data (default)
\item \code{nb()}: Overdispersed count data
\item \code{binomial()}: Count data with imperfect detection when number of trials is known
(use \code{cbind()} to bind observations and trials)
\item \code{beta_binomial()}: As \code{binomial()} but allows for overdispersion
\item \code{nmix()}: Count data with imperfect detection when number of trials is unknown
(State-Space N-Mixture model with Poisson latent states and Binomial observations)
}

See \code{\link{mvgam_families}} for more details.}

\item{data}{A \code{data.frame} or \code{list} containing the model response variable
and covariates required by the GAM \code{formula} and optional \code{trend_formula}.

\strong{Required columns for most models:}
\itemize{
\item \code{series}: A \code{factor} index of the series IDs (number of levels should equal
number of unique series labels)
\item \code{time}: \code{numeric} or \code{integer} index of time points. For most dynamic trend
types, time should be measured in discrete, regularly spaced intervals
(i.e., \code{c(1, 2, 3, ...)}). Irregular spacing is allowed for \code{trend_model = CAR(1)},
but zero intervals are adjusted to \code{1e-12} to prevent sampling errors.
}

\strong{Special cases:}
\itemize{
\item Models with hierarchical temporal correlation (e.g., \code{AR(gr = region, subgr = species)})
should NOT include a \code{series} identifier
\item Models without temporal dynamics (\code{trend_model = 'None'} or \code{trend_model = ZMVN()})
don't require a \code{time} variable
}}

\item{unit}{The unquoted name of the variable that represents the unit of
analysis in \code{data} over which latent residuals should be correlated. This
variable should be either a \code{numeric} or \code{integer} variable in the
supplied \code{data}. Defaults to \code{time} to be consistent with other
functionalities in \pkg{mvgam}, though note that the data need not be time
series in this case. See examples below for further details and
explanations}

\item{species}{The unquoted name of the \code{factor} variable that indexes the
different response units in \code{data} (usually \code{'species'} in a JSDM).
Defaults to \code{series} to be consistent with other \code{mvgam} models}

\item{use_lv}{\code{logical}. If \code{TRUE}, use dynamic factors to estimate series'
latent trends in a reduced dimension format. Only available for \code{RW()},
\code{AR()} and \code{GP()} trend models. Default is \code{FALSE}.
See \code{\link{lv_correlations}} for examples.}

\item{n_lv}{\code{integer} specifying the number of latent dynamic factors to use
if \code{use_lv == TRUE}. Cannot exceed \code{n_series}. Default is
\code{min(2, floor(n_series / 2))}.}

\item{trend_map}{Optional \code{data.frame} specifying which series should depend on
which latent trends. Enables multiple series to depend on the same latent
trend process with different observation processes.

\strong{Required structure:}
\itemize{
\item Column \code{series}: Single unique entry for each series (matching factor levels in data)
\item Column \code{trend}: Integer values indicating which trend each series depends on
}

\strong{Notes:}
\itemize{
\item Sets up latent factor model by enabling \code{use_lv = TRUE}
\item Process model intercept is NOT automatically suppressed
\item Not yet supported for continuous time models (\code{CAR()})
}}

\item{...}{Not currently used}
}
\value{
either a \code{data.frame} containing the prior definitions (if any
suitable priors can be altered by the user) or \code{NULL}, indicating
that no priors in the model can be modified
}
\description{
This function lists the parameters that can have their prior distributions
changed for a given model, as well as listing their default distributions
}
\details{
Users can supply a model formula, prior to fitting the model, so
that default priors can be inspected and altered. To make alterations,
change the contents of the \code{prior} column and supply this
\code{data.frame} to the \code{\link{mvgam}} or \code{\link{jsdgam}}
functions using the argument \code{priors}. If using \code{Stan} as the backend,
users can also modify the parameter bounds by modifying the
\code{new_lowerbound} and/or \code{new_upperbound} columns. This will be necessary
if using restrictive distributions on some parameters, such as a Beta
distribution for the trend sd parameters (Beta only has support on
\code{(0,1)}, so the upper bound cannot be above \code{1}). Another
option is to make use of the prior modification functions in \pkg{brms}
(i.e. \code{\link[brms]{prior}}) to change prior distributions and bounds
(just use the name of the parameter that you'd like to change as the
\code{class} argument; see examples below)
}
\note{
Only the \code{prior}, \code{new_lowerbound} and/or \code{new_upperbound} columns of
the output should be altered when specifying user-defined priors for
the model. Use only if you are familiar with the underlying probabilistic
programming language. There are no sanity checks done to ensure that the
code is legal (e.g. to check that lower bounds are smaller than upper
bounds).
}
\examples{
\donttest{
# ========================================================================
# Example 1: Simulate data and inspect default priors
# ========================================================================

dat <- sim_mvgam(trend_rel = 0.5)

# Get a model file that uses default mvgam priors for inspection (not
# always necessary, but this can be useful for testing whether your
# updated priors are written correctly)
mod_default <- mvgam(
  y ~ s(series, bs = "re") + s(season, bs = "cc") - 1,
  family = nb(),
  data = dat$data_train,
  trend_model = AR(p = 2),
  run_model = FALSE
)

# Inspect the model file with default mvgam priors
stancode(mod_default)

# Look at which priors can be updated in mvgam
test_priors <- get_mvgam_priors(
  y ~ s(series, bs = "re") + s(season, bs = "cc") - 1,
  family = nb(),
  data = dat$data_train,
  trend_model = AR(p = 2)
)
test_priors

# ========================================================================
# Example 2: Modify priors manually
# ========================================================================

# Make a few changes; first, change the population mean for the
# series-level random intercepts
test_priors$prior[2] <- "mu_raw ~ normal(0.2, 0.5);"

# Now use stronger regularisation for the series-level AR2 coefficients
test_priors$prior[5] <- "ar2 ~ normal(0, 0.25);"

# Check that the changes are made to the model file without any warnings
# by setting 'run_model = FALSE'
mod <- mvgam(
  y ~ s(series, bs = "re") + s(season, bs = "cc") - 1,
  family = nb(),
  data = dat$data_train,
  trend_model = AR(p = 2),
  priors = test_priors,
  run_model = FALSE
)
stancode(mod)

# No warnings, the model is ready for fitting now in the usual way with
# the addition of the 'priors' argument

# ========================================================================
# Example 3: Use brms syntax for prior modification
# ========================================================================

# The same can be done using 'brms' functions; here we will also change
# the ar1 prior and put some bounds on the ar coefficients to enforce
# stationarity; we set the prior using the 'class' argument in all brms
# prior functions
brmsprior <- c(
  prior(normal(0.2, 0.5), class = mu_raw),
  prior(normal(0, 0.25), class = ar1, lb = -1, ub = 1),
  prior(normal(0, 0.25), class = ar2, lb = -1, ub = 1)
)
brmsprior

mod <- mvgam(
  y ~ s(series, bs = "re") + s(season, bs = "cc") - 1,
  family = nb(),
  data = dat$data_train,
  trend_model = AR(p = 2),
  priors = brmsprior,
  run_model = FALSE
)
stancode(mod)

# ========================================================================
# Example 4: Error handling example
# ========================================================================

# Look at what is returned when an incorrect spelling is used
test_priors$prior[5] <- "ar2_bananas ~ normal(0, 0.25);"
mod <- mvgam(
  y ~ s(series, bs = "re") + s(season, bs = "cc") - 1,
  family = nb(),
  data = dat$data_train,
  trend_model = AR(p = 2),
  priors = test_priors,
  run_model = FALSE
)
stancode(mod)

# ========================================================================
# Example 5: Parametric (fixed effect) priors
# ========================================================================

simdat <- sim_mvgam()

# Add a fake covariate
simdat$data_train$cov <- rnorm(NROW(simdat$data_train))

priors <- get_mvgam_priors(
  y ~ cov + s(season),
  data = simdat$data_train,
  family = poisson(),
  trend_model = AR()
)

# Change priors for the intercept and fake covariate effects
priors$prior[1] <- "(Intercept) ~ normal(0, 1);"
priors$prior[2] <- "cov ~ normal(0, 0.1);"

mod2 <- mvgam(
  y ~ cov + s(season),
  data = simdat$data_train,
  trend_model = AR(),
  family = poisson(),
  priors = priors,
  run_model = FALSE
)
stancode(mod2)

# ========================================================================
# Example 6: Alternative brms syntax for fixed effects
# ========================================================================

# Likewise using 'brms' utilities (note that you can use Intercept rather
# than `(Intercept)`) to change priors on the intercept
brmsprior <- c(
  prior(normal(0.2, 0.5), class = cov),
  prior(normal(0, 0.25), class = Intercept)
)
brmsprior

mod2 <- mvgam(
  y ~ cov + s(season),
  data = simdat$data_train,
  trend_model = AR(),
  family = poisson(),
  priors = brmsprior,
  run_model = FALSE
)
stancode(mod2)

# ========================================================================
# Example 7: Bulk prior assignment
# ========================================================================

# The "class = 'b'" shortcut can be used to put the same prior on all
# 'fixed' effect coefficients (apart from any intercepts)
set.seed(0)
dat <- mgcv::gamSim(1, n = 200, scale = 2)
dat$time <- 1:NROW(dat)
mod <- mvgam(
  y ~ x0 + x1 + s(x2) + s(x3),
  priors = prior(normal(0, 0.75), class = "b"),
  data = dat,
  family = gaussian(),
  run_model = FALSE
)
stancode(mod)
}

}
\seealso{
\code{\link{mvgam}}, \code{\link{mvgam_formulae}},
\code{\link[brms]{prior}}
}
\author{
Nicholas J Clark
}
