\name{negbinomial}
\alias{negbinomial}
\alias{polya}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{ Negative Binomial Distribution Family Function }
\description{
  Maximum likelihood estimation of the two parameters of a negative
  binomial distribution.

}
\usage{
negbinomial(lmu = "loge", lsize = "loge", emu = list(), esize = list(),
            imu = NULL, isize = NULL, quantile.probs = 0.75,
            nsimEIM = 100, cutoff = 0.995,
            Maxiter = 5000, deviance.arg = FALSE, imethod = 1,
            parallel = FALSE, shrinkage.init = 0.95, zero = -2)
polya(lprob = "logit", lsize = "loge", eprob = list(), esize = list(),
    iprob = NULL, isize = NULL, quantile.probs = 0.75, nsimEIM = 100,
    deviance.arg = FALSE, imethod = 1, shrinkage.init = 0.95, zero = -2)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{lmu, lsize, lprob}{
  Link functions applied to the \eqn{\mu}{mu}, \eqn{k}
  and \eqn{p}  parameters.
  See \code{\link{Links}} for more choices.
  Note that the \eqn{\mu}{mu}, \eqn{k}
  and \eqn{p}  parameters are the \code{mu},
  \code{size} and \code{prob} arguments of 
  \code{\link[stats:NegBinomial]{rnbinom}} respectively.
  Common alternatives for \code{lsize} are
  \code{\link{nloge}} and
  \code{\link{reciprocal}}.

  }
  \item{emu, esize, eprob}{
  List. Extra argument for each of the links.
  See \code{earg} in \code{\link{Links}} for general information.

  }
  \item{imu, isize, iprob}{
  Optional initial values for the mean and \eqn{k} and \eqn{p}.
  For \eqn{k}, if failure to converge occurs then try different values
  (and/or use \code{imethod}).
  For an \eqn{S}-column response, \code{isize} can be of length \eqn{S}.
  A value \code{NULL} means an initial value for each response is
  computed internally using a range of values.
  The argument \code{isize} is ignored if used within \code{\link{cqo}}; see
  the \code{iKvector} argument of \code{\link{qrrvglm.control}} instead.

  }
  \item{quantile.probs}{
  Passed into the \code{probs} argument
  of \code{\link[stats:quantile]{quantile}}
  when \code{imethod = 3} to obtain an initial value for the mean.

  }

  \item{nsimEIM}{
  This argument is used
  for computing the diagonal element of the
  \emph{expected information matrix} (EIM) corresponding to \eqn{k}.
  See \code{\link{CommonVGAMffArguments}} for more information
  and the note below.

  }
  \item{cutoff}{
  Used in the finite series approximation.
  A numeric which is close to 1 but never exactly 1.
  Used to specify how many terms of the infinite series
  for computing the second diagonal element of the
  EIM are actually used.
  The probabilities are summed until their total reaches this value or more
  (but no more than \code{Maxiter} terms are allowed).
  It is like specifying \code{p} in an imaginary function
  \code{qnegbin(p)}.

  }
  \item{Maxiter}{
  Used in the finite series approximation.
  Integer. The maximum number of terms allowed when computing
  the second diagonal element of the EIM.
  In theory, the value involves an infinite series.
  If this argument is too small then the value may be inaccurate.

  }
  \item{deviance.arg}{
  Logical. If \code{TRUE}, the deviance function
  is attached to the object. Under ordinary circumstances, it should
  be left alone because it really assumes the index parameter is at
  the maximum likelihood estimate. Consequently, one cannot use that
  criterion to minimize within the IRLS algorithm.
  It should be set \code{TRUE} only when used with \code{\link{cqo}} 
  under the fast algorithm.

  }
  \item{imethod}{
  An integer with value \code{1} or \code{2} or \code{3} which
  specifies the initialization method for the \eqn{\mu}{mu} parameter.
  If failure to converge occurs, try another value,
  and/or specify a value for \code{shrinkage.init},
  and/or specify a value for \code{isize}.

  }
  \item{parallel}{
  See \code{\link{CommonVGAMffArguments}} for more information.
  Setting \code{parallel = TRUE} is useful in order to get
  something similar to \code{\link{quasipoissonff}} or
  what is known as NB-1.
  The parallelism constraint does not apply to any intercept term.
  You should set \code{zero = NULL} too if \code{parallel = TRUE} to
  avoid a conflict.

  }
  \item{shrinkage.init}{
  How much shrinkage is used when initializing \eqn{\mu}{mu}.
  The value must be between 0 and 1 inclusive, and
  a value of 0 means the individual response values are used,
  and a value of 1 means the median or mean is used.
  This argument is used in conjunction with \code{imethod}.
  If convergence failure occurs try setting this argument to 1.

  }
  \item{zero}{
  Integer valued vector, usually assigned \eqn{-2} or \eqn{2} if used
  at all.  Specifies which of the two linear/additive predictors are
  modelled as an intercept only. By default, the \eqn{k} parameter
  (after \code{lsize} is applied) is modelled as a single unknown
  number that is estimated.  It can be modelled as a function of the
  explanatory variables by setting \code{zero = NULL}. A negative value
  means that the value is recycled, so setting \eqn{-2} means all \eqn{k}
  are intercept-only.
  See \code{\link{CommonVGAMffArguments}} for more information.

  }

}
\details{
  The negative binomial distribution can be motivated in several ways,
  e.g., as a Poisson distribution with a mean that is gamma
  distributed.
  There are several common parametrizations of the negative binomial
  distribution.
  The one used by \code{negbinomial()} uses the
  mean \eqn{\mu}{mu} and an \emph{index} parameter
  \eqn{k}, both of which are positive.
  Specifically, the density of a random variable \eqn{Y} is 
  \deqn{f(y;\mu,k) ~=~ {y + k - 1 \choose y} \,
    \left( \frac{\mu}{\mu+k} \right)^y\,
    \left( \frac{k}{k+\mu} \right)^k }{%
    f(y;mu,k) = C_{y}^{y + k - 1}
    [mu/(mu+k)]^y [k/(k+mu)]^k}
  where \eqn{y=0,1,2,\ldots},
  and \eqn{\mu > 0}{mu > 0} and \eqn{k > 0}.
  Note that the \emph{dispersion} parameter is 
  \eqn{1/k}, so that as \eqn{k} approaches infinity the negative
  binomial distribution approaches a Poisson distribution.
  The response has variance \eqn{Var(Y)=\mu+\mu^2/k}{Var(Y)=mu*(1+mu/k)}.
  When fitted, the \code{fitted.values} slot of the object contains
  the estimated value of the \eqn{\mu}{mu} parameter, i.e., of the mean
  \eqn{E(Y)}.
  It is common for some to use \eqn{\alpha=1/k}{alpha=1/k} as the
  ancillary or heterogeneity parameter;
  so common alternatives for \code{lsize} are
  \code{\link{nloge}} and
  \code{\link{reciprocal}}.


  For \code{polya} the density is
  \deqn{f(y;p,k) ~=~ {y + k - 1 \choose y} \,
    \left( 1 - p \right)^y\,
    p^k }{%
    f(y;p,k) = C_{y}^{y + k - 1}
    [1 - p]^y p^k}
  where \eqn{y=0,1,2,\ldots},
  and \eqn{0 < p < 1}{0 < p < 1} and \eqn{k > 0}.


  The negative binomial distribution can be coerced into the
  classical GLM framework with one of the parameters being
  of interest and the other treated as a nuisance/scale
  parameter (this is implemented in the \pkg{MASS} package). The
  \pkg{VGAM} family function \code{negbinomial} treats both
  parameters on the same footing, and estimates them both
  by full maximum likelihood estimation.  Simulated Fisher
  scoring is employed as the default (see the \code{nsimEIM}
  argument).


  The parameters \eqn{\mu}{mu} and \eqn{k} are independent
  (diagonal EIM), and the confidence region for \eqn{k}
  is extremely skewed so that its standard error is often
  of no practical use. The parameter \eqn{1/k} has been
  used as a measure of aggregation.


  These \pkg{VGAM} family functions handle
  \emph{multivariate} responses, so that a matrix can be
  used as the response. The number of columns is the number
  of species, say, and setting \code{zero = -2} means that
  \emph{all} species have a \eqn{k} equalling a (different)
  intercept only.


}
\section{Warning}{
  The Poisson model corresponds to \eqn{k} equalling
  infinity.  If the data is Poisson or close to Poisson,
  numerical problems will occur. Possibly choosing a
  log-log link may help in such cases, otherwise use
  \code{\link{poissonff}} or \code{\link{quasipoissonff}}.


  These functions are fragile; the maximum likelihood
  estimate of the index parameter is fraught (see Lawless,
  1987). In general, the \code{\link{quasipoissonff}} is
  more robust.  Other alternatives to \code{negbinomial} are
  to fit a NB-1 or RR-NB model; see Yee (2011). Assigning
  values to the \code{isize} argument may lead to a local
  solution, and smaller values are preferred over large
  values when using this argument.


  Yet to do: write a family function which uses the method of moments
  estimator for \eqn{k}.


}
\value{
  An object of class \code{"vglmff"} (see \code{\link{vglmff-class}}).
  The object is used by modelling functions such as \code{\link{vglm}},
  \code{\link{rrvglm}}
  and \code{\link{vgam}}.

}
\references{
Lawless, J. F. (1987)
Negative binomial and mixed Poisson regression.
\emph{The Canadian Journal of Statistics}
\bold{15}, 209--225.


Hilbe, J. M. (2007)
\emph{Negative Binomial Regression}.
Cambridge: Cambridge University Press.


Bliss, C. and Fisher, R. A. (1953)
Fitting the negative binomial distribution to biological data.
\emph{Biometrics}
\bold{9}, 174--200.


  Yee, T. W. (2011)
  Two-parameter reduced-rank vector generalized linear models.
  \emph{In preparation}.



}
\author{ Thomas W. Yee }
\note{
% The \pkg{VGAM} package has a few other family functions for the
% negative binomial distribution. Currently, none of these others work
% very well.


  These two functions implement two common parameterizations
  of the negative binomial (NB). Some people call the
  NB with integer \eqn{k} the \emph{Pascal} distribution,
  whereas if \eqn{k} is real then this is the \emph{Polya}
  distribution. I don't. The one matching the details of
  \code{\link[stats:NegBinomial]{rnbinom}} in terms of \eqn{p}
  and \eqn{k} is \code{polya()}.


  For \code{polya()} the code may fail when \eqn{p} is close
  to 0 or 1. It is not yet compatible with \code{\link{cqo}}
  or \code{\link{cao}}.


  Suppose the response is called \code{ymat}.
  For \code{negbinomial()}
  the diagonal element of the \emph{expected information matrix}
  (EIM) for parameter \eqn{k}
  involves an infinite series; consequently simulated Fisher scoring
  (see \code{nsimEIM}) is the default. This algorithm should definitely be
  used if \code{max(ymat)} is large, e.g., \code{max(ymat) > 300} or there
  are any outliers in \code{ymat}.
  A second algorithm involving a finite series approximation can be
  invoked by setting \code{nsimEIM = NULL}.
  Then the arguments
  \code{Maxiter} and
  \code{cutoff} are pertinent.


  Regardless of the algorithm used,
  convergence problems may occur, especially when the response has large
  outliers or is large in magnitude.
  If convergence failure occurs, try using arguments
  (in recommended decreasing order)
  \code{nsimEIM},
  \code{shrinkage.init},
  \code{imethod},
  \code{Maxiter}, 
  \code{cutoff},
  \code{isize},
  \code{zero}.


  The function \code{negbinomial} can be used by the fast algorithm in
  \code{\link{cqo}}; however, setting \code{EqualTolerances = TRUE} and
  \code{ITolerances = FALSE} is recommended.


% For \code{\link{cqo}} and \code{\link{cao}}, taking the square-root
% of the response means (approximately) a \code{\link{poissonff}} family
% may be used on the transformed data.


% If the negative binomial family function \code{\link{negbinomial}}
% is used for \code{cqo} then set \code{negbinomial(deviance = TRUE)}
% is necessary. This means to minimize the deviance, which the fast
% algorithm can handle.


  In the first example below (Bliss and Fisher, 1953), from each of 6
  McIntosh apple trees in an orchard that had been sprayed, 25 leaves
  were randomly selected. On each of the leaves, the number of adult
  female European red mites was counted.



  There are two special uses of \code{negbinomial} for handling count data.
  Firstly,
  when used by \code{\link{rrvglm}}  this 
  results in a continuum of models in between and
  inclusive of quasi-Poisson and negative binomial regression.
  This is known as a reduced-rank negative binomial model \emph{(RR-NB)}.
  It fits a negative binomial log-linear regression with variance function
  \eqn{Var(Y) = \mu + \delta_1   \mu^{\delta_2}}{Var(Y) = mu + delta1 * mu^delta2}
  where \eqn{\delta_1}{delta1}
  and   \eqn{\delta_2}{delta2}
  are parameters to be estimated by MLE.
  Confidence intervals are available for \eqn{\delta_2}{delta2},
  so one can decide whether the
  data are quasi-Poisson or negative binomial, if either.


  Secondly,
  the use of \code{negbinomial} with \code{parallel = TRUE}
  inside \code{\link{vglm}}
  can result in a model similar to \code{\link{quasipoissonff}}.
  This is named the \emph{NB-1} model.
  The dispersion parameter is estimated by MLE whereas
  \code{\link[stats:glm]{glm}} uses the method of moments.
  In particular, it fits a negative binomial log-linear regression
  with variance function
  \eqn{Var(Y) = \phi_0   \mu}{Var(Y) = phi0 * mu}
  where \eqn{\phi_0}{phi0}
  is a parameter to be estimated by MLE.
  Confidence intervals are available for \eqn{\phi_0}{phi0}.


}

\seealso{ 
  \code{\link{quasipoissonff}},
  \code{\link{poissonff}},
  \code{\link{zinegbinomial}},
  \code{\link{posnegbinomial}},
  \code{\link{invbinomial}},
% \code{\link[MASS]{rnegbin}}.
  \code{\link[stats:NegBinomial]{rnbinom}},
  \code{\link{nbolf}},
  \code{\link{rrvglm}},
  \code{\link{cao}},
  \code{\link{cqo}},
  \code{\link{CommonVGAMffArguments}}.


}
\examples{
# Example 1: apple tree data
# The data are stored as a frequency table: y holds the distinct counts
# 0 to 7 and w holds how often each count was observed (the w values sum
# to 150). The frequencies are supplied to vglm through the weights argument,
# and the formula y ~ 1 fits an intercept-only model.
appletree <- data.frame(y = 0:7, w = c(70, 38, 17, 10, 9, 3, 2, 1))
fit <- vglm(y ~ 1, negbinomial, appletree, weights = w)
summary(fit)
coef(fit, matrix = TRUE)
Coef(fit)

# Example 2: simulated data with multivariate response
# Note that nn is assigned (to 500) inside the call to runif, so it remains
# available to the rnbinom calls below. Two responses are simulated with
# different means and sizes and then fitted jointly as a two-column response.
ndata <- data.frame(x2 = runif(nn <- 500))
ndata <- transform(ndata, y1 = rnbinom(nn, mu = exp(3+x2), size = exp(1)),
                          y2 = rnbinom(nn, mu = exp(2-x2), size = exp(0)))
fit1 <- vglm(cbind(y1, y2) ~ x2, negbinomial, ndata, trace = TRUE)
coef(fit1, matrix = TRUE)

# Example 3: large counts so definitely use the nsimEIM argument
# The mean is exp(12 + x2), so the simulated counts are very large;
# nsimEIM = 100 is passed explicitly (it is also the default in the usage).
ndata <- transform(ndata, y3 = rnbinom(nn, mu = exp(12+x2), size = exp(1)))
with(ndata, range(y3))  # Large counts
fit2 <- vglm(y3 ~ x2, negbinomial(nsimEIM = 100), ndata, trace = TRUE)
coef(fit2, matrix = TRUE)

# Example 4: a NB-1 to estimate a negative binomial with Var(Y) = phi0 * mu
# Simulation design: with size = delta0 * mu the negative binomial variance
# mu + mu^2 / size becomes mu * (1 + 1 / delta0), which equals phi0 * mu
# when delta0 = 1 / (phi0 - 1). The coefficient of x3 is set to zero, so x3
# has no true effect on the mean.
nn <- 1000        # Number of observations
phi0 <- 10        # Specify this; should be greater than unity
delta0 <- 1 / (phi0 - 1)
mydata <- data.frame(x2 = runif(nn), x3 = runif(nn))
mydata <- transform(mydata, mu = exp(2 + 3 * x2 + 0 * x3))
mydata <- transform(mydata, y3 = rnbinom(nn, mu = mu, size = delta0 * mu))
\dontrun{
plot(y3 ~ x2, data = mydata, pch = "+", col = 'blue',
     main = paste("Var(Y) = ", phi0, " * mu", sep = ""), las = 1) }
# parallel = TRUE is combined with zero = NULL here, as the parallel argument
# documentation recommends, to avoid a conflict with the default zero = -2.
nb1 <- vglm(y3 ~ x2 + x3, negbinomial(parallel = TRUE, zero = NULL),
            mydata, trace = TRUE)
# Extracting out some quantities:
# The difference of the two intercepts, log(size) minus log(mu), estimates
# log(delta0); phi0 is then recovered through phi = 1 + 1 / delta0.
cnb1 <- coef(nb1, matrix = TRUE)
mydiff <- (cnb1["(Intercept)", "log(size)"] - cnb1["(Intercept)", "log(mu)"])
delta0.hat <- exp(mydiff)
(phi.hat <- 1 + 1 / delta0.hat)  # MLE of phi
summary(nb1)
# Obtain a 95 percent confidence interval for phi0:
# myvec is a contrast vector that picks out the second intercept minus the
# first one. NOTE(review): this assumes vcov(nb1) orders the coefficients as
# the two intercepts followed by x2 and x3 -- confirm against coef(nb1).
myvec <- rbind(-1, 1, 0, 0)
(se.mydiff <- sqrt(t(myvec) \%*\%  vcov(nb1) \%*\%  myvec))
ci.mydiff <- mydiff + c(-1.96, 1.96) * se.mydiff
ci.delta0 <- ci.exp.mydiff <- exp(ci.mydiff)
# The endpoints are reversed with rev because 1 + 1 / delta0 is a decreasing
# function of delta0, so the lower limit for delta0 gives the upper limit
# for phi0 and vice versa.
(ci.phi0 <- 1 + 1 / rev(ci.delta0)) # The 95 percent conf. interval for phi0

confint_nb1(nb1) # Quick way to get it

# The component name disper relies on partial matching of list names by the
# dollar operator; presumably it resolves to the dispersion component of the
# glm summary -- confirm.
summary(glm(y3 ~ x2 + x3, quasipoisson, mydata))$disper # cf. moment estimator
}
\keyword{models}
\keyword{regression}


%y1 = MASS:::rnegbin(n, mu=exp(3+x), theta=exp(1)) # k is theta
%y2 = MASS:::rnegbin(n, mu=exp(2-x), theta=exp(0))
