% $Id: multinomRob.Rd,v 1.13 2005/09/23 03:53:42 wrm1 Exp $
%
\name{multinomRob}
\alias{multinomRob}
\title{Multinomial Robust Estimation}
\description{
  \code{multinomRob} fits the overdispersed multinomial regression model
  for grouped count data using the hyperbolic tangent (tanh) and least quartile
  difference (LQD) robust estimators.
}
\usage{
  multinomRob(model, data, starting.values=NULL,  equality=NULL,
              genoud.parms=NULL, print.level=0, iter = FALSE,
              maxiter = 10, multinom.t=1, multinom.t.df=NA,
              MLEonly=FALSE)
}
\arguments{
  \item{model}{
    The regression model specification.  This is a list of formulas, with one
    formula for each category of outcomes for which counts have been measured
    for each observation.  For example, in the following,

    \code{model=list(y1 ~ x1, y2 ~ x2, y3 ~ 0)}

    the outcome variables containing counts are \code{y1}, \code{y2} and
    \code{y3}, and the linear predictor for \code{y1} is a coefficient times
    \code{x1} plus a constant, the linear predictor for \code{y2} is a
    coefficient times \code{x2} plus a constant, and the linear predictor for
    \code{y3} is zero.  Each formula has the format \code{countvar ~ RHS},
    where \code{countvar} is the name of a vector, in the dataframe referenced
    by the \code{data} argument, that gives the counts for all observations
    for one category.  \code{RHS} denotes the righthand side of a formula using
    the usual syntax for formulas, where each variable in the formula is the
    name of a vector in the dataframe referenced by the \code{data} argument.
    For example, a \code{RHS} specification of \code{var1 + var2*var3} would
    specify that the regressors are to be \code{var1}, \code{var2},
    \code{var3}, the terms generated by the interaction \code{var2:var3}, and
    the constant.

    The set of outcome alternatives may be specified to vary over observations,
    by putting in a negative value for alternatives that do not exist for
    particular observations.  If the value of an outcome variable is negative
    for an observation, then that outcome is considered not available for that
    observation.  The predicted counts for that observation are defined only
    for the available outcomes and are based on the linear predictors for
    the available outcomes.  The same set of coefficient parameter values
    is used for all observations.  Any observation for which fewer than two
    outcomes are available is omitted.

    Observations with missing data (\code{NA}) in any outcome variable or
    regressor are omitted (listwise deletion).

    In a model that has the same regressors for every category, except for
    one category for which there are no regressors in order to identify the
    model (the reference category), the \code{RHS} specification must be
    given for all the categories except the reference category.  The formula
    for the reference category must include a \code{RHS} specification that
    explicitly omits the constant, e.g., \code{countvar ~ -1} or
    \code{countvar ~ 0}.  The number of coefficient parameters to be
    estimated equals the number of terms generated by all the formulas,
    subject to equality constraints that may be specified using the
    \code{equality} argument.}
  \item{data}{
    The dataframe that contains all the variables referenced in the
    \code{model} argument, which are the data to be analyzed.}
  \item{starting.values}{
    Starting values for the regression coefficient parameters, as a vector.
    The parameter ordering matches the ordering of the formulas in the
    \code{model} argument:  parameters for the terms in the first formula
    appear first, then come parameters for the terms in the second formula,
    etc.  In practice it will usually be better to start by letting
    multinomRob find starting values by using the \code{multinom.t} option,
    then using the results from one run as starting values for a subsequent
    run done with, perhaps, a larger population of operators for rgenoud.}
  \item{equality}{
    List of equality constraints.  This is a list of lists of
    formulas.  Each formula has the same format as in the model specification,
    and must include only a subset of the outcomes and regressors used in the
    model specification formulas.  All the coefficients specified by the
    formulas in each list will be constrained to have the same value during
    estimation.  For example, in the following,

    \code{multinomRob(model=list(y1 ~ x1, y2 ~ x2, y3 ~ 0), data=dtf,
      equality=list(list(y1 ~ x1 + 0, y2 ~ x2 + 0)) );}

    the model to be estimated is

   \code{list(y1 ~ x1, y2 ~ x2, y3 ~ 0)}

    and the coefficients of x1 and x2 are constrained equal by

   \code{equality=list(list(y1 ~ x1 + 0, y2 ~ x2 + 0))}

    In the equality formulas it is necessary to say \code{+ 0} so the
    intercepts are not involved in the constraints.  If a parameter occurs
    in two different lists in the \code{equality=} argument, then all the
    parameters in the two lists are constrained to be equal to one
    another.  In the output this is described as consolidating the lists.}
  \item{genoud.parms}{
    List of named arguments used to control the rgenoud optimizer, which is
    used to compute the LQD estimator.}
  \item{print.level}{
    Specify 0 for minimal printing, 1 to print more detailed information
    about LQD and other intermediate computations, 2 to print details about
    the tanh computations, or 3 to print details about starting values
    computations.}
  \item{iter}{
    \code{TRUE} means to iterate between LQD and tanh estimation steps until
    either the algorithm converges, the number of iterations specified by the
    \code{maxiter} argument is reached, or an LQD step occurs that
    produces a larger value than the previous step did for the overdispersion
    scale parameter.  This option often improves the fit of the model.}
  \item{maxiter}{
    The maximum number of iterations to be done between LQD and tanh
    estimation steps.}
  \item{multinom.t}{
    \code{1} means use the multinomial multivariate-t model to compute
    starting values for the coefficient parameters.  But if the MNL
    results are better (as judged by the LQD fit), MNL values will be
    used instead.  \code{0} means use nonrobust maximum likelihood
    estimates for a multinomial regression
    model.  \code{2} forces the use of the multivariate-t model for
    starting values even if the MNL estimates provide better starting
    values for the LQD.  Note that with \code{multinom.t=1} or \code{multinom.t=2},
    multivariate-t
    starting values will not be used if the model cannot generate valid
    standard errors.  To force the use of multivariate-t estimates even
    in this circumstance, see the \code{multinom.t.df} argument.

    If the \code{starting.values} argument is not
    \code{NULL}, the starting values given in that argument are used and the \code{multinom.t}
    argument is ignored.  Multinomial multivariate-t starting values are
    not available when the number
    of outcome alternatives varies over the observations.}
  \item{multinom.t.df}{
    \code{NA} means that the degrees of freedom (DF) for the multivariate-t
    model (when used) should be estimated.  If \code{multinom.t.df} is a number,
    that number will be used for the degrees of freedom and the DF will not be
    estimated.  Only a positive number should be used.
    Setting \code{multinom.t.df} to a number also implies that, if
    \code{multinom.t=1} or \code{multinom.t=2}, the
    multivariate-t starting values will be used (depending on the comparison with
    the MNL estimates if \code{multinom.t=1} is set) even if the standard
    errors are not defined.
  }
  \item{MLEonly}{
    If \code{TRUE}, then only the standard maximum-likelihood MNL model
    is estimated. No robust estimation model and no overdispersion
    parameter is estimated.}    
}
\details{
  The tanh estimator is a redescending
  M-estimator, and the LQD estimator is a generalized S-estimator.  The LQD
  is used to estimate the scale of the overdispersion.  Given that scale
  estimate, the tanh estimator is used to estimate the coefficient parameters
  of the linear predictors of the multinomial regression model. \cr

  If starting
  values are not supplied, they are computed using a multinomial
  multivariate-t model.  The program also computes and reports nonrobust
  maximum likelihood estimates for the multinomial regression model,
  reporting sandwich estimates for the standard errors that are adjusted for
  a nonrobust estimate of the error dispersion.
}
\value{
  multinomRob returns a list of 16 objects.  The returned objects are:

   \item{coefficients}{
     The tanh coefficient estimates in matrix format.  The matrix has one
     column for each formula specified in the \code{model} argument.  The
     name of each column is the name used for the count variable in the
     corresponding formula.  The label for each row of the matrix gives the
     names of the regressors to which the coefficient values in the row
     apply.  The regressor names in each label are separated by a forward
     slash (/), and \code{NA} is used to denote that no regressor is
     associated with the corresponding value in the matrix.  The value 0 is
     used in the matrix to fill in for values that do not correspond to a
     \code{model} formula regressor.}
   \item{se}{
     The tanh coefficient estimate standard errors in matrix format.  The
     format and labelling used for the matrix is the same as is used for the
     \code{coefficients}.  The standard errors are derived from the estimated
     asymptotic sandwich covariance estimate.}
   \item{LQDsigma2}{
     The LQD dispersion (variance) parameter estimate.  This is the LQD estimate of
     the scale value, squared.}
   \item{TANHsigma2}{
     The tanh dispersion parameter estimate.}
   \item{weights}{
     The matrix of tanh weights for the orthogonalized residuals.  The matrix
     has one row for each observation in the data and as many columns as
     there are formulas specified in the \code{model} argument.  The first
     column of the matrix has names for the observations, and the remaining
     columns contain the weights.  Each of the latter columns has a name
     derived from the name of one of the count variables named in the
     \code{model} argument.  If \code{count1} is the name of the count
     variable used in the first formula, then the second column in the matrix
     is named \code{weights:count1}, etc.

     If an observation has negative values specified for some outcome variables,
     indicating that those outcome alternatives are not available for that
     observation, then values of \code{NA} appear in the weights matrix for that
     observation, as many \code{NA} values as there are unavailable
     alternatives.  The \code{NA} values will be the last values in the affected
     row of the weights matrix, regardless of which outcome alternatives were
     unavailable for the observation.}
   \item{Hdiag}{
     Weights used to fully studentize the orthogonalized residuals.  The matrix
     has one row for each observation in the data and as many columns as
     there are formulas specified in the \code{model} argument.  The first
     column of the matrix has names for the observations, and the remaining
     columns contain the weights.  Each of the latter columns has a name
     derived from the name of one of the count variables named in the
     \code{model} argument.  If \code{count1} is the name of the count
     variable used in the first formula, then the second column in the matrix
     is named \code{Hdiag:count1}, etc.

     If an observation has negative values specified for some outcome variables,
     indicating that those outcome alternatives are not available for that
     observation, then values of 0 appear in the weights matrix for that
     observation, as many 0 values as there are unavailable alternatives.  Values
     of 0 that are created for this reason will be the last values in the affected
     row of the weights matrix, regardless of which outcome alternatives were
     unavailable for the observation.}
   \item{prob}{
     The matrix of predicted probabilities for each category for each
     observation based on the tanh coefficient estimates.}
   \item{residuals.rotate}{
     Matrix of studentized residuals which have been made comparable by
     rotating each choice category to the first position.  These
     residuals, unlike the student and standard residuals below, are no
     longer orthogonalized because of the rotation.  These are the
     residuals displayed in Table 6 of the reference article.}
   \item{residuals.student}{
     Matrix of fully studentized orthogonalized residuals.}
   \item{residuals.standard}{
     Matrix of orthogonalized residuals, standardized by dividing by the
     overdispersion scale.}
   \item{mnl}{
     List of nonrobust maximum likelihood estimation results from function
     \code{\link{multinomMLE}}.}
   \item{multinomT}{
     List of multinomial multivariate-t estimation results from function
     \code{\link{multinomT}}.}
   \item{genoud}{
     List of LQD estimation results obtained by rgenoud optimization, from
     function \code{genoudRob}.}
   \item{mtanh}{
     List of tanh estimation results from function \code{mGNtanh}.}
   \item{error}{
     Exit error code, usually from function \code{mGNtanh}.}
   \item{iter}{
     Number of LQD-tanh iterations.}
}
\references{
  Walter R. Mebane, Jr. and  Jasjeet Singh Sekhon. 2004.  ``Robust Estimation
  and Outlier Detection for Overdispersed Multinomial Models of Count Data.'' 
  \emph{American Journal of Political Science} 48 (April): 391--410.
  \url{http://sekhon.berkeley.edu/multinom.pdf}

  For additional documentation please visit \url{http://sekhon.berkeley.edu/robust/}.
}
\author{Walter R. Mebane, Jr., Cornell University,
  \email{wrm1@cornell.edu}, \url{http://macht.arts.cornell.edu/wrm1/} \cr

  Jasjeet S. Sekhon, UC Berkeley, \email{sekhon@berkeley.edu},
  \url{http://sekhon.berkeley.edu/}
}
\examples{
# Simulate grouped count data:  50 observations, 3 outcome categories.
# The category probabilities follow a multinomial logit in which x1 and x2
# each have coefficient 1, all intercepts are 0, and the third category is
# the reference.
x1 <- rnorm(50);
x2 <- rnorm(50);
p1 <- exp(x1)/(1+exp(x1)+exp(x2));
p2 <- exp(x2)/(1+exp(x1)+exp(x2));
p3 <- 1 - (p1 + p2);
y <- matrix(0, 50, 3);
# fill row i of y using the three category probabilities of observation i
for (i in 1:50) {
  y[i,] <- rmultinomial(1000, c(p1[i], p2[i], p3[i]));
}

# perturb the first 5 observations by rotating their counts across the
# categories:  columns 1, 2, 3 receive the old columns 3, 1, 2
y[1:5,c(1,2,3)] <- y[1:5,c(3,1,2)];
y1 <- y[,1];
y2 <- y[,2];
y3 <- y[,3];

# put data into a dataframe
dtf <- data.frame(x1, x2, y1, y2, y3);

# control settings for the rgenoud optimizer, passed to multinomRob
# through its genoud.parms argument
zz.genoud.parms <- list( pop.size             = 1000,
                        wait.generations      = 10,
                        max.generations       = 100,
                        scale.domains         = 5,
                        print.level = 0
                        )

# Estimate a model with "y3" as the reference category (its formula has no
# regressors and no constant).
# True coefficient values are:  (Intercept) = 0, x1 = 1, x2 = 1.
# The equality constraint forces the coefficients of x1 and x2 to be equal;
# the "+ 0" in the equality formulas keeps the intercepts out of the constraint.
# print.level = 3 prints details about the starting values computations;
# iter = FALSE means no iteration between the LQD and tanh estimation steps.
mulrobE <- multinomRob(list(y1 ~ x1, y2 ~ x2, y3 ~ 0),
                      dtf,
                      equality = list(list(y1 ~ x1 + 0, y2 ~ x2 + 0)),
                      genoud.parms = zz.genoud.parms,
                      print.level = 3, iter=FALSE);
summary(mulrobE, weights=TRUE);

# Do only MLE estimation (MLEonly=TRUE).  The following model is NOT
# identified if we try to estimate the overdispersed MNL.
# Here "y1" is the reference category.
dtf <- data.frame(y1=c(1,1),y2=c(2,1),y3=c(1,2),x=c(0,1))
summary(multinomRob(list(y1 ~ 0, y2 ~ x, y3 ~ x), data=dtf, MLEonly=TRUE))
}
\keyword{robust}
\keyword{models}
\keyword{regression}
