% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gmnl.R, R/gmnl.methods.R
\name{gmnl}
\alias{coef.gmnl}
\alias{df.residual.gmnl}
\alias{fitted.gmnl}
\alias{gmnl}
\alias{logLik.gmnl}
\alias{model.matrix.gmnl}
\alias{nObs.gmnl}
\alias{print.gmnl}
\alias{print.summary.gmnl}
\alias{residuals.gmnl}
\alias{summary.gmnl}
\alias{update.gmnl}
\title{Estimate Multinomial Logit Models with Observed and Unobserved Individual Heterogeneity.}
\usage{
gmnl(formula, data, subset, weights, na.action, model = c("mnl", "mixl",
  "smnl", "gmnl", "lc", "mm"), start = NULL, ranp = NULL, R = 40, Q = 2,
  haltons = NA, mvar = NULL, seed = 12345, correlation = FALSE,
  bound.err = 2, panel = FALSE, hgamma = c("direct", "indirect"),
  reflevel = NULL, init.tau = 0.1, init.gamma = 0.1, notscale = NULL,
  print.init = FALSE, gradient = TRUE, typeR = TRUE, ...)

\method{print}{gmnl}(x, digits = max(3, getOption("digits") - 3),
  width = getOption("width"), ...)

\method{summary}{gmnl}(object, ...)

\method{print}{summary.gmnl}(x, digits = max(3, getOption("digits") - 2),
  width = getOption("width"), ...)

\method{update}{gmnl}(object, new, ...)

\method{coef}{gmnl}(object, ...)

\method{model.matrix}{gmnl}(object, ...)

\method{residuals}{gmnl}(object, outcome = TRUE, ...)

\method{df.residual}{gmnl}(object, ...)

\method{fitted}{gmnl}(object, outcome = TRUE, ...)

\method{logLik}{gmnl}(object, ...)

\method{nObs}{gmnl}(x, ...)
}
\arguments{
\item{formula}{a symbolic description of the model to be estimated. The formula is divided into five parts, each of them separated by the symbol \code{|}. The first part is reserved for alternative-specific variables with a generic coefficient. The second part corresponds to individual-specific variables with alternative-specific coefficients. The third part corresponds to alternative-specific variables with an alternative-specific coefficient. The fourth part is reserved for time-invariant variables that modify the mean of the random parameters. Finally, the fifth part is reserved for time-invariant variables that enter in the scale coefficient or in the probability assignment in models with latent classes.}

\item{data}{the data of class \code{\link[mlogit]{mlogit.data}}.}

\item{subset}{an optional vector specifying a subset of observations.}

\item{weights}{an optional vector of weights. Defaults to 1.}

\item{na.action}{a function which indicates what should happen when the data
contain \code{NA}s.}

\item{model}{a string indicating which model is estimated. The options are "\code{mnl}" for the Multinomial Logit Model, "\code{mixl}" for the Mixed Logit Model, "\code{smnl}" for the Scaled Multinomial Logit Model, "\code{gmnl}" for the Generalized Multinomial Logit Model, "\code{lc}" for the Latent Class Multinomial Logit Model, and "\code{mm}" for the Mixed-Mixed Multinomial Logit Model.}

\item{start}{a vector of starting values.}

\item{ranp}{a named vector whose names are the random parameters and values the distribution:
"\code{n}" for normal, "\code{ln}" for log-normal, "\code{cn}" for truncated normal, "\code{u}" for uniform, "\code{t}" for triangular, "\code{sb}" for Sb Johnson.}

\item{R}{the number of draws of pseudo-random numbers if \code{ranp} is not \code{NULL}.}

\item{Q}{number of classes for LC or MM-MNL models.}

\item{haltons}{only relevant if \code{ranp} is not \code{NULL}. If \code{haltons = NULL}, pseudo-random numbers are used instead of Halton sequences. If \code{haltons = NA}, the first \eqn{K} primes are used to generate the Halton draws, where \eqn{K} is the number of random parameters, and the first 15 elements of each sequence are dropped. Otherwise, \code{haltons} should be a list with elements \code{prime} and \code{drop}.}

\item{mvar}{only valid if \code{ranp} is not \code{NULL}. This is a named list, where the names correspond to the variables with random parameters, and the values correspond to the variables that enter in the mean of each random parameter.}

\item{seed}{seed for the random number generator. Default is \code{seed = 12345}.}

\item{correlation}{only relevant if \code{ranp} is not \code{NULL}. If true, the correlation across random parameters is taken into account.}

\item{bound.err}{only relevant if model is \code{smnl} or \code{gmnl}. It indicates at which values the draws for the scale parameter are truncated. By default \code{bound.err = 2}, therefore a truncated normal distribution with truncation at -2 and +2 is used.}

\item{panel}{if \code{TRUE} a panel data model is estimated.}

\item{hgamma}{a string indicating how to estimate the parameter gamma. If "\code{direct}", then \eqn{\gamma} is estimated directly, if "\code{indirect}" then \eqn{\gamma ^*} is estimated, where \eqn{\gamma = \exp(\gamma^*)/(1 + \exp(\gamma^*))}.}

\item{reflevel}{the base alternative.}

\item{init.tau}{initial value for the \eqn{\tau} parameter.}

\item{init.gamma}{initial value  for \eqn{\gamma}.}

\item{notscale}{only relevant if model is \code{smnl} or \code{gmnl}. It is a vector indicating which variables should not be scaled.}

\item{print.init}{if \code{TRUE}, the initial values for the optimization procedure are printed.}

\item{gradient}{if \code{TRUE}, analytical gradients are used for the optimization procedure.}

\item{typeR}{if \code{TRUE}, truncated normal draws are used for the scale parameter, if \code{FALSE} the procedure suggested by Greene and Hensher (2010) is used.}

\item{...}{additional arguments to be passed to \code{\link[maxLik]{maxLik}}, which depend on the maximization routine.}

\item{x, object}{an object of class \code{gmnl}.}

\item{digits}{the number of digits.}

\item{width}{width.}

\item{new}{an updated formula for the \code{update} method.}

\item{outcome}{if \code{TRUE}, then the \code{fitted} and \code{residuals} methods return a vector that corresponds to the chosen alternative, otherwise it returns a matrix where each column corresponds to each alternative.}
}
\value{
An object of class ``\code{gmnl}'' with the following elements
\item{coefficients}{the named vector of coefficients,}
\item{logLik}{a set of values of the maximum likelihood procedure,}   
\item{mf}{the model frame used,}
\item{formula}{the formula (a \code{gFormula} object),}
\item{time}{\code{proc.time()} minus the start time,}
\item{freq}{frequency of dependent variable,}
\item{draws}{type of draws used,}
\item{model}{the fitted model,}
\item{R}{number of draws used,}
\item{ranp}{vector indicating the variables with random parameters and their distribution,}
\item{residuals}{the residuals,}
\item{correlation}{whether the model is fitted assuming that the random parameters are correlated,}
\item{bi}{matrix of conditional expectation of random parameters,}
\item{Q}{number of classes,}
\item{call}{the matched call.}
}
\description{
Estimate different types of multinomial logit models with observed and unobserved individual heterogeneity, such as
MIXL, S-MNL, G-MNL, LC and MM-MNL models. These models are estimated using Maximum Simulated Likelihood. It supports both cross-sectional and panel data.
}
\details{
Let the utility to person \eqn{i} from choosing alternative \eqn{j} on choice occasion \eqn{t} be: \deqn{U_{ijt} = \beta_{i}x_{ijt} + \epsilon_{ijt}} where \eqn{\epsilon_{ijt}} is i.i.d. extreme value, and \eqn{\beta_i} varies across individuals. Each model estimated by \code{gmnl} depends on how \eqn{\beta_i} is specified. The options are the following:
\enumerate{
\item S-MNL if \eqn{\beta_i=\sigma_i\beta}, where the scale \eqn{\sigma_i} varies across individuals.
\item MIXL  if \eqn{\beta_i=\beta + s\eta_i}, where \eqn{\eta_i} is a draw from some distribution. For example, if \eqn{\beta_i\sim N(\beta, s^2)}, then \eqn{\eta_i\sim N(0, 1)}.
\item GMNL if \eqn{\beta_i=\sigma_i\beta + \gamma s\eta_i + \sigma_i(1-\gamma)s\eta_i}, where \eqn{\sigma_i} is the scale parameter, and \eqn{\gamma} is a parameter that controls how the variance of residual taste heterogeneity varies with scale.
\item LC if \eqn{\beta_i=\beta_q} with probability \eqn{w_{iq}} for \eqn{q = 1,...,Q}, where \eqn{Q} is the total number of classes.
\item MM-MIXL if  \eqn{\beta_i\sim f(\beta_q, \Sigma_q)} with probability \eqn{w_{iq}} for \eqn{q = 1,...,Q}, where \eqn{Q} is the total number of classes.
}

Observed heterogeneity can be also accommodated in the random parameters when the MIXL is estimated by including individual-specific covariates. Specifically, the vector of random coefficients is \deqn{\beta_i=\beta +\Pi z_i + L\eta_i} where \eqn{z_i} is a set of characteristics of individual \eqn{i} that influence the mean of the taste parameters; and \eqn{\Pi} is a matrix of parameters. To estimate this model, the fourth part of the \code{formula} should be specified along with the \code{mvar} argument.


One can also allow the mean of the scale to differ across individuals by including individual-specific characteristics. Thus, the scale parameters can be written as \deqn{\exp(\bar{\sigma} + \delta h_i + \tau \upsilon_i)} where \eqn{h_i} is a vector of attributes of individual \eqn{i}. To estimate this model, the fifth part of the \code{formula} should include the variables that enter \eqn{h_i}.

For models with latent classes, the class assignment is modeled as a semi-parametric multinomial logit format \deqn{w_{iq}= \frac{\exp(\gamma_q)}{\sum_{q=1}^Q\exp(\gamma_q)}} for \eqn{q = 1,...,Q, \gamma_1 = 0}. Latent class models (LC and MM-MIXL) require that at least a constant be specified in the fifth part of the \code{formula}. If the class assignment, \eqn{w_{iq}}, is also determined by socio-economic characteristics, these variables can be also included in the fifth part.


Models that involve random parameters are estimated using Maximum Simulated Likelihood using the \code{maxLik} function of the \code{\link[maxLik]{maxLik}} package.
}
\examples{
## Examples using the TravelMode data set from the AER package
data("TravelMode", package = "AER")
library(mlogit)
TM <- mlogit.data(TravelMode, choice = "choice", shape = "long", 
                 alt.levels = c("air", "train", "bus", "car"), chid.var = "individual")
\dontrun{
## S-MNL model, ASCs not scaled
smnl <- gmnl(choice ~ wait + vcost + travel + gcost| 1, data = TM, 
             model = "smnl", R = 100, 
             notscale = c(1, 1, 1, rep(0, 4)))
summary(smnl)

## MIXL model with observed heterogeneity
mixl.hier <- gmnl(choice ~ vcost + gcost + travel + wait | 1 | 0 | income + size - 1,
                 data = TM,
                 model = "mixl",
                 ranp = c(travel = "t", wait = "n"),
                 mvar = list(travel = c("income","size"), wait = c("income")),
                 R = 30,
                 haltons = list("primes"= c(2, 17), "drop" = rep(19, 2)))
summary(mixl.hier)

## Examples using the Electricity data set from the mlogit package
data("Electricity", package = "mlogit")
Electr <- mlogit.data(Electricity, id.var = "id", choice = "choice",
                     varying = 3:26, shape = "wide", sep = "")
                     
## Estimate a MIXL model with correlated random parameters
Elec.cor <- gmnl(choice ~ pf + cl + loc + wk + tod + seas| 0, data = Electr,
                 subset = 1:3000,
                 model = 'mixl',
                 R = 10,
                 panel = TRUE,
                 ranp = c(cl = "n", loc = "n", wk = "n", tod = "n", seas = "n"),
                 correlation = TRUE)
summary(Elec.cor)
cov.gmnl(Elec.cor)
se.cov.gmnl(Elec.cor)
se.cov.gmnl(Elec.cor, sd = TRUE)
cor.gmnl(Elec.cor)

## Estimate a G-MNL model, where ASCs are also random
Electr$asc2 <- as.numeric(Electr$alt == 2)
Electr$asc3 <- as.numeric(Electr$alt == 3)
Electr$asc4 <- as.numeric(Electr$alt == 4)

Elec.gmnl <- gmnl(choice ~ pf + cl + loc + wk + tod + seas + asc2 + asc3 + asc4 | 0,
                 data = Electr,
                 subset = 1:3000,
                 model = 'gmnl',
                 R = 30,
                 panel = TRUE,
                 notscale = c(rep(0, 6), 1, 1, 1),
                 ranp = c(cl = "n", loc = "n", wk = "n", tod = "n", seas = "n",
                 asc2 = "n", asc3 = "n", asc4 = "n"))
summary(Elec.gmnl)

## Estimate a LC model with 2 classes
Elec.lc <- gmnl(choice ~ pf + cl + loc + wk + tod + seas| 0 | 0 | 0 | 1,
               data = Electr,
               subset = 1:3000,
               model = 'lc',
               panel = TRUE,
               Q = 2)
summary(Elec.lc)

## Estimate a MM-MIXL model
Elec.mm <- gmnl(choice ~ pf + cl + loc + wk + tod + seas| 0 | 0 | 0 | 1,
                 data = Electr,
                 subset = 1:3000,
                 model = 'mm',
                 R = 30,
                 panel = TRUE,
                 ranp = c(pf = "n", cl = "n", loc = "n", wk = "n", tod = "n",
                 seas = "n"),
                 Q = 2,
                 iterlim = 500)
summary(Elec.mm)
}
}
\author{
Mauricio Sarrias \email{msarrias86@gmail.com}
}
\references{
\itemize{
\item Keane, M., & Wasi, N. (2013). Comparing alternative models of heterogeneity in consumer choice behavior. Journal of Applied Econometrics, 28(6), 1018-1045.
\item Fiebig, D. G., Keane, M. P., Louviere, J., & Wasi, N. (2010). The generalized multinomial logit model: accounting for scale and coefficient heterogeneity. Marketing Science, 29(3), 393-421.
\item Greene, W. H., & Hensher, D. A. (2010). Does scale heterogeneity across individuals matter? An empirical assessment of alternative logit models. Transportation, 37(3), 413-428.
\item Train, K. (2009). Discrete choice methods with simulation. Cambridge University Press.
}
}
\seealso{
\code{\link[mlogit]{mlogit}}, \code{\link[mlogit]{mlogit.data}},  \code{\link[maxLik]{maxLik}}, \code{Rchoice}
}

