% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GDINA.R, R/anova.GDINA.R, R/extract.GDINA.R,
%   R/hoparm.R, R/itemparm.GDINA.R, R/personparm.GDINA.R, R/s3GDINA.R,
%   R/summary.GDINA.R
\name{GDINA}
\alias{GDINA}
\alias{anova.GDINA}
\alias{extract.GDINA}
\alias{hoparm.GDINA}
\alias{itemparm.GDINA}
\alias{itemparm.GDINA}
\alias{personparm.GDINA}
\alias{AIC.GDINA}
\alias{BIC.GDINA}
\alias{logLik.GDINA}
\alias{deviance.GDINA}
\alias{npar.GDINA}
\alias{indlogLik.GDINA}
\alias{indlogPost.GDINA}
\alias{summary.GDINA}
\title{Calibrate dichotomous and polytomous responses}
\usage{
GDINA(dat, Q, model = "GDINA", sequential = FALSE, item.names = NULL,
  higher.order = FALSE, higher.order.model = "Rasch",
  higher.order.method = "MMLE", verbose = 1, catprob.parm = NULL,
  higher.order.struc.parm = NULL, mono.constraint = FALSE, group = NULL,
  empirical = !higher.order, att.prior = NULL, att.str = FALSE,
  lower.p = 1e-04, upper.p = 0.9999, smallNcorrection = c(5e-04, 0.001),
  nstarts = 1, conv.crit = 0.001, lower.prior = -1,
  conv.type = "max.p.change", maxitr = 1000, digits = 4, diagnosis = 0,
  Mstep.warning = FALSE, optimizer = "all", randomseed = 123456,
  optim.control = list())

\method{anova}{GDINA}(object, ...)

\method{extract}{GDINA}(object, what, digits = 4, ...)

\method{hoparm}{GDINA}(object, withSE = FALSE, digits = 4, ...)

\method{itemparm}{GDINA}(object, what = c("catprob", "itemprob", "LCprob",
  "gs", "delta", "rrum", "higher.order"), withSE = FALSE, SE.type = 2,
  digits = 4, ...)

\method{personparm}{GDINA}(object, what = c("EAP", "MAP", "MLE", "mp"),
  digits = 4, ...)

\method{AIC}{GDINA}(object, ...)

\method{BIC}{GDINA}(object, ...)

\method{logLik}{GDINA}(object, ...)

\method{deviance}{GDINA}(object, ...)

\method{npar}{GDINA}(object, ...)

\method{indlogLik}{GDINA}(object, ...)

\method{indlogPost}{GDINA}(object, ...)

\method{summary}{GDINA}(object, ...)
}
\arguments{
\item{dat}{A required \eqn{N \times J} \code{matrix} or \code{data.frame} consisting of the
responses of \eqn{N} individuals to \eqn{J} items. Missing values need to be coded as \code{NA}.}

\item{Q}{A required \eqn{J \times K} item or category and attribute association matrix, where \eqn{J} represents the number of
items or nonzero categories and \eqn{K} represents the number of attributes. For binary attributes,
entry 1 indicates that the attribute is measured by the item, and 0 otherwise.
For polytomous attributes, non-zero elements indicate the level
of attributes that are needed for an individual to answer the item correctly (see Chen & de la Torre, 2013).
Note that for polytomous items, the sequential G-DINA
model is used and either restricted or unrestricted category-level Q-matrix is needed.
In the category-level Q-matrix, the first column gives the item number, which must be numeric and match the column number of the item in the data.
The second column indicates the category number. See \code{Examples}.}

\item{model}{A vector for each item or nonzero category, or a scalar which will be used for all
items or nonzero categories to specify the CDMs fitted. The possible options
include \code{"GDINA"},\code{"DINA"},\code{"DINO"},\code{"ACDM"},\code{"LLM"}, and \code{"RRUM"}.
It is also possible to specify CDMs using numbers. In particular, 0, 1, 2, 3, 4 and 5 represent
\code{"GDINA"},\code{"DINA"},\code{"DINO"},\code{"ACDM"},\code{"LLM"}, and \code{"RRUM"}, respectively.}

\item{sequential}{logical; \code{TRUE} if the sequential model is fitted for polytomous responses.}

\item{item.names}{A vector giving the item names. By default, items are named as "Item 1", "Item 2", etc.}

\item{higher.order}{logical; \code{TRUE} indicates a higher-order joint attribute distribution
is assumed. The default is \code{FALSE}, which means that the saturated attribute distribution is estimated.
The higher-order model needs to be specified in argument \code{higher.order.model}.}

\item{higher.order.model}{a character indicating the IRT model for higher-order joint attribute distribution. Can be
\code{"2PL"}, \code{"1PL"} or \code{"Rasch"}, representing two parameter logistic IRT model,
one parameter logistic IRT model and Rasch model,
respectively. For \code{"1PL"} model, a common slope parameter is
estimated (see \code{Details}). \code{"Rasch"} is the default model when \code{higher.order = TRUE}.}

\item{higher.order.method}{a character indicating the algorithm for the higher-order structural parameter estimation;
Can be either \code{"MMLE"} using marginal maximum likelihood estimation, or \code{"BL"} based on the Bock and
Lieberman's (1970) approach. \code{"BL"} is suitable when the number of attributes is small. It is not
sensitive to sample size but can be very slow if the number of attributes is large.
\code{"MMLE"}, which is the default, is suitable for most conditions but might be slow if
sample size is extremely large.}

\item{verbose}{How to print calibration information
after each EM iteration? Can be 0, 1 or 2, indicating to print no information,
information for current iteration, or information for all iterations.}

\item{catprob.parm}{A list of initial success probability parameters for each nonzero category.}

\item{higher.order.struc.parm}{A matrix or data frame providing higher order structural parameters.
If supplied, it must be of dimension \eqn{K\times 2}.
The first column is the slope parameters and the second column is the intercept.}

\item{mono.constraint}{logical; \code{TRUE} indicates that \eqn{P(\bm{\alpha}_1) \leq P(\bm{\alpha}_2)} if
for all \eqn{k}, \eqn{\alpha_{1k} \leq \alpha_{2k}}. Can be a vector for each item or nonzero category or a scalar which will be used for all
items to specify whether monotonicity constraint should be added.}

\item{group}{a scalar indicating which column in \code{dat} is group indicator or
a numerical vector indicating the group each individual belongs to. If it is a vector,
its length must be equal to the number of individuals. At most two groups can be handled currently.}

\item{empirical}{Logical; whether the empirical Bayes method is adopted. \code{TRUE} is
the default when higher order attribute structure is not assumed. If estimating
higher order structure, it will be \code{FALSE}.}

\item{att.prior}{A vector of length \eqn{2^K} or a matrix with \eqn{2^K} rows and one column per group to specify
attribute prior distribution for \eqn{2^K} latent classes for all groups. Only applicable for dichotomous attributes.
The sum of all elements does not have to be equal to 1; however, it will be transformed so that the sum is equal to 1
before model calibration.
(1) If \code{empirical=FALSE} and \code{higher.order=FALSE}, the attribute prior distribution is fixed during model
calibration; (2) if \code{empirical=TRUE} and \code{higher.order=FALSE}, the distribution for all latent classes
with non-zero priors is updated using the empirical Bayes method;
(3) if \code{empirical=FALSE} and \code{higher.order=TRUE}, the distribution for all latent classes
with non-zero priors is updated using the specified higher-order model.
The label for each latent class can be obtained by calling \code{attributepattern(K)}. See \code{Examples} for more info.}

\item{att.str}{logical; whether attributes have any structure?}

\item{lower.p}{A vector for each item or nonzero category, or a scalar which will be used for all
items or nonzero categories to specify the lower bound for success probabilities. The default is \code{1e-4} for all items.}

\item{upper.p}{A vector for each item or nonzero category, or a scalar which will be used for all
items or nonzero categories to specify the upper bound for success probabilities. The default is 0.9999 for all items.}

\item{smallNcorrection}{A numeric vector with two elements specifying the corrections applied when the expected number of
individuals in some latent groups is too small. If the expected number of examinees is less than the second element,
the first element and two times the first element will be added to the numerator and denominator of the closed-form solution of
probabilities of success. Only applicable for the G-DINA, DINA and DINO model estimation without monotonic constraints.}

\item{nstarts}{how many sets of starting values? The default is 1.}

\item{conv.crit}{The convergence criterion for max absolute change in item parameters or deviance.}

\item{lower.prior}{The lower bound for prior weights. Only applicable for nonstructured attributes.
The default value is -1, which means the lower bound is \eqn{1/2^K/100}.}

\item{conv.type}{How is the convergence criterion evaluated? Can be \code{"max.p.change"}, indicating
the maximum absolute change in success probabilities, or \code{"dev.change"}, representing
the absolute change in deviance.}

\item{maxitr}{The maximum number of EM cycles allowed.}

\item{digits}{How many decimal places in each number? The default is 4.}

\item{diagnosis}{Run in diagnostic mode? If it is 1 or 2, some intermediate results obtained in each iteration can be extracted.}

\item{Mstep.warning}{Logical; Whether the warning message in Mstep, if any, should be output immediately.}

\item{optimizer}{A string indicating which optimizer should be used in M-step.}

\item{randomseed}{Random seed for generating initial item parameters. The default random seed is 123456.}

\item{optim.control}{Control options for optimizers in the M-step. Only available when \code{optimizer} is one specific optimization
method, including \code{BFGS} from \link[stats]{optim}, \link[nloptr]{slsqp}, \link[Rsolnp]{solnp} and \link[alabama]{auglag}.
For the \link[alabama]{auglag} method, \code{optim.control} specifies \code{control.outer}.}

\item{object}{estimated GDINA object for various S3 methods}

\item{...}{additional arguments}

\item{what}{argument for various S3 methods}

\item{withSE}{argument for S3 methods \code{\link{itemparm}} and \code{\link{hoparm}}; show standard errors or not?}

\item{SE.type}{type of standard errors.}
}
\value{
\code{GDINA} returns an object of class \code{GDINA}. S3 methods for \code{GDINA} objects
 include \code{\link{extract}} for extracting various components, \code{\link{itemparm}}
 for extracting item parameters, \code{\link{personparm}}
 for calculating person parameters, \code{summary} for summary information.
 \code{AIC}, \code{BIC},\code{logLik}, \code{deviance} and \code{npar} can also be used to
 calculate AIC, BIC, observed log-likelihood, deviance and number of parameters.
}
\description{
\code{GDINA} calibrates the generalized deterministic inputs, noisy and
gate (G-DINA; de la Torre, 2011) model for dichotomous responses, and the sequential
G-DINA model (Ma & de la Torre, 2016a) for ordinal and nominal responses.
By setting appropriate constraints, the deterministic inputs,
noisy and gate (DINA; de la Torre, 2009; Junker & Sijtsma, 2001) model,
the deterministic inputs, noisy or gate (DINO; Templin & Henson, 2006)
model, the reduced reparametrized unified model (R-RUM; Hartz, 2002),
the additive CDM (A-CDM; de la Torre, 2011), and the linear logistic
model (LLM; Maris, 1999) can also be calibrated. Note that the LLM is equivalent to
the C-RUM (Hartz, 2002), a special case of the GDM (von Davier, 2008), and that the R-RUM
is also known as a special case of the generalized NIDA model (de la Torre, 2011).
Different models can be fitted to different
items in a single test. The attributes can be either dichotomous or polytomous
(Chen & de la Torre, 2013). The joint attribute distribution can be saturated, structured or higher-order
(de la Torre & Douglas, 2004) when attributes are binary.
The marginal maximum likelihood method with the Expectation-Maximization (MMLE/EM) algorithm
is used for item parameter estimation.

To compare two \code{GDINA} models, use method \code{\link{anova}}.

To extract higher-order parameters, use method \code{\link{hoparm}}.

To extract lower-order structural (item) parameters, use S3 method \code{\link{itemparm}}.

To calculate lower-order incidental (person) parameters
use method \code{\link{personparm}}. To extract other components returned, use \code{\link{extract}}.
To plot item/category response function, use \code{\link{plotIRF}}. To
check whether monotonicity is violated, use \code{\link{monocheck}}. To conduct analysis in a graphical user interface,
use \code{\link{startGDINA}}.
}
\section{Methods (by generic)}{
\itemize{
\item \code{anova}: Model comparison using likelihood ratio test

\item \code{extract}: extract various elements of GDINA estimates

\item \code{hoparm}: extract higher-order parameters

\item \code{itemparm}: extract various item parameters

\item \code{personparm}: calculate person attribute patterns and higher-order ability

\item \code{AIC}: calculate AIC

\item \code{BIC}: calculate BIC

\item \code{logLik}: calculate log-likelihood

\item \code{deviance}: calculate deviance

\item \code{npar}: calculate the number of parameters

\item \code{indlogLik}: extract log-likelihood for each individual

\item \code{indlogPost}: extract log posterior for each individual

\item \code{summary}: print summary information
}}

\note{
anova function does NOT check whether models compared are nested or not.
}
\section{The G-DINA model}{


The generalized DINA model (G-DINA; de la Torre, 2011) is an extension of the DINA model.
Unlike the DINA model, which collapses all latent classes into two latent groups for
each item, if item \eqn{j} requires \eqn{K_j^*}
attributes, the G-DINA model collapses \eqn{2^K} latent classes into \eqn{2^{K_j^*}}
latent groups with unique success probabilities on item \eqn{j}, where
\eqn{K_j^*=\sum_{k=1}^{K}q_{jk}}.

Let \eqn{\bm{\alpha}_{lj}^*} be the reduced attribute
pattern consisting of the columns of the attributes required by item \eqn{j}, where
\eqn{l=1,\ldots,2^{K_j^*}}. For example, if only the first and the last attributes are
required, \eqn{\bm{\alpha}_{lj}^*=(\alpha_{l1},\alpha_{lK})}. For notational
convenience, the first \eqn{K_j^*} attributes can be assumed to be the required attributes
for item \eqn{j} as in de la Torre (2011). The probability of success \eqn{P(X_{j}=1|\bm{\alpha}_{lj}^*)} is denoted
by \eqn{P(\bm{\alpha}_{lj}^*)}. To model this probability of success, different link functions
as in the generalized linear models are used in the G-DINA model. The item response
function of the G-DINA model using the identity link can be written as
\deqn{P(\bm{\alpha}_{lj}^*)=\delta_{j0}+\sum_{k=1}^{K_j^*}\delta_{jk}\alpha_{lk}+
\sum_{k'=k+1}^{K_j^*}\sum_{k=1}^{K_j^*-1}\delta_{jkk'}\alpha_{lk}\alpha_{lk'}+\cdots+
\delta_{j12{\cdots}K_j^*}\prod_{k=1}^{K_j^*}\alpha_{lk},
}
where \eqn{\delta_{j0}} is the intercept for item \eqn{j}, \eqn{\delta_{jk}} is the main effect
due to \eqn{\alpha_{lk}}, \eqn{\delta_{jkk'}} is the interaction effect due to
\eqn{\alpha_{lk}} and \eqn{\alpha_{lk'}}, \eqn{\delta_{j12{\ldots}K_j^*}} is the interaction
effect due to \eqn{\alpha_{l1}, \cdots,\alpha_{lK_j^*}}. The log and logit links can also
be employed.
}

\section{Other CDMs as special cases}{


Several widely used CDMs can be obtained by setting appropriate constraints to the G-DINA model.
This section introduces the parameterization
of different CDMs within the G-DINA model framework very briefly. Interested readers may refer to
de la Torre (2011) for details.

\describe{
  \item{\code{DINA model}}{
       In DINA model, each item has two item parameters - guessing (\eqn{g}) and slip (\eqn{s}). In traditional
       parameterization of the DINA model, a latent variable \eqn{\eta} for person \eqn{i} and
       item \eqn{j} is defined as
       \deqn{\eta_{ij}=\prod_{k=1}^K\alpha_{ik}^{q_{jk}}}
       Briefly speaking, if individual \eqn{i} masters all attributes required by item \eqn{j},
       \eqn{\eta_{ij}=1}; otherwise, \eqn{\eta_{ij}=0}.
       The item response function of the DINA model can be written as
       \deqn{P(X_{ij}=1|\eta_{ij})=(1-s_j)^{\eta_{ij}}g_j^{1-\eta_{ij}}}
       To obtain the DINA model from the G-DINA model,
       all terms in the identity-link G-DINA model except \eqn{\delta_{j0}} and \eqn{\delta_{j12{\cdots}K_j^*}}
       need to be fixed to zero, that is,
       \deqn{ P(\bm{\alpha}_{lj}^*)=\delta_{j0}+\delta_{j12{\cdots}K_j^*}\prod_{k=1}^{K_j^*}\alpha_{lk}}
       In this parameterization, \eqn{\delta_{j0}=g_j} and \eqn{\delta_{j0}+\delta_{j12{\cdots}K_j^*}=1-s_j}.

   }
\item{\code{DINO model}}{
       The DINO model can be given by
       \deqn{P(\bm{\alpha}_{lj}^*)=\delta_{j0}+\delta_{j1}I(\bm{\alpha}_{lj}^*\neq \bm{0})}

       where \eqn{I(\cdot)} is an indicator variable. The DINO model is also a constrained identity
       link G-DINA model. As shown by de la Torre (2011), the appropriate constraint is
       \deqn{\delta_{jk}=-\delta_{jk^{'}k^{''}}=\cdots=(-1)^{K_j^*+1}\delta_{j12{\cdots}K_j^*},} for
       \eqn{k=1,\cdots,K_j^*}, \eqn{k^{'}=1,\cdots,K_j^*-1}, and \eqn{k^{''}>k^{'},\cdots,K_j^*}.
   }
\item{\code{Additive models with different link functions}}{
       The A-CDM, LLM and R-RUM can be obtained by setting all interactions to be zero in
       identity, logit and log link G-DINA model, respectively. Specifically, the A-CDM can be formulated as
       \deqn{P(\bm{\alpha}_{lj}^*)=\delta_{j0}+\sum_{k=1}^{K_j^*}\delta_{jk}\alpha_{lk}.}
       The item response function for
       LLM can be given by
       \deqn{ logit[P(\bm{\alpha}_{lj}^*)]=\delta_{j0}+\sum_{k=1}^{K_j^*}\delta_{jk}\alpha_{lk},}
       and lastly, the RRUM, can be written as
       \deqn{log[P(\bm{\alpha}_{lj}^*)]=\delta_{j0}+\sum_{k=1}^{K_j^*}\delta_{jk}\alpha_{lk}.} It should be
       noted that the LLM is equivalent to the compensatory RUM, which is subsumed by the GDM, and that
       the RRUM is a special case of the generalized noisy inputs, deterministic ``And'' gate model (G-NIDA).
   }
   }
}

\section{Model Estimation}{


The MMLE/EM algorithm is implemented in this package. For G-DINA, DINA and DINO models, closed-form solutions can be found.
Specifically, for the G-DINA model, \deqn{P(\bm{\alpha}_{lj}^*)=R_{jl}/N_{jl}} where \eqn{R_{jl}} is the expected number of examinees with attribute pattern \eqn{\bm{\alpha}_{lj}^*}
answering item \eqn{j} correctly and \eqn{N_{jl}} is the expected number of examinees with attribute pattern \eqn{\bm{\alpha}_{lj}^*}.
For DINA or DINO model, \eqn{R_{jl}} and \eqn{N_{jl}} are collapsed for latent classes having the same probability of success.
See de la Torre (2009) and de la Torre (2011) for details.

For ACDM, LLM and RRUM, closed-form solutions do not exist, and therefore some general optimization techniques are
adopted in M-step. See Ma, Iaconangelo and de la Torre (2016) for details.
The selection of optimization techniques mainly depends on whether
some specific constraints need to be added. It should
be noted that adding monotone constraints to the G-DINA model may dramatically increase running time especially when the number of required
attributes is large.

The sequential G-DINA model can be estimated as in Ma & de la Torre (2016a) using optimization techniques. However,
Ma & de la Torre (2016b) found that the sequential G-DINA, DINA and DINO models can be estimated using
closed-form solutions, which can be implemented in a straightforward
manner using the observation-coding (Tutz, 1997).

For estimating the joint attribute
distribution, by default, an empirical Bayes method (Carlin & Louis, 2000) is adopted, which is referred to as
the saturated attribute structure. Specifically,
the prior distribution of joint attributes is uniform at the beginning, and then updated after
each EM iteration based on the posterior distribution.

The joint attribute distribution can also be modeled using some higher-order IRT models, which is referred to as
higher-order attribute structure. The higher-order attribute structure was originally proposed by de la Torre
and Douglas (2004) for the DINA model. It has been extended in this package for the G-DINA model, DINA, DINO, A-CDM, LLM and RRUM.
Particularly, three IRT models are available for the higher-order attribute structure:
Rasch model (Rasch), one parameter logistic model (1PL) and two parameter logistic model (2PL).
For the Rasch model, the probability of mastering attribute \eqn{k} for individual \eqn{i} is defined as
\deqn{P(\alpha_k=1|\theta_i,\lambda_{0k})=\frac{exp(\theta_i+\lambda_{0k})}{1+exp(\theta_i+\lambda_{0k})}}
For the 1PL model, the probability of mastering attribute \eqn{k} for individual \eqn{i} is defined as
\deqn{P(\alpha_k=1|\theta_i,\lambda_{0k},\lambda_{1})=\frac{exp(\lambda_{1}\theta_i+\lambda_{0k})}{1+exp(\lambda_{1}\theta_i+\lambda_{0k})}}
For the 2PL model, the probability of mastering attribute \eqn{k} for individual \eqn{i} is defined as
\deqn{P(\alpha_k=1|\theta_i,\lambda_{0k},\lambda_{1k})=\frac{exp(\lambda_{1k}\theta_i+\lambda_{0k})}{1+exp(\lambda_{1k}\theta_i+\lambda_{0k})}}
where \eqn{\theta_i} is the ability of examinee \eqn{i}. \eqn{\lambda_{0k}} and \eqn{\lambda_{1k}} are the intercept
and slope parameters for attribute \eqn{k}, respectively. In the Rasch model, \eqn{\lambda_{1k}=1 \forall k};
whereas in the 1PL model, a common slope parameter \eqn{\lambda_{1}} is estimated.
The probability of joint attributes can be written as
 \deqn{P(\bm{\alpha}|\theta_i,\bm{\lambda})=\prod_k P(\alpha_k|\theta_i,\bm{\lambda})}
To estimate the parameters for higher order IRT model, either Bock and Aitkin's (1981) MMLE/EM algorithm or
Bock and Lieberman's (BL; 1970) method can be used.
}

\section{The Number of Parameters}{


For dichotomous response models:
Assume a test measures \eqn{K} attributes and item \eqn{j} requires \eqn{K_j^*} attributes:
The DINA and DINO models each have 2 item parameters per item;
if item \eqn{j} is ACDM, LLM or RRUM, it has \eqn{K_j^*+1} item parameters; if it is G-DINA model, it has \eqn{2^{K_j^*}} item parameters.
Apart from item parameters, the parameters involved in the estimation of joint attribute distribution need to be estimated as well.
When using the saturated attribute structure, there are \eqn{2^K-1} parameters for joint attribute distribution estimation; when
using a higher-order attribute structure, there are \eqn{K}, \eqn{K+1}, and \eqn{2\times K} parameters for the Rasch model,
1PL model and 2PL model, respectively.
For polytomous response data using the sequential G-DINA model, the number of item parameters
is counted at the category level.
}

\examples{
\dontrun{
####################################
#        Example 1.                #
#     GDINA, DINA, DINO            #
#    ACDM, LLM and RRUM            #
# estimation and comparison        #
#                                  #
####################################

# responses and Q-matrix stored in the sim10GDINA data set
dat <- sim10GDINA$simdat
Q <- sim10GDINA$simQ

#--------GDINA model --------#

# fit the G-DINA model to every item; printing the object shows the fit
mod1 <- GDINA(dat = dat, Q = Q, model = "GDINA")
mod1
# summary information
summary(mod1)

# model-level statistics available through S3 methods
AIC(mod1) # AIC
BIC(mod1) # BIC
logLik(mod1) # log-likelihood value
deviance(mod1) # deviance: -2 log-likelihood
npar(mod1) # number of parameters


# per-individual quantities (first rows only)
head(indlogLik(mod1)) # individual log-likelihood
head(indlogPost(mod1)) # individual log-posterior

# item parameters
# see ?itemparm
itemparm(mod1) # item probabilities of success for each latent group
itemparm(mod1, withSE = TRUE) # item probabilities of success & standard errors
itemparm(mod1, what = "delta") # delta parameters
itemparm(mod1, what = "delta",withSE=TRUE) # delta parameters & standard errors
itemparm(mod1, what = "gs") # guessing and slip parameters
itemparm(mod1, what = "gs",withSE = TRUE) # guessing and slip parameters & standard errors

# person parameters
# see ?personparm
personparm(mod1) # EAP estimates of attribute profiles (the default)
personparm(mod1, what = "MAP") # MAP estimates of attribute profiles
personparm(mod1, what = "MLE") # MLE estimates of attribute profiles

# plot item response functions for item 10
plotIRF(mod1,item = 10)
plotIRF(mod1,item = 10,errorbar = TRUE) # with error bars
plotIRF(mod1,item = c(6,10)) # items 6 and 10

# Use extract function to extract more components
# See ?extract

# ------- DINA model --------#
# dat and Q are reloaded so this sub-example can be run on its own
dat <- sim10GDINA$simdat
Q <- sim10GDINA$simQ
mod2 <- GDINA(dat = dat, Q = Q, model = "DINA")
mod2
itemparm(mod2, what = "gs") # guess and slip parameters
itemparm(mod2, what = "gs",withSE = TRUE) # guess and slip parameters and standard errors

# Model comparison at the test level via likelihood ratio test
# (anova does not check that the two models are nested)
anova(mod1,mod2)

# -------- DINO model -------#
dat <- sim10GDINA$simdat
Q <- sim10GDINA$simQ
mod3 <- GDINA(dat = dat, Q = Q, model = "DINO")
# guessing and slip
itemparm(mod3, what = "gs") # guess and slip parameters
itemparm(mod3, what = "gs",withSE = TRUE) # guess and slip parameters + standard errors

# Model comparison at test level via likelihood ratio test
anova(mod1,mod3)

# --------- ACDM model -------#
dat <- sim10GDINA$simdat
Q <- sim10GDINA$simQ
mod4 <- GDINA(dat = dat, Q = Q, model = "ACDM")
mod4
# --------- LLM model -------#
dat <- sim10GDINA$simdat
Q <- sim10GDINA$simQ
mod4b <- GDINA(dat = dat, Q = Q, model = "LLM")
mod4b
# --------- RRUM model -------#
dat <- sim10GDINA$simdat
Q <- sim10GDINA$simQ
mod4c <- GDINA(dat = dat, Q = Q, model = "RRUM")
mod4c

# --- Different CDMs for different items --- #

dat <- sim10GDINA$simdat
Q <- sim10GDINA$simQ
# one model name per item (10 entries), in item order
models <- c(rep("GDINA",3),"LLM","DINA","DINO","ACDM","RRUM","LLM","RRUM")
mod5 <- GDINA(dat = dat, Q = Q, model = models)
# compare the all-G-DINA fit with the mixed-model fit
anova(mod1,mod5)


####################################
#        Example 2.                #
#        Model estimations         #
# With monotonicity constraints    #
####################################
dat <- sim10GDINA$simdat
Q <- sim10GDINA$simQ
# vector form: constraint for item 10 only (items 1-9 unconstrained)
mod11 <- GDINA(dat = dat, Q = Q, model = "GDINA",mono.constraint = c(rep(FALSE,9),TRUE))
mod11
# scalar form: the same setting is used for all items
mod11a <- GDINA(dat = dat, Q = Q, model = "DINA",mono.constraint = TRUE)
mod11a
mod11b <- GDINA(dat = dat, Q = Q, model = "ACDM",mono.constraint = TRUE)
mod11b
mod11c <- GDINA(dat = dat, Q = Q, model = "LLM",mono.constraint = TRUE)
mod11c
mod11d <- GDINA(dat = dat, Q = Q, model = "RRUM",mono.constraint = TRUE)
mod11d
itemparm(mod11d,"delta") # delta parameters
itemparm(mod11d,"rrum") # what = "rrum"; presumably the R-RUM parameterization, see ?itemparm

####################################
#           Example 3.             #
#        Model estimations         #
# With Higher order att structure  #
####################################

dat <- sim10GDINA$simdat
Q <- sim10GDINA$simQ

# --- Higher order G-DINA model ---#
# structural parameters estimated with the Bock-Lieberman ("BL") method
mod12 <- GDINA(
  dat = dat, Q = Q, model = "GDINA",
  higher.order = TRUE, higher.order.method = "BL"
)
hoest <- hoparm(mod12) # higher-order parameter estimates
hoest$theta # ability
hoest$lambda # structural parameters

# --- Higher order DINA model ---#
# structural parameters estimated with "MMLE"
mod22 <- GDINA(
  dat = dat, Q = Q, model = "DINA",
  higher.order = TRUE, higher.order.method = "MMLE"
)

# --- Higher order DINO model ---#
# higher-order model and method left at their defaults
mod23 <- GDINA(dat = dat, Q = Q, model = "DINO", higher.order = TRUE)

# --- Higher order ACDM model ---#
# 1PL higher-order model
mod24 <- GDINA(
  dat = dat, Q = Q, model = "ACDM",
  higher.order = TRUE, higher.order.model = "1PL"
)

# --- Higher order LLM model ---#
mod25 <- GDINA(dat = dat, Q = Q, model = "LLM", higher.order = TRUE)

# --- Higher order RRUM model ---#
mod26 <- GDINA(dat = dat, Q = Q, model = "RRUM", higher.order = TRUE)

####################################
#          Example 4.              #
#        Model estimations         #
# With user-specified att structure#
####################################

# --- User-specified attribute priors ----#
# prior distribution is fixed during calibration
# Assume each of 000,100,010 and 001 has probability of 0.1
# and each of 110, 101,011 and 111 has probability of 0.15
# Note that the sum is equal to 1
# (entries follow the latent class order listed above; see attributepattern(3))
prior <- c(0.1,0.1,0.1,0.1,0.15,0.15,0.15,0.15)
# fit GDINA model  - empirical must be FALSE so the prior stays fixed
dat <- sim10GDINA$simdat
Q <- sim10GDINA$simQ
modp1 <- GDINA(dat = dat, Q = Q, att.prior = prior, att.str = TRUE, empirical = FALSE)
# See the posterior weights
extract(modp1,what = "posterior.prob")
# and the prior that was used
extract(modp1,what = "att.prior")
# ----Linear structure of attributes -----#
# Assuming A1 -> A2 -> A3
# Q-matrix for 12 items, one row per item (filled by row)
  Q <- matrix(c(1,0,0,
                1,0,0,
                1,1,0,
                1,1,0,
                1,1,1,
                1,1,1,
                1,0,0,
                1,0,0,
                1,1,0,
                1,1,0,
                1,1,1,
                1,1,1),ncol=3,byrow=TRUE)
 # item parameters for DINA model (guessing and slip), all set to 0.1
 gs <- matrix(rep(0.1,24),ncol=2)
 N <- 5000
 # attribute simulation: 500 x 000, 1000 x 100, 1000 x 110 and 2500 x 111
 att <- rbind(matrix(0,nrow=500,ncol=3),
              matrix(rep(c(1,0,0),1000),ncol=3,byrow=TRUE),
              matrix(rep(c(1,1,0),1000),ncol=3,byrow=TRUE),
              matrix(rep(c(1,1,1),2500),ncol=3,byrow=TRUE))
 # data simulation
 simD <- simGDINA(N,Q,gs.parm = gs,
                   model = "DINA",attribute = att)
 dat <- simD$dat
 # setting structure: A1 -> A2 -> A3
 # note: latent classes with prior 0 are assumed impossible
 # the non-zero entries equal the simulated proportions of 000, 100, 110 and 111
 prior <- c(0.1,0.2,0,0,0.2,0,0,0.5)
 # empirical is left at its default (TRUE here), so classes with non-zero
 # priors are updated during calibration
 out <- GDINA(dat,Q,att.prior=prior,att.str = TRUE, model="DINA")
 # check posterior dist.
 extract(out,what = "posterior.prob")


####################################
#          Example 5.              #
#        Model estimations         #
# With user-specified att structure#
####################################

# --- User-specified attribute structure ----#
Q <- sim30GDINA$simQ
K <- ncol(Q)
# divergent structure encoded by the pairs below: A1->A2->A3; A1->A4->A5
# NOTE(review): this comment used to list A1->A4->A6 as well, but no c(4,6) pair
# is supplied - confirm the intended structure against ncol(Q)
diverg <- list(c(1,2),
               c(2,3),
               c(1,4),
               c(4,5))
struc <- att.structure(diverg,K)

# data simulation
N <- 1000
# draw latent class indices using the structure's class probabilities
# (no seed is set, so the draw differs between runs)
true.lc <- sample(c(1:2^K),N,replace=TRUE,prob=struc$att.prob)
table(true.lc) #check the sample
# map class indices to attribute patterns
true.att <- attributepattern(K)[true.lc,]
 # guessing and slip of 0.1 for every item
 gs <- matrix(rep(0.1,2*nrow(Q)),ncol=2)
 # data simulation
 simD <- simGDINA(N,Q,gs.parm = gs,
                   model = "DINA",attribute = true.att)
 dat <- extract(simD,"dat")

# empirical = FALSE: the supplied prior is kept fixed
modp1 <- GDINA(dat = dat, Q = Q, att.prior = struc$att.prob, att.str = TRUE, empirical = FALSE)
modp1
# Note that fixed priors were used for all iterations
extract(modp1,what = "att.prior")
# Posterior weights were slightly different
extract(modp1,what = "posterior.prob")
# empirical = TRUE: classes with non-zero priors are updated during calibration
modp2 <- GDINA(dat = dat, Q = Q, att.prior = struc$att.prob, att.str = TRUE, empirical = TRUE)
modp2
extract(modp2,what = "att.prior")
extract(modp2,what = "posterior.prob")


####################################
#           Example 6.             #
#        Model estimations         #
# With user-specified initial pars #
####################################

 # inspect 'initials' to see the format expected for initial item parameters
 initials <- sim10GDINA$simItempar
 dat <- sim10GDINA$simdat
 Q <- sim10GDINA$simQ
 # start the calibration from the supplied success probabilities
 mod.ini <- GDINA(dat,Q,catprob.parm = initials)

####################################
#           Example 7.             #
#        Model estimations         #
#          Without M-step          #
####################################

 # ----------- Fix user-specified item parameters
 # Item parameters are not estimated
 # Only person attributes are estimated
 # attribute prior distribution matters if interested in the marginalized likelihood
 dat <- frac20$dat
 Q <- frac20$Q
 # initial estimation - at most 20 EM cycles for illustration purposes
 # (the argument is spelled out as maxitr rather than relying on partial matching)
 mod.initial <- GDINA(dat,Q,maxitr=20)
 par <- itemparm(mod.initial,digits=8) # item parameters from the initial fit
 weights <- extract(mod.initial,"posterior.prob",digits=8) #posterior weights
 # use the weights as the priors; maxitr=0 means no EM cycle is run
 mod.fix <- GDINA(dat,Q,catprob.parm = par,att.prior=weights,maxitr=0) # re-estimation
 anova(mod.initial,mod.fix) # very similar - good approximation most of time
 # prior used for the likelihood calculation for the last step
 priors <- extract(mod.initial,"att.prior")
 # use the priors as the priors
 mod.fix2 <- GDINA(dat,Q,catprob.parm = par,att.prior=priors,maxitr=0) # re-estimation
 anova(mod.initial,mod.fix2) # identical results

####################################
#           Example 8.             #
#        polytomous attribute      #
#          model estimation        #
#    see Chen & de la Torre 2013   #
####################################


# --- polytomous attribute G-DINA model --- #
dat <- sim30pGDINA$simdat
Q <- sim30pGDINA$simQ
# polytomous G-DINA model (model defaults to "GDINA")
pout <- GDINA(dat,Q)

# ----- polytomous DINA model --------#
pout2 <- GDINA(dat,Q,model="DINA")
# likelihood ratio comparison of the two fits
anova(pout,pout2)

####################################
#           Example 9.             #
#        Sequential G-DINA model   #
#    see Ma & de la Torre 2016     #
####################################

# --- sequential G-DINA model --- #
dat <- sim20seqGDINA$simdat
Q <- sim20seqGDINA$simQ
# the Q-matrix has one row per item category:
# the first two columns give the item and category numbers
Q
#    Item Cat A1 A2 A3 A4 A5
#       1   1  1  0  0  0  0
#       1   2  0  1  0  0  0
#       2   1  0  0  1  0  0
#       2   2  0  0  0  1  0
#       3   1  0  0  0  0  1
#       3   2  1  0  0  0  0
#       4   1  0  0  0  0  1
#       ...

# sequential G-DINA model
sGDINA <- GDINA(dat,Q,sequential = TRUE)
# sequential DINA model
sDINA <- GDINA(dat,Q,sequential = TRUE,model = "DINA")
anova(sGDINA,sDINA)
itemparm(sDINA) # processing function (default what = "catprob")
itemparm(sDINA,"itemprob") # success probabilities for each item
itemparm(sDINA,"LCprob") # success probabilities for each category for all latent classes

####################################
#           Example 10.            #
#    Multiple-Group G-DINA model   #
####################################
Q <- sim10GDINA$simQ

# parameter simulation
# Group 1 - female: N1 examinees, every gs.parm entry set to 0.1
N1 <- 2000
gs1 <- matrix(rep(0.1,2*nrow(Q)),ncol=2)
# Group 2 - male: N2 examinees, every gs.parm entry set to 0.2
N2 <- 2000
gs2 <- matrix(rep(0.2,2*nrow(Q)),ncol=2)

# data simulation for each group
sim1 <- simGDINA(N1,Q,gs.parm = gs1,model = "DINA")
sim2 <- simGDINA(N2,Q,gs.parm = gs2,model = "DINO")

# combine data
# see ?bdiagMatrix
dat <- bdiagMatrix(list(extract(sim1,"dat"),extract(sim2,"dat")),fill=NA)
# stack the Q-matrix once per group to match the combined data
Q <- rbind(Q,Q)

# Fit multiple-group G-DINA model
# group labels are built from N1 and N2 so they stay in sync with the simulated data
mg.est <- GDINA(dat = dat,Q = Q,group = rep(c("female","male"),times=c(N1,N2)))
extract(mg.est,"posterior.prob")
}

}
\references{
Bock, R. D., & Aitkin, M. (1981). Marginal maximum likelihood estimation of item parameters: Application of an EM algorithm. \emph{Psychometrika, 46}, 443-459.

Bock, R. D., & Lieberman, M. (1970). Fitting a response model for n dichotomously scored items. \emph{Psychometrika, 35}, 179-197.

Carlin, B. P., & Louis, T. A. (2000). Bayes and empirical Bayes methods for data analysis. New York, NY: Chapman & Hall.

de la Torre, J. (2009). DINA Model and Parameter Estimation: A Didactic. \emph{Journal of Educational and Behavioral Statistics, 34}, 115-130.

de la Torre, J. (2011). The generalized DINA model framework. \emph{Psychometrika, 76}, 179-199.

de la Torre, J., & Douglas, J. A. (2004). Higher-order latent trait models for cognitive diagnosis. \emph{Psychometrika, 69}, 333-353.

de la Torre, J., & Lee, Y. S. (2013). Evaluating the Wald test for item-level comparison of
saturated and reduced models in cognitive diagnosis. \emph{Journal of Educational Measurement, 50}, 355-373.

Haertel, E. H. (1989). Using restricted latent class models to map the skill structure of achievement items.
\emph{Journal of Educational Measurement, 26}, 301-321.

Hartz, S. M. (2002). A Bayesian framework for the unified model for assessing cognitive abilities:
Blending theory with practicality (Unpublished doctoral dissertation). University of Illinois at Urbana-Champaign.

Junker, B. W., & Sijtsma, K. (2001). Cognitive assessment models with few assumptions, and connections with nonparametric
item response theory. \emph{Applied Psychological Measurement, 25}, 258-272.

Ma, W., & de la Torre, J. (2016a). A sequential cognitive diagnosis model for polytomous responses. \emph{British Journal of Mathematical and Statistical Psychology, 69}, 253-275.

Ma, W., & de la Torre, J. (2016b, July). A Q-matrix validation method for the sequential G-DINA model. Paper presented at the 80th International Meeting of the Psychometric Society, Asheville, NC.

Ma, W., Iaconangelo, C., & de la Torre, J. (2016). Model similarity, model selection and attribute classification.
\emph{Applied Psychological Measurement, 40}, 200-217.

Maris, E. (1999). Estimating multiple classification latent class models. \emph{Psychometrika, 64}, 187-212.

Tatsuoka, K. K. (1983). Rule space: An approach for dealing with misconceptions based on
item response theory. \emph{Journal of Educational Measurement, 20}, 345-354.

Templin, J. L., & Henson, R. A. (2006). Measurement of psychological disorders using cognitive diagnosis models.
\emph{Psychological Methods, 11}, 287-305.

Tutz, G. (1997). Sequential models for ordered responses. In W. J. van der Linden & R. K. Hambleton (Eds.), \emph{Handbook of modern item response theory} (pp. 139-152). New York, NY: Springer.
}
\seealso{
See \code{\link{autoGDINA}} for Q-matrix validation, item level model comparison and model calibration
in one run; See \code{\link{itemfit}} for item fit analysis, \code{\link{Qval}} for Q-matrix validation,
\code{\link{modelcomp}} for item level model comparison and \code{\link{simGDINA}} for data simulation.
Also see \code{gdina} in \pkg{CDM} package for the G-DINA model estimation.
}
\author{
{Wenchao Ma, Rutgers University, \email{wenchao.ma@rutgers.edu} \cr Jimmy de la Torre, The University of Hong Kong}
}
