% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/CRTgeeDR-main.R
\name{geeDREstimation}
\alias{geeDREstimation}
\title{Doubly Robust Inverse Probability Weighted Augmented GEE Estimator}
\usage{
geeDREstimation(formula, id, data = parent.frame(), family = gaussian,
  corstr = "independence", Mv = 1, weights = NULL, aug = NULL,
  pi.a = 1/2, corr.mat = NULL, init.beta = NULL, init.alpha = NULL,
  init.phi = 1, scale.fix = FALSE, sandwich = TRUE, maxit = 20,
  tol = 1e-05, print.log = FALSE, typeweights = "VW", nametrt = "TRT",
  model.weights = NULL, model.augmentation.trt = NULL,
  model.augmentation.ctrl = NULL, stepwise.augmentation = FALSE,
  stepwise.weights = FALSE, nameMISS = "MISSING", nameY = "OUTCOME",
  sandwich.nuisance = FALSE, fay.adjustment = FALSE, fay.bound = 0.75)
}
\arguments{
\item{formula}{an object of class "formula" (or one that can be coerced to that class): a symbolic description of the model to be fitted.}

\item{id}{a vector which identifies the clusters. The length of "id" should be the same as the number of observations. Data are assumed to be sorted so that observations on a cluster are contiguous rows for all entities in the formula.}

\item{data}{an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which CRTgeeDR is called.}

\item{family}{a description of the error distribution and link function to be used in the model. This can be a character string naming a family function, a family function or the result of a call to a family function. (See family for details of family functions.)}

\item{corstr}{a character string specifying the correlation structure. The following are permitted: '"independence"', '"exchangeable"', '"ar1"', '"unstructured"' and '"userdefined"'}

\item{Mv}{for "m-dependent", the value for m}

\item{weights}{A vector of weights for each observation. If an observation has weight 0, it is excluded from the calculations of any parameters. Observations with a NA anywhere (even in variables not included in the model) will be assigned a weight of 0.}

\item{aug}{A list of vector (one for A=1 treated, one for A=0 control) for each observation representing E(Y|X,A=a).}

\item{pi.a}{A number, Probability of treatment attribution P(A=1)}

\item{corr.mat}{The correlation matrix for "fixed". Matrix should be symmetric with dimensions >= the maximum cluster size. If the correlation structure is "userdefined", then this is a matrix describing which correlations are the same.}

\item{init.beta}{an optional vector with the initial values of beta. If not specified, then the intercept will be set to InvLink(mean(response)). init.beta must be specified if not using an intercept.}

\item{init.alpha}{an optional scalar or vector giving the initial values for the correlation. If provided along with Mv>1 or unstructured correlation, then the user must ensure that the vector is of the appropriate length.}

\item{init.phi}{an optional initial overdispersion parameter. If not supplied, initialized to 1.}

\item{scale.fix}{if set to TRUE, then the scale parameter is fixed at the value of init.phi.}

\item{sandwich}{if set to TRUE, the sandwich variance is provided together with the naive estimator of variance.}

\item{maxit}{maximum number of iterations.}

\item{tol}{tolerance in calculation of coefficients.}

\item{print.log}{if set to TRUE, a report is printed.}

\item{typeweights}{a character string specifying the weights implementation. The following are permitted: "GENMOD" for $W^{1/2}V^{-1}W^{1/2}$, "WV" for $V^{-1}W$}

\item{nametrt}{Name of the variable containing information for the treatment}

\item{model.weights}{an object of class "formula" (or one that can be coerced to that class): a symbolic description of the model to be fitted for the propensity score. Must model the probability of being observed.}

\item{model.augmentation.trt}{an object of class "formula" (or one that can be coerced to that class): a symbolic description of the model to be fitted for the ouctome model for the treated group (A=1).}

\item{model.augmentation.ctrl}{an object of class "formula" (or one that can be coerced to that class): a symbolic description of the model to be fitted for the ouctome model for the control group (A=0).}

\item{stepwise.augmentation}{if set to TRUE, a stepwise for the augmentation model is performed during the fit of the augmentation model for the OM}

\item{stepwise.weights}{if set to TRUE, a stepwise for the propensity score is performed during the fit of the augmentation model for the OM}

\item{nameMISS}{Name of the variable containing information for the Missing indicator}

\item{nameY}{Name of the variable containing information for the outcome}

\item{sandwich.nuisance}{if set to TRUE, the nuisance adjusted sandwich variance is provided.}

\item{fay.adjustment}{if set to TRUE, the small-sample nuisance adjusted sandwich variance with Fay's adjustement is provided.}

\item{fay.bound}{if set to 0.75 by default, bound value used for Fay's adjustement.}
}
\value{
An object of type 'CRTgeeDR' \cr

$beta Final values for regressors estimates \cr
\itemize{
\item $phi scale parameter estimate\cr
\item $alpha Final values for association parameters in the working correlation structure when exchangeable\cr
\item $coefnames Name of the regressors in the main regression \cr
\item $niter Number of iteration done by the algorithm before convergence
\item $converged convergence status
\item $var.naiv Variance of the estimates model based (naive)\cr
\item $var Variance of the estimates sandwich\cr
\item $var.nuisance Variance of the estimates nuisance adjusted sandwich\cr
\item $var.fay Variance of the estimates nuisance adjusted sandwich with Fay correction for small samples
\item $call Call function
\item $corr Correlation structure used
\item $clusz Number of unit in each cluster
\item $FunList List of function associated with the family
\item $X design matrix for the main regression
\item $offset Offset specified in the regression
\item $eta predicted values
\item $weights Weights vector used in the diagonal term for the IPW
\item $ps.model Summary of the regression fitted for the PS if computed internally
\item $om.model.trt Summary of the regression fitted for the OM for treated if computed internally
\item $om.model.ctrl Summary of the regression fitted for the OM for control if computed internally
}
}
\description{
This function implements a GEE estimator. It implements classical GEE, IPW-GEE, augmented GEE and IPW-Augmented GEE (Doubly robust). \cr
}
\details{
The estimator is founds by solving:
 \deqn{ 0= \sum_{i=1}^M \Bigg[ \boldsymbol D_i^T \boldsymbol V_i^{-1} \boldsymbol  W_i(\boldsymbol X_i, A_i, \boldsymbol \eta_W) \left( \boldsymbol Y_i - \boldsymbol B(\boldsymbol X_i, A_i, \boldsymbol \eta_B) \right) }
 \deqn{ \qquad + \sum_{a=0,1} p^a(1-p)^{1-a} \boldsymbol D_i^T \boldsymbol V_i^{-1}  \Big( \boldsymbol B(\boldsymbol X_i,A_i=a, \boldsymbol \eta_B) -\boldsymbol \mu_i(\boldsymbol \beta,A_i=a)\Big) \Bigg]}
 where \eqn{\boldsymbol D_i=\frac{\partial \boldsymbol \mu_i(\boldsymbol \beta,A_i)}{\partial \boldsymbol \beta^T}} is the design matrix, \eqn{\boldsymbol V_i} is the covariance matrix equal to \eqn{\boldsymbol U_i^{1/2} \boldsymbol C(\boldsymbol \alpha)\boldsymbol U_i^{1/2}$ with $\boldsymbol U_i} a diagonal matrix with elements \eqn{{\rm var}(y_{ij})} and \eqn{\boldsymbol C(\boldsymbol \alpha)} is the working correlation structure with non-diagonal terms \eqn{\boldsymbol \alpha}.
 Parameters \eqn{\boldsymbol \alpha} are estimated using simple moment estimators from the Pearson residuals.
 The matrix of weights \eqn{\boldsymbol W_i(\boldsymbol X_i, A_i, \boldsymbol \eta_W)=diag\left[R_{ij}/\pi_{ij}(\boldsymbol X_i, A_i, \boldsymbol \eta_W)\right]_{j=1,\dots,n_{i}}}, where \eqn{\pi_{ij}(\boldsymbol X_i, A_i, \boldsymbol \eta_W)=P(R_{ij}|\boldsymbol X_i, A_i)} is the Propensity score (PS).
 The function \eqn{\boldsymbol B(\boldsymbol X_i,A_i=a,\boldsymbol \eta_B)}, which is called the Outcome Model (OM), is a function linking \eqn{Y_{ij}} with \eqn{\boldsymbol X_i} and \eqn{A_i}.
 The \eqn{\boldsymbol \eta_B} are nuisance parameters that are estimated.
 The estimator is most efficient if the OM is equal to \eqn{E(\boldsymbol Y_i|\boldsymbol X_i,A_i=a)}
 The estimator denoted \eqn{\hat{\beta}_{aug}} is found by solving the estimating equation.
 Although  analytic solutions sometimes exist, coefficient estimates are generally obtained using an iterative procedure such as the Newton-Raphson method.
 Automatic implementation is such that, \eqn{\hat{ \boldsymbol \eta}_W$ in $\boldsymbol W_i(\boldsymbol X_i, A_i, \hat{ \boldsymbol \eta}_W)} are obtained using a logistic regression and \eqn{\hat{ \boldsymbol \eta}_B$ in $\boldsymbol B(\boldsymbol X_i,A_i,\hat{ \boldsymbol \eta}_B)} are obtained using a linear regression.


 The variance of \eqn{\hat{\boldsymbol \beta}_{aug}} is estimated by the sandwich variance estimator.
 There are two external sources of variability that need to be accounted for: estimation of \eqn{\boldsymbol \eta_W} for the PS and of \eqn{\boldsymbol \eta_B} for the OM.
 We denote \eqn{\boldsymbol \Omega=(\boldsymbol \beta, \boldsymbol \eta_W,\boldsymbol \eta_B)} the estimated parameters of interest and nuisance parameters.
 We can stack estimating functions and score functions for \eqn{\boldsymbol \Omega}:
 \deqn{\small \boldsymbol U_i(\boldsymbol \Omega)= \left( \begin{array}{c} \boldsymbol \Phi_i(\boldsymbol Y_i,\boldsymbol X_i,A_i,\boldsymbol \beta, \boldsymbol \eta_W, \boldsymbol \eta_B) \\ \boldsymbol S^W_i(\boldsymbol X_i, A_i, \boldsymbol \eta_W)\\ \boldsymbol S^B_i(\boldsymbol X_i, A_i, \boldsymbol \eta_B)\\ \end{array} \right)}
 where \eqn{\boldsymbol S^W_i} and \eqn{\boldsymbol S^B_i} represent the score equations for patients in cluster \eqn{i} for the estimation of \eqn{\boldsymbol \eta_W} and \eqn{\boldsymbol \eta_B}  in the PS and the OM.
 A standard Taylor expansion paired with Slutzky's theorem and the central limit theorem give the sandwich estimator adjusted for nuisance parameters estimation in the OM and PS:
 \deqn{Var(\boldsymbol \Omega)={{E\left[\frac{\partial   \boldsymbol U_i(\boldsymbol \Omega)}{\partial \boldsymbol \Omega}\right]}^{-1}}^{T} \underbrace{{E\left[ \boldsymbol U_i(\boldsymbol \Omega)\boldsymbol U_i^T(\boldsymbol \Omega) \right]}}_{\boldsymbol \Delta_{adj}} \underbrace{E\left[\frac{\partial   \boldsymbol U_i(\boldsymbol \Omega)}{\partial \boldsymbol \Omega}\right]^{-1} }_{\boldsymbol \Gamma^{-1}_{adj}}.}
}
\examples{
data(data.sim)
 \dontrun{
 #### STANDARD GEE
 geeresults<-geeDREstimation(formula=OUTCOME~TRT,
                               id="CLUSTER" , data = data.sim,
                               family = "binomial", corstr = "independence")
 summary(geeresults)
 #### IPW GEE
 ipwresults<-geeDREstimation(formula=OUTCOME~TRT,
                               id="CLUSTER" , data = data.sim,
                               family = "binomial", corstr = "independence",
                               model.weights=I(MISSING==0)~TRT*AGE)
 summary(ipwresults)
 #### AUGMENTED GEE
 augresults<-geeDREstimation(formula=OUTCOME~TRT,
                               id="CLUSTER" , data = data.sim,
                               family = "binomial", corstr = "independence",
                               model.augmentation.trt=OUTCOME~AGE,
                               model.augmentation.ctrl=OUTCOME~AGE, stepwise.augmentation=FALSE)
 summary(augresults)
 }
 #### DOUBLY ROBUST
 drresults<-geeDREstimation(formula=OUTCOME~TRT,
                               id="CLUSTER" , data = data.sim,
                               family = "binomial", corstr = "independence",
                               model.weights=I(MISSING==0)~TRT*AGE,
                               model.augmentation.trt=OUTCOME~AGE,
                               model.augmentation.ctrl=OUTCOME~AGE, stepwise.augmentation=FALSE)
 summary(drresults)
}
\author{
Melanie Prague [based on R packages 'geeM' L. S. McDaniel, N. C. Henderson, and P. J. Rathouz.  Fast Pure R Implementation of GEE: Application of the Matrix Package. The R Journal, 5(1):181-188, June 2013.]
}
\references{
Details regarding implementation can be found in
\itemize{
      \item 'Augmented GEE for improving efficiency and validity of estimation in cluster randomized trials by leveraging cluster-and individual-level covariates' - 2012 - Stephens A., Tchetgen Tchetgen E. and De Gruttola V. : Stat Med 31(10) - 915-930.
      \item 'Accounting for interactions and complex inter-subject dependency for estimating treatment effect in cluster randomized trials with missing at random outcomes' - 2015 - Prague M., Wang R., Stephens A., Tchetgen Tchetgen E. and De Gruttola V. : in revision.
      \item 'Fast Pure R Implementation of GEE: Application of the Matrix Package' - 2013 - McDaniel, Lee S and Henderson, Nicholas C and Rathouz, Paul J : The R Journal 5(1) - 181-197.
      \item 'Small-Sample Adjustments for Wald-Type Tests Using Sandwich Estimators' - 2001 - Fay, Michael P and Graubard, Barry I : Biometrics 57(4) - 1198-1206.
}
}

