% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hdma.R
\name{mediate_hdma}
\alias{mediate_hdma}
\title{High-Dimensional Mediation Analysis}
\source{
\url{https://github.com/YuzhaoGao/High-dimensional-mediation-analysis-R}
}
\usage{
mediate_hdma(
  A,
  M,
  Y,
  C1 = NULL,
  C2 = NULL,
  binary_y = FALSE,
  n_include = NULL,
  ...
)
}
\arguments{
\item{A}{length \code{n} numeric vector containing the exposure variable.}

\item{M}{\code{n x p} numeric matrix of high-dimensional mediators.}

\item{Y}{length \code{n} numeric vector containing continuous or binary outcome variable.}

\item{C1}{optional numeric matrix of covariates to include in the outcome model.}

\item{C2}{optional numeric matrix of covariates to include in the mediator model.}

\item{binary_y}{logical flag for whether \code{Y} should be interpreted as a
binary variable with 1/0 coding rather than as continuous. Default is \code{FALSE}.}

\item{n_include}{integer specifying the number of top markers from sure
independence screening to be included. Default is \code{NULL}, in which case
\code{n_include} will be either \code{ceiling(n/log(n))} if
\code{binary_y = FALSE}, or \code{ceiling(n/(2*log(n)))} if \code{binary_y = TRUE}.
If \code{n_include >= p}, all mediators are included with no screening. Note
that if \code{binary_y = FALSE}, screening is performed based on the single-mediator
outcome model p-values, and if \code{binary_y = TRUE}, screening is based on
the mediator model p-values.}

\item{...}{other arguments passed to \code{\link[hdi:hdi]{hdi::hdi()}}.}
}
\value{
A list containing:
\describe{
\item{\code{contributions}}{a data frame containing the estimates and p-values
of the mediation contributions}
\item{\code{effects}}{a data frame containing the estimated direct, global
mediation, and total effects}
}
}
\description{
\code{mediate_hdma} fits a high-dimensional mediation model with
the de-biased LASSO approach as proposed by Gao et al. (2019),
estimating the mediation contributions of potential mediators.
}
\details{
The first step in HDMA is to perform sure independence
screening (SIS) to choose the \code{n_include} mediators that are most
associated with the outcome (when Y is continuous) or the exposure
(when Y is binary), based on p-values from linear regression. The second step
is to fit the outcome model for the remaining mediators using de-sparsified
(a.k.a. de-biased) LASSO, which has asymptotic properties allowing for
computation of p-values by the \code{hdi} package. HDMA then fits the
mediator models using linear regression among those mediators that have both
survived SIS (in step 1) and been identified by the LASSO (in step 2), obtaining
p-values for the mediation contributions by taking the maximum of the \eqn{\alpha_a}
and \eqn{\beta_m} p-values. The global indirect effect is estimated by summing the
mediation contributions, and the direct effect is estimated by subtracting
the global indirect effect from an estimate of the total effect. See References for
more detail.
}
\examples{
A <- med_dat$A
M <- med_dat$M
Y <- med_dat$Y

# Fit hdma with continuous outcomes
out <- mediate_hdma(A, M, Y)
head(out$contributions)
out$effects

}
\references{
Gao, Y. et al. Testing Mediation Effects in High-Dimensional
Epigenetic Studies. Front. Genet. 10, 1195 (2019).

Fan, J. & Lv, J. Sure independence screening for ultrahigh dimensional
feature space. J. R. Stat. Soc. Ser. B 70, 849-911 (2008).
}
