% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/genscore.R
\name{estimate}
\alias{estimate}
\title{The main function for the generalized score-matching estimator for graphical models.}
\usage{
estimate(
  x,
  setting,
  domain,
  elts = NULL,
  centered = TRUE,
  symmetric = "symmetric",
  scale = "",
  lambda1s = NULL,
  lambda_length = NULL,
  lambda_ratio = Inf,
  mode = NULL,
  param1 = NULL,
  param2 = NULL,
  h_hp = NULL,
  unif_dist = NULL,
  verbose = TRUE,
  verbosetext = "",
  tol = 1e-06,
  maxit = 1000,
  BIC_refit = TRUE,
  warmstart = TRUE,
  diagonal_multiplier = NULL,
  eBIC_gammas = c(0, 0.5, 1),
  cv_fold = NULL,
  cv_fold_seed = NULL,
  return_raw = FALSE,
  return_elts = FALSE
)
}
\arguments{
\item{x}{An \code{n} by \code{p} matrix, the data matrix, where \code{n} is the sample size and \code{p} the dimension.}

\item{setting}{A string that indicates the distribution type, must be one of \code{"exp"}, \code{"gamma"}, \code{"gaussian"}, \code{"log_log"}, \code{"log_log_sum0"}, or of the form \code{"ab_NUM1_NUM2"}, where \code{NUM1} is the \code{a} value and \code{NUM2} is the \code{b} value, and \code{NUM1} and \code{NUM2} must be integers or two integers separated by "/", e.g. "ab_2_2", "ab_2_5/4" or "ab_2/3_1/2".}

\item{domain}{A list returned from \code{make_domain()} that represents the domain.}

\item{elts}{A list (optional), elements necessary for calculations returned by \code{get_elts()}.}

\item{centered}{A boolean, whether in the centered setting (assume \eqn{\boldsymbol{\mu}=\boldsymbol{\eta}=0}{\mu=\eta=0}) or not. Default to \code{TRUE}.}

\item{symmetric}{A string. If equals \code{"symmetric"}, estimates the minimizer \eqn{\mathbf{K}}{K} over all symmetric matrices; if \code{"and"} or \code{"or"}, use the "and"/"or" rule to get the support. Default to \code{"symmetric"}.}

\item{scale}{A string indicating the scaling method. If contains \code{"sd"}, columns are scaled by standard deviation; if contains \code{"norm"}, columns are scaled by l2 norm; if contains \code{"center"} and \code{setting == "gaussian" && domain$type == "R"}, columns are centered to have mean zero. Default to \code{""}.}

\item{lambda1s}{A vector of lambdas, the penalty parameter for K.}

\item{lambda_length}{An integer >= 2, the number of lambda1s. Ignored if \code{lambda1s} is provided, otherwise a grid of lambdas is automatically chosen so that the results range from an empty graph to a complete graph. Default to \code{10} if neither \code{lambda1s} nor \code{lambda_length} is provided.}

\item{lambda_ratio}{A positive number, the fixed ratio between \eqn{\lambda_{\mathbf{K}}}{\lambda_K} and \eqn{\lambda_{\boldsymbol{\eta}}}{\lambda_\eta}, if \eqn{\lambda_{\boldsymbol{\eta}}\neq 0}{\lambda_\eta!=0} (non-profiled) in the non-centered setting.}

\item{mode}{A string, the class of the \code{h} function. Ignored if \code{elts} or \code{h_hp} is provided, or if \code{setting == "gaussian" && domain$type == "R"}.}

\item{param1}{A number, the first parameter to the \code{h} function. Ignored if \code{elts} or \code{h_hp} is provided, or if \code{setting == "gaussian" && domain$type == "R"}.}

\item{param2}{A number, the second parameter (may be optional depending on \code{mode}) to the \code{h} function. Ignored if \code{elts} or \code{h_hp} is provided, or if \code{setting == "gaussian" && domain$type == "R"}.}

\item{h_hp}{A function that returns a list containing \code{hx=h(x)} (element-wise) and \code{hpx=hp(x)} (element-wise derivative of \eqn{h}) when applied to a vector or a matrix \code{x}, both of which have the same shape as \code{x}.}

\item{unif_dist}{Optional, defaults to \code{NULL}. If not \code{NULL}, \code{h_hp} must be \code{NULL} and \code{unif_dist(x)} must return a list containing \code{"g0"} of length \code{nrow(x)} and \code{"g0d"} of dimension \code{dim(x)}, representing the l2 distance and the gradient of the l2 distance to the boundary: the true l2 distance function to the boundary is used for all coordinates in place of h_of_dist; see "Estimating Density Models with Complex Truncation Boundaries" by Liu et al, 2019. That is, \eqn{(h_j\circ \phi_j)(x_i)}{(h_j\circ phi_j)(xi)} in the score-matching loss is replaced by \eqn{g_0(x_i)}{g0(xi)}, the l2 distance of xi to the boundary of the domain.}

\item{verbose}{Optional. A boolean, whether to output intermediate results.}

\item{verbosetext}{Optional. A string, text to be added to the end of each printout if \code{verbose == TRUE}.}

\item{tol}{Optional. A number, the tolerance parameter. Default to \code{1e-6}.}

\item{maxit}{Optional. A positive integer, the maximum number of iterations for each fit. Default to \code{1000}.}

\item{BIC_refit}{A boolean, whether to get the BIC scores by refitting an unpenalized model restricted to the estimated edges, with \code{lambda1=lambda2=0} and \code{diagonal_multiplier=1}. Default to \code{TRUE}.}

\item{warmstart}{Optional. A boolean, whether to use the results from a previous (larger) lambda as a warm start for each new lambda. Default to \code{TRUE}.}

\item{diagonal_multiplier}{A number >= 1, the diagonal multiplier. Optional and ignored if \code{elts} is provided. If \code{ncol(x) > nrow(x)}, a value strictly larger than 1 is recommended. Default to \eqn{1+\left(1-\left(1+4e\max\left(6\log p/n, \sqrt{6\log p/n}\right)\right)^{-1}\right)}{1+(1-1/(1+4e*max(6*log(p)/n, sqrt(6*log(p)/n))))}.}

\item{eBIC_gammas}{Optional. A number or a vector of numbers. The \eqn{\gamma} parameter in eBIC. Default to \code{c(0,0.5,1)}.}

\item{cv_fold}{Optional. An integer larger than 1 if provided. The number of folds used for cross validation. If provided, losses will be calculated on each fold with model fitted on the other folds, and a \code{lambda_length x cv_fold} matrix \code{cv_losses} will be returned.}

\item{cv_fold_seed}{Optional. Seed for generating folds for cross validation.}

\item{return_raw}{A boolean, whether to return the raw estimates of \code{K}. Default to \code{FALSE}.}

\item{return_elts}{A boolean, whether to return the \code{elts} used for estimation. Default to \code{FALSE}.}
}
\value{
\item{edgess}{A list of vectors of integers: indices of the non-zero edges.}
   \item{BICs}{A \code{lambda_length} by \code{length(eBIC_gammas)} matrix of raw eBIC scores (without refitting). If \code{return_raw == FALSE}, may contain \code{Inf}s for rows after the first lambda that gives the complete graph.}
   \item{lambda1s}{A vector of numbers of length \code{lambda_length}: the grid of \code{lambda1}s over which the estimates are obtained.}
   \item{converged}{A vector of booleans of length \code{lambda_length}: indicators of convergence for each fit. If \code{return_raw == FALSE}, may contain \code{0}s for all lambdas after the first lambda that gives the complete graph.}
   \item{iters}{A vector of integers of length \code{lambda_length}: the number of iterations run for each fit. If \code{return_raw == FALSE}, may contain \code{0}s for all lambdas after the first lambda that gives the complete graph.}

   In addition,
   if \code{centered == FALSE},
   \item{etas}{A \code{lambda_length}*\code{p} matrix of \code{eta} estimates with the \eqn{i}-th row corresponding to the \eqn{i}-th \code{lambda1}. If \code{return_raw == FALSE},  may contain \code{NA}s after the first lambda that gives the complete graph.}
   if \code{centered == FALSE} and non-profiled,
   \item{lambda2s}{A vector of numbers of length \code{lambda_length}: the grid of \code{lambda2}s over which the estimates are obtained.}
   if \code{return_raw == TRUE},
   \item{raw_estimate}{A list that contains \code{lambda_length} estimates for \code{K} of size \code{ncol(x)}*\code{ncol(x)}.}
   if \code{BIC_refit == TRUE},
   \item{BIC_refits}{A \code{lambda_length} by \code{length(eBIC_gammas)} matrix of refitted eBIC scores, obtained by refitting unpenalized models restricted to the estimated edges. May contain \code{Inf}s for rows after the first lambda that gives the graph restricted to which an unpenalized model does not have a solution (loss unbounded from below).}
   if \code{cv_fold} is not \code{NULL},
   \item{cv_losses}{A \code{lambda_length x cv_fold} matrix of cross validation losses. If \code{return_raw == FALSE}, may contain \code{Inf}s for all lambdas after the first lambda that gives the complete graph.}
   if \code{return_elts == TRUE},
   \item{elts}{A list of elements returned from \code{get_elts()}.}
}
\description{
The main function for the generalized score-matching estimator for graphical models.
}
\examples{
# Examples are shown for Gaussian truncated to R+^p only. For other distributions
#   on other types of domains, please refer to \code{gen()} or \code{get_elts()},
#   as the way to call this function (\code{estimate()}) is exactly the same in those cases.
# Note: in the estimate() calls below, diag= is shorthand for diagonal_multiplier=
#   (resolved by R partial argument matching).

## Problem setup: n samples of a p-dimensional Gaussian truncated to R+^p
n <- 30
p <- 20
domain <- make_domain("R+", p=p)
mu <- rep(0, p)
K <- diag(p)
lambda1s <- c(0.01,0.1,0.2,0.3,0.4,0.5)
## Same expression as the documented default for diagonal_multiplier
dm <- 1 + (1-1/(1+4*exp(1)*max(6*log(p)/n, sqrt(6*log(p)/n))))
x <- tmvtnorm::rtmvnorm(n, mean = mu, sigma = solve(K),
       lower = rep(0, p), upper = rep(Inf, p), algorithm = "gibbs",
       burn.in.samples = 100, thinning = 10)

## Centered estimates, no elts or h provided, mode and params provided
est1 <- estimate(x, "gaussian", domain=domain, elts=NULL, centered=TRUE,
          symmetric="symmetric", lambda1s=lambda1s, mode="min_pow",
          param1=1, param2=3, diag=dm, return_raw=TRUE)

h_hp <- get_h_hp("min_pow", 1, 3)
## Centered estimates, no elts provided, h provided; equivalent to est1
est2 <- estimate(x, "gaussian", domain=domain, elts=NULL, centered=TRUE,
          symmetric="symmetric", lambda1s=lambda1s, h_hp=h_hp, diag=dm, return_raw=TRUE)
compare_two_results(est1, est2) ## Should be almost all 0

elts_gauss_c <- get_elts(h_hp, x, setting="gaussian", domain=domain,
            centered=TRUE, diag=dm)
## Centered estimates, elts provided; equivalent to est1 and est2
## Here diagonal_multiplier will be set to the default value, equal to dm above
est3 <- estimate(x, "gaussian", domain=domain, elts=elts_gauss_c,
          symmetric="symmetric", lambda1s=lambda1s, diag=NULL,
          return_raw=TRUE)
compare_two_results(est1, est3) ## Should be almost all 0

## Noncentered estimates with Inf penalty on eta; equivalent to est1~3
est4 <- estimate(x, "gaussian", domain=domain, elts=NULL, centered=FALSE,
          lambda_ratio=0, symmetric="symmetric", lambda1s=lambda1s,
          h_hp=h_hp, diag=dm, return_raw=TRUE)
sum(abs(est4$etas)) ## Should be 0 since non-centered with lambda ratio 0 is equivalent to centered
est4$etas <- NULL ## But different from est1 in that the zero etas are returned in est4
compare_two_results(est1, est4) ## Should be almost all 0


## Profiled estimates, no elts or h provided, mode and params provided
est5 <- estimate(x, "gaussian", domain=domain, elts=NULL, centered=FALSE,
          lambda_ratio=Inf, symmetric="or", lambda1s=lambda1s,
          mode="min_pow", param1=1, param2=3, diag=dm, return_raw=TRUE)

## Profiled estimates, no elts provided, h provided; equivalent to est5
est6 <- estimate(x, "gaussian", domain=domain, elts=NULL, centered=FALSE,
          lambda_ratio=Inf, symmetric="or", lambda1s=lambda1s,
          h_hp=h_hp, diag=dm, return_raw=TRUE)
compare_two_results(est5, est6) ## Should be almost all 0

elts_gauss_p <- get_elts(h_hp, x, setting="gaussian", domain=domain,
                centered=FALSE, profiled=TRUE, diag=dm)
## Profiled estimates, elts provided; equivalent to est5~6
est7 <- estimate(x, "gaussian", domain=domain, elts=elts_gauss_p, centered=FALSE,
          lambda_ratio=Inf, symmetric="or", lambda1s=lambda1s,
          diagonal_multiplier=NULL, return_raw=TRUE)
compare_two_results(est5, est7) ## Should be almost all 0


## Non-centered estimates, no elts or h provided, mode and params provided
## Using 5-fold cross validation and no BIC refit
est8 <- estimate(x, "gaussian", domain=domain, elts=NULL, centered=FALSE,
          lambda_ratio=2, symmetric="and", lambda_length=100,
          mode="min_pow", param1=1, param2=3, diag=dm, return_raw=TRUE,
          BIC_refit=FALSE, cv_fold=5, cv_fold_seed=2)

## Non-centered estimates, no elts provided, h provided; equivalent to est8
## Using 5-fold cross validation and no BIC refit
est9 <- estimate(x, "gaussian", domain=domain, elts=NULL, centered=FALSE,
          lambda_ratio=2, symmetric="and", lambda_length=100, h_hp=h_hp,
          diag=dm, return_raw=TRUE, BIC_refit=FALSE, cv_fold=5, cv_fold_seed=2)
compare_two_results(est8, est9) ## Should be almost all 0

elts_gauss_np <- get_elts(h_hp, x, setting="gaussian", domain=domain, centered=FALSE,
                profiled=FALSE, diag=dm)
## Non-centered estimates, elts provided; equivalent to est8~9
## Using 5-fold cross validation and no BIC refit
est10 <- estimate(x, "gaussian", domain, elts=elts_gauss_np, centered=FALSE,
           lambda_ratio=2, symmetric="and", lambda_length=100, diag=NULL,
           return_raw=TRUE, BIC_refit=FALSE, cv_fold=5, cv_fold_seed=2)
compare_two_results(est8, est10) ## Should be almost all 0

}
