% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/handleNA.R
\name{impute_na}
\alias{impute_na}
\title{Impute missing values}
\usage{
impute_na(
  raw_df,
  method = "minProb",
  tune_sigma = 1,
  q = 0.01,
  maxiter = 10,
  ntree = 20,
  n_pcs = 2,
  seed = NULL
)
}
\arguments{
\item{raw_df}{A \code{raw_df} object (output of \code{\link{create_df}})
containing missing values.}

\item{method}{Imputation method to use. Default is \code{"minProb"}.
Available methods: \code{"minProb"}, \code{"minDet"}, \code{"RF"},
\code{"kNN"}, and \code{"SVD"}.}

\item{tune_sigma}{A scalar used in the \code{"minProb"} method for
controlling the standard deviation of the Gaussian distribution
from which random values are drawn for imputation.\cr
Default is \code{1}.}

\item{q}{A scalar used in the \code{"minProb"} and \code{"minDet"} methods
to obtain a low intensity value for imputation. \code{q} should be set to a
very low value. Default is \code{0.01}.}

\item{maxiter}{Maximum number of iterations to be performed when using the
\code{"RF"} method. Default is \code{10}.}

\item{ntree}{Number of trees to grow in each forest when using the
\code{"RF"} method. Default is \code{20}.}

\item{n_pcs}{Number of principal components to calculate when using the
\code{"SVD"} method. Default is \code{2}.}

\item{seed}{Numerical. Random number seed. Default is \code{NULL}.}
}
\value{
An \code{imp_df} object, which is a data frame of protein intensities
with no missing values.
}
\description{
This function imputes missing values using a user-specified
imputation method.
}
\details{
\itemize{\item Ideally, you should first remove proteins with
high levels of missing data using the \code{filterbygroup_na} function
before running \code{impute_na} on the \code{raw_df} object.
\item The \code{impute_na} function imputes missing values using a
user-specified imputation method from the available options: \code{minProb},
\code{minDet}, \code{kNN}, \code{RF}, and \code{SVD}.
\item Make sure to fix the random number seed with \code{seed} for reproducibility.}
}
\examples{
## Generate a raw_df object with default settings. No technical replicates.
raw_df <- create_df(
prot_groups = "https://raw.githubusercontent.com/caranathunge/promor_example_data/main/pg1.txt",
exp_design = "https://raw.githubusercontent.com/caranathunge/promor_example_data/main/ed1.txt"
)

## Impute missing values in the data frame using the default minProb
## method.
imp_df1 <- impute_na(raw_df, seed = 3312)

\donttest{
## Impute using the RF method with the number of iterations set at 5
## and number of trees set at 100.
imp_df2 <- impute_na(raw_df, method = "RF",
maxiter = 5, ntree = 100,
seed = 3312)


## Using the kNN method.
imp_df3 <- impute_na(raw_df, method = "kNN", seed = 3312)
}


## Using the SVD method with n_pcs set to 3.
imp_df4 <- impute_na(raw_df, method = "SVD", n_pcs = 3, seed = 3312)

## Using the minDet method with q set at 0.001.
imp_df5 <- impute_na(raw_df, method = "minDet", q = 0.001, seed = 3312)

}
\references{
Lazar, Cosmin, et al. "Accounting for the multiple natures of
missing values in label-free quantitative proteomics data sets to compare
imputation strategies." Journal of Proteome Research 15.4 (2016): 1116-1125.
}
\seealso{
More information on the available imputation methods can be found
in their respective packages.
\itemize{\item \code{\link{create_df}}
\item For the \code{minProb} and
\code{minDet} methods, see the
\code{imputeLCMD} package.
\item For the Random Forest (\code{RF}) method, see
\code{\link[missForest]{missForest}}.
\item For the \code{kNN} method, see \code{\link[VIM]{kNN}} from the
\code{\link[VIM]{VIM}} package.
\item For the \code{SVD} method, see \code{\link[pcaMethods]{pca}} from the
\code{\link[pcaMethods]{pcaMethods}} package.}
}
\author{
Chathurani Ranathunge
}
