% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/remove_blanks.R
\name{remove_blanks}
\alias{remove_blanks}
\title{Remove molecular formulas detected in blanks}
\usage{
remove_blanks(
  mfd,
  blank_file_ids = NULL,
  blank_prevalence = 0.5,
  ret_time_col = NULL,
  verbose = FALSE,
  ...
)
}
\arguments{
\item{mfd}{data.table with molecular formula data as derived from
\code{ume::assign_formulas}. Column names of elements/isotopes must match names in
the \code{isotope} column of \code{ume::masses}; values are integers representing
counts per formula.}

\item{blank_file_ids}{Integer vector of \code{file_id} values that represent blank analyses.}

\item{blank_prevalence}{Numeric between 0 and 1. Threshold for blank filtering:
the proportion of blanks in which a molecular formula must occur before it is
excluded from the sample data. For example, \code{blank_prevalence = 0}
removes any formula detected in at least one blank, while \code{blank_prevalence = 0.5}
(default) removes formulas detected in 50\% or more of the blanks.}

\item{ret_time_col}{Character scalar. Name of the retention-time column that
contains the beginning of the retention time segment that corresponds to the
mass spectrum.
If \code{NULL} (default), the function will auto-detect the first column in
\code{c("ret_time_min","retention_time","rt","RT")} that exists in \code{mfd}.
If none is found, blanks are removed ignoring retention time.}

\item{verbose}{logical; if \code{TRUE}, show progress messages.}

\item{...}{Additional arguments passed to methods.}
}
\value{
\code{data.table}; subset of the original molecular formula table (\code{mfd})
with blank formulas removed (globally or LC-segment-wise).
}
\description{
Remove molecular formulas that were detected in blank analyses (identified via
\code{blank_file_ids}); \code{blank_prevalence} sets the proportion of blanks in
which a formula must occur to be removed. Matching is always on \code{mf}. If a
retention-time column is present (or provided using \code{ret_time_col}), removal
is restricted to the corresponding LC segment.
}
\details{
\itemize{
\item Requires a unique integer \code{file_id} per analysis in \code{mfd}.
\item Minimal required columns in \code{mfd}: \code{mf}, \code{file_id}.
\item Optional column: a retention-time column (e.g. \code{"ret_time_min"}).
\item If a retention-time column is used, formulas present in blanks are only
removed for rows whose \code{mf} \strong{and} retention time match those observed in the blanks.
\item The input \code{mfd} is \strong{not} modified by reference; a subset is returned.
}
}
\section{Backward compatibility}{

The argument \code{LCMS} is deprecated and no longer used. Retention-time-aware
removal is now enabled automatically when a retention-time column is present
or explicitly provided via \code{ret_time_col}.
}

\examples{
# Presence/absence removal, no retention time:
remove_blanks(mfd = mf_data_demo,
              remove_blank_list = "Blank",
              verbose = TRUE)
}
\seealso{
Other Formula subsetting: 
\code{\link{filter_int}()},
\code{\link{filter_mass_accuracy}()},
\code{\link{filter_mf_data}()},
\code{\link{subset_known_mf}()},
\code{\link{ume_assign_formulas}()},
\code{\link{ume_filter_formulas}()}
}
\author{
Boris P. Koch
}
\concept{Formula subsetting}
\keyword{misc}
