% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/operations_mutate.R
\name{mutate_immundata}
\alias{mutate_immundata}
\alias{mutate.ImmunData}
\title{Modify or Add Columns to ImmunData Annotations}
\usage{
mutate_immundata(idata, ..., seq_options = NULL)

\method{mutate}{ImmunData}(.data, ..., seq_options = NULL)
}
\arguments{
\item{idata, .data}{An \code{ImmunData} object.}

\item{...}{\code{dplyr::mutate}-style named expressions (e.g., \code{new_col = existing_col * 2},
\code{category = ifelse(value > 10, "high", "low")}). These are applied first.
\strong{Important}: The expressions must not create or overwrite a column whose name
conflicts with the core \code{ImmunData} schema columns (retrieved via \code{imd_schema()});
attempting to do so raises an error.}

\item{seq_options}{Optional named list specifying sequence-based annotation options.
Use \code{\link[=make_seq_options]{make_seq_options()}} for convenient creation. See \code{filter_immundata}
documentation (\code{?filter_immundata}) or the details section here for the list
structure (\code{query_col}, \code{patterns}, \code{method}, \code{name_type}). \code{max_dist} is
ignored for mutation. If \code{NULL} (the default), no sequence-based columns are added.}
}
\value{
A \emph{new} \code{ImmunData} object with the \verb{$annotations} table modified according
to the provided expressions and \code{seq_options}. The \verb{$repertoires} table (if present)
is carried over unchanged from the input object (\code{idata} or \code{.data}).
}
\description{
Applies transformations to the \verb{$annotations} table within an \code{ImmunData}
object, similar to \code{dplyr::mutate}. It allows adding new columns or modifying
existing non-schema columns using standard \code{dplyr} expressions. Additionally,
it can add new columns based on sequence comparisons (exact match, regular
expression matching, or distance calculation) against specified patterns.
}
\details{
The function operates in two main steps:
\enumerate{
\item \strong{Standard Mutations (\code{...})}: Applies the standard \code{dplyr::mutate}-style
expressions provided in \code{...} to the \verb{$annotations} table. You can create
new columns or modify existing ones, but you \emph{cannot} modify columns
defined in the core \code{ImmunData} schema (e.g., \code{receptor_id}, \code{cell_id}).
An error will occur if you attempt to do so.
\item \strong{Sequence-based Annotations (\code{seq_options})}: If \code{seq_options} is provided,
the function calculates sequence similarities or distances and adds corresponding
new columns to the \verb{$annotations} table.
\itemize{
\item \code{method = "exact"}: Adds boolean columns (TRUE/FALSE) indicating whether the
\code{query_col} value exactly matches each \code{pattern}. Column names are generated
using a prefix (e.g., \code{sim_exact_}) and the pattern or its index.
\item \code{method = "regex"}: Uses \code{annotate_tbl_regex} to add columns indicating
matches for each regular expression pattern against the \code{query_col}. The
exact nature of the added columns depends on \code{annotate_tbl_regex} (e.g.,
boolean flags or captured groups).
\item \code{method = "lev"} or \code{method = "hamm"}: Uses \code{annotate_tbl_distance} to
calculate Levenshtein or Hamming distances between the \code{query_col} and
each \code{pattern}, adding columns containing these numeric distances.
\code{max_dist} is ignored in this context (internally treated as \code{NA}) because
every distance is calculated and stored as a column rather than used as a filtering threshold.
\item The naming of the new sequence-based columns depends on the \code{name_type}
option within \code{seq_options} and internal helper functions like
\code{make_pattern_columns}. Prefixes like \code{sim_exact_}, \code{sim_regex_},
\code{dist_lev_}, \code{dist_hamm_} are typically used based on the schema.
}
}

The \verb{$repertoires} table, if present in the input \code{idata}, is copied to the
output object without modification. This function only affects the \verb{$annotations}
table.
}
\examples{
# Basic setup (assuming idata_test is a valid ImmunData object)
# print(idata_test)

\dontrun{
# Example 1: Add a simple derived column
idata_mut1 <- mutate(idata_test, V_family = substr(V_gene, 1, 5))
print(idata_mut1$annotations)

# Example 2: Add multiple columns and modify one (if 'custom_score' exists)
# Note: Avoid modifying core schema columns like 'V_gene' itself.
idata_mut2 <- mutate(idata_test,
  V_basic = gsub("-.*", "", V_gene),
  J_len = nchar(J_gene),
  custom_score = custom_score * 1.1
) # Fails if custom_score doesn't exist
print(idata_mut2$annotations)

# Example 3: Add boolean columns for exact CDR3 matches
cdr3_patterns <- c("CARGLGLVFYGMDVW", "CARDNRGAVAGVFGEAFYW")
seq_opts_exact <- make_seq_options(
  query_col = "CDR3_aa",
  patterns = cdr3_patterns,
  method = "exact",
  name_type = "pattern" # Name cols by pattern
)
idata_mut_exact <- mutate(idata_test, seq_options = seq_opts_exact)
# Look for new columns like 'sim_exact_CARGLGLVFYGMDVW'
print(idata_mut_exact$annotations)

# Example 4: Add Levenshtein distance columns for a CDR3 pattern
seq_opts_lev <- make_seq_options(
  query_col = "CDR3_aa",
  patterns = "CARGLGLVFYGMDVW",
  method = "lev",
  name_type = "index" # Name col like 'dist_lev_1'
)
idata_mut_lev <- mutate(idata_test, seq_options = seq_opts_lev)
# Look for new column 'dist_lev_1' (or similar based on schema)
print(idata_mut_lev$annotations)

# Example 5: Combine standard mutation and sequence annotation
seq_opts_regex <- make_seq_options(
  query_col = "V_gene",
  patterns = c(ighv1 = "^IGHV1-", ighv3 = "^IGHV3-"),
  method = "regex",
  name_type = "pattern"
)
idata_mut_combo <- mutate(idata_test,
  chain_upper = toupper(chain),
  seq_options = seq_opts_regex
)
# Look for 'chain_upper' and regex match columns (e.g., 'sim_regex_ighv1')
print(idata_mut_combo)
}

}
\seealso{
\code{\link[dplyr:mutate]{dplyr::mutate()}}, \code{\link[=make_seq_options]{make_seq_options()}}, \code{\link[=filter_immundata]{filter_immundata()}}, \link{ImmunData},
\code{vignette("immundata-classes")}
}
\concept{mutation}
