% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sc.R
\name{matchCellMarker2}
\alias{matchCellMarker2}
\title{Annotate Clusters by Matching Markers with the CellMarker2.0 Database}
\usage{
matchCellMarker2(
  marker,
  n,
  avg_log2FC_threshold = 0,
  p_val_adj_threshold = 0.05,
  spc,
  tissueClass = available_tissue_class(spc),
  tissueType = available_tissue_type(spc),
  ref = NULL
)
}
\arguments{
\item{marker}{A \code{data.frame} or \code{data.table} of markers, usually the output of
\code{Seurat::FindAllMarkers}. It must contain columns for \code{cluster}, \code{gene},
\code{avg_log2FC}, and \code{p_val_adj}.}

\item{n}{An integer specifying the number of top marker genes to use from each
cluster for matching. Genes are ranked by \code{avg_log2FC} after filtering.}

\item{avg_log2FC_threshold}{A numeric value setting the minimum average log2 fold
change for a marker to be considered. Defaults to \code{0}.}

\item{p_val_adj_threshold}{A numeric value setting the maximum adjusted p-value
for a marker to be considered. Defaults to \code{0.05}.}

\item{spc}{A character string specifying the species, either \code{"Human"} or \code{"Mouse"}.
This is used to filter the \code{cellMarker2} database. This parameter is ignored
if a custom \code{ref} is provided.}

\item{tissueClass}{A character vector of tissue classes to include from the
\code{cellMarker2} database. Defaults to all available tissue classes for the
specified species. This parameter is ignored if a custom \code{ref} is provided.
See \code{available_tissue_class()}.}

\item{tissueType}{A character vector of tissue types to include from the
\code{cellMarker2} database. Defaults to all available tissue types for the
specified species. This parameter is ignored if a custom \code{ref} is provided.
See \code{available_tissue_type()}.}

\item{ref}{An optional long-format \code{data.frame} (one row per cell type and
marker pair) used as the reference for marker matching. It must contain the
columns \code{cell_name} and \code{marker}.
If \code{NULL} (the default), the function uses the built-in \code{cellMarker2}
dataset. When a custom \code{ref} is provided, the \code{spc}, \code{tissueClass}, and
\code{tissueType} parameters are ignored for the matching process itself,
but their original values are saved for provenance.}
}
\value{
A \code{data.table} where each row represents a potential cell type match for a
cluster. The table is keyed by \code{cluster} and includes columns for \code{cluster},
\code{cell_name}, \code{uniqueN} (number of unique matching markers), \code{N} (total matches),
\code{ordered_symbol} (matching genes, ordered by frequency), and \code{orderN} (their frequencies).

The returned object also carries the following attributes (retrievable with \code{attr()}) for downstream analysis:
\item{ref}{The reference data (either from \code{cellMarker2} or the custom \code{ref}) used for the annotation.}
\item{is_custom_ref}{A logical flag indicating whether a custom \code{ref} was used.}
\item{filter_args}{A list containing the filtering parameters used during the annotation,
which is essential for the \code{\link{check_marker}} function.}
}
\description{
This function takes cluster-specific markers, typically from \code{Seurat::FindAllMarkers},
and annotates each cluster with potential cell types by matching these markers
against a reference database. It first filters and selects the top \code{n}
marker genes for each cluster based on specified thresholds and then compares
them to the reference database to find the most likely cell type annotations.
}
\examples{
\dontrun{
library(easybio)
data(pbmc.markers)

# Basic usage: Annotate clusters using the top 50 markers per cluster
matched_cells <- matchCellMarker2(pbmc.markers, n = 50, spc = "Human")
print(matched_cells)

# To see the top annotation for each cluster
top_matches <- matched_cells[, .SD[1], by = cluster]
print(top_matches)

# Advanced usage: Stricter filtering and focus on specific tissues
matched_cells_strict <- matchCellMarker2(
  pbmc.markers,
  n = 30,
  spc = "Human",
  avg_log2FC_threshold = 0.5,
  p_val_adj_threshold = 0.01,
  tissueType = c("Blood", "Bone marrow")
)
print(matched_cells_strict)

# --- Example with a custom reference ---
# Create a custom reference as a named list.
custom_ref_list <- list(
  "T-cell" = c("CD3D", "CD3E"),
  "B-cell" = c("CD79A", "MS4A1"),
  "Myeloid" = "LYZ"
)

# Convert the list to a long data.frame compatible with the 'ref' parameter.
custom_ref_df <- list2dt(custom_ref_list, col_names = c("cell_name", "marker"))

# Run annotation using the custom reference.
# When 'ref' is provided, the internal cellMarker2 database and its filters
# ('spc', 'tissueClass', 'tissueType') are ignored for matching.
matched_custom <- matchCellMarker2(
  pbmc.markers,
  n = 50,
  ref = custom_ref_df
)
print(matched_custom)
}
}
\seealso{
\code{\link{check_marker}}, \code{\link{plotPossibleCell}}, \code{\link{available_tissue_class}}, \code{\link{available_tissue_type}}
}
