% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sig_tally.R
\name{sig_tally}
\alias{sig_tally}
\alias{sig_tally.CopyNumber}
\alias{sig_tally.RS}
\alias{sig_tally.MAF}
\title{Tally a Genomic Alteration Object}
\usage{
sig_tally(object, ...)

\method{sig_tally}{CopyNumber}(
  object,
  method = "Wang",
  ignore_chrs = NULL,
  indices = NULL,
  add_loh = FALSE,
  feature_setting = sigminer::CN.features,
  cores = 1,
  keep_only_matrix = FALSE,
  ...
)

\method{sig_tally}{RS}(object, keep_only_matrix = FALSE, ...)

\method{sig_tally}{MAF}(
  object,
  mode = c("SBS", "DBS", "ID", "ALL"),
  ref_genome = "BSgenome.Hsapiens.UCSC.hg19",
  genome_build = NULL,
  add_trans_bias = FALSE,
  ignore_chrs = NULL,
  use_syn = TRUE,
  keep_only_matrix = FALSE,
  ...
)
}
\arguments{
\item{object}{a \link{CopyNumber} object or \link{MAF} object or SV object (from \link{read_sv_as_rs}).}

\item{...}{custom settings for operating on the object. For details, see the S3 method for
the corresponding class (e.g. \code{CopyNumber}).}

\item{method}{method for feature classification, can be one of
"Wang" ("W"), "S" (for method described in Steele et al. 2019),
"X" (for method described in Tao et al. 2023).}

\item{ignore_chrs}{Chromosomes to ignore from analysis. e.g. chrX and chrY.}

\item{indices}{integer vector indicating segments to keep.}

\item{add_loh}{flag to add LOH classifications.}

\item{feature_setting}{a \code{data.frame} used for classification.
\strong{Only used when method is "Wang" ("W")}.
Default is \link{CN.features}. Users can also set custom input with "feature",
"min" and "max" columns available. Valid features can be printed by
\code{unique(CN.features$feature)}.}

\item{cores}{number of computer cores to run this task.
You can use \code{\link[future:re-exports]{future::availableCores()}} function to check how
many cores you can use.}

\item{keep_only_matrix}{if \code{TRUE}, keep only matrix for signature extraction.
For a \code{MAF} object, this will just return the most useful matrix.}

\item{mode}{type of mutation matrix to extract, can be one of 'SBS', 'DBS', 'ID' and 'ALL'.}

\item{ref_genome}{'BSgenome.Hsapiens.UCSC.hg19', 'BSgenome.Hsapiens.UCSC.hg38',
'BSgenome.Mmusculus.UCSC.mm10',  'BSgenome.Mmusculus.UCSC.mm9', etc.}

\item{genome_build}{genome build, one of 'hg19', 'hg38', 'mm9' or 'mm10'. If not set, it is guessed from \code{ref_genome}.}

\item{add_trans_bias}{if \code{TRUE}, consider transcriptional bias categories.
'T:' for Transcribed (the variant is on the transcribed strand);
'U:' for Un-transcribed (the variant is on the untranscribed strand);
'B:' for Bi-directional (the variant is on both strands and is transcribed either way);
'N:' for Non-transcribed (the variant is in a non-coding region and is untranslated);
'Q:' for Questionable.
\strong{NOTE}: the result counts of 'B' and 'N' labels are a little different from
SigProfilerMatrixGenerator, the reason is unknown (may be caused by annotation file).}

\item{use_syn}{Logical. If \code{TRUE}, include synonymous variants in analysis.}
}
\value{
a \code{list} containing a \code{matrix} used for NMF decomposition.
}
\description{
Tally a variation object like \link{MAF}, \link{CopyNumber} and return a matrix for NMF de-composition and more.
This is a generic function,
so it can be further extended to other mutation cases.
\strong{Please read details about how to set sex for identifying copy number signatures}.
Please read \url{https://osf.io/s93d5/} for the generation of SBS, DBS and ID (INDEL)
components.
}
\details{
For identifying copy number signatures, we have to derive copy number
features first. Due to the difference in copy number values of sex chromosomes
between male and female, we have to do an extra step \strong{if we don't want to
ignore them}.

I created two options to control this. Their default values are shown
below; you can set them in the same way (per R session).

\code{options(sigminer.sex = "female", sigminer.copynumber.max = NA_integer_)}
\itemize{
\item If your cohort is all female, you can totally ignore this.
\item If your cohort is all male, set \code{sigminer.sex} to 'male' and
\code{sigminer.copynumber.max} to a proper value (the best is consistent
with \link{read_copynumber}).
\item If your cohort contains both males and females, set \code{sigminer.sex}
as a \code{data.frame} with two columns "sample" and "sex". And
set \code{sigminer.copynumber.max} to a proper value (the best is consistent
with \link{read_copynumber}).
}
}
\section{Methods (by class)}{
\itemize{
\item \code{sig_tally(CopyNumber)}: Returns copy number features, components and component-by-sample matrix

\item \code{sig_tally(RS)}: Returns genome rearrangement sample-by-component matrix

\item \code{sig_tally(MAF)}: Returns SBS mutation sample-by-component matrix and APOBEC enrichment

}}
\examples{
# Load copy number object
load(system.file("extdata", "toy_copynumber.RData",
  package = "sigminer", mustWork = TRUE
))
\donttest{
# Use method designed by Wang, Shixiang et al.
cn_tally_W <- sig_tally(cn, method = "W")
}
# Use method designed by Steele et al.
# See example in read_copynumber
\donttest{
# Prepare SBS signature analysis
laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
laml <- read_maf(maf = laml.maf)
if (require("BSgenome.Hsapiens.UCSC.hg19")) {
  mt_tally <- sig_tally(
    laml,
    ref_genome = "BSgenome.Hsapiens.UCSC.hg19",
    use_syn = TRUE
  )
  mt_tally$nmf_matrix[1:5, 1:5]

  ## Use strand bias categories
  mt_tally <- sig_tally(
    laml,
    ref_genome = "BSgenome.Hsapiens.UCSC.hg19",
    use_syn = TRUE, add_trans_bias = TRUE
  )
  ## Test it by enrichment analysis
  enrich_component_strand_bias(mt_tally$nmf_matrix)
  enrich_component_strand_bias(mt_tally$all_matrices$SBS_24)
} else {
  message("Please install package 'BSgenome.Hsapiens.UCSC.hg19' firstly!")
}
}
}
\references{
Wang, Shixiang, et al. "Copy number signature analyses in prostate cancer reveal
distinct etiologies and clinical outcomes." medRxiv (2020).

Steele, Christopher D., et al. "Undifferentiated sarcomas develop through
distinct evolutionary pathways." Cancer Cell 35.3 (2019): 441-456.

Mayakonda, Anand, et al. "Maftools: efficient and comprehensive analysis of somatic variants in cancer." Genome research 28.11 (2018): 1747-1756.

Roberts SA, Lawrence MS, Klimczak LJ, et al. An APOBEC Cytidine Deaminase Mutagenesis Pattern is Widespread in Human Cancers. Nature genetics. 2013;45(9):970-976. doi:10.1038/ng.2702.

Bergstrom EN, Huang MN, Mahto U, Barnes M, Stratton MR, Rozen SG, Alexandrov LB: SigProfilerMatrixGenerator: a tool for visualizing and exploring patterns of small mutational events. BMC Genomics 2019, 20:685 https://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-019-6041-2
}
\seealso{
\link{sig_estimate} for estimating signature number for \link{sig_extract},
\link{sig_auto_extract} for extracting signatures using automatic relevance determination technique.
}
\author{
Shixiang Wang
}
