% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RadixTree.R
\name{RadixTree}
\alias{RadixTree}
\title{RadixTree}
\description{
Radix Tree (trie) class implementation
}
\details{
The RadixTree class is a trie implementation. Its primary use is to search for similar sequences based on a dynamic programming framework.
This can be done using the \emph{search} method which searches for similar sequences based on the Global, Anchored or Hamming distance metrics.

Three types of distance metrics are supported, based on the form of alignment performed. These are: Hamming, Global (Levenshtein) and Anchored.

An anchored alignment is a form of semi-global alignment, where the alignment is "anchored" (global) at the beginning of both the query and target sequences,
but is semi-global in that the end of either the query sequence or the target sequence (but not both) can be unaligned. This type of alignment is sometimes called an "extension" alignment in the literature.

In contrast a global alignment must align the entire query and target sequences. When mismatch and indel costs are equal to 1, this is also known as the Levenshtein distance.

By default, if mode == "global" or "anchored", all mismatches and indels are given a cost of 1. However, you can define your own distance metric by setting the substitution cost_matrix and separate gap parameters.
The cost_matrix is a strictly positive square integer matrix of substitution costs and should include all characters in query and target as column- and rownames. Any rows/columns named "gap" or "gap_open" are ignored.
To set the cost of a gap (insertion or deletion), use the gap_cost parameter (a single positive integer). To enable affine gaps, provide the gap_open_cost parameter (a single positive integer) in addition to gap_cost.
If affine alignment is used, the total cost of a gap of length gap_length is defined as:
TOTAL_GAP_COST = gap_open_cost + (gap_cost * gap_length).

If mode == "hamming" all alignment parameters are ignored; mismatch is given a distance of 1 and gaps are not allowed.
}
\examples{
tree <- RadixTree$new()
tree$insert(c("ACGT", "AAAA"))
tree$erase("AAAA")
tree$search("ACG", max_distance = 1, mode = "levenshtein")
#   query target distance
# 1   ACG   ACGT        1

tree$search("ACG", max_distance = 1, mode = "hamming")
# query    target   distance
# <0 rows> (or 0-length row.names)
}
\seealso{
\url{https://en.wikipedia.org/wiki/Radix_tree}
}
\section{Public fields}{
\if{html}{\out{<div class="r6-fields">}}
\describe{
\item{\code{root_pointer}}{Root of the RadixTree}

\item{\code{char_counter_pointer}}{Character count data for the purpose of validating input}
}
\if{html}{\out{</div>}}
}
\section{Methods}{
\subsection{Public methods}{
\itemize{
\item \href{#method-RadixTree-new}{\code{RadixTree$new()}}
\item \href{#method-RadixTree-show}{\code{RadixTree$show()}}
\item \href{#method-RadixTree-to_string}{\code{RadixTree$to_string()}}
\item \href{#method-RadixTree-graph}{\code{RadixTree$graph()}}
\item \href{#method-RadixTree-to_vector}{\code{RadixTree$to_vector()}}
\item \href{#method-RadixTree-size}{\code{RadixTree$size()}}
\item \href{#method-RadixTree-insert}{\code{RadixTree$insert()}}
\item \href{#method-RadixTree-erase}{\code{RadixTree$erase()}}
\item \href{#method-RadixTree-find}{\code{RadixTree$find()}}
\item \href{#method-RadixTree-prefix_search}{\code{RadixTree$prefix_search()}}
\item \href{#method-RadixTree-search}{\code{RadixTree$search()}}
\item \href{#method-RadixTree-single_gap_search}{\code{RadixTree$single_gap_search()}}
\item \href{#method-RadixTree-validate}{\code{RadixTree$validate()}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-new"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-new}{}}}
\subsection{Method \code{new()}}{
Create a new RadixTree object
\subsection{Usage - new}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$new(sequences = NULL)}\if{html}{\out{</div>}}
}

\subsection{Arguments - new}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{sequences}}{A character vector of sequences to insert into the tree}
}
\if{html}{\out{</div>}}
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-show"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-show}{}}}
\subsection{Method \code{show()}}{
Print the tree to screen
\subsection{Usage - show}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$show()}\if{html}{\out{</div>}}
}

}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-to_string"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-to_string}{}}}
\subsection{Method \code{to_string()}}{
Print the tree to a string
\subsection{Usage - to_string}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$to_string()}\if{html}{\out{</div>}}
}

\subsection{Returns - to_string}{
A string representation of the tree
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-graph"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-graph}{}}}
\subsection{Method \code{graph()}}{
Plot the tree using igraph (needs to be installed separately)
\subsection{Usage - graph}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$graph(depth = -1, root_label = "root", plot = TRUE)}\if{html}{\out{</div>}}
}

\subsection{Arguments - graph}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{depth}}{The tree depth to plot. If -1 (default), plot the entire tree.}

\item{\code{root_label}}{The label of the root node in the plot.}

\item{\code{plot}}{Whether to create a plot or return the data used to generate the plot.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns - graph}{
A data frame of parent-child relationships used to generate the igraph plot OR a ggplot2 object
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-to_vector"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-to_vector}{}}}
\subsection{Method \code{to_vector()}}{
Output all sequences held by the tree as a character vector
\subsection{Usage - to_vector}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$to_vector()}\if{html}{\out{</div>}}
}

\subsection{Returns - to_vector}{
A character vector of all sequences contained in the tree. Return order is not guaranteed.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-size"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-size}{}}}
\subsection{Method \code{size()}}{
Output the size of the tree (i.e. how many sequences are contained)
\subsection{Usage - size}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$size()}\if{html}{\out{</div>}}
}

\subsection{Returns - size}{
The size of the tree
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-insert"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-insert}{}}}
\subsection{Method \code{insert()}}{
Insert new sequences into the tree
\subsection{Usage - insert}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$insert(sequences)}\if{html}{\out{</div>}}
}

\subsection{Arguments - insert}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{sequences}}{A character vector of sequences to insert into the tree}
}
\if{html}{\out{</div>}}
}
\subsection{Returns - insert}{
A logical vector indicating whether the sequence was inserted (TRUE) or already existing in the tree (FALSE)
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-erase"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-erase}{}}}
\subsection{Method \code{erase()}}{
Erase sequences from the tree
\subsection{Usage - erase}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$erase(sequences)}\if{html}{\out{</div>}}
}

\subsection{Arguments - erase}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{sequences}}{A character vector of sequences to erase from the tree}
}
\if{html}{\out{</div>}}
}
\subsection{Returns - erase}{
A logical vector indicating whether the sequence was erased (TRUE) or not found in the tree (FALSE)
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-find"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-find}{}}}
\subsection{Method \code{find()}}{
Find sequences in the tree
\subsection{Usage - find}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$find(query)}\if{html}{\out{</div>}}
}

\subsection{Arguments - find}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{query}}{A character vector of sequences to find in the tree}
}
\if{html}{\out{</div>}}
}
\subsection{Returns - find}{
A logical vector indicating whether the sequence was found (TRUE) or not found in the tree (FALSE)
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-prefix_search"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-prefix_search}{}}}
\subsection{Method \code{prefix_search()}}{
Search for sequences in the tree that start with a specified prefix.
E.g.: a query of "CAR" will find "CART", "CARBON", "CARROT", etc. but not "CATS".
\subsection{Usage - prefix_search}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$prefix_search(query)}\if{html}{\out{</div>}}
}

\subsection{Arguments - prefix_search}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{query}}{A character vector of sequences to search for in the tree}
}
\if{html}{\out{</div>}}
}
\subsection{Returns - prefix_search}{
A data frame of all matches with columns "query" and "target".
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-search"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-search}{}}}
\subsection{Method \code{search()}}{
Search for sequences in the tree that are within a specified distance of a specified query, using the chosen distance metric.
\subsection{Usage - search}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$search(
  query,
  max_distance = NULL,
  max_fraction = NULL,
  mode = "levenshtein",
  cost_matrix = NULL,
  gap_cost = NA_integer_,
  gap_open_cost = NA_integer_,
  nthreads = 1,
  show_progress = FALSE
)}\if{html}{\out{</div>}}
}

\subsection{Arguments - search}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{query}}{A character vector of query sequences.}

\item{\code{max_distance}}{how far to search in units of absolute distance. Can be a single value or a vector. Mutually exclusive with max_fraction.}

\item{\code{max_fraction}}{how far to search in units of relative distance to each query sequence length. Can be a single value or a vector. Mutually exclusive with max_distance.}

\item{\code{mode}}{The distance metric to use. One of hamming (hm), global (gb) or anchored (an). The default, "levenshtein", refers to the global metric (see details).}

\item{\code{cost_matrix}}{A custom cost matrix for use with the "global" or "anchored" distance metrics. See details.}

\item{\code{gap_cost}}{The cost of a gap for use with the "global" or "anchored" distance metrics. See details.}

\item{\code{gap_open_cost}}{The cost of a gap opening. See details.}

\item{\code{nthreads}}{The number of threads to use for parallel computation.}

\item{\code{show_progress}}{Whether to show a progress bar.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns - search}{
The output is a data.frame of all matches with columns "query", "target" and "distance".
For anchored searches, the output also includes attributes "query_size" and "target_size" which are vectors containing the portion of the query and target sequences that are aligned.
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-single_gap_search"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-single_gap_search}{}}}
\subsection{Method \code{single_gap_search()}}{
A specialized algorithm for searching for sequences allowing at most a single gap within the alignment itself. The mode is always "anchored" and does not penalize end gaps.
\subsection{Usage - single_gap_search}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$single_gap_search(
  query,
  max_distance,
  gap_cost = 1L,
  nthreads = 1,
  show_progress = FALSE
)}\if{html}{\out{</div>}}
}

\subsection{Arguments - single_gap_search}{
\if{html}{\out{<div class="arguments">}}
\describe{
\item{\code{query}}{A character vector of query sequences.}

\item{\code{max_distance}}{how far to search in units of absolute distance. Can be a single value or a vector.}

\item{\code{gap_cost}}{The cost of a gap for use with the "global" or "anchored" distance metrics. See details.}

\item{\code{nthreads}}{The number of threads to use for parallel computation.}

\item{\code{show_progress}}{Whether to show a progress bar.}
}
\if{html}{\out{</div>}}
}
\subsection{Returns - single_gap_search}{
The output is a data.frame of matches with columns "query", "target" and "distance".
}
}
\if{html}{\out{<hr>}}
\if{html}{\out{<a id="method-RadixTree-validate"></a>}}
\if{latex}{\out{\hypertarget{method-RadixTree-validate}{}}}
\subsection{Method \code{validate()}}{
Validate the tree
\subsection{Usage - validate}{
\if{html}{\out{<div class="r">}}\preformatted{RadixTree$validate()}\if{html}{\out{</div>}}
}

\subsection{Returns - validate}{
A logical indicating whether the tree is valid (TRUE) or not (FALSE). This is mostly an internal function for debugging purposes and should always return TRUE.
}
}
}
