% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/report_term_matches.R
\name{report_term_matches}
\alias{report_term_matches}
\title{Generate a Report of Term Matches}
\usage{
report_term_matches(dict, text = NULL, space = NULL, glob = TRUE,
  parse_phrases = TRUE, tolower = TRUE, punct = TRUE, special = TRUE,
  as_terms = FALSE, bysentence = FALSE, as_string = TRUE,
  term_map_freq = 1, term_map_spaces = 1, outFile = NULL,
  space_dir = getOption("lingmatch.lspace.dir"), verbose = TRUE)
}
\arguments{
\item{dict}{A vector of terms, list of such vectors, or a matrix-like object to be
categorized by \code{\link{read.dic}}.}

\item{text}{A vector of text to extract matches from. If not specified, will use the terms
in the \code{term_map} retrieved from \code{\link{select.lspace}}.}

\item{space}{A vector space used to calculate similarities between term matches.
Name of the space (see \code{\link{select.lspace}}), a matrix with terms as row names, or
\code{TRUE} to auto-select a space based on matched terms.}

\item{glob}{Logical; if \code{TRUE}, converts globs (asterisk wildcards) to regular expressions.
If not specified, this will be set automatically.}

\item{parse_phrases}{Logical; if \code{TRUE} (default) and \code{space} is specified, will
break unmatched phrases into single terms, and average across any matched vectors.}

\item{tolower}{Logical; if \code{FALSE}, will retain \code{text}'s case.}

\item{punct}{Logical; if \code{FALSE}, will remove punctuation markings in \code{text}.}

\item{special}{Logical; if \code{FALSE}, will attempt to replace special characters in \code{text}.}

\item{as_terms}{Logical; if \code{TRUE}, will treat \code{text} as terms, meaning \code{dict}
terms will only count as matches when matching the complete text.}

\item{bysentence}{Logical; if \code{TRUE}, will split \code{text} into sentences, and only
consider unique sentences.}

\item{as_string}{Logical; if \code{FALSE}, returns matches as tables rather than a string.}

\item{term_map_freq}{Proportion of terms to include when using the term map as a source
of terms. Applies when \code{text} is not specified.}

\item{term_map_spaces}{Number of spaces in which a term has to appear to be included.
Applies when \code{text} is not specified.}

\item{outFile}{File path to write results to, always ending in \code{.csv}.}

\item{space_dir}{Directory from which \code{space} should be loaded.}

\item{verbose}{Logical; if \code{FALSE}, will not display status messages.}
}
\value{
A \code{data.frame} of results, with a row for each unique term, and the following columns:
\itemize{
  \item \strong{\code{term}}: The originally entered term.
  \item \strong{\code{regex}}: The converted and applied regular expression form of the term.
  \item \strong{\code{categories}}: Comma-separated category names,
  if \code{dict} is a list with named entries.
  \item \strong{\code{count}}: Total number of matches to the term.
  \item \strong{\code{max_count}}: Number of matches to the most representative
  (that with the highest average similarity) variant of the term.
  \item \strong{\code{variants}}: Number of variants of the term.
  \item \strong{\code{space}}: Name of the latent semantic space, if one was used.
  \item \strong{\code{mean_sim}}: Average similarity to the most representative variant among terms
  found in the space, if one was used.
  \item \strong{\code{min_sim}}: Minimal similarity to the most representative variant.
  \item \strong{\code{matches}}: Variants, with counts and similarity (Pearson's r) to the
  most representative term (if a space was specified). Either in the form of a comma-separated
  string or a \code{data.frame} (if \code{as_string} is \code{FALSE}).
}
}
\description{
Extract matches to fuzzy terms (globs/wildcards or regular expressions) from provided text, in order
to assess their appropriateness for inclusion in a dictionary.
}
\note{
Matches are extracted for each term independently, so they may not align with some implementations
of dictionaries. For instance, by default \code{\link{lma_patcat}} matches destructively, and sorts
terms by length such that shorter terms will not match the same text as longer terms that overlap.
Here, the match would show up for both terms.
}
\examples{
text <- c(
  "I am sadly homeless, and suffering from depression :(",
  "This wholesome happiness brings joy to my heart! :D:D:D",
  "They are joyous in these fearsome happenings D:",
  "I feel weightless now that my sadness has been depressed! :()"
)
dict <- list(
  sad = c("*less", "sad*", "depres*", ":("),
  happy = c("*some", "happ*", "joy*", "d:"),
  self = c("i *", "my *")
)

report_term_matches(dict, text)
}
\seealso{
For a more complete assessment of dictionaries, see \code{\link{dictionary_meta}()}.

Similar information is provided in the \href{https://miserman.github.io/dictionary_builder/}{dictionary builder} web tool.

Other Dictionary functions: 
\code{\link{dictionary_meta}()},
\code{\link{download.dict}()},
\code{\link{lma_patcat}()},
\code{\link{lma_termcat}()},
\code{\link{read.dic}()},
\code{\link{select.dict}()}
}
\concept{Dictionary functions}
