% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tokens_lookup.R
\name{tokens_lookup}
\alias{tokens_lookup}
\title{apply a dictionary to a tokens object}
\usage{
tokens_lookup(x, dictionary, levels = 1:5, valuetype = c("glob", "regex",
  "fixed"), concatenator = " ", case_insensitive = TRUE, capkeys = FALSE,
  exclusive = TRUE, multiword = TRUE, verbose = FALSE)
}
\arguments{
\item{x}{tokens object to which the dictionary or thesaurus will be applied}

\item{dictionary}{the \link{dictionary}-class object that will be applied to 
\code{x}}

\item{levels}{integers specifying the levels of entries in a hierarchical
dictionary that will be applied.  The top level is 1, and subsequent levels
describe lower nesting levels.  Values may be combined, even if these
levels are not contiguous, e.g. \code{levels = c(1, 3)} will collapse the second
level into the first, but record the third level (if present) collapsed below
the first.  (See examples.)}

\item{valuetype}{how to interpret keyword expressions: \code{"glob"} for 
"glob"-style wildcard expressions; \code{"regex"} for regular expressions;
or \code{"fixed"} for exact matching. See \link{valuetype} for details.}

\item{concatenator}{a character that connects words in multi-word entries in \code{x}}

\item{case_insensitive}{ignore the case of dictionary values if \code{TRUE}}

\item{capkeys}{if \code{TRUE}, convert dictionary keys to uppercase to distinguish 
them from other features}

\item{exclusive}{if \code{TRUE}, remove all features not in dictionary, 
otherwise, replace values in dictionary with keys while leaving other 
features unaffected}

\item{multiword}{if \code{FALSE}, multi-word entries in dictionary are treated
as single tokens}

\item{verbose}{print status messages if \code{TRUE}}
}
\description{
Convert tokens into equivalence classes defined by values of a dictionary 
object.
}
\examples{
# Tokenize the inaugural address corpus once and reuse it below
toks <- tokens(data_corpus_inaugural)

# Dictionary whose values are glob patterns
dict <- dictionary(list(
    country = "united states",
    law     = c("law*", "constitution"),
    freedom = c("free*", "libert*")
))
dfm(tokens_lookup(toks, dict, valuetype = "glob", verbose = TRUE))

# Dictionary whose values are matched exactly
dict_fix <- dictionary(list(
    country = "united states",
    law     = c("law", "constitution"),
    freedom = c("freedom", "liberty")
))
dfm(applyDictionary(toks, dict_fix, valuetype = "fixed"))
dfm(tokens_lookup(toks, dict_fix, valuetype = "fixed"))

# Hierarchical dictionary: keys nested up to three levels deep
txt <- c(
    d1 = "The United States has the Atlantic Ocean and the Pacific Ocean.",
    d2 = "Britain and Ireland have the Irish Sea and the English Channel."
)
toks <- tokens(txt)
dict <- dictionary(list(
    US = list(
        Countries = c("States"),
        oceans    = c("Atlantic", "Pacific")
    ),
    Europe = list(
        Countries = c("Britain", "Ireland"),
        oceans    = list(west = "Irish Sea", east = "English Channel")
    )
))

# Look up each level on its own, then in combination
tokens_lookup(toks, dict, levels = 1)
tokens_lookup(toks, dict, levels = 2)
tokens_lookup(toks, dict, levels = 1:2)
tokens_lookup(toks, dict, levels = 3)
tokens_lookup(toks, dict, levels = c(1, 3))
tokens_lookup(toks, dict, levels = c(2, 3))
}

