% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/textParsers.R
\name{nGram}
\alias{nGram}
\title{Tokenize (or split) text and emit multi-grams.}
\usage{
nGram(n, ignoreCase = FALSE, delimiter = "[ \\\\t\\\\b\\\\f\\\\r]+",
  punctuation = NULL, overlapping = TRUE, reset = NULL, sep = " ",
  minLength = 1)
}
\arguments{
\item{n}{length, in words, of each n-gram}

\item{ignoreCase}{logical: if FALSE, the n-gram matching is case sensitive and
if TRUE, case is ignored during matching.}

\item{delimiter}{character or string that divides one word from the next. 
You can use a regular expression as the \code{delimiter} value.}

\item{punctuation}{a regular expression that specifies the punctuation characters 
the \code{nGram} parser removes before it evaluates the input text.}

\item{overlapping}{logical: if TRUE, overlapping n-grams are allowed.}

\item{reset}{a regular expression listing one or more punctuation characters or 
strings, any of which the \code{nGram} parser will recognize as the end of a sentence 
of text. The end of each sentence resets the search for n-grams, meaning that 
\code{nGram} discards any partial n-grams and proceeds to the next sentence to search 
for the next n-gram. In other words, no n-gram can span two sentences.}

\item{sep}{a character string to separate multiple text columns.}

\item{minLength}{minimum length of words in an n-gram. N-grams that contain words 
shorter than the limit are omitted. The current implementation is not complete: it
filters out n-grams where each word is below the minimum length, i.e. the total length 
of the n-gram is below \code{n*minLength + (n-1)}.}
}
\value{
pluggable n-gram parser
}
\description{
Tokenize (or split) text and emit multi-grams.
}

