% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/count.R
\docType{methods}
\name{count}
\alias{count}
\alias{count-method}
\alias{count,partition-method}
\alias{count,subcorpus-method}
\alias{count,partition_bundle-method}
\alias{count,subcorpus_bundle-method}
\alias{count,corpus-method}
\alias{count,character-method}
\alias{count,vector-method}
\alias{count,remote_corpus-method}
\alias{count,remote_subcorpus-method}
\title{Get counts.}
\usage{
count(.Object, ...)

\S4method{count}{partition}(
  .Object,
  query = NULL,
  cqp = is.cqp,
  check = TRUE,
  breakdown = FALSE,
  decode = TRUE,
  p_attribute = getOption("polmineR.p_attribute"),
  mc = getOption("polmineR.cores"),
  verbose = TRUE,
  progress = FALSE,
  phrases = NULL,
  ...
)

\S4method{count}{subcorpus}(
  .Object,
  query = NULL,
  cqp = is.cqp,
  check = TRUE,
  breakdown = FALSE,
  decode = TRUE,
  p_attribute = getOption("polmineR.p_attribute"),
  mc = getOption("polmineR.cores"),
  verbose = TRUE,
  progress = FALSE,
  phrases = NULL,
  ...
)

\S4method{count}{partition_bundle}(
  .Object,
  query = NULL,
  cqp = FALSE,
  p_attribute = NULL,
  phrases = NULL,
  freq = FALSE,
  total = TRUE,
  mc = FALSE,
  progress = FALSE,
  verbose = FALSE,
  ...
)

\S4method{count}{subcorpus_bundle}(
  .Object,
  query = NULL,
  cqp = FALSE,
  p_attribute = NULL,
  phrases = NULL,
  freq = FALSE,
  total = TRUE,
  mc = FALSE,
  progress = TRUE,
  verbose = FALSE,
  ...
)

\S4method{count}{corpus}(
  .Object,
  query = NULL,
  cqp = is.cqp,
  check = TRUE,
  p_attribute = getOption("polmineR.p_attribute"),
  breakdown = FALSE,
  sort = FALSE,
  decode = TRUE,
  verbose = TRUE,
  ...
)

\S4method{count}{character}(
  .Object,
  query = NULL,
  cqp = is.cqp,
  check = TRUE,
  p_attribute = getOption("polmineR.p_attribute"),
  breakdown = FALSE,
  sort = FALSE,
  decode = TRUE,
  verbose = TRUE,
  ...
)

\S4method{count}{vector}(.Object, corpus, p_attribute, ...)

\S4method{count}{remote_corpus}(.Object, ...)

\S4method{count}{remote_subcorpus}(.Object, ...)
}
\arguments{
\item{.Object}{A \code{partition} or \code{partition_bundle}, or a length-one
character vector providing the name of a corpus.}

\item{...}{Further arguments. If \code{.Object} is a \code{remote_corpus}
object, the three dots (\code{...}) are used to pass arguments. Hence, it is
necessary to state the names of all arguments to be passed explicitly.}

\item{query}{A character vector (one or multiple terms), CQP syntax can be
used.}

\item{cqp}{Either logical (\code{TRUE} if query is a CQP query), or a
function to check whether query is a CQP query or not (the \code{is.cqp}
auxiliary function is the default for most methods, see usage).}

\item{check}{A \code{logical} value, whether to check validity of CQP query
using \code{check_cqp_query}.}

\item{breakdown}{Logical, whether to report number of occurrences for
different matches for a query.}

\item{decode}{Logical, whether to turn token ids into decoded strings (only
if query is NULL).}

\item{p_attribute}{The p-attribute(s) to use.}

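\item{mc}{Logical, whether to use multicore (defaults to \code{FALSE} for the
bundle methods and to \code{getOption("polmineR.cores")} for the
\code{partition} and \code{subcorpus} methods).}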

\item{verbose}{Logical, whether to be verbose.}

\item{progress}{Logical, whether to show progress bar.}

\item{phrases}{A \code{phrases} object. If provided, the denoted regions will
be concatenated as phrases.}

\item{freq}{Logical, if \code{FALSE}, counts will be reported, if \code{TRUE},
(relative) frequencies are added to table.}

\item{total}{Logical, defaults to \code{TRUE}. If \code{TRUE}, the total value
of counts (column named 'TOTAL') will be appended to the \code{data.table}
that is returned.}

\item{sort}{Logical, whether to sort table with counts (in stat slot).}

\item{corpus}{The name of a CWB corpus.}
}
\value{
A \code{data.table} if argument query is used, a \code{count}-object,
if query is \code{NULL} and \code{.Object} is a character vector (referring
to a corpus) or a \code{partition}, a \code{count_bundle}-object, if \code{.Object}
is a \code{partition_bundle}.
}
\description{
Count all tokens, or number of occurrences of a query (CQP syntax may be
used), or matches for the query.
}
\details{
If \code{.Object} is a \code{partition_bundle}, the \code{data.table} returned will
have the queries in the columns, and as many rows as there are partitions in the
\code{partition_bundle}.

If \code{.Object} is a length-one \code{character} vector and \code{query} is
\code{NULL}, the count is performed for the whole corpus.

If \code{breakdown} is \code{TRUE} and one query is supplied, the function
returns a frequency breakdown of the results of the query. If several queries
are supplied, frequencies for the individual queries are retrieved.

Multiple queries can be used for argument \code{query}. Some care may be
necessary when summing up the counts for the individual queries. When the
CQP syntax is used, different queries may yield the same match result, so that
the sum of all individual query matches may overestimate the true number of
unique matches. In the case of overlapping matches, a warning message is
issued. Collapsing multiple CQP queries into a single query (separating the
individual queries by "|" and wrapping everything in round brackets) solves
this problem.
}
\examples{
use("polmineR")
use(pkg = "RcppCWB", corpus = "REUTERS")

debates <- partition("GERMAPARLMINI", date = ".*", regex=TRUE)
count(debates, query = "Arbeit") # get frequencies for one token
count(debates, c("Arbeit", "Freizeit", "Zukunft")) # get frequencies for multiple tokens
  
count("GERMAPARLMINI", query = c("Migration", "Integration"), p_attribute = "word")

debates <- partition_bundle(
  "GERMAPARLMINI", s_attribute = "date", values = NULL,
  mc = FALSE, verbose = FALSE
)
y <- count(debates, query = "Arbeit", p_attribute = "word")
y <- count(debates, query = c("Arbeit", "Migration", "Zukunft"), p_attribute = "word")
  
count("GERMAPARLMINI", '"Integration.*"', breakdown = TRUE)

P <- partition("GERMAPARLMINI", date = "2009-11-11")
count(P, '"Integration.*"', breakdown = TRUE)

sc <- corpus("GERMAPARLMINI") \%>\% subset(party == "SPD")
phr <- cpos(sc, query = '"Deutsche.*" "Bundestag.*"', cqp = TRUE) \%>\%
  as.phrases(corpus = "GERMAPARLMINI", enc = "latin1")
cnt <- count(sc, phrases = phr, p_attribute = "word")

# Multiple queries and overlapping query matches. The first count 
# operation will issue a warning that matches overlap, see the second 
# example for a solution.
corpus("REUTERS") \%>\%
  count(query = c('".*oil"', '"turmoil"'), cqp = TRUE)
corpus("REUTERS") \%>\% 
  count(query = '"(.*oil|turmoil)"', cqp =TRUE)
}
\references{
Baker, Paul (2006): \emph{Using Corpora in Discourse Analysis}. London: Continuum, pp. 47-69 (ch. 3).
}
\seealso{
For a metadata-based breakdown of counts (i.e. tabulation by
s-attributes), see \code{\link{dispersion}}. The \code{\link{hits}} method is
the worker behind the \code{dispersion} method and offers a similar, yet more
low-level functionality as compared to the \code{count} method. Using the
\code{\link{hits}} method may be useful to obtain the data required for
flexible cross-tabulations.
}
