% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/partition.R
\docType{methods}
\name{partition}
\alias{partition}
\alias{partition,character-method}
\alias{partition,environment-method}
\alias{partition,partition-method}
\alias{partition,Corpus-method}
\alias{partition,context-method}
\title{Initialize a partition.}
\usage{
partition(.Object, ...)

\S4method{partition}{character}(.Object, def = NULL, name = "",
  encoding = NULL, p_attribute = NULL, regex = FALSE, xml = "flat",
  decode = TRUE, type = get_type(.Object), mc = FALSE,
  verbose = TRUE, ...)

\S4method{partition}{environment}(.Object, slots = c("name", "corpus",
  "size", "p_attribute"))

\S4method{partition}{partition}(.Object, def = NULL, name = "",
  regex = FALSE, p_attribute = NULL, decode = TRUE, xml = NULL,
  verbose = TRUE, mc = FALSE, ...)

\S4method{partition}{Corpus}(.Object, def = NULL, name = "",
  encoding = NULL, regex = FALSE, xml = "flat",
  type = get_type(.Object), verbose = TRUE, ...)

\S4method{partition}{context}(.Object, node = TRUE)
}
\arguments{
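\item{.Object}{A length-one character-vector, the CWB corpus to be used; alternatively an environment, or a \code{partition}, \code{Corpus} or \code{context} object (see details).}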

\item{...}{Arguments to define partition (see examples).}

\item{def}{A named list of character vectors of s-attribute values, the names
are the s-attributes (see details and examples).}

\item{name}{A name for the new \code{partition} object, defaults to "".}

\item{encoding}{The encoding of the corpus (typically "LATIN1" or "UTF-8");
if NULL, the encoding provided in the registry file of the corpus
(charset="...") will be used.}

\item{p_attribute}{The p-attribute(s) for which a count is performed.}

\item{regex}{A logical value (defaults to FALSE).}

\item{xml}{Either 'flat' (default) or 'nested'.}

\item{decode}{Logical, whether to turn token ids to strings (set FALSE to
minimize object size / memory consumption) in data.table with counts.}

\item{type}{A length-one character vector specifying the type of corpus / partition (e.g. "plpr").}

\item{mc}{Whether to use multicore (for counting terms).}

\item{verbose}{Logical, whether to be verbose.}

\item{slots}{Object slots that will be reported as columns of the \code{data.frame}
summarizing \code{partition} objects in the environment.}

\item{node}{A logical value, whether to include the node (i.e. query matches) in the region matrix
generated when creating a \code{partition} from a \code{context}-object.}
}
\value{
An object of the S4 class \code{partition}.
}
\description{
Create a subcorpus and keep it in an object of the \code{partition} class. If
defined, counts are performed for the p-attribute defined by the parameter
\code{p_attribute}.
}
\details{
The function sets up a \code{partition} object based on s-attribute values.
The s-attributes defining the partition can be passed in as a list, e.g.
\code{list(interjection="speech", year = "2013")}, or directly (see
examples).

The s-attribute values defining the partition may use regular expressions. To
use regular expressions, set the parameter regex to \code{TRUE}. Regular
expressions are passed into \code{grep}, i.e. the regex syntax used in R
needs to be used (double backslashes etc.). If regex is \code{FALSE}, the
length of the character vectors can be > 1, matching s-attributes are
identified with the operator '\%in\%'.

The XML imported into the CWB may be "flat" or "nested". This needs to be
indicated with the parameter \code{xml} (default is "flat"). If you generate
a \code{partition} based on a flat XML structure, some performance gain may be
achieved when ordering the s-attributes with decreasingly restrictive
conditions. If you have a nested XML, it is mandatory that the order of the
s-attributes provided reflects the hierarchy of the XML: The top-level
elements need to be positioned at the beginning of the list with the
s-attributes, the most restrictive elements at the end.

If \code{p_attribute} is not NULL, a count of tokens in the corpus will be
performed and kept in the \code{stat}-slot of the partition-object. The
length of the \code{p_attribute} character vector may be 1 or more. If two or
more p-attributes are provided, the occurrence of combinations will be
counted. A typical scenario is to combine the p-attributes "word" or "lemma"
and "pos".

If \code{.Object} is a length-one character vector, a
  subcorpus/partition for the corpus defined by \code{.Object} is generated.

If \code{.Object} is an environment (typically \code{.GlobalEnv}),
  the \code{partition} objects present in the environment are listed.

If \code{.Object} is a \code{partition} object, a subcorpus of the
  subcorpus is generated.

If \code{.Object} is a \code{Corpus} object, preparing the
  \code{partition} may work more efficiently than if \code{.Object} is a
  length-one character vector.
}
\examples{
use("polmineR")
spd <- partition("GERMAPARLMINI", party = "SPD", interjection = "speech")
kauder <- partition("GERMAPARLMINI", speaker = "Volker Kauder", p_attribute = "word")
merkel <- partition("GERMAPARLMINI", speaker = ".*Merkel", p_attribute = "word", regex = TRUE)
s_attributes(merkel, "date")
s_attributes(merkel, "speaker")
merkel <- partition(
  "GERMAPARLMINI", speaker = "Angela Dorothea Merkel",
  date = "2009-11-10", interjection = "speech", p_attribute = "word"
  )
merkel <- subset(merkel, !word \%in\% punctuation)
merkel <- subset(merkel, !word \%in\% tm::stopwords("de"))
   
# a certain defined time segment
days <- seq(
  from = as.Date("2009-10-28"),
  to = as.Date("2009-11-11"),
  by = "1 day"
)
period <- partition("GERMAPARLMINI", date = days)
}
\seealso{
To learn about the methods available for objects of the class
  \code{partition}, see \code{\link{partition_class}}.
}
\author{
Andreas Blaette
}
