% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dfm_compress.R, R/fcm-methods.R
\name{dfm_compress}
\alias{dfm_compress}
\alias{fcm_compress}
\title{Recombine a dfm or fcm by combining identical dimension elements}
\usage{
dfm_compress(
  x,
  margin = c("both", "documents", "features"),
  verbose = quanteda_options("verbose")
)

fcm_compress(x)
}
\arguments{
\item{x}{input object, a \link{dfm} or \link{fcm}}

\item{margin}{character indicating on which margin to compress a dfm, either
\code{"documents"}, \code{"features"}, or \code{"both"} (default).  For fcm
objects, \code{"documents"} has no effect.}

\item{verbose}{if \code{TRUE} print the number of tokens and documents before and
after the function is applied. The number of tokens does not include paddings.}
}
\value{
\code{dfm_compress} returns a \link{dfm} whose dimensions have been
recombined by summing the cells across identical dimension names
(\link{docnames} or \link{featnames}).  The \link{docvars} are preserved
when features are combined, but not when documents are combined.

\code{fcm_compress} returns an \link{fcm} whose identical features have been
combined by summing their counts.
}
\description{
"Compresses" or groups a \link{dfm} or \link{fcm} in which some dimension
names are duplicated, for either documents or features.  This may happen, for instance,
if features are made equivalent through application of a thesaurus.  It could also be needed after a
\code{\link[=cbind.dfm]{cbind.dfm()}} or \code{\link[=rbind.dfm]{rbind.dfm()}} operation.  In most cases, you will not
need to call \code{dfm_compress}, since it is called automatically by functions that change the
dimensions of the dfm, e.g. \code{\link[=dfm_tolower]{dfm_tolower()}}.
}
\note{
\code{fcm_compress} works only when the \link{fcm} was created with a
document context.
}
\examples{
# dfm_compress examples
# build a case-sensitive dfm by row-binding two dfm objects, then lowercase
# the feature names to introduce repeated column names that can be compressed
dfmat <- rbind(dfm(tokens(c("b A A", "C C a b B")), tolower = FALSE),
               dfm(tokens("A C C C C C"), tolower = FALSE))
colnames(dfmat) <- char_tolower(featnames(dfmat))
dfmat
# compress one margin at a time, then both margins (the default)
dfm_compress(dfmat, margin = "documents")
dfm_compress(dfmat, margin = "features")
dfm_compress(dfmat)

# no effect if no compression needed
# compare the dimensions before and after calling dfm_compress
dfmatsubset <- dfm(tokens(data_corpus_inaugural[1:5]))
dim(dfmatsubset)
dim(dfm_compress(dfmatsubset))

# compress an fcm
# fcmat1 is created with a window context rather than a document context
fcmat1 <- fcm(tokens("A D a C E a d F e B A C E D"),
             context = "window", window = 3)
## this will produce an error, since fcm_compress works only on an fcm
## that was created with a document context:
# fcm_compress(fcmat1)

# fcmat2 is created with a document context, so it can be compressed
txt <- c("The fox JUMPED over the dog.",
         "The dog jumped over the fox.")
toks <- tokens(txt, remove_punct = TRUE)
fcmat2 <- fcm(toks, context = "document")
# lowercase the names on both margins, then overwrite the fifth name with fox,
# to introduce repeated feature names before compressing
colnames(fcmat2) <- rownames(fcmat2) <- tolower(colnames(fcmat2))
colnames(fcmat2)[5] <- rownames(fcmat2)[5] <- "fox"
fcmat2
fcm_compress(fcmat2)
}
