\name{word_stats}
\alias{word_stats}
\title{Descriptive Word Statistics}
\usage{
  word_stats(text.var, grouping.var = NULL, tot = NULL,
    parallel = FALSE, rm.incomplete = FALSE,
    digit.remove = FALSE, apostrophe.remove = FALSE,
    digits = 3, ...)
}
\arguments{
  \item{text.var}{The text variable or a
  \code{"word_stats"} object (i.e., the output of the
  \code{word_stats} function).}

  \item{grouping.var}{The grouping variables.  Default NULL
  generates one output for all text.  Also takes a single
  grouping variable or a list of 1 or more grouping
  variables.}

  \item{tot}{Optional turns of talk variable that yields
  turn of talk measures.}

  \item{parallel}{logical.  If TRUE attempts to run the
  function on multiple cores.  Note that this may not mean
  a speed boost if you have one core or if the data set is
  smaller as the cluster takes time to create (parallel is
  slower until approximately 10,000 rows).  To reduce run
  time pass a \code{"word_stats"} object to the
  \code{\link[qdap]{word_stats}} function.}

  \item{rm.incomplete}{logical.  If TRUE incomplete
  statements are removed from calculations in the output.}

  \item{digit.remove}{logical.  If TRUE digits are removed
  before calculating the output.}

  \item{apostrophe.remove}{logical.  If TRUE apostrophes
  are removed before calculating the output.}

  \item{digits}{Integer; number of decimal places to round
  when printing.}

  \item{\ldots}{Any other arguments passed to
  \code{\link[qdap]{end_inc}}.}
}
\value{
  Returns a list with the following elements:
  \item{ts}{A data frame of descriptive word statistics by
  row} \item{gts}{A data frame of word/sentence statistics
  per grouping variable: \itemize{ \item{n.tot}{ - number
  of turns of talk} \item{n.sent}{ - number of sentences}
  \item{n.words}{ - number of words} \item{n.char}{ -
  number of characters} \item{n.syl}{ - number of
  syllables} \item{n.poly}{ - number of polysyllables}
  \item{sptot}{ - syllables per turn of talk} \item{wptot}{
  - words per turn of talk} \item{wps}{ - words per
  sentence} \item{cps}{ - characters per sentence}
  \item{sps}{ - syllables per sentence} \item{psps}{ -
  poly-syllables per sentence} \item{cpw}{ - characters per
  word} \item{spw}{ - syllables per word} \item{n.state}{ -
  number of statements} \item{n.quest}{ - number of
  questions} \item{n.exclm}{ - number of exclamations}
  \item{n.incom}{ - number of incomplete statements}
  \item{p.state}{ - proportion of statements}
  \item{p.quest}{ - proportion of questions}
  \item{p.exclm}{ - proportion of exclamations}
  \item{p.incom}{ - proportion of incomplete statements}
  \item{n.hapax}{ - number of hapax legomena (words that
  occur only once)}
  \item{n.dis}{ - number of dis legomena (words that occur
  exactly twice)}
  \item{grow.rate}{ - proportion of hapax legomena to
  words} \item{prop.dis}{ - proportion of dis legomena to
  words} } } \item{mpun}{An account of sentences with an
  improper/missing end mark} \item{word.elem}{A data frame
  with word element columns from gts} \item{sent.elem}{A
  data frame with sentence element columns from gts}
  \item{omit}{Counter of omitted sentences for internal use
  (only included if some rows contained missing values)}
  \item{percent}{The value of percent used for plotting
  purposes.} \item{zero.replace}{The value of zero.replace
  used for plotting purposes.} \item{digits}{integer value
  of number of digits to display; mostly internal use}
}
\description{
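  Transcript apply descriptive word statistics: computes
  descriptive word statistics for a text variable,
  optionally by grouping variable(s) and turn of talk.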
}
\examples{
\dontrun{
word_stats(mraja1spl$dialogue, mraja1spl$person)
(desc_wrds <- with(mraja1spl, word_stats(dialogue, person, tot = tot)))
with(mraja1spl, word_stats(desc_wrds, person, tot = tot)) #speed boost
names(desc_wrds)
htruncdf(desc_wrds$ts, 15, 5)
htruncdf(desc_wrds$gts, 15, 6)
desc_wrds$mpun
desc_wrds$word.elem
desc_wrds$sent.elem
plot(desc_wrds)
plot(desc_wrds, label=TRUE, lab.digits = 1)
with(mraja1spl, word_stats(dialogue, list(sex, died, fam.aff)))
}
}
\keyword{descriptive}
\keyword{statistic}

