% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataset_trec.R
\name{dataset_trec}
\alias{dataset_trec}
\title{TREC dataset}
\source{
\url{https://cogcomp.seas.upenn.edu/Data/QA/QC/}

\url{https://trec.nist.gov/data/qa.html}
}
\usage{
dataset_trec(
  dir = NULL,
  split = c("train", "test"),
  version = c("6", "50"),
  delete = FALSE,
  return_path = FALSE,
  clean = FALSE,
  manual_download = FALSE
)
}
\arguments{
\item{dir}{Character, path to directory where data will be stored. If
\code{NULL}, \link[rappdirs]{user_cache_dir} will be used to determine path.}

\item{split}{Character. Return training ("train") data or testing ("test")
data. Defaults to "train".}

\item{version}{Character. Version 6 ("6") or version 50 ("50"). Defaults to
"6".}

\item{delete}{Logical, set \code{TRUE} to delete dataset.}

\item{return_path}{Logical, set \code{TRUE} to return the path of the dataset.}

\item{clean}{Logical, set \code{TRUE} to remove intermediate files. This can
greatly reduce the size. Defaults to \code{FALSE}.}

\item{manual_download}{Logical, set \code{TRUE} if you have manually
downloaded the file and placed it in the folder designated by running
this function with \code{return_path = TRUE}.}
}
\value{
A tibble with 5,452 or 500 rows for "train" and "test"
    respectively and 2 variables:
\describe{
  \item{class}{Character, denoting the class}
  \item{text}{Character, question text}
}
}
\description{
The TREC dataset is a dataset for question classification consisting of
open-domain, fact-based questions divided into broad semantic categories.
It has both a six-class (TREC-6) and a fifty-class (TREC-50) version. Both
have 5,452 training examples and 500 test examples, but TREC-50 has
finer-grained labels. Models are evaluated based on accuracy.
}
\details{
The classes in TREC-6 are

\itemize{
\item ABBR - Abbreviation
\item DESC - Description and abstract concepts
\item ENTY - Entities
\item HUM - Human beings
\item LOC - Locations
\item NUM - Numeric values
}

The classes in TREC-50 are listed at
\url{https://cogcomp.seas.upenn.edu/Data/QA/QC/definition.html}.
}
\examples{
\dontrun{
dataset_trec()

# Custom directory
dataset_trec(dir = "data/")

# Deleting dataset
dataset_trec(delete = TRUE)

# Returning filepath of data
dataset_trec(return_path = TRUE)

# Access both training and testing dataset
train_6 <- dataset_trec(split = "train")
test_6 <- dataset_trec(split = "test")

train_50 <- dataset_trec(split = "train", version = "50")
test_50 <- dataset_trec(split = "test", version = "50")
}

}
\seealso{
Other topic: 
\code{\link{dataset_ag_news}()},
\code{\link{dataset_dbpedia}()}
}
\concept{topic}
\keyword{datasets}
