% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/receptiviti.R, R/receptiviti_status.R
\name{receptiviti}
\alias{receptiviti}
\alias{receptiviti_status}
\title{Receptiviti API}
\usage{
receptiviti(text, output = NULL, id = NULL, text_column = NULL,
  id_column = NULL, files = NULL, dir = NULL, file_type = "txt",
  encoding = NULL, return_text = FALSE,
  api_args = getOption("receptiviti.api_args", list()),
  frameworks = getOption("receptiviti.frameworks", "all"),
  framework_prefix = TRUE, as_list = FALSE, bundle_size = 1000,
  bundle_byte_limit = 7500000, collapse_lines = FALSE, retry_limit = 50,
  clear_cache = FALSE, clear_scratch_cache = TRUE, request_cache = TRUE,
  cores = detectCores() - 1, use_future = FALSE, in_memory = TRUE,
  verbose = FALSE, overwrite = FALSE, compress = FALSE,
  make_request = TRUE, text_as_paths = FALSE,
  cache = Sys.getenv("RECEPTIVITI_CACHE"), cache_overwrite = FALSE,
  cache_format = Sys.getenv("RECEPTIVITI_CACHE_FORMAT", "parquet"),
  key = Sys.getenv("RECEPTIVITI_KEY"),
  secret = Sys.getenv("RECEPTIVITI_SECRET"),
  url = Sys.getenv("RECEPTIVITI_URL"),
  version = Sys.getenv("RECEPTIVITI_VERSION"),
  endpoint = Sys.getenv("RECEPTIVITI_ENDPOINT"))

receptiviti_status(url = Sys.getenv("RECEPTIVITI_URL"),
  key = Sys.getenv("RECEPTIVITI_KEY"),
  secret = Sys.getenv("RECEPTIVITI_SECRET"), verbose = TRUE,
  include_headers = FALSE)
}
\arguments{
\item{text}{A character vector with text to be processed, path to a directory containing files, or a vector of file paths.
If a single path to a directory, each file is collapsed to a single text. If a path to a file or files,
each line or row is treated as a separate text, unless \code{collapse_lines} is \code{TRUE} (in which case,
files will be read in as part of bundles at processing time, as is always the case when a directory is entered).
Use \code{files} to more reliably enter files, or \code{dir} to more reliably specify a directory.}

\item{output}{Path to a \code{.csv} file to write results to. If this already exists, set \code{overwrite} to \code{TRUE}
to overwrite it.}

\item{id}{Vector of unique IDs the same length as \code{text}, to be included in the results.}

\item{text_column, id_column}{Column name of text/id, if \code{text} is a matrix-like object, or a path to a csv file.}

\item{files}{A list of file paths, as alternate entry to \code{text}.}

\item{dir}{A directory to search for files in, as alternate entry to \code{text}.}

\item{file_type}{File extension to search for, if \code{text} is the path to a directory containing files to be read in.}

\item{encoding}{Encoding of file(s) to be read in. If not specified, this will be detected, which can fail,
resulting in mis-encoded characters; for best (and fastest) results, specify encoding.}

\item{return_text}{Logical; if \code{TRUE}, \code{text} is included as the first column of the result.}

\item{api_args}{A list of additional arguments to pass to the API (e.g., \code{list(sallee_mode = "sparse")}). Defaults to the
\code{receptiviti.api_args} option.}

\item{frameworks}{A vector of frameworks to include results from. Texts are always scored with all available frameworks --
this just specifies what to return. Defaults to \code{all}, to return all scored frameworks. Can be set by the
\code{receptiviti.frameworks} option (e.g., \code{options(receptiviti.frameworks = c("liwc", "sallee"))}).}

\item{framework_prefix}{Logical; if \code{FALSE}, will remove the framework prefix from column names, which may result in duplicates.
If this is not specified, and 1 framework is selected, or \code{as_list} is \code{TRUE}, will default to removing prefixes.}

\item{as_list}{Logical; if \code{TRUE}, returns a list with frameworks in separate entries.}

\item{bundle_size}{Number of texts to include in each request; between 1 and 1,000.}

\item{bundle_byte_limit}{Memory limit (in bytes) of each bundle, under \code{1e7} (10 MB, which is the API's limit).
May need to be lower than the API's limit, depending on the system's requesting library.}

\item{collapse_lines}{Logical; if \code{TRUE}, and \code{text} contains paths to files, each file is treated as a single text.}

\item{retry_limit}{Maximum number of times each request can be retried after hitting a rate limit.}

\item{clear_cache}{Logical; if \code{TRUE}, will clear any existing files in the cache. Use \code{cache_overwrite} if
you want fresh results without clearing or disabling the cache. Use \code{cache = FALSE} to disable the cache.}

\item{clear_scratch_cache}{Logical; if \code{FALSE}, will preserve the bundles written when \code{in_memory} is \code{FALSE}, after
the request has been made.}

\item{request_cache}{Logical; if \code{FALSE}, will always make a fresh request, rather than using the response
from a previous identical request.}

\item{cores}{Number of CPU cores to split bundles across, if there are multiple bundles. See the Parallelization section.}

\item{use_future}{Logical; if \code{TRUE}, uses a \code{future} back-end to process bundles, in which case,
parallelization can be controlled with the \code{\link[future]{plan}} function (e.g., \code{plan("multisession")}
to use multiple cores); this is required to see progress bars when using multiple cores. See the Parallelization section.}

\item{in_memory}{Logical; if \code{FALSE}, will write bundles to temporary files, and only load them as they are being requested.}

\item{verbose}{Logical; if \code{TRUE}, will show status messages.}

\item{overwrite}{Logical; if \code{TRUE}, will overwrite an existing \code{output} file.}

\item{compress}{Logical; if \code{TRUE}, will save as an \code{xz}-compressed file.}

\item{make_request}{Logical; if \code{FALSE}, a request is not made. This could be useful if you want to be sure to
load from one of the caches, but aren't sure that all results exist there; it will error out if it encounters
texts it has no other source for.}

\item{text_as_paths}{Logical; if \code{TRUE}, ensures \code{text} is treated as a vector of file paths. Otherwise, this will be
determined if there are no \code{NA}s in \code{text} and every entry is under 500 characters long.}

\item{cache}{Path to a directory in which to save unique results for reuse; defaults to
\code{Sys.getenv("RECEPTIVITI_CACHE")}. See the Cache section for details.}

\item{cache_overwrite}{Logical; if \code{TRUE}, will write results to the cache without reading from it. This could be used
if you want fresh results to be cached without clearing the cache.}

\item{cache_format}{Format of the cache database; see \code{\link[arrow]{FileFormat}}.
Defaults to \code{Sys.getenv("RECEPTIVITI_CACHE_FORMAT")}.}

\item{key}{API Key; defaults to \code{Sys.getenv("RECEPTIVITI_KEY")}.}

\item{secret}{API Secret; defaults to \code{Sys.getenv("RECEPTIVITI_SECRET")}.}

\item{url}{API URL; defaults to \code{Sys.getenv("RECEPTIVITI_URL")}, which defaults to
\code{"https://api.receptiviti.com/"}.}

\item{version}{API version; defaults to \code{Sys.getenv("RECEPTIVITI_VERSION")}, which defaults to
\code{"v1"}.}

\item{endpoint}{API endpoint (path name after the version); defaults to \code{Sys.getenv("RECEPTIVITI_ENDPOINT")},
which defaults to \code{"framework"}.}

\item{include_headers}{Logical; if \code{TRUE}, \code{receptiviti_status}'s verbose message will include
the HTTP headers.}
}
\value{
A \code{data.frame} with columns for \code{text} (if \code{return_text} is \code{TRUE}; the originally entered text),
\code{id} (if one was provided), \code{text_hash} (the MD5 hash of the text), a column each for relevant entries in \code{api_args},
and scores from each included framework (e.g., \code{summary.word_count} and \code{liwc.i}). If \code{as_list} is \code{TRUE},
returns a list with a named entry containing such a \code{data.frame} for each framework.
}
\description{
The main function to access the \href{https://www.receptiviti.com}{Receptiviti} API.
}
\section{Cache}{

If the \code{cache} argument is specified, results for unique texts are saved in an
\href{https://arrow.apache.org}{Arrow} database in the cache location
(\code{Sys.getenv("RECEPTIVITI_CACHE")}), and are retrieved with subsequent requests.
This ensures that the exact same texts are not re-sent to the API.
This does, however, add some processing time and disc space usage.

If \code{cache} is \code{TRUE}, a default directory (\code{receptiviti_cache}) will be looked for
in the system's temporary directory (which is usually the parent of \code{tempdir()}).
If this does not exist, you will be asked if it should be created.

The primary cache is checked when each bundle is processed, and existing results are loaded at
that time. When processing many bundles in parallel, and many results have been cached,
this can cause the system to freeze and potentially crash.
To avoid this, limit the number of cores, or disable parallel processing.

The \code{cache_format} argument (or the \code{RECEPTIVITI_CACHE_FORMAT} environment variable) can be used to adjust the format of the cache.

You can use the cache independently with \code{open_database(Sys.getenv("RECEPTIVITI_CACHE"))}.

You can also set the \code{clear_cache} argument to \code{TRUE} to clear the cache before it is used again, which may be useful
if the cache has gotten big, or you know new results will be returned. Even if a cached result exists, it will be
reprocessed if it does not have all of the variables of new results, but this depends on there being at least 1 uncached
result. If, for instance, you add a framework to your account and want to reprocess a previously processed set of texts,
you would need to first clear the cache.

Either way, duplicated texts within the same call will only be sent once.

The \code{request_cache} argument controls a more temporary cache of each bundle request. This is cleared when the
R session ends. You might want to set this to \code{FALSE} if a new framework becomes available on your account
and you want to process a set of text you already processed in the current R session without restarting.

Another temporary cache is made when \code{in_memory} is \code{FALSE}, which is the default when processing
in parallel (when \code{cores} is over \code{1} or \code{use_future} is \code{TRUE}). This contains
a file for each unique bundle, which is read in as needed by the parallel workers.
}

\section{Parallelization}{

\code{text}s are split into bundles based on the \code{bundle_size} argument. Each bundle represents
a single request to the API, which is why they are limited to 1000 texts and a total size of 10 MB.
When there is more than one bundle and either \code{cores} is greater than 1 or \code{use_future} is \code{TRUE} (and you've
externally specified a \code{\link[future]{plan}}), bundles are processed by multiple cores.

If you have texts spread across multiple files, they can be most efficiently processed in parallel
if each file contains a single text (potentially collapsed from multiple lines). If files contain
multiple texts (i.e., \code{collapse_lines = FALSE}), then texts need to be read in before bundling
in order to ensure bundles are under the length limit.

Whether processing in serial or parallel, progress bars can be specified externally with
\code{\link[progressr]{handlers}}; see examples.
}

\examples{
\dontrun{

# check that the API is available, and your credentials work
receptiviti_status()

# score a single text
single <- receptiviti("a text to score")

# score multiple texts, and write results to a file
multi <- receptiviti(c("first text to score", "second text"), "filename.csv")

# score many texts in separate files
## defaults to look for .txt files
file_results <- receptiviti(dir = "./path/to/txt_folder")

## could be .csv
file_results <- receptiviti(
  dir = "./path/to/csv_folder",
  text_column = "text", file_type = "csv"
)

# score many texts from a file, with a progress bar
## set up cores and progress bar (only necessary if you want the progress bar)
future::plan("multisession")
progressr::handlers(global = TRUE)
progressr::handlers("progress")

## make request
results <- receptiviti(
  "./path/to/largefile.csv",
  text_column = "text", use_future = TRUE
)
}
}
