% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/prep.R
\name{prep}
\alias{prep}
\title{Creates One Finalized Table Ready for Statistical Analysis}
\usage{
prep(
   dataset = NULL
   , file_name = NULL
   , id = NULL
   , within_vars = c()
   , between_vars = c()
   , dvc = NULL
   , dvd = NULL
   , keep_trials = NULL
   , drop_vars = c()
   , keep_trials_dvc = NULL
   , keep_trials_dvd = NULL
   , id_properties = c()
   , sd_criterion = c(1, 1.5, 2)
   , percentiles = c(0.05, 0.25, 0.75, 0.95)
   , outlier_removal = NULL
   , keep_trials_outlier = NULL
   , decimal_places = 4
   , notification = TRUE
   , dm = c()
   , save_results = TRUE
   , results_name = "results.txt"
   , save_summary = TRUE
)
}
\arguments{
\item{dataset}{Name of the data frame in R that contains the long format
table after merging the individual data files using
\code{file_merge()}. Either \code{dataset} or \code{file_name} must be
provided. Default is \code{NULL}.}

\item{file_name}{A string with the name of a txt or csv file (including the
file extension, e.g. \code{"my_data.txt"}) with the merged
dataset in case the user already merged the individual data files. Either
\code{dataset} or \code{file_name} must be provided. Default is
\code{NULL}.}

\item{id}{A string with the name of the column in \code{file_name} or in
\code{dataset} that contains the variable specifying the case identifier
(e.g., \code{"subject_number"}). This should be a unique value per case.
Values in this column must be numeric. Argument must be provided. Default
is \code{NULL}.}

\item{within_vars}{A vector with names of column or columns in
\code{file_name} or in \code{dataset} that contain independent
variables manipulated (or observed) within-ids (i.e., within-subjects,
repeated measures). Single or multiple values must be specified as a
string (e.g., \code{c("SOA", "condition")}) according to the hierarchical
order you wish. Values in these columns must be numeric. Either
\code{within_vars} or \code{between_vars} or both arguments must be
provided. Default is \code{c()}.}

\item{between_vars}{A vector with names of column or columns in
\code{file_name} or in \code{dataset} that contain independent variables
manipulated (or observed) between-ids (i.e., between-subjects). Single or
multiple values must be specified as a string (e.g., \code{c("order")}).
Values in these columns must be numeric. Either \code{between_vars} or
\code{within_vars} or both arguments must be provided. Default is
\code{c()}.}

\item{dvc}{A string with the name of the column in \code{file_name} or in
\code{dataset} that contains the dependent variable (e.g., \code{"rt"}
for reaction-time as a dependent variable). Values in this column must be
in an interval or ratio scale. Either \code{dvc} or \code{dvd} or both
arguments must be provided. Default is \code{NULL}.}

\item{dvd}{A string with the name of the column in \code{file_name} or in
\code{dataset} that contains the dependent variable (e.g., \code{"ac"}
for accuracy as a dependent variable). Values in this column must be
numeric and discrete (e.g., 0 and 1). Either \code{dvc} or \code{dvd} or
both arguments must be provided. Default is \code{NULL}.}

\item{keep_trials}{A string. Keeps trials in the data frame according to
trials specified with logical conditions as strings. Single or multiple
logical conditions must be specified as \code{"raw_data$bar == baz"}
(e.g., \code{"raw_data$practice_experiment == 2 & raw_data$block > 1"}).
Default is \code{NULL}.}

\item{drop_vars}{A vector with names of columns to drop in \code{file_name}
or in \code{dataset}. Single or multiple values must be specified as a
string (e.g., \code{c("font_size")}). Default is \code{c()}.}

\item{keep_trials_dvc}{A string. Keeps trials for \code{dvc} data according
to trials specified with logical conditions as strings. Single or multiple
logical conditions must be specified as \code{"raw_data$bar == baz"}
(e.g., \code{"raw_data$rt > 100 & raw_data$rt < 3000 & raw_data$ac == 1"}).
All dependent measures for \code{dvc} except for those specified in
\code{outlier_removal} will be calculated on these trials. Default is
\code{NULL}.}

\item{keep_trials_dvd}{A string. Keeps trials for \code{dvd} data according
to trials specified with logical conditions as strings. Single or multiple
logical conditions must be specified as \code{"raw_data$bar == baz"}
(e.g., \code{"raw_data$rt > 100 & raw_data$rt < 3000"}). All dependent
measures for \code{dvd} will be calculated on these trials (i.e., "mdvd"
and "merr"). Default is \code{NULL}.}

\item{id_properties}{A vector with names of columns in \code{dataset} or in
\code{file_name} that describe ids and are not manipulated (or observed)
within- or between-ids (e.g., \code{c("age", "gender")}). Single or
multiple values must be specified as a string. Values in these columns
must be numeric. Default is \code{c()}.}

\item{sd_criterion}{A vector containing criteria to reject all values
above a criterion number of standard deviations from the mean \code{dvc}
of each \code{id} by the combination of between and/or within grouping
independent variables. Values in this vector must be numeric. Default is
\code{c(1, 1.5, 2)}.}

\item{percentiles}{A vector containing wanted percentiles for \code{dvc}.
Values in this vector must be decimal numbers between 0 and 1. Percentiles
are calculated according to \code{type = 7} (see
\code{\link[stats]{quantile}} for more information). Default is
\code{c(0.05, 0.25, 0.75, 0.95)}.}

\item{outlier_removal}{Numeric. Specifies which outlier removal procedure
with moving criterion to calculate for \code{dvc} according to procedures
described by Van Selst & Jolicoeur (1994). If \code{1} then non-recursive
procedure is calculated, if \code{2} then modified recursive procedure is
calculated, if \code{3} then hybrid recursive procedure is calculated.
Moving criterion is according to Table 4 in Van Selst & Jolicoeur (1994).
If an experimental cell has 4 trials or fewer, the result is \code{NA}.
Default is \code{NULL}.}

\item{keep_trials_outlier}{A string. Keeps trials according to trials
specified with logical conditions as strings. \code{outlier_removal}
procedure will be calculated on the remaining trials. Single or multiple
logical conditions must be specified as \code{"raw_data$bar == baz"}
(e.g., \code{"raw_data$ac == 1"}). Default is \code{NULL}.}

\item{decimal_places}{Numeric. Specifies number of decimals to be written
for each value in \code{results_name}. Value must be numeric. Default is
\code{4}.}

\item{notification}{Logical. If TRUE, prints messages about the progress of
the function. Default is \code{TRUE}.}

\item{dm}{A vector with names of dependent measures the function creates. If
empty (i.e., \code{c()}) the function returns a data frame with all
dependent measures. Values in this vector must be strings from the
following list: "mdvc", "sdvc", "meddvc",
"tdvc", "ntr", "ndvc", "ptr", "prt", "rminv", "mdvd", "merr". Default is
\code{c()}. See return for more details.}

\item{save_results}{Logical. If TRUE, the function creates a txt file
containing the returned data frame. Default is \code{TRUE}.}

\item{results_name}{A string with the name of the txt file in which the
returned data frame is saved in case \code{save_results} is TRUE. Default is
\code{"results.txt"}.}

\item{save_summary}{Logical. If TRUE, creates a summary txt file. Default is
\code{TRUE}.}
}
\value{
A data frame with dependent measures for dependent variables by
 \code{id} and independent grouping variables:

     \code{mdvc}: mean \code{dvc}.

     \code{sdvc}: SD for \code{dvc}.

     \code{meddvc}: median \code{dvc}.

     \code{tdvc}: mean \code{dvc} after rejecting observations above
     standard deviation criterion/s specified in \code{sd_criterion}.

     \code{ntr}: number of observations rejected for each standard deviation
     criterion/s specified in \code{sd_criterion}.

     \code{ndvc}: number of observations before rejection.

     \code{ptr}: proportion of observations rejected for each standard
     deviation criterion/s specified in \code{sd_criterion}.

     \code{rminv}: harmonic mean \code{dvc}.

     \code{prt}: \code{dvc} according to each of the percentiles specified
     in \code{percentiles}.

     \code{mdvd}: mean \code{dvd}.

     \code{merr}: mean error.

     \code{nrmc}: mean \code{dvc} according to non-recursive procedure with
     moving criterion.

     \code{nnrmc}: number of observations rejected for \code{dvc} according
     to non-recursive procedure with moving criterion.

     \code{pnrmc}: percent of observations rejected for \code{dvc} according
     to non-recursive procedure with moving criterion.

     \code{tnrmc}: total number of observations upon which the non-recursive
     procedure with moving criterion was applied.

     \code{mrmc}: mean \code{dvc} according to modified-recursive procedure
     with moving criterion.

     \code{nmrmc}: number of observations rejected for \code{dvc} according
     to modified-recursive procedure with moving criterion.

     \code{pmrmc}: percent of observations rejected for \code{dvc} according
     to modified-recursive procedure with moving criterion.

     \code{tmrmc}: total number of observations upon which the
     modified-recursive procedure with moving criterion was applied.

     \code{hrmc}: mean \code{dvc} according to hybrid-recursive procedure
     with moving criterion.

     \code{nhrmc}: number of observations rejected for \code{dvc} according
     to hybrid-recursive procedure with moving criterion.

     \code{thrmc}: total number of observations upon which the
     hybrid-recursive procedure with moving criterion was applied.
}
\description{
\code{prep()} aggregates a single dataset according to any
   combination of between and within grouping variables (i.e.,
   between-subjects and within-subjects independent variables,
   respectively), and returns a data frame with a number of dependent
   measures for further analysis for each experimental cell according to the
   combination of provided grouping variables. Dependent measures for each
   experimental cell include among others means before and after rejecting
   all observations according to a flexible standard deviation criterion/s,
   number of rejected observations according to the flexible standard
   deviation criterion/s, proportions of rejected observations according to
   the flexible standard deviation criterion/s, number of observations
   before rejection, means after rejecting observations according to
   procedures described in Van Selst & Jolicoeur (1994) (suitable when
   measuring reaction-times), standard deviations, medians, means according
   to any percentile (e.g., 0.05, 0.25, 0.75, 0.95) and harmonic means. The
   data frame \code{prep()} returns can also be exported as a txt file to be used
   for statistical analysis in other statistical programs.
}
\examples{
data(stroopdata)
finalized_data <- prep(
         dataset = stroopdata
         , file_name = NULL
         , id = "subject"
         , within_vars = c("block", "target_type")
         , between_vars = c("order")
         , dvc = "rt"
         , dvd = "ac"
         , keep_trials = NULL
         , drop_vars = c()
         , keep_trials_dvc = "raw_data$rt > 100 & raw_data$rt < 3000 & raw_data$ac == 1"
         , keep_trials_dvd = "raw_data$rt > 100 & raw_data$rt < 3000"
         , id_properties = c()
         , sd_criterion = c(1, 1.5, 2)
         , percentiles = c(0.05, 0.25, 0.75, 0.95)
         , outlier_removal = 2
         , keep_trials_outlier = "raw_data$ac == 1"
         , decimal_places = 4
         , notification = TRUE
         , dm = c()
         , save_results = TRUE
         , results_name = "results.txt"
         , save_summary = TRUE
      )
}
\references{
Grange, J.A. (2015). trimr: An implementation of common response
 time trimming methods. R Package Version 1.0.0.
 \url{https://cran.r-project.org/package=trimr}

Van Selst, M., & Jolicoeur, P. (1994). A solution to the effect of sample
size on outlier elimination. \emph{The Quarterly Journal of Experimental
Psychology, 47}(3), 631-650.
}

