% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/achievementLevels.R
\name{achievementLevels}
\alias{achievementLevels}
\title{Achievement Levels}
\usage{
achievementLevels(
  achievementVars = NULL,
  aggregateBy = NULL,
  data,
  cutpoints = NULL,
  returnDiscrete = TRUE,
  returnCumulative = FALSE,
  weightVar = NULL,
  jrrIMax = 1,
  omittedLevels = TRUE,
  defaultConditions = TRUE,
  recode = NULL,
  returnNumberOfPSU = FALSE,
  returnVarEstInputs = FALSE
)
}
\arguments{
\item{achievementVars}{character vector indicating variables to be included in the achievement 
levels table, potentially with a subject scale or subscale. When the subject 
scale or subscale is omitted, the default subject scale or subscale is 
used. You can find the default composite scale and all subscales using the 
function \code{\link{showPlausibleValues}}.}

\item{aggregateBy}{character vector specifying variables by which to aggregate achievement levels. The percentage
column sums up to 100 for all levels of all variables specified here. When set to the 
default of \code{NULL}, the percentage column sums up to 100 for all 
levels of all variables specified in \code{achievementVars}.}

\item{data}{an \code{edsurvey.data.frame}}

\item{cutpoints}{numeric vector indicating cutpoints. Set to standard NAEP cutpoints for 
Basic, Proficient, and Advanced by default.}

\item{returnDiscrete}{logical indicating if discrete achievement levels should be returned. Defaults 
to \code{TRUE}.}

\item{returnCumulative}{logical indicating if cumulative achievement levels should be returned. Defaults
to \code{FALSE}. The first and last categories are the same as defined for discrete levels.}

\item{weightVar}{character string indicating the weight variable to use.
Only the name of the
weight variable needs to be included here, and any
replicate weights will be automatically included.
When this argument is \code{NULL}, the function uses the default.
Use \code{\link{showWeights}} to find the default.}

\item{jrrIMax}{a numeric value. When using the jackknife variance estimation method, the default estimation option, \code{jrrIMax=1}, uses the 
sampling variance from the first plausible value as the component for sampling variance estimation. The \eqn{V_{jrr}} 
term (see \href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{\emph{Statistical Methods Used in EdSurvey}} for the definition of \eqn{V_{jrr}})
can be estimated with any number of plausible values, and values larger than the number of 
plausible values on the survey (including \code{Inf}) will result in all plausible values being used. 
Higher values of \code{jrrIMax} lead to longer computing times and more accurate variance estimates.}

\item{omittedLevels}{a logical value. When set to the default value (\code{TRUE}), 
it drops the omitted levels from all factor variables that are specified in \code{achievementVars} 
and \code{aggregateBy}. Use \code{print} on an \code{edsurvey.data.frame} 
to see the omitted levels.}

\item{defaultConditions}{a logical value. When set to the default value of \code{TRUE}, uses the default 
conditions stored in an \code{edsurvey.data.frame} to subset the data. 
Use \code{print} on an \code{edsurvey.data.frame} to see the default
conditions.}

\item{recode}{a list of lists to recode variables. Defaults to \code{NULL}. Can be set as
\code{recode} \code{=} \code{list(var1=} \code{list(from=c("a",} \code{"b",} \code{"c"),} \code{to ="d"))}. See Examples.}

\item{returnNumberOfPSU}{a logical value. Set to \code{TRUE} to return the number of 
primary sampling units (PSUs). Defaults to \code{FALSE}.}

\item{returnVarEstInputs}{a logical value set to \code{TRUE} to return the
inputs to the jackknife and imputation variance
estimates, which allows for the computation
of covariances between estimates.}
}
\value{
A \code{list} containing up to two data frames, one for discrete achievement levels (when \code{returnDiscrete} is \code{TRUE})
and one for cumulative achievement levels (when \code{returnCumulative} is \code{TRUE}). Each \code{data.frame} contains the following columns:
\item{Level}{one row for each level of the specified achievement cutpoints}
\item{Variables in achievementVars}{one column for each variable in \code{achievementVars} 
and one row for each level of each variable in \code{achievementVars}}
\item{Percent}{the percentage of students at each achievement level (discrete) or at or above each achievement level (cumulative), aggregated as specified by \code{aggregateBy}}
\item{StandardError}{the standard error of the percentage, accounting for the survey sampling methodology. 
                            See the vignette titled \href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistical Methods Used in EdSurvey}.}
\item{N}{the number of observations in the incoming data (the
                 number of rows when \code{omittedLevels} and
                 \code{defaultConditions} are set to \code{FALSE})}
\item{wtdN}{the weighted number of observations in the data}
\item{nPSU}{the number of PSUs at or above each achievement level aggregated as specified by \code{aggregateBy}. Only returned with \code{returnNumberOfPSU=TRUE}.}
}
\description{
Returns achievement levels using weights and variance estimates appropriate for the \code{edsurvey.data.frame}.
}
\details{
The \code{achievementLevels} function applies appropriate weights
         and the variance estimation method for each
         \code{edsurvey.data.frame}, with several arguments for customizing
         the aggregation and output of the analysis 
         results. Namely, by using these optional arguments, users can choose
         to generate the percentage of students 
         performing at each achievement level (discrete), generate the
         percentage of students performing at or above each achievement level
         (cumulative), 
         calculate the percentage distribution of students by achievement
         level (discrete or cumulative) and 
         selected characteristics (specified in \code{aggregateBy}), and
         compute the percentage distribution of students 
         by selected characteristics within a specific achievement level.

\subsection{Calculation of percentages}{
         The details of the methods are shown in the vignette titled
\href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistical Methods Used in EdSurvey} in 
         \dQuote{Estimation of Weighted Percentages When Plausible Values Are Present} and are used to calculate 
         all cumulative and discrete probabilities.

         When the requested achievement levels are discrete (\code{returnDiscrete = TRUE}),
         the percentage \eqn{\mathcal{A}} is the percentage of students (within the categories specified in \code{aggregateBy}) 
         whose scores lie in the range \eqn{[cutPoints_i, cutPoints_{i+1}), i = 0,1,...,n}.
         Here \eqn{cutPoints_i} is the \eqn{i}th score threshold supplied in the \code{cutpoints} argument, with \eqn{cutPoints_0} taken
         to be 0 and \eqn{cutPoints_{n+1}} taken to be \eqn{\infty}. By default, \code{cutpoints} is set to the NAEP standard cutpoints for achievement levels.
         To aggregate by a specific variable, for example, \code{dsex}, specify \code{dsex} in \code{aggregateBy}
         and all other variables in \code{achievementVars}. To aggregate by subscale, specify 
         the name of the subscale (e.g., \code{num_oper}) in \code{aggregateBy} and all other variables in 
         \code{achievementVars}.
         
         When the requested achievement levels are cumulative (\code{returnCumulative = TRUE}),
         the percentage \eqn{\mathcal{A}} is the percentage of students (within the categories specified in \code{aggregateBy}) 
         whose scores lie in the range  [\eqn{cutPoints_i}, \eqn{\infty}), \eqn{i = 1, 2, ..., n-1}. The 
         first and last categories are the same as defined for discrete levels.
} 
        
\subsection{Calculation of standard error of percentages}{
         The method used to calculate the standard error of the percentages is described in the vignette titled
         \href{https://www.air.org/sites/default/files/EdSurvey-Statistics.pdf}{Statistical Methods Used in EdSurvey} 
         in the sections \dQuote{Estimation of the Standard Error of Weighted Percentages When Plausible Values Are Present, Using the Jackknife Method} 
         and \dQuote{Estimation of the Standard Error of Weighted Percentages When Plausible Values Are Not Present, Using the Taylor Series Method.}
         For \dQuote{Estimation of the Standard Error of Weighted Percentages When Plausible Values Are Present, Using the Jackknife Method,}
        the value of \code{jrrIMax} sets the value of \eqn{m^*}.
}
}
\examples{
\dontrun{
# read in the example data (generated, not real student data)
sdf <- readNAEP(system.file("extdata/data", "M36NT2PM.dat", package="NAEPprimer"))

# discrete achievement levels
achievementLevels(achievementVars=c("composite"), aggregateBy=NULL, data=sdf)

# discrete achievement levels with a different subscale
achievementLevels(achievementVars=c("num_oper"), aggregateBy=NULL, data=sdf)

# cumulative achievement levels
achievementLevels(achievementVars=c("composite"), aggregateBy=NULL, data=sdf, 
                  returnCumulative=TRUE) 

# cumulative achievement levels with a different subscale
achievementLevels(achievementVars=c("num_oper"), aggregateBy=NULL, data=sdf, 
                  returnCumulative=TRUE) 

# achievement levels as independent variables, by sex aggregated by composite
achievementLevels(achievementVars=c("composite", "dsex"), aggregateBy="composite",
                  data=sdf, returnCumulative=TRUE) 

# achievement levels as independent variables, by sex aggregated by sex
achievementLevels(achievementVars=c("composite", "dsex"), aggregateBy="dsex", 
                  data=sdf, returnCumulative=TRUE) 

# achievement levels as independent variables, by race aggregated by race
achievementLevels(achievementVars=c("composite", "sdracem"),
                  aggregateBy="sdracem", data=sdf, returnCumulative=TRUE) 

# use customized cutpoints
achievementLevels(achievementVars=c("composite"), aggregateBy=NULL, data=sdf, 
                  cutpoints = c("Customized Basic" = 200, 
                                "Customized Proficient" = 300, 
                                "Customized Advanced" = 400))

# use recode to change values for specified variables:
achievementLevels(achievementVars=c("composite", "dsex", "b017451"),
                  aggregateBy = "dsex", sdf,
                  recode=list(b017451=list(from=c("Never or hardly ever",
                                                  "Once every few weeks",
                                                  "About once a week"),
                                           to="Infrequently"),
                              b017451=list(from=c("2 or 3 times a week",
                                                  "Every day"),
                                           to="Frequently")))

}
}
\references{
Rubin, D. B. (1987). \emph{Multiple imputation for nonresponse in surveys}. New York, NY: Wiley.
}
\author{
Huade Huo, Ahmad Emad, and Trang Nguyen
}
