% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/attrisk_analysis.R
\name{attrisk_analysis}
\alias{attrisk_analysis}
\title{Attributable risk analysis}
\usage{
attrisk_analysis(
  dframe,
  vars_response,
  vars_stressor,
  response_levels = NULL,
  stressor_levels = NULL,
  subpops = NULL,
  siteID = NULL,
  weight = "weight",
  xcoord = NULL,
  ycoord = NULL,
  stratumID = NULL,
  clusterID = NULL,
  weight1 = NULL,
  xcoord1 = NULL,
  ycoord1 = NULL,
  sizeweight = FALSE,
  sweight = NULL,
  sweight1 = NULL,
  fpc = NULL,
  popsize = NULL,
  vartype = "Local",
  conf = 95,
  All_Sites = FALSE
)
}
\arguments{
\item{dframe}{Data to be analyzed (analysis data). A data frame or
\code{sf} object containing survey design
variables, response variables, stressor variables, and subpopulation
(domain) variables.}

\item{vars_response}{Vector composed of character values that identify the
names of response variables in \code{dframe}. Each response
variable must have two category values (levels), where one level is
associated with poor condition and the other level is associated with good
condition.}

\item{vars_stressor}{Vector composed of character values that identify the
names of stressor variables in \code{dframe}. Each stressor
variable must have two category values (levels), where one level is
associated with poor condition and the other level is associated with good
condition.}

\item{response_levels}{List providing the category values (levels) for each
element in the \code{vars_response} argument.  Each element in the list
must contain two values, where the first value identifies poor condition,
and the second value identifies good condition.  This argument must be
named and must be the same length as argument \code{vars_response}.  Names
for this argument must match the values in the \code{vars_response}
argument. If this argument equals NULL, then a named list is created that
contains the values \code{"Poor"} and \code{"Good"} for the first and
second levels, respectively, of each element in the \code{vars_response}
argument and that uses values in the \code{vars_response} argument as names
for the list.  The default value is NULL.}

\item{stressor_levels}{List providing the category values (levels) for each
element in the \code{vars_stressor} argument.  Each element in the list
must contain two values, where the first value identifies poor condition,
and the second value identifies good condition.  This argument must be
named and must be the same length as argument \code{vars_stressor}.  Names
for this argument must match the values in the \code{vars_stressor}
argument. If this argument equals NULL, then a named list is created that
contains the values \code{"Poor"} and \code{"Good"} for the first and
second levels, respectively, of each element in the \code{vars_stressor}
argument and that uses values in the \code{vars_stressor} argument as names
for the list.  The default value is NULL.}

\item{subpops}{Vector composed of character values that identify the
names of subpopulation (domain) variables in \code{dframe}.
If a value is not provided, the value \code{"All_Sites"} is assigned to the
subpops argument and a factor variable named \code{"All_Sites"} that takes
the value \code{"All Sites"} is added to \code{dframe}.  The
default value is \code{NULL}.}

\item{siteID}{Character value providing the name of the site ID variable in
\code{dframe}.  For a two-stage sample, the site ID variable
identifies stage two site IDs.  The default value is \code{NULL}, which
assumes that each row in \code{dframe} represents a unique site.}

\item{weight}{Character value providing the name of the design weight
variable in \code{dframe}.  For a two-stage sample, the
weight variable identifies stage two weights.  The default value is
\code{"weight"}.}

\item{xcoord}{Character value providing name of the x-coordinate variable in
\code{dframe}.  For a two-stage sample, the x-coordinate
variable identifies stage two x-coordinates.  Note that x-coordinates are
required for calculation of the local mean variance estimator.  If \code{dframe}
is an \code{sf} object, this argument is not required (as the geometry column
in \code{dframe} is used to find the x-coordinate). The default
value is \code{NULL}.}

\item{ycoord}{Character value providing name of the y-coordinate variable in
\code{dframe}.  For a two-stage sample, the y-coordinate
variable identifies stage two y-coordinates.  Note that y-coordinates are
required for calculation of the local mean variance estimator.  If \code{dframe}
is an \code{sf} object, this argument is not required (as the geometry column
in \code{dframe} is used to find the y-coordinate). The default
value is \code{NULL}.}

\item{stratumID}{Character value providing the name of the stratum ID
variable in \code{dframe}.  The default value is
\code{NULL}.}

\item{clusterID}{Character value providing the name of the cluster
(stage one) ID variable in \code{dframe}.  Note that cluster
IDs are required for a two-stage sample.  The default value is \code{NULL}.}

\item{weight1}{Character value providing the name of the stage one weight
variable in \code{dframe}.  The default value is
\code{NULL}.}

\item{xcoord1}{Character value providing the name of the stage one
x-coordinate variable in \code{dframe}.  Note that
x-coordinates are required for calculation of the local mean variance
estimator.  The default value is \code{NULL}.}

\item{ycoord1}{Character value providing the name of the stage one
y-coordinate variable in \code{dframe}.  Note that
y-coordinates are required for calculation of the local mean variance
estimator.  The default value is \code{NULL}.}

\item{sizeweight}{Logical value that indicates whether size weights should be
used during estimation, where \code{TRUE} uses size weights and
\code{FALSE} does not use size weights. To employ size weights for a
single-stage sample, a value must be supplied for argument \code{weight}.  To
employ size weights for a two-stage sample, values must be supplied for
arguments \code{weight} and \code{weight1}. The default value is
\code{FALSE}.}

\item{sweight}{Character value providing the name of the size weight variable
in \code{dframe}.  For a two-stage sample, the size weight
variable identifies stage two size weights.  The default value is
\code{NULL}.}

\item{sweight1}{Character value providing the name of the stage one size
weight variable in \code{dframe}.  The default value is
\code{NULL}.}

\item{fpc}{Object that specifies values required for calculation of the
  finite population correction factor used during variance estimation. The
  object must match the survey design in terms of stratification and whether
  the design is single-stage or two-stage.  For an unstratified design, the
  object is a vector.  The vector is composed of a single numeric value for a
  single-stage design.  For a two-stage unstratified design, the object is a
  named vector containing one more than the number of clusters in the sample,
  where the first item in the vector specifies the number of clusters in the
  population and each subsequent item specifies the number of stage two units
  for the cluster.  The name for the first item in the vector is arbitrary.
  Subsequent names in the vector identify clusters and must match the cluster
  IDs.  For a stratified design, the object is a named list of vectors, where
  names must match the strata IDs.  For each stratum, the format of the
  vector is identical to the format described for unstratified single-stage
  and two-stage designs.  Note that the finite population correction factor
  is not used with the local mean variance estimator.

  Example fpc for a single-stage unstratified survey design:

  \verb{fpc <- 15000}

  Example fpc for a single-stage stratified survey design:

  \verb{fpc <- list(
    Stratum_1 = 9000,
    Stratum_2 = 6000)
   }

  Example fpc for a two-stage unstratified survey design:

  \verb{fpc <- c(
    Ncluster = 150,
    Cluster_1 = 150,
    Cluster_2 = 75,
    Cluster_3 = 75,
    Cluster_4 = 125,
    Cluster_5 = 75)
  }

  Example fpc for a two-stage stratified survey design:

  \verb{fpc <- list(
    Stratum_1 = c(
      Ncluster_1 = 100,
      Cluster_1 = 125,
      Cluster_2 = 100,
      Cluster_3 = 100,
      Cluster_4 = 125,
      Cluster_5 = 50),
    Stratum_2 = c(
      Ncluster_2 = 50,
      Cluster_1 = 75,
      Cluster_2 = 150,
      Cluster_3 = 75,
      Cluster_4 = 75,
      Cluster_5 = 125))
  }}

\item{popsize}{Object that provides values for the population argument of the
  \code{calibrate} or \code{postStratify} functions in the survey package. If
  a value is provided for popsize, then either the \code{calibrate} or
  \code{postStratify} function is used to modify the survey design object
  that is required by functions in the survey package.  Whether to use the
  \code{calibrate} or \code{postStratify} function is dictated by the format
  of popsize, which is discussed below.  Post-stratification adjusts the
  sampling and replicate weights so that the joint distribution of a set of
  post-stratifying variables matches the known population joint distribution.
  Calibration, generalized raking, or GREG estimators generalize
  post-stratification and raking by calibrating a sample to the marginal
  totals of variables in a linear regression model. For the \code{calibrate}
  function, the object is a named list, where the names identify factor
  variables in \code{dframe}.  Each element of the list is a
  named vector containing the population total for each level of the
  associated factor variable.  For the \code{postStratify} function, the
  object is either a data frame, table, or xtabs object that provides the
  population total for all combinations of selected factor variables in the
  \code{dframe} data frame.  If a data frame is used for \code{popsize}, the
  variable containing population totals must be the last variable in the data
  frame.  If a table is used for \code{popsize}, the table must have named
  \code{dimnames} where the names identify factor variables in the
  \code{dframe} data frame.  If the popsize argument is equal to \code{NULL},
  then neither calibration nor post-stratification is performed.  The default
  value is \code{NULL}.

  Example popsize for calibration:

  \verb{popsize <- list(
    Ecoregion = c(
      East = 750,
      Central = 500,
      West = 250),
    Type = c(
      Streams = 1150,
      Rivers = 350))
  }

  Example popsize for post-stratification using a data frame:

  \verb{popsize <- data.frame(
    Ecoregion = rep(c("East", "Central", "West"),
      rep(2, 3)),
    Type = rep(c("Streams", "Rivers"), 3),
    Total = c(575, 175, 400, 100, 175, 75))
  }

  Example popsize for post-stratification using a table:

  \verb{popsize <- with(MySurveyFrame,
    table(Ecoregion, Type))}

  Example popsize for post-stratification using an xtabs object:

  \verb{popsize <- xtabs(~Ecoregion + Type,
    data = MySurveyFrame)}}

\item{vartype}{Character value providing the choice of the variance
estimator, where \code{"Local"} indicates the local mean estimator and \code{"SRS"} indicates the
simple random sampling estimator.  The default value is \code{"Local"}.}

\item{conf}{Numeric value providing the Gaussian-based confidence level.  The default value
is \code{95}.}

\item{All_Sites}{A logical variable used when \code{subpops} is not
\code{NULL}. If \code{All_Sites} is \code{TRUE}, then alongside the
subpopulation output, output for all sites (ignoring subpopulations) is
returned for each variable in \code{vars_response} and
\code{vars_stressor}. If \code{All_Sites} is
\code{FALSE}, then alongside the subpopulation output, output for all sites
(ignoring subpopulations) is not returned for each variable in
\code{vars_response} and \code{vars_stressor}.
The default is \code{FALSE}.}
}
\value{
The analysis results. A data frame of population estimates for all combinations of
  subpopulations, categories within each subpopulation, response variables,
  and categories within each response variable.  Estimates are provided for
  proportion and size of the population plus standard error, margin of
  error, and confidence interval estimates.
}
\description{
This function organizes input and output for the analysis of attributable risk (for
categorical variables).  The analysis data,
\code{dframe}, can be either a data frame or a simple features (\code{sf}) object.  If an
\code{sf} object is used, coordinates are extracted from the geometry column in the
object, arguments \code{xcoord} and \code{ycoord} are assigned values
\code{"xcoord"} and \code{"ycoord"}, respectively, and the geometry column is
dropped from the object.
}
\section{Details}{

Attributable risk measures the proportional reduction in the extent of poor
condition of a response variable that presumably would result from
eliminating a stressor variable, where the response and stressor variables
are classified as either good (i.e., reference condition) or poor (i.e.,
different from reference condition).  Attributable risk is defined as one
minus the ratio of two probabilities.  The numerator of the ratio is the
conditional probability that the response variable is in poor condition given
that the stressor variable is in good condition.   The denominator of the
ratio is the probability that the response variable is in poor condition.
Attributable risk values close to zero indicate that removing the stressor
variable will have little or no impact on the probability that the response
variable is in poor condition.  Attributable risk values close to one
indicate that removing the stressor variable will result in extensive
reduction of the probability that the response variable is in poor condition.
}

\examples{
dframe <- data.frame(
  siteID = paste0("Site", 1:100),
  wgt = runif(100, 10, 100),
  xcoord = runif(100),
  ycoord = runif(100),
  stratum = rep(c("Stratum1", "Stratum2"), 50),
  RespVar1 = sample(c("Poor", "Good"), 100, replace = TRUE),
  RespVar2 = sample(c("Poor", "Good"), 100, replace = TRUE),
  StressVar = sample(c("Poor", "Good"), 100, replace = TRUE),
  All_Sites = rep("All Sites", 100),
  Resource_Class = rep(c("Agr", "Forest"), c(55, 45))
)
myresponse <- c("RespVar1", "RespVar2")
mystressor <- c("StressVar")
mysubpops <- c("All_Sites", "Resource_Class")
attrisk_analysis(dframe,
  vars_response = myresponse,
  vars_stressor = mystressor, subpops = mysubpops, siteID = "siteID",
  weight = "wgt", xcoord = "xcoord", ycoord = "ycoord",
  stratumID = "stratum"
)
}
\references{
Van Sickle, J., & Paulsen, S. G. (2008). Assessing the attributable risks,
   relative risks, and regional extents of aquatic stressors.
   \emph{Journal of the North American Benthological Society}, 27(4), 920-931.
}
\seealso{
\describe{
  \item{\code{\link{relrisk_analysis}}}{ for relative risk analysis}
  \item{\code{\link{diffrisk_analysis}}}{ for risk difference analysis}
  }
}
\author{
Tom Kincaid \email{Kincaid.Tom@epa.gov}
}
\keyword{survey}
\keyword{univar}
