% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ds.R
\name{ds}
\alias{ds}
\title{Fit detection functions and calculate abundance from line or point transect
data}
\usage{
ds(
  data,
  truncation = ifelse(is.null(data$distend), ifelse(is.null(cutpoints),
    max(data$distance), max(cutpoints)), max(data$distend)),
  transect = "line",
  formula = ~1,
  key = c("hn", "hr", "unif"),
  adjustment = c("cos", "herm", "poly"),
  nadj = NULL,
  order = NULL,
  scale = c("width", "scale"),
  cutpoints = NULL,
  dht_group = FALSE,
  monotonicity = ifelse(formula == ~1, "strict", "none"),
  region_table = NULL,
  sample_table = NULL,
  obs_table = NULL,
  convert_units = 1,
  er_var = ifelse(transect == "line", "R2", "P2"),
  method = "nlminb",
  mono_method = "slsqp",
  quiet = FALSE,
  debug_level = 0,
  initial_values = NULL,
  max_adjustments = 5,
  er_method = 2,
  dht_se = TRUE,
  optimizer = "both",
  winebin = NULL,
  dht.group,
  region.table,
  sample.table,
  obs.table,
  convert.units,
  er.var,
  debug.level,
  initial.values,
  max.adjustments
)
}
\arguments{
\item{data}{a \code{data.frame} containing at least a column called \code{distance} or
a numeric vector containing the distances.  NOTE!  If there is a column
called \code{size} in the data then it will be interpreted as group/cluster size,
see the section "Clusters/groups", below. One can supply data as a "flat
file" and not supply \code{region_table}, \code{sample_table} and \code{obs_table}, see
"Data format", below and \code{\link{flatfile}}.}

\item{truncation}{either truncation distance (numeric, e.g. 5) or percentage
(as a string, e.g. "15\%","15"). Can be supplied as a \code{list} with elements
\code{left} and \code{right} if left truncation is required (e.g.  \code{list(left=1,right=20)}
or \code{list(left="1\%",right="15\%")} or even \code{list(left="1",right="15\%")}).  By
default for exact distances the maximum observed distance is used as the
right truncation. When the data is binned, the right truncation is the
largest bin end point. Default left truncation is set to zero.}

\item{transect}{indicates transect type "line" (default) or "point".}

\item{formula}{formula for the scale parameter. For a CDS analysis leave
this as its default \code{~1}.}

\item{key}{key function to use; \code{"hn"} gives half-normal (default), \code{"hr"}
gives hazard-rate and \code{"unif"} gives uniform. Note that if uniform key is
used, covariates cannot be included in the model.}

\item{adjustment}{adjustment terms to use; \code{"cos"} gives cosine (default),
\code{"herm"} gives Hermite polynomial and \code{"poly"} gives simple polynomial. A
value of \code{NULL} indicates that no adjustments are to be fitted.}

\item{nadj}{the number of adjustment terms to fit. In the absence of
covariates in the formula, the default value (\code{NULL}) will select via AIC
(using a sequential forward selection algorithm) up to \code{max_adjustments}
adjustments (unless \code{order} is specified). When covariates are present
in the model formula, the default value of \code{NULL} results in no adjustment
terms being fitted in the model. A non-negative integer value will cause
the specified number of adjustments to be fitted. Supplying an integer
value will allow the use of adjustment terms in addition to specifying
covariates in the model. The order of adjustment terms used will depend
on the \code{key} and \code{adjustment}. For \code{key="unif"}, adjustments of order
1, 2, 3, ... are fitted when \code{adjustment = "cos"} and order 2, 4, 6, ...
otherwise. For \code{key="hn"} or \code{"hr"} adjustments of order 2, 3, 4, ... are
fitted when \code{adjustment = "cos"} and order 4, 6, 8, ... otherwise. See
\insertCite{buckland2001;textual}{mrds} p. 47 for details.}

\item{order}{order of adjustment terms to fit. The default value (\code{NULL})
results in \code{ds} choosing the orders to use - see \code{nadj}. Otherwise a scalar
positive integer value can be used to fit a single adjustment term of the
specified order, and a vector of positive integers to fit multiple
adjustment terms of the specified orders. For simple and Hermite polynomial
adjustments, only even orders are allowed. The number of adjustment terms
specified here must match \code{nadj} (or \code{nadj} can be the default \code{NULL} value).}

\item{scale}{the scale by which the distances in the adjustment terms are
divided. Defaults to \code{"width"}, scaling by the truncation distance. If the
key is uniform only \code{"width"} will be used. The other option is \code{"scale"}:
the scale parameter of the detection function.}

\item{cutpoints}{if the data are binned, this vector gives the cutpoints of
the bins. Supplying a distance column in your data and specifying cutpoints
is the recommended approach for all standard binned analyses.
Ensure that the first element is 0 (or the left truncation
distance) and the last is the distance to the end of the furthest bin.
(Default \code{NULL}, no binning.) Provide \code{distbegin} and \code{distend} columns
in your data only when your cutpoints are not constant across all your
data (e.g. planes flying at differing altitudes); in that case do not specify
the \code{cutpoints} argument.}

\item{dht_group}{should density/abundance estimates consider all groups to
be size 1 (abundance of groups), \code{dht_group=TRUE}, or should the abundance
of individuals be estimated (group size is taken into account),
\code{dht_group=FALSE}? Default is \code{FALSE} (abundance of individuals is
calculated).}

\item{monotonicity}{should the detection function be constrained for
monotonicity weakly (\code{"weak"}), strictly (\code{"strict"}) or not at all
(\code{"none"} or \code{FALSE}). See Monotonicity, below. (Default \code{"strict"}). By
default it is on for models without covariates in the detection function,
off when covariates are present.}

\item{region_table}{\code{data.frame} with two columns:
\itemize{
\item \code{Region.Label} label for the region
\item \code{Area} area of the region
\item \code{region_table} has one row for each stratum. If there is no
stratification then \code{region_table} has one entry with \code{Area} corresponding
to the total survey area. If \code{Area} is omitted density estimates only are
produced.
}}

\item{sample_table}{\code{data.frame} mapping the regions to the samples
(i.e. transects). There are three columns:
\itemize{
\item \code{Sample.Label} label for the sample
\item \code{Region.Label} label for the region that the sample belongs to.
\item \code{Effort} the effort expended in that sample (e.g. transect length).
}}

\item{obs_table}{\code{data.frame} mapping the individual observations
(objects) to regions and samples. There should be three columns:
\itemize{
\item \code{object} unique numeric identifier for the observation
\item \code{Region.Label} label for the region that the sample belongs to
\item \code{Sample.Label} label for the sample
}}

\item{convert_units}{conversion between units for abundance estimation, see
"Units", below. (Defaults to 1, implying all of the units are "correct"
already.)}

\item{er_var}{specifies which encounter rate estimator to use in the case
that \code{dht_se} is \code{TRUE}, \code{er_method} is either 1 or 2 and there are two or more
samplers. Defaults to "R2" for line transects and "P2" for point transects
(>= 1.0.9, earlier versions <= 1.0.8 used the "P3" estimator by default
for points), both of which assume random placement of transects. For
systematic designs, alternative estimators may be more appropriate,
see \code{\link{dht2}} for more information.}

\item{method}{optimization method to use (any method usable by
\code{\link[stats:optim]{optim}} or \code{\link[optimx:optimx]{optimx}}). Defaults to
\code{"nlminb"}.}

\item{mono_method}{optimization method to use when monotonicity is enforced.
Can be either \code{slsqp} or \code{solnp}. Defaults to \code{slsqp}.}

\item{quiet}{suppress non-essential messages (useful for bootstraps etc).
Default value \code{FALSE}.}

\item{debug_level}{print debugging output. \code{0}=none, \code{1-3} increasing levels
of debugging output.}

\item{initial_values}{a \code{list} of named starting values, see
\code{\link[mrds:mrds_opt]{mrds_opt}}. Only allowed when AIC term selection is not
used.}

\item{max_adjustments}{maximum number of adjustments to try (default 5) only
used when \code{order=NULL}.}

\item{er_method}{encounter rate variance calculation: default = 2 gives the
method of \insertCite{innes2002;textual}{mrds}, using expected counts in the encounter rate. Setting
to 1 gives observed counts (which matches Distance for Windows) and 0 uses
negative binomial variance (only useful in the rare situation where study area =
surveyed area).
See \code{\link[mrds:dht.se]{dht.se}} for more details, noting this \code{er_method}
argument corresponds to the \code{varflag} element of the \code{options}
argument in \code{dht.se}.}

\item{dht_se}{should uncertainty be calculated when using \code{dht}? Safe to
leave as \code{TRUE}, used in \code{bootdht}.}

\item{optimizer}{By default this is set to 'both'. In this case
the R optimizer will be used and if present the MCDS optimizer will also
be used. The result with the best likelihood value will be selected. To
run only a specified optimizer set this value to either 'R' or 'MCDS'.
See \code{\link[mrds:mcds_dot_exe]{mcds_dot_exe}} for setup instructions.}

\item{winebin}{If you are trying to use our MCDS.exe optimizer on a
non-windows system then you may need to specify the winebin. Please
see \code{\link[mrds:mcds_dot_exe]{mcds_dot_exe}} for more details.}

\item{dht.group}{deprecated, see same argument with underscore, above.}

\item{region.table}{deprecated, see same argument with underscore, above.}

\item{sample.table}{deprecated, see same argument with underscore, above.}

\item{obs.table}{deprecated, see same argument with underscore, above.}

\item{convert.units}{deprecated, see same argument with underscore, above.}

\item{er.var}{deprecated, see same argument with underscore, above.}

\item{debug.level}{deprecated, see same argument with underscore, above.}

\item{initial.values}{deprecated, see same argument with underscore, above.}

\item{max.adjustments}{deprecated, see same argument with underscore, above.}
}
\value{
a list with elements:
\itemize{
\item \code{ddf} a detection function model object.
\item \code{dht} abundance/density information (if survey region data was supplied,
else \code{NULL})
}
}
\description{
This function fits detection functions to line or point transect data and
then (provided that survey information is supplied) calculates abundance and
density estimates. The examples below illustrate some basic types of
analysis using \code{ds()}.
}
\section{Details}{

If abundance estimates are required then the \code{data.frame}s \code{region_table}
and \code{sample_table} must be supplied. If \code{data} does not contain the columns
\code{Region.Label} and \code{Sample.Label} then the \code{data.frame} \code{obs_table} must
also be supplied. Note that stratification only applies to abundance
estimates and not at the detection function level. Density and abundance
estimates, and corresponding estimates of variance and confidence intervals,
are calculated using the methods described in \insertCite{buckland2001;textual}{mrds}
sections 3.6.1 and 3.7.1 (further details can be found in the documentation
for \code{\link[mrds:dht]{dht}}).

For more advanced abundance/density estimation please see the
\code{\link[mrds:dht]{dht}} and \code{\link{dht2}} functions.

Examples of distance sampling analyses are available at
\url{https://distancesampling.org/resources/vignettes.html}.

Hints and tips on fitting (particularly optimisation issues) are on the
\code{\link[mrds:mrds_opt]{mrds_opt}} manual page.
}

\section{Clusters/groups}{

Note that if the data contains a column named \code{size}, cluster size will be
estimated and density/abundance will be based on a clustered analysis of
the data. Setting this column to be \code{NULL} will perform a non-clustered
analysis (for example if "\code{size}" means something else in your dataset).
}

\section{Truncation}{

The right truncation point is by default set to be largest observed distance
or bin end point. This default will not be appropriate for all data and
can often be the cause of model convergence failures. It is recommended that
one plots a histogram of the observed distances prior to model fitting so as
to get a feel for an appropriate truncation distance. (Similar arguments go
for left truncation, if appropriate). \insertCite{buckland2001;textual}{mrds} provide
guidelines on truncation.

When specified as a percentage, the largest \code{right} and smallest \code{left}
percent distances are discarded. Percentages cannot be supplied when using
binned data.

For left truncation, there are two options: (1) fit a detection function to
the truncated data as is (this is what happens when you set \code{left}).  This
does not assume that g(x)=1 at the truncation point. (2) manually remove
data with distances less than the left truncation distance -- effectively
move the centre line out to be the truncation distance (this needs to be
done before calling \code{ds}). This then assumes that detection is certain at
the left truncation distance. The former strategy has a weaker assumption,
but will give higher variance as the detection function close to the line
has no data to tell it where to fit -- it will be relying on the data from
after the left truncation point and the assumed shape of the detection
function. The latter is most appropriate in the case of aerial surveys,
where some area under the plane is not visible to the observers, but their
probability of detection is certain at the smallest distance.
}

\section{Binning}{

Note that binning is performed such that bin 1 is all distances greater than
or equal to cutpoint 1 (>=0 or left truncation distance) and less than
cutpoint 2. Bin 2 is then distances greater than or equal to cutpoint 2 and
less than cutpoint 3 and so on.
}

\section{Monotonicity}{

When adjustment terms are used, it is possible for the detection function to
not always decrease with increasing distance. This is unrealistic and can
lead to bias. To avoid this, the detection function can be constrained for
monotonicity (and is by default for detection functions without covariates).

Monotonicity constraints are supported in a similar way to that described
in \insertCite{buckland2001;textual}{mrds}. 20 equally spaced points over
the range of the
detection function (left to right truncation) are evaluated at each round
of the optimisation and the function is constrained to be either always
less than its value at zero (\code{"weak"}) or such that each value is
less than or equal to the previous point (monotonically decreasing;
\code{"strict"}). See also \code{\link[mrds:check.mono]{check.mono}}.

Even with no monotonicity constraints, checks are still made that the
detection function is monotonic, see \code{\link[mrds:check.mono]{check.mono}}.
}

\section{Units}{

In extrapolating to the entire survey region it is important that the unit
measurements be consistent or converted for consistency. A conversion
factor can be specified with the \code{convert_units} argument. The values of
\code{Area} in \code{region_table}, must be made consistent with the units for
\code{Effort} in \code{sample_table} and the units of \code{distance} in the \code{data.frame}
that was analyzed. It is easiest if the units of \code{Area} are the square of
the units of \code{Effort} and then it is only necessary to convert the units of
\code{distance} to the units of \code{Effort}. For example, if \code{Effort} was entered
in kilometres and \code{Area} in square kilometres and \code{distance} in metres then
using \code{convert_units=0.001} would convert metres to kilometres, density
would be expressed per square kilometre, which would then be consistent with
units for \code{Area}. However, they can all be in different units as long as
the appropriate composite value for \code{convert_units} is chosen. Abundance
for a survey region can be expressed as: \code{A*N/a} where \code{A} is \code{Area} for
the survey region, \code{N} is the abundance in the covered (sampled) region,
and \code{a} is the area of the sampled region and is in units of \code{Effort * distance}. The sampled region \code{a} is multiplied by \code{convert_units}, so it
should be chosen such that the result is in the same units as \code{Area}.  For
example, if \code{Effort} was entered in kilometres, \code{Area} in hectares (100m x
100m) and \code{distance} in metres, then using \code{convert_units=10} will convert
\code{a} to units of hectares (100 to convert metres to 100 metres for distance
and .1 to convert km to 100m units).
}

\section{Data format}{

One can supply \code{data} only to simply fit a detection function. However, if
abundance/density estimates are necessary further information is required.
Either the \code{region_table}, \code{sample_table} and \code{obs_table} \code{data.frame}s can
be supplied or all data can be supplied as a "flat file" in the \code{data}
argument. In this format each row in data has additional information that
would ordinarily be in the other tables. This usually means that there are
additional columns named: \code{Sample.Label}, \code{Region.Label}, \code{Effort} and
\code{Area} for each observation. See \code{\link{flatfile}} for an example.
}

\section{Density estimation}{

If column \code{Area} is omitted, a density estimate is generated but note that
the degrees of freedom/standard errors/confidence intervals will not match
density estimates made with the \code{Area} column present.
}

\examples{

# An example from mrds, the golf tee data.
library(Distance)
data(book.tee.data)
tee.data <- subset(book.tee.data$book.tee.dataframe, observer==1)
ds.model <- ds(tee.data, 4)
summary(ds.model)
plot(ds.model)

\dontrun{
# same model, but calculating abundance
# need to supply the region, sample and observation tables
region <- book.tee.data$book.tee.region
samples <- book.tee.data$book.tee.samples
obs <- book.tee.data$book.tee.obs

ds.dht.model <- ds(tee.data, 4, region_table=region,
                   sample_table=samples, obs_table=obs)
summary(ds.dht.model)

# specify order 2 cosine adjustments
ds.model.cos2 <- ds(tee.data, 4, adjustment="cos", order=2)
summary(ds.model.cos2)

# specify order 2 and 3 cosine adjustments, turning monotonicity
# constraints off
ds.model.cos23 <- ds(tee.data, 4, adjustment="cos", order=c(2, 3),
                   monotonicity=FALSE)
# check for non-monotonicity -- actually no problems
check.mono(ds.model.cos23$ddf, plot=TRUE, n.pts=100)

# include both a covariate and adjustment terms in the model
ds.model.cos2.sex <- ds(tee.data, 4, adjustment="cos", order=2,
                        monotonicity=FALSE, formula=~as.factor(sex))
# check for non-monotonicity -- actually no problems
check.mono(ds.model.cos2.sex$ddf, plot=TRUE, n.pts=100)

# truncate the largest 10\% of the data and fit only a hazard-rate
# detection function
ds.model.hr.trunc <- ds(tee.data, truncation="10\%", key="hr",
                        adjustment=NULL)
summary(ds.model.hr.trunc)

# compare AICs between these models:
AIC(ds.model)
AIC(ds.model.cos2)
AIC(ds.model.cos23)
}
}
\references{
\insertAllCited{}
}
\seealso{
\code{\link{flatfile}}, \code{\link[mrds]{AIC.ds}},
\code{\link{ds.gof}}, \code{\link{p_dist_table}},
\code{\link[mrds]{plot.ds}}, \code{\link{add_df_covar_line}}
}
\author{
David L. Miller
}
