% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/popRF.R
\name{popRF}
\alias{popRF}
\title{Disaggregating Census Data for Population Mapping Using Random Forests
with Remotely-Sensed and Ancillary Data.}
\usage{
popRF(pop, cov, mastergrid, watermask, px_area, output_dir, cores=0, 
quant=FALSE, set_seed=2010, fset=NULL, fset_incl=FALSE, 
fset_cutoff=20, fix_cov=FALSE, check_result=TRUE, verbose=TRUE, 
log=FALSE, ...)
}
\arguments{
\item{pop}{Character vector containing the name of the file from which the
unique area ID and corresponding population values are to be read
from. The file should contain two comma-separated columns holding the
administrative ID and the population value, without column names.
If it does not contain an absolute path, the file name is relative to
the current working directory.}

\item{cov}{A nested list of named list(s), i.e. where each element of the
first list is a named list object with atomic elements. The name of
each named list corresponds to the 3-letter ISO code of a specified
country. The elements within each named list define the specified
input covariates to be used in the random forest model, i.e. the name
of the covariates and the corresponding, if applicable and local, path
to them. If the path is not a full path, it is assumed to be relative
to the current working directory.
Example for Nepal (NPL):\if{html}{\out{<div class="r">}}\preformatted{list(
    "NPL"=list(
               "covariate1" = "covariate1.tif",
               "covariate2" = "covariate2.tif"
              )  
   )
}\if{html}{\out{</div>}}\preformatted{## $NPL
## $NPL$covariate1
## [1] "covariate1.tif"
## 
## $NPL$covariate2
## [1] "covariate2.tif"
}}

\item{mastergrid}{A named list where each element of the list defines the
path to the input mastergrid(s), i.e. the template gridded raster(s)
that contains the unique area IDs as their value. The name(s)
corresponds to the 3-letter ISO code(s) of a specified country(ies).
Each corresponding element defines the path to the mastergrid(s). If
the path is local and not a full path, it is assumed to be relative to
the current working directory.
Example:\preformatted{list(
    "NPL" = "npl_mastergrid.tif"
   )
}}

\item{watermask}{A named list where each element of the list defines the path
to the input country-specific watermask. The name corresponds to the
3-letter ISO code of a specified country. Each corresponding element
defines the path to the watermask, i.e. the binary raster that
delineates the presence of water (1) and non-water (0), that is used
to mask out areas from modelling. If the path is local and not a full
path, it is assumed to be relative to the current working directory.
Example:\preformatted{list(
    "NPL" = "npl_watermask.tif"
   )
}}

\item{px_area}{A named list where each element of the list defines the path
to the input raster(s) containing the pixel area. The name corresponds
to the 3-letter ISO code of a specified country. Each corresponding
element defines the path to the raster whose values indicate the area
of each unprojected (WGS84) pixel. If the path is local and not a full
path, it is assumed to be relative to the current working directory.
Example:\if{html}{\out{<div class="r">}}\preformatted{list(
    "NPL" = "npl_px_area.tif"
   )
}\if{html}{\out{</div>}}\preformatted{## $NPL
## [1] "npl_px_area.tif"
}}

\item{output_dir}{Character vector containing the path to the directory for
writing output files. Default is the temp directory.}

\item{cores}{Integer vector containing an integer. Indicates the number of
cores to use in parallel when executing the function. If set to 0,
\code{(max_number_of_cores - 1)} will be used, based on as many
processors as the hardware and RAM allow. Default is \code{cores} = 0.}

\item{quant}{Logical vector indicating whether to produce the quantile
regression forests (TRUE) to generate prediction intervals.
Default is \code{quant} = FALSE.}

\item{set_seed}{Integer used to set the random seed. Default is \code{set_seed} = 2010.}

\item{fset}{Named list containing character vector elements that give the
path to the directory(ies) containing the random forest model objects
(.RData) which are used as a "fixed set" in this modeling,
i.e. to parameterize, in part or in full, this RF model run upon
another country's(ies') RF model object. The list should have two
named character vectors, "final" and "quant", with the character
vectors corresponding to the directory paths of the corresponding
folders that hold the random forest model objects and the quantile
regression random forest model objects, respectively.
Numerous model objects can be in each folder "./final/" and "./quant/"
representing numerous countries with the understanding that the model
being run will incorporate all model objects in the folder, e.g. if
model objects for Mexico and another country are both in the folder,
both will be incorporated. Default is \code{fset} = NULL.}

\item{fset_incl}{Logical vector indicating whether the RF model object
will or will not be combined with another RF model run upon another
country's(ies') RF model object. Default is \code{fset_incl} = FALSE.}

\item{fset_cutoff}{Numeric vector containing an integer. This parameter is
only used if \code{fset_incl} is TRUE. If the country has fewer than
\code{fset_cutoff} admin units, then RF popfit will not be combined
with the RF model run upon another country's(ies') RF model object.
Default is \code{fset_cutoff} = 20.}

\item{fix_cov}{Logical vector indicating whether the raster extent of the
covariates will be corrected if the extent does not match mastergrid.
Default is \code{fix_cov} = FALSE.}

\item{check_result}{Logical vector indicating whether the results will be
compared with input data. Default is \code{check_result} = TRUE.}

\item{verbose}{Logical vector indicating whether to print
intermediate output from the function to the console, which might be
helpful for model debugging. Default is \code{verbose} = TRUE.}

\item{log}{Logical vector indicating whether to print intermediate
output from the function to the log.txt file.
Default is \code{log} = FALSE.}

\item{...}{Additional arguments:\cr
\code{binc}: Numeric. Increases the number of blocks suggested for
processing the raster file.\cr
\code{boptimise}: Logical. Optimizes the total memory required to
process the raster file by reducing the memory need to 35\%.\cr
\code{bsoft}: Numeric. If the raster can be processed on fewer
than \code{cores} cores, it will be forced to use a smaller number
of \code{cores}.\cr
\code{nodesize}: Minimum size of terminal nodes. Setting this number larger
causes smaller trees to be grown (and thus take less time). See
\code{\link[randomForest]{randomForest}} for more details. Default
is \code{nodesize} = NULL and will be calculated
as \code{length(y_data)/1000}.\cr
\code{maxnodes}: Maximum number of terminal nodes trees in the forest can have.
If not given, trees are grown to the maximum possible (subject to
limits by nodesize). If set larger than maximum possible, a warning is
issued. See \code{\link[randomForest]{randomForest}} for more details.
Default is \code{maxnodes} = NULL.\cr
\code{ntree}: Number of trees to grow. This should not be set to too small a
number, to ensure that every input row gets predicted at least a few
times. See \code{\link[randomForest]{randomForest}} for more details.
Default is \code{ntree} = NULL, in which case \code{popfit$ntree} will be
used.\cr
\code{mtry}: Number of variables randomly sampled as candidates at each split.
See \code{\link[randomForest]{randomForest}} for more details.
Default is \code{mtry} = NULL, in which case \code{popfit$mtry} will be
used.\cr
\code{proximity}: Logical vector indicating whether proximity measures among
the rows should be computed. Default is \code{proximity} = TRUE.
See \code{\link[randomForest]{randomForest}} for more details.\cr
\code{const}: Character vector containing the name of the file from which the
mask will be used to constrain the population layer. The mask file should
have value \code{0} as a mask. If it does not contain an absolute path,
the file name is relative to the current working directory.}
}
\value{
Raster* object of gridded population.
}
\description{
Disaggregating Census Data for Population Mapping Using Random Forests
with Remotely-Sensed and Ancillary Data.
}
\details{
This function produces gridded population density estimates using
a Random Forest model as described in \emph{Stevens, et al. (2015)}
\doi{10.1371/journal.pone.0107042}.
The unit-average log-transformed population density and covariate
summary values for each census unit are then used to train a
Random Forest model (\doi{10.1023/A:1010933404324})
to predict log population density. Random Forest models are an
ensemble, nonparametric modeling approach that grows a "forest" of
individual classification or regression trees and improves upon
bagging by using the best of a random selection of predictors at
each node in each tree. The Random Forest is used to produce grid,
i.e. pixel, level population density estimates that are used as
unit-relative weights to dasymetrically redistribute the census
based areal population counts. This function also allows for
modelling based upon a
regional parameterisation (\doi{10.1080/17538947.2014.965761})
of other previously run models as well as the creation of models based
upon multiple countries at once (\doi{10.1016/j.compenvurbsys.2019.01.006}).
This function assumes that all data is unprojected and is in the
WGS84 coordinate system.
}
\examples{
\dontrun{

library("popRF")

pop_table <- list("NPL"="/user/npl_population.csv")

input_cov <- list(
                 "NPL"=list(
                            "cov1" = "covariate1.tif",
                            "cov2" = "covariate2.tif"))
                            
                 
input_mastergrid <- list("NPL" = "npl_mastergrid.tif")
input_watermask  <- list("NPL" = "npl_watermask.tif")
input_px_area    <- list("NPL" = "npl_px_area.tif")

res <- popRF(pop=pop_table, 
             cov=input_cov, 
             mastergrid=input_mastergrid, 
             watermask=input_watermask, 
             px_area=input_px_area, 
             output_dir="/user/output", 
             cores=4) 
 
# Plot population raster 
plot(res$pop) 

# Plot Error via Trees     
plot(res$popfit)
            
}
}
\references{
\itemize{
\item Stevens, F. R., Gaughan, A. E., Linard, C. & A. J. Tatem. 2015.
Disaggregating Census Data for Population Mapping Using Random Forests
with Remotely-Sensed and Ancillary Data. PLoS ONE 10, e0107042
\doi{10.1371/journal.pone.0107042}
\item L. Breiman. 2001. Random Forests. Machine Learning, 45: 5-32.
\doi{10.1023/A:1010933404324}
\item Gaughan, A. E., Stevens, F. R., Linard, C., Patel, N. N., & A. J. Tatem.
2015. Exploring Nationally and Regionally Defined Models for Large Area
Population Mapping. International Journal of Digital Earth, 8(12):
989-1006. \doi{10.1080/17538947.2014.965761}
\item Sinha, P., Gaughan, A. E, Stevens, F. R., Nieves, J. J., Sorichetta, A.,
& A. J. Tatem. 2019. Assessing the Spatial Sensitivity of a Random
Forest Model: Application in Gridded Population Modeling. Computers,
Environment and Urban Systems, 75: 132-145.
\doi{10.1016/j.compenvurbsys.2019.01.006}
}
}
\author{
Maksym Bondarenko \href{mailto:mb4@soton.ac.uk}{mb4@soton.ac.uk},
Jeremiah J. Nieves \href{mailto:J.J.Nieves@liverpool.ac.uk}{J.J.Nieves@liverpool.ac.uk},
Forrest R. Stevens \href{mailto:forrest.stevens@louisville.edu}{forrest.stevens@louisville.edu},
Andrea E. Gaughan \href{mailto:ae.gaughan@louisville.edu}{ae.gaughan@louisville.edu},
David Kerr \href{mailto:dk2n16@soton.ac.uk}{dk2n16@soton.ac.uk},
Chris Jochem \href{mailto:W.C.Jochem@soton.ac.uk}{W.C.Jochem@soton.ac.uk} and
Alessandro Sorichetta \href{mailto:as1v13@soton.ac.uk}{as1v13@soton.ac.uk}
}
