% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simInitSpatial.R
\name{simInitSpatial}
\alias{simInitSpatial}
\title{Generation of smaller regions given an existing spatial variable and a
table}
\usage{
simInitSpatial(
  simPopObj,
  additional,
  region,
  tspatialP = NULL,
  tspatialHH = NULL,
  eps = 0.05,
  maxIter = 100,
  nr_cpus = NULL,
  seed = 1,
  verbose = FALSE
)
}
\arguments{
\item{simPopObj}{an object of class \code{\linkS4class{simPopObj}}.}

\item{additional}{a character vector of length one holding the variable name
of the variable containing smaller geographical units. This variable name
must be available as a column in the input arguments \code{tspatialP} and/or
\code{tspatialHH}.}

\item{region}{a character vector of length one holding the variable name of
the broader region. This variable must be available in the inputs
\code{tspatialP} and/or \code{tspatialHH} as well as in the sample and
population slots of input \code{simPopObj}.}

\item{tspatialP}{a data.frame (or data.table) containing three columns: the broader region
(with the variable name being the same as in input \code{region}), the
smaller geographical units (with the variable name being the same as in
input \code{additional}) and a third column containing a numeric vector
holding counts of persons. Either this argument or \code{tspatialHH} has to be provided.}

\item{tspatialHH}{a data.frame (or data.table) containing three columns: the broader region
(with the variable name being the same as in input \code{region}), the
smaller geographical units (with the variable name being the same as in
input \code{additional}) and a third column containing a numeric vector
holding counts of households. Either this argument or \code{tspatialP} has to be provided.}

\item{eps}{relative deviation of person counts if person and household counts are provided}

\item{maxIter}{maximum number of iterations for the adjustment
if person and household counts are provided}

\item{nr_cpus}{if specified, an integer number defining the number of cpus
that should be used for parallel processing.}

\item{seed}{optional; an integer value to be used as the seed of the random
number generator, or an integer vector containing the state of the random
number generator to be restored.}

\item{verbose}{\code{TRUE}/\code{FALSE}; whether some information should be shown during the process}
}
\value{
An object of class \code{\linkS4class{simPopObj}} with an additional
variable in the synthetic population slot.
}
\description{
This function allows manipulating an object of class
\code{\linkS4class{simPopObj}} in such a way that a new variable containing
smaller regions within an already existing broader region is generated. The
distribution of the smaller regions within the broader region is respected.
}
\details{
The distributional information must be contained in an input table that
holds combinations of characteristics of the broader region and the smaller
regions as well as population counts (which may be available from a census).
}
\examples{
# data.table provides the grouping syntax used to build the count tables
library(data.table)
data(eusilcS)
data(eusilcP)

# no districts are available in the population, so we have to generate those
# we randomly assign districts within "region" in the eusilc population data
# each hh has the same district
simulate_districts <- function(inp) {
  # Adds a randomly generated column "district" to inp such that all rows of a
  # household (column "hid") share one district and every district code starts
  # with the position of its region (column "region") among the region groups.
  # inp: data.frame holding at least the columns "hid" and "region".
  # Returns (invisibly) inp merged with the new "district" column.
  hhid <- "hid"
  region <- "region"

  # one row per household, keeping only household id and region
  hh <- inp[!duplicated(inp[, hhid]), c(hhid, region)]
  by_region <- split(hh, hh[, region])

  per_region <- lapply(seq_along(by_region), function(i) {
    # between 3 and 9 district codes per region: region index followed by
    # a running district number
    codes <- paste(i, 1:sample(3:9, 1), sep="")
    block <- by_region[[i]]
    block$district <- sample(codes, nrow(block), replace=TRUE)
    block
  })
  hh_districts <- do.call("rbind", per_region)
  # keep only the merge key and the new column; region is already part of inp
  hh_districts <- hh_districts[, c(hhid, "district")]
  out <- merge(inp, hh_districts, by=hhid, all.x=TRUE)
  invisible(out)
}

# attach the simulated districts and convert the result to a data.table
eusilcP <- data.table(simulate_districts(eusilcP))

# Input tables: counts for every combination of the broad region (variable
# 'region', reported as db040) and the districts generated before.
# keyby groups the rows and keys the result by db040 and district.
# household counts by district (first row of each household only)
tabHH <- eusilcP[!duplicated(hid), .(Freq=.N), keyby=.(db040=region, district)]
# person counts by district
tabP <- eusilcP[, .(Freq=.N), keyby=.(db040=region, district)]

# rename region and household id, then specify the input object used to
# generate a synthetic population
setnames(eusilcP, c("region", "hid"), c("db040", "db030"))
inp <- specifyInput(
  data=eusilcP, hhid="db030", hhsize="hsize", strata="db040", population=TRUE
)
\dontrun{
# variant 1: household counts only (tspatialP is left NULL)
simPopObj <- simStructure(data=inp, method="direct", basicHHvars=c("age", "gender"))
simPopObj1 <- simInitSpatial(
  simPopObj, additional="district", region="db040",
  tspatialHH=tabHH, tspatialP=NULL, nr_cpus=1
)

# variant 2: person counts only (tspatialHH is left NULL)
simPopObj <- simStructure(data=inp, method="direct", basicHHvars=c("age", "gender"))
simPopObj2 <- simInitSpatial(
  simPopObj, additional="district", region="db040",
  tspatialHH=NULL, tspatialP=tabP, nr_cpus=1
)

# variant 3: both person and household counts
simPopObj <- simStructure(data=inp, method="direct", basicHHvars=c("age", "gender"))
simPopObj3 <- simInitSpatial(
  simPopObj, additional="district", region="db040",
  tspatialHH=tabHH, tspatialP=tabP, nr_cpus=1
)
}

}
\references{
M. Templ, B. Meindl, A. Kowarik, A. Alfons, O. Dupriez (2017) Simulation of Synthetic Populations for Survey Data Considering Auxiliary
Information. \emph{Journal of Statistical Software}, \strong{79} (10), 1--38. \doi{10.18637/jss.v079.i10}
}
\author{
Bernhard Meindl and Alexander Kowarik
}
\keyword{manip}
