% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/codify.R
\name{codify}
\alias{codify}
\alias{codify.data.frame}
\alias{codify.data.table}
\alias{print.codified}
\title{Codify case data with external code data (within specified time frames)}
\usage{
codify(x, codedata, ..., id, code, date = NULL, code_date = NULL, days = NULL)

\method{codify}{data.frame}(x, ..., id, date = NULL, days = NULL)

\method{codify}{data.table}(
  x,
  codedata,
  ...,
  id,
  code,
  date = NULL,
  code_date = NULL,
  days = NULL,
  alnum = FALSE,
  .copy = NA
)

\method{print}{codified}(x, ..., n = 10)
}
\arguments{
\item{x}{data set with mandatory character id column
(identified by argument \code{id = "<col_name>"}),
and optional \code{\link{Date}} column of interest
(identified by argument \code{date = "<col_name>"}).
Alternatively, the output from \code{\link[=codify]{codify()}}.}

\item{codedata}{additional data with columns
including case id (\code{character}), code and an optional date (\link{Date}) for
each code. An optional column \code{condition} might distinguish codes/dates
with certain characteristics (see example).}

\item{...}{arguments passed between methods}

\item{id, code, date, code_date}{column names with case id
(\code{character} from \code{x} and \code{codedata}), \code{code} (from \code{codedata}) and
optional date (\link{Date} from \code{x}) and
\code{code_date} (\link{Date} from \code{codedata}).}

\item{days}{numeric vector of length two with lower and upper bound for range
of relevant days relative to \code{date}. See "Relevant period".}

\item{alnum}{Should codes be cleaned from all non-alphanumeric characters?}

\item{.copy}{Should the object be copied internally by \code{\link[data.table:copy]{data.table::copy()}}?
\code{NA} (by default) means that objects smaller than 1 GB are copied.
If the size is larger, the argument must be set explicitly. Set \code{TRUE}
to make copies regardless of object size. This is recommended if enough RAM
is available. If set to \code{FALSE}, calculations might be carried out
but the object will be changed by reference.
IMPORTANT! This might lead to undesired consequences and should only be used
if absolutely necessary!}

\item{n}{number of rows to preview as tibble.
The output is technically a \link[data.table:data.table]{data.table::data.table}, which might be an
unusual format to look at. Use \code{n = NULL} to print the object as is.}
}
\value{
Object of class \code{codified} (inheriting from \link[data.table:data.table]{data.table::data.table}).
Essentially \code{x} with additional columns:
\verb{code, code_date}: left joined from \code{codedata} or \code{NA}
if no match within period. \code{in_period}: Boolean indicator if the case
had at least one code within the specified period.

The output has one row for each combination of "id" from \code{x} and
"code" from \code{codedata}. Rows from \code{x} might be repeated
accordingly.
}
\description{
This is the first step of \code{codify() \%>\% classify() \%>\% index()}.
The function combines case data from one data set with related code data from
a second source, possibly limited to codes valid at certain time points
relative to case dates.
}
\section{Relevant period}{

Some examples for argument \code{days}:
\itemize{
\item \code{c(-365, -1)}: window of one year prior to the \code{date}
column of \code{x}. Useful for patient comorbidity.
\item \code{c(1, 30)}: window of 30 days after \code{date}.
Useful for adverse events after a surgical procedure.
\item \code{c(-Inf, Inf)}: no limitation on non-missing dates.
\item \code{NULL}: no time limitation at all.
}
}

\examples{
# Codify all patients from `ex_people` with their ICD-10 codes from `ex_icd10`
x <- codify(ex_people, ex_icd10, id = "name", code = "icd10")
x

# Only consider codes if recorded at hospital admissions within one year prior
# to surgery
codify(
  ex_people,
  ex_icd10,
  id        = "name",
  code      = "icd10",
  date      = "surgery",
  code_date = "admission",
  days      = c(-365, 0)   # admission during one year before surgery
)

# Only consider codes if recorded after surgery
codify(
  ex_people,
  ex_icd10,
  id        = "name",
  code      = "icd10",
  date      = "surgery",
  code_date = "admission",
  days      = c(1, Inf)     # admission any time after surgery
)


# Dirty code data ---------------------------------------------------------

# Assume that codes contain unwanted "dirty" characters
# Those could for example be a dot used by ICD-10 (i.e. X12.3 instead of X123)
dirt <- c(strsplit(c("!#\%&/()=?`,.-_"), split = ""), recursive = TRUE)
rdirt <- function(x) sample(x, nrow(ex_icd10), replace = TRUE)
sub <- function(i) substr(ex_icd10$icd10, i, i)
ex_icd10$icd10 <-
  paste0(
    rdirt(dirt), sub(1),
    rdirt(dirt), sub(2),
    rdirt(dirt), sub(3),
    rdirt(dirt), sub(4),
    rdirt(dirt), sub(5)
  )
head(ex_icd10)

# Use `alnum = TRUE` to ignore non-alphanumeric characters
codify(ex_people, ex_icd10, id = "name", code = "icd10", alnum = TRUE)



# Big data ----------------------------------------------------------------

# If `x` or `codedata` are large compared to available
# Random Access Memory (RAM) it might not be possible to make internal copies
# of those objects. Setting `.copy = FALSE` might help to overcome such problems

# If no copies are made internally, however, the input objects (if data tables)
# would change in the global environment
x2 <- data.table::as.data.table(ex_icd10)
head(x2) # Look at the "icd10" column (with dirty data)

# Use `alnum = TRUE` combined with `.copy = FALSE`
codify(ex_people, x2, id = "name", code = "icd10", alnum = TRUE, .copy = FALSE)

# Even though no explicit assignment was specified
# (neither for the output of codify(), nor to explicitly alter `x2`),
# the `x2` object has changed (look at the "icd10" column!):
head(x2)

# Hence, the `.copy` argument should only be used if necessary
# and if so, with caution!


# print.codified() --------------------------------------------------------

x # Preview first 10 rows as a tibble
print(x, n = 20) # Preview first 20 rows as a tibble
print(x, n = NULL) # Print as data.table (ignoring the 'codified' class)
}
\seealso{
Other verbs: 
\code{\link{categorize}()},
\code{\link{classify}()},
\code{\link{index_fun}}
}
\concept{verbs}
