% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/filter_joined.R
\name{filter_joined}
\alias{filter_joined}
\title{Filter Observations Taking Other Observations into Account}
\usage{
filter_joined(
  dataset,
  dataset_add,
  by_vars,
  join_vars,
  join_type,
  first_cond_lower = NULL,
  first_cond_upper = NULL,
  order = NULL,
  tmp_obs_nr_var = NULL,
  filter_add = NULL,
  filter_join,
  check_type = "warning"
)
}
\arguments{
\item{dataset}{Input dataset

The variables specified by the \code{by_vars} and \code{order} arguments are expected to be in the dataset.

\describe{
\item{Permitted values}{a dataset, i.e., a \code{data.frame} or tibble}
\item{Default value}{none}
}}

\item{dataset_add}{Additional dataset

The variables specified for \code{by_vars}, \code{join_vars}, and \code{order} are
expected.

\describe{
\item{Permitted values}{a dataset, i.e., a \code{data.frame} or tibble}
\item{Default value}{none}
}}

\item{by_vars}{By variables

The specified variables are used as by variables for joining the input
dataset with the additional dataset (\code{dataset_add}).

\describe{
\item{Permitted values}{list of variables created by \code{exprs()}, e.g., \code{exprs(USUBJID, VISIT)}}
\item{Default value}{none}
}}

\item{join_vars}{Variables to keep from joined dataset

The variables needed from the other observations should be specified for
this parameter. The specified variables are added to the joined dataset
with suffix ".join". For example, to select all observations with \code{AVALC == "Y"} and \code{AVALC == "Y"} for at least one subsequent visit, \code{join_vars = exprs(AVALC, AVISITN)} and \code{filter_join = AVALC == "Y" & AVALC.join == "Y" & AVISITN < AVISITN.join} could be specified.

The \verb{*.join} variables are not included in the output dataset.

\describe{
\item{Permitted values}{list of variables created by \code{exprs()}, e.g., \code{exprs(USUBJID, VISIT)}}
\item{Default value}{none}
}}

\item{join_type}{Observations to keep after joining

The argument determines which of the joined observations are kept with
respect to the original observation. For example, if \code{join_type = "after"}
is specified all observations after the original observations are kept.

For example, for confirmed response or BOR in the oncology setting or
confirmed deterioration in questionnaires the confirmatory assessment must
be after the assessment. Thus \code{join_type = "after"} could be used.

Whereas, sometimes you might allow for confirmatory observations to occur
prior to the observation. For example, to identify AEs occurring on or
after seven days before a COVID AE. Thus \code{join_type = "all"} could be used.

\describe{
\item{Permitted values}{\code{"before"}, \code{"after"}, \code{"all"}}
\item{Default value}{none}
}}

\item{first_cond_lower}{Condition for selecting range of data (before)

If this argument is specified, the other observations are restricted from
the first observation before the current observation where the specified
condition is fulfilled up to the current observation. If the condition is
not fulfilled for any of the other observations, no observations are
considered, i.e., the observation is not flagged.

This parameter should be specified if \code{filter_join} contains summary
functions which should not apply to all observations but only from a
certain observation before the current observation up to the current
observation. For examples see the "Examples" section below.

\describe{
\item{Permitted values}{an unquoted condition, e.g., \code{AVISIT == "BASELINE"}}
\item{Default value}{\code{NULL}}
}}

\item{first_cond_upper}{Condition for selecting range of data (after)

If this argument is specified, the other observations are restricted up to
the first observation where the specified condition is fulfilled. If the
condition is not fulfilled for any of the other observations, no
observations are considered, i.e., the observation is not flagged.

This parameter should be specified if \code{filter_join} contains summary
functions which should not apply to all observations but only up to the
confirmation assessment. For examples see the "Examples" section below.

\describe{
\item{Permitted values}{an unquoted condition, e.g., \code{AVISIT == "BASELINE"}}
\item{Default value}{\code{NULL}}
}}

\item{order}{Order

The observations are ordered by the specified order.

For handling of \code{NA}s in sorting variables see the "Sort Order" section in \code{vignette("generic")}.

\describe{
\item{Permitted values}{list of expressions created by \code{exprs()}, e.g., \code{exprs(ADT, desc(AVAL))} or \code{NULL}}
\item{Default value}{\code{NULL}}
}}

\item{tmp_obs_nr_var}{Temporary observation number

The specified variable is added to the input dataset (\code{dataset}) and the
additional dataset (\code{dataset_add}). It is set to the observation number
with respect to \code{order}. For each by group (\code{by_vars}) the observation
number starts with \code{1}. If there is more than one record for specific
values for \code{by_vars} and \code{order}, all records get the same observation
number. By default, a warning (see \code{check_type}) is issued in this case.
The variable can be used in the conditions (\code{filter_join},
\code{first_cond_upper}, \code{first_cond_lower}). It is not included in the output
dataset. It can also be used to select consecutive observations or the last
observation (see example below).

\describe{
\item{Permitted values}{an unquoted symbol, e.g., \code{AVAL}}
\item{Default value}{\code{NULL}}
}}

\item{filter_add}{Filter for additional dataset (\code{dataset_add})

Only observations from \code{dataset_add} fulfilling the specified condition are
joined to the input dataset. If the argument is not specified, all
observations are joined.

Variables created by the \code{order} argument can be used in the condition.

The condition can include summary functions. The additional dataset is
grouped by the by variables (\code{by_vars}).

\describe{
\item{Permitted values}{an unquoted condition, e.g., \code{AVISIT == "BASELINE"}}
\item{Default value}{\code{NULL}}
}}

\item{filter_join}{Condition for selecting observations

The filter is applied to the joined dataset for selecting the confirmed
observations. The condition can include summary functions like \code{all()} or
\code{any()}. The joined dataset is grouped by the original observations. I.e.,
the summary functions are applied to all observations up to the confirmation
observation. For example, in the oncology setting, when using this function
for confirmed best overall response, \code{filter_join = AVALC == "CR" & all(AVALC.join \%in\% c("CR", "NE")) & count_vals(var = AVALC.join, val = "NE") <= 1} selects observations with response "CR" and for all
observations up to the confirmation observation the response is "CR" or
"NE" and there is at most one "NE".

\describe{
\item{Permitted values}{an unquoted condition, e.g., \code{AVISIT == "BASELINE"}}
\item{Default value}{none}
}}

\item{check_type}{Check uniqueness?

If \code{"message"}, \code{"warning"}, or \code{"error"} is specified, a message,
warning, or error, respectively, is issued if the observations of the input
dataset are not unique with respect to the by variables and the order.

\describe{
\item{Permitted values}{\code{"none"}, \code{"message"}, \code{"warning"}, \code{"error"}}
\item{Default value}{\code{"warning"}}
}}
}
\value{
A subset of the observations of the input dataset. All variables of
the input dataset are included in the output dataset.
}
\description{
The function filters observations using a condition taking other observations
into account. For example, it could select all observations with \code{AVALC == "Y"} and \code{AVALC == "Y"} for at least one subsequent observation. The input
dataset is joined with itself to enable conditions taking variables from both
the current observation and the other observations into account. The suffix
".join" is added to the variables from the other observations.

An example usage might be checking if a patient received two required
medications within a certain timeframe of each other.

In the oncology setting, for example, we use such processing to check if a
response value can be confirmed by a subsequent assessment. This is commonly
used in endpoints such as best overall response.
}
\details{
The following steps are performed to produce the output dataset.
\subsection{Step 1}{
\itemize{
\item The variables specified by \code{order} are added to the additional dataset
(\code{dataset_add}).
\item The variables specified by \code{join_vars} are added to the additional dataset
(\code{dataset_add}).
\item The records from the additional dataset (\code{dataset_add}) are restricted to
those matching the \code{filter_add} condition.
}

Then the input dataset (\code{dataset}) is joined with the restricted
additional dataset by the variables specified for \code{by_vars}. From the
additional dataset only the variables specified for \code{join_vars} are kept.
The suffix ".join" is added to those variables which are also present in
the input dataset.

For example, for \code{by_vars = exprs(USUBJID)}, \code{join_vars = exprs(AVISITN, AVALC)}
and input dataset and additional dataset

\if{html}{\out{<div class="sourceCode r">}}\preformatted{# A tibble: 2 x 4
USUBJID AVISITN AVALC  AVAL
<chr>     <dbl> <chr> <dbl>
1             1 Y         1
1             2 N         0
}\if{html}{\out{</div>}}

the joined dataset is

\if{html}{\out{<div class="sourceCode r">}}\preformatted{# A tibble: 4 x 6
USUBJID AVISITN AVALC  AVAL AVISITN.join AVALC.join
<chr>     <dbl> <chr> <dbl>        <dbl> <chr>
1             1 Y         1            1 Y
1             1 Y         1            2 N
1             2 N         0            1 Y
1             2 N         0            2 N
}\if{html}{\out{</div>}}
}

\subsection{Step 2}{

The joined dataset is restricted to observations with respect to
\code{join_type} and \code{order}.

The dataset from the example in the previous step with \code{join_type = "after"} and \code{order = exprs(AVISITN)} is restricted to

\if{html}{\out{<div class="sourceCode r">}}\preformatted{# A tibble: 1 x 6
USUBJID AVISITN AVALC  AVAL AVISITN.join AVALC.join
<chr>     <dbl> <chr> <dbl>        <dbl> <chr>
1             1 Y         1            2 N
}\if{html}{\out{</div>}}
}

\subsection{Step 3}{

If \code{first_cond_lower} is specified, for each observation of the input
dataset the joined dataset is restricted to observations from the first
observation where \code{first_cond_lower} is fulfilled (the observation
fulfilling the condition is included) up to the observation of the input
dataset. If for an observation of the input dataset the condition is not
fulfilled, the observation is removed.

If \code{first_cond_upper} is specified, for each observation of the input
dataset the joined dataset is restricted to observations up to the first
observation where \code{first_cond_upper} is fulfilled (the observation
fulfilling the condition is included). If for an observation of the input
dataset the condition is not fulfilled, the observation is removed.

For examples see the "Considering a range of records only" and "Considering
only records up to a condition" examples in the "Examples" section.
}

\subsection{Step 4}{

The joined dataset is grouped by the observations from the input dataset
and restricted to the observations fulfilling the condition specified by
\code{filter_join}.
}

\subsection{Step 5}{

The first observation of each group is selected and the \verb{*.join} variables
are dropped.

\strong{Note:} This function creates temporary datasets which may be much bigger
than the input datasets. If this causes memory issues, please try setting
the admiral option \code{save_memory} to \code{TRUE} (see \code{set_admiral_options()}).
This reduces the memory consumption but increases the run-time.
}
}
\seealso{
\code{\link[=count_vals]{count_vals()}}, \code{\link[=min_cond]{min_cond()}}, \code{\link[=max_cond]{max_cond()}}

Utilities for Filtering Observations: 
\code{\link{count_vals}()},
\code{\link{filter_exist}()},
\code{\link{filter_extreme}()},
\code{\link{filter_not_exist}()},
\code{\link{filter_relative}()},
\code{\link{max_cond}()},
\code{\link{min_cond}()}
}
\concept{utils_fil}
\keyword{utils_fil}
\section{Examples}{
\subsection{Filter records considering other records (\code{filter_join}, \code{join_vars})}{

In this example, the input dataset should be restricted to records with
a duration longer than 30 and where a COVID AE (\code{ACOVFL == "Y"}) occurred
before or up to seven days after the record. The condition for restricting
the records is specified by the \code{filter_join} argument. Variables from the
other records are referenced by variable names with the suffix \code{.join}.
These variables have to be specified for the \code{join_vars} argument. As
records before \emph{and} after the current record should be considered,
\code{join_type = "all"} is specified.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{library(tibble)

adae <- tribble(
  ~USUBJID, ~ADY, ~ACOVFL, ~ADURN,
  "1",        10, "N",          1,
  "1",        21, "N",         50,
  "1",        23, "Y",         14,
  "1",        32, "N",         31,
  "1",        42, "N",         20,
  "2",        11, "Y",         13,
  "2",        23, "N",          2,
  "3",        13, "Y",         12,
  "4",        14, "N",         32,
  "4",        21, "N",         41
)

filter_joined(
  adae,
  dataset_add = adae,
  by_vars = exprs(USUBJID),
  join_vars = exprs(ACOVFL, ADY),
  join_type = "all",
  filter_join = ADURN > 30 & ACOVFL.join == "Y" & ADY.join <= ADY + 7
)
#> # A tibble: 2 × 4
#>   USUBJID   ADY ACOVFL ADURN
#>   <chr>   <dbl> <chr>  <dbl>
#> 1 1          21 N         50
#> 2 1          32 N         31}\if{html}{\out{</div>}}}
\subsection{Considering only records after the current one (\code{join_type = "after"})}{

In this example, the input dataset is restricted to records with \code{AVALC == "Y"} and \code{AVALC == "Y"} at a subsequent visit. \code{join_type = "after"} is
specified to consider only records after the current one. Please note that
the \code{order} argument must be specified, as otherwise it is not possible to
determine which records are after the current record.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{data <- tribble(
  ~USUBJID, ~AVISITN, ~AVALC,
  "1",      1,        "Y",
  "1",      2,        "N",
  "1",      3,        "Y",
  "1",      4,        "N",
  "2",      1,        "Y",
  "2",      2,        "N",
  "3",      1,        "Y",
  "4",      1,        "N",
  "4",      2,        "N",
)

filter_joined(
  data,
  dataset_add = data,
  by_vars = exprs(USUBJID),
  join_vars = exprs(AVALC, AVISITN),
  join_type = "after",
  order = exprs(AVISITN),
  filter_join = AVALC == "Y" & AVALC.join == "Y"
)
#> # A tibble: 1 × 3
#>   USUBJID AVISITN AVALC
#>   <chr>     <dbl> <chr>
#> 1 1             1 Y    }\if{html}{\out{</div>}}}
\subsection{Considering a range of records only (\code{first_cond_lower}, \code{first_cond_upper})}{

Consider the following data.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{myd <- tribble(
  ~subj, ~day, ~val,
  "1",      1, "++",
  "1",      2, "-",
  "1",      3, "0",
  "1",      4, "+",
  "1",      5, "++",
  "1",      6, "-",
  "2",      1, "-",
  "2",      2, "++",
  "2",      3, "+",
  "2",      4, "0",
  "2",      5, "-",
  "2",      6, "++"
)}\if{html}{\out{</div>}}

To select \code{"0"} where all results from the first \code{"++"} before the
\code{"0"} up to the \code{"0"} (excluding the \code{"0"}) are \code{"+"} or \code{"++"} the
\code{first_cond_lower} argument and \code{join_type = "before"} are specified.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{filter_joined(
  myd,
  dataset_add = myd,
  by_vars = exprs(subj),
  order = exprs(day),
  join_vars = exprs(val),
  join_type = "before",
  first_cond_lower = val.join == "++",
  filter_join = val == "0" & all(val.join \%in\% c("+", "++"))
)
#> # A tibble: 1 × 3
#>   subj    day val  
#>   <chr> <dbl> <chr>
#> 1 2         4 0    }\if{html}{\out{</div>}}

To select \code{"0"} where all results from the \code{"0"} (excluding the \code{"0"})
up to the first \code{"++"} after the \code{"0"} are \code{"+"} or \code{"++"} the
\code{first_cond_upper} argument and \code{join_type = "after"} are specified.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{filter_joined(
  myd,
  dataset_add = myd,
  by_vars = exprs(subj),
  order = exprs(day),
  join_vars = exprs(val),
  join_type = "after",
  first_cond_upper = val.join == "++",
  filter_join = val == "0" & all(val.join \%in\% c("+", "++"))
)
#> # A tibble: 1 × 3
#>   subj    day val  
#>   <chr> <dbl> <chr>
#> 1 1         3 0    }\if{html}{\out{</div>}}}
\subsection{Considering only records up to a condition (\code{first_cond_upper})}{

In this example from deriving confirmed response in oncology, the
records with
\itemize{
\item \code{AVALC == "CR"},
\item \code{AVALC == "CR"} at a subsequent visit,
\item only \code{"CR"} or \code{"NE"} in between, and
\item at most one \code{"NE"} in between
}

should be selected. The other records to be considered are restricted to
those up to the first occurrence of \code{"CR"} by specifying the
\code{first_cond_upper} argument. The \code{count_vals()} function is used to count the
\code{"NE"}s for the last condition.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{data <- tribble(
  ~USUBJID, ~AVISITN, ~AVALC,
  "1",      1,        "PR",
  "1",      2,        "CR",
  "1",      3,        "NE",
  "1",      4,        "CR",
  "1",      5,        "NE",
  "2",      1,        "CR",
  "2",      2,        "PR",
  "2",      3,        "CR",
  "3",      1,        "CR",
  "4",      1,        "CR",
  "4",      2,        "NE",
  "4",      3,        "NE",
  "4",      4,        "CR",
  "4",      5,        "PR"
)

filter_joined(
  data,
  dataset_add = data,
  by_vars = exprs(USUBJID),
  join_vars = exprs(AVALC),
  join_type = "after",
  order = exprs(AVISITN),
  first_cond_upper = AVALC.join == "CR",
  filter_join = AVALC == "CR" & all(AVALC.join \%in\% c("CR", "NE")) &
    count_vals(var = AVALC.join, val = "NE") <= 1
)
#> # A tibble: 1 × 3
#>   USUBJID AVISITN AVALC
#>   <chr>     <dbl> <chr>
#> 1 1             2 CR   }\if{html}{\out{</div>}}}
\subsection{Considering order of values (\code{min_cond()}, \code{max_cond()})}{

In this example from deriving confirmed response in oncology, records
with
\itemize{
\item \code{AVALC == "PR"},
\item \code{AVALC == "CR"} or \code{AVALC == "PR"} at a subsequent visit at least 20 days later,
\item only \code{"CR"}, \code{"PR"}, or \code{"NE"} in between,
\item at most one \code{"NE"} in between, and
\item \code{"CR"} is
not followed by \code{"PR"}
}

should be selected. The last condition is realized by using \code{min_cond()} and
\code{max_cond()}, ensuring that the first occurrence of \code{"CR"} is after the last
occurrence of \code{"PR"}. The second call to \code{count_vals()} in the condition is
required to cover the case of no \code{"CR"}s (the \code{min_cond()} call returns \code{NA}
then).

\if{html}{\out{<div class="sourceCode r">}}\preformatted{data <- tribble(
  ~USUBJID, ~ADY, ~AVALC,
  "1",         6, "PR",
  "1",        12, "CR",
  "1",        24, "NE",
  "1",        32, "CR",
  "1",        48, "PR",
  "2",         3, "PR",
  "2",        21, "CR",
  "2",        33, "PR",
  "3",        11, "PR",
  "4",         7, "PR",
  "4",        12, "NE",
  "4",        24, "NE",
  "4",        32, "PR",
  "4",        55, "PR"
)

filter_joined(
  data,
  dataset_add = data,
  by_vars = exprs(USUBJID),
  join_vars = exprs(AVALC, ADY),
  join_type = "after",
  order = exprs(ADY),
  first_cond_upper = AVALC.join \%in\% c("CR", "PR") & ADY.join - ADY >= 20,
  filter_join = AVALC == "PR" &
    all(AVALC.join \%in\% c("CR", "PR", "NE")) &
    count_vals(var = AVALC.join, val = "NE") <= 1 &
    (
      min_cond(var = ADY.join, cond = AVALC.join == "CR") >
        max_cond(var = ADY.join, cond = AVALC.join == "PR") |
        count_vals(var = AVALC.join, val = "CR") == 0
    )
)
#> # A tibble: 1 × 3
#>   USUBJID   ADY AVALC
#>   <chr>   <dbl> <chr>
#> 1 4          32 PR   }\if{html}{\out{</div>}}}
\subsection{Considering the order of records (\code{tmp_obs_nr_var})}{

In this example, the records with \code{CRIT1FL == "Y"} at two consecutive
visits or at the last visit should be selected. A temporary order variable
is created by specifying the \code{tmp_obs_nr_var} argument. Then it is used in
\code{filter_join}. The temporary variable doesn't need to be specified for
\code{join_vars}.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{data <- tribble(
  ~USUBJID, ~AVISITN, ~CRIT1FL,
  "1",      1,        "Y",
  "1",      2,        "N",
  "1",      3,        "Y",
  "1",      5,        "N",
  "2",      1,        "Y",
  "2",      3,        "Y",
  "2",      5,        "N",
  "3",      1,        "Y",
  "4",      1,        "Y",
  "4",      2,        "N",
)

filter_joined(
  data,
  dataset_add = data,
  by_vars = exprs(USUBJID),
  tmp_obs_nr_var = tmp_obs_nr,
  join_vars = exprs(CRIT1FL),
  join_type = "all",
  order = exprs(AVISITN),
  filter_join = CRIT1FL == "Y" & CRIT1FL.join == "Y" &
    (tmp_obs_nr + 1 == tmp_obs_nr.join | tmp_obs_nr == max(tmp_obs_nr.join))
)
#> # A tibble: 2 × 3
#>   USUBJID AVISITN CRIT1FL
#>   <chr>     <dbl> <chr>  
#> 1 2             1 Y      
#> 2 3             1 Y      }\if{html}{\out{</div>}}}}

