#' @title Match Cohort Using Mahalanobis Distance
#'
#' @description This function constructs pairs of vaccinated and unvaccinated
#' individuals with similar characteristics. It relies on the matching
#' algorithm implemented in the package `{MatchIt}`.
#' By default, the function uses `method = "nearest"`, `ratio = 1`, and
#' `distance = "mahalanobis"` to perform the matching.
#' @inheritParams match_cohort
#' @return `data.frame` with the matched population.
#' @keywords internal
match_cohort_ <- function(data_set,
                          vacc_status_col,
                          exact = NULL,
                          nearest = NULL) {

  #Formula
  variables <- c(exact, names(nearest))
  formula <- paste(vacc_status_col, "~", paste(variables, collapse = " + "))
  formula_eval <- eval(parse(text = formula))

  #Matching
  matchit <- MatchIt::matchit(
    formula_eval,
    data = data_set,
    method = "nearest",
    ratio = 1,
    exact = exact,
    nearest = names(nearest),
    caliper = nearest,
    distance = "mahalanobis"
  )
  match <- MatchIt::match.data(matchit)
  match <- match[, -which(names(match) == "weights")]
  return(match)
}

#' @title Match Information by Pairs
#'
#' @description This function matches numeric/date information of a
#' pair provided in the column `column_to_match`. This matching
#' procedure is required for sharing the censoring information and the
#' exposure times when adjusting and removing negative exposures
#' generated by incoherent exposure dates.
#'
#' @inheritParams match_cohort
#' @param column_to_match The name of the column that contains the information
#' shared by the pairs.
#' @param criteria Selection criteria when both individuals provide information.
#' This can be the minimum (min) or maximum (max) value.
#' @keywords internal
match_pair_info <- function(data_set,
                            column_to_match,
                            criteria = c("min", "max")) {
  matched_info <- unlist(
    tapply(data_set[[column_to_match]],
      data_set$subclass,
      function(x) {
        if (all(is.na(x))) {
          return(NA)
        } else {
          if (criteria == "min") {
            return(as.character(min(x, na.rm = TRUE)))
          } else if (criteria == "max") {
            return(as.character(max(x, na.rm = TRUE)))
          }
        }
      }
    )
  )
  # return data matched by subclass
  return(matched_info[data_set$subclass])
}

#' @title Create Censoring date by Pairs
#'
#' @description This function creates the censoring date of the pairs by
#' inheriting the minimum date in which any of the partners has a
#' censoring event. Two conditions are checked to inherit a censoring date
#' in a pair.
#' 1. Individual censoring occurs before individual event;
#' 2. If an outcome happens before the censoring of the partner
#' no censoring is inherited by the other.
#'
#' @inheritParams match_cohort
#' @keywords internal

get_censoring_after_match <- function(data_set,
                                      outcome_date_col,
                                      censoring_date_col) {
  # 1. Check that individual censoring occurs before event
  data_set$censoring_individual <- as.Date(ifelse(
    (data_set[[censoring_date_col]] <
       data_set[[outcome_date_col]]) |
      is.na(data_set[[outcome_date_col]]),
    as.character(data_set[[censoring_date_col]]),
    as.Date(NA)
  ))

  # 2. Match minimum censoring date as censoring date for pair
  data_set$censoring_pair <-  as.Date(match_pair_info(
    data_set = data_set,
    column_to_match = "censoring_individual",
    criteria = "min"
  ))

  # 3. If an outcome happens before censoring_pair
  # no censoring must be assigned
  data_set$censoring_accepted <-
    as.Date(ifelse(
      (data_set$censoring_pair > data_set[[outcome_date_col]]) &
        (!is.na(data_set$censoring_pair)) &
        (!is.na(data_set[[outcome_date_col]])),
      as.Date(NA),
      as.character(data_set$censoring_pair)
    ))

  return(data_set$censoring_accepted)
}

#' @title Constructs Summary with Results from Matching
#'
#' @description This function creates a summary for the results from
#' cohort matching. It provides counts grouped by vaccine status for
#' the entire population, the matched and unmatched populations.
#'
#' @inheritParams match_cohort
#' @param all `data.frame` with the entire cohort.
#' @param matched `data.frame` with the matched cohort.
#' calculate removed cases. Default is NULL, which returns 0.
#' @return Summary `data.frame` with counts by vaccine status for:
#' all, matched, unmatched, and removed.
#' @keywords internal

match_summary <- function(all,
                          matched,
                          vacc_status_col) {
  summ_all <- as.data.frame(rbind(table(all[[vacc_status_col]])))
  if (is.null(matched)) {
    summ_matched <- data.frame(
      u = 0,
      v = 0
    )
  } else {
    summ_matched <- as.data.frame(rbind(table(matched[[vacc_status_col]])))
  }
  summ_unmatched <- summ_all - summ_matched
  summ <- rbind(summ_all, summ_matched, summ_unmatched)
  row.names(summ) <- c("All", "Matched", "Unmatched")

  return(summ)
}

#' @title Balance of Vaccinated/Unvaccinated Groups
#'
#' @description This function creates a summary after matching.
#' @inheritParams match_cohort
#' @param data_set `data.frame` to assess matching balance.
#' @return Summary `data.frame` with the balance of each variable by
#' vaccine status. Numeric variables are reported with means, and
#' categorical/factor variables are reported with proportions.
#' In both cases, the Standardized Mean Difference (SMD) is calculated.
#' @keywords internal
balance_summary <- function(data_set,
                            nearest,
                            exact,
                            vacc_status_col,
                            vaccinated_status,
                            unvaccinated_status) {
  columns <- c(names(nearest), exact)
  numeric <- columns[vapply(data_set[columns], is.numeric, logical(1))]

  categorical <- columns[vapply(data_set[columns], is.character, logical(1))]
  factor <- columns[vapply(data_set[columns], is.factor, logical(1))]
  categorical <- c(categorical, factor)

  # balance for numeric variables
  balance_num <- data.frame()
  for (n in numeric) {
    # mean and sd
    temp <- as.data.frame(stats::aggregate(
      data_set[n], list(data_set$vaccine_status),
      FUN = function(x) c(mean = mean(x), sd = stats::sd(x))
    ))
    temp <- do.call(data.frame, temp)
    # Std. Mean Diff.
    mean_v <- temp[2][temp$Group.1  == vaccinated_status, ]
    sd_v <- temp[3][temp$Group.1  == vaccinated_status, ]
    mean_u <- temp[2][temp$Group.1  == unvaccinated_status, ]
    sd_u <- temp[3][temp$Group.1  == unvaccinated_status, ]
    smd <- (mean_v - mean_u) / sqrt((sd_v^2 + sd_u^2) / 2)

    # Results
    summ_num <- data.frame(
      u = c(mean_u),
      v = c(mean_v),
      smd = c(smd)
    )
    colnames(summ_num) <- c(unvaccinated_status, vaccinated_status, "smd")
    rownames(summ_num) <- n

    balance_num <- rbind(balance_num, summ_num)
  }

  # balance for categorical/factor variables
  balance_cat <- data.frame()
  for (c in categorical) {
    # proportion by group
    temp <- as.data.frame(
      rbind(prop.table(table(data_set[[c]], data_set[[vacc_status_col]]),
                       2))
    )
    rownames(temp) <- paste(c, row.names(temp), sep = "_")

    # Std. Mean Diff.
    pooled <- sqrt(
      (temp[[vaccinated_status]] * (1 - temp[[vaccinated_status]]) +
         temp[[unvaccinated_status]] * (1 - temp[[unvaccinated_status]])) / 2
    )
    temp$smd <-
      (temp[[vaccinated_status]] - temp[[unvaccinated_status]]) / pooled
    # Results
    balance_cat <- rbind(balance_cat, temp)
  }

  balance <- rbind(balance_num, balance_cat)

  return(balance)
}
