#' Make matrix of treatment assignment probabilities
#'
#' This function creates a N x N matrix where the diagonals equal zero, and the
#' off-diagonal elements (i, j) contain the probability the ith observation has
#' Z = max(Z_i, Z_j) and the jth observation has Z = min(Z_i, Z_j), conditioned
#' on covariates. Uses the "model-based" conditional density estimation method
#' described in \insertCite{frazier2024bias}{nbpInference}.
#'
#' @param Z an N-length vector of treatment values, which must be numeric.
#' @param X an N x k matrix of covariate values, which must be numeric.
#' @return an N x N numeric matrix. Each entry represents the probability the
#'  ith observation has Z = max(Z_i, Z_j) and the jth observation has
#'  Z = min(Z_i, Z_j), conditioned on covariates.
#' @examples
#' set.seed(12345)
#' X <- rnorm(100, 0, 5)
#' Z <- X + rnorm(100, 0, (1+sqrt(abs(X))))
#' make.pmatrix(Z, X)
#' @family inference
#' @importFrom stats rnorm dnorm lm sd
#' @export
make.pmatrix <- function(Z, X){

  X <- data.frame(X)

  #### error messaging

  # checks if treatment vector is same length as covariate matrix
  if(length(Z) != nrow(X)){
    stop("Treatment vector is not same length as covariate matrix.")
  }

  # checks if treatment vector is numeric
  if(is.numeric(Z) == FALSE){
    stop("Treatment is not numeric.")
  }

  # checks if covariate matrix is numeric
  if(all(sapply(X, is.numeric)) == FALSE){
    stop("Covariate matrix contains one or more non-numeric entries.")
  }

  rownames(X) <- 1:length(Z)
  names(Z) <- 1:length(Z)

  #### functionality
  p_matrix <- matrix(0, nrow = length(Z), ncol = length(Z))
  rownames(p_matrix) <- 1:length(Z)

  # create pmatrix using the model-based method
    model <- lm(Z ~., data = X)
    fitted <- model$fitted.values
    sd_est <- sd(fitted - Z)

    # returns error message if estimate of generalized propensity score
    # distribution is degenerate
    if(sd_est == 0){
      stop("Generalized propensity score is degenerate.")
    }

    same_propensity_vector <- dnorm(Z, fitted, sd_est)
    observed_treatment_matrix <- same_propensity_vector %*% base::t(same_propensity_vector)

    p_matrix <- matrix(0, nrow = length(Z), ncol = length(Z))

    for(i in 1:(length(Z)-1)){
      for(j in (i+1):length(Z)){

        opposing_treatment <- dnorm(Z[i], fitted[j], sd_est) * dnorm(Z[j], fitted[i], sd_est)
        p_matrix[i, j] <- observed_treatment_matrix[i, j] / (observed_treatment_matrix[i, j] + opposing_treatment)
        p_matrix[j, i] <- p_matrix[i, j]

      }
    }

  return(p_matrix)
}

#' non-bipartite matching with treatment assignment caliper
#'
#' This function creates a I x 2 dataframe containing the indices of observations
#' that form our set of matched pairs. It uses the nbpMatch package
#' \insertCite{lu2011optimal}{nbpInference} along with a p-matrix
#' in order to create I matched pairs using a treatment assignment caliper. A p-matrix
#' can be created using the \link[nbpInference]{make.pmatrix} function.
#'
#' @param Z a 2I-length vector of treatment values, which must be numeric.
#' @param X a 2I x k matrix of covariate values, which must be numeric.
#' @param pmat a 2I x 2I symmetric matrix where the diagonals equal zero, and the
#' off-diagonal elements (i, j) contain the probability the ith observation has
#' Z = max(Z_i, Z_j) and the jth observation has Z = min(Z_i, Z_j). A p-matrix
#' can be made using the make.pmatrix function.
#' @param xi a number in the range 0 to 0.5, the cutoff related to the treatment
#' assignment probability caliper.
#' @param M an integer determining the penalty of the treatment assignment
#' probability caliper. If a potential matched pair between observations i and j
#' has treatment assignment probability less than xi or greater than 1-xi, add
#' M to the distance matrix in the (i, j) and (j, i) entry.
#' @return I x 2 dataframe
#' @examples
#' set.seed(12345)
#' X <- rnorm(100, 0, 5)
#' Z <- X + rnorm(100, 0, (1+sqrt(abs(X))))
#' pmat <- make.pmatrix(Z, X)
#' nbp.caliper(Z, X, pmat, xi = 0.1, M = 10000)
#' @family inference
#' @importFrom stats rnorm
#' @export
nbp.caliper <- function(Z, X, pmat, xi = 0, M = 0){

  X <- data.frame(X)

  #### error messaging

  # checks if treatment vector is same length as covariate matrix
  if(length(Z) != nrow(X)){
    stop("Treatment vector is not same length as covariate matrix.")
  }

  # checks if treatment vector is numeric
  if(is.numeric(Z) == FALSE){
    stop("Treatment is not numeric.")
  }

  # checks if covariate matrix is numeric
  if(sum(unlist(lapply(X, is.numeric))) != ncol(X)){
    stop("One or more covariates are not numeric.")
  }

  if(missing(pmat)){
    pmat <- matrix(0.5, nrow = length(Z), ncol = length(Z))
    diag(pmat) <- 0
  }

  # checks to see if pmat has the correct dimensions
  if(sum(dim(pmat) == nrow(X)) != 2){
    stop("pmat has the incorrect dimensions.")
  }

  # checks to see if the values in pmat are appropriate
  if(sum(pmat <= 1 & pmat >= 0) != (nrow(X)^2)){
    stop("One or more entries in pmat are not valid probability values.")
  }

  # checks the value of xi, which should be between 0 and 1/2
  if((xi < 0.5 & xi >= 0) == FALSE){
    stop("Hyperparameter xi must be between 0 and 0.5, excluding 0.5.")
  }

  # checks the value of M, which should be a nonnegative number
  if(M < 0){
    stop("Hyperparameter M must be a nonnegative number.")
  }

  # checks the value of M, which should be a nonnegative number
  if(is.numeric(M) == FALSE){
    stop("Hyperparameter M must be a numeric.")
  }

  #### functionality

  # matrix encoding our caliper penalty
  caliper_matrix <- matrix(0, nrow = nrow(X), ncol = nrow(X))

  for(i in 1:(nrow(X)-1)){
    for(j in (i+1):nrow(X)){

      # if probability of observed treatment assignment is extreme, enforce
      # infinite penalty on distance matrix
      caliper_matrix[i, j] <- ifelse(pmat[i, j] > (1-xi) | pmat[i, j] < xi, Inf, 0)
      caliper_matrix[j, i] <- caliper_matrix[i, j]

    }
  }

  # making mahalanobis distance matrix

  Matching_data <- cbind(Z, X)
  test.dist <- nbpMatching::gendistance(Matching_data, idcol = 1)
  test.mdm <- nbpMatching::distancematrix(test.dist)

  # applying caliper to mahalnobis distance matrix

  caliper_dist_matrix <- nbpMatching::distancematrix(test.mdm + caliper_matrix)
  test.match <- nbpMatching::nonbimatch(caliper_dist_matrix)
  matches <- test.match$matches
  matches$group <- ifelse(as.numeric(matches$Group1.ID) < as.numeric(matches$Group2.ID), "L", "H")
  high_groups <- matches[which(matches$group == "H"), 2]
  low_groups <- matches[which(matches$group == "L"), 2]

  balanced_pairs <- as.data.frame(matrix(nrow = 0, ncol = 2))
  high_data <- matches[which(matches$group == "H"), ]
  balanced_pairs <- high_data[, c(2, 4)]
  names(balanced_pairs) <- c("High", "Low")

  return(balanced_pairs)

}

#' Classic Neyman Sample Average Treatment Effect Estimator
#'
#' This function estimates the sample average treatment effect for a set of
#' matched pairs using the classic Neyman estimator. For references on the
#' classic Neyman estimator, see \insertCite{baiocchi2010building,zhang2022bridging,heng2023instrumental;textual}{nbpInference}
#'
#' @param Y a 2I-length vector of outcome values, which must be numeric.
#' @param Z a 2I-length vector of treatment values, which must be numeric.
#' @param pairs an I x 2 dataframe containing the indices of observations
#' that form our set of matched pairs. An appropriate pairs dataframe can be
#' formed using the \link[nbpInference]{nbp.caliper} function.
#' @return the sample average treatment effect (numeric)
#' @examples
#' set.seed(12345)
#' X <- rnorm(100, 0, 5)
#' Z <- X + rnorm(100, 0, (1+sqrt(abs(X))))
#' Y <- X + Z + rnorm(100, 0, 0.5)
#' pmat <- make.pmatrix(Z, X)
#' pairs <- nbp.caliper(Z, X, pmat, xi = 0.1, M = 10000)
#' classic.neyman(Y, Z, pairs)
#' @family inference
#' @importFrom stats rnorm sd
#' @export
classic.neyman <- function(Y, Z, pairs){

  #### error messaging

  # checks to see if outcome and treatment vectors are the same length
  if(length(Y) != length(Z)){
    stop("Outcome vector and treatment vector are different lengths.")
  }

  # checks to see if number of pairs corresponds with length of treatment vector
  if(length(Y) != (2*nrow(pairs))){
    stop("Number of pairs is not half the length of the outcome vector.")
  }

  # checks to see if outcome and treatment vectors are both numeric
  if((is.numeric(Y) & is.numeric(Z)) == FALSE){
    stop("Outcome and/or treatment vectors are not numeric.")
  }

  # checks to see if pair dataframe has any duplicated pairs / has index
  # values outside of 1 through 2I
  if(sum(sort(c(pairs[, 1], pairs[, 2])) == 1:(2*nrow(pairs))) != (2*nrow(pairs))){
    stop("pairs dataframe either has duplicated pairs, has indices outside of expected range, or is otherwise improperly formatted.")
  }

  #### functionality

  if(sd(Z) == 0){
    return(0)
  }

  if(sd(Z) != 0){
    numerator <- ifelse(Z[pairs[, 1]] > Z[pairs[, 2]], Y[pairs[, 1]] - Y[pairs[, 2]], Y[pairs[, 2]] - Y[pairs[, 1]])
    denominator <- ifelse(Z[pairs[, 1]] > Z[pairs[, 2]], Z[pairs[, 1]] - Z[pairs[, 2]], Z[pairs[, 2]] - Z[pairs[, 1]])
    return(sum(numerator) / sum(denominator))
  }
}

#' Bias-corrected Neyman Sample Average Treatment Effect Estimator
#'
#' This function estimates the sample average treatment effect for a set of
#' matched pairs using the bias-corrected Neyman estimator, defined in \insertCite{frazier2024bias;textual}{nbpInference}.
#'
#' @param Y a 2I-length vector of outcome values
#' @param Z a 2I-length vector of treatment values
#' @param pairs an I x 2 dataframe containing the indices of observations
#' that form our set of matched pairs. An appropriate pairs dataframe can be
#' formed using the \link[nbpInference]{nbp.caliper} function.
#' @param pmat a 2I x 2I matrix where the diagonals equal zero, and the
#' off-diagonal elements (i, j) contain the probability the ith observation has
#' Z = max(Z_i, Z_j) and the jth observation has Z = min(Z_i, Z_j). We can create
#' a p-matrix using the make.pmatrix function.A p-matrix can be created using
#' the \link[nbpInference]{make.pmatrix} function.
#' @param xi a number in the range 0 to 0.5, the cutoff related to the treatment
#' assignment probability caliper.
#' @return I x 2 dataframe
#' @examples
#' set.seed(12345)
#' X <- rnorm(100, 0, 5)
#' Z <- X + rnorm(100, 0, (1+sqrt(abs(X))))
#' Y <- X + Z + rnorm(100, 0, 0.5)
#' pmat <- make.pmatrix(Z, X)
#' pairs <- nbp.caliper(Z, X, pmat, xi = 0.1, M = 10000)
#' bias.corrected.neyman(Y, Z, pairs, pmat, xi = 0.1)
#' @family inference
#' @importFrom stats rnorm
#' @export
bias.corrected.neyman <- function(Y, Z, pairs, pmat, xi){

  #### error messaging

  # checks to see if outcome and treatment vectors are the same length
  if(length(Y) != length(Z)){
    stop("Outcome vector and treatment vector are different lengths.")
  }

  # checks to see if number of pairs corresponds with length of treatment vector
  if(length(Y) != (2*nrow(pairs))){
    stop("Number of pairs is not half the length of the outcome vector.")
  }

  # checks to see if outcome and treatment vectors are both numeric
  if((is.numeric(Y) & is.numeric(Z)) == FALSE){
    stop("Outcome and/or treatment vectors are not numeric.")
  }

  # checks to see if pair dataframe has any duplicated pairs / has index
  # values outside of 1 through 2I
  if(sum(sort(c(pairs[, 1], pairs[, 2])) == 1:(2*nrow(pairs))) != (2*nrow(pairs))){
    stop("pairs dataframe either has duplicated pairs, has indices outside of
         expected range, or is otherwise improperly formatted.")
  }

  # checks to see if pmat has the correct dimensions
  if(sum(dim(pmat) == length(Y)) != 2){
    stop("pmat has the incorrect dimensions.")
  }

  # checks to see if the values in pmat are appropriate
  if(sum(pmat <= 1 & pmat >= 0) != (length(Y)^2)){
    stop("One or more entries in pmat are not valid probability values.")
  }

  # checks the value of xi, which should be between 0 and 1/2
  if((xi < 0.5 & xi >= 0) == FALSE){
    stop("Hyperparameter xi must be between 0 and 0.5, excluding 0.5.")
  }

  #### functionality

  if(sd(Z) == 0){
    return(0)
  }

  if(sd(Z) != 0){
    p <- diag(pmat[pairs[, 1], pairs[, 2]])
    p[p < xi] <- xi
    p[p > (1-xi)] <- 1-xi
    numerator <- ifelse(Z[pairs[, 1]] > Z[pairs[, 2]], (p^(-1))*(Y[pairs[, 1]] - Y[pairs[, 2]]), ((1-p)^(-1))*(Y[pairs[, 2]] - Y[pairs[, 1]]))
    denominator <- ifelse(Z[pairs[, 1]] > Z[pairs[, 2]], 2*(Z[pairs[, 1]] - Z[pairs[, 2]]), 2*(Z[pairs[, 2]] - Z[pairs[, 1]]))
    return(sum(numerator) / sum(denominator))
  }

}

#' Covariate-Adjusted Variance Estimation
#'
#' This function calculates the covariate-adjusted conservative variance estimator
#' For the (classic or bias-corrected) Neyman estimator. For details on the
#' definition of the covariate-adjusted Neyman estimator, see \insertCite{fogarty2018mitigating;textual}{nbpInference}
#' and \insertCite{frazier2024bias;textual}{nbpInference}.
#'
#' @param Y a 2I-length vector of outcome values
#' @param Z a 2I-length vector of treatment values
#' @param X a 2I x k matrix of covariate values
#' @param pairs an I x 2 dataframe containing the indices of observations
#' that form our set of matched pairs. An appropriate pairs dataframe can be
#' formed using the \link[nbpInference]{nbp.caliper} function.
#' @param pmat a 2I x 2I matrix where the diagonals equal zero, and the
#' off-diagonal elements (i, j) contain the probability the ith observation has
#' Z = max(Z_i, Z_j) and the jth observation has Z = min(Z_i, Z_j). We can create
#' a p-matrix using the make.pmatrix function. A p-matrix can be created using
#' the \link[nbpInference]{make.pmatrix} function.
#' @param xi a number in the range 0 to 0.5, the cutoff related to the treatment
#' assignment probability caliper.
#' @param Q an arbitrary I x L numeric (real-valued) matrix, where L < I
#' @return a 2I x 2I numeric matrix
#' @examples
#' set.seed(12345)
#' X <- rnorm(100, 0, 5)
#' Z <- X + rnorm(100, 0, (1+sqrt(abs(X))))
#' Y <- X + Z + rnorm(100, 0, 0.5)
#' pmat <- make.pmatrix(Z, X)
#' pairs <- nbp.caliper(Z, X, pmat, xi = 0.1, M = 10000)
#' covAdj.variance(Y, Z, X, pairs, pmat, xi = 0.1)
#' @family inference
#' @importFrom stats rnorm
#' @export
covAdj.variance <- function(Y, Z, X, pairs, pmat, xi, Q){

  #### error messaging

  # checks to see Z, Y, and pairs agree on length
  if(length(unique(c(length(Z), length(Y), (2*nrow(pairs))))) != 1){
    stop("One of Y, Z or pairs is not the right length.")
  }

  # checks to see if outcome and treatment vectors are both numeric
  if((is.numeric(Y) & is.numeric(Z)) == FALSE){
    stop("Outcome and/or treatment vectors are not numeric.")
  }

  if(missing(X) == FALSE){
    X <- data.frame(X)

    # checks if covariate matrix is numeric
    if(sum(unlist(lapply(X, is.numeric))) != ncol(X)){
      stop("One or more covariates are not numeric.")
    }
  }

  # checks to see if pair dataframe has any duplicated pairs / has index
  # values outside of 1 through 2I
  if(sum(sort(c(pairs[, 1], pairs[, 2])) == 1:(2*nrow(pairs))) != (2*nrow(pairs))){
    stop("pairs dataframe either has duplicated pairs, has indices outside of
         expected range, or is otherwise improperly formatted.")
  }

  # checks the value of xi, which should be between 0 and 1/2
  if((xi < 0.5 & xi >= 0) == FALSE){
    stop("Hyperparameter xi must be between 0 and 0.5, excluding 0.5.")
  }


  #### optional arguments

  # if pmat is not specified, revert back to classic Neyman, where probability
  # of treatment assignment is assumed to be 0.5 for all pairs
  if(missing(pmat)){
    pmat <- matrix(0.5, nrow = length(Y), ncol = length(Z))
  }

  # checks to see if pmat has the correct dimensions
  if(sum(dim(pmat) == length(Z)) != 2){
    stop("pmat has the incorrect dimensions.")
  }

  # checks to see if the values in pmat are appropriate
  if(sum(pmat <= 1 & pmat >= 0) != (length(Z)^2)){
    stop("One or more entries in pmat are not valid probability values.")
  }

  # if Q is not specified, then either make Q == 1 if no covariates are present,
  # or set Q equal to average covariate profile within pairs if covariates
  # are present
  if(missing(Q) == TRUE){
    if(missing(X) == TRUE){
      Q <- rep(1, length(Y)/2)
    }
    if(missing(X) == FALSE){
      Q <- (X[pairs[, 1], ] + X[pairs[, 2], ])/2
    }
  }
  Q <- as.matrix(Q)
  singular_columns <- which(colSums(abs(Q)) == 0)

  # removes singular columns from Q
  if(length(singular_columns) != 0){
    Q <- Q[, -singular_columns]
  }

  # checks to see if Q matrix is properly low-dimensional
  if(dim(Q)[1] < dim(Q)[2]){
    stop("Q matrix has more columns than rows.")
  }

  #### functionality

  hatQ <- Q %*% solve(base::t(Q)%*%Q) %*% base::t(Q)
  pvec <- diag(pmat[pairs[, 1], pairs[, 2]])
  pvec[pvec < xi] <- xi
  pvec[pvec > (1-xi)] <- (1-xi)
  yvec <- ifelse(Z[pairs[, 1]] > Z[pairs[, 2]], (pvec^(-1))*(Y[pairs[, 1]] - Y[pairs[, 2]])/sqrt(1-diag(hatQ)), ((1-pvec)^(-1))*(Y[pairs[, 2]] - Y[pairs[, 1]])/sqrt(1-diag(hatQ)))
  denominator <- sum(ifelse(Z[pairs[, 1]] > Z[pairs[, 2]], 2*(Z[pairs[, 1]] - Z[pairs[, 2]]), 2*(Z[pairs[, 2]] - Z[pairs[, 1]])))^2
  cov_adj_var <- (yvec %*% (diag(1, nrow(hatQ)) - hatQ) %*% yvec)/denominator
  return(cov_adj_var)
}
