#' cerr function is used to generate a dataset where the error term follows cosine-based distributions
#'
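#' This cerr function generates a dataset with a specified number of observations and predictors
#' from the linear model Y = X beta + e. The predictors in X are drawn from Uniform(0, 1), the
#' coefficients beta are drawn from Uniform(1, 5) and sorted in increasing order, and the error
#' term e is sampled from a cosine-based distribution on [-pi/2, pi/2].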
#'
#' @param n is the number of observations
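#' @param nr is the number of observations in the second error block: the first \code{n - nr}
#'   errors and the last \code{nr} errors are drawn separately, both with the sampler chosen by
#'   \code{dist_type}; \code{nr} must not exceed \code{n}
#' @param p is the number of predictors (the number of columns of \code{X})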
#' @param dist_type is the cosine-based sampler to use:
#'   \code{"cosine_random"}, \code{"cosine_rejection_sampling"}, or \code{"cosine_metropolis_hastings"}
#' @param ... is additional arguments (reserved for compatibility; not used)
#'
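#' @return A list with three elements: \code{X}, the \code{n} by \code{p} predictor matrix;
#'   \code{Y}, the \code{n} by 1 response matrix; and \code{e}, the numeric vector of
#'   \code{n} error terms (the first block followed by the second block).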
#'
#' @references
#' Guo, G., Song, H. & Zhu, L. The COR criterion for optimal subset selection in distributed estimation. \emph{Statistics and Computing}, 34, 163 (2024). \doi{10.1007/s11222-024-10471-z}
#'
#' Guo, G., Sun, Y., Qian, G., & Wang, Q. (2022). LIC criterion for optimal subset selection in distributed interval estimation. \emph{Journal of Applied Statistics}, 50(9), 1900-1920. \doi{10.1080/02664763.2022.2053949}.
#'
#' Chang, D., Guo, G. (2024). LIC: An R package for optimal subset selection for distributed data. \emph{SoftwareX}, 28, 101909.
#'
#' Jing, G., & Guo, G. (2025). TLIC: An R package for the LIC for T distribution regression analysis. \emph{SoftwareX}, 30, 102132.
#'
#' Chang, D., & Guo, G. (2025). Research on Distributed Redundant Data Estimation Based on LIC. \emph{IAENG International Journal of Applied Mathematics}, 55(1), 1-6.
#'
#' Gao, H., & Guo, G. (2025). LIC for Distributed Skewed Regression. \emph{IAENG International Journal of Applied Mathematics}, 55(9), 2925-2930.
#'
#' Zhang, C., & Guo, G. (2025). The optimal subset estimation of distributed redundant data. \emph{IAENG International Journal of Applied Mathematics}, 55(2), 270–277.
#'
#' Jing, G., & Guo, G. (2025). Student LIC for distributed estimation. \emph{IAENG International Journal of Applied Mathematics}, 55(3), 575–581.
#'
#' Liu, Q., & Guo, G. (2025). Distributed estimation of redundant data. \emph{IAENG International Journal of Applied Mathematics}, 55(2), 332–337.
#'
#' @export
#'
#' @examples
#' set.seed(12)
#' data <- cerr(n = 1200, nr = 200, p = 5, dist_type = "cosine_random")
#' str(data)

cerr <- function(n, nr, p, dist_type, ...) {
  # Simulates the linear model Y = X beta + e. The n errors are drawn in two
  # blocks (n - nr values, then nr values) from the density 0.5 * cos(x) on
  # [-pi/2, pi/2], using the sampler selected by `dist_type`.
  #
  # Returns list(X = n x p matrix, Y = n x 1 matrix, e = numeric vector).
  # Stops with "Unknown distribution type: ..." for an unrecognised sampler.

  # Fail early with a readable message instead of an obscure error raised
  # later by runif() / numeric() for a negative, fractional or missing size.
  check_count <- function(value, label) {
    ok <- is.numeric(value) && length(value) == 1L && is.finite(value) &&
      value >= 0 && value %% 1 == 0
    if (!ok) {
      stop("`", label, "` must be a single non-negative whole number",
           call. = FALSE)
    }
    invisible(value)
  }
  check_count(n, "n")
  check_count(nr, "nr")
  if (nr > n) {
    stop("`nr` must not exceed `n`", call. = FALSE)
  }
  if (length(dist_type) != 1L || is.na(dist_type)) {
    stop("`dist_type` must be a single non-missing value", call. = FALSE)
  }

  # Inverse-CDF sampler: F(x) = (sin(x) + 1) / 2, hence x = asin(2u - 1).
  draw_inverse_cdf <- function(m) {
    asin(2 * runif(m) - 1)
  }

  # Accept-reject sampler with a Uniform(-pi/2, pi/2) proposal g = 1 / pi.
  # The envelope constant is M = sup f / g = pi / 2, so a proposal is kept
  # with probability f / (M * g) = cos(theta). (Using f / g alone exceeds 1
  # near the centre and flattens the resulting distribution.)
  draw_rejection <- function(m) {
    out <- numeric(m)
    filled <- 0
    while (filled < m) {
      # The acceptance rate is 2 / pi; oversample so one pass usually suffices.
      batch <- ceiling((m - filled) * (pi / 2) * 1.1) + 10
      theta <- runif(batch, -pi / 2, pi / 2)
      accepted <- theta[runif(batch) <= cos(theta)]
      accepted <- accepted[seq_len(min(length(accepted), m - filled))]
      out[filled + seq_along(accepted)] <- accepted
      filled <- filled + length(accepted)
    }
    out
  }

  # Random-walk Metropolis sampler started at 0. The proposal is symmetric
  # (current +/- step); moves that leave the support have target density 0
  # and are rejected, so no Hastings correction is needed at the boundary.
  draw_metropolis <- function(m, step = 0.1) {
    out <- numeric(m)
    moves <- runif(m, -step, step)
    thresholds <- runif(m)
    current <- 0
    # seq_len() keeps an empty block empty; 1:m would iterate over c(1, 0).
    for (i in seq_len(m)) {
      proposed <- current + moves[i]
      # `current` always lies inside the support, so cos(current) > 0 and the
      # ratio of target densities is well defined.
      if (abs(proposed) <= pi / 2 &&
            thresholds[i] <= cos(proposed) / cos(current)) {
        current <- proposed
      }
      out[i] <- current
    }
    out
  }

  # as.character() stops switch() from treating a numeric value as an index.
  sampler <- switch(
    as.character(dist_type),
    cosine_random = draw_inverse_cdf,
    cosine_rejection_sampling = draw_rejection,
    cosine_metropolis_hastings = draw_metropolis,
    stop("Unknown distribution type: ", dist_type)
  )

  # Draw order (beta, X, first block, second block) is kept so that seeded
  # results of the "cosine_random" sampler are unchanged.
  beta <- sort(runif(p, 1, 5))
  X <- matrix(runif(n * p, 0, 1), ncol = p)

  e <- c(sampler(n - nr), sampler(nr))
  Y <- X %*% beta + e

  list(X = X, Y = Y, e = e)
}

