## ------------------------------------------------------------------------
# 
# "GetBlocks(X, atom, m=min(X), M=max(X),func=sum,...)" 
# 
# Compute the division into blocks for atomic Markov chains
# and calculate the function func on each block.
# If m and M are not left at their defaults, also compute the truncated and Winsorized versions of the statistic.
#
# ------------------------------------------------------------------------
##

#' @aliases GetBlocks
#' @title Compute Block Splitting for Atomic Markov Chains
#' @description Computes regenerative blocks for atomic 
#' Markov chains.
#' @param X A numeric vector representing a Markov chain.
#' @param atom A numeric value; an atom of the Markov chain.
#' @param m A numeric value; the lower truncation threshold.
#' Default is the minimum of \code{X} (no truncation from below).
#' @param M A numeric value; the upper truncation threshold.
#' Default is the maximum of \code{X} (no truncation from above).
#' @param func A function to apply to each block. Default is \code{sum}.
#' @param ... Additional arguments passed to the function \code{func}.
#' @details Identifies values in \code{X} equal to \code{atom} to
#' determine regeneration times and creates regeneration blocks (or cycles).
#' The function then assigns block numbers, counts observations in each block, and calculates 
#' various statistics for each block.
#' @return Returns a list containing:
#' \enumerate{
#'   \item A data frame with the following columns:
#'   \itemize{
#'     \item \code{Time} - the index of each observation,
#'     \item \code{X} - values of the process,
#'     \item \code{Bnumber} - block number assigned to each observation,
#'     \item \code{regen} - indicator (1 or 0) of regeneration times. 1 corresponds to
#'     the regeneration time.
#'   }
#'   \item A matrix summarizing block characteristics with the following columns:
#'   \itemize{
#'     \item \code{Block number} - the block index,
#'     \item \code{Block length} - number of observations in the block,
#'     \item \code{Truncated sum} - the value of \code{func} applied to truncated 
#'     observations in the block,
#'     \item \code{Valid points} - number of observations within the truncation thresholds,
#'     \item \code{Winsorized value} - the Winsorized value of \code{func} applied to the block,
#'     \item \code{Start index} - the starting index of the block,
#'     \item \code{End index} -  the ending index of the block.
#'   }
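#'   \item \code{Total blocks} - the largest block number assigned to an
#'   observation, i.e. the number of visits to \code{atom} among all but the
#'   last observation. It is one more than the number of rows of the block
#'   matrix when the chain does not end at \code{atom}.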
#' }
#' @references Bertail, P. and Dudek, A. (2025). \emph{Bootstrap for 
#' Dependent Data, with an R package} (by Bernard Desgraupes and Karolina Marek) - submitted. 
#' 
#' @seealso \code{\link{findBestEpsilon}}, \code{\link{ftrunc}},
#' \code{\link{regenboot}}, \code{\link{smallEnsemble}}.
#' @keywords "Regenerative Block Bootstrap" "Markov chains" Atom
#' @export
#' @examples 
#' X = genMM1(1000, 1, 2)
#' blocks = GetBlocks(X, 0, func=sum) # compute sum over all blocks (without truncation)
#' # compute sum over all blocks (with truncation over quantiles of order 5% and 95%)
#' blocks = GetBlocks(X, 0, func=sum,m=quantile(X,0.05),M=quantile(X,0.95)) 
#' 


GetBlocks <- function(X, atom, m = min(X), M = max(X), func = sum, ...) {
  # Split the chain X into regeneration blocks delimited by visits to `atom`
  # and evaluate `func` on every complete block.
  #
  # m, M : truncation thresholds used for the robustified statistics; with
  #        the defaults (min and max of X) no observation is truncated.
  # ...  : forwarded to every call of `func`.
  #
  # Returns list(dataset, Block, l_n):
  #   dataset : matrix with columns time index, X, Bnumber, regen
  #   Block   : one row per complete block -- block number, block length,
  #             func on the points inside [m, M], number of points inside
  #             [m, M], func on the Winsorized block, start index, end index
  #   l_n     : largest block number assigned to an observation
  # Stops when an argument is invalid or when no complete block is found.

  # Validate X before m and M: their defaults are evaluated lazily from X.
  if (length(X) < 1) {
    stop("Error: 'X' must contain at least one observation.")
  }
  if (anyNA(X)) {
    stop("Error: 'X' must not contain missing values.")
  }

  # Check if 'atom' is numeric and not empty
  if (!is.numeric(atom) || length(atom) != 1) {
    stop("Error: 'atom' must be a single numeric value.")
  }

  # Check if 'm' is numeric and of length 1
  if (!is.numeric(m) || length(m) != 1) {
    stop("Error: 'm' must be a single numeric value.")
  }

  # Check if 'M' is numeric and of length 1
  if (!is.numeric(M) || length(M) != 1) {
    stop("Error: 'M' must be a single numeric value.")
  }

  func <- match.fun(func)

  n_obs <- length(X)
  # Regeneration times are the exact visits to the atom.
  regen <- (X == atom)
  # An observation's block number is the number of atom visits strictly
  # before it, so a block ends on (and includes) the visit that closes it.
  # seq_len() keeps the one-observation case well defined.
  Bnumber <- cumsum(c(0, regen[seq_len(n_obs - 1)]))
  l_n <- max(Bnumber)
  # The last numbered block is complete only if the chain ends on the atom.
  # Block 0 (before the first visit) is never counted.
  nb <- if (regen[n_obs]) l_n else l_n - 1
  if (nb <= 0) stop(" Not enough regeneration blocks : check the atom")

  dataset <- cbind(seq_len(n_obs), X, Bnumber, regen)

  lB <- numeric(nb)
  Submax <- numeric(nb)
  First <- numeric(nb)
  Submax_1 <- numeric(nb)
  debX <- numeric(nb)
  finX <- numeric(nb)

  for (i in seq_len(nb)) {
    idx <- which(dataset[, 3] == i)
    x <- dataset[idx, 2]
    inside <- (x >= m) & (x <= M)

    # number of points of the block lying inside [m, M]
    First[i] <- sum(inside)
    # truncated statistic: func on the points inside [m, M] only
    Submax[i] <- func(x[inside], ...)
    # Winsorized statistic: values above M are replaced by M, values below m
    # by m, then func is applied to the whole block. For func = sum this is
    # the sum of the in-range points plus M (resp. m) for each point above
    # M (resp. below m).
    Submax_1[i] <- func(pmin(pmax(x, m), M), ...)

    lB[i] <- length(idx)
    debX[i] <- idx[1]
    finX[i] <- idx[length(idx)]
  }

  # deparse.level = 0 keeps the block matrix without column names.
  Block <- cbind(seq_len(nb), lB, Submax, First, Submax_1, debX, finX,
                 deparse.level = 0)
  list(dataset, Block, l_n)
}
