#' Generating Realizations of a Hidden Markov Model
#' 
#' This function generates a sequence of hidden states of a Markov chain and a 
#' corresponding parallel sequence of observations.
#'
#' @param size length of the time-series of hidden states and observations (also \code{T}).
#' @param m     a (finite) number of states in the hidden Markov chain.
#' @param delta a vector object containing values for the marginal probability 
#'    distribution of the \code{m} states of the Markov chain at the time point \code{t=1}.
#'    Default is \code{delta = rep(1 / m, times = m)}.
#' @param gamma  a matrix (\code{ncol = nrow = m}) containing values for the transition 
#'    matrix of the hidden Markov chain. 
#'    Default is \code{gamma=0.8 * diag(m)} \code{ + rep(0.2 / m, times = m)}
#' @param distribution_class a single character string object with the abbreviated name of 
#'    the \code{m} observation distributions of the Markov dependent observation process.  
#'    The following distributions are supported by this algorithm: Poisson (\code{pois}); 
#'    generalized Poisson (\code{genpois}); normal (\code{norm}); geometric (\code{geom}).
#' @param distribution_theta a list object containing the parameter values for the 
#'    \code{m} observation distributions that are dependent on the hidden Markov state.
#' @param obs_range a vector object specifying the range for the observations to be 
#'    generated.  For instance, the vector \code{c(0,1500)} allows only observations 
#'    between 0 and 1500 to be generated by the HMM.  Default value is \code{c(NA,NA)} 
#'    (no restriction).  See Notes for further details.
#' @param obs_round a logical object. \code{TRUE} if all generated observations are natural.  
#'    Default value is \code{FALSE}. See Notes for further details.
#' @param obs_non_neg a logical object. \code{TRUE}, if non negative observations are 
#'    generated. Default value is \code{FALSE}.  See Notes for further details.
#' @param plotting a numeric value between 0 and 5 (generates different outputs). 
#' NA suppresses graphical output. Default value is \code{0}.\cr
#'   \code{0}: output 1-5 \cr
#'   \code{1}: summary of all results \cr
#'   \code{2}: generated time series of states of the hidden Markov chain  \cr
#'   \code{3}: means (of  the observation distributions, which depend on the states of the 
#'             Markov chain) along the time series of states of the hidden Markov chain \cr
#'   \code{4}: observations along the time series of states of the hidden Markov chain \cr
#'   \code{5}: simulated observations  
#'
#' @return 
#' The function \code{ HMM_simulation } returns a list containing the following components:
#' \describe{
#' \item{size}{length of the generated time-series of hidden states and observations.}
#' \item{m}{input number of states in the hidden Markov chain.}
#' \item{delta}{a vector object containing the chosen values for the marginal probability 
#'      distribution of the \code{m} states of the Markov chain at the time point \code{t=1}.}
#' \item{gamma}{a matrix containing the chosen values for the transition matrix of the 
#'      hidden Markov chain.}
#' \item{distribution_class}{a single character string object with the abbreviated name of 
#'      the chosen observation distributions of the Markov dependent observation process.}
#' \item{distribution_theta}{a list object containing the chosen values for the parameters 
#'      of the \code{m} observation distributions that are dependent on the hidden Markov state.}
#' \item{markov_chain}{a vector object containing the generated sequence of states of the 
#'      hidden Markov chain of the HMM.}
#' \item{means_along_markov_chain}{a vector object containing the sequence of means 
#'      (of the state dependent distributions) corresponding to the generated sequence of states.}
#' \item{observations}{a vector object containing the generated sequence of (state dependent) 
#'      observations of the HMM.}
#'}
#'
#' @note
#' Some notes regarding the default values: \cr
#' \code{gamma}: \cr
#' The default setting assigns higher probabilities for remaining in a state than 
#' for changing into another.  
#' \code{obs_range}:\cr
#' Has to be used with caution, since it manipulates the results of the HMM. 
#' If a value for an observation at time \code{t} is generated outside the defined range, 
#' it will be regenerated until it falls into \code{obs_range}. It is possible just 
#' to define one boundary, e.g. \code{obs_range=c(NA,2000)} for observations lower than 
#' 2000, or \code{obs_range=c(100,NA)} for observations higher than 100. 
#' \code{obs_round }:\cr
#' Has to be used with caution!  Rounds each generated observation and hence manipulates 
#' the results of the HMM (important for the normal distribution based HMM). 
#' \code{obs_non_neg}:\cr
#' Has to be used with caution, since it manipulates the results of the HMM. If a negative 
#' value for an observation at a time \code{t} is generated, it will be set to 
#' \code{0} (only applied to the normal distribution based HMM).
#'
#' @author Vitali Witowski (2013).
#'    
#' @seealso \code{\link{AIC_HMM}}, \code{\link{BIC_HMM}}, \code{\link{HMM_training}}  
#' @keywords datagen
#' 
#' @export
#'
#' @examples
#' # i.) Generating a HMM with Poisson-distributed data -----
#' \donttest{
#'  Pois_HMM_data <- 
#'    HMM_simulation(size = 300, 
#'                      m = 5, 
#'     distribution_class = "pois", 
#'     distribution_theta = list( lambda=c(10,15,25,35,55)))
#'     
#'  print(Pois_HMM_data)
#'  }
#'  
#'  # ii.) Generating 6 physical activities with normally -----
#'  #      distributed accelerometer counts using a HMM.
#'  
#'  # Define number of time points (1440 counts equal 6 hours of 
#'  # activity counts assuming an epoch length of 15 seconds).
#'  size <- 1440
#'  
#'  # Define 6 possible physical activity ranges
#'  m <- 6
#'  
#'  # Start with the lowest possible state 
#'  # (in this case with the lowest physical activity)
#'  delta <- c(1, rep(0, times = (m - 1)))
#'  
#'  # Define transition matrix to generate according to a 
#'  # specific activity 
#'  gamma <- 0.935 * diag(m) + rep(0.065 / m, times = m)
#'  
#'  # Define parameters 
#'  # (here: means and standard deviations for m=6 normal 
#'  #  distributions that define the distribution in 
#'  #  a physical activity level)
#'  distribution_theta <- list(mean = c(0,100,400,600,900,1200), 
#'                             sd = rep(x = 200, times = 6))
#'                             
#'  # Assume for each count an upper boundary of 2000
#'  obs_range <-c(NA,2000)
#'  
#'  # Accelerometer counts shall not be negative
#'  obs_non_neg <-TRUE
#'  
#'  # Start simulation
#'  \donttest{
#'  accelerometer_data <- 
#'      HMM_simulation(size = size, 
#'                        m = m, 
#'                    delta = delta, 
#'                    gamma = gamma, 
#'       distribution_class = "norm", 
#'       distribution_theta = distribution_theta, 
#'                obs_range = obs_range, 
#'              obs_non_neg = obs_non_neg, 
#'                 plotting = 0)
#'  print(accelerometer_data)
#'  }
  
HMM_simulation <- function(size, m, delta = rep(1 / m, times = m), 
                           gamma = 0.8 * diag(m) + rep(0.2 / m, times = m), 
                           distribution_class, distribution_theta, obs_range=c(NA,NA), 
                           obs_round=FALSE, obs_non_neg = FALSE, plotting = 0)
{
  # Simulates a hidden Markov chain of length `size` with `m` states and draws
  # one observation per time point from the state dependent distribution
  # selected by `distribution_class` ("pois", "norm", "genpois" or "geom").
  #
  # Returns a list with the inputs (size, m, delta, gamma, distribution_class,
  # distribution_theta) plus `markov_chain`, `means_along_markov_chain` and
  # `observations`.
  #
  # Stops with an error if an argument is malformed (see the checks below).
  # NOTE: an `obs_range` that has zero probability under a state's distribution
  # makes the rejection loop below run forever; choosing a feasible range is
  # the caller's responsibility.

  # Input validation -----
  if (!is.numeric(size) || length(size) != 1 || is.na(size) || size < 1) {
    stop("'size' must be a single number >= 1.", call. = FALSE)
  }
  if (!is.numeric(m) || length(m) != 1 || is.na(m) || m < 1) {
    stop("'m' must be a single number >= 1.", call. = FALSE)
  }
  supported_classes <- c("pois", "norm", "genpois", "geom")
  if (!is.character(distribution_class) || length(distribution_class) != 1 ||
      !(distribution_class %in% supported_classes)) {
    stop("'distribution_class' must be one of: ",
         paste0("\"", supported_classes, "\"", collapse = ", "), ".",
         call. = FALSE)
  }
  if (length(delta) != m) {
    stop("'delta' must contain exactly 'm' probabilities.", call. = FALSE)
  }
  if (length(dim(gamma)) != 2 || any(dim(gamma) != m)) {
    stop("'gamma' must be an 'm' x 'm' transition matrix.", call. = FALSE)
  }

  lower_bound <- obs_range[1]
  upper_bound <- obs_range[2]
  if (!is.na(lower_bound) && !is.na(upper_bound) && lower_bound > upper_bound) {
    stop("'obs_range' must satisfy obs_range[1] <= obs_range[2].", call. = FALSE)
  }

  # State dependent parameters required by the chosen distribution.
  # `$` is used (as before) so partially matched list names keep working.
  theta <- switch(distribution_class,
    pois    = list(lambda = distribution_theta$lambda),
    norm    = list(mean = distribution_theta$mean,
                   sd = distribution_theta$sd),
    genpois = list(lambda1 = distribution_theta$lambda1,
                   lambda2 = distribution_theta$lambda2),
    geom    = list(prob = distribution_theta$prob)
  )
  for (param_name in names(theta)) {
    if (length(theta[[param_name]]) < m) {
      stop("'distribution_theta$", param_name, "' must provide a value for ",
           "each of the 'm' states.", call. = FALSE)
    }
  }

  # Sampler drawing a single observation for a given hidden state.
  # (`rgenpois` is only looked up when a "genpois"-HMM is requested.)
  draw_observation <- switch(distribution_class,
    pois = function(state) {
      rpois(n = 1, lambda = theta$lambda[state])
    },
    norm = function(state) {
      rnorm(n = 1, mean = theta$mean[state], sd = theta$sd[state])
    },
    genpois = function(state) {
      rgenpois(n = 1, lambda1 = theta$lambda1[state],
               lambda2 = theta$lambda2[state])
    },
    geom = function(state) {
      rgeom(n = 1, prob = theta$prob[state])
    }
  )

  # Means of the m state dependent distributions.
  # For "geom" the mean of rgeom() is (1 - prob) / prob, not prob itself.
  obs_dist_means <- switch(distribution_class,
    pois    = theta$lambda,
    norm    = theta$mean,
    genpois = theta$lambda1 / (1 - theta$lambda2),
    geom    = (1 - theta$prob) / theta$prob
  )

  # Hidden Markov chain -----
  n_steps <- as.integer(size)
  states <- seq(1, m, by = 1)
  markov_chain <- numeric(n_steps)
  markov_chain[1] <- sample(x = states, size = 1, prob = delta)
  # seq_len(n_steps - 1) + 1 is empty for n_steps == 1 (2:size would count down)
  for (i in seq_len(n_steps - 1) + 1) {
    markov_chain[i] <- sample(x = states, size = 1,
                              prob = gamma[markov_chain[i - 1], ])
  }

  means_along_markov_chain <- obs_dist_means[markov_chain]

  # Observations -----
  # TRUE if a drawn value violates one of the (optional) range boundaries.
  outside_range <- function(value) {
    (!is.na(lower_bound) && value < lower_bound) ||
      (!is.na(upper_bound) && value > upper_bound)
  }
  # Negative values are only clamped for the normal distribution based HMM.
  clamp_negative <- distribution_class == "norm" &&
    isTRUE(as.logical(obs_non_neg))

  observations <- rep(NA, times = n_steps)
  for (i in seq_len(n_steps)) {
    state <- markov_chain[i]
    value <- draw_observation(state)
    # Rejection sampling: redraw until the value lies inside obs_range.
    while (outside_range(value)) {
      value <- draw_observation(state)
    }
    if (clamp_negative && value < 0) {
      value <- 0
    }
    observations[i] <- value
  }

  # Round before plotting so that the plots show the returned observations.
  if (isTRUE(as.logical(obs_round))) {
    observations <- round(observations)
  }

  # Graphical output -----
  if (length(plotting) == 1 && !is.na(plotting)) {
    plot_chain <- function() {
      plot(markov_chain, xlab = 't', main = 'simulated (hidden) Markov chain',
           col = "green", type = 'o', ylab = "states")
    }
    plot_means <- function() {
      plot(means_along_markov_chain, xlab = 't',
           main = 'means along Markov chain', col = "green", type = 'o',
           ylab = 'observation')
    }
    plot_observations_with_means <- function() {
      plot(observations, xlab = 't', main = 'observations along Markov chain')
      abline(h = obs_dist_means, col = "grey50", lty = "dashed")
      # lines() adds to the existing plot; axis label / title arguments
      # are not graphical parameters and were dropped here.
      lines(means_along_markov_chain, type = "l", col = "green")
    }
    plot_observations <- function() {
      plot(observations, xlab = 't', main = 'simulated observations')
    }

    if (plotting %in% c(0, 1)) {
      # Summary of all results on a 2 x 2 grid; the caller's layout is
      # restored when the function exits.
      old_par <- par(mfrow = c(2, 2))
      on.exit(par(old_par), add = TRUE)
      plot_chain()
      plot_means()
      plot_observations_with_means()
      plot_observations()
      par(mfrow = c(1, 1))
    }
    if (plotting == 0) {
      # Additionally show every plot on its own page.
      plot_chain()
      plot_means()
      plot_observations_with_means()
      plot_observations()
    }
    if (plotting == 2) {
      plot_chain()
    }
    if (plotting == 3) {
      plot_means()
    }
    if (plotting == 4) {
      plot_observations_with_means()
    }
    if (plotting == 5) {
      plot_observations()
    }
  }

  return(list(size = size, 
              m = m, 
              delta = delta, 
              gamma = gamma, 
              distribution_class = distribution_class, 
              distribution_theta = distribution_theta, 
              markov_chain = markov_chain, 
              means_along_markov_chain = means_along_markov_chain, 
              observations = observations))
}