#' \href{https://www.taylorfrancis.com/books/mono/10.1201/b20790/hidden-markov-models-time-series-walter-zucchini-iain-macdonald-roland-langrock}{Forward algorithm} for hidden semi-Markov models with periodically varying transition probability matrices
#'
#' Hidden semi-Markov models (HSMMs) are a flexible extension of HMMs. 
#' For direct numerical maximum likelhood estimation, HSMMs can be represented as HMMs on an enlarged state space (of size \eqn{M}) and with structured transition probabilities such that approximate inference is possible.
#' Recently, this inference procedure has been generalized to allow either the dwell-time distributions or the conditional transition probabilities to depend on external covariates such as the time of day. This special case is implemented here.
#' This function allows for that, by expecting a transition probability matrix for each time point in a period, and an integer valued (\eqn{1, \dots, L}) time variable that maps the data index to the according time.
#'
#' @param delta Initial or periodically stationary distribution of length M (where M is the number of approximating states)
#' @param Gamma Array of transition probability matrices of dimension c(M,M,L). \cr \cr
#' Here we use the definition \eqn{\Pr(S_t=j \mid S_{t-1}=i) = \gamma_{ij}^{(t)}}
#' such that the transition probabilities between time point \eqn{t-1} and \eqn{t} are an element of \eqn{\Gamma^{(t)}}.
#' @param allprobs Matrix of state-dependent probabilities/ density values of dimension c(n, N), where N is the number of semi-Markovian states.
#' This will automatically be converted to the appropriate dimension.
#' @param sizes State aggregate sizes that are used for the approximation of the semi-Markov chain.
#' @param tod (Integer valued) time variable in 1, ..., L, mapping the data index to a generalized time of day (length n).
#' For half-hourly data L = 48. It could, however, also be day of year for daily data and L = 365.
#'
#' @return Log-likelihood for given data and parameters
#' @export
#'
#' @examples
#' ## generating data from homogeneous 2-state HSMM
#' mu = c(0, 6)
#' beta = matrix(c(log(4),log(6),-0.2,0.2,-0.1,0.4), nrow=2)
#' # time varying mean dwell time
#' Lambda = exp(cbind(1, trigBasisExp(1:24, 24, 1))%*%t(beta))
#' omega = matrix(c(0,1,1,0), nrow = 2, byrow = TRUE)
#' # simulation
#' # for a 2-state HSMM the embedded chain always alternates between 1 and 2
#' s = rep(1:2, 100)
#' C = x = numeric(0)
#' tod = rep(1:24, 50) # time of day variable
#' time = 1
#' for(t in 1:100){
#'   dt = rpois(1, Lambda[tod[time], s[t]])+1 # dwell time depending on time of day
#'   time = time + dt
#'   C = c(C, rep(s[t], dt))
#'   x = c(x, rnorm(dt, mu[s[t]], 1.5)) # fixed sd 2 for both states
#' }
#' tod = tod[1:length(x)]
#' 
#' ## negative log likelihood function
#' mllk = function(theta.star, x, sizes, tod){
#'   # parameter transformations for unconstraint optimization
#'   omega = matrix(c(0,1,1,0), nrow = 2, byrow = TRUE) # omega fixed (2-states)
#'   mu = theta.star[1:2]
#'   sigma = exp(theta.star[3:4])
#'   beta = matrix(theta.star[5:10], nrow=2)
#'   # time varying mean dwell time
#'   Lambda = exp(cbind(1, trigBasisExp(1:24, 24, 1))%*%t(beta))
#'   dm = list()
#'   for(j in 1:2){
#'     dm[[j]] = sapply(1:sizes[j]-1, dpois, lambda = Lambda[,j])
#'   }
#'   Gamma = tpm_phsmm(omega, dm)
#'   delta = stationary_p(Gamma, tod[1])
#'   # calculate all state-dependent probabilities
#'   allprobs = matrix(1, length(x), 2)
#'   for(j in 1:2){ allprobs[,j] = dnorm(x, mu[j], sigma[j]) }
#'   # return negative for minimization
#'   -forward_sp(delta, Gamma, allprobs, sizes, tod)
#' }
#' 
#' ## fitting an HSMM to the data
#' theta.star = c(1, 4, log(2), log(2), # state-dependent parameters
#'                  log(4), log(6), rep(0,4)) # state process parameters dm
#' mod = nlm(mllk, theta.star, x = x, sizes = c(10, 15), tod = tod, stepmax = 5)
forward_sp = function(delta, Gamma, allprobs, sizes, tod){
  if(min(tod)==1){
    tod = tod-1
  } 
  forward_cpp_sp(allprobs, delta, Gamma, sizes, tod)
}
