#' Find additive outliers
#'
#' Searches for additive outliers using the method described in Appendix C of Findley et al. (1998).
#' If the number of trigonometric variables is not specified, the function searches the model space automatically and selects the number of trigonometric variables with the lowest AICc value.
#'
#' @importFrom stats AIC BIC lm median supsmu
#' @importFrom MuMIn AICc
#' @import lubridate
#'
#' @param x Numeric vector. Time series to seasonally adjust
#' @param dates a vector of class "Date", containing the data dates
#' @param out.tolerance t-stat threshold for outliers (see Findley et al., 1998)
#' @param my.AO.list (optional) Vector with user defined additive outlier variables
#' @param H (optional) Matrix with holiday and trading day variables
#' @param my.k_l (optional) Vector with the number of Fourier terms used to capture the yearly and monthly cycles, in that order. If NULL, an automatic search is performed using the AICc criterion
#' @param method Decomposition method: "additive" or "multiplicative". By default uses the additive method
#'
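#' @return A list with two elements: ao, the dates of the detected additive outliers, and my.k_l, the number of yearly and monthly Fourier terms used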
#' @references Findley, D.F., Monsell, B.C., Bell, W.R., Otto, M.C. and B.C Chen (1998). New capabilities and methods of the X-12-ARIMA seasonal-adjustment program. Journal of Business & Economic Statistics, 16(2), pp.127-152.
#' @export
#'
#' @examples
#'
#' \donttest{
#' #Not run:
#' # Searching for additive outliers in Gasoline data
#' data(gasoline.data)
#' ao_list=find_outliers(x=gasoline.data$y,dates = gasoline.data$date)}
#'

find_outliers <- function(x, dates, out.tolerance = 3.8, my.AO.list = NULL,
                          H = NULL, my.k_l = NULL, method = "additive") {
  # Overview of the procedure implemented below:
  #   1. log-transform x when method == "multiplicative";
  #   2. remove a preliminary trend estimated with stats::supsmu();
  #   3. unless my.k_l is supplied, choose the numbers of yearly / monthly
  #      Fourier terms by a grid search on AICc (find_opt);
  #   4. forward addition: repeatedly add the single-date dummy with the
  #      largest |t| as long as that |t| is >= out.tolerance;
  #   5. backward deletion: refit with all detected outliers and drop the
  #      weakest one until every remaining |t| is >= out.tolerance.
  # Returns list(ao = <sorted outlier dates>, my.k_l = <Fourier orders used>).

  # Number of columns of an optional regressor block; NULL counts as zero and
  # a plain vector counts as one column.
  n_cols <- function(m) {
    if (is.null(m)) 0L else NCOL(m)
  }

  # Least-squares fit of `response` on `design`. Returns the coefficient
  # vector and the diagonal of (X'X)^-1, so the inverse is computed only once.
  ols_fit <- function(design, response) {
    xtx_inv <- solve(crossprod(design))
    list(
      coef = drop(xtx_inv %*% crossprod(design, response)),
      xtx_inv_diag = diag(xtx_inv)
    )
  }

  # Trigonometric regressors: k sine/cosine pairs for the within-year cycle
  # (day of year / days in year) and l pairs for the within-month cycle
  # (day of month / days in month). Returns NULL when k and l are both 0.
  # Defined at this level so that both find_opt() and the main body use the
  # same implementation.
  fourier_vars <- function(k = 1, l = 1, dates) {
    monthly <- NULL
    if (l > 0) {
      Nm <- as.numeric(lubridate::days_in_month(dates)) # days in the month
      mt <- lubridate::day(dates) # day of the month
      monthly <- matrix(NA_real_, nrow = length(dates), ncol = 2 * l)
      for (i in seq_len(l)) {
        monthly[, i] <- sin(2 * pi * i * mt / Nm)
        monthly[, l + i] <- cos(2 * pi * i * mt / Nm)
      }
      colnames(monthly) <- c(
        paste0("S(", 1:l, "/Nm", ")"),
        paste0("C(", 1:l, "/Nm", ")")
      )
    }

    yearly <- NULL
    if (k > 0) {
      yt <- lubridate::yday(dates) # day of the year
      Ny <- Hmisc::yearDays(dates) # days in the year
      yearly <- matrix(NA_real_, nrow = length(dates), ncol = 2 * k)
      for (i in seq_len(k)) {
        yearly[, i] <- sin(2 * pi * i * yt / Ny)
        yearly[, k + i] <- cos(2 * pi * i * yt / Ny)
      }
      colnames(yearly) <- c(
        paste0("S(", 1:k, "/Ny", ")"),
        paste0("C(", 1:k, "/Ny", ")")
      )
    }

    cbind(yearly, monthly)
  }

  # Grid search for the numbers of yearly (k) and monthly (l) Fourier terms.
  # Cell (i, j) of the criterion matrices corresponds to k = (i - 1) * by and
  # l = (j - 1) * by, i.e. the orders fitted are 0, by, ..., max - by; k.max
  # and l.max themselves are not fitted.
  # Returns a list with the optimal (k, l) under AIC, AICc and BIC, each as a
  # 1 x 2 matrix.
  find_opt <- function(x, dates, H = NULL, AO = NULL, method = "additive",
                       l.max = 24, k.max = 42, by = 6) {
    if (method == "multiplicative") {
      x <- log(x)
    }

    n_reg <- k.max + l.max + n_cols(H) + n_cols(AO)
    if (length(x) < 2 * n_reg) {
      stop("There is not enough observations to search through the given model space")
    }

    y <- x - stats::supsmu(seq_along(x), x)$y

    n_k <- length(seq(by, k.max, by))
    n_l <- length(seq(by, l.max, by))
    aic0 <- matrix(NA_real_, nrow = n_k, ncol = n_l)
    aicc0 <- aic0
    bic0 <- aic0

    for (i in seq_len(n_k)) {
      for (j in seq_len(n_l)) {
        X <- cbind(fourier_vars(k = (i - 1) * by, l = (j - 1) * by, dates), H, AO)

        m <- if (is.null(X)) stats::lm(y ~ -1) else stats::lm(y ~ X - 1)

        aic0[i, j] <- stats::AIC(m)
        aicc0[i, j] <- MuMIn::AICc(m)
        bic0[i, j] <- stats::BIC(m)
      }
    }

    # Convert the position of the minimum into (k, l). On ties only the first
    # minimum is kept, so the result always has exactly one row.
    best_orders <- function(crit) {
      hit <- which(crit == min(crit, na.rm = TRUE), arr.ind = TRUE)
      (hit[1, , drop = FALSE] - 1) * by
    }

    list(
      opt.aic = best_orders(aic0),
      opt.aicc = best_orders(aicc0),
      opt.bic = best_orders(bic0)
    )
  }

  if (method == "multiplicative") {
    x <- log(x)
  }

  # Detrended series on which all regressions below are run.
  y <- x - stats::supsmu(seq_along(x), x)$y

  # Dummies for the user-supplied outliers. This must be defined whether or
  # not my.k_l is given, because it is part of the design matrix below.
  # NOTE(review): my_ao() is defined elsewhere; it is assumed to return one
  # dummy column per date in out.list, in the given order - confirm.
  AO <- if (is.null(my.AO.list)) {
    NULL
  } else {
    my_ao(dates = dates, out.list = my.AO.list)
  }

  if (is.null(my.k_l)) {
    # x is already on the working scale here, so method = "additive" stops
    # find_opt() from taking the logarithm a second time.
    opt <- find_opt(x = x, dates = dates, H = H, AO = AO, method = "additive")
    my.k_l <- opt$opt.aicc
  }

  X <- fourier_vars(k = my.k_l[1], l = my.k_l[2], dates = dates)
  Xs <- cbind(X, H, AO)

  # Robust residual scale: 1.49 * median absolute residual. With no regressors
  # at all the residuals are the detrended series itself.
  err <- if (is.null(Xs)) y else y - Xs %*% ols_fit(Xs, y)$coef
  sig_R <- 1.49 * stats::median(abs(err))

  # Forward addition -------------------------------------------------------
  n_obs <- length(dates)
  f.sel.pos <- NULL
  out.search.points <- seq_along(dates)[!dates %in% my.AO.list]

  while (length(out.search.points) > 0) {
    # |t| of a dummy at each candidate date, added to the current design.
    Ts <- vapply(out.search.points, function(t) {
      AOt <- numeric(n_obs)
      AOt[t] <- 1
      fit <- ols_fit(cbind(Xs, AOt), y)
      p <- length(fit$coef)
      unname(abs(fit$coef[p] / sqrt(fit$xtx_inv_diag[p] * sig_R^2)))
    }, numeric(1))

    if (max(Ts) < out.tolerance) {
      break
    }

    best <- which.max(Ts)
    AOt <- numeric(n_obs)
    AOt[out.search.points[best]] <- 1

    f.sel.pos <- c(f.sel.pos, out.search.points[best])
    out.search.points <- out.search.points[-best]
    Xs <- cbind(Xs, AOt)
  }

  # Backward deletion ------------------------------------------------------
  # Initialised once, outside the loop, so that deletions persist between
  # iterations and the result exists even when nothing was detected.
  f.sel.ao.dates <- dates[f.sel.pos]

  while (length(f.sel.ao.dates) > 0) {
    AObd <- my_ao(dates = dates, out.list = c(my.AO.list, f.sel.ao.dates))
    Xst <- cbind(X, H, AObd)

    fit <- ols_fit(Xst, y)
    sig_R <- 1.49 * stats::median(abs(y - Xst %*% fit$coef))

    # The detected outliers occupy the last columns of the design matrix.
    found_cols <- (ncol(Xst) - length(f.sel.ao.dates) + 1):ncol(Xst)
    Tt <- abs(fit$coef / sqrt(fit$xtx_inv_diag * sig_R^2))[found_cols]

    if (min(Tt) >= out.tolerance) {
      break
    }

    f.sel.ao.dates <- f.sel.ao.dates[-which.min(Tt)]
  }

  f.sel.ao.dates <- f.sel.ao.dates[order(f.sel.ao.dates)]

  list(ao = f.sel.ao.dates, my.k_l = my.k_l)
}
