#' Optimize the detection of signals based on a-priori detections
#' @usage optimize_auto_detec(X, Y, threshold = 10, power = 1, wl = 512, ssmooth = 0, 
#' hold.time = 0, mindur = NULL, maxdur = NULL, thinning = 1, parallel = 1, 
#' pb = TRUE, by.sound.file = FALSE, bp = NULL, path = NULL, previous.output = NULL)
#' @param X 'selection_table' object or a data frame with columns
#' for sound file name (sound.files), selection number (selec), and start and end time of signal
#' (start and end). \strong{It should contain the selections that will be used for detection optimization}.
#' @param Y Optional.An object of class 'autodetec.output' (generated by \code{\link{auto_detec}}) in which to optimize detections. Must refer to the same sound files as in 'X'. Default is (\code{NULL}).
#' @param threshold A numeric vector specifying the amplitude threshold for detecting
#'   signals (in \%). \strong{Several values can be supplied for optimization}.
#' @param power A numeric vector indicating a power factor applied to the amplitude envelope. Increasing power will reduce low amplitude modulations and increase high amplitude modulations, in order to reduce background noise. Default is 1 (no change). \strong{Several values can be supplied for optimization}.
#' @param wl A numeric vector of length 1 specifying the window used internally by
#' \code{\link[seewave]{ffilter}} for bandpass filtering (so only applied when 'bp' is supplied). Default is 512.
#' @param ssmooth A numeric vector to smooth the amplitude envelope
#'   with a sum smooth function. Default is 0 (no smoothing). \strong{Several values can be supplied for optimization}.
#' @param hold.time Numeric vector of length 1. Specifies the time range at which selections will be merged (i.e. if 2 selections are separated by less than the specified hold.time they will be merged in to a single selection). Default is \code{0}. \strong{Several values can be supplied for optimization}.
#' @param mindur Numeric vector giving the shortest duration (in
#'   seconds) of the signals to be detected. It removes signals below that
#'   threshold. \strong{Several values can be supplied for optimization}.
#' @param maxdur Numeric vector giving the longest duration (in
#'   seconds) of the signals to be detected. It removes signals above that
#'   threshold. \strong{Several values can be supplied for optimization}.
#' @param thinning Numeric vector in the range 0~1 indicating the proportional reduction of the number of
#' samples used to represent amplitude envelopes (i.e. the thinning of the envelopes). Usually amplitude envelopes have many more samples
#' than those needed to accurately represent amplitude variation in time, which affects the size of the
#' output (usually very large R objects / files). Default is  \code{1} (no thinning). Higher sampling rates may afford higher size reduction (e.g. lower thinning values). Reduction is conducted by interpolation using \code{\link[stats]{approx}}. Note that thinning may decrease time precision, and the higher the thinning the less precise the time detection. \strong{Several values can be supplied for optimization}.
#' @param parallel Numeric. Controls whether parallel computing is applied.
#'  It specifies the number of cores to be used. Default is 1 (i.e. no parallel computing).
#' @param pb Logical argument to control progress bar and messages. Default is \code{TRUE}.
#' @param by.sound.file Logical to control if diagnostics are calculated for each sound file independently (\code{TRUE}) or for all sound files combined (\code{FALSE}, default).
#' @param bp Numeric vector of length 2 giving the lower and upper limits of a
#'   frequency bandpass filter (in kHz). Default is \code{NULL}.
#' @param path Character string containing the directory path where the sound files are located.
#' If \code{NULL} (default) then the current working directory is used. Only needed if 'Y' is not supplied.
#' @param previous.output Data frame with the output of a previous run of this function. This will be used to include previous results in the new output and avoid recalculating detection performance for parameter combinations previously evaluated.
#' @return A data frame in which each row shows the result of a detection job with a particular combination of tuning parameters (including in the data frame). It also includes the following diagnostic metrics:
#' \itemize{
#'  \item \code{true.positives}: number of detections that correspond to signals referenced in 'X'. Matching is defined as some degree of overlap in time. In a perfect detection routine it should be equal to the number of rows in 'X'. 
#'  \item \code{false.positives}: number of detections that don't match any of the signals referenced in 'X'. In a perfect detection routine it should be 0.
#'  \item \code{false.negatives}: number of signals in 'reference' that were not detected (not found in 'detection'. In a perfect detection routine it should be 0.
#'  \item \code{split.positives}: number of signals referenced in 'X' that were overlapped by more than 1 detection (i.e. detections that were split). In a perfect detection routine it should be 0.  
#'  \item \code{mean.duration.true.positives}: mean duration of true positives (in s).  
#'  \item \code{mean.duration.false.positives}: mean duration of false positives (in s). 
#'  \item \code{mean.duration.false.negatives}: mean duration of false negatives (in s). Only included when \code{time.diagnostics = TRUE}.
#'  \item \code{proportional.duration.true.positives}: ratio of total duration of true positives to the total duration of signals referenced in 'X'. In a perfect detection routine it should be 1.
#'  \item \code{sensitivity}: Proportion of signals referenced in 'X' that were detected. In a perfect detection routine it should be 1.
#'  \item \code{specificity}: Proportion of detections that correspond to signals referenced in 'X' that were detected. In a perfect detection routine it should be 1.
#'  } 
##' @export
#' @name optimize_auto_detec
#' @details This function takes a selections data frame or 'selection_table' ('X') and the output of a \code{\link{auto_detec}} routine ('Y') and estimates the detection performance for different detection parameter combinations. This is done by comparing the position in time of the detection to those of the reference selections in 'X'. The function returns several diagnostic metrics to allow user to determine which parameter values provide a detection that more closely matches the selections in 'X'. Those parameters can be later used for performing a more efficient detection using \code{\link{auto_detec}}.
#'
#' @examples{
#' # Save to temporary working directory
#' data(list = c("Phae.long1", "Phae.long2", "Phae.long3", "Phae.long4", "lbh_selec_table"))
#' writeWave(Phae.long1, file.path(tempdir(), "Phae.long1.wav"))
#' writeWave(Phae.long2, file.path(tempdir(), "Phae.long2.wav"))
#' writeWave(Phae.long3, file.path(tempdir(), "Phae.long3.wav"))
#' writeWave(Phae.long4, file.path(tempdir(), "Phae.long4.wav"))
#' 
#' # run auto_detec with thining
#' ad <- auto_detec(output = "list", thinning = 1 / 10, ssmooth = 300, path = tempdir())
#' optimize_auto_detec(X = lbh_selec_table, Y = ad, threshold = c(5, 10, 15), path = tempdir())
#' }
#'
#' @references {
#' Araya-Salas, M., & Smith-Vidaurre, G. (2017). warbleR: An R package to streamline analysis of animal acoustic signals. Methods in Ecology and Evolution, 8(2), 184-191.
#' }
#' @author Marcelo Araya-Salas (\email{marcelo.araya@@ucr.ac.cr}).
#last modification on dec-21-2021 (MAS)
optimize_auto_detec <- function(X, Y, threshold = 10, power = 1, wl = 512, ssmooth = 0, hold.time = 0, mindur = NULL, maxdur = NULL, thinning = 1, parallel = 1, pb = TRUE, by.sound.file = FALSE, bp = NULL, path = NULL, previous.output = NULL){
  
  print("this function has been deprecated, please look at the ohun package for automatic signal detection (https://marce10.github.io/ohun/index.html")
 
   # reset pb on exit
  on.exit(pbapply::pboptions(type = .Options$pboptions$type))
       
  #### set arguments from options
  # get function arguments
  argms <- methods::formalArgs(optimize_auto_detec)
  
  # get warbleR options
  opt.argms <- if(!is.null(getOption("warbleR"))) getOption("warbleR") else SILLYNAME <- 0
  
  # remove options not as default in call and not in function arguments
  opt.argms <- opt.argms[!sapply(opt.argms, is.null) & names(opt.argms) %in% argms]
  
  # get arguments set in the call
  call.argms <- as.list(base::match.call())[-1]
  
  # remove arguments in options that are in call
  opt.argms <- opt.argms[!names(opt.argms) %in% names(call.argms)]
  
  # set options left
  if (length(opt.argms) > 0)
    for (q in 1:length(opt.argms))
      assign(names(opt.argms)[q], opt.argms[[q]])
  
  #if X is not a data frame
  if (!any(is.data.frame(X), is_selection_table(X))) stop("X is not of a class 'data.frame', 'selection_table'")
  
  if (is_extended_selection_table(X)) stop("This function cannot take extended selection tables ('X' argument)")
  
  #check path to working directory
  if (is.null(path)) path <- getwd() else 
    if (!dir.exists(path)) stop("'path' provided does not exist") else
      path <- normalizePath(path)
   
        # check Y being a autodetec.output object
        if (!is(Y, "autodetec.output")) 
          stop("'Y' must be and object of class 'autodetec.ouput'")
        
        # check that all sound files in X have and envelope in Y
        if (!all(X$sound.files %in% unique(Y$envelopes$sound.files)))
           stop("Not all sound files in 'X' are found in 'Y'")
   
      # get all possible combinations of parameters
      exp_grd <- expand.grid(threshold = threshold, power = power, ssmooth = ssmooth, hold.time = hold.time, mindur = if(is.null(mindur)) -Inf else mindur, maxdur = if(is.null(maxdur)) Inf else maxdur, thinning = thinning)
      
      # if previous output included
      if (!is.null(previous.output)){
        
        # create composed variable to find overlapping runs
        previous.output$temp.label <- apply(previous.output[, c("threshold", "power", "hold.time", "mindur", "maxdur", "thinning")], 1, paste, collapse = "-")
        
        exp_grd <- exp_grd[!apply(exp_grd[, c("threshold", "power", "hold.time", "mindur", "maxdur", "thinning")], 1, paste, collapse = "-") %in% previous.output$temp.label, ]
        
        # remove composed variable
        previous.output$temp.label <- NULL
            }
    

      if (nrow(exp_grd) == 0){
        cat("all combinations were already evaluated on previous call to this function (based on 'pevious.output')")
      
        return(previous.output)
        } else {
        
      # warn about number of combinations
      cat(paste(nrow(exp_grd), "combinations will be evaluated:"))
       cat("\n")
      
       # only files in X
       flist <- unique(as.character(X$sound.files))
       
        # subset to sound files in X
        if (!is.null(Y)){
        Y$selection.table <- Y$selection.table[Y$selection.table$sound.files %in% X$sound.files, ]
        Y$envelopes <- Y$envelopes[Y$envelopes$sound.files %in% X$sound.files, ]
        Y$org.selection.table <- Y$org.selection.table[Y$org.selection.table$sound.files %in% X$sound.files, ]
        
    
        } else {
          Y <- auto_detec(output = "list", path = tempdir(), flist = flist)
        }
        
      
       
       # set pb options
       pbapply::pboptions(type = ifelse(pb, "timer", "none"))
       
      ad_results <- pbapply::pblapply(X = 1:nrow(exp_grd), FUN = function(x){
      
          ad <- auto_detec(X = Y, threshold = exp_grd$threshold[x], ssmooth = exp_grd$ssmooth[x], mindur = exp_grd$mindur[x], maxdur = exp_grd$maxdur[x], thinning = exp_grd$thinning[x], parallel = parallel, pb = FALSE, power = exp_grd$power[x], hold.time = exp_grd$hold.time[x], bp = bp, path = path, flist = flist, output = "data.frame")
          
          # make factor a character vector
          ad$sound.files <- as.character(ad$sound.files)
          
          ad$..row.id <- 1:nrow(ad)    
          
          ad <- ad[!is.na(ad$start), ]
          
         return(ad)
          })
    
      performance_l <- lapply(ad_results, function(Z) suppressWarnings(diagnose_wrlbr_int(reference = X, detection = Z, by.sound.file = by.sound.file, time.diagnostics = TRUE)))   
        
      performance <- do.call(rbind, performance_l)
      
      # duplicate expand grid tuning parameters if by sound file
      if (by.sound.file)
        exp_grd <- exp_grd[rep(1:nrow(exp_grd), each = length(unique(X$sound.files))), ]
      
        performance <- data.frame(exp_grd, performance) 
          
      if (!is.null(previous.output))
        performance <- rbind(previous.output, performance)
        
      return(performance)
    }
}

# internal, has been moved to ohun
diagnose_wrlbr_int <- function(reference, detection, by.sound.file = FALSE, time.diagnostics = FALSE)
{
  #### set arguments from options
  # get function arguments
  argms <- methods::formalArgs(diagnose_wrlbr_int)
  
  # get warbleR options
  opt.argms <- if(!is.null(getOption("warbleR"))) getOption("warbleR") else SILLYNAME <- 0
  
  # remove options not as default in call and not in function arguments
  opt.argms <- opt.argms[!sapply(opt.argms, is.null) & names(opt.argms) %in% argms]
  
  # get arguments set in the call
  call.argms <- as.list(base::match.call())[-1]
  
  # remove arguments in options that are in call
  opt.argms <- opt.argms[!names(opt.argms) %in% names(call.argms)]
  
  # set options left
  if (length(opt.argms) > 0)
    for (q in 1:length(opt.argms))
      assign(names(opt.argms)[q], opt.argms[[q]])
  
  
  # remove rows with NAs in detection
  detection <- detection[!is.na(detection$start), ]
  
  # run when detection is not empty
  if (nrow(detection) > 0){
    # look at detections matching 1 training selection at the time
    performance_l <- lapply(unique(reference$sound.files), function(z){
      
      # get subset from template for that sound file
      W <- reference[reference$sound.files == z, ]
      
      # get subset from detection for that sound file
      Z <- detection[detection$sound.files == z, ]
      
      if (nrow(Z) > 0){
        # add row labels to both
        W$.row.id <- 1:nrow(W)
        Z$.row.id <- 1:nrow(Z)
        
        # these are all the true positives
        true.positives_l <- lapply(1:nrow(W), function(y){
          
          # defined as any detection that overlaps with the template selections
          Q <- Z[(Z$start >= W$start[y] & Z$start < W$end[y]) |
                   (Z$end > W$start[y] & Z$end <= W$end[y]) |
                   (Z$start <= W$start[y] & Z$end >= W$end[y]) |
                   (Z$start >= W$start[y] & Z$end  <= W$end[y]), ]
          
          # add row label to find false.negatives
          Q$.template.row.id <- if (nrow(Q) > 0) W$.row.id[y] else
            vector()
          
          return(Q)
        })
        
        true.positives <- do.call(rbind, true.positives_l)
        
        # those not in true positives
        false.positives <- Z[!Z$.row.id %in% true.positives$.row.id, ]
        
        performance <- data.frame(
          sound.files = z,
          true.positives = length(unique(true.positives$.template.row.id)),
          false.positives = nrow(false.positives),
          false.negatives = nrow(W) - length(unique(true.positives$.template.row.id)),
          split.positives = sum(sapply(true.positives_l, nrow) > 1),
          mean.duration.true.positives = mean(true.positives$end - true.positives$start),
          mean.duration.false.positives = mean(false.positives$end - false.positives$start),
          mean.duration.false.negatives = mean(true.positives$end[!W$.row.id %in% true.positives$.template.row.id] - true.positives$start[!W$.row.id %in% true.positives$.template.row.id]),
          proportional.duration.true.positives = mean(true.positives$end - true.positives$start) / mean(W$end - W$start),
          sensitivity = length(unique(true.positives$.template.row.id)) / nrow(W),
          specificity =  length(unique(true.positives$.row.id)) / nrow(Z),
          stringsAsFactors = FALSE
        ) 
        
        # replace NaNs with NA
        for(i in 1:ncol(performance))
          if (is.nan(performance[, i])) performance[, i] <- NA
        
        # fix values when no false positives or true positives
        performance$false.positives[performance$false.positives < 0] <- 0
        performance$mean.duration.false.positives[is.na(performance$mean.duration.false.positives) | performance$false.positives == 0] <- NA
        performance$mean.duration.true.positives[is.na(performance$mean.duration.true.positives) | performance$true.positives == 0] <- NA
        
        # make sensitvities higher than 1 (because of split positives) 1
        performance$sensitivity[performance$sensitivity > 1] <- 1
      } else
        performance <- data.frame(
          sound.files = z,
          true.positives = 0,
          false.positives = 0,
          false.negatives = nrow(W),
          split.positives = NA,
          mean.duration.true.positives = NA,
          mean.duration.false.positives = NA,
          mean.duration.false.negatives = mean(W$end - W$start),
          proportional.duration.true.positives = NA,
          sensitivity = 0,
          specificity = 0,
          stringsAsFactors = FALSE
        ) 
      
      
      return(performance)
    })
    
    out_df <- do.call(rbind, performance_l)
    
  } else
    # output when there were no detections
    out_df <-
    data.frame(
      sound.files = unique(reference$sound.files),
      true.positives = 0,
      false.positives = 0,
      false.negatives = nrow(reference),
      split.positives = NA,
      mean.duration.true.positives = NA,
      mean.duration.false.positives = NA,
      mean.duration.false.negatives = NA,
      proportional.duration.true.positives = NA,
      sensitivity = 0,
      specificity = 0,
      stringsAsFactors = FALSE
    ) 
  
  # summarize across sound files
  if (!by.sound.file)
    out_df <- summarize_diagnose_wrlbr_int(diagnostic = out_df, time.diagnostics = time.diagnostics)
  
  
  # remove time diagnostics
  if (!time.diagnostics)
    out_df <- out_df[ , grep(".duration.", names(out_df), invert = TRUE)]
  
  return(out_df)
}



summarize_diagnose_wrlbr_int <- function(diagnostic, time.diagnostics = FALSE){
  
  # basic columns required in 'diagnostic'
  basic_colms <- c("true.positives", "false.positives", "false.negatives", "split.positives", "sensitivity", "specificity")
  
  #check diagnostic
  if (any(!(basic_colms %in% colnames(diagnostic))))
    stop(paste(paste(
      basic_colms[!(basic_colms %in% colnames(diagnostic))], collapse =
        ", "
    ), "column(s) not found in data frame"))
  
  # get extra column names (ideally should include tuning parameters)
  extra_colms <- setdiff(colnames(diagnostic), c(basic_colms, c("sound.files", "mean.duration.true.positives", "mean.duration.false.positives", "mean.duration.false.negatives", "proportional.duration.true.positives")))
  
  # create column combining all extra columns
  diagnostic$..combined.extra.colms <- if (length(extra_colms) > 0)
    apply(diagnostic[, extra_colms, drop = FALSE], 1, paste, collapse = "~>~") else "1"
  
  # get which extra columns were numeric
  if (length(extra_colms) > 0) numeric_colms <- sapply(diagnostic[, extra_colms, drop = FALSE], is.numeric)
  
  # switch to FALSE if no time columns
  if (is.null(diagnostic$mean.duration.true.positives)) time.diagnostics <- FALSE
  
  summ_diagnostic_l <- lapply(unique(diagnostic$..combined.extra.colms), function(x){
    # subset for each combination
    Y <- diagnostic[diagnostic$..combined.extra.colms == x, ]
    
    # summarize across sound files
    summ_diagnostic <- data.frame(
      true.positives = sum(Y$true.positives, na.rm = TRUE),
      false.positives = sum(Y$false.positives, na.rm = TRUE),
      false.negatives = sum(Y$false.negatives, na.rm = TRUE),
      split.positives = sum(Y$split.positives, na.rm = TRUE),
      ..combined.extra.colms = x,
      stringsAsFactors = FALSE
    ) 
    
    # add time diagnostics
    if (time.diagnostics){
      
      summ_diagnostic$mean.duration.true.positives <- mean(Y$mean.duration.true.positives, na.rm = TRUE)
      summ_diagnostic$mean.duration.false.positives <- mean(Y$mean.duration.false.positives, na.rm = TRUE)
      summ_diagnostic$mean.duration.false.negatives <- mean(Y$mean.duration.false.negatives, na.rm = TRUE)
      summ_diagnostic$proportional.duration.true.positives <- weighted.mean(x = Y$proportional.duration.true.positives, w = Y$true.positives, na.rm = TRUE)
      
    }  
    
    # add sensitivity and specificity at the end
    summ_diagnostic$sensitivity <- sum(Y$true.positives, na.rm = TRUE) / (sum(Y$true.positives, na.rm = TRUE) + sum(Y$false.negatives, na.rm = TRUE))
    summ_diagnostic$specificity <- 1 - (sum(Y$false.positives, na.rm = TRUE) / (sum(Y$true.positives, na.rm = TRUE) + sum(Y$false.positives, na.rm = TRUE)))
    
    # replace NaNs with NA
    for(i in 1:ncol(summ_diagnostic))
      if (is.nan(summ_diagnostic[, i])) summ_diagnostic[, i] <- NA
    
    return(summ_diagnostic) 
    
  })
  
  # put all in a single data frame
  summ_diagnostics_df <- do.call(rbind, summ_diagnostic_l)
  
  # add extra columns data
  if (length(unique(diagnostic$..combined.extra.colms)) > 1){
    
    # extract extra columns as single columns
    extra_colms_df <- do.call(rbind, strsplit(summ_diagnostics_df$..combined.extra.colms, "~>~"))
    
    # add column names
    colnames(extra_colms_df) <- extra_colms
    
    # convert numeric columns
    if (any(numeric_colms)){
      extra_num_colms_df <- as.data.frame(apply(extra_colms_df[, numeric_colms, drop = FALSE], 2, as.numeric))
      
      # add non-numeric columns
      if (any(!numeric_colms)) {
        
        non_num_colms_df <- extra_colms_df[, !numeric_colms, drop = FALSE]
        colnames(non_num_colms_df) <- names(numeric_colms)[!numeric_colms]
        extra_colms_df <- cbind(non_num_colms_df, extra_num_colms_df)
        
      } else
        extra_colms_df <- extra_num_colms_df
    }
    
    # put all together 
    summ_diagnostics_df <- cbind(extra_colms_df, summ_diagnostics_df)
  }
  
  # remove column with all extra columns info
  summ_diagnostics_df$..combined.extra.colms <- NULL
  
  
  return(summ_diagnostics_df)
}