#' qaqc_stic_data
#'
#' @description This function provides multiple options for QAQC flagging of
#' processed and classified STIC data frames, such as those generated by the
#' \link{classify_wetdry} function. Users can select which operations are to be
#' performed, and a character flag is added as an additional column in the
#' input data frame. Users also have the option to concatenate multiple flags
#' into one column. QAQC options currently include: (1) correction and flagging
#' of negative SpC values resulting from the calibration process, i.e.,
#' changing the negative values to 0 and flagging this; and (2) inspecting the
#' wetdry classification time series for potential anomalies based on
#' user-defined windows.
#'
#' @param stic_data A data frame with classified STIC data, such as that
#'   produced by \code{classify_wetdry}. Must contain a \code{wetdry} column
#'   when \code{inspect_classification = TRUE} and an \code{SpC} column when
#'   \code{spc_neg_correction = TRUE}.
#' @param spc_neg_correction a logical argument indicating whether the user
#'   would like to correct negative SpC values resulting from the calibration
#'   process to 0. The character code associated with this correction is
#'   \code{"C"}.
#' @param inspect_classification a logical argument indicating whether the
#'   user would like to identify short runs of identical wet/dry readings that
#'   are surrounded on both sides by runs of other readings (see
#'   \code{anomaly_size} and \code{window_size}). This operation is meant to
#'   identify potentially suspect binary wet/dry data points for further
#'   examination. The character code associated with this operation is
#'   \code{"D"}.
#' @param anomaly_size a numeric argument controlling the size (i.e., number
#'   of observations) of a clustered group of points that can be flagged as an
#'   anomaly: only runs strictly shorter than \code{anomaly_size} are
#'   candidates.
#' @param window_size a numeric argument controlling how many observations
#'   must surround a candidate run for it to be flagged: the combined length of
#'   the run immediately before and the run immediately after must be strictly
#'   greater than \code{window_size}.
#' @param concatenate_flags a logical argument indicating whether the user
#'   would like to combine the character codes generated into a single QAQC
#'   flag column.
#' @import dplyr
#'
#' @details The first and last runs of the \code{wetdry} series have a
#' neighbouring run on one side only, so they are never flagged as anomalies.
#'
#' @return The same data frame as input, but with new QAQC columns
#' (\code{negative_SpC} and/or \code{anomaly}) or a single, concatenated
#' \code{QAQC} column. The QAQC output can include: \code{"C"}, meaning the
#' calibrated SpC value was negative from `spc_neg_correction`; \code{"D"},
#' meaning the point was identified as a deviation or anomaly based on a
#' moving window from `inspect_classification`; or \code{"O"}, meaning the
#' calibrated SpC was outside the standard range based on the function
#' \code{apply_calibration}.
#' @export
#'
#' @examples qaqc_df <-
#'   qaqc_stic_data(classified_df,
#'     spc_neg_correction = TRUE,
#'     inspect_classification = TRUE, anomaly_size = 4,
#'     window_size = 100, concatenate_flags = TRUE
#'   )
#' head(qaqc_df)
qaqc_stic_data <- function(stic_data, spc_neg_correction = TRUE, inspect_classification = TRUE,
                           anomaly_size = 4, window_size = 1000, concatenate_flags = TRUE) {
  # bind variables (silences R CMD check notes for the data-masked column)
  SpC <- NULL

  # Names of the intermediate flag columns; "outside_std_range" is not created
  # here and is only picked up when it already exists in stic_data.
  flag_cols <- c("negative_SpC", "anomaly", "outside_std_range")

  # check if neg correction is possible
  if (spc_neg_correction && !("SpC" %in% names(stic_data))) {
    stop(
      "Cannot do spc_neg_correction - no SpC column. Change spc_neg_correction to FALSE or provide stic_data with SpC column.",
      call. = FALSE
    )
  }

  # check if classification inspection is possible
  if (inspect_classification && !("wetdry" %in% names(stic_data))) {
    stop(
      "Cannot do inspect_classification - no wetdry column. Change inspect_classification to FALSE or provide stic_data with wetdry column.",
      call. = FALSE
    )
  }

  if (spc_neg_correction) {
    # Flag strictly negative SpC with "C" first (using the uncorrected values),
    # then clamp non-positive SpC to 0. mutate() evaluates its arguments in
    # order, so negative_SpC still sees the original SpC.
    stic_data <-
      stic_data |>
      dplyr::mutate(
        negative_SpC = dplyr::if_else(
          condition = SpC < 0,
          true = "C",
          false = ""
        ),
        SpC = dplyr::if_else(
          condition = SpC <= 0,
          true = 0,
          false = SpC
        )
      )
  }

  if (inspect_classification) {
    # Lengths of consecutive runs of identical wetdry values, and the row
    # index at which each run ends.
    run_lengths <- rle(stic_data$wetdry)$lengths
    n_runs <- length(run_lengths)
    run_ends <- cumsum(run_lengths)

    # Candidate anomalies: short runs that have a neighbouring run on BOTH
    # sides. The first and last runs are excluded; indexing their missing
    # neighbour would yield a zero-length or NA window and make `if` fail.
    i_small <- which(run_lengths < anomaly_size)
    i_small <- i_small[i_small > 1L & i_small < n_runs]

    anomaly <- rep("", nrow(stic_data))

    for (i in i_small) {
      # Combined size of the runs immediately before and after the candidate
      i_window <- run_lengths[i - 1] + run_lengths[i + 1]

      if (i_window > window_size) {
        anomaly_start <- run_ends[i - 1] + 1
        anomaly_end <- run_ends[i]

        anomaly[anomaly_start:anomaly_end] <- "D"
      }
    }

    stic_data$anomaly <- anomaly
  }

  if (concatenate_flags) {
    # concatenate the QAQC columns with col codes: "C" for negative SpC;
    # "D" for anomalous classification; "O" for outside standard range
    #
    # some columns will only exist for calibrated data, so only use columns that exist
    stic_data_qacols <-
      stic_data |>
      dplyr::select(dplyr::any_of(flag_cols)) |>
      tidyr::unite("QAQC", sep = "", na.rm = TRUE)

    stic_data$QAQC <- stic_data_qacols$QAQC

    # drop the individual flag columns now that they are merged into QAQC
    stic_data <-
      stic_data |>
      dplyr::select(-dplyr::any_of(flag_cols))
  }

  stic_data
}
