#' Update T* Omic
#'
#' Provide an updated features, samples or measurements table to a
#'   \code{tomic}.
#'
#' @inheritParams infer_tomic_table_type
#'
#' @returns
#'
#' A \code{tomic} object with updated features, samples or measurements.
#'
#' @examples
#'
#' library(dplyr)
#' updated_features <- brauer_2008_triple$features %>%
#'   dplyr::filter(BP == "biological process unknown") %>%
#'   dplyr::mutate(chromosome = purrr::map_int(systematic_name, function(x) {
#'     which(LETTERS == stringr::str_match(x, "Y([A-Z])")[2])
#'   }))
#'
#' update_tomic(brauer_2008_triple, updated_features)
#' @export
update_tomic <- function(tomic, tomic_table) {
  checkmate::assertClass(tomic, "tomic")
  checkmate::assertDataFrame(tomic_table)

  # convert to triple_omic
  triple_omic <- tomic_to(tomic, "triple_omic")

  tomic_table_type <- infer_tomic_table_type(tomic, tomic_table)

  # update design
  new_design <- tomic$design
  new_design[[tomic_table_type]] <- tibble::tibble(
    variable = colnames(tomic_table),
    type = tomic_table %>% purrr::map_chr(~ class(.)[1])
  ) %>%
    dplyr::mutate(
      type = ifelse(
        variable == new_design$feature_pk,
        "feature_primary_key",
        type
      ),
      type = ifelse(
        variable == new_design$sample_pk,
        "sample_primary_key",
        type
      )
    )

  triple_omic$design <- new_design

  # update appropriate data table
  triple_omic[[tomic_table_type]] <- tomic_table

  # reconcile triple omic in case rows have been filtered
  triple_omic <- reconcile_triple_omic(triple_omic)
  check_triple_omic(triple_omic, fast_check = FALSE)

  # convert back to input class
  return(tomic_to(triple_omic, class(tomic)[1]))
}

#' Center T* Omic
#'
#' Center each measurement by subtracting the mean.
#'
#' @inheritParams tomic_to
#' @param measurement_vars measurement variables to center
#'
#' @return A \code{tomic} object where one or more measurements have
#'   been centered on a feature-by-feature basis.
#'
#' @examples
#' center_tomic(brauer_2008_tidy)
#' @export
center_tomic <- function(tomic, measurement_vars = "all") {
  checkmate::assertClass(tomic, "tomic")
  checkmate::assertCharacter(measurement_vars)

  # convert to triple_omic
  triple_omic <- tomic_to(tomic, "triple_omic")

  possible_measurements <- triple_omic$design$measurements %>%
    dplyr::filter(type %in% c("integer", "numeric")) %>%
    {
      .$variable
    }

  stopifnot(class(measurement_vars) == "character")

  if (measurement_vars[1] != "all") {
    excess_measurements <- setdiff(measurement_vars, possible_measurements)

    if (length(excess_measurements) != 0) {
      cli::cli_abort(c(
        "Invalid measurement variables",
        "x" = "{.var {excess_measurements}} {?is/are} not valid numeric or integer measurement variable{?s}",
        "i" = "Valid measurements: {.var {possible_measurements}}"
      ))
    }

    valid_measurements <- intersect(measurement_vars, possible_measurements)

    if (length(valid_measurements) == 0) {
      cli::cli_abort(c(
        "No valid measurement variables",
        "x" = "No valid numeric or integer measurement variables provided",
        "i" = "Valid measurements: {.var {possible_measurements}}"
      ))
    }
  } else {
    valid_measurements <- possible_measurements
  }

  measurement_pk <- triple_omic$design$measurements$variable[
    triple_omic$design$measurements$type == "feature_primary_key"
  ]

  triple_omic$measurements <- triple_omic$measurements %>%
    dplyr::group_by(!!rlang::sym(measurement_pk)) %>%
    dplyr::mutate(dplyr::across(dplyr::all_of(valid_measurements), center)) %>%
    dplyr::ungroup()

  # convert back to initial class
  return(tomic_to(triple_omic, class(tomic)[1]))
}

center <- function(x) {
  x - mean(x, na.rm = TRUE)
}

#' Update Tidy Omic
#'
#' Update a Tidy 'Omics data and schema to reflect newly added fields.
#'
#' @inheritParams check_tidy_omic
#' @param updated_tidy_data a tibble of data to use to update \code{tidy_omic}.
#' @param new_variable_tables a named character vector of newly added variables
#'   in \code{updated_tidy_data} (names) and the table (features, samples,
#'   measurements) they apply to (values).
#'
#' @returns a \code{tidy_omic} object with an updated schema and/or data.
#'
#' @examples
#'
#' library(dplyr)
#'
#' tidy_omic <- brauer_2008_tidy
#' updated_tidy_data <- tidy_omic$data %>%
#'   mutate(new_sample_var = "foo") %>%
#'   select(-DR)
#' new_variable_tables <- c("new_sample_var" = "samples")
#'
#' update_tidy_omic(tidy_omic, updated_tidy_data, new_variable_tables)
#'
#' @export
update_tidy_omic <- function(
  tidy_omic,
  updated_tidy_data,
  new_variable_tables = c()
) {
  checkmate::assertClass(tidy_omic, "tomic")
  checkmate::assertClass(tidy_omic, "tidy_omic")
  checkmate::assertDataFrame(updated_tidy_data)
  checkmate::assertNamed(new_variable_tables, type = "unique")

  purrr::walk(
    names(new_variable_tables),
    checkmate::assertChoice,
    colnames(updated_tidy_data)
  )
  purrr::walk(
    unname(new_variable_tables),
    checkmate::assertChoice,
    c("features", "samples", "measurements")
  )

  # check whether all new variables are defined in new_variable_tables
  current_fields <- get_design_tbl(tidy_omic)
  new_variables <- setdiff(colnames(updated_tidy_data), current_fields$variable)
  unclassified_new_variables <- setdiff(
    new_variables,
    names(new_variable_tables)
  )

  if (length(unclassified_new_variables) > 0) {
    n_vars <- length(unclassified_new_variables)
    cli::cli_abort(c(
      "Unclassified new variables",
      "x" = "{.arg updated_tidy_data} contains {n_vars} new variable{?s}: {.var {unclassified_new_variables}}",
      "i" = "Add {cli::qty(n_vars)}{?this variable/these variables} to {.arg new_variable_tables} so romic knows how to use {?it/them}"
    ))
  }

  current_fields <- get_design_tbl(tidy_omic)
  updated_fields <- current_fields %>%
    # remove unused variables
    dplyr::filter(
      variable %in% colnames(updated_tidy_data),
      !(type %in% c("feature_primary_key", "sample_primary_key"))
    ) %>%
    dplyr::select(-type) %>%
    # add new variables
    dplyr::bind_rows(
      tibble::tibble(
        variable = names(new_variable_tables),
        table = unname(new_variable_tables)
      )
    ) %>%
    # update classes of all variables
    dplyr::mutate(
      type = purrr::map_chr(
        variable,
        function(x) {
          class(updated_tidy_data[[x]])[1]
        }
      )
    ) %>%
    # remove absent fields
    dplyr::filter(variable %in% colnames(updated_tidy_data))

  # require the primary keys stay as-is
  updated_fields <- dplyr::bind_rows(
    current_fields %>%
      dplyr::filter(type %in% c("feature_primary_key", "sample_primary_key")),
    updated_fields
  )

  excess_vars <- union(
    updated_fields$variable,
    colnames(updated_tidy_data)
  ) %>%
    setdiff(
      intersect(
        updated_fields$variable,
        colnames(updated_tidy_data)
      )
    )

  if (length(excess_vars) > 0) {
    cli::cli_abort(c(
      "Variable mismatch",
      "x" = "{length(excess_vars)} variable{?s} {?was/were} not matched between {.arg updated_tidy_data} and the design list: {.var {excess_vars}}"
    ))
  }

  tidy_omic$data <- updated_tidy_data
  tidy_omic$design$features <- updated_fields %>%
    dplyr::filter(table == "features")
  tidy_omic$design$samples <- updated_fields %>%
    dplyr::filter(table == "samples")
  tidy_omic$design$measurements <- updated_fields %>%
    dplyr::filter(table == "measurements")

  check_tidy_omic(tidy_omic, fast_check = FALSE)

  return(tidy_omic)
}

#' Sort Triple Hclust
#'
#' Sort a \code{triple_omic} object using hierarchical clustering
#'
#' @inheritParams check_triple_omic
#' @inheritParams sort_tomic
#'
#' @returns A \code{triple_omic} with clustered features or samples.
sort_triple_hclust <- function(triple_omic, sort_table, value_var) {
  stopifnot(
    any(c("character", "factor", "ordered") %in% class(value_var)),
    length(value_var) == 1
  )

  available_value_vars <- triple_omic$design$measurements$variable[
    triple_omic$design$measurements$type == "numeric"
  ]

  if (length(available_value_vars) == 0) {
    cli::cli_abort(c(
      "No numeric variables available",
      "x" = "No numeric variables present in measurements",
      "i" = "Hierarchical clustering requires at least one numeric measurement variable"
    ))
  }

  if (!(value_var %in% available_value_vars)) {
    cli::cli_abort(c(
      "Invalid value variable",
      "x" = "{.var {value_var}} is not present in measurements",
      "i" = "Valid value variables: {.var {available_value_vars}}"
    ))
  }

  tidy_omic <- triple_to_tidy(triple_omic)
  pk <- ifelse(
    sort_table == "features",
    triple_omic$design$feature_pk,
    triple_omic$design$sample_pk
  )

  cluster_dim <- dplyr::case_when(
    sort_table == "features" ~ "rows",
    sort_table == "samples" ~ "columns"
  )

  cluster_orders <- hclust_order(
    tidy_omic$data,
    tidy_omic$design$feature_pk,
    tidy_omic$design$sample_pk,
    value_var,
    cluster_dim = cluster_dim
  ) %>%
    {
      .[[cluster_dim]]
    }

  if (!inherits(tidy_omic$data[[pk]], "factor")) {
    # match classes if needed to facilitate joins
    class(cluster_orders) <- class(tidy_omic$data[[pk]])
  }

  # use the ordered clusters to sort the appropriate sort_table
  sorted_table <- (
    triple_omic[[sort_table]] %>%
       dplyr::left_join(
         tibble::tibble(!!rlang::sym(pk) := cluster_orders) %>%
           dplyr::mutate(order = seq_len(dplyr::n())),
         by = pk
       ) %>%
       dplyr::arrange(order)
  )

  return(sorted_table)
}

#' Sort Triple Arrange
#'
#' Sort a \code{triple_omic} object based on the values of one or more
#'   variables.
#'
#' @inheritParams check_triple_omic
#' @inheritParams sort_tomic
#'
#' @returns A \code{triple_omic} with sorted features or samples.
sort_triple_arrange <- function(triple_omic, sort_table, sort_variables) {
  stopifnot(
    any(
      c("character", "factor", "ordered", "numeric") %in% class(sort_variables)
    ),
    length(sort_variables) > 0
  )

  available_sort_vars <- triple_omic$design[[sort_table]]$variable
  invalid_sort_vars <- setdiff(sort_variables, available_sort_vars)
  if (length(invalid_sort_vars) != 0) {
    n_invalid <- length(invalid_sort_vars)
    cli::cli_abort(c(
      "Invalid sort variables",
      "x" = "{n_invalid} sort variable{?s} {?was/were} not found in {.val {sort_table}}: {.var {invalid_sort_vars}}",
      "i" = "Available variables: {.var {available_sort_vars}}"
    ))
  }

  sorted_table <- triple_omic[[sort_table]] %>%
    dplyr::arrange(!!!rlang::syms(sort_variables))

  return(sorted_table)
}

#' Sort Triple Omic
#'
#' Sort a dataset's features or samples
#'
#' \code{sort_tomic} supports the reordering of features or samples using
#'   either hierarchical clustering or based on the levels of other variables.
#'   Sorting occurs by turning either the feature or sample primary key
#'   into a factor whose levels reflect the sort.
#'
#' @inheritParams tomic_to
#' @param sort_type
#' \describe{
#'   \item{hclust}{Arrange samples by hierarchical clustering of a provided
#'     \code{value_var}}
#'   \item{arrange}{Arrange samples by the factor or alphanumeric ordering of
#'     a set of \code{sort_variables}}
#' }
#' @param sort_table samples or features
#' @param sort_variables A set of attributes in sort_table to sort with in
#'   \code{arrange}.
#' @param value_var An abundance value to use with \code{hclust}
#'
#' @returns A \code{tomic} object where feature or sample primary keys have
#'   been turned into a factor reflecting how they are sorted.
#'
#' @examples
#'
#' library(dplyr)
#'
#' sort_tomic(brauer_2008_triple,
#'   sort_type = "arrange", sort_table = "samples",
#'   sort_variables = c("nutrient", "DR")
#' ) %>%
#'   sort_tomic(
#'     sort_type = "hclust",
#'     sort_table = "features",
#'     value_var = "expression"
#'   )
#' @export
sort_tomic <- function(
  tomic,
  sort_type,
  sort_table,
  sort_variables = NULL,
  value_var = NULL
  ) {
  checkmate::assertClass(tomic, "tomic")
  checkmate::assertChoice(sort_type, c("hclust", "arrange"))
  checkmate::assertChoice(sort_table, c("features", "samples"))

  # convert to triple_omic
  triple_omic <- tomic_to(tomic, "triple_omic")

  # sorts return a tibble mapping

  if (sort_type == "hclust") {
    sorted_attributes <- sort_triple_hclust(triple_omic, sort_table, value_var)
  } else if (sort_type == "arrange") {
    sorted_attributes <- sort_triple_arrange(
      triple_omic,
      sort_table,
      sort_variables
    )
  } else {
    cli::cli_abort("{.arg {sort_type}} has no defined sort method")
  }

  pk <- ifelse(
    sort_table == "features",
    triple_omic$design$feature_pk,
    triple_omic$design$sample_pk
  )

  sorted_attributes_fct <- sorted_attributes %>%
    dplyr::mutate(orderedId := factor(!!rlang::sym(pk),
      levels = !!rlang::sym(pk)
    ))

  # update features

  triple_omic[[sort_table]] <- sorted_attributes_fct %>%
    dplyr::select(-!!rlang::sym(pk)) %>%
    dplyr::rename(!!rlang::sym(pk) := orderedId) %>%
    dplyr::select(dplyr::all_of(triple_omic$design[[sort_table]]$variable))

  # update measurements

  triple_omic$measurements <- triple_omic$measurements %>%
    dplyr::left_join(
      sorted_attributes_fct %>%
        dplyr::select(!!rlang::sym(pk), orderedId),
      by = pk
    ) %>%
    dplyr::select(-rlang::sym(pk)) %>%
    dplyr::rename(!!rlang::sym(pk) := orderedId) %>%
    dplyr::select(dplyr::all_of(triple_omic$design$measurements$variable))

  # convert back to initial class
  return(tomic_to(triple_omic, class(tomic)[1]))
}

#' T* Omic Sort Status
#'
#' Determine whether features &/or samples have been sorted and stored as
#'   ordered_featureId and ordered_sampleId.
#'
#' @inheritParams tomic_to
#'
#' @returns length 1 character string indicating whether the \code{tomic}
#'   is sorted.
#'
#' @examples
#'
#' tomic_sort_status(brauer_2008_tidy)
#' @export
tomic_sort_status <- function(tomic) {
  checkmate::assertClass(tomic, "tomic")

  if ("tidy_omic" %in% class(tomic)) {
    is_sorted_features <- any(class(tomic$data[[tomic$design$feature_pk]]) %in%
      c("factor", "ordered"))
    is_sorted_samples <- any(class(tomic$data[[tomic$design$sample_pk]]) %in%
      c("factor", "ordered"))
  } else if ("triple_omic" %in% class(tomic)) {
    is_sorted_features <- any(class(tomic$features[[tomic$design$feature_pk]]) %in%
      c("factor", "ordered"))
    is_sorted_samples <- any(class(tomic$samples[[tomic$design$sample_pk]]) %in%
      c("factor", "ordered"))
  } else {
    cli::cli_abort(c(
      "Invalid tomic subclass",
      "!" = "Object has {.cls tomic} class but is neither {.cls tidy_omic} nor {.cls triple_omic}",
      "i" = "This is an internal error - please report this bug"
    ))
  }

  status <- dplyr::case_when(
    is_sorted_features & is_sorted_samples ~ "fully sorted",
    is_sorted_features ~ "sorted features, unsorted samples",
    is_sorted_samples ~ "sorted_samples, unsorted features",
    TRUE ~ "unsorted"
  )

  return(status)
}

#' Update Sample Factors
#'
#' Update sample metadata to order categorical variables based on a
#' specified factor order.
#'
#' @inheritParams tomic_to
#' @param factor_levels a character vector specifying the ordering of factor levels.
#'
#' @returns a tomic object with updated sample metadata
#'
#' @examples
#' update_sample_factors(
#'   brauer_2008_tidy, list(nutrient = c("G", "N", "P", "S", "L", "U"))
#' )
#'
#' @export
update_sample_factors <- function (tomic, factor_levels) {

  checkmate::assertClass(tomic, "tomic")
  checkmate::assertNamed(factor_levels)
  checkmate::assertList(factor_levels)

  samples <- romic::get_tomic_table(tomic, "samples")
  purrr::walk(names(factor_levels), checkmate::assertChoice, colnames(samples))

  # update all categorical variables with specified factor orders
  for (fct in names(factor_levels)) {
    samples[[fct]] <- set_factor_levels(samples[[fct]], factor_levels[[fct]], fct)
  }

  out <- romic::update_tomic(tomic, samples)

  return(out)
}

set_factor_levels <- function(samples_vec, fct_levels, fct_label = "?") {

  # validate factor orders
  if (!("character" %in% class(fct_levels))) {
    cli::cli_abort(
      "The factor levels for {fct_label} were {.val {class(fct_levels)}}.
      This should be a character vector."
    )
  }

  duplicated_levels <- unique(fct_levels[duplicated(fct_levels)])
  if (length(duplicated_levels) > 0) {
    cli::cli_abort(
      "{length(duplicated_levels)} factor levels {?was/were} duplicated in the `factor_levels` specification for
    {.val {fct_label}}: {duplicated_levels}"
    )
  }

  if ("character" %in% class(samples_vec)) {

    extra_sample_vars <- setdiff(samples_vec, fct_levels)
    if (length(extra_sample_vars)) {
      cli::cli_alert_warning(
        "{.val {extra_sample_vars}} {?was/were} present in the sample metadata's {.field {fct_label}} field but did not have a corresponding factor level in the {.arg factor_levels} list. They will be added to the end of the specified factor levels"
      )

      fct_levels <- c(fct_levels, extra_sample_vars)
    }

    missing_sample_vars <- setdiff(fct_levels, samples_vec)
    if (length(missing_sample_vars)) {
      cli::cli_alert_warning(
        "{.val {missing_sample_vars}} {?was/were} present in {.arg factor_levels} for {.field {fct_label}} but did not have a corresponding entry in the sample metadata."
      )
    }

    if (any(is.na(samples_vec))) {
      cli::cli_alert_warning(
        "The {.field {fct_label}} field in the sample metadata contains {sum(is.na(samples_vec))} NA values. These entries will be replaced with an {.val unspecified} level.")

      samples_vec[is.na(samples_vec)] <- "unspecified"
      fct_levels <- c(fct_levels, "unspecified")
    }

    samples_fct_vec <- factor(
      samples_vec,
      levels = fct_levels
    )
  } else {
    cli::cli_abort(
      "The factor levels for fct were {.val {class(fct)}} and cannot be converted
    to factors using the specified factor orders.")
  }

  return(samples_fct_vec)
}
