################################################################################
# Manhattan NMF via Linear Programming (Rglpk)
# L1 minimization for general non-negative data
################################################################################

# Define helper OUTSIDE the main function to avoid closure capture
.solve_w_row_lp <- function(i, Ht, x, obj_vec, mat_base, dir_vec, bounds_list) {
  n <- dim(Ht)[1]  ;  D <- dim(Ht)[2]
  y <- x[i, ]
  # Build full constraint matrix for this row
  mat <- rbind( cbind(Ht, diag(n) ),      # First n constraints
                cbind(-Ht, diag(n) ) )     # Second n constraints
  rhs <- c(y, -y)
  # Solve LP
  result <- Rglpk::Rglpk_solve_LP( obj = obj_vec, mat = mat,
            dir = dir_vec, rhs = rhs, bounds = bounds_list, max = FALSE )
  result$solution[1:D]
}



nmf.manh <- function(x, k, W = NULL, H = NULL, k_meds = TRUE,
                     maxiter = 1000, tol = 1e-6, ncores = 1) {

  runtime <- proc.time()
  n <- dim(x)[1]  ;  D <- dim(x)[2]
  In <- diag(n)
  # Initialize W and H
  if ( k_meds ){
    a <- ClusterR::Cluster_Medoids( t(x), k, distance_metric = "manhattan", threads = ncores)

    W <- matrix(nrow = n, ncol = k)
    for ( i in 1:k )  W[, i] <- Rfast::rowsums(x[, a$clusters == i, drop = FALSE])
    H <- ClusterR::Cluster_Medoids(x, k, distance_metric = "manhattan", threads = ncores)$medoids
  } else {
    W <- matrix( Rfast2::Runif(n * k, 0, Rfast::Median(x)), n, k )
    H <- matrix( Rfast2::Runif(k * D, 0, 10), k, D )
  }

  obj_history <- numeric(maxiter)

  # Fixed LP problem structure
  obj_vec <- c( rep(0, k), rep(1, D) )
  dir_vec <- rep(">=", 2 * D)
  bounds_list <- list(
    lower = list( ind = 1:(k + D), val = rep(0, k + D) ),
    upper = list( ind = 1:(k + D), val = rep(Inf, k + D) )
  )

  bounds <- list(
    lower = list( ind = 1:(k + n), val = rep(0, k + n) ),
    upper = list( ind = 1:(k + n), val = rep(Inf, k + n) )
  )
  obj <- c( rep(0, k), rep(1, n) )
  dir <- rep(">=", 2 * n)

  # CREATE CLUSTER ONCE BEFORE LOOP
  if ( ncores > 1 ) {
    cl <- parallel::makeCluster(ncores)
    on.exit(parallel::stopCluster(cl), add = TRUE)
    parallel::clusterEvalQ(cl, library(Rglpk))
    parallel::clusterExport(cl,
      varlist = c("obj_vec", "dir_vec", "bounds_list", ".solve_w_row_lp"),
      envir = environment() )
  }

  for ( iter in 1:maxiter ) {
    # Update H (fixing W) - sequential
    mat <- rbind( cbind(W, In ), cbind(-W, In ) )
    for ( j in 1:n ) {
      y <- x[, j]
      rhs <- c(y, -y)
      result <- Rglpk::Rglpk_solve_LP(obj = obj, mat = mat, dir = dir, rhs = rhs, bounds = bounds, max = FALSE)
      H[, j] <- result$solution[1:k]
    }

    # Update W (fixing H) - optionally parallel
    Ht <- t(H)
    if ( ncores > 1 ) {
      parallel::clusterExport(cl, varlist = c("Ht", "V"), envir = environment())
      suppressWarnings({
        W <- t( parallel::parSapply(cl, 1:n, .solve_w_row_lp, Ht = Ht, x = x, obj_vec = obj_vec,
                                   mat_base = NULL, dir_vec = dir_vec,  bounds_list = bounds_list) )
      })
    } else {
      for ( i in 1:n ) {
        W[i, ] <- .solve_w_row_lp(i, Ht, x, obj_vec, NULL, dir_vec, bounds_list)
      }
    }
    # Compute objective (L1 norm)
    Z <- W %*% H
    res <- x - Z
    obj_history[iter] <- sum( abs(res) )
    # Check convergence
    if ( iter > 1 ) {
      rel_change <- abs( obj_history[iter] - obj_history[iter-1] ) / (obj_history[iter-1] + 1e-10)
      if ( rel_change < tol ) {
        obj_history <- obj_history[1:iter]
        break
      }
    }
  }

  runtime <- proc.time() - runtime

  list( W = W, H = H, Z = Z, obj = obj_history[iter], iters = iter, runtime = runtime )
}
