# Connects to an H2O instance and returns an H2OClient handle for it.
# 1) If can't connect and user doesn't want to start H2O, stop immediately
# 2) If user does want to start H2O and running locally, attempt to bring up H2O launcher
# 3) If user does want to start H2O, but running non-locally, print an error
#
# Arguments:
#   ip, port               Address of the H2O instance; combined into http://<ip>:<port>.
#   startH2O               If TRUE and nothing answers at that address, launch H2O
#                          (only allowed when ip is "localhost" or "127.0.0.1").
#   forceDL                Forwarded to .h2o.startJar as `forceDL`.
#   Xmx                    Deprecated; when supplied it overrides both max_mem_size and min_mem_size.
#   max_mem_size,
#   min_mem_size           Memory bounds as strings such as "1g" or "1024m" (or NULL).
#   beta, assertion,
#   license, many_cols,
#   chunk_bytes,
#   data_max_factor_levels Forwarded to .h2o.startJar when H2O is launched from here.
#   nthreads               Forwarded to .h2o.startJar; the default -2 is replaced by 2 and a
#                          note explaining how to use all CPUs is printed afterwards.
#   ice_root               Forwarded to .h2o.startJar; defaults to tempdir() when NULL.
#   strict_version_check   If TRUE a server/package version mismatch is an error,
#                          otherwise only a warning.
#
# Returns the H2OClient object, which is also stored as "SERVER" in .pkg.env.
h2o.init <- function(ip = "127.0.0.1", port = 54321, startH2O = TRUE, forceDL = FALSE, Xmx,
                     beta = FALSE, assertion = TRUE, license = NULL, nthreads = -2, max_mem_size = NULL, min_mem_size = NULL,
                     ice_root = NULL, strict_version_check = TRUE, data_max_factor_levels = 1000000, many_cols = FALSE, chunk_bytes = 22) {
  if(!is.character(ip)) stop("ip must be of class character")
  if(!is.numeric(port)) stop("port must be of class numeric")
  if(!is.logical(startH2O)) stop("startH2O must be of class logical")
  if(!is.logical(forceDL)) stop("forceDL must be of class logical")
  if(!missing(Xmx) && !is.character(Xmx)) stop("Xmx must be of class character")
  if(!is.numeric(nthreads)) stop("nthreads must be of class numeric")
  if(!is.null(max_mem_size) && !is.character(max_mem_size)) stop("max_mem_size must be of class character")
  if(!is.null(min_mem_size) && !is.character(min_mem_size)) stop("min_mem_size must be of class character")

  # grepl() returns TRUE/FALSE. regexpr() returns -1 on "no match", which is
  # truthy, so negating it never rejected anything.
  mem_pattern <- "^[1-9][0-9]*[gGmM]$"
  if(!is.null(max_mem_size) && !grepl(mem_pattern, max_mem_size)) stop("max_mem_size option must be like 1g or 1024m")
  if(!is.null(min_mem_size) && !grepl(mem_pattern, min_mem_size)) stop("min_mem_size option must be like 1g or 1024m")
  if(!missing(Xmx) && !grepl(mem_pattern, Xmx)) stop("Xmx option must be like 1g or 1024m")

  if(!is.logical(beta)) stop("beta must be of class logical")
  if(!is.logical(assertion)) stop("assertion must be of class logical")
  if(!is.null(license) && !is.character(license)) stop("license must be of class character")
  if(!is.null(ice_root) && !is.character(ice_root)) stop("ice_root must be of class character")
  if(!is.logical(strict_version_check)) stop("strict_version_check must be of class logical")
  if(!is.numeric(data_max_factor_levels)) stop("`data_max_factor_levels` must be numeric.")
  if(!is.logical(many_cols)) stop("`many_cols` must be logical")
  if(!is.numeric(chunk_bytes)) stop("`chunk_bytes` must be numeric")

  if ((R.Version()$major == "3") && (R.Version()$minor == "1.0")) {
    warning("H2O is specifically not compatible with this exact")
    warning("version of R 3.1.0.")
    warning("Please change to a newer or older version of R.")
    warning("(For technical details, search the r-devel mailing list")
    warning("for type.convert changes in R 3.1.0.)")
    stop("R 3.1.0 is not compatible with H2O!")
  }

  if(!missing(Xmx)) {
    warning("Xmx is a deprecated parameter. Use `max_mem_size` and `min_mem_size` to set the memory boundaries. Using `Xmx` to set these.")
    max_mem_size <- Xmx
    min_mem_size <- Xmx
  }

  if (is.null(ice_root)) {
    ice_root <- tempdir()
  }

  # Disable Up.json for now, since it doesn't exist in most builds.
  # Re-enable this sometime a few months from now.
  # myUpURL = paste("http://", ip, ":", port, "/Up.json", sep="")
  myUpURL <- paste("http://", ip, ":", port, sep="")
  myURL <- paste("http://", ip, ":", port, sep="")
  warnNthreads <- FALSE
  if(!url.exists(myUpURL, .opts = curlOptions(useragent=R.version.string))) {
    if(!startH2O)
      stop(paste("Cannot connect to H2O server. Please check that H2O is running at", myURL))
    else if(ip == "localhost" || ip == "127.0.0.1") {
      cat("\nH2O is not running yet, starting it now...\n")

      # -2 is the "caller did not choose" default: start with 2 threads and
      # remember to tell the user how to get more.
      if (nthreads == -2) {
        warnNthreads <- TRUE
        nthreads <- 2
      }

      .h2o.startJar(nthreads = nthreads, max_memory = max_mem_size,
                    min_memory = min_mem_size, beta = beta,
                    assertion = assertion, forceDL = forceDL,
                    license = license, ice_root = ice_root,
                    max_factor_levels = data_max_factor_levels,
                    many_cols = many_cols, chunk_bytes = chunk_bytes)

      # Poll once per second, for at most 60 attempts, until the server answers.
      count <- 0
      while(!url.exists(myURL) && (count < 60)) {
        Sys.sleep(1)
        count <- count + 1
      }

      if (!url.exists(myURL)) {
        stop("H2O failed to start, stopping execution.")
      }
    } else {
      stop("Can only start H2O launcher if IP address is localhost.")
    }
  }

  cat("Successfully connected to", myURL, "\n\n")
  H2Oserver <- new("H2OClient", ip = ip, port = port)
  # Sys.sleep(0.5)    # Give cluster time to come up
  h2o.clusterInfo(H2Oserver)
  cat("\n")

  verH2O <- .h2o.__version(H2Oserver)
  verPkg <- packageVersion("h2o")
  if(verH2O != verPkg) {
    msg <- sprintf("Version mismatch! H2O is running version %s but R package is version %s", verH2O, toString(verPkg))
    if (strict_version_check) {
      stop(msg)
    }
    else {
      warning(msg)
    }
  }

  if (warnNthreads) {
    cat("Note:  As started, H2O is limited to the CRAN default of 2 CPUs.\n")
    cat("       Shut down and restart H2O as shown below to use all your CPUs.\n")
    cat("           > h2o.shutdown(localH2O)\n")
    cat("           > localH2O = h2o.init(nthreads = -1)\n")
    cat("\n")
  }

  assign("SERVER", H2Oserver, .pkg.env)
  return(H2Oserver)
}

# Shuts down H2O instance running at given IP and port
#
# Arguments:
#   client  H2OClient object; its `ip` and `port` slots identify the instance.
#   prompt  If TRUE, ask for confirmation on the console first; any answer not
#           starting with "Y" or "y" leaves the instance (and the pid marker
#           file) untouched.
#
# Stops with an error if nothing answers at the client's URL or if the server
# reports an error for the shutdown request. Returns NULL invisibly.
h2o.shutdown <- function(client, prompt = TRUE) {
  # inherits() stays a scalar test even for objects with several classes.
  if(!inherits(client, "H2OClient")) stop("client must be of class H2OClient")
  if(!is.logical(prompt)) stop("prompt must be of class logical")

  myURL <- paste("http://", client@ip, ":", client@port, sep="")
  if(!url.exists(myURL)) stop(paste("There is no H2O instance running at", myURL))

  if(prompt) {
    ans <- readline(paste("Are you sure you want to shutdown the H2O instance running at", myURL, "(Y/N)? "))
    confirmed <- substr(ans, 1, 1) %in% c("Y", "y")
  } else {
    confirmed <- TRUE
  }

  # Declined: keep the pid marker so this session still knows it started H2O.
  if(!confirmed) return(invisible(NULL))

  res <- getURLContent(paste(myURL, .h2o.__PAGE_SHUTDOWN, sep="/"))
  res <- fromJSON(res)
  if(!is.null(res$error))
    stop(paste("Unable to shutdown H2O. Server returned the following error:\n", res$error))

  # The instance was asked to stop; drop the "started from this R session" marker.
  if((client@ip == "localhost" || client@ip == "127.0.0.1") && .h2o.startedH2O()) {
    pid_file <- .h2o.getTmpFile("pid")
    if(file.exists(pid_file)) file.remove(pid_file)
  }
  invisible(NULL)
}

# ----------------------- Diagnostics ----------------------- #
# **** TODO: This isn't really a cluster status... it's a node status check for the node we're connected to.
# This is possibly confusing because this can come back without warning,
# but if a user tries to do any remoteSend, they will get a "cloud sick warning"
# Suggest cribbing the code from Internal.R that checks cloud status (or just call it here?)

# Prints a summary of the cloud as reported by the node `client` points at
# and returns a per-node data frame.
#
# Arguments:
#   client  H2OClient object identifying the node to query.
#
# Returns a data frame with one row per entry in the response's `nodes` list
# and the columns listed in `cnames` below. `last_contact` is rewritten to
# the number of seconds between now and the reported contact timestamp.
# Stops if the response contains no nodes.
h2o.clusterStatus <- function(client) {
  if(missing(client) || !inherits(client, "H2OClient")) stop("client must be a H2OClient object")
  .h2o.__checkUp(client)
  myURL <- paste("http://", client@ip, ":", client@port, "/", .h2o.__PAGE_CLOUD, sep = "")
  res <- fromJSON(postForm(myURL, .params = list(quiet="true", skip_ticks="true"), style = "POST", .opts = curlOptions(useragent=R.version.string)))

  cat("Version:", res$version, "\n")
  cat("Cloud name:", res$cloud_name, "\n")
  cat("Node name:", res$node_name, "\n")
  cat("Cloud size:", res$cloud_size, "\n")
  if(res$locked) cat("Cloud is locked\n\n") else cat("Accepting new members\n\n")
  if(is.null(res$nodes) || length(res$nodes) == 0) stop("No nodes found!")

  # Calculate how many seconds ago we last contacted cloud
  cur_time <- Sys.time()
  for(i in seq_along(res$nodes)) {
    # The reported value is divided by 1e3 before being used as epoch seconds.
    last_contact_sec <- as.numeric(res$nodes[[i]]$last_contact)/1e3
    last_contact_time <- as.POSIXct(last_contact_sec, origin = "1970-01-01")
    # Force seconds: plain POSIXct subtraction picks its unit automatically
    # (secs/mins/hours/...), which as.numeric() would then silently drop.
    res$nodes[[i]]$last_contact <- as.numeric(difftime(cur_time, last_contact_time, units = "secs"))
  }
  cnames <- c("name", "value_size_bytes", "free_mem_bytes", "max_mem_bytes", "free_disk_bytes", "max_disk_bytes", "num_cpus", "system_load", "rpcs", "last_contact")
  temp <- data.frame(t(sapply(res$nodes, c)))
  return(temp[,cnames])
}

#---------------------------- H2O Jar Initialization -------------------------------#
# Path of the installed package directory. Starts as NULL and is filled in by
# .onLoad (via `<<-`); .h2o.downloadJar falls back to system.file() while it
# is still NULL.
.h2o.pkg.path <- NULL
# Empty environment that exists only so .onAttach can register an exit-time
# finalizer on it.
.h2o.jar.env <- new.env()    # Dummy variable used to shutdown H2O when R exits

# Package load hook: records where the package lives and, when RCurl is not
# installed on a unix-alike, checks that libcurl's `curl-config` is usable.
.onLoad <- function(lib, pkg) {
  .h2o.pkg.path <<- paste(lib, pkg, sep = .Platform$file.sep)

  # installing RCurl requires curl and curl-config, which is typically separately installed
  has_rcurl <- length(find.package("RCurl", quiet = TRUE)) > 0
  needs_curl_check <- !has_rcurl && .Platform$OS.type == "unix"
  if (!needs_curl_check) return(invisible(NULL))

  # packageStartupMessage("Checking libcurl version...")
  curl_config <- Sys.which("curl-config")
  if (curl_config[[1]] == '' || system2(curl_config, args = "--version") != 0)
    stop("libcurl not found! Please install libcurl (version 7.14.0 or higher) from http://curl.haxx.se. On Linux systems, 
              you will often have to explicitly install libcurl-devel to have the header files and the libcurl library.")
}

# Package attach hook: prints the getting-started banner, clears any leftover
# pid marker file, and registers a finalizer that shuts down a locally
# started H2O when R exits.
.onAttach <- function(libname, pkgname) {
  rule <- "----------------------------------------------------------------------\n"
  banner <- paste0(
    "\n",
    rule,
    "\n",
    "Your next step is to start H2O and get a connection object (named\n",
    "'localH2O', for example):\n",
    "    > localH2O = h2o.init()\n",
    "\n",
    "For H2O package documentation, ask for help:\n",
    "    > ??h2o\n",
    "\n",
    "After starting H2O, you can use the Web UI at http://localhost:54321\n",
    "For more information visit http://docs.0xdata.com\n",
    "\n",
    rule)
  packageStartupMessage(banner)

  # Start from a clean slate: drop a pid marker left over at this path.
  marker <- .h2o.getTmpFile("pid")
  if (file.exists(marker)) file.remove(marker)

  # Shut down local H2O when user exits from R
  shutdown_on_exit <- function(e) {
    local_ip <- "127.0.0.1"
    local_port <- 54321
    local_url <- paste0("http://", local_ip, ":", local_port)

    # require(RCurl); require(rjson)
    # Only stop the instance if this R session is the one that launched it.
    if (.h2o.startedH2O() && url.exists(local_url))
      h2o.shutdown(new("H2OClient", ip = local_ip, port = local_port), prompt = FALSE)
  }
  reg.finalizer(.h2o.jar.env, shutdown_on_exit, onexit = TRUE)
}


# Package detach hook: if something answers at http://127.0.0.1:54321, try to
# shut it down without prompting; a failure is downgraded to a warning that
# tells the user how to stop H2O by hand.
.onDetach <- function(libpath) {
  local_ip <- "127.0.0.1"
  local_port <- 54321
  local_url <- paste0("http://", local_ip, ":", local_port)
  if (!url.exists(local_url)) return(invisible(NULL))

  warn_manual_shutdown <- function(e) {
    rule <- "----------------------------------------------------------------------\n"
    warning(paste0(
      "\n",
      rule,
      "\n",
      "Could not shut down the H2O Java Process!\n",
      "Please shutdown H2O manually by navigating to `http://localhost:54321/Shutdown`\n\n",
      "Windows requires the shutdown of h2o before re-installing -or- updating the h2o package.\n",
      "For more information visit http://docs.0xdata.com\n",
      "\n",
      rule))
  }

  tryCatch(
    h2o.shutdown(new("H2OClient", ip = local_ip, port = local_port), prompt = FALSE),
    error = warn_manual_shutdown)
}

#.onDetach <- function(libpath) {
#   if(exists(".LastOriginal", mode = "function"))
#      assign(".Last", get(".LastOriginal"), envir = .GlobalEnv)
#   else if(exists(".Last", envir = .GlobalEnv))
#     rm(".Last", envir = .GlobalEnv)
#}

# .onUnload <- function(libpath) {
#   ip = "127.0.0.1"; port = 54321
#   myURL = paste("http://", ip, ":", port, sep = "")
#   
#   require(RCurl); require(rjson)
#   if(.h2o.startedH2O() && url.exists(myURL))
#     h2o.shutdown(new("H2OClient", ip=ip, port=port), prompt = FALSE)
# }

# Launches H2O as a background Java process bound to 127.0.0.1:54321.
#
# Arguments:
#   nthreads           Passed as -nthreads when greater than 0.
#   max_memory,
#   min_memory         Passed as -Xmx / -Xms when not NULL.
#   beta, many_cols    Add the -beta / -many_cols flags when TRUE.
#   assertion          Adds -ea when TRUE.
#   forceDL            Forwarded to .h2o.downloadJar as `overwrite`.
#   license            Optional path to a license file; must exist.
#   ice_root           Required; passed as -ice_root (backslashes become "/" on Windows).
#   max_factor_levels  Passed as -data_max_factor_levels.
#   chunk_bytes        Passed as -chunk_bytes.
#
# The process's stdout/stderr go to the files named by .h2o.getTmpFile().
# Once the launch call has succeeded, the current R pid is written to the pid
# marker file so that .h2o.startedH2O() can tell this session started H2O.
# Stops on a missing license file, missing ice_root, GNU Java, or a non-zero
# return code from the launch call. Returns NULL invisibly.
.h2o.startJar <- function(nthreads = -1, max_memory = NULL,
                          min_memory = NULL, beta = FALSE,
                          assertion = TRUE, forceDL = FALSE,
                          license = NULL, ice_root, max_factor_levels = 1000000,
                          many_cols = FALSE, chunk_bytes = 22) {
  command <- .h2o.checkJava()

  if (!is.null(license) && !file.exists(license)) {
    stop(paste("License file not found (", license, ")", sep=""))
  }

  if (missing(ice_root)) {
    stop("ice_root must be specified for .h2o.startJar")
  }

  # Note: Logging to stdout and stderr in Windows only works for R version 3.0.2 or later!
  stdout <- .h2o.getTmpFile("stdout")
  stderr <- .h2o.getTmpFile("stderr")

  jar_file <- .h2o.downloadJar(overwrite = forceDL)
  # Wrap the path in double quotes before handing it to system2().
  jar_file <- paste('"', jar_file, '"', sep = "")

  # Throw an error if GNU Java is being used
  jver <- system2(command, "-version", stdout = TRUE, stderr = TRUE)
  if(any(grepl("GNU libgcj", jver))) {
    stop("
Sorry, GNU Java is not supported for H2O.
Please download the latest Java SE JDK 7 from the following URL:
http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html")
  }

  if(any(grepl("Client VM", jver))) {
    warning("
You have a 32-bit version of Java.  H2O works best with 64-bit Java.
Please download the latest Java SE JDK 7 from the following URL:
http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html")

    # Set default max_memory to be 1g for 32-bit JVM.
    if(is.null(max_memory)) max_memory <- "1g"
  }

  if (.Platform$OS.type == "windows") {
    slashes_fixed_ice_root <- gsub("\\\\", "/", ice_root)
  } else {
    slashes_fixed_ice_root <- ice_root
  }

  # Compose args
  mem_args <- c()
  if(!is.null(min_memory)) mem_args <- c(mem_args, paste("-Xms", min_memory, sep=""))
  if(!is.null(max_memory)) mem_args <- c(mem_args, paste("-Xmx", max_memory, sep=""))

  args <- mem_args
  if(assertion) args <- c(args, "-ea")
  args <- c(args, "-jar", jar_file)
  args <- c(args, "-name", "H2O_started_from_R")
  args <- c(args, "-ip", "127.0.0.1")
  args <- c(args, "-port", "54321")
  args <- c(args, "-ice_root", slashes_fixed_ice_root)
  if(nthreads > 0) args <- c(args, "-nthreads", nthreads)
  if(beta) args <- c(args, "-beta")
  if(!is.null(license)) args <- c(args, "-license", license)
  args <- c(args, "-data_max_factor_levels", max_factor_levels)
  if(many_cols) args <- c(args, "-many_cols")
  args <- c(args, "-chunk_bytes", chunk_bytes)

  cat("\n")
  cat(        "Note:  In case of errors look at the following log files:\n")
  cat(sprintf("    %s\n", stdout))
  cat(sprintf("    %s\n", stderr))
  cat("\n")

  # Print a java -version to the console
  system2(command, c(mem_args, "-version"))
  cat("\n")

  # Run the real h2o java command
  rc <- system2(command,
                args=args,
                stdout=stdout,
                stderr=stderr,
                wait=FALSE)
  if (rc != 0) {
    stop(sprintf("Failed to exec %s with return code=%s", jar_file, as.character(rc)))
  }

  # Write PID to file to track if R started H2O. Done only after the launch
  # call succeeded, so a failure above cannot leave a stale marker behind.
  write(Sys.getpid(), .h2o.getTmpFile("pid"), append = FALSE)
  invisible(NULL)
}

# Builds the path of one of the per-user bookkeeping files kept in tempdir().
#
# Arguments:
#   type  One of "stdout", "stderr" or "pid".
#
# Returns tempdir()/h2o_<user>_started_from_r.<out|err|pid>, where <user> is
# the USERNAME (Windows) or USER (elsewhere) environment variable with every
# non-alphanumeric character replaced by "_". Stops on any other `type`.
.h2o.getTmpFile <- function(type) {
  if (missing(type) || !type %in% c("stdout", "stderr", "pid"))
    stop("type must be one of 'stdout', 'stderr', or 'pid'")

  user_var <- if (.Platform$OS.type == "windows") "USERNAME" else "USER"
  usr <- gsub("[^A-Za-z0-9]", "_", Sys.getenv(user_var))

  tail_part <- if (type == "stdout") {
    "started_from_r.out"
  } else if (type == "stderr") {
    "started_from_r.err"
  } else {
    "started_from_r.pid"
  }

  file_name <- paste("h2o", usr, tail_part, sep = "_")
  paste(tempdir(), file_name, sep = .Platform$file.sep)
}

# Reports whether the pid marker file names the current R process, i.e.
# whether this session is the one that wrote the marker.
#
# Always returns a single TRUE or FALSE: a missing, empty or unparseable
# marker file counts as FALSE, so callers can use the result directly in
# `if` / `&&`.
.h2o.startedH2O <- function() {
  pid_file <- .h2o.getTmpFile("pid")
  if (!file.exists(pid_file)) return(FALSE)

  # Only the first line matters; a non-numeric line becomes NA (the coercion
  # warning is expected and suppressed), an empty file becomes numeric(0).
  pid_saved <- suppressWarnings(as.numeric(readLines(pid_file, n = 1L, warn = FALSE)))

  # isTRUE() maps NA and zero-length comparisons to FALSE.
  isTRUE(pid_saved == Sys.getpid())
}

# This function returns the path to the Java executable if it exists
# 1) Check for Java in user's PATH
# 2) Check JAVA_HOME for bin/java (bin/java.exe on Windows); used only if that file exists
# 3) If Windows, check standard install locations in Program Files folder. Warn if JRE found, but not JDK since H2O requires JDK to run.
# 4) When all fails, stop and prompt user to download JDK from Oracle website.
.h2o.checkJava <- function() {
  is_windows <- .Platform$OS.type == "windows"

  java_on_path <- Sys.which("java")
  if(nchar(java_on_path) > 0)
    return(java_on_path)

  java_home <- Sys.getenv("JAVA_HOME")
  if(nchar(java_home) > 0) {
    # The launcher is only called java.exe on Windows.
    exe_name <- if(is_windows) "java.exe" else "java"
    path <- paste(java_home, "bin", exe_name, sep = .Platform$file.sep)
    # A JAVA_HOME without a launcher falls through to the remaining checks
    # instead of returning a path that cannot be executed.
    if(file.exists(path)) return(path)
  }

  if(is_windows) {
    # Note: Should we require the version (32/64-bit) of Java to be the same as the version of R?
    prog_folder <- c("Program Files", "Program Files (x86)")
    for(prog in prog_folder) {
      prog_path <- paste("C:", prog, "Java", sep = .Platform$file.sep)
      jdk_folder <- list.files(prog_path, pattern = "jdk")

      for(jdk in jdk_folder) {
        path <- paste(prog_path, jdk, "bin", "java.exe", sep = .Platform$file.sep)
        if(file.exists(path)) return(path)
      }
    }

    # Check for existence of JRE and warn user
    for(prog in prog_folder) {
      path <- paste("C:", prog, "Java", "jre7", "bin", "java.exe", sep = .Platform$file.sep)
      if(file.exists(path)) warning("Found JRE at ", path, " but H2O requires the JDK to run.")
    }
  }

  stop("Cannot find Java. Please install the latest JDK from http://www.oracle.com/technetwork/java/javase/downloads/index.html")
}

# Makes sure <package dir>/java/h2o.jar exists and returns its path.
#
# Arguments:
#   branch, version  Release coordinates used to build the download URL; when
#                    missing they are read from branch.txt / buildnum.txt in
#                    the package directory.
#   overwrite        If TRUE, download even when h2o.jar is already present.
#
# The jar is downloaded to "<dest>.tmp", checked against the published MD5,
# and only then moved into place. Temporary files are removed on every exit
# path. Stops on a malformed checksum file, a failed transfer, a checksum
# mismatch, or when the verified file cannot be moved into place.
.h2o.downloadJar <- function(branch, version, overwrite = FALSE) {
  if (is.null(.h2o.pkg.path)) {
    pkg_path <- dirname(system.file(".", package = "h2o"))
  } else {
    pkg_path <- .h2o.pkg.path
  }

  if (missing(branch)) {
    branchFile <- paste(pkg_path, "branch.txt", sep = .Platform$file.sep)
    branch <- readLines(branchFile)
  }

  if (missing(version)) {
    buildnumFile <- paste(pkg_path, "buildnum.txt", sep = .Platform$file.sep)
    version <- readLines(buildnumFile)
  }

  if(!is.logical(overwrite)) stop("overwrite must be TRUE or FALSE")

  dest_folder <- paste(pkg_path, "java", sep = .Platform$file.sep)
  if(!file.exists(dest_folder)) dir.create(dest_folder)
  dest_file <- paste(dest_folder, "h2o.jar", sep = .Platform$file.sep)

  # Download if h2o.jar doesn't already exist or user specifies force overwrite
  if(overwrite || !file.exists(dest_file)) {
    base_url <- paste("s3.amazonaws.com/h2o-release/h2o", branch, version, "Rjar", sep = "/")
    h2o_url <- paste("http:/", base_url, "h2o.jar", sep = "/")

    # Get MD5 checksum
    md5_url <- paste("http:/", base_url, "h2o.jar.md5", sep = "/")
    md5_file <- tempfile(fileext = ".md5")
    on.exit(unlink(md5_file), add = TRUE)   # also runs when a check below stops
    download.file(md5_url, destfile = md5_file, mode = "w", cacheOK = FALSE, quiet = TRUE)
    md5_check <- readLines(md5_file, n = 1, warn = FALSE)
    # An empty file yields character(0); test the length first so the
    # condition below is always a single TRUE/FALSE.
    if (length(md5_check) != 1 || nchar(md5_check) != 32)
      stop("md5 malformed, must be 32 characters (see ", md5_url, ")")

    # Save to temporary file first to protect against incomplete downloads
    temp_file <- paste(dest_file, "tmp", sep = ".")
    # Never leave a partial or corrupt download behind; after a successful
    # move the file no longer exists and this is a no-op.
    on.exit(if (file.exists(temp_file)) unlink(temp_file), add = TRUE)
    cat("Performing one-time download of h2o.jar from\n")
    cat("    ", h2o_url, "\n")
    cat("(This could take a few minutes, please be patient...)\n")
    download.file(url = h2o_url, destfile = temp_file, mode = "wb", cacheOK = FALSE, quiet = TRUE)

    # Apply sanity checks
    if(!file.exists(temp_file))
      stop("Error: Transfer failed. Please download ", h2o_url, " and place h2o.jar in ", dest_folder)

    md5_temp_file_as_char <- as.character(tools::md5sum(temp_file))
    if(md5_temp_file_as_char != md5_check) {
      cat("Error: Expected MD5: ", md5_check, "\n")
      cat("Error: Actual MD5  : ", md5_temp_file_as_char, "\n")
      stop("Error: MD5 checksum of ", temp_file, " does not match ", md5_check)
    }

    # Move good file into final position; fall back to a copy if the rename
    # is refused, and fail loudly rather than return a path that was not updated.
    moved <- suppressWarnings(file.rename(temp_file, dest_file))
    if(!moved) moved <- file.copy(temp_file, dest_file, overwrite = TRUE)
    if(!moved)
      stop("Error: Could not move ", temp_file, " to ", dest_file)
  }
  return(dest_file)
}
