/*
 * Decompiled with CFR 0.152.
 */
package eu.kliegr.ac1.Rinterface.discretization;

import eu.kliegr.ac1.Rinterface.discretization.AttributeDiscretization;
import eu.kliegr.ac1.Rinterface.discretization.MissingValueTreatmentEnum;
import eu.kliegr.ac1.data.AttributeType;
import eu.kliegr.ac1.data.parsers.CSVparser;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.ThreadLocalRandom;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Stream;

public class DiscretizeWithR {
    private static final Logger LOGGER = Logger.getLogger(DiscretizeWithR.class.getName());

    public static void convertCSVwithHeader(String path, String outputPath, AttributeDiscretization[] mapping, String sep, MissingValueTreatmentEnum missingValueTreatment) throws FileNotFoundException, Exception {
        FileOutputStream os = new FileOutputStream(outputPath);
        PrintStream printStream = new PrintStream(os);
        LOGGER.log(Level.INFO, "Parsing csv file:{0}", path);
        Stream<String> firstLine = Files.lines(Paths.get(path, new String[0]));
        printStream.println(firstLine.findFirst().get());
        Stream<String> allLines = Files.lines(Paths.get(path, new String[0]));
        Stream<String[]> tokenized = allLines.map(line -> line.replaceAll(CSVparser.weirdCharacter, "").split(sep));
        tokenized.skip(1L).forEach(items -> {
            boolean skipThisEntry = false;
            for (int i = 0; i < ((String[])items).length; ++i) {
                items[i] = CSVparser.removeEnclosingQuotes(items[i]);
                items[i] = mapping[i].convert(items[i]);
                if (!items[i].isEmpty()) continue;
                if (missingValueTreatment == MissingValueTreatmentEnum.remove) {
                    skipThisEntry = true;
                    continue;
                }
                if (missingValueTreatment != MissingValueTreatmentEnum.replaceWithNAN) continue;
                items[i] = "NAN";
            }
            if (!skipThisEntry) {
                printStream.println(String.join((CharSequence)sep, items));
            }
        });
        printStream.close();
        LOGGER.log(Level.INFO, "Result saved to file:{0}", outputPath);
    }

    public static int getColPos(String name, String path, String sep) throws IOException {
        Stream<String> firstLine = Files.lines(Paths.get(path, new String[0]));
        String[] colNames = firstLine.findFirst().get().replaceAll(CSVparser.weirdCharacter, "").split(sep);
        for (int i = 0; i < colNames.length; ++i) {
            String curName = CSVparser.removeEnclosingQuotes(colNames[i]);
            if (!curName.equals(name)) continue;
            return i;
        }
        return -1;
    }

    public static AttributeDiscretization[] executeRdiscretization(String datapath, ArrayList<AttributeType> attributeTypes, String IDcolumnName, String targetColName, String sep) throws FileNotFoundException, IOException, InterruptedException {
        int IDcolumnPos = DiscretizeWithR.getColPos(IDcolumnName, datapath, sep);
        int targetColPos = DiscretizeWithR.getColPos(targetColName, datapath, sep);
        if (targetColPos == -1) {
            throw new UnsupportedOperationException("Target column '" + targetColName + "' not found in data, using dataset : " + datapath);
        }
        int i = 0;
        ArrayList<AttributeDiscretization> discs = new ArrayList<AttributeDiscretization>();
        for (AttributeType at : attributeTypes) {
            if (at == AttributeType.numerical) {
                if (targetColPos == i | IDcolumnPos == i) {
                    discs.add(new AttributeDiscretization());
                } else {
                    try {
                        AttributeDiscretization disc = DiscretizeWithR.interfaceWithRScript(datapath, i + 1, sep, targetColPos + 1);
                        discs.add(disc);
                    }
                    catch (Exception e) {
                        LOGGER.log(Level.INFO, "Discretization for attribute {0}  failed, setting discretization to all", i);
                        discs.add(new AttributeDiscretization());
                    }
                }
            } else {
                discs.add(new AttributeDiscretization());
            }
            ++i;
        }
        return discs.toArray(new AttributeDiscretization[discs.size()]);
    }

    private static AttributeDiscretization interfaceWithRScript(String datapath, int colpos, String sep, int targetColPos) throws FileNotFoundException, IOException, InterruptedException {
        String scriptID = Integer.toString(ThreadLocalRandom.current().nextInt(1, 1000000));
        String scriptName = "temp/discr_" + scriptID + ".R";
        String outputFileName = "temp/discr_" + scriptID + ".out";
        File Rscript = new File(scriptName);
        FileOutputStream os = new FileOutputStream(Rscript);
        PrintStream printStream = new PrintStream(os);
        StringBuilder Rcode = new StringBuilder();
        Rcode.append("sink(\"").append(outputFileName).append("\", append=FALSE, split=FALSE) # send to file\n");
        Rcode.append("library(discretization) # load lib + \n");
        Rcode.append("train <- read.csv(\"").append(datapath).append("\",header=TRUE, sep = \"").append(sep).append("\") # load csv + \n");
        Rcode.append("numeric <- subset( train, select = c (").append(colpos).append(",").append(targetColPos).append(") ) # store numeric columns\n");
        Rcode.append("numeric <- na.omit(numeric) # remove rows with missing data\n");
        Rcode.append("mdlp(numeric)$cutp\n");
        printStream.print(Rcode);
        printStream.close();
        Process p = Runtime.getRuntime().exec("Rscript " + Rscript);
        p.waitFor();
        int exitValue = p.exitValue();
        if (exitValue != 0) {
            LOGGER.log(Level.INFO, "R exit value {0} (NOT OK)", exitValue);
            throw new IOException("Discretization program execution failed. Program is at " + Rscript.getAbsolutePath());
        }
        LOGGER.log(Level.INFO, "R exit value {0} (OK)", exitValue);
        AttributeDiscretization disc = DiscretizeWithR.parseDiscFile(outputFileName);
        return disc;
    }

    private static AttributeDiscretization parseDiscFile(String path) throws IOException {
        String thisRow;
        AttributeDiscretization discr = null;
        BufferedReader br = new BufferedReader(new FileReader(path));
        ArrayList<String> cutPoints = new ArrayList<String>();
        while ((thisRow = br.readLine()) != null) {
            if (thisRow.startsWith("[[")) {
                cutPoints = new ArrayList();
                continue;
            }
            if (thisRow.trim().startsWith("[")) {
                String[] linePoints = thisRow.replaceFirst("\\s*\\[\\d+\\]\\s+", "").split("\\s+");
                List<String> asList = Arrays.asList(linePoints);
                cutPoints.addAll(asList);
                continue;
            }
            if (!thisRow.trim().isEmpty()) continue;
            discr = new AttributeDiscretization(cutPoints);
        }
        br.close();
        if (discr == null) {
            throw new IOException("Discretization failed. Is R and discretization package installed?");
        }
        return discr;
    }

    private DiscretizeWithR() {
    }
}

