#' A genome-wide procedure for predicting genetic variance and correlated response in bi-parental breeding populations
#' 
#' @description \code{pop.predict} uses phenotypic and genotypic data from a set of individuals known as a training population (TP) and a set of candidate parents, which may or may not be included in the TP, to predict the mean (\eqn{\mu}), genetic variance (\emph{V_G}), and superior progeny values (\eqn{\mu}\emph{_sp}) of the half-diallel, or a defined set of pairwise bi-parental crosses between parents. When multiple traits are provided \code{pop.predict} will also predict the correlated responses and correlation between all pairwise traits. See \cite{Mohammadi, Tiede, and Smith (2015)} for further details.
#' 
#'              NOTE - \code{pop.predict} writes and reads files to disk so it is highly recommended to set your working directory
#' @param G.in \code{Matrix} of genotypic data. First row contains marker names and the first column contains entry (taxa) names. Genotypes should be coded using the (1, 0, -1, NA) format familiar to users of \code{\link{rrBLUP}} (\cite{Endelman, 2011}). TIP - Set header=\code{FALSE} within \code{\link{read.table}} or \code{\link{read.csv}} when importing a tab-delimited file containing data for \code{G.in}.
#' @param y.in \code{Matrix} of phenotypic data. First column contains entry (taxa) names found in \code{G.in}, regardless of whether the entry has a phenotype for any or all traits. Additional columns contain phenotypic data; column names should reflect the trait name(s). TIP - Set header=\code{TRUE} within \code{\link{read.table}} or \code{\link{read.csv}} when importing a tab-delimited file contianing data for \code{y.in}.
#' @param map.in \code{Matrix} of genetic map data, three columns total. Column 1 contains marker names, column 2 contains chromosome number, and column 3 contains cM positions. TIP - Set header=\code{TRUE} within \code{read.table} or \code{read.csv} when importing a tab-delimited file contianing data for \code{map.in}.
#' @param crossing.table Optional \code{matrix} specifying which crosses are to be simulated, two columns total. Column 1 contains the first parent of the cross (Par1) and column 2 contains the second parent of the cross (Par2).
#' @param parents Optional \code{character vector}. If \code{parents="TP"} then only the entries (taxa) within the training population (i.e. are phenotyped for the trait) are considered as parents; all pairwise crosses will be simulated for these. User could otherwise provide a character vector of entry names; all pairwise crosses will be simulated for these.
#' @param tail.p Optional \code{numeric} indicating the percentile of the simulated progeny to be included into the calculation of \eqn{\mu}\emph{_sp} and correlated response. Default is \code{0.10}.
#' @param nInd Optional \code{integer} indicating the number of progeny simulated per cross, per iteration, using \code{\link[qtl]{sim.cross}} in R/qtl (\url{http://www.rqtl.org}; \emph{Broman et al., 2003}). Default is \code{200}.
#' @param map.plot Optional \code{logical}. If \code{TRUE} then a plot of the genetic map will be generated by \code{\link[qtl]{plot.map}}. Default is \code{FALSE}.
#' @param min.maf Optional \code{numeric} indicating a minimum minor allele frequency (MAF) when filtering \code{G.in}. Markers with an MAF < \code{min.maf} will be removed. Default is \code{0.01} to remove monomorphic markers. Set to \code{0} for no filtering.
#' @param mkr.cutoff Optional \code{numeric} indicating the maximum missing data per marker when filtering \code{G.in}. Markers missing > \code{mkr.cutoff} data will be removed. Default is \code{0.50}. Set to \code{1} for no filtering.
#' @param entry.cutoff Optional \code{numeric} indicating the maximum missing genotypic data per entry alloed when filtering \code{G.in}. Entries missing > \code{entry.cutoff} marker data will be removed. Default is \code{0.50}. Set to \code{1} for no filtering.
#' @param remove.dups Optional \code{logical}. If \code{TRUE} then duplicate entries in the genotype matrix, if present, will be removed. This step may be necessary for missing marker imputation via the EM algorithm by \code{\link[rrBLUP]{A.mat}} in \code{\link{rrBLUP}} (\cite{Endelman, 2011}; \cite{Poland et al., 2012}). Default is \code{TRUE}.
#' @param nSim Optional \code{integer} indicating the number of iterations a population should be simulated for each pairwise cross. Returned values are reported as means of parameters estimated in each of \code{nSim} simulations. Default is \code{25}.
#' @param frac.train Optional \code{numeric} indicating the fraction of the TP that is used to estimate marker effects (i.e. the prediction set) under cross-validation (CV) method 1 (see \code{Details} in \code{\link{x.val}}). The remaining \eqn{(1-frac.trait)} of the TP will then comprise the prediction set.
#' @param nCV.iter Optional \code{integer} indicating the number of times to iterate \emph{CV method 1} (see \code{Details} in \code{\link{x.val}}). Default is \code{100}.
#' @param nFold Optional \code{integer}. If a number is provided, denoting the number of "folds", then CV will be conducted using \emph{CV method 2} (see \code{Details} in \code{\link{x.val}}). Default is \code{NULL}, resulting in the default use of the \emph{CV method 1}.
#' @param nFold.reps Optional \code{integer} indicating the number of times \emph{CV method 2} is repeated. The CV accuracy returned is the average \emph{r} of each rep. Default is \code{1}.
#' @param nIter,burnIn Optional \code{integer} arguments used by \code{\link[BGLR]{BGLR}} (\cite{de los Compos and Rodriguez, 2014}) when fitting Bayesian models to estimate marker effects. The defaults are \code{12000} and \code{3000}, respectively. These values when conducting CV are fixed \code{1500} and \code{500}, respectively, for computational effeciency.
#' @param models Optional \code{Character vector} of the regression models to be used in CV and to estimate marker effects. Options include \code{rrBLUP, BayesA, BayesB, BayesC, BL, BRR}, one or more may be included at a time. By default all models are tested.
#' @param return.raw Optional \code{logical}. If \code{TRUE} then \code{pop.predict} will return the results of each simulation in addition to the summarized dataframe. Default is \code{FALSE}.
#' @details \code{pop.predict} can be used to predict the mean (\eqn{\mu}), genetic variance (\emph{V_G}), superior progeny values (\eqn{\mu}\eqn{_sp}), and predicted correlated response and correlations between all pairwise traits. The methodology and procedure to do so has been described in \cite{Bernardo (2014)} and \cite{Mohammadi, Tiede, and K.P. Smith (2015)}. Users familiar with genome-wide prediction, association mapping, and/or linkage mapping will be familiar with the
#'          required inputs of \code{pop.predict}. \code{G.in} includes all of the entries (taxa) in the TP as well as additional entries to be considered as parent candidates. Entries included in \code{G.in} that do have a phenotype for any or all traits in \code{y.in} are considered TP entries for those respective traits. \code{G.in} is filtered according to \code{min.maf}, \code{mkr.cutoff}, \code{entry.cutoff}, and \code{remove.dups};
#'          remaining missing marker data is imputed using the EM algorith (\cite{Poland et al., 2012}) when possible, and the marker mean otherwise, both implemented in \code{\link{rrBLUP}}. For each trait, the TP (i.e. entries with phenotype) is used to: \enumerate{
#'          \item Perform CV (see \code{frac.train} and \code{nCV.iter} for details about the CV method) to select a regression model
#'          \item Estimate marker effects using the model resulting in the highest CV accuracy
#'          }
#'          Models include ridge regression BLUP implemented in \code{\link{rrBLUP}} (\cite{Endelman, 2011}) and BayesA, BayesB, BayesC\eqn{\pi}, Bayesian lasso (BL), and Bayesian ridge regression (BRR) implemented in \code{\link{BGLR}} (\cite{de los Compos and Rodriguez, 2014}).
#'          Information from the \code{map.in} is then used to simulate chromosomal recombination expected in a recombinant inbred line (i.e. \emph{F-infinity}) (\cite{Broman et al., 2003}) population (size=\code{nInd}). A function then converts the recombined chromosomal segments of the generic RIL population to the chromosomal segments of the population's respective parents and GEBVs of the simulated progeny are calculated.
#'          The simulation and conversion process is repeated \emph{s} times, where \emph{s} = \code{nSim}, to calculate dispersion statistics for \eqn{\mu} and \emph{V_G}; the remainder of the values in the \code{predictions} output are means of the \emph{s} simulations.  During each iteration the correlation (\emph{r}) and correlated response of each pairwise combination of traits is also calculated and their mean across \emph{n} simulations is returned.
#'          The correlated respons of trait.B when predicting trait.A is the mean of trait.B for the (\eqn{\mu}\eqn{_sp}) of trait.A, and vice-versa; a correlated response for the bottom \code{tail.p} and upper \eqn{1-tail.p} is returned for each trait.
#'          
#'          A dataset \code{\link{think_barley.rda}} is provided as an example of the proper formatting of input files and also for users to become familiar with \code{pop.predict}.
#' @return A \code{list} containing: \itemize{ 
#'            \item \code{predictions} A \code{list} of dataframes containing predictions of (\eqn{\mu}), (\emph{V_G}), and (\eqn{\mu}\emph{_sp}). When multiple traits are provided the correlated responses and correlation between all pairwise traits is also included.
#'            \item \code{preds.per.sim} If return.raw is \code{TRUE} then a \code{dataframe} containing the results of each simulation is returned. This is usful for calculating dispersion statistics for traits not provided in the standard \code{predictions} dataframe.
#'            \item \code{CVs} A \code{dataframe} of CV results for each trait/model combination specified.
#'            \item \code{models.chosen} A \code{matrix} listing the statistical model chosen for each trait.
#'            \item \code{markers.removed} A \code{vector} of markers removed during filtering for MAF and missing data.
#'            \item \code{entries.removed} A \code{vector} of entries removed during filtering for missing data and duplicate entries.
#'          }
#' @references 
#'      Bernardo, R. 2014. Genomewide Selection of Parental Inbreds: Classes of Loci and Virtual Biparental Populations. Crop Sci. 55:2586-2595.
#'      
#'      Broman, K. W., H. Wu, S. Sen and G.A. Churchill. 2003. R/qtl: QTL mapping in experimental crosses. Bioinformatics 19:889-890.
#'      
#'      Endelman, J. B. 2011. Ridge regression and other kernels for genomic selection with R package rrBLUP. Plant Genome 4:250-255. doi: 10.3835/plantgenome2011.08.0024
#'
#'      Gustavo de los Campos and Paulino Perez Rodriguez, (2014). BGLR: Bayesian Generalized Linear Regression. R package version 1.0.3. http://CRAN.R-project.org/package=BGLR
#'      
#'      Mohammadi M., T. Tiede, and K.P. Smith. 2015. PopVar: A genome-wide procedure for predicting genetic variance and correlated response in bi-parental breeding populations. Crop Sci. \emph{Accepted}.
#'      
#'      Munoz-Amatriain, M., M. J. Moscou, P. R. Bhat, J. T. Svensson, J. Bartos, P. Suchankova, H. Simkova, T. R. Endo, R. D. Fenton, S. Lonardi, A. M. Castillo, S. Chao, L. Cistue, A. Cuesta-Marcos, K. L. Forrest, M. J. Hayden, P. M. Hayes, R. D. Horsley, K. Makoto, D. Moody, K. Sato, M. P. Valles, B. B. H. Wulff, G. J. Muehlbauer, J. Dolezel, and T. J. Close. 2011 An improved consensus linkage map of barley based on flow-sorted chromosomes and single nucleotide polymorphism markers. Plant Gen. 4:238-249.
#'      
#'      Poland, J., J. Endelman, J. Dawson, J. Rutkoski, S. Wu, Y. Manes, S. Dreisigacker, J. Crossa, H. Sanches-Villeda, M. Sorrells, and J.-L. Jannink. 2012. Genomic Selection in Wheat Breeding using Genotyping-by-Sequencing. Plant Genome 5:103-113.
#'      
#' @examples 
#' \dontrun{
#' ## View formatting
#' ## Use View() in RStudio or R GUI with X11 forwarding
#' ## Use head() in R GUI without X11 forwarding
#' View(G.in_ex)
#' View(y.in_ex)
#' View(map.in_ex)
#' View(cross.tab_ex)
#' 
#' ## setwd() - pop.predict writes and reads files to disk
#' ##   so it is recommended to set your working directory
#' 
#' ## nSim and nFold are set to low values in the
#' ## examples for sake of computing time
#' 
#' ## Ex. 1 - Predict a defined set of crosses
#' ## This example uses CV method 1 (see Details of x.val() function)
#' ex1.out <- pop.predict(G.in = G.in_ex, y.in = y.in_ex, 
#'    map.in = map.in_ex, crossing.table = cross.tab_ex,
#'    nSim=5, nCV.iter=10)
#' ex1.out$predictions  ## Predicted parameters
#' ex1.out$CVs          ## CV results
#'                
#' ## Ex. 2 - Predict all pairwise crosses between a list of parents
#' ## This example uses CV method 2 (see Details of x.val() function)
#' par.list <- sample(y.in_ex[,1], size = 10, replace = F)
#' ex2.out <- pop.predict(G.in = G.in_ex, y.in = y.in_ex,
#'    map.in = map.in_ex, parents = par.list, 
#'    nSim=5, nFold=5, nFold.reps=2)
#'        
#' ## Ex. 3 - Use only rrBLUP and Bayesian lasso (BL) models
#' ex3.out <- pop.predict(G.in = G.in_ex, y.in = y.in_ex,
#'    map.in = map.in_ex, crossing.table = cross.tab_ex,
#'    models = c("rrBLUP", "BL"), nSim=5, nCV.iter=10)  
#' }
#' @export

pop.predict <- function(G.in=NULL, y.in=NULL, map.in=NULL, crossing.table=NULL, parents=NULL, tail.p=0.10, nInd=200, map.plot=F, min.maf=0.01, mkr.cutoff=0.50, entry.cutoff=0.50, remove.dups=T, nSim=25, frac.train=0.60, nCV.iter=100, nFold=NULL, nFold.reps=1, nIter=12000, burnIn=3000, models=c("rrBLUP", "BayesA", "BayesB","BayesC", "BL", "BRR"), return.raw=F){
   
  ## QC steps
  if(is.null(G.in)) stop("Must provide a genotype (G.in) file.")
  if(is.null(y.in)) stop("Must provide a phenotype (y.in) file.")
  if(is.null(map.in)) stop("Must provide a map (map.in) file.")
  if(!is.null(min.maf) & min.maf >= 1) stop("min.maf must be within the range [0, 1)")
  if(!is.null(entry.cutoff) & entry.cutoff > 1) stop("entry.cutoff must be within the range (0, 1]")
  if(!is.null(mkr.cutoff) & mkr.cutoff > 1) stop("mkr.cutoff must be within the range (0, 1]")
  
  ### Requird functions found in 'Internal_PopVar_functions_2.20.15.R'  
  
  ###### START HERE ##############
  ## Step 1 - Parse out Geno and Map files
  G.entries <- as.character(G.in[-1, 1])
  entries.removed <- NULL; entries.to.remove <- c() ## This is needed for output, may be replaced by list of entries if filtering is enabled or if duplicate entries found
  G.markers <- as.character(t(G.in[1, -1]))
  map.markers <- as.character(map.in[,1])
  mkrs.removed <- NULL; mkrs.to.remove <- c()  ## This is needed for output, may be replaced by list of markers if filtering is enabled
  
  ## Marker, map, and geno QC
  if(!all(G.markers %in% map.markers) & all(map.markers %in% G.markers)) stop("Markers in Genotype matrix and genetic map do not completely match.")
  map <- map.in[order(map.in[,2], map.in[,3]), ] ## Sort map by chr then cM pos
  G.mat <- as.matrix(G.in[-1, -1]); class(G.mat) <- "numeric"
  G.mat <- G.mat[,order(match(map.markers, G.markers))] ## Sort G.mat so it is in same order as the map
  if(!all(unique(G.mat[,1]) %in% c(-1, 0, 1, NA))) stop("Genotypes need to be coded as -1, 0, 1")
  
  ### Remove duplicated rows from geno matrix and Filter for MAF and missing entry data
  if(remove.dups) entries.to.remove <- which(duplicated.array(G.mat))
  if(min.maf > 0) maf.list <- apply(G.mat, 2, maf.filt); mkrs.to.remove <- which(maf.list < min.maf)
  if(mkr.cutoff <1){mkrNA.list <- apply(G.mat, 2, function(M){return(length(which(is.na(M))) / length(M))}); mkrs.to.remove <- unique(c(mkrs.to.remove, which(mkrNA.list > mkr.cutoff)))}
  if(entry.cutoff < 1){entryNA.list <- apply(G.mat, 1, function(E){return(length(which(is.na(E))) / length(E))}); entries.to.remove <- unique(c(entries.to.remove, which(entryNA.list > entry.cutoff)))}
  
  if(length(mkrs.to.remove > 0)){
    G.mat <- G.mat[, -mkrs.to.remove]
    map <- map[-mkrs.to.remove, ]
    mkrs.removed <- map.markers[mkrs.to.remove]
    map.markers <- map.markers[-mkrs.to.remove]
  }
  
  if(length(entries.to.remove > 0)){
    G.mat <- G.mat[-entries.to.remove, ]
    entries.removed <- G.entries[entries.to.remove]
    G.entries <- G.entries[-entries.to.remove]
    if(!is.null(crossing.table)){ ## Removes crosses from crossing.table that include a parent in entries.removed
      cross.tabl.1col <- rbind(cbind(as.numeric(row.names(crossing.table)), as.character(crossing.table[,1])), cbind(as.numeric(row.names(crossing.table)), as.character(crossing.table[,2])))
      tab.rows.2remove <- as.numeric(unique(unlist(sapply(entries.removed, function(X){return(cross.tabl.1col[grep(X, cross.tabl.1col[,2]), 1])}))))
      if(length(tab.rows.2remove) > 0) crossing.table <- crossing.table[-tab.rows.2remove, ]
    }
    if(!is.null(parents)){parents <- parents[!parents %in% entries.removed]} ## Removes entries that are included in entries.removed from parent list 
  }
  
  y <- y.in[match(G.entries, as.character(y.in[,1])),]
  y.entries <- as.character(y[,1])
  traits <- as.character(colnames(y))[-1]; nTraits <- length(traits)
  
  
  ## Format map and write out
  name4out <- sample(10000:99999, 1)
  t.map <- t(map); rownames(t.map) <- NULL
  map4out <- cbind(c("pheno", "", ""), t.map)
  write.table(map4out, paste("map.tmp_", name4out, ".csv", sep=""), row.names=F, col.names=F, sep=",")
  
  
  ### Read in genetic map for the markers
  options(warn=-1) ## Temporarily turn off warnings
  read.map.out <-capture.output(read.map <- qtl::read.cross(format="csv", crosstype = "riself", file= paste("map.tmp_", name4out, ".csv", sep=""), na.strings="NA"))
  print(paste("Number of Markers Read in: ", unlist(strsplit(read.map.out[3], split = " "), recursive = T)[2], sep = ""), quote=F)
  unlink(paste("map.tmp_", name4out, ".csv", sep=""))
  map_t1 <- qtl::pull.map(read.map)
  options(warn=0)
  if(map.plot==T) qtl::plot.map(map_t1)
  
  
  ## Imput missing markers with EM... will switch to imputing with the mean if nEntries > nMarkers
  ## Will need to use our own MAF filter so that we can keep track of which markers are removed due to MAF and missing data
  G.imp <- rrBLUP::A.mat(G.mat, min.MAF = 0, max.missing = 1, impute.method = "EM", return.imputed = T)$imputed
  

  ### Start simulation
  for(t in 1:nTraits){
    
    trait <- traits[t]
      
    ### the y.noNA and G.noNA define the TP and are used for CV and marker effect estimation
    y_notNAs <- !is.na(y[,trait])
    y_TP <- as.numeric(y[y_notNAs, trait])
    TP.entries <- y.entries[y_notNAs]
    G_TP <- G.imp[y_notNAs, ] 
    
    cat("\n")
    cat(paste("\nSelecting best model via cross validation for ", trait, " and estimating marker effects", sep=""))
    cat("\nWarnings about 'closing unused connections' AND 'Error in rinvGauss' can be safely disregarded.\nThey are dealt with internally.")
    
    if(is.null(nFold)) junk <- capture.output(xval.out <- XValidate.nonInd(y.CV = y_TP, G.CV = G_TP, models.CV = models, frac.train.CV=frac.train, nCV.iter.CV=nCV.iter, burnIn.CV = 750, nIter.CV = 1500)$CV.summary)
    if(!is.null(nFold)) junk <- capture.output(xval.out <- XValidate.Ind(y.CV = y_TP, G.CV = G_TP, models.CV = models, nFold.CV = nFold, nFold.CV.reps = nFold.reps, burnIn.CV = 750, nIter.CV = 1500)$CV.summary)
    
    if(t == 1){
      CV.results <- list()
      mkr_effs.mat <- matrix(NA, ncol = nTraits, nrow = ncol(G.imp))
      colnames(mkr_effs.mat) <- traits
      beta.list <- c()
      best.models <- c()
    }
    
    CV.results[[t]] <- xval.out
    
    best.model <- as.character(xval.out$Model[which(xval.out$r_avg == max(xval.out$r_avg))])
    best.models[t] <- best.model; names(best.models)[t] <- trait
    
    if(best.model == "rrBLUP"){
      mix.solve.out <- rrBLUP::mixed.solve(y_TP, Z=G_TP, SE=F, return.Hinv=F)
      beta <- as.numeric(mix.solve.out$beta)
      mkr_effects <- mix.solve.out$u
    }else{
      capture.output(bayes.fit <- BGLR::BGLR(y=y_TP, ETA=list(list(X=G_TP, model=best.model)), verbose=F, nIter=nIter, burnIn=burnIn))
      mkr_effects <- as.numeric(bayes.fit$ETA[[1]]$b)
      beta <- bayes.fit$mu  
    }
    
    beta.list[t] <- beta
    mkr_effs.mat[,t] <- mkr_effects
        
  }
  
  
  ## Set up which crosses to predict
  if(!is.null(crossing.table)){ ## Used when the user provides a list of specific crosses, parents come from G.in.entries (i.e. not necessarily in TP)
    crossing.table <- as.matrix(crossing.table)
    crossing.mat <- par.position(crossing.table, par.entries = G.entries)$parent.position
    crosses.possible <- par.position(crossing.table, par.entries = G.entries)$crosses.possible
  }
    
  if(is.null(crossing.table) & !is.null(parents) & all(parents == "TP")){ ## Used to just make crosses among the TP -- useful for datasets where different traits have different levels of missing data
    par.G.pos <- match(TP.entries, G.entries)
    crossing.mat <- t(combn(par.G.pos, 2))
    crosses.possible <- par.name(crossing.mat, par.entries = G.entries)  
  }
    
  if(is.null(crossing.table) & all(parents != "TP") & !is.null(parents)){ ## Usd when a list of parent candidates is defined
    par.G.pos <- match(parents, G.entries)
    crossing.mat <- t(combn(par.G.pos, 2))
    crosses.possible <- par.name(crossing.mat, par.entries = G.entries)
  }
    
  if(is.null(crossing.table) & is.null(parents)){ ## If no crosses are specified then all combinations of the parent candidates are considered
    crossing.mat <- t(combn(1:nrow(G.imp), 2))
    crosses.possible <- par.name(crossing.mat, par.entries = G.entries)
  }
    
    
  ## Calculate GEBVs of all entries in G.imp
  par.BVs <- G.imp %*% mkr_effs.mat
  for(b in 1:length(beta.list)){
    beta.tmp <- beta.list[b]
    par.BVs[,b] <- par.BVs[,b] +  beta.tmp
  }
    
  
  ## Generates results dataframe for all traits
  df.tmp <- data.frame(cbind(crosses.possible, matrix(list(rep(NA, times=nSim)), nrow = nrow(crosses.possible), ncol = (8+3*(nTraits-1)))))
  names(df.tmp)[1:10] <- c("Par1", "Par1", "midPar.Pheno", "midPar.GEBV", "pred.mu", "pred.mu_sd", "pred.varG", "pred.varG_sd", "mu.sp_low", "mu.sp_high")
  for(n in 1:nTraits){
    if(n == 1) param.dfs <- list()
    param.dfs[[n]] <- as.matrix(df.tmp)
  }; names(param.dfs) <- paste(traits, "_param.df", sep="")
  
  
  ## Start simulation and var.prd process
  cat("\n")
  cat(paste("\nBrewing", nSim, "populations... Please be patient.", sep=" "))
  cat("\n")
  prog.bar <- txtProgressBar(min=1,  max=(nrow(crossing.mat)*nSim), style=3); p=1
  M <- nInd
  
  for(s in 1:nSim){
    sim.pop <- qtl::sim.cross(map_t1, type="riself", n.ind = M, model=NULL)
    qtl::write.cross(sim.pop, "csv", paste("sim.pop.tmp_", name4out, sep="")) ## NEED to figure out a way to not read it out and in
    pop.mat <- as.matrix(read.csv(paste("sim.pop.tmp_", name4out, ".csv", sep=""), header=T))[3:(M+2), 2:(length(mkr_effects)+1)]
    unlink(paste("sim.pop.tmp_", name4out, ".csv", sep=""))   
    
    for(z in 1:nrow(crossing.mat)){
      setTxtProgressBar(prog.bar, p)
      pop.mat2 <- matrix(NA, nrow=nrow(pop.mat), ncol=ncol(pop.mat))
      
      par1 <- G.imp[crossing.mat[z,1], ]
      par2 <- G.imp[crossing.mat[z,2], ]
      
      ## Assign parent genotypes onto simulated RILs (create a function to do this?)
      for(r in 1:M){
        pop.mat2[r,which(pop.mat[r,]=="A")] <- par1[which(pop.mat[r,]=="A")]
        pop.mat2[r,which(pop.mat[r,]=="B")] <- par2[which(pop.mat[r,]=="B")]
        #if(cross.type=="f2") pop.mat2[r,which(pop.mat[r,]=="H")] <- 0
      }
      
      mkr.has.0 <- apply(pop.mat2, 2, function(X){return(length(which(X == 0)))})
      replace.0.mat <- rbind(which(mkr.has.0 != 0), mkr.has.0[which(mkr.has.0 != 0)])
      
      if(ncol(replace.0.mat) > 0){
        for(b in 1:ncol(replace.0.mat)){
          pop.mat2[which(pop.mat2[,replace.0.mat[1,b]] == 0), replace.0.mat[1,b]] <- sample(c(1,-1), size = replace.0.mat[2,b],  replace = T, prob = c(.5,.5))
        }
      }
      
      prog_pred.mat <- pop.mat2 %*% mkr_effs.mat
      for(b in 1:length(beta.list)){
        beta.tmp <- beta.list[b]
        prog_pred.mat[,b] <- prog_pred.mat[,b] +  beta.tmp
      }
      
      for(n in 1:nTraits){
        if(s == 1){
          colnames(param.dfs[[n]])[11:(10+3*(nTraits-1))]<- c(paste("low.resp_", traits[-n], sep=""), paste("high.resp_", traits[-n], sep=""), paste("cor_w/_", traits[-n], sep=""))
          param.dfs[[n]][[z, "midPar.Pheno"]][s] <- 0.5*(y[,n+1][crossing.mat[z,1]] + y[,n+1][crossing.mat[z,2]])
          param.dfs[[n]][[z, "midPar.GEBV"]][s] <- 0.5*(par.BVs[,n][crossing.mat[z,1]] + par.BVs[,n][crossing.mat[z,2]])
        }
        param.dfs[[n]][[z, "pred.mu"]][s] <- mean(prog_pred.mat[, n])
        param.dfs[[n]][[z, "pred.mu_sd"]][s] <- mean(prog_pred.mat[, n])
        param.dfs[[n]][[z, "pred.varG"]][s] <- var(prog_pred.mat[, n])
        param.dfs[[n]][[z, "pred.varG_sd"]][s] <- var(prog_pred.mat[, n])
        param.dfs[[n]][[z, "mu.sp_low"]][s] <- tails(prog_pred.mat[, n], tail.p = tail.p)[2]
        param.dfs[[n]][[z, "mu.sp_high"]][s] <- tails(prog_pred.mat[, n], tail.p = tail.p)[1]
        ## Calculate correlations between traits and correlated response and 
        index <- 1; for(n2 in (1:nTraits)[-n]){
          param.dfs[[n]][[z, 10+index]][s] <- mean(prog_pred.mat[,n2][which(prog_pred.mat[,n] <= quantile(prog_pred.mat[,n], probs = tail.p))], na.rm = T)
          param.dfs[[n]][[z, 10+(nTraits-1)+index]][s] <- mean(prog_pred.mat[,n2][which(prog_pred.mat[,n] >= quantile(prog_pred.mat[,n], probs = 1-tail.p))], na.rm = T)
          param.dfs[[n]][[z, 10+2*(nTraits-1)+index]][s] <- cor(prog_pred.mat[,n], prog_pred.mat[,n2], use = "complete.obs")
          index <- index+1
        }
      }
    
      p <- p+1 # This is the counter for the progress bar
    } ## End of z loop for each cross
    
  }## End of nSim (s) loop
  
  preds.per.sim <- param.dfs
  
  for(n in 1:nTraits){
    col.names <- colnames(param.dfs[[n]])
    for(c in 3:length(col.names)){
      name.tmp <- col.names[c]
      if(name.tmp %in% c("pred.mu_sd", "pred.varG_sd")) param.dfs[[n]][,c] <- sapply(param.dfs[[n]][,c], FUN = sd, na.rm=T)
      if(!name.tmp %in% c("pred.mu_sd", "pred.varG_sd")) param.dfs[[n]][,c] <- sapply(param.dfs[[n]][,c], FUN = mean, na.rm=T)
    }
  }
  
  if(return.raw) return(list(predictions=param.dfs, preds.per.sim=param.dfs, CVs=CV.results, models.chosen=best.models, markers.removed=mkrs.removed, entries.removed=entries.removed))
  if(!return.raw) return(list(predictions=param.dfs, CVs=CV.results, models.chosen=best.models, markers.removed=mkrs.removed, entries.removed=entries.removed))
  
} # End of pop.predict


#pop.predict(G.in_ex, y.in_ex, map.in_ex, cross.tab_ex)

