
## Functions: pedgene 
## wrapper for computing retrospective likelihood stat on pedigrees for rare
## variants over multiple genes
## Authors: Jason Sinnwell, Dan Schaid, and Alessia Visconti

pedgene <- function(ped, geno,  map=NULL, male.dose=2, checkpeds=TRUE, verbose.return=FALSE,
                    weights=NULL, weights.beta=c(1,25), weights.mb=FALSE, relation=NULL,
                    method="kounen", acc.davies=1e-5) {
##Arguments:
##  
##  Required:
##  ped: data.frame with columns needed to create the pedigree
##  geno: data.frame with ped, person ids in the first two columns, and numeric columns
##       with minor allele count (0/1/2) for markers (columns) and subjects (rows) in
##       the ped object
##  
##  Optional:##  map: data.frame with columns chrom, position, and gene.
##        gene can be gene symbol or geneid (entrez or ensemble);
##        it is not matched with other annotation, just used for marker groups
##        If not passed, assume all variants from the same gene 
##  male.dose: When doing X-chrom, define how male genotypes should be
##        analyzed. male.dose can be between 0 and 2, but usually 1 or 2
##  checkpeds, perform basic pedigree structure checks. Time-consuming if peds are
##             are already validated
##  method: either Kounen or Davies method to calculate kernel test p-values
##  verbose.return: similar to glm in R, return geno and response data that is used in calculations
##  weights: allow user-specified weights. If Null, use beta distribution weighting
##  weights.mb: if weights=NULL and weights.MB=TRUE, do Madsen-Browning weights
##  weights.beta: beta distribution coefficients used for weighting.
##     By default, beta weights are used
##  acc.davies: numerical accuracy for davies method to choose eigven values
##       and to determine p-value
  
## Steps
## 1) verify ped columns, map and geno match dimensions
## 2) Create kinship matrices for autosomes and X for all subjects
## 3) run pedgene.stats on each gene given in map

  verbose=FALSE
  
  ## save the call
  call <- match.call() 
  ## save options before setting stringsAsFactors for just this function
  saveOpt <- options()
  options(stringsAsFactors=FALSE)

  ## check method, must be davies or kounen
  method=casefold(method[1])
  method=c("kounen","davies")[pmatch(method, c("kounen", "davies"))]
  if(is.na(method)) {
    warning("method not kounen or davies, setting to kounen\n")
    method="kounen"
  }  
  
  
  ## if no map, create one, assuming all one gene
  if(is.null(map)) {
    map <- data.frame(chrom=rep("unknown", ncol(geno)-2),
                      gene=rep("unknown", ncol(geno)-2))
  }

  ## unify names of ped and map to lowercase
  names(map) <- casefold(names(map))
  names(ped) <- casefold(names(ped))
  ## old requirement was to have ped column, change internally to famid
  names(ped) <- gsub("ped", "famid", names(ped))
  names(geno) <- gsub("ped", "famid", names(geno))
    
  ## verify map data.frame ###
  ## i. check column names
  if(any(!(c("chrom", "gene") %in% names(map)))) {
    stop("map requires columns for chrom and gene")
  }
  ## ii. recode chrom 23 to X
  map$chrom[map$chrom==23] <- "X"

  ## verify geno matrix, and that it matches map
  if(any(!(c("famid", "person") %in% names(geno)))) {
    stop("geno requires columns 'famid' and 'person' ids")
  }
    ## get famid-personid of geno to match to ped/kinship
    ## check for duplicates
  geno.famperson <- paste(geno$famid, geno$person, sep="_")
  if(any(duplicated(geno.famperson))) {
      warning(paste("geno subject(s) with multiple entries, only the first is used: ",
                    geno.famperson[which(duplicated(geno.famperson))], ".\n", sep=""))
      geno <- geno[!duplicated(geno.famperson),]
      geno.famperson <- geno.famperson[!duplicated(geno.famperson)]
     
  }
  ## check for duplicates in ped, and remove them
  ## but don't remove those with missing trait/covars until later
  ## after pedchecks and kinship matrix calculation
  ped.famperson <- paste(ped$famid, ped$person, sep="_")
  if(any(duplicated(ped.famperson))) {
      warning(paste("ped subject(s) with multiple entries, only the first is used: ",
                    ped.famperson[which(duplicated(ped.famperson))], ".\n", sep=""))
      ped <- ped[!duplicated(ped.famperson),]
      ped.famperson <- ped.famperson[!duplicated(ped.famperson)]
     
  }
  
  ## assign rownames of geno/ped, so can use rownames for matching/indexing       
  rownames(geno) <- geno.famperson
  rownames(ped) <- ped.famperson
    
  ## subset geno rows to those in ped, and     
  ## get rid of id cols because they are in rownames (drop=FALSE, in case 1 marker only)
  geno <- geno[which(rownames(geno) %in% rownames(ped)), !(names(geno) %in% c("famid", "person")),drop=FALSE]
  
  ## check that map and geno now have same columns for markers
  if(nrow(map) != (ncol(geno))) {
    stop(paste("map rows (", nrow(map), ") and geno columns (", ncol(geno),
               ") do not match \n",sep=""))
  }  
  
  ## verify ped data.frame has expected column names
  if(any(!(c("famid", "person", "father", "mother", "sex", "trait")
           %in% names(ped)))) {
    stop("Error: ped requires columns: famid, person, father, mother, sex, trait")
  }
 
  #############################################################################
  ## this is where to do trait.adjusted if we want it on all people that have trait 
  ## this caused different results when flipping major/minor alleles, so moved later
  #############################################################################
  
  ## check weights parameters
  ## verify user-passed weights, match ncol(geno)
  if(!is.null(weights)) {
    ## by default, do Beta weights, implemented in ped.gene.stats
    ## otherwise, these are user-specified, check length
    if(length(weights) != ncol(geno)) {
       stop(paste("Error: should have weights(", length(weights),
                  ") for every variant position(", ncol(geno), ")", sep=""))
    }
  } else {  ## no user-given weights
    if(weights.mb==FALSE) {
      ## verify weights.beta
      if(length(weights.beta) != 2 | any(weights.beta < 0)) {
        warning("weights.beta should be two positive numbers, setting to (1,25)\n")
        weights.beta=c(1,25)
      }
    }  ## m-b weights, nothing to check except that weights.mb is true/false
  }
  
  ## perform simple pedigree checks
  if(checkpeds) {
    uped <- unique(ped$famid)
    nped <- length(uped)
    
    for(i in 1:nped) {      
      iped <- uped[i]      
      temp.ped <- ped[ped$famid == iped,, drop=FALSE]      
      if(nrow(temp.ped) > 1) {      
        ## simple checks on pedigree
        pedigreeChecks(temp.ped, male.code=1, female.code=2)
      }
    }
  }
  ## additional checks <could> be done on peds when creating pedlist object,
  ## which could be used to create kinmat.
  # pedall <- with(ped, kinship2::pedigree(id=person, dadid=father, momid=mother,
  #                          sex=sex, famid=ped, missid=missid))
  #  kinmat <- Pedixplorer::kinship(pedall, chrtype="auto")

  ## We rather created it directly from ped  
  ## create kinship matrix, also for X if any genes on X chrom

  missid <- c(0, "0")
  peddf <- with(ped, data.frame(indId=person, fatherId=father, motherId=mother, gender=sex, famid=famid))
  peddf$fatherId[peddf$fatherId %in% missid] <- NA
  peddf$motherId[peddf$motherId %in% missid] <- NA
   
  if(missing(relation)) {
    pedobj <- Pedixplorer::Pedigree(peddf)
   
   ## ped2 <- with(ped, Pedixplorer::Pedigree(famid=famid, id=person, dadid=father, momid=mother,
   ##		 sex=sex, missid=0))
  } else  {
    reldf <- data.frame(indId1=relation$id1, indId2=relation$id2, code=relation$code, famid=relation$famid)
    pedobj <- Pedixplorer::Pedigree(peddf, rel_df=reldf)
    #ped2 <- with(ped, pedigree(famid=famid, id=person, dadid=father, momid=mother,
    #                           sex=sex, missid=0, relation=relation))    
  }
    
  kinmat <- Pedixplorer::kinship(pedobj, chrtype="autosome") ## convert to Matrix class?
  
   ## subset ped, geno, kinmat/kinmatX to only subjects who have genotype and ped data
  ## and in the same order
  keepped <- rownames(ped)[rownames(ped) %in% rownames(geno)]
  
  ped <- ped[keepped,]
  kinmat <- kinmat[keepped,keepped]
  geno <- geno[keepped,,drop=FALSE]
    
  if(any(map$chrom=="X")) {
    kinmatX <- Pedixplorer::kinship(pedobj, chrtype="X")  #Matrix class?
    kinmatX <- kinmatX[keepped, keepped]
  } else {
    kinmatX <- NULL
  }

    
  ## subset out anyone missing trait or all genotypes                      
  missidx <- is.na(ped$trait) | apply(is.na(geno), 1, all) 
  if("trait.adjusted" %in% names(ped)) missidx <- missidx | is.na(ped$trait.adjusted)
  if(sum(missidx)>0) {
    ped <- ped[!missidx,]
    kinmat <- kinmat[!missidx, !missidx]
    kinmatX <- kinmatX[!missidx, !missidx]
    geno <- geno[!missidx,,drop=FALSE]
  }

    
  ## Check that geno for males on X should only have 0 and 1 dosages
  xidx <- which(map$chrom=="X" | map$chrom=="x")
  if(length(xidx)) {
    xdosemale <- geno[ped$sex==1,xidx, drop=TRUE]
    if(sum(xdosemale>1, na.rm=TRUE)) {
      stop("All male dose on X chromosome should be <= 1")
    }
  }
       
 
  ## calculate trait.adjusted on everyone with genotype data
  ## add trait.adjusted if not already there
  if(!("trait.adjusted" %in% names(ped))) {
    ped$trait.adjusted <- mean(ped$trait, na.rm=TRUE)      
  }
  
  gvec <- chromvec <- nvariant <- noninform <- kstat <- kpval <- bstat <- bpval <- NULL
  
  for(g in unique(map$gene)) {
    if(verbose) {
      cat("test on gene ", g, "\n")
    }
    gidx <- which(map$gene==g)
    ## drop=FALSE for 1-marker gene
    genosub <- geno[,gidx,drop=FALSE]

    resid <- ped$trait - ped$trait.adjusted
    sex <- ped$sex
    chrom <- map$chrom[gidx[1]]
    
    c.factor <- quadfactor(
             if(chrom=="X") kinmatX else kinmat,
             chrom, resid, sex, male.dose)

    pgstat <- pedgene.stats(genosub, as.vector(c.factor), map$chrom[gidx[1]], male.dose, sex, resid,
                    weights=weights[gidx], weights.beta=weights.beta, weights.mb=weights.mb,
                    method=method, acc.davies=acc.davies)
    if(pgstat$nvariant==0) {
      cat("gene: '", g, "' has no markers after removing markers with all same genotype\n")     
    }
    gvec <- c(gvec, g)
    chromvec <- c(chromvec, chrom)
    nvariant <- c(nvariant,pgstat$nvariant)
    noninform <- c(noninform, pgstat$noninform)
    kstat <- c(kstat, pgstat$stat.kernel)
    kpval <- c(kpval, pgstat$pval.kernel)
    bstat <- c(bstat, pgstat$stat.burden)
    bpval <- c(bpval, pgstat$pval.burden)   
  }

  pgdf <- data.frame(gene=gvec, chrom=chromvec, n.variant=nvariant,
                     n.noninform=noninform,
                     stat.kernel=kstat, pval.kernel=kpval,
                     stat.burden=bstat, pval.burden=bpval)
  
  # re-set options
  options(saveOpt)
  if(verbose.return) {
    save <- list(geno=geno, ped=ped, map=map)
  } else {
    save=NULL
  }
  pglist <- list(pgdf=pgdf, call=call, save=save)
  class(pglist) <- "pedgene"
  return(pglist)
}

## print and summary methods for pedgene S3 class

print.pedgene <- function(x, ...) {
## suggest digits=4  
  print.data.frame(x$pgdf, ...)

  invisible()
}
summary.pedgene <- function(object, ...) {
## suggest digits=4 or 5  
  cat("\nSummary for pedgene object: \n\n")
  cat("Call:\n")
  print(object$call)
  cat("\n\n")

  ## invoke print method
  print(object,  ...)
  
  invisible()
}
