% $Id: diseq.Rd,v 1.5 2002/11/27 15:32:20 warnesgr Exp $
%
% $Log: diseq.Rd,v $
% Revision 1.5  2002/11/27 15:32:20  warnesgr
% Correct spelling errors and typos.
%
% Revision 1.4  2002/11/12 19:58:45  warnesgr
% - Changes to remove warnings generated by 'R CMD check'.
%
% Revision 1.3  2002/11/12 05:31:21  warnesgr
% - Fix mismatches between documentation and code that were generating
%   warning messages.
%
% Revision 1.2  2002/11/08 19:53:57  warnesgr
%
% - Moved ci.balance() to a separate file and created a documentation file for it.
% - Modified ci.balance to better annotate when it uses boundary values.
% - Modified diseq.ci to better provide warning message when the number of
%   alleles is greater than 3.
%
% Revision 1.1  2002/09/24 01:32:19  warnesgr
% - 'Un-genericized' diseq()
% - Moved documentation of diseq() and diseq from HWE.test.Rd to diseq.Rd
% - Cleaned up HWE.test.Rd and diseq.Rd
% - Fixed broken class check in diseq() and diseq.ci()
% - Removed allele.count.default() -- this will force the user to
%   explicitly call 'genotype' on the data to use allele.count().
% - Added zzz.R to require package 'boot'
%
%

\name{diseq}
\alias{diseq}
\alias{diseq.table}
\alias{diseq.genotype}
\alias{diseq.ci}
\alias{print.diseq}
\title{Estimate or Compute Confidence Interval for the Single-Marker Disequilibrium}
\description{
  Estimate or compute confidence interval for single-marker disequilibrium.
}
\usage{
diseq(x, ...)
\method{print}{diseq}(x, show=c("D","D'","r"), ...)
diseq.ci(x, R=1000, conf=0.95, correct=TRUE, na.rm=TRUE, ...)
}
\arguments{
  \item{x}{genotype or haplotype object.}
  \item{show}{a character value or vector indicating which
    disequilibrium measures should be displayed.  The default is to show
    all of the available measures.}
  \item{conf}{Confidence level to use when computing the confidence
    interval for D-hat.  Defaults to 0.95, should be in (0,1). }
  \item{R}{Number of bootstrap iterations to use when computing the
    confidence interval. Defaults to 1000.}
  \item{correct}{See details.}
  \item{na.rm}{ logical. Should missing values be removed?}
  \item{...}{ optional parameters passed to \code{boot.ci}
    (\code{diseq.ci}) or ignored.}
}
\details{
  For a single-gene marker, \code{diseq} computes the Hardy-Weinberg
  (dis)equilibrium statistics D, D', and r (the correlation coefficient)
  for each pair of allele values, as well as an overall value for
  each. \code{print.diseq} displays the contents of a \code{diseq}
  object. \code{diseq.ci} computes a bootstrap confidence interval for
  this estimate.

  For each allele pair,
  \describe{
    \item{D}{ is defined as half of the raw difference
      in frequency between
      the observed number of heterozygotes and the expected number:
      
      \deqn{%
 	D = \frac{1}{2} ( p_{ij} - 2  p_i p_j ) %
      }{%
 	D = 1/2 * ( p(ij) - 2 * p(i)*p(j) )  %
      }
      
    }
    \item{D'}{ rescales D to span the range [-1,1] 
      
      
      \deqn{D' = \frac{D}{D_{max} } }{D' = D / Dmax}
      
      where
      \deqn{%
  	D_{max} = \min( p_i, p_j ) - p_{ij}  %
      }{%
  	Dmax = min( p(i),p(j) ) - p(ij)   %
      } if D > 0      
      and
      \deqn{%
  	D_{max} = p_{ij} %
      }{%
  	Dmax = p(ij)  %
      } if D < 0.
    }
    
    \item{r}{ is the correlation coefficient between the two alleles ignoring all
      other alleles, and can be computed by
      
      \deqn{%
 	r = \frac{-D}{\sqrt{ p_i (1-p_i) p_j (1-p_j) }} %
      }{%
 	r = -D / sqrt( p(i) * (1-p(i)) * p(j) * (1-p(j) ) ) %
      }
    }
    
  }
  
  where
  \itemize{
    \item \eqn{p_i}{p(i)} is defined as the observed probability of
      allele 'i',
    \item \eqn{p_j}{p(j)} is defined as the observed probability of
      allele 'j', and
    \item \eqn{p_{ij}}{p(ij)} is defined as the observed probability of
      the allele pair 'ij'.
  }
  
  When there are more than two alleles, the summary values for these
  statistics are obtained by computing a weighted average of the
  absolute value of each allele pair, where the weight is determined by
  the expected frequency. For example:

   \deqn{%
     D_{overall} = \sum_{i \ne j}  |D_{ij}| p_{ij} %
   }{%
     D.overall = sum |D(ij)| * p(ij) %
   }

  Bootstrapping is used to generate confidence intervals in order to
  avoid reliance on parametric assumptions, which will not hold for
  alleles with low frequencies (e.g. \eqn{D'} following a Chi-square
  distribution).

  See the function \code{\link[genetics]{HWE.test}} for testing
  Hardy-Weinberg Equilibrium, \eqn{D=0}.
  
}
\value{
  \code{diseq} returns an object of class \code{diseq} with components
  \describe{
    \item{data}{2-way table of allele pair counts}
    \item{D.hat}{matrix giving the observed count, expected count,
      observed - expected difference, and estimate of disequilibrium for
      each pair of alleles as well as an overall disequilibrium value.}
    \item{call}{function call used to create this object}
  }

  \code{diseq.ci} returns an object of class \code{\link[boot:boot.ci]{bootci}}.
}
\author{ Gregory R. Warnes \email{Gregory\_R\_Warnes@groton.pfizer.com }
  }

\seealso{
  \code{\link{genotype}},
  \code{\link{HWE.test}},
  \code{\link[boot]{boot}},
  \code{\link[boot]{boot.ci}}
}

\examples{
\testonly{
# Fix the random seed before the examples below are run.
set.seed(7981357)
}
# Two-allele example data (alleles D and I); the last entry is an empty
# string, presumably treated as missing by genotype -- TODO confirm.
example.data   <- c("D/D","D/I","D/D","I/I","D/D",
                    "D/D","D/D","D/D","I/I","")
# Convert the character vector to a genotype object and print it.
g1  <- genotype(example.data)
g1

# Disequilibrium estimates, followed by a bootstrap confidence interval.
diseq(g1)
diseq.ci(g1)
HWE.test(g1)  # does the same, plus tests D-hat=0

# Three-allele example data (alleles A, C and T).
three.data   <- c(rep("A/A",8),
                  rep("C/A",20),
                  rep("C/T",20),
                  rep("C/C",10),
                  rep("T/T",3))

# Convert to a genotype object and print it.
g3  <- genotype(three.data)
g3

diseq(g3)
# NOTE(review): ci.B and ci.type are not named arguments of diseq.ci in the
# usage section above (which lists R and conf), so they are passed through
# the dots argument -- confirm they have the intended effect.
diseq.ci(g3, ci.B=10000, ci.type="bca")
}
\keyword{misc}
%\keyword{genetics}
