\name{sbfit}
\alias{sbfit}
\alias{sbfit.scaleboot}
\alias{sbfit.scalebootv}
\alias{sbfit.default}
\alias{sbfit.matrix}
\alias{sbfit.data.frame}
\alias{print.scaleboot}
\alias{print.scalebootv}

\title{Fitting Models to Bootstrap Probabilities}
\description{
  \code{sbfit} is used to fit parametric models to multiscale bootstrap
  probabilities by the maximum likelihood method.
}
\usage{
sbfit(x, ...)

\method{sbfit}{default}(x,nb,sa,models=NULL,nofit=FALSE,...)

\method{sbfit}{matrix}(x,nb,sa,models=NULL,names.hp=rownames(x),
      nofit=FALSE,cluster=NULL,...)

\method{sbfit}{data.frame}(x,...)

\method{sbfit}{scaleboot}(x,models=names(x$fi),...)

\method{sbfit}{scalebootv}(x,models=attr(x,"models"),...)
}
\arguments{
  \item{x}{an object used to select a method. For \code{sbfit.default},
    \code{x} is denoted as \code{bp} and is a vector of bootstrap
    probabilities for a hypothesis. For \code{sbfit.matrix}, \code{x} is
    denoted as \code{bps} and is a matrix with row vectors of \code{bp}
    for several hypotheses.
  }
  \item{nb}{vector of numbers of bootstrap replicates. A short vector
    (or scalar) is cyclically extended to match the size of \code{bp}.}
  \item{sa}{vector of scales in sigma squared (\eqn{\sigma^2}). Should
      be the same size as \code{bp}.}
  \item{models}{character vector of model names. Valid model names are
    \code{poly.m} for m>=1 and \code{sing.m} for
    m>=3. The default is set by \code{sboptions()$models}, whose
    default is \code{c("poly.1","poly.2","poly.3","sing.3","sphe.3")}.
    If \code{models} is an integer value, \code{sbmodelnames(m=models)} is
    used.}
  \item{nofit}{logical. If TRUE, fitting is not performed.}
  \item{names.hp}{character vector of hypotheses names.}
  \item{cluster}{\pkg{snow} cluster object which may be generated by
    function \code{makeCluster}.}
  \item{...}{further arguments passed to or from other methods.}
}
\details{
  \code{sbfit.default} fits parametric models to \code{bp}
  by maximizing the log-likelihood value of a binomial model.
  Multiscale bootstrap resampling
  should be performed before a call to \code{sbfit} to prepare
  \code{bp}, where \code{bp[i]} is a bootstrap probability of a
  hypothesis calculated with a number of bootstrap
  replicates \code{nb[i]} and a scale \eqn{\sigma^2}=\code{sa[i]}.
  The scale is defined as \eqn{\sigma^2=n/n'},
  where \eqn{n} is the sample size of data, and \eqn{n'} is the sample
  size of replicated data for bootstrap resampling.

  Each model specifies a \code{psi(beta,s)}=\eqn{\psi(\sigma^2 | \beta)}
  function with a parameter vector \eqn{\beta}. The model
  may describe how the bootstrap probability changes along the scale.
  Let \code{cnt[i]=bp[i]*nb[i]} be the frequency indicating how many
  times the hypothesis of interest is observed in bootstrap replicates
  at scale \code{sa[i]}. Then we assume that \code{cnt[i]} is distributed
  as binomial with number of trials \code{nb[i]} and success
  probability \code{1-pnorm(psi(beta,s=sa[i])/sqrt(sa[i]))}. Currently,
  \code{sbpsi.poly} and \code{sbpsi.sing} are available as \eqn{\psi}
  functions. The estimated model parameters are accessed by
  \code{\link{coef.scaleboot}} method.
  
  The model fitting is performed in the order
  specified in \code{models}, and the initial values for numerical
  optimization of the likelihood function are prepared by using
  previously estimated model parameters. Thus, "poly.(m-1)"
  should be specified before "poly.m", and "poly.(m-1)" and "sing.(m-1)"
  should be specified before "sing.m".

  \code{sbfit.matrix} calls \code{sbfit.default} repeatedly for each row
  vector \code{bp} of the matrix \code{bps}.  Parallel
  computing is performed when \code{cluster} is not \code{NULL}.

  \code{sbfit.scaleboot} calls \code{sbfit.default} with \code{bp},
  \code{nb}, and \code{sa} components in \code{x} object for refitting by
  giving another \code{models} argument. It discards the previous result
  of fitting, and recomputes the model parameters.

  \code{sbfit.scalebootv} calls \code{sbfit.matrix} with \code{bps},
  \code{nb}, and \code{sa} components in the attributes of \code{x}.
}
\value{
  \code{sbfit.default} and \code{sbfit.scaleboot} return an object of
    class \code{"scaleboot"}, and \code{sbfit.matrix} and
    \code{sbfit.scalebootv} return an object of
    class \code{"scalebootv"}. 

  An object of class \code{"scaleboot"} is a list containing at least the
  following components:
  \item{bp}{the vector of bootstrap probabilities used.}
  \item{nb}{the \code{rep(nb,length=length(bp))} used.}
  \item{sa}{the \code{sa} used. }
  \item{fi}{list vector of fitted results for \code{models} used.  Each
    list consists of components \code{"par"} (estimated parameter),
    \code{"mag"} (magnification factor for \code{"par"} to make the actual
    parameter vector \code{beta=par*mag}),
    \code{"value"} (maximum log-likelihood), \code{"hessian"} (hessian
    matrix), \code{"var"} (variance estimate of \code{"par"}),
    \code{"mask"} (logical vector indicating parameter elements which are
    not at boundaries),
    \code{"init"} (initial values used for optimization),
    \code{"psi"} (psi function name of the model), \code{"df"}
    (degrees of freedom), \code{"rss"} (equivalent to the residual sum
    of squares, but actually defined as 2*(lik0-lik) where lik0 and lik
    are the log-likelihood function of the non-restricted model and the
    model of interest, respectively), \code{"pfit"} (p-value for
    \code{"rss"}),
    \code{"aic"} (aic value of the model relative to the non-restricted
    model).}

  An object of class \code{"scalebootv"} is a vector of
  \code{"scaleboot"} objects, and in addition, it has attributes
  \code{"models"}, \code{"bps"}, \code{"nb"}, and \code{"sa"}. 
}
\references{
  Shimodaira, H. (2002).  An approximately unbiased test of phylogenetic
  tree selection, \emph{Systematic Biology}, 51, 492-508.
  
  Shimodaira, H. (2004).  Approximately unbiased tests of
  regions using multistep-multiscale bootstrap resampling,
  \emph{Annals of Statistics}, 32, 2616-2641. 
  
  Shimodaira, H. (2006).  Approximately Unbiased Tests 
  for Singular Surfaces via Multiscale Bootstrap Resampling,
  \emph{submitted for publication}.
}
\author{Hidetoshi Shimodaira <shimo@is.titech.ac.jp>}
\seealso{\code{\link{sbpsi}}, \code{\link{summary.scaleboot}},
  \code{\link{plot.scaleboot}}, \code{\link{coef.scaleboot}},
  \code{\link{sbaic}}.}
\examples{
## Testing a hypothesis
## Examples of fitting models to a vector of bp's
## mam15.relltest$t4 of data(mam15), but
## using a different set of scales (sigma^2 values).
## Below, sigma^2 ranges from 0.01 to 100 in sa[i].
## This very large range is only for illustration.
## Typically, a range of around 0.1 to 10
## is recommended for much better model fitting.
## In other examples, we have used
## sa = 9^seq(-1,1,length=13).

cnt <- c(0,0,0,0,6,220,1464,3565,5430,6477,6754,
         6687,5961) # observed frequencies at scales
nb <- 100000 # number of replicates at each scale
bp <- cnt/nb # bootstrap probabilities (bp's)
sa <- 10^seq(-2,2,length=13) # scales (sigma squared)
## model fitting to bp's 
f <- sbfit(bp,nb,sa) # model fitting ("scaleboot" object)
f # print the result of fitting
plot(f,legend="topleft") # observed bp's and fitted curves
## approximately unbiased p-values
summary(f) # calculate and print p-values
## refitting with models up to "poly.4" and "sing.4"
f <- sbfit(f,models=4)
f # print the result of fitting
plot(f,legend="topleft") # observed bp's and fitted curves
summary(f) # calculate and print p-values

## Testing multiple hypotheses (only two here)
## Examples of fitting models to vectors of bp's
## mam15.relltest[c("t1","t2")]
cnt1 <- c(99982,99247,95068,86876,77802,68562,57842,45529,
    34499,26731,21158,16663,12982) # cnt for "t1"
cnt2 <- c(0,0,0,0,56,926,3614,7162,10068,11623,
     12432,13361,13518) # cnt for "t2"
cnts <- rbind(cnt1,cnt2)
nb <- 100000 # number of replicates at each scale
bps <- cnts/nb # row vectors are bp's
sa <- 10^seq(-2,2,length=13) # scales (sigma squared)
fv <- sbfit(bps,nb,sa) # returns a "scalebootv" object
fv # print the result of fitting
plot(fv) # multiple plots
summary(fv) # calculate and print p-values

}
\keyword{ models }
\keyword{ nonparametric }
