% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compute_STRAPP_test_for_focal_time.R
\name{compute_STRAPP_test_for_focal_time}
\alias{compute_STRAPP_test_for_focal_time}
\title{Compute STRAPP to test for a relationship between diversification rates and trait data}
\usage{
compute_STRAPP_test_for_focal_time(
  BAMM_object,
  trait_data_list,
  rate_type = "net_diversification",
  seed = NULL,
  nb_permutations = NULL,
  replace_samples = FALSE,
  alpha = 0.05,
  two_tailed = TRUE,
  one_tailed_hypothesis = NULL,
  posthoc_pairwise_tests = FALSE,
  p.adjust_method = "none",
  return_perm_data = FALSE,
  nthreads = 1,
  print_hypothesis = TRUE
)
}
\arguments{
\item{BAMM_object}{Object of class \code{"bammdata"}, typically generated with \code{\link[=update_rates_and_regimes_for_focal_time]{update_rates_and_regimes_for_focal_time()}},
that contains a phylogenetic tree and associated diversification rates
across selected posterior samples updated to a specific time in the past (i.e. the \code{focal_time}).}

\item{trait_data_list}{List obtained from \code{\link[=extract_most_likely_trait_values_for_focal_time]{extract_most_likely_trait_values_for_focal_time()}}
that contains at least a \verb{$trait_data} element, a \verb{$focal_time} element, and a \verb{$trait_data_type}.
\verb{$trait_data} is a named vector with the trait data found on the phylogeny at \code{focal_time}.
\verb{$focal_time} informs on the time in the past at which the trait and rates data will be tested.
\verb{$trait_data_type} informs on the type of trait data: continuous, categorical, or biogeographic.}

\item{rate_type}{A character string specifying the type of diversification rates to use. Must be one of 'speciation', 'extinction' or 'net_diversification' (default).}

\item{seed}{Integer. Set the seed to ensure reproducibility. Default is \code{NULL} (a random seed is used).}

\item{nb_permutations}{Integer. To select the number of random permutations to perform during the tests.
If NULL (default), all posterior samples will be used once.}

\item{replace_samples}{Logical. To specify whether to allow 'replacement' (i.e., multiple use) of a posterior sample when drawing samples used to carry out the test. Default is \code{FALSE}.}

\item{alpha}{Numerical. Significance level to use to compute the \code{estimate} corresponding to the values of the test statistic used to assess significance of the test. This does NOT affect p-values. Default is \code{0.05}.}

\item{two_tailed}{Logical. To define the type of tests. If \code{TRUE} (default), tests for correlations/differences in rates will be carried out with a null hypothesis
that rates are not correlated with trait values (continuous data) or are equal between trait states (categorical and biogeographic data).
If \code{FALSE}, one-tailed tests are carried out.
\itemize{
\item For continuous data, it involves defining a \code{one_tailed_hypothesis} testing for either a "positive" or "negative" correlation under the alternative hypothesis.
\item For binary data (two states), it involves defining a \code{one_tailed_hypothesis} indicating which states have higher rates under the alternative hypothesis.
\item For multinominal data (more than two states), it defines the type of post hoc pairwise tests to carry out between pairs of states.
If \code{posthoc_pairwise_tests = TRUE}, all two-tailed (if \code{two_tailed = TRUE}) or one-tailed (if \code{two_tailed = FALSE}) tests are automatically carried out.
}}

\item{one_tailed_hypothesis}{A character string specifying the alternative hypothesis in the one-tailed test.
For continuous data, it is either "negative" or "positive" correlation.
For binary data, it lists the trait states ordered by decreasing rates under the alternative hypothesis, separated by a greater-than sign, such as c('A > B') (i.e., state 'A' has higher rates than state 'B').}

\item{posthoc_pairwise_tests}{Logical. Only for multinominal data (with more than two states). If \code{TRUE}, all possible post hoc pairwise (Dunn) tests will be computed across all pairs of states.
This is a way to detect which pairs of states have significant differences in rates if the overall test (Kruskal-Wallis) is significant. Default is \code{FALSE}.}

\item{p.adjust_method}{A character string. Only for multinominal data (with more than two states). It specifies the type of correction to apply to the p-values
in the post hoc pairwise tests to account for multiple comparisons. See \code{\link[stats:p.adjust]{stats::p.adjust()}} for the available methods. Default is \code{none}.}

\item{return_perm_data}{Logical. Whether to return the stats data computed from the posterior samples for observed and permuted data in the output.
This is needed to plot the histogram of the null distribution used to assess significance of the test with \code{\link[=plot_histogram_STRAPP_test_for_focal_time]{plot_histogram_STRAPP_test_for_focal_time()}}.
Default is \code{FALSE}.}

\item{nthreads}{Integer. Number of threads to use for parallel computing of the tests across the permutations. The R package \code{parallel} must be loaded for \code{nthreads > 1}. Default is \code{1}.}

\item{print_hypothesis}{Logical. Whether to print information on what test is carried out, detailing the null and alternative hypotheses,
and what significance level is used to reject or not the null hypothesis. Default is \code{TRUE}.}
}
\value{
The function returns a list with at least eight elements.

Summary elements for the main test:
\itemize{
\item \verb{$estimate} Named numerical. Value of the test statistic used to assess significance of the test
according to the significance level provided (\code{alpha}). The test is significant if \verb{$estimate} is higher than zero.
\item \verb{$stats_median} Numerical. Median value of the distribution of test statistics across all selected posterior samples.
\item \verb{$p-value} Numerical. P-value of the test. The test is considered significant if \verb{$p-value} is lower than \code{alpha}.
\item \verb{$method} Character string. The statistical method used to carry out the test.
\item \verb{$rate_type} Character string. The type of diversification rates tested. One of 'speciation', 'extinction' or 'net_diversification'.
\item \verb{$trait_data_type} Character string. The type of trait data as found in 'trait_data_list$trait_data_type'. One of 'continuous', 'categorical', or 'biogeographic'.
\item \verb{$trait_data_type_for_stats} Character string. The type of trait data used to select statistical method. One of 'continuous', 'binary', or 'multinominal'.
\item \verb{$focal_time} The time in the past at which the trait and rates data were tested.
}

If using continuous or binary data:
\itemize{
\item \verb{$two-tailed} Logical. Record the type of test used: two-tailed if \code{TRUE}, one-tailed if \code{FALSE}.
}

If \code{one_tailed_hypothesis} is provided (only for continuous and binary trait data):
\itemize{
\item \verb{$one_tailed_hypothesis} Character string. Record of the alternative hypothesis used for the one-tailed tests.
}

If \code{posthoc_pairwise_tests = TRUE} (only for multinomial trait data):
\itemize{
\item \verb{$posthoc_pairwise_tests} List of at least 3 sub-elements:
\itemize{
\item \verb{$summary_df} Data.frame of five variables providing the summary results of post hoc pairwise tests
\item \verb{$method} Character string. The statistical method used to carry out the test. Here, "Dunn".
\item \verb{$two-tailed} Logical. Record the type of post hoc pairwise tests used: two-tailed if \code{TRUE}, one-tailed if \code{FALSE}.
}
}

If \code{return_perm_data = TRUE}, the stats data computed from the posterior samples for observed and permuted data are provided.
This is needed to plot the histogram of the null distribution used to assess significance of the test with \code{\link[=plot_histogram_STRAPP_test_for_focal_time]{plot_histogram_STRAPP_test_for_focal_time()}}.
\itemize{
\item \verb{$perm_data_df} A data.frame with four variables summarizing the data generated during the STRAPP test:
\itemize{
\item \verb{$posterior_samples_random_ID} Integer. ID of the posterior samples randomly drawn and used for the STRAPP test.
\item \verb{$*_obs} Numerical. Test stats computed from the observed data in the posterior samples. Name depends on the test used.
\item \verb{$*_perm} Numerical. Test stats computed from the permuted data in the posterior samples. Name depends on the test used.
\item \verb{$delta_*} OR \verb{$abs_delta_*} Numerical. Test stats computed for the STRAPP test comparing observed stats and permuted stats.
Name depends on the test used and the type of tests (two-tailed compare absolute values; one-tailed compare raw values).
}
}

Combined with \code{posthoc_pairwise_tests = TRUE}, the stats data are also provided for the post hoc pairwise tests:
\itemize{
\item \verb{$posthoc_pairwise_tests$perm_data_array} A 3D array containing stats data for all post hoc pairwise tests in a similar format to \verb{$perm_data_df}.
}

If no STRAPP test was performed in the case of categorical/biogeographic data with a single state/range at \code{focal_time},
only the \verb{$trait_data_type}, \verb{$trait_data_type_for_stats} = "none", and \verb{$focal_time} are returned.
}
\description{
Carries out the appropriate statistical method to test for a relationship between
diversification rates and trait data for a given point in the past (i.e. the \code{focal_time}).
Tests are based on block-permutations: rates data are randomized across tips following blocks
defined by the diversification regimes identified on each tip (typically from a BAMM).

Such tests are called STructured RAte Permutations on Phylogenies (STRAPP) as described in
Rabosky, D. L., & Huang, H. (2016). A robust semi-parametric test for detecting trait-dependent diversification.
Systematic biology, 65(2), 181-193. \doi{10.1093/sysbio/syv066}.

The function is an extension of the original \code{\link[BAMMtools:traitDependentBAMM]{BAMMtools::traitDependentBAMM()}} function used to
carry out STRAPP test on extant time-calibrated phylogenies.

Tests can be carried out on speciation, extinction and net diversification rates.

\code{deepSTRAPP::compute_STRAPP_test_for_focal_time()} can handle three types of statistical tests depending on the type of trait data provided:
\subsection{Continuous trait data}{

Tests for correlations between trait and rates carried out with \code{deepSTRAPP::compute_STRAPP_test_for_continuous_data()}.
The associated test is the Spearman's rank correlation test (See \link[stats:cor.test]{stats::cor.test}).
}

\subsection{Binary trait data}{

For categorical and biogeographic trait data that have only two states (ex: 'Nearctic' vs. 'Neotropics').
Tests for differences in rates between states are carried out with \code{deepSTRAPP::compute_STRAPP_test_for_binary_data()}.
The associated test is the Mann-Whitney-Wilcoxon rank-sum test (See \link[stats:wilcox.test]{stats::wilcox.test}).
}

\subsection{Multinominal trait data}{

For categorical and biogeographic trait data with more than two states (ex: 'No leg' vs. 'Two legs' vs. 'Four legs').
Tests for differences in rates between states are carried out with \code{deepSTRAPP::compute_STRAPP_test_for_multinominal_data()}.
The associated test for all states is the Kruskal-Wallis H test (See \link[stats:kruskal.test]{stats::kruskal.test}).
If \code{posthoc_pairwise_tests = TRUE}, post hoc pairwise tests between pairs of states will be carried out too.
The associated test for post hoc pairwise tests is the Dunn's post hoc pairwise rank-sum test (See \link[dunn.test:dunn.test]{dunn.test::dunn.test}).
}
}
\details{
This set of functions carries out the STructured RAte Permutations on Phylogenies (STRAPP) test as defined in
Rabosky, D. L., & Huang, H. (2016). A robust semi-parametric test for detecting trait-dependent diversification.
Systematic biology, 65(2), 181-193.

It is an extension of the original \code{\link[BAMMtools:traitDependentBAMM]{BAMMtools::traitDependentBAMM()}} function used to
carry out STRAPP test on extant time-calibrated phylogenies, but allowing here to test for
differences/correlations at any point in the past (i.e. the \code{focal_time}).

It takes an object of class \code{"bammdata"} (\code{BAMM_object}) that was updated such that
its diversification rates (\verb{$tipLambda} and \verb{$tipMu}) and regimes (\verb{$tipStates}) reflect
values observed at a specific time in the past (i.e. the \verb{$focal_time}).
Similarly, it takes a list (\code{trait_data_list}) that provides \verb{$trait_data} as observed on branches
at the same \code{focal_time} as the diversification rates and regimes.

A STRAPP test is carried out by drawing a random set of posterior samples from the \code{BAMM_object}, then randomly permuting rates
across blocks of tips defined by the macroevolutionary regimes. Test statistics are then computed across the initial observed data
and the permuted data for each sample.
In a two-tailed test, the p-value is the proportion of posterior samples in which the test statistic is at least as extreme in the permuted data as in the observed data.
In a one-tailed test, the p-value is the proportion of posterior samples in which the test statistic is higher in the permuted data than in the observed data.

----------  Major changes compared to \code{\link[BAMMtools:traitDependentBAMM]{BAMMtools::traitDependentBAMM()}}  ----------
\itemize{
\item Allow to choose if random sampling of posterior configurations must be done with replacement or not with \code{replace_samples}.
\item Add post hoc pairwise tests (Dunn test) for multinominal data. Use \code{posthoc_pairwise_tests = TRUE}.
\item Provide outputs tailored for histogram plots \code{\link[=plot_histogram_STRAPP_test_for_focal_time]{plot_histogram_STRAPP_test_for_focal_time()}}
and p-value time-series plots \code{\link[=plot_STRAPP_pvalues_over_time]{plot_STRAPP_pvalues_over_time()}}.
\item Add prints detailing what test is carried out, what are the null and alternative hypotheses,
and what significance level is used to reject or not the null hypothesis. (Enabled with \code{print_hypothesis = TRUE}).
\item Split the function in multiple sub-functions according to the type of data (\verb{$trait_data_type}).
\item Prevent using Pearson's correlation tests and applying log-transformation for continuous data.
The rationale is that there is no reason to assume that tip rates are distributed normally or log-normally.
Thus, a Spearman's rank correlation test is favored.
}
}
\examples{
# The whole example is guarded: it only runs on development versions of deepSTRAPP,
# because the example datasets loaded below are not shipped in CRAN versions.
if (deepSTRAPP::is_dev_version())
{
 # ------ Prepare data ------ #

 ## Load the BAMM_object summarizing 1000 posterior samples of BAMM with diversification rates
 # for ponerine ants extracted for 10My ago.
 data(Ponerinae_BAMM_object_10My, package = "deepSTRAPP")
 ## This dataset is only available in development versions installed from GitHub.
 # It is not available in CRAN versions.
 # Use remotes::install_github(repo = "MaelDore/deepSTRAPP") to get the latest development version.

 # Plot the associated phylogeny with mapped rates
 plot_BAMM_rates(Ponerinae_BAMM_object_10My)

 ## Load the object containing head width trait data for ponerine ants extracted for 10My ago.
 data(Ponerinae_trait_cont_tip_data_10My, package = "deepSTRAPP")

 # Plot the associated contMap (continuous trait stochastic map)
 plot_contMap(Ponerinae_trait_cont_tip_data_10My$contMap)

 # Check that objects are ordered in the same fashion
 # (tip names of the first posterior sample vs. names of the trait data vector)
 identical(names(Ponerinae_BAMM_object_10My$tipStates[[1]]),
           names(Ponerinae_trait_cont_tip_data_10My$trait_data))

 # Save continuous data
 trait_data_continuous <- Ponerinae_trait_cont_tip_data_10My

 ## Transform trait data into binary and multinominal data

 # Binarize data into two states
 # (values below 0 become "state_A"; values equal to or above 0 become "state_B")
 trait_data_binary <- trait_data_continuous
 trait_data_binary$trait_data[trait_data_continuous$trait_data < 0] <- "state_A"
 trait_data_binary$trait_data[trait_data_continuous$trait_data >= 0] <- "state_B"
 trait_data_binary$trait_data_type <- "categorical"

 table(trait_data_binary$trait_data)

 # Categorize data into three states
 # (values below -1 become "state_A"; values from -1 up to 0 become "state_B";
 #  values equal to or above 0 become "state_C")
 trait_data_multinominal <- trait_data_continuous
 trait_data_multinominal$trait_data[trait_data_continuous$trait_data < 0] <- "state_B"
 trait_data_multinominal$trait_data[trait_data_continuous$trait_data < -1] <- "state_A"
 trait_data_multinominal$trait_data[trait_data_continuous$trait_data >= 0] <- "state_C"
 trait_data_multinominal$trait_data_type <- "categorical"

 table(trait_data_multinominal$trait_data)

 \donttest{ # (May take several minutes to run)
 # ------ Compute STRAPP test for continuous data ------ #

 # Scatter plot of the tip rates stored in $tipLambda for the first posterior sample
 # against the continuous trait values
 plot(x = trait_data_continuous$trait_data, y = Ponerinae_BAMM_object_10My$tipLambda[[1]])

 # Compute STRAPP test under the alternative hypothesis of a "negative" correlation
 # between "net_diversification" rates and trait data
 STRAPP_results <- compute_STRAPP_test_for_focal_time(
    BAMM_object = Ponerinae_BAMM_object_10My,
    trait_data_list = trait_data_continuous,
    two_tailed = FALSE,
    one_tailed_hypothesis = "negative",
    return_perm_data = TRUE)
 str(STRAPP_results, max.level = 2)
 # Data from the posterior samples is available in STRAPP_results$perm_data_df
 head(STRAPP_results$perm_data_df)

 # ------ Compute STRAPP test for binary data ------ #

 # Compute STRAPP test under the alternative hypothesis that "state_A" is associated
 # with higher "net_diversification" than "state_B"
 STRAPP_results <- compute_STRAPP_test_for_focal_time(
    BAMM_object = Ponerinae_BAMM_object_10My,
    trait_data_list = trait_data_binary,
    two_tailed = FALSE,
    one_tailed_hypothesis = c("state_A > state_B"))
 str(STRAPP_results, max.level = 1)

 # Compute STRAPP test under the alternative hypothesis that "state_B" is associated
 # with higher "net_diversification" than "state_A"
 STRAPP_results <- compute_STRAPP_test_for_focal_time(BAMM_object = Ponerinae_BAMM_object_10My,
    trait_data_list = trait_data_binary,
    two_tailed = FALSE,
    one_tailed_hypothesis = c("state_B > state_A"))
 str(STRAPP_results, max.level = 1)

 # ------ Compute STRAPP test for multinominal data ------ #

 # Compute STRAPP test between all three states, and compute post hoc tests
 # for differences in rates between all possible pairs of states
 # with a p-value adjusted for multiple comparison using Bonferroni's correction
 STRAPP_results <- compute_STRAPP_test_for_focal_time(
    BAMM_object = Ponerinae_BAMM_object_10My,
    trait_data_list = trait_data_multinominal,
    posthoc_pairwise_tests = TRUE,
    two_tailed = TRUE,
    p.adjust_method = "bonferroni")
 str(STRAPP_results, max.level = 3)
 # All post hoc pairwise test summaries are available in $summary_df
 STRAPP_results$posthoc_pairwise_tests$summary_df }
}

}
\references{
For STRAPP: Rabosky, D. L., & Huang, H. (2016). A robust semi-parametric test for detecting trait-dependent diversification.
Systematic biology, 65(2), 181-193. \doi{10.1093/sysbio/syv066}.

For STRAPP in deep times: Doré, M., Borowiec, M. L., Branstetter, M. G., Camacho, G. P., Fisher, B. L., Longino, J. T., Ward, P. S., Blaimer, B. B., (2025),
Evolutionary history of ponerine ants highlights how the timing of dispersal events shapes modern biodiversity, Nature Communications.
\doi{10.1038/s41467-025-63709-3}
}
\seealso{
Associated functions in deepSTRAPP: \code{\link[=extract_most_likely_trait_values_for_focal_time]{extract_most_likely_trait_values_for_focal_time()}} \code{\link[=update_rates_and_regimes_for_focal_time]{update_rates_and_regimes_for_focal_time()}}

Original function in BAMMtools: \code{\link[BAMMtools:traitDependentBAMM]{BAMMtools::traitDependentBAMM()}}

Statistical tests: \code{\link[stats:cor.test]{stats::cor.test()}} \code{\link[stats:wilcox.test]{stats::wilcox.test()}} \code{\link[stats:kruskal.test]{stats::kruskal.test()}} \code{\link[dunn.test:dunn.test]{dunn.test::dunn.test()}}

For a guided tutorial, see this vignette: \code{vignette("explore_STRAPP_test_types", package = "deepSTRAPP")}
}
\author{
Maël Doré
}
