% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_p_value.R
\name{get_p_value}
\alias{get_p_value}
\alias{get_p_value.default}
\alias{get_pvalue}
\alias{get_p_value.infer_dist}
\title{Compute p-value}
\usage{
get_p_value(x, obs_stat, direction)

\method{get_p_value}{default}(x, obs_stat, direction)

get_pvalue(x, obs_stat, direction)

\method{get_p_value}{infer_dist}(x, obs_stat, direction)
}
\arguments{
\item{x}{A null distribution. For simulation-based inference, a data frame
containing a distribution of \code{\link[=calculate]{calculate()}}d statistics
or \code{\link[=fit.infer]{fit()}}ted coefficient estimates. This object should
have been passed to \code{\link[=generate]{generate()}} before being supplied to
\code{\link[=calculate]{calculate()}} or \code{\link[=fit.infer]{fit()}}. For theory-based inference,
the output of \code{\link[=assume]{assume()}}.}

\item{obs_stat}{A data frame containing the observed statistic (in a
\code{\link[=calculate]{calculate()}}-based workflow) or observed fit (in a
\code{\link[=fit.infer]{fit()}}-based workflow). This object is likely the output
of \code{\link[=calculate]{calculate()}} or \code{\link[=fit.infer]{fit()}} and need not
have been passed to \code{\link[=generate]{generate()}}.}

\item{direction}{A character string. Options are \code{"less"}, \code{"greater"}, or
\code{"two-sided"}. Can also use \code{"left"}, \code{"right"}, \code{"both"},
\code{"two_sided"}, \code{"two sided"}, or \code{"two.sided"}.}
}
\value{
A \link[tibble:tibble]{tibble} containing the following columns:

\itemize{
\item \code{term}: The explanatory variable (or intercept) in question. Only
supplied if the input had been previously passed to \code{\link[=fit.infer]{fit()}}.
\item \code{p_value}: A value in [0, 1] giving the probability that a
statistic/coefficient as or more extreme than the observed
statistic/coefficient would occur if the null hypothesis were true.
}
}
\description{
Compute a p-value from a null distribution and observed statistic.

Learn more in \code{vignette("infer")}.
}
\section{Aliases}{

\code{get_pvalue()} is an alias of \code{get_p_value()}.
\code{p_value} is a deprecated alias of \code{get_p_value()}.
}

\section{Zero p-value}{

Though a true p-value of 0 is impossible, \code{get_p_value()} may return 0 in
some cases. This is due to the simulation-based nature of the \{infer\}
package; the output of this function is an approximation based on
the number of \code{reps} chosen in the \code{generate()} step. When the observed
statistic is very unlikely given the null hypothesis, and only a small
number of \code{reps} have been generated to form a null distribution,
it is possible that the observed statistic will be more extreme than
every test statistic generated to form the null distribution, resulting
in an approximate p-value of 0. In this case, the true p-value is a small
value likely less than \code{3/reps} (based on a Poisson approximation).

In the case that a p-value of zero is reported, a warning message will be
raised to caution the user against reporting a p-value exactly equal to 0.
}

\examples{

# using a simulation-based null distribution ------------------------------

# find the point estimate---mean number of hours worked per week
point_estimate <- gss \%>\%
  specify(response = hours) \%>\%
  calculate(stat = "mean")

# starting with the gss dataset
gss \%>\%
  # ...we're interested in the number of hours worked per week
  specify(response = hours) \%>\%
  # hypothesizing that the mean is 40
  hypothesize(null = "point", mu = 40) \%>\%
  # generating data points for a null distribution
  generate(reps = 1000, type = "bootstrap") \%>\%
  # finding the null distribution
  calculate(stat = "mean") \%>\%
  get_p_value(obs_stat = point_estimate, direction = "two-sided")

# using a theoretical null distribution -----------------------------------

# calculate the observed statistic
obs_stat <- gss \%>\%
  specify(response = hours) \%>\%
  hypothesize(null = "point", mu = 40) \%>\%
  calculate(stat = "t")

# define a null distribution
null_dist <- gss \%>\%
  specify(response = hours) \%>\%
  assume("t")

# calculate a p-value
get_p_value(null_dist, obs_stat, direction = "both")

# using a model fitting workflow -----------------------------------------

# fit a linear model predicting number of hours worked per
# week using respondent age and degree status.
observed_fit <- gss \%>\%
  specify(hours ~ age + college) \%>\%
  fit()

observed_fit

# fit 100 models to resamples of the gss dataset, where the response
# `hours` is permuted in each. note that this code is the same as
# the above except for the addition of the `hypothesize` and `generate` steps.
null_fits <- gss \%>\%
  specify(hours ~ age + college) \%>\%
  hypothesize(null = "independence") \%>\%
  generate(reps = 100, type = "permute") \%>\%
  fit()

null_fits

get_p_value(null_fits, obs_stat = observed_fit, direction = "two-sided")

# more in-depth explanation of how to use the infer package
\dontrun{
vignette("infer")
}

}
\seealso{
Other auxiliary functions: 
\code{\link{get_confidence_interval}()}
}
\concept{auxillary functions}
