% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calculate_go_enrichment.R
\name{calculate_go_enrichment}
\alias{calculate_go_enrichment}
\title{Perform gene ontology enrichment analysis}
\usage{
calculate_go_enrichment(
  data,
  protein_id,
  is_significant,
  go_annotations_uniprot = NULL,
  ontology_type,
  organism_id = NULL,
  go_data = NULL,
  plot = TRUE,
  label = TRUE,
  plot_cutoff = "adj_pval top10"
)
}
\arguments{
\item{data}{a data frame that contains at least the input variables.}

\item{protein_id}{a character column in the \code{data} data frame that contains the protein
accession numbers.}

\item{is_significant}{a logical column in the \code{data} data frame that indicates if the
corresponding protein has a significantly changing peptide. The input data frame may contain
peptide level information with significance information. The function is able to extract
protein level information from this.}

\item{go_annotations_uniprot}{recommended, a character column in the \code{data} data frame
that contains gene ontology annotations obtained from UniProt using \code{fetch_uniprot}.
These annotations are already separated into the desired ontology type so the argument
\code{ontology_type} is not required.}

\item{ontology_type}{optional, character value specifying the type of ontology that should
be used. Possible values are molecular function (MF), biological process (BP), cellular component
(CC). This argument is not required if GO annotations are provided from UniProt in
\code{go_annotations_uniprot}. It is required if annotations are provided through \code{go_data} or
automatically fetched.}

\item{organism_id}{optional, character value specifying an NCBI taxonomy identifier of an
organism (TaxId). The only supported inputs are "9606" (Human), "559292" (Yeast) and "83333"
(E. coli). It is only necessary if GO data is not provided either by
\code{go_annotations_uniprot} or in \code{go_data}.}

\item{go_data}{optional, a data frame that can be obtained with \code{fetch_go}. If you provide
data not obtained with \code{fetch_go} make sure column names for protein ID (db_id) and GO ID
(go_id) are the same as for data obtained with \code{fetch_go}.}

\item{plot}{a logical argument indicating whether the result should be plotted (\code{TRUE}) or
returned as a table (\code{FALSE}). Default is TRUE.}

\item{label}{a logical argument indicating whether labels should be added to the plot.
Default is TRUE.}

\item{plot_cutoff}{a character value indicating if the plot should contain the top 10 most
significant gene ontology terms (by p-value or adjusted p-value), or if a significance cutoff
should be used to determine the number of GO terms in the plot. This information should be
provided with the type first followed by the threshold separated by a space. Examples are
\code{plot_cutoff = "adj_pval top10"}, \code{plot_cutoff = "pval 0.05"} or
\code{plot_cutoff = "adj_pval 0.01"}. The threshold can be chosen freely.}
}
\value{
A bar plot displaying negative log10 adjusted p-values for the top 10 enriched or
depleted gene ontology terms. Alternatively, plot cutoffs can be chosen individually with the
\code{plot_cutoff} argument. Bars are colored according to the direction of the enrichment. If
\code{plot = FALSE}, a data frame is returned. P-values are adjusted with Benjamini-Hochberg.
}
\description{
Analyses enrichment of gene ontology terms associated with proteins in the fraction of
significant proteins compared to all detected proteins. A two-sided Fisher's exact test is
performed to test significance of enrichment or depletion. GO annotations can be provided to
this function in one of three ways: as UniProt annotations in \code{go_annotations_uniprot},
as a table obtained with \code{fetch_go} in the \code{go_data} argument, or fetched
automatically by the function when \code{ontology_type} and \code{organism_id} are provided.
}
\examples{
\donttest{
# Load libraries
library(dplyr)
library(stringr)

# Create example data from the E. coli proteome (organism ID 83333).
# The first 1000 proteins are marked as significant. Afterwards every protein
# whose GO annotation mentions "ribosome" is set to non-significant, which
# creates an artificial depletion of ribosome-related terms.
data <- fetch_uniprot_proteome(
  organism_id = 83333,
  columns = c(
    "accession",
    "go_f"
  )
) \%>\%
  mutate(significant = c(
    rep(TRUE, 1000),
    rep(FALSE, n() - 1000)
  )) \%>\%
  mutate(significant = ifelse(
    str_detect(
      go_f,
      pattern = "ribosome"
    ),
    FALSE,
    significant
  ))

# Plot gene ontology enrichment using a p-value cutoff of 0.01
calculate_go_enrichment(
  data,
  protein_id = accession,
  go_annotations_uniprot = go_f,
  is_significant = significant,
  plot = TRUE,
  plot_cutoff = "pval 0.01"
)

# Calculate gene ontology enrichment and return the result as a data frame
go_enrichment <- calculate_go_enrichment(
  data,
  protein_id = accession,
  go_annotations_uniprot = go_f,
  is_significant = significant,
  plot = FALSE
)

head(go_enrichment, n = 10)
}
}
