% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rc.cmpd.get.pubchem.R
\name{rc.cmpd.get.pubchem}
\alias{rc.cmpd.get.pubchem}
\title{rc.cmpd.get.pubchem}
\usage{
rc.cmpd.get.pubchem(
  ramclustObj = NULL,
  search.name = NULL,
  cmpd.names = NULL,
  cmpd.cid = NULL,
  cmpd.inchikey = NULL,
  cmpd.smiles = NULL,
  use.parent.cid = FALSE,
  manual.entry = FALSE,
  get.vendors = FALSE,
  priority.vendors = c("Sigma Aldrich", "Alfa Chemistry", "Acros Organics", "VWR",
    "Alfa Aesar", "molport", "Key Organics", "BLD Pharm"),
  get.properties = TRUE,
  all.props = FALSE,
  get.synonyms = TRUE,
  find.short.lipid.name = TRUE,
  find.short.synonym = TRUE,
  max.name.length = 30,
  assign.short.name = TRUE,
  get.bioassays = TRUE,
  get.pathways = TRUE,
  write.csv = TRUE
)
}
\arguments{
\item{ramclustObj}{RAMClust Object input.  if used, ramclustObj$CID, ramclustObj$inchikey, and ramclustObj$ann are used as input, in that order, and ramclustObj is returned with $pubchem slot appended.}

\item{search.name}{character.  optional name to assign to pubchem search to name output .csv files.}

\item{cmpd.names}{character vector.  i.e. c("caffeine", "theobromine", "glucose")}

\item{cmpd.cid}{numeric integer vector.  i.e. c(2519, 5429, 107526)}

\item{cmpd.inchikey}{character vector.  i.e. c("RYYVLZVUVIJVGH-UHFFFAOYSA-N", "YAPQBXQYLJRXSA-UHFFFAOYSA-N", "GZCGUPFRVQAUEE-SLPGGIOYSA-N")}

\item{cmpd.smiles}{character vector.  i.e. c("CN1C=NC2=C1C(=O)N(C(=O)N2C)C", "CN1C=NC2=C1C(=O)NC(=O)N2C")}

\item{use.parent.cid}{logical.  If TRUE, the CID for each supplied name/inchikey is used to retrieve its parent CID (i.e. the parent of sodium palmitate is palmitic acid).  The parent CID is then used to retrieve all other names and properties.}

\item{manual.entry}{logical.  if TRUE, user input is enabled for compounds not matched by name. A browser window will open with the pubchem search results in your default browser.}

\item{get.vendors}{logical.  if TRUE, vendor data is returned for each compound with a matched CID.  Includes vendor count and vendor product URL, if available}

\item{priority.vendors}{character vector.  i.e. c("MyFavoriteCompany", "MySecondFavoriteCompany").  If these vendors are found, the URL returned is from priority vendors. Priority is given by order input by user.}

\item{get.properties}{logical.  if TRUE, physicochemical property data are returned for each compound with a matched CID.}

\item{all.props}{logical.  If TRUE, all pubchem properties (\url{https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest$_Toc494865567}) are returned.  If FALSE, only a subset is returned (faster).}

\item{get.synonyms}{= TRUE. logical.  if TRUE, retrieve pubchem synonyms.  returned to $synonyms slot}

\item{find.short.lipid.name}{= TRUE. logical.  If TRUE, and get.synonyms = TRUE, looks for lipid shorthand names in synonyms list (i.e. PC(36:6)). returned to $short.name slot.  Short names are assigned only if assign.short.name = TRUE.}

\item{find.short.synonym}{= TRUE. logical.  If TRUE, and get.synonyms = TRUE, looks for short synonyms, with prioritization for names with fewer numeric characters (i.e. names that are not database accession numbers or CAS numbers). returned to $short.name slot.  Short names are assigned only if assign.short.name = TRUE.}

\item{max.name.length}{= 30.  integer.  If names are longer than this value, short names will be searched for, else, retain original name.}

\item{assign.short.name}{= TRUE. logical.  If TRUE, short names found via find.short.lipid.name = TRUE and/or find.short.synonym = TRUE are assigned to be the default annotation name ($ann slot), and original annotations are moved to $long.name slot.}

\item{get.bioassays}{logical. If TRUE, return a table summarizing existing bioassay data for that CID.}

\item{get.pathways}{logical.  If TRUE, return a table of metabolic pathways for that CID.}

\item{write.csv}{logical.  If TRUE, write csv files of all returned pubchem data.}
}
\value{
returns a list with one or more of $pubchem (compound name and identifiers) - one row in dataframe per CID; $properties contains physicochemical properties - one row in dataframe per CID; $vendors contains the number of vendors for a given compound and selects a vendor based on 'priority.vendors' supplied, or randomly chooses a vendor with an HTML link - one row in dataframe per CID;  $bioassays contains a summary of bioassay activity data from pubchem - zero to many rows in dataframe per CID
}
\description{
use pubchem rest and view APIs to retrieve structures, CIDs (if a name or inchikey is given), synonyms, and optionally vendor data, when available.
}
\details{
useful for moving from chemical name to digital structure representation. greek letters are assumed to be 'UTF-8' encoded, and are converted to latin text before searching.   if you are reading in your compound name list, do so with 'encoding' set to 'UTF-8'.
}
\author{
Corey Broeckling
}
