% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.R
\name{phenotypes}
\alias{phenotypes}
\title{Create Core Hunter phenotype data from data frame or file.}
\usage{
phenotypes(data, types, min, max, file)
}
\arguments{
\item{data}{Data frame containing one row per individual and one column per trait.
Unique row and column names are required and used as item and trait ids, respectively.
The data frame may optionally include a first column \code{NAME} used to assign names
to some or all individuals.}

\item{types}{Variable types (optional).
  Character vector in which each element consists of one or two letters.
  Ignored when reading from file.

  The first letter indicates the scale type and should be one of \code{N} (nominal),
  \code{O} (ordinal), \code{I} (interval) or \code{R} (ratio).

  The second letter optionally indicates the variable encoding (in Java) and should
  be one of \code{B} (boolean), \code{T} (short), \code{I} (integer), \code{L} (long),
  \code{R} (big integer), \code{F} (float), \code{D} (double), \code{M} (big decimal),
  \code{A} (date) or \code{S} (string). The default encoding is \code{S} (string)
  for nominal variables, \code{I} (integer) for ordinal and interval variables
  and \code{D} (double) for ratio variables. Interval and ratio variables are
  limited to numeric encodings.

  If no explicit variable types are specified these are automatically inferred from
  the data frame column types and classes, whenever possible. Columns of type
  \code{character} are treated as nominal string encoded variables (\code{N}).
  Unordered \code{factor} columns are converted to \code{character} and also
  treated as string encoded nominals. Ordered factors are converted to
  integer encoded interval variables (\code{I}) as described below.
  Columns of type \code{logical} are taken to be asymmetric binary variables (\code{NB}).
  Finally, \code{integer} and more broadly \code{numeric} columns are treated as integer
  encoded interval variables (\code{I}) and double encoded ratio variables (\code{R}),
  respectively.

  Boolean encoded nominals (\code{NB}) are treated as asymmetric binary variables.
  For symmetric binary variables just use the default string encoding (\code{N}
  or \code{NS}). Other nominal variables are converted to factors.

  Ordinal variables of class \code{ordered} are converted to integers respecting
  the order and range of the factor levels and subsequently treated as integer
  encoded interval variables (\code{I}). This conversion makes it possible to model the
  full range of factor levels, even when some levels do not occur in the data. For other
  ordinal variables it is assumed that each value occurs at least once and that
  values follow the natural ordering of the chosen data type (in Java).

  If explicit types are given for some variables, the others can still be automatically
  inferred by setting their type to \code{NA}.}

\item{min}{Minimum values of interval or ratio variables (optional).
Numeric vector. Ignored when reading from file.
If undefined for some variables the respective minimum is inferred from the data.
If the data contains values below the specified minimum, the minimum is lowered accordingly.
For nominal and ordinal variables just put \code{NA}.}

\item{max}{Maximum values of interval or ratio variables (optional).
Numeric vector. Ignored when reading from file.
If undefined for some variables the respective maximum is inferred from the data.
If the data contains values above the specified maximum, the maximum is raised accordingly.
For nominal and ordinal variables just put \code{NA}.}

\item{file}{File containing the phenotype data.}
}
\value{
Phenotype data of class \code{chpheno} with elements
\describe{
 \item{\code{data}}{Phenotypes (data frame).}
 \item{\code{size}}{Number of individuals in the dataset.}
 \item{\code{ids}}{Unique item identifiers.}
 \item{\code{names}}{Item names. Names of individuals to which no explicit name
   has been assigned are equal to the unique \code{ids}.}
 \item{\code{types}}{Variable types and encodings.}
 \item{\code{ranges}}{Variable ranges, when applicable (\code{NA} elsewhere).}
 \item{\code{java}}{Java version of the data object.}
 \item{\code{file}}{Normalized path of file from which the data was read (if applicable).}
}
}
\description{
Specify either a data frame containing the phenotypic trait observations
or a file from which to read the data. See \url{https://www.corehunter.org} for
documentation and examples of the phenotype data format used by Core Hunter.
}
\examples{
# create from data frame: one row per individual, one column per trait;
# row and column names are used as item and trait ids, respectively
pheno.data <- data.frame(
 season = c("winter", "summer", "summer", "winter", "summer"),
 yield = c(34.5, 32.6, 22.1, 54.12, 43.33),
 size = ordered(c("l", "s", "s", "m", "l"), levels = c("s", "m", "l")),
 resistant = c(FALSE, TRUE, TRUE, FALSE, TRUE)
)
# no types given: they are inferred from the column types and classes
pheno <- phenotypes(pheno.data)

# explicit types, one per column: nominal, ratio, ordinal and
# boolean encoded nominal (asymmetric binary)
pheno <- phenotypes(pheno.data, types = c("N", "R", "O", "NB"))
# treat last column as symmetric binary (string encoded nominal),
# auto infer the others by setting their type to NA
pheno <- phenotypes(pheno.data, types = c(NA, NA, NA, "NS"))

# explicit range for the yield column only, NA for all other columns
pheno <- phenotypes(pheno.data, min = c(NA, 20.0, NA, NA), max = c(NA, 60.0, NA, NA))

# read from a file installed with the corehunter package
pheno.file <- system.file("extdata", "phenotypes.csv", package = "corehunter")
pheno <- phenotypes(file = pheno.file)

}
