% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.R
\name{genotypes}
\alias{genotypes}
\title{Create Core Hunter genotype data from data frame, matrix or file.}
\usage{
genotypes(data, alleles, file, format)
}
\arguments{
\item{data}{Data frame or matrix containing the genotypes (individuals x markers)
depending on the chosen format:
\describe{
  \item{\code{default}}{
    Data frame. One row per individual and one or more columns per marker.
    Columns contain the names, numbers, references, ... of observed alleles.
    Unique row names (item ids) are required and columns should be named
    after the marker to which they belong, optionally extended with an
    arbitrary suffix starting with a dot (\code{.}), dash (\code{-}) or
    underscore (\code{_}) character.
  }
  \item{\code{biparental}}{
    Numeric matrix or data frame. One row per individual and one column per marker.
    Data consists of 0, 1 and 2 coding for homozygous (AA), heterozygous (AB) and
    homozygous (BB), respectively. Unique row names (item ids) are required and
    optionally column (marker) names may be included as well.
  }
  \item{\code{frequency}}{
    Numeric matrix or data frame. One row per individual (or bulk sample) and multiple
    columns per marker. Data consists of allele frequencies, grouped per marker in
    consecutive columns named after the corresponding marker, optionally extended
    with an arbitrary suffix starting with a dot (\code{.}), dash (\code{-}) or
    underscore (\code{_}) character. The allele frequencies of each marker should
    sum to one in each sample. Unique row names (item ids) are required.
  }
  In case a data frame is provided, an optional first column \code{NAME}
  may be included to specify item names. The remaining columns should follow
  the format as described above.
  See \url{http://www.corehunter.org} for more details about the supported genotype formats.
  Note that both the \code{frequency} and \code{biparental} format syntactically also
  comply with the \code{default} format but with different semantics, meaning that it
  is very important to specify the correct format. Some checks have been built in that
  raise warnings in case it seems that the wrong format might have been specified based
  on an inspection of the data. If you are sure that you have selected the correct format
  these warnings, if any, can be safely ignored.
}}

\item{alleles}{Allele names per marker (\code{character} vector).
Ignored except when creating \code{frequency} data from a matrix or data frame.
Allele names should be ordered in correspondence with the data columns.}

\item{file}{File containing the genotype data.}

\item{format}{Genotype data format, one of \code{default}, \code{biparental} or \code{frequency}.}
}
\value{
Genotype data of class \code{chgeno} with elements
\describe{
 \item{\code{data}}{Genotypes. Data frame for default format, \code{numeric} matrix for other formats.}
 \item{\code{size}}{Number of individuals in the dataset.}
 \item{\code{ids}}{Unique item identifiers (\code{character}).}
 \item{\code{names}}{Item names (\code{character}). Names of individuals to which no explicit name
   has been assigned are equal to the unique \code{ids}.}
 \item{\code{markers}}{Marker names (\code{character}).
   May contain \code{NA} values in case only some or no marker names were specified.
   Marker names are always included for the \code{default} and \code{frequency} format
   but are optional for the \code{biparental} format.}
 \item{\code{alleles}}{List of character vectors with allele names per marker.
   Vectors may contain \code{NA} values in case only some or no allele names were
   specified. For \code{biparental} data the two alleles are named \code{"0"} and
   \code{"1"}, respectively, for all markers. For the \code{default} format allele
   names are inferred from the provided data. Finally, for \code{frequency} data
   allele names are optional and may be specified either in the file or through
   the \code{alleles} argument when creating this type of data from a matrix or
   data frame.}
 \item{\code{java}}{Java version of the data object.}
 \item{\code{format}}{Genotype data format used.}
 \item{\code{file}}{Normalized path of file from which data was read (if applicable).}
}
}
\description{
Specify either a data frame or matrix, or a file from which to read the genotypes.
See \url{http://www.corehunter.org} for documentation and examples of the genotype data
file format used by Core Hunter.
}
\examples{
# ---- Part 1: build genotype data from an in-memory data frame or matrix ----

# default format: one row per individual, two allele columns per marker
# (columns are named <marker>.<suffix>); NA marks a missing observation
geno.data <- data.frame(
 NAME = c("Alice", "Bob", "Carol", "Dave", "Eve"),
 M1.1 = c(1, 2, 1, 2, 1),
 M1.2 = c(3, 2, 2, 3, 1),
 M2.1 = c("B", "C", "D", "B", NA),
 M2.2 = c("B", "A", "D", "B", NA),
 M3.1 = c("a1", "a1", "a2", "a2", "a1"),
 M3.2 = c("a1", "a2", "a2", "a1", "a1"),
 M4.1 = c(NA, "+", "+", "+", "-"),
 M4.2 = c(NA, "-", "+", "-", "-"),
 row.names = paste0("g-", 1:5)
)
geno <- genotypes(data = geno.data, format = "default")

# biparental format (e.g. SNP): 10 individuals x 100 markers,
# randomly filled with the codes 0, 1 and 2
geno.data <- matrix(
 sample(c(0, 1, 2), size = 1000, replace = TRUE),
 nrow = 10, ncol = 100,
 dimnames = list(paste0("g-", 1:10), paste0("m-", 1:100))
)
geno <- genotypes(data = geno.data, format = "biparental")

# frequency format: one row per sample; consecutive columns of the same
# marker hold its allele frequencies, which sum to one within each row
geno.data <- rbind(
 c(0.0, 0.3, 0.7, 0.5, 0.5, 0.0, 1.0),
 c(0.4, 0.0, 0.6, 0.1, 0.9, 0.0, 1.0),
 c(0.3, 0.3, 0.4, 1.0, 0.0, 0.6, 0.4)
)
dimnames(geno.data) <- list(
 paste0("g-", 1:3),
 rep(c("M1", "M2", "M3"), times = c(3, 2, 2))
)
# one allele name per data column, in column order
alleles <- c("M1-a", "M1-b", "M1-c", "M2-a", "M2-b", "M3-a", "M3-b")
geno <- genotypes(data = geno.data, alleles = alleles, format = "frequency")

# ---- Part 2: read genotype data from the example files shipped with the package ----

# default format
geno.file <- system.file("extdata", "genotypes.csv", package = "corehunter")
geno <- genotypes(file = geno.file, format = "default")

# biparental format (e.g. SNP)
geno.file <- system.file("extdata", "genotypes-biparental.csv", package = "corehunter")
geno <- genotypes(file = geno.file, format = "biparental")

# frequency format
geno.file <- system.file("extdata", "genotypes-frequency.csv", package = "corehunter")
geno <- genotypes(file = geno.file, format = "frequency")

}

