% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Geno.R
\name{GRAB.ReadGeno}
\alias{GRAB.ReadGeno}
\title{Read genotype data from multiple file formats}
\usage{
GRAB.ReadGeno(
  GenoFile,
  GenoFileIndex = NULL,
  SampleIDs = NULL,
  control = NULL,
  sparse = FALSE
)
}
\arguments{
\item{GenoFile}{Path to genotype file. Supported formats determined by extension:
\itemize{
\item PLINK: "prefix.bed" (binary format)
\item BGEN: "prefix.bgen" (version 1.2 with 8-bit compression)
}}

\item{GenoFileIndex}{Associated index files for the genotype file:
\itemize{
\item PLINK: c("prefix.bim", "prefix.fam") (auto-detected if NULL)
\item BGEN: "prefix.bgen.bgi" or c("prefix.bgen.bgi", "prefix.sample")
}}

\item{SampleIDs}{Character vector of sample IDs to extract. If NULL,
extracts all samples.}

\item{control}{List of control parameters with the following options:
\itemize{
\item \code{imputeMethod}: Imputation method for genotype data.
Options: "none" (default), "mean" (2 times allele frequency), or
"bestguess" (round mean to the nearest integer, 0, 1, or 2).
\item \code{AlleleOrder}: Allele order in genotype file. Options: "ref-first",
"alt-first", or NULL (default: "ref-first" for BGEN, "alt-first" for PLINK).
\item \strong{Marker Selection:}
\itemize{
\item \code{AllMarkers}: Set to TRUE (default) to analyze all markers.
Automatically set to FALSE if any include/exclude files are provided.
\item \code{IDsToIncludeFile}: Path to file with marker IDs to include.
\item \code{RangesToIncludeFile}: Path to file with genomic ranges to include.
Can be used with IDsToIncludeFile (union will be used).
\item \code{IDsToExcludeFile}: Path to file with marker IDs to exclude.
\item \code{RangesToExcludeFile}: Path to file with genomic ranges to exclude.
Can be used with IDsToExcludeFile (union will be excluded).
\item Note: Cannot use both include and exclude files simultaneously.
}
}}

\item{sparse}{Logical indicating whether to return a sparse genotype matrix
(default: FALSE).}
}
\value{
List containing:
\describe{
\item{GenoMat}{Genotype matrix (samples × markers) with values 0, 1, 2, or NA.}
\item{markerInfo}{Data frame with columns CHROM, POS, ID, REF, ALT.}
}
}
\description{
Reads genotype data from PLINK or BGEN format files with flexible filtering
and processing options. Supports efficient memory usage and various
imputation methods for missing genotypes.
}
\details{
\strong{File Format Support:}

\emph{PLINK Format:} Binary BED/BIM/FAM files. See
\url{https://www.cog-genomics.org/plink/2.0/} for specifications.

\emph{BGEN Format:} Version 1.2 with 8-bit compression. See
\url{https://www.well.ox.ac.uk/~gav/bgen_format/spec/v1.2.html} for details.
Requires a BGI index file created with the bgenix tool.
}
\examples{
## Raw genotype data
## (a gzipped file read directly with data.table::fread, not with GRAB.ReadGeno)
RawFile <- system.file("extdata", "simuRAW.raw.gz", package = "GRAB")
GenoMat <- data.table::fread(RawFile)
GenoMat[1:10, 1:10]

## PLINK files
PLINKFile <- system.file("extdata", "simuPLINK.bed", package = "GRAB")
# If no include/exclude files are specified, control$AllMarkers should be TRUE
GenoList <- GRAB.ReadGeno(PLINKFile, control = list(AllMarkers = TRUE))
GenoMat <- GenoList$GenoMat
markerInfo <- GenoList$markerInfo
head(GenoMat[, 1:6])
head(markerInfo)

## BGEN files (Note the different REF/ALT order for BGEN and PLINK formats)
BGENFile <- system.file("extdata", "simuBGEN.bgen", package = "GRAB")
GenoList <- GRAB.ReadGeno(BGENFile, control = list(AllMarkers = TRUE))
GenoMat <- GenoList$GenoMat
markerInfo <- GenoList$markerInfo
head(GenoMat[, 1:6])
head(markerInfo)

## Demonstrate the marker-selection and allele-order parameters in control
## (when both an ID file and a ranges file are given, their union is used)
PLINKFile <- system.file("extdata", "simuPLINK.bed", package = "GRAB")
IDsToIncludeFile <- system.file("extdata", "simuGENO.IDsToInclude", package = "GRAB")
RangesToIncludeFile <- system.file("extdata", "RangesToInclude.txt", package = "GRAB")
GenoList <- GRAB.ReadGeno(PLINKFile,
  control = list(
    IDsToIncludeFile = IDsToIncludeFile,
    RangesToIncludeFile = RangesToIncludeFile,
    AlleleOrder = "ref-first"
  )
)
GenoMat <- GenoList$GenoMat
head(GenoMat)
markerInfo <- GenoList$markerInfo
head(markerInfo)

## PLINK/BGEN files with missing data
# No imputeMethod is given here, so the default ("none") applies
PLINKFile <- system.file("extdata", "simuPLINK.bed", package = "GRAB")
GenoList <- GRAB.ReadGeno(PLINKFile, control = list(AllMarkers = TRUE))
head(GenoList$GenoMat)

# Same PLINK file, now with imputeMethod = "mean"
GenoList <- GRAB.ReadGeno(PLINKFile, control = list(AllMarkers = TRUE, imputeMethod = "mean"))
head(GenoList$GenoMat)

# BGEN file, again with the default imputeMethod
BGENFile <- system.file("extdata", "simuBGEN.bgen", package = "GRAB")
GenoList <- GRAB.ReadGeno(BGENFile, control = list(AllMarkers = TRUE))
head(GenoList$GenoMat)

}
