% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ALoptimalGLMSub.R
\name{ALoptimalGLMSub}
\alias{ALoptimalGLMSub}
\title{A- and L-optimality criteria based subsampling under Generalised Linear Models}
\usage{
ALoptimalGLMSub(r0,rf,Y,X,N,family)
}
\arguments{
\item{r0}{sample size for initial random sample}

\item{rf}{final sample size including initial (r0) and optimal (r) samples}

\item{Y}{response data or Y}

\item{X}{covariate data or X matrix that has all the covariates (first column is for the intercept)}

\item{N}{size of the big data}

\item{family}{a character value, one of "linear", "logistic" or "poisson", for the regression type from Generalised Linear Models}
}
\value{
The output of \code{ALoptimalGLMSub} gives a list of

\code{Beta_Estimates} estimated model parameters in a data.frame after subsampling

\code{Variance_Epsilon_Estimates} matrix of estimated variance for epsilon in a data.frame after subsampling

\code{Sample_A-Optimality} list of indexes for the initial and optimal samples obtained based on A-Optimality criteria

\code{Sample_L-Optimality} list of indexes for the initial and optimal samples obtained based on L-Optimality criteria

\code{Subsampling_Probability} matrix of calculated subsampling probabilities for A- and L-optimality criteria
}
\description{
Use this function to subsample from big data under linear, logistic and Poisson regression
to describe the data. Subsampling probabilities are obtained based on the A- and L-optimality
criteria.
}
\details{
Two stage subsampling algorithm for big data under Generalised Linear Models
(linear, logistic and Poisson regression).

The first stage is to obtain a random sample of size \eqn{r_0} and estimate the model parameters.
Using the estimated parameters, subsampling probabilities are evaluated for the A- and L-optimality criteria.

Through the estimated subsampling probabilities an optimal sample of size \eqn{r \ge r_0} is obtained.
Finally, the two samples are combined and the model parameters are estimated.

\strong{NOTE} : If the input parameters do not satisfy the given domain conditions,
the necessary error messages will be provided so that the user can proceed.

If \eqn{r \ge r_0} is not satisfied then an error message will be produced.

If the big data \eqn{X,Y} has any missing values then an error message will be produced.

The big data size \eqn{N} is compared with the sizes of \eqn{X,Y} and if they are not aligned an error
message will be produced.

A character value is provided for \code{family} and if it is not one of the three types an error message
will be produced.
}
\examples{
# ---- Example 1: family = "linear" ----
# Build the full data set with GenGLMdata
Dist <- "Normal"
Dist_Par <- list(Mean = 0, Variance = 1, Error_Variance = 0.5)
No_Of_Var <- 2
Beta <- c(-1, 2, 1)
N <- 5000
Family <- "linear"
Full_Data <- GenGLMdata(Dist, Dist_Par, No_Of_Var, Beta, N, Family)

# Initial sample size, vector of final sample sizes, and the data to subsample
r0 <- 300
rf <- rep(c(6, 9) * 100, 50)
Original_Data <- Full_Data$Complete_Data

# First column is passed as Y, the remaining columns as X
Results <- ALoptimalGLMSub(r0 = r0, rf = rf,
                           Y = as.matrix(Original_Data[, 1]),
                           X = as.matrix(Original_Data[, -1]),
                           N = nrow(Original_Data),
                           family = "linear")

plot_Beta(Results)

# ---- Example 2: family = "logistic" ----
Dist <- "Normal"
Dist_Par <- list(Mean = 0, Variance = 1)
No_Of_Var <- 2
Beta <- c(-1, 2, 1)
N <- 5000
Family <- "logistic"
Full_Data <- GenGLMdata(Dist, Dist_Par, No_Of_Var, Beta, N, Family)

r0 <- 300
rf <- rep(c(6, 9) * 100, 50)
Original_Data <- Full_Data$Complete_Data

Results <- ALoptimalGLMSub(r0 = r0, rf = rf,
                           Y = as.matrix(Original_Data[, 1]),
                           X = as.matrix(Original_Data[, -1]),
                           N = nrow(Original_Data),
                           family = "logistic")

plot_Beta(Results)

# ---- Example 3: family = "poisson" ----
# Here NULL is passed to GenGLMdata in place of the distribution parameters
Dist <- "Normal"
No_Of_Var <- 2
Beta <- c(-1, 2, 1)
N <- 5000
Family <- "poisson"
Full_Data <- GenGLMdata(Dist, NULL, No_Of_Var, Beta, N, Family)

r0 <- 300
rf <- rep(c(6, 9) * 100, 50)
Original_Data <- Full_Data$Complete_Data

Results <- ALoptimalGLMSub(r0 = r0, rf = rf,
                           Y = as.matrix(Original_Data[, 1]),
                           X = as.matrix(Original_Data[, -1]),
                           N = nrow(Original_Data),
                           family = "poisson")

plot_Beta(Results)

}
\references{
\insertRef{wang2018optimal}{NeEDS4BigData}

\insertRef{ai2021optimal}{NeEDS4BigData}

\insertRef{yao2021review}{NeEDS4BigData}
}
