% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/textmodel_lr.R
\name{textmodel_lr}
\alias{textmodel_lr}
\title{Logistic regression classifier for texts}
\usage{
textmodel_lr(x, y, ...)
}
\arguments{
\item{x}{the \link[quanteda:dfm]{dfm} on which the model will be fit.  Does not need to
contain only the training documents.}

\item{y}{vector of training labels associated with each document in
\code{x}.  (These will be converted to factors if not already
factors.)}

\item{...}{additional arguments passed to \code{\link[glmnet:cv.glmnet]{cv.glmnet()}}}
}
\value{
an object of class \code{textmodel_lr}, a list containing:
\itemize{
\item \code{x}, \code{y} the input model matrix and input training class labels
\item \code{algorithm} character; the type and family of logistic regression model used in calling
\code{\link[glmnet:cv.glmnet]{cv.glmnet()}}
\item \code{type} the type of model associated with \code{algorithm}
\item \code{classnames} the levels of training classes in \code{y}
\item \code{lrfitted} the fitted model object from \code{\link[glmnet:cv.glmnet]{cv.glmnet()}}
\item \code{call} the model call
}
}
\description{
Fits a fast penalized maximum likelihood estimator to predict discrete
categories from sparse \link[quanteda:dfm]{dfm} objects. Using the \pkg{glmnet}
package, the function computes the regularization path for the lasso or
elasticnet penalty at a grid of values for the regularization parameter
lambda.  The value of lambda is chosen automatically by cross-validation on
several folds of the data at estimation time.
}
\examples{
## Example from 13.1 of _An Introduction to Information Retrieval_
library("quanteda")
corp <- corpus(c(d1 = "Chinese Beijing Chinese",
                 d2 = "Chinese Chinese Shanghai",
                 d3 = "Chinese Macao",
                 d4 = "Tokyo Japan Chinese",
                 d5 = "London England Chinese",
                 d6 = "Chinese Chinese Chinese Tokyo Japan"),
               docvars = data.frame(train = factor(c("Y", "Y", "Y", "N", "N", NA))))
dfmat <- dfm(tokens(corp), tolower = FALSE)

## simulate bigger sample as classification on small samples is problematic
set.seed(1)
dfmat <- dfm_sample(dfmat, 50, replace = TRUE)

## train model
(tmod1 <- textmodel_lr(dfmat, docvars(dfmat, "train")))
summary(tmod1)
coef(tmod1)

## predict probability and classes
predict(tmod1, type = "prob")
predict(tmod1)
}
\references{
Friedman, J., Hastie, T., & Tibshirani, R. (2010). Regularization Paths for
Generalized Linear Models via Coordinate Descent. \emph{Journal of Statistical
Software} 33(1), 1-22.  \doi{10.18637/jss.v033.i01}
}
\seealso{
\code{\link[glmnet:cv.glmnet]{cv.glmnet()}}, \code{\link[=predict.textmodel_lr]{predict.textmodel_lr()}},
\code{\link[=coef.textmodel_lr]{coef.textmodel_lr()}}
}
