% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/woebin.R
\name{woebin}
\alias{woebin}
\title{WOE Binning}
\usage{
woebin(dt, y, x = NULL, breaks_list = NULL, special_values = NULL,
  min_perc_fine_bin = 0.02, min_perc_coarse_bin = 0.05, stop_limit = 0.1,
  max_num_bin = 8, positive = "bad|1", no_cores = NULL, print_step = 0L,
  method = "tree")
}
\arguments{
\item{dt}{A data frame with both x (predictor/feature) and y (response/label) variables.}

\item{y}{Name of y variable.}

\item{x}{Name of x variables. Default is NULL. If x is NULL, then all variables except y are counted as x variables.}

\item{breaks_list}{List of break points, default is NULL. If it is not NULL, variable binning will be based on the provided breaks.}

\item{special_values}{The values specified in special_values will be placed in separate bins. Default is NULL.}

\item{min_perc_fine_bin}{The minimum percentage of initial binning class number over total. Accepted range: 0.01-0.2; default is 0.02, which means initial binning into 50 fine bins for continuous variables.}

\item{min_perc_coarse_bin}{The minimum percentage of final binning class number over total. Accepted range: 0.01-0.2; default is 0.05.}

\item{stop_limit}{Stop binning segmentation when the information value gain ratio is less than stop_limit, or stop binning merge when the minimum chi-square is less than 'qchisq(1-stop_limit, 1)'. Accepted range: 0-0.5; default is 0.1.}

\item{max_num_bin}{Integer. The maximum number of bins. Default is 8.}

\item{positive}{Value of positive class, default "bad|1".}

\item{no_cores}{Number of CPU cores for parallel computation. Default is NULL. If no_cores is NULL, it is set to 1 when the number of x variables is less than 10, and to the number of all CPU cores when the number of x variables is greater than or equal to 10.}

\item{print_step}{A non-negative integer. Default is 0. If print_step>0, print variable names by each print_step-th iteration. If print_step=0 or no_cores>1, no message is printed.}

\item{method}{Optimal binning method; it should be either "tree" or "chimerge". Default is "tree".}
}
\value{
Optimal or customized binning information.
}
\description{
\code{woebin} generates optimal binning for numerical, factor and categorical variables using methods including tree-like segmentation or chi-square merge. \code{woebin} can also customize breakpoints if breaks_list is provided.
}
\examples{
# load germancredit data
data(germancredit)

# Example I
# binning of two variables in germancredit dataset
bins_2var = woebin(germancredit, y = "creditability", x = c("credit.amount", "purpose"))

\dontrun{
# Example II
# binning of the germancredit dataset
bins_germ = woebin(germancredit, y = "creditability")
# converting bins_germ into a dataframe
# bins_germ_df = data.table::rbindlist(bins_germ)

# Example III
# customizing the breakpoints of binning
library(data.table)
dat = rbind(
  germancredit,
  data.table(creditability=sample(c("good","bad"),10,replace=TRUE)),
  fill=TRUE)

breaks_list = list(
  age.in.years = c(26, 35, 37, "Inf\%,\%missing"),
  housing = c("own", "for free\%,\%rent")
)

special_values = list(
  credit.amount = c(2600, 9960, "6850\%,\%missing"),
  purpose = c("education", "others\%,\%missing")
)

bins_cus_brk = woebin(dat, y="creditability",
  x=c("age.in.years","credit.amount","housing","purpose"),
  breaks_list=breaks_list, special_values=special_values)

}

}
\seealso{
\code{\link{woebin_ply}}, \code{\link{woebin_plot}}, \code{\link{woebin_adj}}
}
