% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dtize_df.R
\name{dtize_df}
\alias{dtize_df}
\title{Discretize Dataframe Columns}
\usage{
dtize_df(
  data,
  cutoff = "median",
  labels = c("low", "high"),
  include_right = TRUE,
  infinity = TRUE,
  include_lowest = TRUE,
  na_fill = "none",
  m = 5,
  maxit = 5,
  seed = NULL,
  printFlag = FALSE
)
}
\arguments{
\item{data}{A dataframe containing the data to be discretized.}

\item{cutoff}{The splitting rule for numeric columns: either a character string,
\code{"median"} (default) or \code{"mean"}, or a custom numeric vector of split points.}

\item{labels}{A character vector of labels for the discretized categories. Default is \code{c("low", "high")}.}

\item{include_right}{A logical value indicating if the intervals should be closed on the right. Default is \code{TRUE}.}

\item{infinity}{A logical value indicating if the split intervals should extend to infinity. Default is \code{TRUE}.}

\item{include_lowest}{A logical value indicating if the lowest value should be included in the first interval. Default is \code{TRUE}.}

\item{na_fill}{A character string specifying the imputation method for handling missing values.
Options are \code{"none"} (default), \code{"mean"}, \code{"median"}, or \code{"pmm"} (predictive mean matching).}

\item{m}{An integer specifying the number of imputations to perform when \code{na_fill = "pmm"}. Default is \code{5}.}

\item{maxit}{An integer specifying the maximum number of iterations for the \code{mice} algorithm. Default is \code{5}.}

\item{seed}{An integer seed for reproducibility of the imputation process. Default is \code{NULL}.}

\item{printFlag}{A logical value indicating if \code{mice} should print logs during imputation. Default is \code{FALSE}.}
}
\value{
A dataframe with numeric columns discretized and missing values handled based on the specified imputation method.
}
\description{
Discretizes the numeric columns of a dataframe according to the specified splitting criteria,
and optionally imputes missing values using the specified imputation method.
}
\examples{
data(BrookTrout)

# Example with median as cutoff
med_df <- dtize_df(
  BrookTrout,
  cutoff="median",
  labels=c("below median", "above median")
)

# Example with mean as cutoff
mean_df <- dtize_df(
  BrookTrout,
  cutoff="mean",
  include_right=FALSE
)

# Example with missing value imputation
air <- dtize_df(
  airquality,
  cutoff="mean",
  na_fill="pmm",
  m=10,
  maxit=10,
  seed=42
)


}
