% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/distribution.R
\name{distr}
\alias{distr}
\title{Compare Variables with their Distributions}
\usage{
distr(
  data,
  ...,
  type = 1,
  ref = TRUE,
  note = NA,
  top = 10,
  breaks = 10,
  na.rm = FALSE,
  force = "none",
  trim = 0,
  clean = FALSE,
  abc = FALSE,
  custom_colours = FALSE,
  plot = TRUE,
  chords = FALSE,
  save = FALSE,
  subdir = NA
)
}
\arguments{
\item{data}{Dataframe}

\item{...}{Variables. Main (target variable) and secondary (values 
variable) to group by}

\item{type}{Integer. 1 for both plots, 2 for counter plot only, 3 for
percentages plot only.}

\item{ref}{Boolean. Show a reference line if levels = 2? Quite useful
when data is unbalanced (not 50/50).}

\item{note}{Character. Caption for the plot.}

\item{top}{Integer. Filter and plot the n most frequent values for categorical values.}

\item{breaks}{Integer. Number of splits for numerical values.}

\item{na.rm}{Boolean. Ignore \code{NA}s if needed.}

\item{force}{Character. Force class on the values data. Choose between 'none',
'character', 'numeric', 'date'}

\item{trim}{Integer. Trim labels until the nth character for categorical values
(applies to both target and values)}

\item{clean}{Boolean. Use \code{cleanText()} for categorical values (applies
to both target and values)}

\item{abc}{Boolean. Do you wish to sort by alphabetical order?}

\item{custom_colours}{Boolean. Use custom colours function?}

\item{plot}{Boolean. Return a plot? Otherwise, a table with results}

\item{chords}{Boolean. Use a chords plot?}

\item{save}{Boolean. Save the output plot in the working directory}

\item{subdir}{Character. Into which subdirectory do you wish to save the plot?}
}
\value{
Plot when \code{plot=TRUE} with two plots in one: counter distribution
grouped by cuts, and proportions distribution grouped by the same cuts.
A \code{data.frame} when \code{plot=FALSE} with counting, percentages, and
cumulative percentages results. When \code{type} is set to 2 or 3, a single
plot is returned instead of both.
}
\description{
Compare the distribution of a target variable vs another variable. This 
function automatically splits into quantiles for numerical variables.
Custom and tidyverse friendly.
}
\examples{
\donttest{
Sys.unsetenv("LARES_FONT") # Temporary
data(dft) # Titanic dataset

# Relation for categorical/categorical values
dft \%>\% distr(Survived, Sex)

# Relation for categorical/numeric values
dft \%>\% distr(Survived, Fare, plot = FALSE) \%>\% head(10)
# Sort values
dft \%>\% distr(Survived, Fare, abc = TRUE)
# Fewer splits/breaks
dft \%>\% distr(Survived, Fare, abc = TRUE, breaks = 5)

# Distribution of numerical only
dft[dft$Fare < 20,] \%>\% distr(Fare)

# Distribution of numerical/numerical
dft \%>\% distr(Fare, Age)

# Select only one of the two default plots of distr()
dft \%>\% distr(Survived, Age, type = 2)
dft \%>\% distr(Survived, Age, type = 3)
}
}
\seealso{
Other Exploratory: 
\code{\link{corr_cross}()},
\code{\link{corr_var}()},
\code{\link{crosstab}()},
\code{\link{df_str}()},
\code{\link{freqs_df}()},
\code{\link{freqs_list}()},
\code{\link{freqs_plot}()},
\code{\link{freqs}()},
\code{\link{lasso_vars}()},
\code{\link{missingness}()},
\code{\link{plot_cats}()},
\code{\link{plot_df}()},
\code{\link{plot_nums}()},
\code{\link{tree_var}()},
\code{\link{trendsRelated}()}

Other Visualization: 
\code{\link{freqs_df}()},
\code{\link{freqs_list}()},
\code{\link{freqs_plot}()},
\code{\link{freqs}()},
\code{\link{gg_bars}()},
\code{\link{gg_pie}()},
\code{\link{noPlot}()},
\code{\link{plot_chord}()},
\code{\link{plot_survey}()},
\code{\link{plot_timeline}()},
\code{\link{theme_lares}()},
\code{\link{tree_var}()}
}
\concept{Exploratory}
\concept{Visualization}
