#' Dot-and-Whisker Plots of Regression Results
#'
#' \code{dwplot} is a function for quickly and easily generating dot-and-whisker plots of regression models saved in tidy data frames.
#'
#' @param x Either a tidy data.frame (see 'Details'), a model object to be tidied with \code{\link[broom]{tidy}}, or a list of such model objects.
#' @param alpha A number setting the criterion of the confidence intervals. The default value is .05, corresponding to 95-percent confidence intervals.
#' @param dodge_size A number (typically between 0 and 0.3) indicating how much vertical separation should be between different models' coefficients when multiple models are graphed in a single plot.  Lower values tend to look better when the number of independent variables is small, while a higher value may be helpful when many models appear on the same plot.
#' @param order_vars A vector of variable names that specifies the order in which the variables are to appear along the y-axis of the plot.
#' @param show_intercept A logical constant indicating whether the coefficient of the intercept term should be plotted.
#' @param model_name The name of a variable that distinguishes separate models within a tidy data.frame.
#' @param dot_args A list of arguments specifying the appearance of the dots representing mean estimates.  For supported arguments, see \code{\link[ggstance]{geom_pointrangeh}}.
#' @param \dots Extra arguments to pass to \code{\link[broom]{tidy}}.
#'
#' @details \code{dwplot} visualizes regression results saved in tidy data.frames by, e.g., \code{\link[broom]{tidy}} as dot-and-whisker plots generated by \code{\link[ggplot2]{ggplot}}.
#'
#' Tidy data.frames to be plotted should include the variables \code{term} (names of predictors), \code{estimate} (corresponding estimates of coefficients or other quantities of interest), \code{std.error} (corresponding standard errors), and optionally \code{model} (when multiple models are desired on a single plot; a different name for this last variable may be specified using the model_name argument).
#' In place of \code{std.error} one may substitute \code{conf.low} (the lower bounds of the confidence intervals of each estimate) and \code{conf.high} (the corresponding upper bounds).
#'
#' For convenience, \code{dwplot} also accepts as input those model objects that can be tidied by \code{\link[broom]{tidy}}, or a list of such model objects.
#'
#' Because the function takes a data.frame as input, it is easily employed for a wide range of models, including those not supported by \code{\link[broom]{tidy}}.
#' And because the output is a \code{ggplot} object, it can easily be further customized with any additional arguments and layers supported by \code{ggplot2}.
#' Together, these two features make \code{dwplot} extremely flexible.
#'
#' @references
#' Kastellec, Jonathan P. and Leoni, Eduardo L. 2007. "Using Graphs Instead of Tables in Political Science." Perspectives on Politics, 5(4):755-771.
#'
#' @return The function returns a \code{ggplot} object.
#'
#' @import ggplot2
#' @importFrom broom tidy
#' @importFrom dplyr "%>%" filter arrange left_join full_join bind_rows
#' @importFrom stats qnorm
#' @importFrom stats reorder
#' @importFrom ggstance geom_pointrangeh
#' @importFrom ggstance position_dodgev
#' @importFrom plyr ldply
#'
#' @examples
#' library(broom)
#' library(dplyr)
#' # Plot regression coefficients from a single model object
#' data(mtcars)
#' m1 <- lm(mpg ~ wt + cyl + disp, data = mtcars)
#' dwplot(m1) +
#'     xlab("Coefficient") + ylab("") +
#'     geom_vline(xintercept = 0, colour = "grey50", linetype = 2) +
#'     theme(legend.position="none")
#' # Plot regression coefficients from multiple models on the fly
#' m2 <- update(m1, . ~ . - disp)
#' dwplot(list(full = m1, nodisp = m2))
#' # Change the appearance of dots and whiskers
#' dwplot(m1, dot_args = list(size = 3, pch = 21, fill = "white"))
#' # Plot regression coefficients from multiple models in a tidy data.frame
#' by_trans <- mtcars %>% group_by(am) %>%
#'     do(tidy(lm(mpg ~ wt + cyl + disp, data = .))) %>% rename(model=am)
#' dwplot(by_trans) +
#'     theme_bw() + xlab("Coefficient") + ylab("") +
#'     geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
#'     ggtitle("Predicting Gas Mileage, OLS Estimates") +
#'     theme(plot.title = element_text(face = "bold"),
#'           legend.justification=c(0, 0), legend.position=c(0, 0),
#'           legend.background = element_rect(colour="grey80"),
#'           legend.title.align = .5) +
#'     scale_colour_grey(start = .4, end = .8,
#'                       name = "Transmission",
#'                       breaks = c(0, 1),
#'                       labels = c("Automatic", "Manual"))
#'
#' @export

dwplot <- function(x, alpha = .05, dodge_size = .4, order_vars = NULL,
                   show_intercept = FALSE, model_name = "model",
                   dot_args = list(size = .3), ...) {
    # If x is model object(s), convert to a tidy data.frame
    df <- dw_tidy(x,...)

    if (!show_intercept) df <- df %>% filter(!grepl("^\\(Intercept\\)$|^\\w+\\|\\w+$", term)) # enable detecting intercept in polr objects

    # Set variables that will appear in pipelines to NULL to make R CMD check happy
    estimate <- model <- conf.low <- conf.high <- term <- std.error <- NULL

    n_vars <- length(unique(df$term))
    dodge_size <- dodge_size

    # Confirm number of models, get model names
    if (model_name %in% names(df)) {
        dfmod <- df[[model_name]]
        n_models <- length(unique(dfmod))
        ## re-order/restore levels by order in data set
        df[[model_name]] <- factor(dfmod, levels = unique(dfmod))
    } else {
        if (length(df$term) == n_vars) {
            df[[model_name]] <- factor("one")
            n_models <- 1
        } else {
            stop("Please add a variable named '",
                 model_name,"' to distinguish different models")
        }
    }
    mod_names <- unique(df[[model_name]])

    # Specify order of variables if an order is provided
    if (!is.null(order_vars)) {
        df$term <- factor(df$term, levels = order_vars)
        df <- df[match(order_vars, df$term),] %>% stats::na.omit()
    }

    # Add rows of NAs for variables not included in a particular model
    if (n_models > 1) {
        df <- add_NAs(df, n_models, mod_names)
    }

    # Prep arguments to ggplot
    var_names <- df$term

    y_ind <- rep(seq(n_vars, 1), n_models)
    df$y_ind <- y_ind

    # Confirm alpha within bounds
    if (alpha < 0 | alpha > 1) {
        stop("Value of alpha for the confidence intervals should be between 0 and 1.")
    }

    # Generate lower and upper bound if not included in results
    if ((!"conf.low" %in% names(df)) || (!"conf.high" %in% names(df))) {
        if ("std.error" %in% names(df)) {
            ci <- 1 - alpha/2
            df <- transform(df,
                            conf.low = estimate - stats::qnorm(ci) * std.error,
                            conf.high = estimate + stats::qnorm(ci) * df$std.error)
        } else {
            df <- transform(df, conf.low=NA, conf.high=NA)
        }
    }

    # Catch difference between single and multiple models
    if (length(y_ind) != length(var_names)) {
        var_names <- unique(var_names)
    }

    point_args0 <- list(na.rm = TRUE, position=ggstance::position_dodgev(height = dodge_size))
    point_args <- c(point_args0, dot_args)


    # Make the plot

    p <- ggplot(df,aes(x = estimate, xmin = conf.low,xmax = conf.high, y = stats::reorder(term, y_ind), colour = model))+
        do.call(ggstance::geom_pointrangeh, point_args) +
        ylab("") + xlab("")

    # Omit the legend if there is only one model
    if (!"model" %in% names(df) | length(mod_names) == 1){
        p <- p + theme(legend.position="none")
    }

    return(p)
}


dw_tidy <- function(x,...) {
    # Set variables that will appear in pipelines to NULL to make R CMD check happy
    process_lm <- tidy.summary.lm <- fix_data_frame <- NULL

    if (!is.data.frame(x)) {
        if (class(x)=="list") {
            ind <- seq(length(x))
            nm <- paste("Model", ind)
            if (!is.null(nm_orig <- names(x))) {
                setNm <- nchar(nm)>0
                nm[setNm] <- nm_orig[setNm]
            }
            names(x) <- nm

            df <- do.call(plyr::ldply,
                          c(list(.data=x,.fun=broom::tidy, conf.int = TRUE, .id="model"), list(...)))

        } else if (class(x) == "lmerMod"){
            group <- vector() # only for avoiding the NOTE in check
            df <- broom::tidy(x, conf.int = TRUE) %>% filter(group == "fixed")
        } else {
            if (class(x) == "polr"){
                family.polr <- function(object,...) NULL
                tidy.lm <- function(x, conf.int = FALSE, conf.level = .95,
                                    exponentiate = FALSE, quick = FALSE, ...) {
                    if (quick) {
                        co <- stats::coef(x)
                        ret <- data.frame(term = names(co), estimate = unname(co))
                        return(process_lm(ret, x, conf.int = FALSE, exponentiate = exponentiate))
                    }
                    s <- summary(x)

                    tidy.summary.lm <- function(x, ...) {
                        co <- stats::coef(x)
                        nn <- c("estimate", "std.error", "statistic", "p.value")
                        if (inherits(co, "listof")) {
                            # multiple response variables
                            ret <- plyr::ldply(co, fix_data_frame, nn[1:ncol(co[[1]])],
                                               .id = "response")
                            ret$response <- stringr::str_replace(ret$response, "Response ", "")
                        } else {
                            ret <- fix_data_frame(co, nn[1:ncol(co)])
                        }

                        ret
                    }

                    ret <- tidy.summary.lm(s)

                    process_lm(ret, x, conf.int = conf.int, conf.level = conf.level,
                               exponentiate = exponentiate)
                }
            }
            df <- broom::tidy(x, conf.int = TRUE,...)
        }
    } else {
        df <- x
    }
    return(df)
}

add_NAs <- function(df = df, n_models = n_models, mod_names = mod_names,
                    model_name = "model") {
    # Set variables that will appear in pipelines to NULL to make R CMD check happy
    term <- model <- NULL

    if (!is.factor(df$term)) {
        df$term <- factor(df$term, levels = unique(df$term))
    }
    if (!is.factor(dfmod <- df[[model_name]])) {
        df[[model_name]] <- factor(dfmod, levels = unique(dfmod))
    }
    for (i in seq(n_models)) {
        m <- df %>% filter(model==factor(mod_names[[i]], levels = mod_names))
        not_in <- setdiff(unique(df$term), m$term)
        for (j in seq(not_in)) {
            t <- data.frame(term = factor(not_in[j], levels = levels(df$term)),
                            model = factor(mod_names[[i]], levels = mod_names))
            if ("submodel" %in% names(m)) {
                t$submodel <- m$submodel[1]
            }
            if ("submodel" %in% names(m)) {
                m <- full_join(m, t, by = c("term", "model", "submodel"))
            } else {
                m <- full_join(m, t, by = c("term", "model"))
            }
        }
        if (i==1) {
            dft <- m %>% arrange(term)
        } else {
            dft <- bind_rows(dft, m %>% arrange(term))
        }
    }

    df <- dft

    df$estimate <- as.numeric(df$estimate)
    if ("std.error" %in% names(df)) {
        df$std.error <- as.numeric(df$std.error)
    }
    if ("conf.high" %in% names(df)) {
        df$conf.high <- as.numeric(df$conf.high)
    }
    if ("conf.low" %in% names(df)) {
        df$conf.low <- as.numeric(df$conf.low)
    }

    return(df)
}
