# ----------------------------------------------------------------------

#' Generate tables with summarized GRAID counts (WIP)
#'
#' Constructs simple tables with counts for certain combinations of GRAID form,
#' person/animacy, and function symbols. In the current iteration, the GRAID
#' categories counted for the tables are predetermined and cannot be changed by
#' the user. The TEX files that can optionally be written by this function are
#' used for the 'Corpus counts' in the Multi-CAST documentation.
#'
#' @param data A \code{\link[data.table]{data.table}} in multicastR format.
#' @param by Character. \code{"all"} places all data in one table,
#'   \code{"corpus"} generates one table for each corpus, and \code{"text"} one
#'   table for each text.
#' @param format Unused. Will be used to select between \code{"wide"} and
#'   \code{"long"} table layouts.
#' @param write Logical. If \code{TRUE}, writes output to file.
#' @param writeto A directory to which to write output. Defaults to
#'   \code{getwd}. Ignored if \code{write} is \code{FALSE}.
#' @param output Unused. Will be used to specify the file format to write as.
#'   Currently only output as TEX files is supported.
#'
#' @return A table.
#'
#' @export
#'
#' @examples
#'  \dontrun{
#'     # generate a summary table for the entire collection
#'     mc <- multicast()
#'     mc_table(mc)
#'
#'     # generate a summary table for the English corpus
#'     mc_table(mc[corpus == "english", ])
#'   }
mc_table <- function(data, by = "all", format = "wide", write = FALSE, writeto = getwd(), output = "tex") {

	# define GRAID symbols to be listed in the table
	forms <- c("0", "pro", "np", "other")	# "f0"
	anims <- c("1", "2", "h", "d", "")
	funcs <- c("a", "s", "ncs", "p", "obl", "g", "l", "poss", "pred", "other")	# "dt"

	# construct regular expressions
	rgx_forms <- paste0("(", paste0(forms, collapse = "|"), ")$")
	rgx_anims <- paste0("^(", paste0(anims, collapse = "|"), ").*?$")
	rgx_funcs <- paste0("^(", paste0(funcs, collapse = "|"), ")2?([_=-].*?)?$")


	# subset data, removing all rows that do not match symbol lists
	gval <- data[grepl(rgx_forms, gform) & grepl(rgx_anims, ganim) & grepl(rgx_funcs, gfunc), ]

	# simplify annotation values
	gval[, xform := gsub(paste0("^.*", rgx_forms), "\\1", gform)]
	gval[, xanim := gsub(rgx_anims, "\\1", ganim)]
	gval[, xfunc := gsub(rgx_funcs, "\\1", gfunc)]


	# check 'by' argument
	if (by == "all") {
		# tabulate all data
		bycol <- NULL
	} else if (by == "corpus") {
		# tabulate by corpus
		bycol <- "corpus"
	} else if (by == "text") {
		# tabulate by text
		bycol <- "text"
	} else {
		# invalid value, abort
		stop("\n Argument 'by' has to be one of 'all', 'corpus', or 'text'.")
	}


	# create table frame
	if (by == "all") {
		frame <- CJ(xform = factor(forms, ordered = TRUE, levels = forms),
					xanim = factor(anims, ordered = TRUE, levels = anims),
					xfunc = factor(funcs, ordered = TRUE, levels = funcs))
	} else {
		frame <- CJ(bycol = unique(data[, .SD, .SDcols = bycol][[1]]),
					xform = factor(forms, ordered = TRUE, levels = forms),
					xanim = factor(anims, ordered = TRUE, levels = anims),
					xfunc = factor(funcs, ordered = TRUE, levels = funcs))

		setnames(frame, 1, bycol)
	}

	# remove unwanted rows in frame
	frame <- frame[!(grepl("np|other", xform) & grepl("1|2", xanim)), ]


	# generate table from data
	gtab <- gval[, .N, by = c(bycol, "xform", "xanim", "xfunc")]

	# join data with frame
	if (by == "all") {
		xtab <- merge(gtab, frame, by = c("xform", "xanim", "xfunc"), all.y = TRUE)
	} else {
		xtab <- merge(gtab, frame, by = c(bycol, "xform", "xanim", "xfunc"), all.y = TRUE)
	}

	# re-code NAs as 0
	xtab[is.na(N), N := 0]

	# rename columns
	setnames(xtab, c("xform", "xanim", "xfunc"), c("gform", "ganim", "gfunc"))

	# cast table into wide format
	if (by == "all") {
		xtab <- data.table::dcast(xtab, "gform + ganim ~ gfunc", value.var = "N")
	} else {
		xtab <- data.table::dcast(xtab, paste0(bycol, " + gform + ganim ~ gfunc"), value.var = "N")
	}


	# if write == FALSE, return table
	if (write == FALSE) {
		return(xtab)
	} else {
		message("Writing TEX output...")
		mc_prep_table(xtab, data, by, writeto, funcs, bycol)
	}
}

# ----------------------------------------------------------------------

# ----------------------------------------------------------------------

#' Prepare a summary table
#'
#' Called by \code{\link{mc_table}}. This is a wrapper for the loops that
#' generate tables collection-wide or corpus/text-wise.
#'
#' @param xtab A table generated by \code{\link{mc_table}}.
#' @param data The \code{data.table} from which the table was created.
#' @param by Create table for \code{"all"} data, or split by \code{"corpus"} or \code{"text"}.
#' @param writeto Path to write output to.
#' @param funcs List of function symbols to tabulate.
#' @param bycol Column to split data by.
#'
#' @return Nothing.
#'
#' @keywords internal
mc_prep_table <- function(xtab, data, by, writeto, funcs, bycol) {
	if (by == "all") {
		message("   Constructing table for all data.")
		mc_write_table(xtab, data, by, writeto, funcs, bycol)
		message("Finished!")

	} else {
		for (i in 1:length(unique(xtab[, .SD, .SDcols = bycol][[1]]))) {
			ctab <- xtab[get(bycol) == unique(xtab[, .SD, .SDcols = bycol][[1]])[i]]

			message(paste0("   Constructing table for ", by, " '", ctab[[1]][1], "'."))
			mc_write_table(ctab, data, by, writeto, funcs, bycol)
		}
		message("Finished!")
	}
}

# ----------------------------------------------------------------------

# ----------------------------------------------------------------------

#' Write summary tables to file
#'
#' Called by \code{\link{mc_prep_table}}.
#'
#' @param ctab A table generated by \code{\link{mc_table}}, then prepared by
#'   \code{\link{mc_prep_table}}.
#' @param data The \code{data.table} from which the table was created.
#' @param by Write files for \code{"all"} data, or split by \code{"corpus"} or
#'   \code{"text"}.
#' @param writeto Path to write output to.
#' @param funcs List of function symbols to tabulate.
#' @param bycol Column to split data by.
#'
#' @return Nothing.
#'
#' @keywords internal
mc_write_table <- function(ctab, data, by, writeto, funcs, bycol) {
	wtab <- copy(ctab)

	# merge form and animacy glosses
	wtab[, GRAID := gform]
	wtab[ganim != "", GRAID := stringi::stri_pad_right(GRAID, width = 6, pad = "~")]
	wtab[ganim != "", GRAID := paste0(GRAID, ".", ganim)]

	# add GRAID delimiters and LaTeX markup
	wtab[, GRAID := gsub("^(.*)$", "\\\\tann{\\1}", GRAID)]
	wtab[ganim == "" & gform != "other", GRAID := gsub("^", "\\\\smallskip", GRAID)]


	if (by == "all") {
		colno <- 3
	} else {
		colno <- 4
	}

	# rename columns
	setnames(wtab, colno:(length(funcs) + colno - 1), paste0("\\tcolm{\\tannC{:", funcs, "}}"))

	# reorder columns
	setcolorder(wtab, "GRAID")


	# calculate row margin totals
	wtab[, totals := as.integer(rowSums(.SD)), .SDcols = c((colno + 1):(length(funcs) + colno))]

	# calculate column margin totals
	if (by == "all") {
		wtab <- rbind(wtab, c("GRAID" = "\\midrule\\bigskip\\textit{totals}", "gform" = "", "ganim" = "",
							wtab[, lapply(.SD, sum, na.rm = TRUE), .SDcols = c((colno + 1):(length(funcs) + colno + 1))]))
	} else if (by == "corpus") {
		wtab <- rbind(wtab, c("GRAID" = "\\midrule\\bigskip\\textit{totals}", "corpus" = "", "gform" = "", "ganim" = "",
							wtab[, lapply(.SD, sum, na.rm = TRUE), .SDcols = c((colno + 1):(length(funcs) + colno + 1))]))
	} else {
		wtab <- rbind(wtab, c("GRAID" = "\\midrule\\bigskip\\textit{totals}", "text" = "", "gform" = "", "ganim" = "",
							wtab[, lapply(.SD, sum, na.rm = TRUE), .SDcols = c((colno + 1):(length(funcs) + colno + 1))]))
	}

	wtab[nrow(wtab), totals := NA_integer_]

	# add rows for clause units
	wtab <- rbind(wtab, wtab[1:2])
	wtab[(nrow(wtab) - 1):nrow(wtab), GRAID := c("\\tann{\\#\\#}", "\\tann{\\#}")]
	wtab[(nrow(wtab) - 1):nrow(wtab), c((colno + 1):(length(funcs) + colno)) := NA_integer_]

	# calculate number of clause units
	if (by == "all") {
		cl_double <- data[grepl("##", gform), .N, ]
		cl_single <- data[grepl("#", gform), .N, ]
		cl_nc <- data[grepl("#nc", gform), .N, ]
	} else {
		cl_double <- data[get(bycol) == as.character(wtab[1, .SD, .SDcols = bycol][[1]]) & grepl("##", gform), .N, ]
		cl_single <- data[get(bycol) == as.character(wtab[1, .SD, .SDcols = bycol][[1]]) & grepl("#", gform), .N, ]
		cl_nc <- data[get(bycol) == as.character(wtab[1, .SD, .SDcols = bycol][[1]]) & grepl("#nc", gform), .N, ]
	}

	# add clause counts to table
	wtab[(nrow(wtab) - 1):nrow(wtab), totals := c(cl_double, cl_single - cl_double - cl_nc)]

	# add row for clause unit totals
	wtab <- rbind(wtab, wtab[nrow(wtab)])
	wtab[nrow(wtab), GRAID := "\\midrule\\textit{totals}"]
	wtab[nrow(wtab), totals := cl_single - cl_nc]

	# rename columns
	setnames(wtab, c("GRAID", "totals"), c("\\tcolm{GRAID}", "\\tcolm{\\textit{totals}}"))

	# remove unneeded columns
	if (by != "all") { wtab[, (bycol) := NULL] }
	wtab[, gform := NULL]
	wtab[, ganim := NULL]


	# compose file name
	if (by == "all") {
		filename <- "multicast_counts"
	} else if (by == "corpus") {
		filename <- paste0("mc_", ctab[[1, 1]], "_counts")
	} else {
		filename <- paste0("mc_",
						   data[text == as.character(ctab[1, .SD, .SDcols = bycol][[1]]), ][[1, 1]],
						   "_",
						   ctab[[1, 1]],
						   "_counts")
	}

	# compose caption
	if (by == "all") {
		caption <- "Summarized GRAID counts for the Multi-CAST collection."
	} else if (by == "corpus") {
		caption <- "Summarized GRAID counts for the entire \\getlist[1]{corpus} corpus."
	} else {
		caption <- paste0("Summarized GRAID counts for the \\textit{", ctab[[1, 1]], "} text.")
	}

	# compose file path
	if (grepl("/$", writeto)) {
		filepath <- paste0(writeto, filename, ".tex")
	} else {
		filepath <- paste0(writeto, "/", filename, ".tex")
	}


	# write LaTeX table to file
	xtable::print.xtable(xtable::xtable(wtab,
										align = c("l", "l",
												  rep(">{\\raggedleft}X", length(funcs)),
												  ">{\\raggedleft\\arraybackslash}X"),
										digits = 0,
										caption = caption,
										table.placement = ""),
						 file = filepath,
						 comment = FALSE,
						 type = "latex",
						 latex.environments = "flushleft",
						 include.rownames = FALSE,
						 sanitize.text.function = identity,
						 sanitize.colnames.function = identity,
						 table.placement = "h!",
						 tabular.environment = "tabularx",
						 width = "600pt",
						 booktabs = TRUE,
						 size = "small",
						 caption.placement = "bottom")
}

# stop RMD CHECK from complaining about unbound global variables
if (getRversion() >= "2.15.1") {
	utils::globalVariables(c("xform", "xanim", "xfunc", "GRAID",
							 "totals", "text"))
}
