#'Export files generated by BOLDconnectR
#'
#' @description
#'The function is used to export some of the output data generated by BOLDconnectR
#'
#' @param bold_df The data.frame either retrieved from [bold.fetch()], `bold.analyze.align` or a user modified BCDM dataset.
#' @param export_type A character input specifying the type of output required. Must be one of "preset_df", "msa" or "fas".
#' @param presets A single character vector specifying a preset for which a data summary is sought (Check the `details` section for more information). Default value is NULL.
#' @param cols_for_fas_names A single or multiple character vector indicating the column headers that should be used to name each sequence for the unaligned FASTA file. Default is NULL; in this case, only the processid is used as the name.
#' @param export A character value specifying the data file path and the name for the file. Extension should be included.
#'
#' @details
#' `bold.export` offers an added export option for some of the sequence-based outputs obtained from functions within the `BOLDconnectR` package as well as a `preset` defined modified BCDM dataframe. Sequence information from the BCDM data downloaded via [bold.fetch()] can be directly exported as an unaligned FASTA file with `export_type`=`fas`, while the aligned sequences (in the modified BCDM dataframe) obtained from `bold.analyze.align` can be exported as a FASTA file with `export_type`=`msa`. The FASTA headers for individual sequences when `export_type`=`fas` can be customized by using the `cols_for_fas_names` argument. If more than one field is specified, the fields are joined with `|` in the order given in the vector. The multiple sequence aligned FASTA file uses the same name provided by the user in the `bold.analyze.align()` function and using the `cols_for_fas_names` argument in this case will throw an error. `presets` can be considered as collections of predefined columns from the fetched BCDM data that relate to a common theme. The number of columns in each preset varies based on data availability. There are six presets currently available in the package (`taxonomy`, `geography`, `sequences`, `attributions`, `ecology_biogeography` & `other_meta_data`). Fields included in each preset are as follows:
#' * taxonomy = "kingdom", "phylum", "class", "order", "family", "subfamily", "genus", "species", "bin_uri".
#' * geography = "country.ocean", "country_iso", "province.state", "region", "sector", "site", "site_code", "coord", "coord_accuracy", "coord_source".
#' * sequences =  "nuc", "nuc_basecount", "marker_code", "sequence_run_site", "sequence_upload_date".
#' * attributions = "inst", "identification", "identification_method", "identification_rank", "identified_by", "collectors".
#' * ecology_biogeography = "elev", "elev_accuracy", "depth", "depth_accuracy", "habitat", "ecoregion", "biome", "realm", "coord", "coord_source".
#' * other_meta_data = "notes", "taxonomy_notes", "funding_src", "voucher_type", "tissue_type", "sampling_protocol".
#'
#' "processids" and "sampleids" are present in all the presets.
#' Only one preset can be used at a time. `presets` should be NULL when exporting a FASTA file to avoid errors. Tabular data can be exported as a csv/tsv file. The file path, including the name of the output file and the corresponding file extension (csv or tsv), should be provided (e.g. 'C:/Users/xyz/Desktop/fetch_data_output.csv' for Windows). This functionality is developed with the future potential of uploading data to BOLD using the package.
#'
#' @examples
#' \dontrun{
#' # Download the records
#' data_for_export_ids <- bold.public.search(taxonomy = list("Poecilia reticulata"))
#'
#' # Fetch the data using the ids.
#' #1. api_key must be obtained from BOLD support before using `bold.fetch()` function.
#' #2. Use the `bold.apikey()` function  to set the apikey in the global env.
#'
#' bold.apikey('apikey')
#'
#' # Fetching the data using the ids
#' data_for_export <- bold.fetch(get_by = "processid",
#'                               identifiers = data_for_export_ids$processid)
#'
#' #1. Export the BCDM data using 'presets' as a csv file
#' bold.export(bold_df=data_for_export,
#'             export_type = "preset_df",
#'             presets = 'taxonomy',
#'             export = file.path(tempdir(), "file_path_with_intended_name.csv"))
#'
#' #2. Export the fasta file (unaligned)
#' # Note that input data here is the original BCDM data (data_for_export)
#' bold.export(bold_df = data_for_export,
#'             export_type = "fas",
#'             cols_for_fas_names = c("bin_uri","genus","species"),
#'             export = file.path(tempdir(),"file_path_with_intended_name.fas"))
#'
#' #3. Export multiple sequence alignment
#' #a. Align the data
#' # (using processid and bin_uri as fields for sequence names)
#' # Users need to install and load packages `msa` and `Biostrings` before using bold.analyze.align.
#' seq_align<-bold.analyze.align(data_for_export,
#'                               cols_for_seq_names = c("processid","bin_uri"),
#'                               align_method = "ClustalOmega")
#'
#' #b. Export the multiple sequence alignment
#' # Note the input data here is the modified BCDM data (seq_align)
#' bold.export(bold_df=seq_align,
#'             export_type = "msa",
#'             export = "file_path_with_intended_name.fas")
#' }
#'
#' @returns It exports a .fas or a csv/tsv file based on the export argument.
#'
#' @importFrom utils write.table
#' @importFrom ape read.dna
#' @importFrom ape write.FASTA
#'
#' @export
#'
bold.export <- function(bold_df,
                        export_type = c("preset_df", "msa", "fas"),
                        presets = NULL,
                        cols_for_fas_names = NULL,
                        export) {
  # Resolve `export_type` to exactly one valid choice. Handing the default
  # three-element vector straight to switch() is an error, so without this
  # the default argument value could never be used.
  export_type <- match.arg(export_type)

  # Fail early with a clear message rather than an obscure error raised by
  # the writer functions further down.
  if (missing(export) || !is.character(export) || length(export) != 1L ||
      is.na(export) || !nzchar(export)) {
    stop("'export' must be a single non-empty character string giving the output file path.")
  }

  # Check if data is a non empty data frame object
  df_checks(bold_df)

  switch(export_type,

    "preset_df" = {
      if (is.null(presets)) stop("One of the presets must be provided when export_type = preset_df.")

      preset_data <- check_and_return_preset_df(bold_df,
                                                category = "check_return",
                                                preset = presets)

      # A bare file name (no path separator) is written to the working
      # directory.
      if (!grepl("[/\\\\]", export)) {
        export <- file.path(getwd(), export)
      }

      # The delimiter is decided by the file extension (case-insensitive).
      is_csv <- grepl("\\.csv$", export, ignore.case = TRUE)
      is_tsv <- grepl("\\.tsv$", export, ignore.case = TRUE)

      if (!is_csv && !is_tsv) {
        stop("Unsupported file type. Please provide a valid '.csv' or '.tsv' filename.")
      }

      # CSV fields are quoted (embedded quotes doubled) so that values that
      # themselves contain commas do not shift columns when the file is read
      # back. TSV output stays unquoted.
      utils::write.table(preset_data,
                         export,
                         sep = if (is_csv) "," else "\t",
                         row.names = FALSE,
                         quote = is_csv,
                         qmethod = "double")
    },

    "msa" = {
      if (!is.null(cols_for_fas_names) || !is.null(presets)) stop("Please remove any presets or field names provided in the 'presets' or 'cols_for_fas_names' arguments.")

      # The sequence names of an alignment are read from `msa.seq.name`.
      if (!"msa.seq.name" %in% names(bold_df)) {
        stop("'bold_df' must contain a 'msa.seq.name' column when export_type = 'msa'.")
      }

      # Keep only records that have actual sequence data (non-missing and
      # not made up of gap characters only), then retain the aligned
      # sequence together with its name.
      seq_data <- bold_df %>%
        dplyr::filter(!is.na(nuc)) %>%
        dplyr::mutate(nuc = gsub("-", "", nuc)) %>%
        dplyr::filter(nuc != "") %>%
        dplyr::select(dplyr::matches("^aligned_seq$", ignore.case = TRUE),
                      dplyr::matches("^msa.seq.name$", ignore.case = TRUE)) %>%
        dplyr::rename("seq.name" = "msa.seq.name")

      # Export the result as a fasta file.
      result <- generate_ape_file(data = seq_data,
                                  align_unaligned = "aligned_seq")

      ape::write.FASTA(result, file = export)
    },

    "fas" = {
      if (!is.null(presets)) stop("Please remove any presets provided in the 'presets' arguments.")

      # Documented default: when no naming fields are given, each sequence
      # is named by its processid (previously the header ended up empty).
      name_cols <- if (length(cols_for_fas_names) == 0L) {
        "processid"
      } else {
        unique(cols_for_fas_names)
      }

      # Keep the sequence and naming columns, drop records without sequence
      # data and strip gap characters from the raw sequences.
      seq_data <- bold_df %>%
        dplyr::select(dplyr::matches("^nuc$", ignore.case = TRUE),
                      dplyr::all_of(name_cols)) %>%
        dplyr::filter(!is.na(nuc)) %>%
        dplyr::mutate(nuc = gsub("-", "", nuc)) %>%
        dplyr::filter(nuc != "") %>%
        dplyr::mutate(dplyr::across(dplyr::all_of(name_cols), as.character)) %>%
        dplyr::as_tibble()

      # Build the FASTA header by joining the naming fields with "|" in the
      # order supplied. paste() is vectorised over rows, which avoids the
      # per-row overhead of rowwise()/c_across() on large datasets.
      name_parts <- lapply(name_cols, function(col) seq_data[[col]])
      seq_data$seq.name <- do.call(paste, c(name_parts, sep = "|"))

      # Export the result as a raw fasta file.
      result <- generate_ape_file(data = seq_data,
                                  align_unaligned = "nuc")

      ape::write.FASTA(result, file = export)
    }
  )

  # Called for its side effect (writing a file) only.
  invisible(NULL)
}
