## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ---- dpi=150, fig.width=3.5, fig.height=2.5----------------------------------
#load RADstackshelpR package
library(RADstackshelpR)
#optimize_m function will generate summary stats on your 5 iterative runs
#input can be full path to each file, or just the file name if the vcf files are in your working directory
m.out<-optimize_m(m3 = system.file("extdata", "m3.vcf.gz", package = "RADstackshelpR"),
                     m4 = system.file("extdata", "m4.vcf.gz", package = "RADstackshelpR"),
                     m5 = system.file("extdata", "m5.vcf.gz", package = "RADstackshelpR"),
                     m6 = system.file("extdata", "m6.vcf.gz", package = "RADstackshelpR"),
                     m7 = system.file("extdata", "m7.vcf.gz", package = "RADstackshelpR"))
#Assigning the output of this function to the variable 'm.out' should generate a list containing five objects of class 'data.frame' with the following characteristics: 'depth' showing depth per sample for each m value, 'snp' showing the number of non-missing SNPs retained in each sample at each m value, 'loci' showing the number of non-missing loci retained in each sample at each m value, 'snp.R80' showing the total number of SNPs retained at an 80% completeness cutoff, and 'loci.R80' showing the total number of polymorphic loci retained at an 80% completeness cutoff.
#Use this output list as input for this function, to visualize the effect of varying m on the depth of each sample
vis_depth(output = m.out)
#visualize the effect of varying m on the number of SNPs retained
vis_snps(output = m.out, stacks_param = "m")
#visualize the effect of varying m on the number of loci retained
vis_loci(output = m.out, stacks_param = "m")
#3 is the optimal m value, and will be used next to optimize M

## ---- dpi =150, fig.width=3.5, fig.height=2.5---------------------------------
#optimize_bigM function will generate summary stats on your 8 iterative runs
M.out<-optimize_bigM(M1 = system.file("extdata", "bigM1.vcf.gz", package = "RADstackshelpR"),
                     M2 = system.file("extdata", "bigM2.vcf.gz", package = "RADstackshelpR"),
                     M3 = system.file("extdata", "bigM3.vcf.gz", package = "RADstackshelpR"),
                     M4 = system.file("extdata", "bigM4.vcf.gz", package = "RADstackshelpR"),
                     M5 = system.file("extdata", "bigM5.vcf.gz", package = "RADstackshelpR"),
                     M6 = system.file("extdata", "bigM6.vcf.gz", package = "RADstackshelpR"),
                     M7 = system.file("extdata", "bigM7.vcf.gz", package = "RADstackshelpR"),
                     M8 = system.file("extdata", "bigM8.vcf.gz", package = "RADstackshelpR"))
#Assigning the output of this function to the variable 'M.out' should generate a list containing four objects of class 'data.frame' with the following characteristics: 'snp' showing the number of non-missing SNPs retained in each sample at each m value, 'loci' showing the number of non-missing loci retained in each sample at each m value, 'snp.R80' showing the total number of SNPs retained at an 80% completeness cutoff, and 'loci.R80' showing the total number of polymorphic loci retained at an 80% completeness cutoff.
#use this function to visualize the effect of varying 'M' on the number of SNPs retained
vis_snps(output = M.out, stacks_param = "M")
#visualize the effect of varying 'M' on the number of polymorphic loci retained
vis_loci(output = M.out, stacks_param = "M")
#optimal value for this dataset is M = 2

## ---- dpi=150, fig.width=3.5, fig.height=2.5----------------------------------
#optimize n
n.out<-optimize_n(nequalsMminus1 = system.file("extdata", "nequalsmminus1.vcf.gz", package = "RADstackshelpR"),
                        nequalsM = system.file("extdata", "nequalsm.vcf.gz", package = "RADstackshelpR"),
                        nequalsMplus1 = system.file("extdata", "nequalsmplus1.vcf.gz", package = "RADstackshelpR"))
##Assigning the output of this function to the variable 'n.out' should generate a single object of class 'data.frame' showing the number of SNPs and loci retained across filtering levels for each value of n.
#visualize the effect of varying n on the number of SNPs retained
vis_snps(output = n.out, stacks_param = "n")
#visualize the effect of varying n on the number of polymorphic loci retained
vis_loci(output = n.out, stacks_param = "n")

