#todo: enforce use of #iterator syntax in init section for vars with length > 1

#note that there's a bit of trickery in interpreting list tags
#they varnames are stored as only the prefix in the initCollection (no #iterator)
#and they are referenced in the body as var#iterator
#At this point, doesn't enforce proper use of iterator with a list

#setwd("C:/Users/Michael Hallquist/Documents/Automation_Sandbox")
#createModels("C:/Users/Michael Hallquist/Documents/Automation_Sandbox/LSPD Covariate Template.txt")
#system.time(createModels("C:/Users/Michael Hallquist/Documents/Automation_Sandbox/LSPD Template.txt"))
#createModels("C:/Users/Michael Hallquist/Documents/Automation_Sandbox/LSPD Template New Init.txt")

#need to think more closely about occasions where there are non-contiguous iterators
#this would break the array matching syntax, which looks up values by numeric position of the iterator
#but what about iterator with value 1 5 7, and array is c("a", "b", "c"). Then array[3] is missing, but
#logically represents value 7. Could work around with a named array that takes the value of the iterator
#names(array) <- as.character(iterator)

#need to sort out why is.na is working for lookupValue in replaceBodyTags
#in particular, why isn't the current value carrying over from the previous looping iteration?

#SOME THOUGHTS RE DOCUMENTATION
#foreach tags may only be with respect to an iterator... could not have some random foreach var
friendlyGregexpr <- function(pattern, charvector, perl=TRUE) {
  require(plyr)
  #now create data frame documenting the start and end of all tags
  #rather than ldply, need a usual loop to track element number (in cases where charvector is a vector)
  regexpMatches <- gregexpr(pattern, charvector, perl=perl)
  
  convertMatches <- c()
  for (i in 1:length(regexpMatches)) {
    thisLine <- regexpMatches[[i]]
    #only append if there is at least one match on this line
    if (thisLine[1] != -1) {
      convertMatches <- rbind(convertMatches, data.frame(element=i, start=thisLine, end=thisLine+attr(thisLine, "match.length")-1))
    }
  }
  
  #if no matches exist, return null (otherwise, will break adply)
  if (is.null(convertMatches)) return(NULL)
  
  #okay, now we have a data frame with the line, starting position, and ending position of every tag
  
  #time to classify into simple, array, iterator, and conditional
  
  #first, add the actual tag to the data.frame to make it easier to parse
  #using adply (is this not its intended use?) to iterate over rows and apply func
  convertMatches <- adply(convertMatches, 1, function(row) {
        row$tag <- substr(charvector[row$element], row$start, row$end)
        #for some reason, adply does not respect the stringsAsFactors here
				return(as.data.frame(row, stringAsFactors=FALSE))        
      })

	convertMatches$tag <- as.character(convertMatches$tag)
  return(convertMatches)
}


classifyTags <- function(tagVector, iteratorsVector) {
  #accepts a vector of tags to be classified
  #also needs a vector of the iterators to correctly classify tags
  #returns a vector of tag types
  
  #creates an empty character vector of the same length as tagVector (each element defaults to "") 
  tagType <- vector(mode="character", length=length(tagVector))

  #default to missing for tag type (replaced below)
  #tagData$tagType <- NA_character_
  
  iteratorsRegEx <- paste("\\[\\[\\s*(", paste(iteratorsVector, collapse="|"), ")\\s*\\]\\]", sep="")
  iteratorPositions <- grep(iteratorsRegEx, tagVector, perl=T)
  tagType[iteratorPositions] <- "iterator"
  
  arrayRegEx <- paste("\\[\\[\\s*\\b([\\w\\.]+)#(", paste(iteratorsVector, collapse="|"), ")\\b\\s*\\]\\]", sep="")
  arrayPositions <- grep(arrayRegEx, tagVector, perl=T)
  tagType[arrayPositions] <- "array"
  
  #optional forward slash for closing tags
  #could the alternation syntax be problematic if variable names overlaps (e.g., x matching xy)? Use word boundaries?
  #any reason to limit this to iterators?!
  
  conditionalRegEx <- paste("\\[\\[\\s*/*(", paste(iteratorsVector, collapse="|"), ")\\s*[!><=]+\\s*\\d+\\s*\\]\\]", sep="")
  conditionalPositions <- grep(conditionalRegEx, tagVector, perl=T)
  tagType[conditionalPositions] <- "conditional"
  
  #simple tags -- not wrt iterators, not conditional
  #use negative lookahead to skip tags that are iterators
  simpleRegEx <- paste("\\[\\[\\s*(?!", paste(iteratorsVector, collapse="|"), ")[\\w+\\.]+\\s*\\]\\]", sep="")  
  simplePositions <- grep(simpleRegEx, tagVector, perl=T)
  tagType[simplePositions] <- "simple"
  
  return(tagType)
}

getInitTags <- function(initCollection) {
  initMatches <- c()
  for (i in 1:length(initCollection)) {
    if (storage.mode(initCollection[[i]]) == "character") {
      matches <- friendlyGregexpr("\\[\\[\\s*[\\s\\w=><!#/]+\\s*\\]\\]", initCollection[[i]], perl=T)
      #if there are matches for this item, add its position in the list pos
      #the idea is that the list has elements and the elements can be vectors
      #thus, a match may occur for initCollection[[5]][3] if the fifth element of the list is a vector
      #and the match is the third element.
      if (!is.null(matches)) matches$listpos <- i
      initMatches <- rbind(initMatches, matches)
    }
  }
  
  #successfully creates a data.frame of the sort below.
#   element start end                          tag listpos
#1        1     1  11                  [[classes]]      14
#2        1    19  38         [[groupnames#group]]      14
#3        1    40  63     [[outcomenames#outcome]]      14
#4        1    65  84         [[modelnames#model]]      14
#5        1    85 112 [[zeroclassnames#zeroclass]]      14
#6        1     6  29     [[outcomenames#outcome]]      15
#7        1    31  50         [[groupnames#group]]      15
#8        1    73  92         [[modelnames#model]]      15
#9        1     1   9                    [[hello]]      17
#10       2     1  10                   [[hello2]]      17
  
  #classify tags in terms of simple, array, iterator, conditional, foreach 
  if (!is.null(initMatches) && nrow(initMatches) > 0) {
    initMatches$tagType <- classifyTags(initMatches$tag, initCollection$iterators)
    
    #chop off the [[ ]] portion of the tags, along with any leading or trailing space
    #this makes it easier to use the sub function to update current values   
    initMatches$tag <- sapply(initMatches$tag, function(tag) {
          return(sub("\\[\\[\\s*([\\s\\w=><!#/]+)\\s*\\]\\]", "\\1", tag, perl=TRUE))
        })
  }
  
  #return empty data frame if no matches 
  if (is.null(initMatches)) return(data.frame())
  else return(initMatches)
}

parseTags <- function(bodySection, initCollection) {
  #parses tags in the body section (character vector) and
  #init collection (list of vars defined in the init section
  #returns a list with $initTags and $bodyTags
  #where each list represents the location, start character, end character, tag type, etc.
  #of each tag
  
  #first handle init tags
  initMatches <- getInitTags(initCollection)
  
  initMatches$currentValue <- NA_character_
  
  bodyTagRegex <- "\\[\\[\\s*[\\s\\w=><!#/]+\\s*\\]\\]"
  bodyMatches <- friendlyGregexpr(bodyTagRegex, bodySection, perl=TRUE)
  
  bodyMatches$tagType <- classifyTags(bodyMatches$tag, initCollection$iterators)  
  #okay, now every tag is categorized
  #the notion here is to substitute in the running value for a given variable
  #then we'll do a mass substitute for each model
  bodyMatches$currentValue <- NA_character_
  
  #chop off the [[ ]] portion of the tags, along with any leading or trailing space
  bodyMatches$tag <- sapply(bodyMatches$tag, function(tag) {
        return(sub("\\[\\[\\s*([\\s\\w=><!#/]+)\\s*\\]\\]", "\\1", tag, perl=TRUE))
      })
  
 
  #return a three-element list with constituent data frames for init and body tags.
  return(list(initTags=initMatches, bodyTags=bodyMatches, bodyText=bodySection))  
  
}


#should probably have the function cd to wherever the template file is located (if given as abs path)
#todo: allow for direct runs?
createModels <- function(templatefile) {
  if (!file.exists(templatefile)) stop("Template file not found.")
  
  readfile <- scan(templatefile, what="character", sep="\n", strip.white=FALSE)
  
  #divide into init versus body
  startinit <- grep("[[init]]", readfile, fixed=T)
  endinit <- grep("[[/init]]", readfile, fixed=T)
  
  if (length(startinit) != 1 || length(endinit) != 1) {
    stop("Unable to find init section in template file.")
  }
  
  #extract init section
  initSection <- readfile[(startinit+1):(endinit-1)]
  
  #extract body section
  bodySection <- readfile[(endinit+1):length(readfile)]
  
  #convert the init text into a list object containing parsed init instructions
  initCollection <- processInit(initSection)
	
  templateTags <- parseTags(bodySection, initCollection)
  
  #lookup values for simple tags, which won't vary by iterator
  templateTags <- lookupSimpleTags(templateTags, initCollection) 
   
  #kick off the recursive replace
  if (length(initCollection$iterators) > 0) {
    recurseReplace(templateTags, initCollection)
  }
}

lookupSimpleTags <- function(templateTags, initCollection) {
  #the purpose of this function is to set the currentValue column
  #for the bodyTags and initTags data.frames for simple tags only.
  #Most values will be replaced at the bottom level of recursion,
  #but simple tags do not change over iterations, so can be set one time.
  
#  #locate simple tags in body
#  simpleBodyPositions <- which(templateTags$bodyTags$tagType=="simple")
#
#  #replace tag with value
#  templateTags$bodyTags$currentValue[simpleBodyPositions] <- sapply(templateTags$bodyTags$tag[simpleBodyPositions],
#      function(value) {
#        currentValue <- eval(parse(text=paste("initCollection$", value, sep="")))
#        if (regexpr("\\[\\[\\s*[\\s\\w=><!#/]+\\s*\\]\\]", currentValue, perl=TRUE) > 0) {
#          #The replacement tag itself contains additional tags.
#          #Thus, not a simple replacement. This replacement needs to be deferred until
#          #we have iterated to the bottom of the tree and have all needed information
#          #set a deferred value to be replace later
#          currentValue <- "..deferred.."
#        }
#        return(currentValue)
#      })

  #locate simple tags in init
  simpleInitPositions <- which(templateTags$initTags$tagType=="simple")
  
  templateTags$initTags$currentValue[simpleInitPositions] <- sapply(templateTags$initTags$tag[simpleInitPositions],
      function(value) {
        return(eval(parse(text=paste("initCollection$", value, sep=""))))
      })
  
  return(templateTags)
  
}

#Better idea: only updateCurrentValues for init tags collection
#Body tags currentValues are substituted at the bottom-most level after
#init collection is finalized (recursively process any nested tags)

#And only update init collection for the respective iterator

#Only need to update values for a given iterator....
#The issue is that values for a given iterator shouldn't change when another iterator is active


updateCurrentValues <- function(templateTags, initCollection) {
  #need to replace array and iterator tags for this iterator
  
  #locate iterator tags in init
  initIteratorPositions <- which(templateTags$initTags$tagType=="iterator" & templateTags$initTags$tag == initCollection$curIteratorName)
  
  #set the current value to the position in the looping process for this iterator
  templateTags$initTags$currentValue[initIteratorPositions] <- initCollection$curItPos[initCollection$curIteratorDepth]

  #okay, allow for iterator lookups here... just to an is.na check in the replaceBodyTags
  #locate iterator tags in body
  bodyIteratorPositions <- which(templateTags$bodyTags$tagType=="iterator" & templateTags$bodyTags$tag == initCollection$curIteratorName)
  
  templateTags$bodyTags$currentValue[bodyIteratorPositions] <- initCollection$curItPos[initCollection$curIteratorDepth]

  #Next, handle array tags
  #figure out the iterator for each array tag and only select those that are relevant to the current iterator
  initArrayPositions <- which(templateTags$initTags$tagType=="array")

  #use plyr's splitter_a function to divide dataset by row (builds a big list)
  divideByRow <- splitter_a(templateTags$initTags[initArrayPositions,], 1)
  
  #for each element of the list, check for a match with this iterator and return the value of interest
  #if the array tag is not for this iterator, return the current value unchanged
  templateTags$initTags$currentValue[initArrayPositions] <- unlist(sapply(divideByRow,
      function(row) {
        split <- strsplit(row$tag, split="#", fixed=TRUE)[[1]]
        if (length(split) != 2) stop("array tag missing iterator: ", row$tag)
        
        if (split[2] == initCollection$curIteratorName) {
          currentValue <- eval(parse(text=paste("initCollection$", split[1], "[", initCollection$curItPos[initCollection$curIteratorDepth], "]", sep="")))
          if (is.null(currentValue)) stop("When replacing tag: ", row$tag, ", could not find corresponding value.")
          return(currentValue)
        }
        else return(row$currentValue) #return unchanged current value if not this iterator
      }))

# for now, we don't use any current values for body tags collection (handled at bottom)
#  #conduct same process for body tags: locate array tags and update values for this iterator
#  bodyArrayPositions <- which(templateTags$bodyTags$tagType=="array")
#  
#  #use plyr's splitter_a function to divide dataset by row (builds a big list)
#  divideByRow <- splitter_a(templateTags$bodyTags[bodyArrayPositions,], 1)
#  
#  #for each element of the list, check for a match with this iterator and return the value of interest
#  templateTags$bodyTags$currentValue[bodyArrayPositions] <- unlist(sapply(divideByRow,
#      function(row) {
#        split <- strsplit(row$tag, split="#", fixed=TRUE)[[1]]
#        if (length(split) != 2) stop("array tag missing iterator: ", row$tag)
#        
#        if (split[2] == initCollection$curIteratorName) {
#          currentValue <- eval(parse(text=paste("initCollection$", split[1], "[", initCollection$curItPos[initCollection$curIteratorDepth], "]", sep="")))
#          if (regexpr("\\[\\[\\s*[\\s\\w=><!#/]+\\s*\\]\\]", currentValue, perl=TRUE) > 0) {
#            #The replacement tag itself contains additional tags.
#            #Thus, not a simple replacement. This replacement needs to be deferred until
#            #we have iterated to the bottom of the tree and have all needed information
#            #set a deferred value to be replace later
#            currentValue <- "..deferred.."
#          }
#          if (is.null(currentValue)) stop("When replacing tag: ", row$tag, ", could not find corresponding value.")
#          return(currentValue)
#        }
#        else return(row$currentValue) #return unchanged current value if not this iterator
#      }))

  return(templateTags)
}


recurseReplace <- function(templateTags, initCollection, curiterator=1) {
  #bodySection is the character vector representing each line of the body section
  #bodyTags is a data.frame documenting the location and type of all tags in bodySection
  #initCollection is the list of all arguments parsed from the init section
  #initTags is a data.frame documenting the location and type of all tags in initCollection
  #curiterator is an integer that tracks of the depth of recursion through the iterators
  
  if (!is.list(initCollection)) {
    stop("Argument list passed to recurseReplace is not a list")
  }
  
  thisIterator <- initCollection$iterators[curiterator]
  
  #set the current iterator for the collection (used by replaceTags)
  initCollection$curIteratorName <- thisIterator
  initCollection$curIteratorDepth <- curiterator

  #would it work better to use a named array here?
  #like curItVals <- c(1, 3, 5, 2) for iterators a, b, c, d
  #then names(curItVals) <- c("a", "b", "c", "d")
  
  for (i in initCollection[[thisIterator]]) {
    
    #set the current position within this iterator for use in replace tags
    #create a vector of iterator positions for use in replaceTags
    #initCollection$curItPos[curiterator] <- i
    
    #add the iterator name to the vector of iterator positions
    #this has the same effect as above (appending as it recurses), but allows for name-based lookup
    initCollection$curItPos[thisIterator] <- i
    
    #print(paste("current iterator is:", thisIterator, ", position:", as.character(i)))
    
    #process foreach commands
    #For now, take this out
    #bodySection <- processForEachTags(bodySection, initCollection)
    
    #update the current values for this iterator and this iteration
    #this applies for every iterator and iteration, not just processing
    #at the deepest level. The function only updates array and iterator
    #tags that match this iterator, thus minimizing redundant work.
    #the latest is that only init collection tags will be updated
    #then body tags are replaced at the bottom level after init collection is finalized
    templateTags <- updateCurrentValues(templateTags, initCollection)
        
    if (curiterator < length(initCollection$iterators)) {
      #if not at deepest level, recurse to the next level by adding 1 to the iterator
      
      #NOTE to self: consider adding a "foreachReplacements" collection to templateTags
      #that contains the expansions of these tags (appended per iteration)
      #this avoids having to think about reparsing the tags based on new code created by foreach
          
      recurseReplace(templateTags, initCollection, curiterator = curiterator+1)
    }
    else {
      #we have reached the bottom of the iteration tree
      #simple, array, and iterator tags should be up to date in the templateTags collection 
      
      #first delete conditional tags from the body section, reduce subsequent processing burden
      #need to return templateTags collection from processConditionalTags (including bodyText)
			
			#need to use a copy of templateTags to avoid it affecting subsequent loop iterations
			finalTemplateTags <- processConditionalTags(templateTags, initCollection)
			
			#the body section to write is stored in the templateTags collection
			toWrite <- finalTemplateTags$bodyText

      #create a separate initCollection with the appropriate values substituted.
      finalInitCollection <- replaceInitTags(finalTemplateTags$initTags, initCollection)
  
      #finalize init collection values (in cases of nested tags)
      #wades through init collection for any remaining tags and replaces them
      finalInitCollection <- finalizeInitCollection(finalInitCollection)
        
      #update bodySection with tag values from finalized init tags
      toWrite <- replaceBodyTags(toWrite, finalTemplateTags$bodyTags, finalInitCollection)

      filename <- finalInitCollection$filename
      
      print (paste("writing file: ", filename))
      curdir <- getwd()
      
      #figure out the output directory
      outputDir <- finalInitCollection$outputDirectory
      
      if (!file.exists(outputDir)) {
        dir.create(outputDir, recursive=T)
      }
      
      setwd(outputDir)
      
      #make sure that no line is more than 90 chars
      toWrite <- unlist(lapply(toWrite, function(line) {
                if (nchar(line) > 90) {
                  strwrap(line, width=85, exdent=5)
                }
                else line
              }))
      
      writeLines(toWrite, con = filename, sep = "\n")
      
      setwd(curdir)
      
    }
  }
}

replaceInitTags <- function(initTags, initCollection) {
  targetRows <- which(initTags$tagType %in% c("simple", "iterator", "array"))
  targetTags <- initTags[targetRows,]
  targetTags$rownumber <- 1:nrow(targetTags)
  
  #going to re-use this chunk in finalizeSubstitutions, so functionalize... consider the looping replacement here
  for (i in 1:nrow(targetTags)) {
    row <- targetTags[i,]
    stringToChange <- initCollection[[row$listpos]][row$element]
    
    if(row$start > 1) preTag <- substr(stringToChange, 1, row$start-1)
    else preTag <- ""
    
    if(row$end < nchar(stringToChange)) postTag <- substr(stringToChange, row$end+1, nchar(stringToChange))
    else postTag <- ""
    
    initCollection[[row$listpos]][row$element] <- paste(preTag, row$currentValue, postTag, sep="")
    
    subsequentRows <- which(targetTags$rownumber > i & targetTags$listpos==row$listpos & targetTags$element==row$element)
    
    if (length(subsequentRows > 0)) {
      
      #need to offset subsequent start/stops by the difference between the tag and replacement lengths
      diffLength <- nchar(row$currentValue) - (row$end - row$start + 1)
      
      #update rows in targetTags that have additional tags on the same row
      #need to offset by the diffLength
      targetTags[subsequentRows,"start"] <- targetTags[subsequentRows,"start"] + diffLength
      targetTags[subsequentRows,"end"] <- targetTags[subsequentRows,"end"] + diffLength
    }
  }
  
  #refresh the initTags collection with the replaced values
  #need to dump the rownumber to align the data.frames (templateTags doesn't have a rownumber field)
  targetTags$rownumber <- NULL
  initTags[targetRows,] <- targetTags
  
  #return(initTags)
  #browser()
  return(initCollection)
}

replaceBodyTags <- function(bodySection, bodyTags, initCollection) {
  if (length(bodySection) <= 0) stop("Empty body section")
  
  #need to ponder issues where a replaced tag still contains another tag
  
  #hmm, actually seems futile to do a replacement in the init section
  #these are already set by update values.... won't affect the body section

# so we need to finalize the tag substitutions...
# the idea is that we need to convert all tags to literals in the initCollection
# once this is done, then we replace all deferred tags in the body section
  
    
  #don't update current values if initcollection value contains any tag
  #if so, replace at the last minute (check this in Init)
  
  #set a "deferred" status in currentValue if replacement contains tags
  
  targetTags <- subset(bodyTags, tagType %in% c("simple", "iterator", "array"))
  targetTags$rownumber <- 1:nrow(targetTags)
    
  #print(targetTags)
  #stop("test")
  
  #could improve this by replacing identical tags at once
  #like ddply by the tag
  
  for (i in 1:nrow(targetTags)) {
    row <- targetTags[i,]
    stringToChange <- bodySection[row$element]
    
    if(row$start > 1) preTag <- substr(stringToChange, 1, row$start-1)
    else preTag <- ""
    
    if(row$end < nchar(stringToChange)) postTag <- substr(stringToChange, row$end+1, nchar(stringToChange))
    else postTag <- ""
    
    #lookup value as needed
    if (is.na(row$currentValue)) row$currentValue <- lookupValue(row$tag, row$tagType, initCollection)
    #row$currentValue <- lookupValue(row$tag, row$tagType, initCollection)
    
    bodySection[row$element] <- paste(preTag, row$currentValue, postTag, sep="")

    #need to offset subsequent start/stops by the difference between the tag and replacement lengths
    diffLength <- nchar(row$currentValue) - (row$end - row$start + 1)
    
    subsequentRows <- which(targetTags$rownumber > i & targetTags$element==row$element)

    if (length(subsequentRows > 0)) {
      #update rows in targetTags that have additional tags on the same row
      #need to offset by the diffLength
      targetTags[subsequentRows,"start"] <- targetTags[subsequentRows,"start"] + diffLength
      targetTags[subsequentRows,"end"] <- targetTags[subsequentRows,"end"] + diffLength
    }
  }
  
  return(bodySection)
}

#redundant with finalize code... re-use
lookupValue <- function(tag, tagType, initCollection) {
  if (missing(tag)) stop("No tag provided")
  if (missing(tagType)) stop("No tag type provided")
  
  if (tagType == "simple") {
    return(eval(parse(text=paste("initCollection$", tag, sep=""))))
  }
  else if (tagType == "array") {
    split <- strsplit(tag, split="#", fixed=TRUE)[[1]]
    if (length(split) != 2) stop("array tag missing iterator: ", row$tag)
    
    #find where in the iterator depth this iterator lies
    #iteratorPosition <- grep(paste("\\b", split[2], "\\b", sep=""), initCollection$iterators, perl=T)
    
    #use named array look-up  
    iteratorPosition <- initCollection$curItPos[split[2]]
    
		#note that the padding performed by processInit should handle non-contiguous iteratorPosition values here.
    currentValue <- eval(parse(text=paste("initCollection$", split[1], "[", iteratorPosition, "]", sep="")))
    if (is.null(currentValue)) stop("When replacing tag: ", row$tag, ", could not find corresponding value.")
    return(currentValue)
  }
}

finalizeInitCollection <- function(initCollection) {
  #this function should handle initTags that still contain tags
  #once the initCollection is finalized, then process the deferred body tags
  #the notion is that the substitutions will be handled in an inefficient manner -- using lots
  #of regular expression parsing, not using the matched tags data.frame
  
  #we only need to handle simple and array tags
  #iterators should always be integers
  #foreach and conditional are not relevant
  
  #iterate over init tags until no tags are left
  #here, the init collection should already have had most of its tags substituted by
  #replaceInitTags above.
  
  tagsRemain <- TRUE
  numIterations <- 1
  while(tagsRemain) {
    initTags <- getInitTags(initCollection)
    
    if (nrow(initTags) == 0) break #if no tags found, then substitution complete
    
    initTags <- subset(initTags, tagType %in% c("simple", "array"))
    if (nrow(initTags) == 0) break #some tags, but none of the simple or array variety, which we want to replace

    #use plyr's splitter_a function to divide dataset by row (builds a big list)
    divideByRow <- splitter_a(initTags, 1)
    
    #for each element of the list, check for a match with this iterator and return the value of interest
    initTags$currentValue <- unlist(sapply(divideByRow,
        function(row) {
          if (row$tagType == "simple") {
            return(eval(parse(text=paste("initCollection$", row$tag, sep=""))))
          }
          else if (row$tagType == "array") {
            split <- strsplit(row$tag, split="#", fixed=TRUE)[[1]]
            if (length(split) != 2) stop("array tag missing iterator: ", row$tag)
            
            #find where in the iterator depth this iterator lies
            #iteratorPosition <- grep(paste("\\b", split[2], "\\b", sep=""), initCollection$iterators, perl=T)
            
            #use named array look-up  
            iteratorPosition <- initCollection$curItPos[split[2]]
            
            currentValue <- eval(parse(text=paste("initCollection$", split[1], "[", initCollection$curItPos[iteratorPosition], "]", sep="")))
            if (is.null(currentValue)) stop("When replacing tag: ", row$tag, ", could not find corresponding value.")
            return(currentValue)
          }
        }
    ))

    #now we have a list of curent values for any init tags
    #and we want to update the init collection with their values... just as with above.
    initCollection <- replaceInitTags(initTags, initCollection)
    
    numIterations <- numIterations + 1
    if (numIterations > 20) stop("While replacing tags in init section, looped over variables 20 times without completing substitutions.\n  Check for circular definitions within init section.") 
  }

  #browser()
  return(initCollection)
}

#note that at thie point the comparator must be a number (not another variable)
evaluateConditional <- function(tag, initCollection) {
  #evaluate whether tag is true
  #first divide up into name, operator, and value
  regexp <- "(\\w+)\\s*([!><=]+)\\s*(\\w+)"
  conditional <- unlist(strapply(tag, regexp, c))
  
  if (length(conditional) < 3) {
    stop("Error in conditional tag: does not contain variable, operator, and value. Tag = ", tag)
  }
  
  #convert simple equals to logical equals
  if (conditional[2] == "=") conditional[2] <- "=="
  
  #obsolete b/c using named array
  #iteratorPosition <- grep(paste("\\b", conditional[1], "\\b", sep=""), initCollection$iterators, perl=T)
  
  #return a boolean value indicating whether the conditional is true
  return(eval(parse(text=paste("initCollection$curItPos[conditional[1]]", conditional[2], conditional[3], sep=""))))
  
}

trimSpace <- function(string) {
	string <- sub("^\\s*", "", string, perl=TRUE)
	string <- sub("\\s*$","", string, perl=TRUE)
	return(string)
}

clipString <- function(string, start, end) {
  #if the string is shorter than the length of the clip, then nothing remains
  if (nchar(string) <= end-start+1) return("")
  
  if(start > 1) preString <- substr(string, 1, start-1)
  else preString <- ""
  
  if(end < nchar(string)) postString <- substr(string, end+1, nchar(string))
  else postString <- ""
  
  return(paste(preString, postString, sep=""))
    
}

processConditionalTags <- function(templateTags, initCollection) {
  require(gsubfn)
  #find all conditional tags in the body section and remove them from the templateTags and bodyText pieces...
  
  conditionalTagIndices <- which(templateTags$bodyTags$tagType=="conditional")
  openClose <- ifelse(substr(templateTags$bodyTags$tag[conditionalTagIndices], 1, 1)=="/", "close", "open")  
  allOpen <- conditionalTagIndices[openClose=="open"]
  
  bodyTagsToDrop <- c()
  bodyLinesToDrop <- c()
  for (i in allOpen) {
		#should be able to decide whether to skip an iteration if the affected lines are already in bodyLinesToDrop
    thisTag <- templateTags$bodyTags$tag[i]
    
    #evaluate truth of conditional
    conditionalTrue <- evaluateConditional(thisTag, initCollection)

    #only look for closing tags after the opening and accept the first exact match
    close <- conditionalTagIndices[templateTags$bodyTags$tag[conditionalTagIndices] == paste("/", thisTag, sep="") & 
            templateTags$bodyTags$element[conditionalTagIndices] >= templateTags$bodyTags$element[i]][1]
    
    sameLine <- FALSE
    #in case of same line match, check to make sure close follows opening on that line
    #the conditions above could match when a closing tag precedes opening tag on the same line
    if (templateTags$bodyTags$element[close]==templateTags$bodyTags$element[i]) {
      sameLine <- TRUE

      close <- conditionalTagIndices[openClose == "close" &
              templateTags$bodyTags$tag[conditionalTagIndices] == paste("/", thisTag, sep="") &
              templateTags$bodyTags$element[conditionalTagIndices] == templateTags$bodyTags$element[i] & 
              templateTags$bodyTags$start[conditionalTagIndices] > templateTags$bodyTags$end[i]][1]
      
      if (!close > 0) stop("Could not find closing tag for conditional:", thisTag)
    }

		#skip this iteration if the opening and closing tags in question are already in the drop pile
		#these lines (and the lines between, if necessary) will already be dropped, so don't process
		if (templateTags$bodyTags$element[i] %in% bodyLinesToDrop && templateTags$bodyTags$element[close] %in% bodyLinesToDrop) next
		
    #first check for tags to drop from the bodyTags collection (don't want these parsed later)
    if (conditionalTrue) {
      #only remove starting and ending tags
      bodyTagsToDrop <- c(bodyTagsToDrop, i, close)
    }
    else {
      #if conditional false, then remove all tags between conditional tags
      #first, dump all lines in the bodyTags section that fall between elements
      bodyTagsToDrop <- c(bodyTagsToDrop, i:close)
			
			
			#conditional is not true
			#so dump the tags and all space between
			#really, the only difference here from the calculation below is that
			#bodyLinesToDrop should encompass the space between opening and closing
			#and the clips below should dump the rest of the line when multiple tags on same line
			#no need to rewrite code for clipping out tags
			#don't clip the tag lines themselves because this is handled below (whole line goes if nchar <= 0)
			#print(bodyLinesToDrop)
			#browser()
	
			#only drop lines between matching open/close tags if not on the same line
			#otherwise, the clipping code below handles everything correctly
			#if on the same line, then element + 1:close - 1 will lead to something like 58:56, which is bad
			if (!sameLine) bodyLinesToDrop <- c(bodyLinesToDrop, (templateTags$bodyTags$element[i]+1):(templateTags$bodyTags$element[close]-1))
    }

    #then dump lines from the syntax section itself
    #handle same line issues, then delete whole lines between tags
    #as with replaceTags substitution, need to handle situation where tag is on line with other stuff
    #thus, need to update bodyTags collection, too to reflect new start/stop positions
    
    #when the conditional is true, just remove the tags and leave the syntax
    #dump the opening tag on the line

		if (conditionalTrue) endPos <- templateTags$bodyTags$end[i] #if the conditional is true, just use the last pos of the opening tag for the clip
		else if (!conditionalTrue && sameLine == FALSE) endPos <- nchar(templateTags$bodyText[templateTags$bodyTags$element[i]]) #want to clip the rest of the line
		else if (!conditionalTrue && sameLine == TRUE) endPos <- templateTags$bodyTags$start[close] - 1 #just clip anything between open tag and first element of close tag (close tag itself handled by code below)

    templateTags$bodyText[templateTags$bodyTags$element[i]] <- clipString(templateTags$bodyText[templateTags$bodyTags$element[i]], templateTags$bodyTags$start[i], endPos)
    if (nchar(trimSpace(templateTags$bodyText[templateTags$bodyTags$element[i]])) <= 0) bodyLinesToDrop <- c(bodyLinesToDrop, templateTags$bodyTags$element[i]) #no characters remain, so dump line
    else {
      #if there is other text on this line, it may contain tags that need to be adjusted given the clip

      subsequentTags <- which(templateTags$bodyTags$element == templateTags$bodyTags$element[i] & templateTags$bodyTags$start > endPos)
      
      if (length(subsequentTags > 0)) {
        #calculate length of opening tag
				openLength <- endPos - templateTags$bodyTags$start[i] + 1
        templateTags$bodyTags[subsequentTags,"start"] <- templateTags$bodyTags[subsequentTags,"start"] - openLength
        templateTags$bodyTags[subsequentTags,"end"] <- templateTags$bodyTags[subsequentTags,"end"] - openLength
				#print("openlength")
				#browser()
      }
    }

    #okay, we've handled issues related to the opening tag, now handle closing tag
		#for the closing tag, just need to clip the tag itself (spacing handled above)
		templateTags$bodyText[templateTags$bodyTags$element[close]] <- clipString(
				templateTags$bodyText[templateTags$bodyTags$element[close]], templateTags$bodyTags$start[close],
				templateTags$bodyTags$end[close]
		)
		
    if (nchar(trimSpace(templateTags$bodyText[templateTags$bodyTags$element[close]])) <= 0) bodyLinesToDrop <- c(bodyLinesToDrop, templateTags$bodyTags$element[close]) #no characters remain, so dump line
    else {
      #only look for additional tags if nchar > 0
      #redundant code with above... must be a way to consolidate
      #if there is other text on then end line, it may contain tags that need to be adjusted given the clip
      subsequentTags <- which(templateTags$bodyTags$element == templateTags$bodyTags$element[close] & templateTags$bodyTags$start > templateTags$bodyTags$end[close])
      
      if (length(subsequentTags > 0)) {
        closeLength <- templateTags$bodyTags$end[close] - templateTags$bodyTags$start[close] + 1
        templateTags$bodyTags[subsequentTags,"start"] <- templateTags$bodyTags[subsequentTags,"start"] - closeLength
        templateTags$bodyTags[subsequentTags,"end"] <- templateTags$bodyTags[subsequentTags,"end"] - closeLength
				#print("closelength")
				#browser()
      }          
    }
	}
	
	#only keep unique lines
	bodyTagsToDrop <- sort(unique(bodyTagsToDrop))
	bodyLinesToDrop <- sort(unique(bodyLinesToDrop))
	#print(bodyLinesToDrop)
	#print(bodyTagsToDrop)
	#print("done with loop")
	
	#drop all bad body lines
	templateTags$bodyText <- templateTags$bodyText[bodyLinesToDrop*-1]
	templateTags$bodyTags <- templateTags$bodyTags[bodyTagsToDrop*-1,]
	
	#browser()
	#need to move up the line markers in the bodyTags collection based on the lines dropped
	templateTags$bodyTags <- ddply(templateTags$bodyTags, "element", function(subDF) {
				numMoveUp <- length(which(bodyLinesToDrop < subDF$element[1]))
				#browser()
				subDF$element <- subDF$element - numMoveUp
				return(subDF)
			})
	
	return(templateTags)
}


processInit <- function(initsection) {
  #combine multi-line statements by searching for semi-colon
  assignments <- grep("^\\s*.+\\s*=", initsection, perl=TRUE)
	
	#check for valid variable names
	valid <- grep("^\\s*[A-Za-z\\.]+[\\w\\.#]*\\s*=", initsection[assignments], perl=TRUE)
	
	if (length(valid) < length(assignments)) {
		badvars <- initsection[assignments[which(!1:length(assignments) %in% valid)]]
		stop(paste(c("Invalid variable definitions in init section.",
				"Variables must begin with a letter or a period.",
				"Variables may contain only the following characters: letters, numbers, underscores, periods, and a single pound sign for list variables.",
				"Problematic variable(s):",
				badvars), collapse="\n  "))
	}
  
  #preallocate vector of strings to process
  argstoprocess <- vector("character", length(assignments))
  
  #loop through each line containing an assignment
  for (i in 1:length(assignments)) {
    argstoprocess[i] = initsection[assignments[i]]
    
    #if line does not terminate in semicolon, then read subsequent lines until semicolon found
    #start file position at n+1 line
    filepos = assignments[i]+1
    while (length(grep(";\\s*$", argstoprocess[i], perl=TRUE)) != 1) {
      #cat("multi-line: ", unlist(argstoprocess[i]), fill=T)
      argstoprocess[i] = paste(argstoprocess[i], initsection[filepos])
      filepos = filepos+1
    }
  }
  
	#will return a list (one element per argstoprocess) with a three-element vector (name, iterator, value)
	#note that the regexp implicitly dumps the semicolon and any trailing spaces
	arglist <- strapply(argstoprocess, "^\\s*(\\w+[\\w\\.]*)(#[\\w\\.]+)?\\s*=\\s*(.+);\\s*$", function(name, iterator, value) {
			return(c(name, iterator, value))
		}, perl=TRUE)

	#copy the first element (name) of each vector into the list names
	names(arglist) <- make.names(sapply(arglist, '[', 1))
	
	#1. parse values into vectors according to spaces and quotes
	#2. add iterator attribute to be processed after iterators are setup below
	#3. implicitly drop name by not including element[1]
	arglist <- lapply(arglist, function(element) {
				output <- friendlyGregexpr("(\"[^\"]*\"|[^\\s]+)", element[3])$tag
				output <- gsub("\"", "", output)

				#the regexp above matches the # itself. need to trim off in cases where iterator defined 
				if (nchar(element[2]) > 0) element[2] <- substr(element[2], 2, nchar(element[2]))
				attr(output, "iterator") <- element[2] 
				return(output)
			})
		
  
  if (is.null(arglist$iterators)) {
    stop("No iterators in init section. Cannot process template.")
  }
  	
  #convert iterators from string to list
  #arglist$iterators <- unlist(strsplit(as.character(arglist$iterate_wrt), "\\s*,\\s*", perl=T))
  
  #process sequence text for each iterator
  for (thisIt in arglist$iterators) {
		if (is.null(arglist[[thisIt]])) stop("Variable specified in iterators list, but not defined: ", thisIt)
		
		#expand colon notation as needed
		#use do.call to combine elements of list returned by lapply
		#if there are many elements (e.g., 1 3 5), then lapply returns an element for each
		#one, but we just want a combined array. In the case of colon expansion, want to c that together
		#with any other elements... Maybe in the future when we support non-contiguous iterators.
		
		arglist[[thisIt]] <- do.call("c", lapply(arglist[[thisIt]], function(x) {
					if (length(grep(":", x)) > 0) {
						return(strapply(x, "(\\d+)\\s*:\\s*(\\d+)", function(start, stop) return(start:stop))[[1]])
					}
					else return(as.numeric(x))
				}))

		#sort as ascending and only keep unique values
		if (length(unique(arglist[[thisIt]])) < length(arglist[[thisIt]])) stop("Problem with iterator: ", thisIt, "\n  Non-unique values specified: ", paste(arglist[[thisIt]], collapse=", "))
		arglist[[thisIt]] <- sort(unique(arglist[[thisIt]]))
	}
	
	#browser()
	
  
	#now that iterators are defined, ensure that list tags match
	#pad vectors accordingly

	arglist <- lapply(arglist, function(element) {
				#if the iterator is defined, then this is a list tag
				#need to make sure it is properly padded
				iteratorAttr <- attr(element, "iterator")
				if (!is.null(iteratorAttr) && nchar(iteratorAttr) > 0) {
					iteratorValues <- arglist[[iteratorAttr]]
					#make sure that the length of the values vector matches the length of the iterator vector
					if (length(element) != length(iteratorValues)) stop("Variable locked to iterator: ", iteratorAttr, ", but has different length.\n  Values: ", paste(element, collapse=", "), "\n  Should be length: ", length(iteratorValues))

					if (length(element) < max(iteratorValues)) {
						#pad
						updatedElement <- c()
						listElement <- 1
						#build a vector of the same length as the max of the iterator
						#only insert list values for defined indices. Otherwise pad
						for (i in 1:max(iteratorValues)) {
							if (i %in% iteratorValues) {
								updatedElement[i] <- element[listElement]
								listElement <- listElement + 1
							}
							else updatedElement[i] <- ""
						}
						element <- updatedElement
						attr(element, "iterator") <- iteratorAttr #re-add attribute
					}
				}
				
				return(element)
			})

	#browser()
	
	
  #default output directory to the current directory
  if (is.null(arglist$outputDirectory)) {
		warning("No output directory specified. Defaulting to the current directory.")
    arglist$outputDirectory <- getwd()
  }
	if (is.null(arglist$filename)) stop("No definition provided for the output filename. The filename definition is required.")
  
  return(arglist)
}
