#
# strings.tcl (Mark Nagata and Tom Scavo and Vince Darley)
#

namespace eval quote {}
namespace eval text {}
## 
 # -------------------------------------------------------------------------
 # 
 # "quote::" --
 # 
 # Manipulate string so search and insertion procedures work as expected.
 # These files have been both renamed and rewritten from the former
 # 'quoteExpr' procs.  They fix a number of bugs, and make their purpose
 # clear.  There were numerous examples throughout Alpha's Tcl code which
 # used the wrong quote function under the old scheme.
 # 
 # quote::Find
 # 
 #  use this for 'glob' type searches, but not 'glob' itself!  Glob itself,
 #  as opposed to 'string match', 'lsearch -glob' etc has {a,b,c} type
 #  expresions as special.
 # 	
 # quote::Regfind
 # 
 #  use this for regexp searches
 #  
 # quote::Insert
 # 
 #  Quotes any block of text captured from a window so it can be used as a 
 #  Tcl string. e.g. 'set a [quote::Insert [getSelect]] ; eval insertText $a'
 #  will work correctly.  Can be used to generate procedures on the fly,
 #  especially to add to your prefs.tcl:
 #   set a [quote::Insert [getSelect]]
 #   addUserLine "proc foo \{\} \{ return \"$a\" \}"
 # 
 # quote::Regsub
 # 
 #  use this for the replacement expression.  A common usage might look
 #  like this:
 #   
 #   regsub -all [quote::Regfind $from] [read $cid] [quote::Regsub $to] out
 # -------------------------------------------------------------------------
 ##
proc quote::Find  str {
    regsub -all {[][\\*?]} $str {\\&} str
    return $str
}

proc quote::Regfind str {
    regsub -all {[][\$?^|*+()\.\{\}\\]} $str {\\&} str
    return $str
}

proc quote::Regsub str {
    regsub -all {(\\|&)} $str {\\&} str
    return $str
}

proc quote::Glob str {
    regsub -all {[][*?\{\}\\]} $str {\\&} str
    return $str
}

proc quote::Insert str {
    regsub -all {[][\$"\{\}]} $str {\\&} str
    regsub -all "\[\r\n\]" $str "\\r" str
    regsub -all "\t" $str "\\t" str
    return $str
}

# These procs have been modified to avoid substitutions in TeX commands 
# starting with \n, \r and \t. The fix is based on replacing single \ by
# double \\ in 'quote::Display' and replacing \(n|r|t) by their ascii
# counterpart only if there is an odd number of \.
proc quote::Display str {
    regsub -all {\\} $str {\\\\} str
    regsub -all "\r" $str "\\r" str
    regsub -all "\n" $str "\\n" str
    regsub -all "\t" $str "\\t" str
    return $str
}

proc quote::Undisplay str {
    regsub -all {(^|[^\\]|(\\\\)+)\\r} $str "\\1\r" str
    regsub -all {(^|[^\\]|(\\\\)+)\\n} $str "\\1\n" str
    regsub -all {(^|[^\\]|(\\\\)+)\\t} $str "\\1\t" str
    regsub -all {\\\\} $str {\\} str
    return $str
}

## 
 # -------------------------------------------------------------------------
 # 
 # "quote::Prettify" --
 # 
 #  Since we're supposed to be a LaTeX editor, we handle symbols with
 #  TeX in a bit differently
 # -------------------------------------------------------------------------
 ##
proc quote::Prettify str {
    set a [string toupper [string index $str 0]]
    regsub -all {([^A-Z])([A-Z])} [string range $str 1 end] {\1 \2} b
    regsub -all {((La|Bib|Oz) )?Te X} $a$b {\2TeX } a
    regsub -all {::} $a {-} a
    return $a
}
proc quote::Menuify str {
    set a [string toupper [string index $str 0]]
    regsub -all { *([A-Z])} [string range $str 1 end] { \1} b
    append a $b
}
## 
 # -------------------------------------------------------------------------
 # 
 # "quote::WhitespaceReg" --
 # 
 #  Quote a string so you can search for it ignoring all problems with
 #  whitespace: all sequences of space/tab/cr are treated alike.
 # -------------------------------------------------------------------------
 ##
proc quote::WhitespaceReg { str } { 
    regsub -all "\[ \t\r\n\]+" $str {[ \t\r\n]+} str
    return $str
}

## 
 # -------------------------------------------------------------------------
 # 
 # "lremove" --
 # 
 #  removes items from a list
 #  
 #  options are '-all' to remove all, and -glob, -exact or -regexp
 #  for search type.  '-exact' is the default. '--' terminates options.
 #  
 #  lremove ?-opts? l args
 #  
 #  Note: if you want to remove all items of list 'b' from list 'a',
 #  the following is incorrect: lremove $a $b, you must use
 #  'eval lremove [list $a] $b', so that b is expanded.
 #  
 #  There is now a new option -l which treats the extra args as lists,
 #  so you can do lremove -l $a $b if you want.
 # -------------------------------------------------------------------------
 ##
proc lremove {args} {
    set opts(-all) 0
    set type "-exact"
    getOpts
    set l [lindex $args 0]
    if {[info exists opts(-glob)]} { set type "-glob" }
    if {[info exists opts(-regexp)]} { set type "-regexp" }
    if {[info exists opts(-l)]} { 
	set args [join [lreplace $args 0 0] " "]
    } else {
	set args [lreplace $args 0 0]
    }
    foreach i $args {
	if {[set ix [lsearch $type $l $i]] == -1} continue
	set l [lreplace $l $ix $ix]
	if {$opts(-all)} {
	    while {[set ix [lsearch $type $l $i]] != -1} {
		set l [lreplace $l $ix $ix]
	    }
	}
    }
    return $l
}

## 
 # -------------------------------------------------------------------------
 # 
 # "getOpts" --
 # 
 #  Rudimentary option passing.  Uses upvar to get to the 'args' list
 #  of the calling procedure and scans that.  Option information is
 #  stored in the 'opts' array of the calling procedure.
 #  
 #  Options are assumed to be flags, unless they occur in the
 #  optional parameter list.  Then they are variables which take a
 #  value; the next item in the args list.  If an item is a pair,
 #  then the first is the var name and the second the number of
 #  arguments to give it.
 # -------------------------------------------------------------------------
 ##
proc getOpts {{take_value ""} {set "set"}} {
    upvar args a
    upvar opts o
    while {[string match \-* [set arg [lindex $a 0]]]} {
	set a [lreplace $a 0 0]
	if {$arg == "--"} {
	    return
	} else {
	    if {[set idx [lsearch -regexp $take_value \
	      "^-?[string range $arg 1 end]( .*)?$"]] == -1} {
		set o($arg) 1
	    } else {
		if {[llength [set the_arg \
		  [lindex $take_value $idx]]] == 1} {
		    $set o($arg) [lindex $a 0]
		    set a [lreplace $a 0 0]
		} else {
		    set numargs [expr {[lindex $the_arg 1] -1}]
		    $set o($arg) [lrange $a 0 $numargs]
		    set a [lreplace $a 0 $numargs]
		}
	    }
	}
    }
}

## 
 # -------------------------------------------------------------------------
 # 
 # "ensureset" --
 # 
 #  Ensure the given variable is set, if it is unset, set it to the given
 #  value.  This works with both variables and array elements, including
 #  things which contain spaces etc.
 # -------------------------------------------------------------------------
 ##
proc ensureset {v {val ""}} {
    if {[uplevel [list info exists $v]]} { return [uplevel [list set $v]] }
    return [uplevel [list set $v $val]]
}
## 
 # -------------------------------------------------------------------------
 # 
 # "lunion" --
 # 
 #  Basic use: make sure a given list variable contains each element 
 #  of 'args'
 #  
 #  "llunion" --
 #  
 #  Advanced use: make sure a given list variable and index contains
 #  an element whose i'th index matches the i'th index of one of 'args'.
 #  In this case we call the proc with a list {var i} as first argument.
 # -------------------------------------------------------------------------
 ##
proc lunion {var args} {
    upvar $var a
    if {![info exists a]} {
	set a $args
	return
    } else {
	foreach item $args {
	    if {[lsearch $a $item] == -1} {
		lappend a $item
	    }
	}
    }
}
	
proc llunion {var idx args} {
    upvar $var a
    if {![info exists a]} {
	set a $args
	return
    } else {
	foreach item $args {
	    set add 1
	    foreach i $a {
		if {[lindex $i $idx] == [lindex $item $idx]} {
		    set add 0
		    break
		}
	    }
	    if {$add} {
		lappend a $item
	    }
	}
    }
}

proc lunique {l} {
    set lout ""
    foreach f $l {
	if {![info exists silly($f)]} {
	    set silly($f) 1
	    lappend lout $f
	}
    }
    return $lout
}
			
proc lreverse {l} {
    if {[llength $l] > 1} {
	set first [lindex $l 0]
	set l [lreverse [lrange $l 1 end]]
	lappend l $first
    }
    return $l
}

proc lcontains {l e} {
    upvar $l ll
    if {[info exists ll] && [lsearch -exact $ll $e] != -1} {
	return 1
    } else {
	return 0
    }
}

## 
 # -------------------------------------------------------------------------
 # 
 # "llindex" --
 # 
 #  Find the first index of a given list within another list.  
 # -------------------------------------------------------------------------
 ##
proc llindex {l e args} {
    upvar $l ll
    if {![info exists ll]} { return -1 }
    if {![llength $args]} {
	return [lsearch -exact $ll $e]
    } else {
	set i 0
	set len [llength $args]
	while {$i < [llength $ll] - $len} {
	    if {[lindex $ll $i] == $e} {
		set range [lrange $ll [expr {$i +1}] [expr {$i + $len}]]
		for {set j 0} {$j < $len} {incr j} {
		    if {[lindex $args $j] != [lindex $range $j]} {
			break
		    }
		}
		if {$j == $len} { return $i}
	    }
	    incr i
	}
	return -1
    }
}

# Returns a modified text string if the string $text is non-null, 
# and the null string otherwise.  The argument 'operation' is a 
# string directing 'doSuffixText' to either "insert" or "remove" 
# $suffixString to/from each line of $text.
proc doSuffixText {operation suffixString text} {
    if {$text == ""} {return ""}
    if {$operation == "insert"} {
	regsub -all "\[\r\n\]" $text "[quote::Regsub ${suffixString}]\r" text
    } elseif {$operation == "remove"} {
	regsub -all -- "[quote::Regfind $suffixString]\r" $text "\r" text
    }
    return $text
}

# Returns a modified text string if the string $text is non-null, 
# and the null string otherwise.  The argument 'operation' is a 
# string directing 'doPrefixText' to either "insert" or "remove" 
# $prefixString to/from each line of $text.  
proc doPrefixText {operation prefixString text} {
    if {$operation == "insert"} {
	set trailChar ""
	set textLen [string length $text]
	if {$textLen && ([string index $text [expr {$textLen-1}]] == "\r")} {
	    set text [string range $text 0 [expr {$textLen-2}]]
	    set trailChar "\r"
	}
	regsub -all \r $text "\r[quote::Regsub $prefixString]" text
	return $prefixString$text$trailChar
    } elseif {$operation == "remove"} {
	set pref [quote::Regfind $prefixString]
	regsub -all \r$pref $text \r text
	regsub ^$pref $text "" text
	return $text
    }
}

proc text::british {v} {
    uplevel "regsub -all -nocase {(Colo)r} \[set $v\] {\\1ur} $v"
}

rename getAscii {}
proc getAscii {} {
    set c [lookAt [getPos]]
    scan $c %c decVal
    set asOctal [format %o $decVal]
    set asHex   [format %x $decVal]
    alertnote "saw a \"$c\", $decVal -decimal,\
      \\$asOctal -octal, x$asHex -hex"
}

# nabbed from html mode
set text::_Ascii "\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017"
append text::_Ascii "\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
append text::_Ascii " !\"#\$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
append text::_Ascii "\[\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177"
append text::_Ascii ""
append text::_Ascii ""
proc text::Ascii {char {num 0}} {
    if {$char == ""} {return 0}
    global text::_Ascii
    if {$num} {
	if {$char > 256 || $char < 1} { beep ; message "text::Ascii called with bad argument" }
	return [string index ${text::_Ascii} [expr {$char - 1}]]
    } else {
	return [expr {1 + [string first $char ${text::_Ascii}]}]
    }
}

proc text::fromPstring {str} {
    set len [text::Ascii [string index $str 0]]
    return [string range $str 1 $len]
}

# Useful for -command flag of 'lsort'.
proc sortByTail {one two} {
    string compare [file tail $one] [file tail $two]
}


namespace eval is {}

proc is::Hexadecimal {str} {
    return [regexp {^[0-9a-fA-F]+$} [string trim $str]]
}

proc is::Numeric {str} {
    return [expr {![catch {expr {$str}}]}]
}

proc is::Integer {str1} {
    return [regexp {^(\+|-)?[0-9]+$} [string trim $str1]]
}

proc is::UnsignedInteger {str1} {
    return [regexp {^[0-9]+$} [string trim $str1]]
}

proc is::PositiveInteger {str1} {
    if {[is::UnsignedInteger $str1]} {
	return [expr {$str1 > 0}]
    }
    return 0
}

# Takes any string and tests whether or not that string contains all 
# whitespace characters.  Carriage returns are considered whitespace, 
# as are spaces and tabs.  Also returns true for the null string.
proc is::Whitespace {anyString} {
    return [regexp "^\[ \t\r\n\]*$" $anyString]
}


###########################################################################
#  Parse a string into "word"s, which include blocks of non-space text,
#  double- and single-quoted strings, and blocks of text enclosed in 
#  balanced parentheses or curly brackets.
#
#  If a word is delimited by a quote or paren character (\", \', \(, or \{),
#  then _that_ particular delimiter may be included within the word if it is 
#  backslash-quoted, as above.  No other characters are special or need quoting
#  with that word.  The quoted delimiters are unquoted in the list of words 
#  returned.  
#
proc parseWords {entry} {
    set slash "\\"
    set qslash "\\\\"
    
    set words {}
    set entry [string trim $entry]
    
    while {[string length $entry]} {
	set delim [string range $entry 0 0]
	set entry [string range $entry 1 end]
	
	#		regexp $endPat   matches the end of the word
	#		       $openPat  matches the open delimiter
	#		       $unescPat matches escaped instances of the open/close delimiters
	#
	#		$type == "quote" means open/close delimiters are the same
	#		      == "paren" means there's a close delimiter and nesting is possible
	#		      == "unquoted" means the word is delimited by whitespace.
	#
	if {$delim == {"}} {			
	    set endPat {^([^"]*)"}
	    set unescPat {\\(")}
	    set type quote
	    
	} elseif {$delim == {'}} {		
	    set endPat {^([^']*)'}
	    set unescPat {\\(')}
	    set type quote
	    
	} elseif {$delim == "\{"} {		
	    set endPat "^(\[^\}\]*)\}"
	    set openPat "\{"
	    set unescPat "\\\\(\[\{\}\])"
	    set type paren
	    
	} elseif {$delim == "("} {		
	    set endPat {^([^)]*)\)}
	    set openPat {(}
	    set unescPat {\\([()])}
	    set type paren
	    
	} else {						
	    set type unquoted
	}
	
	if {$type == "quote"} {
	    set ck $qslash
	    set fld ""
	    while {$ck == $qslash} {
		set ok [regexp -indices -- $endPat $entry mtch sub1]
		if {$ok} {
		    append fld [string range $entry [lindex $mtch 0] [lindex $mtch 1]]
		    set ck $slash[string range $entry [lindex $sub1 1] [lindex $sub1 1]]
		    set pos [expr {1 + [lindex $mtch 1]}]
		    set entry [string range $entry $pos end]
		} else {
		    error "Couldn't match $delim as field delimiter"
		}
	    }
	    set pos [expr {[string length $fld] - 2}]
	    set fld [string range $fld 0 $pos]
	    regsub -all -- $unescPat $fld {\1} fld
	    
	} elseif {$type == "paren"} {
	    
	    set nopen 1
	    set nclose 0
	    set fld ""
	    while {$nopen - $nclose != 0} {
		set ok [regexp -indices -- $endPat $entry mtch sub1]
		if {$ok} {
		    append fld [string range $entry [lindex $mtch 0] [lindex $mtch 1]]
		    set ck $slash[string range $entry [lindex $sub1 1] [lindex $sub1 1]]
		    set entry [string range $entry [expr {1 + [lindex $mtch 1]}] end]
		    regsub -all -- $unescPat $fld {} fld1
		    set nopen [llength [split $fld1 $openPat]]
		    if {$ck != $qslash} { incr nclose }
		} else {
		    error "Couldn't match $delim as field delimiter"
		} 
	    }
	    set pos [expr {[string length $fld] - 2}]
	    set fld [string range $fld 0 $pos]
	    regsub -all -- $unescPat $fld {\1} fld
	    
	} elseif {$type == "unquoted"} {
	    
	    set entry ${delim}${entry}
	    set ok [regexp -indices {^([^ 	]*)} $entry mtch sub1]
	    if {$ok} {
		set fld [string range $entry [lindex $sub1 0] [lindex $sub1 1]]
		set pos [expr {1 + [lindex $mtch 1]}]
		set entry [string range $entry $pos end]
	    } else {
		set fld ""
		set entry ""
	    }
	} else {
	    error "parseWords: unrecognized case"
	}
	
	lappend words $fld
	set entry [string trimleft $entry]
    }
    return $words
}

