proc searchText {text type minMatch listGlob {listRE {}}} { # this proc splits $text up into records according to $type (See the # switch block below). Then it converts any globs in $listGlob to a # (hopefully) well-formed regexp and appends it to $listRE. # Next, it matches each record against each RE string in $listRE. # If the first char of a glob or RE is "!", it will be exclusionary. # Records which match at least $minMatch number of globs will be # returned in a flat chunk of text. $minMatch may be "all", "any", or # a positive integer. # Also note that this entire proc is case-INsensitive. # This handles all but the pathological # cases I haven't thought of yet. ### break up text into records switch -glob -- $type { line* { set records [split $text "\n"] ; set join "\n"} para* { # A paragraph is a block of text which ends with a double # newline. # \167 is the section marker (§). Appropriate, ne? regsub -all {\n\n} $text "\167" text set records [split $text "\167"] set join "\n\n" } inde* { # indent-records are delineated by the first line of the record # having text beginning immediately after the newline. Other # lines in the record are indented or blank. A line that begins # with a non-whitespace character is the first line in a record regsub -all {\n(\S+)} $text "\167" text set records [split $text "\167"] set join "\n" } default {error "Invalid search type '$type'. It must be one of \ 'lines', 'paragraphs', or 'indent-records', \ or an abbreviation of these."} } ### massage glob strings, add to RE list foreach glob $listGlob { lappend listRE [regexp_sanitize $glob] } ### make absolutely sure that $minMatch is an integer, or error out switch -glob -- $minMatch { all {set minMatch [llength $listRE]} any {set minMatch 1} default {set minMatch [format "%01d" $minMatch]} } if {$minMatch>[llength $listGlob]} {set minMatch [llength $listGlob]} if {$minMatch<0} {set minMatch 0} ### setup done, start searching set result {} foreach record $records { set matches 0 foreach RE $listRE { puts $RE if {[string range $RE 0 0]=="!"} { # negative match set RE [string range $RE 1 end] if {[regexp -nocase -- $RE $record]==0} {incr matches} } else { # positive match if {[regexp -nocase -- $RE $record]} {incr matches} } } if {$matches>=$minMatch} { lappend result $record } } return [join $result $join] } proc regexp_sanitize {text} { # escape ALL(most?) regexp special characters regsub -all {([\\\*\+\?\{\}\(\)\[\]\.\^\$])} $text {\\\1} text # only two characters should have special meaning, "?" and "*" # for DOS-file-glob-style matching. set text [string map -nocase { {\*} {.*} {\?} {[^ \t]} } $text] return $text } proc doSearch {fn searchtype args} { # args must consist of a list of globs to match against the # text in file $fn. set fh [open $fn r] set ft [read $fh] close $fh ; unset fh return [grep $ft $searchtype all $args] }