# -*- tcl -*- # Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt # module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm # # Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. # Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # (C) 2024 # # @@ Meta Begin # Application punk::lib 999999.0a1.0 # Meta platform tcl # Meta license BSD # @@ Meta End # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # doctools header # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[manpage_begin punkshell_module_punk::lib 0 999999.0a1.0] #[copyright "2024"] #[titledesc {punk general utility functions}] [comment {-- Name section and table of contents description --}] #[moddesc {punk library}] [comment {-- Description at end of page heading --}] #[require punk::lib] #[keywords module utility lib] #[description] #[para]This is a set of utility functions that are commonly used across punk modules or are just considered to be general-purpose functions. #[para]The base set includes string and math functions but has no specific theme # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section Overview] #[para] overview of punk::lib #[subsection Concepts] #[para]The punk::lib modules should have no strong dependencies other than Tcl #[para]Dependendencies that only affect display or additional functionality may be included - but should fail gracefully if not present, and only when a function is called that uses one of these soft dependencies. #[para]This requirement for no strong dependencies, means that many utility functions that might otherwise seem worthy of inclusion here are not present. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Requirements # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[subsection dependencies] #[para] packages used by punk::lib #[list_begin itemized] package require Tcl 8.6- package require punk::args package require punk::assertion #*** !doctools #[item] [package {Tcl 8.6-}] #[item] [package {punk::args}] # #package require frobz # #*** !doctools # #[item] [package {frobz}] #*** !doctools #[list_end] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section API] tcl::namespace::eval punk::lib::ensemble { #wiki.tcl-lang.org/page/ensemble+extend # extend an ensemble-like routine with the routines in some namespace #NOTE - the extension ns becomes the '-namespace ' for the original routine name, #with -unknown handling the original subcommands. #This makes the original ensemble harder to introspect! #e.g (the original -map or -namespace not visible) #In this specific case (which, being published on the wiki might be common in the wild) #we could call {*}[namespace ensemble configure $routine -unknown] $routine #and then detect that the first resulting word is an ensemble #For arbitrary '-unknown scripts' - sensible introspection is likely not possible proc extend {routine extension} { if {![string match ::* $routine]} { set resolved [uplevel 1 [list ::tcl::namespace::which $routine]] if {$resolved eq {}} { error [list {no such routine} $routine] } set routine $resolved } set routinens [tcl::namespace::qualifiers $routine] if {$routinens eq {::}} { set routinens {} } set routinetail [tcl::namespace::tail $routine] if {![string match ::* $extension]} { set extension [uplevel 1 [list [tcl::namespace::which namespace] current]]::$extension } if {![tcl::namespace::exists $extension]} { error [list {no such namespace} $extension] } set extension [tcl::namespace::eval $extension [list [tcl::namespace::which namespace] current]] tcl::namespace::eval $extension [list [tcl::namespace::which namespace] export *] while 1 { set renamed ${routinens}::${routinetail}_[clock clicks] ;#clock clicks unlikely to collide when not directly consecutive such as: list [clock clicks] [clock clicks] if {[tcl::namespace::which $renamed] eq {}} break } rename $routine $renamed tcl::namespace::eval $extension [ list namespace ensemble create -command $routine -unknown [ list apply {{renamed ensemble routine args} { list $renamed $routine }} $renamed ] ] return $routine } } # some (?) tcl bug check procs needed to exist before main punk::lib namespaces are evaluated tcl::namespace::eval punk::lib::check { #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #Not any sort of comprehensive check of known tcl bugs. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases. proc has_tclbug_caseinsensitiveglob_windows {} { #https://core.tcl-lang.org/tcl/tktview/108904173c set bug 0 ;#default only if {"windows" ne $::tcl_platform(platform)} { set bug 0 } else { if {![catch {file tempdir} tmpdir]} { #tcl 9+ has 'file tempdir' set testfile [file join $tmpdir "bugtest"] } else { #fallback for older tcl versions - use env TEMP/TMP or current directory set tmpdir "" foreach e {TEMP TMP} { if {[info exists ::env($e)] && [file isdirectory ::env($e)]} { set tmpdir ::env($e) break } } if {$tmpdir eq ""} { #no env vars - fallback to current directory set tmpdir [pwd] } set testfile [file join $tmpdir "bugtest"] } set fd [open $testfile w] puts $fd test close $fd set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*] if {[file exists $testfile]} { file delete $testfile } foreach r $globresult { if {$r ne "bugtest"} { set bug 1 break } } } #possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized # to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation. return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results" level medium] } #todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies. # but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module, # or at least checking for an existing mounted case-insensitive filesystem. # A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like. # It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix. proc has_tclbug_regexp_emptystring {} { #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, #but as an apparent violation of Tcl's normal parsing rules - was evidently seen as a bug and fixed in: #https://core.tcl-lang.org/tcl/info/cb03e57a (tcl 9.0.3+ ?) set bug [expr {![catch {regexp {} [error should_error]}]}] return [dict create bug $bug bugref cb03e57a description {regexp emptystring first argument over-optimised - difference in compiled vs traced behaviour.} level minor] } proc has_tclbug_lsearch_sorted_inline_subindices {} { if {[catch {lsearch -sorted -subindices -inline -index 0 {{a 1} {a 2} {b 3} {c 4} {c 5}} b} result]} { #probably tcl version doesn't support all options set bug 0 } else { set bug [expr {$result ne "b"}] } set description "lsearch -sorted with -subindices -inline - incorrect result." return [dict create bug $bug bugref bc4ac0 description $description level minor] } proc has_tclbug_script_var {} { set script {set j [list spud] ; list} append script \n uplevel #0 $script set rep1 [tcl::unsupported::representation $::j] set script "" set rep2 [tcl::unsupported::representation $::j] set nostring1 [string match "*no string" $rep1] set nostring2 [string match "*no string" $rep2] #we assume it should have no string rep in either case #Review: check Tcl versions for behaviour/consistency if {!$nostring2} { set bug true } else { set bug false } set description "string rep for list variable in script generated when script changed\n(not an acknowledged/reported bug)" return [dict create bug $bug bugref "" description $description level minor] } proc has_tclbug_lsearch_strideallinline {} { #bug only occurs with single -index value combined with -stride -all -inline -subindices #https://core.tcl-lang.org/tcl/tktview/5a1aaa201d if {[catch {lsearch -stride 3 -all -inline -index 1 -subindices {a1 a2 a3} *} result]} { #we aren't looking for an error result - error most likely indicates tcl too old to support -stride set bug 0 } else { set bug [expr {$result ne "a2"}] } set description "lsearch -stride with -subindices -inline -all and single index - incorrect results." return [dict create bug $bug bugref 5a1aaa201d description $description level major] } proc has_tclbug_lseq_sign {} { #https://core.tcl-lang.org/tcl/tktview/999b6966b2 if {[catch {lseq 1 10}]} { set bug 0 } else { set r1 [lseq 1 10 -9] set r2 [lseq 1 10 -10] set bug [expr {$r1 ne $r2}] } set description "lseq step sign not matching sequence direction - inconsistent results." return [dict create bug $bug bugref 999b6966b2 description $description level minor] } proc has_tclbug_list_quoting_emptyjoin {} { #https://core.tcl-lang.org/tcl/tktview/e38dce74e2 set v1 [list {*}[lindex #foo] {*}[]] ;#can return "#foo" instead of "{#foo}" under some beta 9 releases set v2 [list #foo] ;#normal tcl list quoting for 1st element that looks like a comment -> "{#foo}" set bug [expr {![string equal $v1 $v2]}] ;#if they're not equal - we have the bug. set description "lists elements not properly quoted in some cases. e.g 'list {*}[lindex {etc #foo} 1] {*}[list]' (#foo not braced)" return [dict create bug $bug bugref e38dc74e2 description $description level medium] } proc has_tclbug_safeinterp_compile {{show 0}} { #ensemble calls within safe interp not compiled #https://core.tcl-lang.org/tcl/tktview/1095bf7f756f9aed6bde namespace eval [namespace current]::testcompile { proc ensembletest {} {string index a 0} } set has_bug 0 set bytecode_outer [tcl::unsupported::disassemble proc [namespace current]::testcompile::ensembletest] if {$show} { puts outer: puts $bytecode_outer } if {![interp issafe]} { #test of safe subinterp only needed if we aren't already in a safe interp if {![catch { interp create x -safe } errMsg]} { x eval {proc ensembletest {} {string index a 0}} set bytecode_safe [x eval {tcl::unsupported::disassemble proc ::ensembletest}] if {$show} { puts safe: puts $bytecode_safe } interp delete x #mainly we expect the safe interp might contain invokeStk - indicating not byte compiled (or we would see strindex instead) #It's possible the interp we're running in is also not compiling ensembles. #we could then get a result of 2 - which still indicates a problem if {[string last "invokeStk" $bytecode_safe] >= 1} { incr has_bug } } else { #our failure to create a safe interp here doesn't necessarily mean the Tcl version doesn't have the problem - but we could end up returning zero if somehow safe interp can't be created from unsafe interp? #unlikely - but we should warn puts stderr "Unable to create a safe sub-interp to test - result only indicates status of current interpreter" } } namespace delete [namespace current]::testcompile if {[string last "invokeStk" $bytecode_outer] >= 1} { incr has_bug } set description "ensemble commands not compiled in safe interps - heavy performance impact in safe interps" return [dict create bug $has_bug bugref 1095bf7f756f9aed6bde description $description level major] } } tcl::namespace::eval punk::lib::compat { #*** !doctools #[subsection {Namespace punk::lib::compat}] #[para] compatibility functions for features that may not be available in earlier Tcl versions #[para] These are generally 'forward compatibility' functions ie allowing earlier versions to use later features/idioms by using a Tcl-only version of a missing builtin. #[para] Such Tcl-only versions will inevitably be less performant - perhaps significantly so. #*** !doctools #[list_begin definitions] if {[catch { ::tcl::mathfunc::isinf 1 }]} { #review - doesn't seem to cause int-rep of the value to shimmer - but does it reasonably emulate what tcl9's isinf does? proc ::tcl::mathfunc::isinf {v} { string match -nocase *inf* $v } } if {"::lremove" ne [info commands ::lremove]} { #puts stderr "Warning - no built-in lremove" interp alias {} lremove {} ::punk::lib::compat::lremove } proc lremove {list args} { #*** !doctools #[call [fun lremove] [arg list] [opt {index ...}]] #[para] Forwards compatible lremove for versions 8.6 or less to support equivalent 8.7 lremove set data [lmap v $list {list data $v}] foreach doomed_index $args { if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value } set keep [lsearch -all -inline -not -exact $data x] return [lsearch -all -inline -index 1 -subindices $keep *] } #not significantly different in performance over test of 100 elements - getting somewhere near 10% for 1k integers proc lremove2 {list args} { set data [lmap v $list {list data $v}] foreach doomed_index $args { if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value } set keep [lsearch -all -inline -not -exact $data x] return [lmap v $keep {lindex $v 1}] } #outside of lmap - don't know of any particularly nice ways to flatten to subindex 1 of each element.. #flattening then lsearch with -stride and * would be nice - but it's not avail in 8.6 if {![info exists ::auto_index(readFile)]} { if {[info commands ::readFile] eq ""} { proc ::readFile {filename {mode text}} { #readFile not seen in auto_index or as command: installed by punk::lib # Parse the arguments set MODES {binary text} set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] # Read the file set f [open $filename [dict get {text r binary rb} $mode]] try { return [read $f] } finally { close $f } } } } if {![info exists ::auto_index(writeFile)]} { if {[info commands ::writeFile] eq ""} { proc ::writeFile {args} { #writeFile not seen in auto_index or as command: installed by punk::lib # Parse the arguments switch [llength $args] { 2 { lassign $args filename data set mode text } 3 { lassign $args filename mode data set MODES {binary text} set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] } default { set COMMAND [lindex [info level 0] 0] return -code error -errorcode {TCL WRONGARGS} "wrong # args: should be \"$COMMAND filename ?mode? data\"" } } # Write the File set f [open $filename [dict get {text w binary wb} $mode]] try { puts -nonewline $f $data } finally { close $f } } } } if {"::lpop" ne [info commands ::lpop]} { #puts stderr "Warning - no built-in lpop" interp alias {} lpop {} ::punk::lib::compat::lpop punk::args::set_idalias ::punk::lib::compat::lpop ::lpop ;#point to the definition of ::lpop defined in punk::args::tclcore } proc lpop {lvar args} { #*** !doctools #[call [fun lpop] [arg listvar] [opt {index}]] #[para] Forwards compatible lpop for versions 8.6 or less to support equivalent 8.7 lpop #upvar $lvar l upvar 1 $lvar l if {![llength $args]} { set args [list end] } set v [lindex $l {*}$args] set newlist $l set path [list] set subl $l for {set i 0} {$i < [llength $args]} {incr i} { set idx [lindex $args $i] if {![llength [lrange $subl $idx $idx]]} { error "tcl_lpop index \"$idx\" out of range" } lappend path [lindex $args $i] set subl [lindex $l {*}$path] } set sublist_path [lrange $args 0 end-1] set tailidx [lindex $args end] if {![llength $sublist_path]} { #set newlist [lremove $newlist $tailidx] #set newlist [lreplace $newlist $tailidx $tailidx] set newlist [lreplace $newlist[set newlist {}] $tailidx $tailidx] #we avoid use of ledit here because if lpop is running as compat - ledit may also not be available as a builtin. } else { set sublist [lindex $newlist {*}$sublist_path] #set sublist [lremove $sublist $tailidx] #set sublist [lreplace $sublist $tailidx $tailidx] set sublist [lreplace $sublist[set sublist {}] $tailidx $tailidx] lset newlist {*}$sublist_path $sublist } #puts "[set l] -> $newlist" set l $newlist return $v } if {"::ledit" ni [info commands ::ledit]} { interp alias {} ledit {} ::punk::lib::compat::ledit punk::args::set_idalias ::punk::lib::compat::ledit ::ledit } proc ledit {lvar first last args} { upvar $lvar l #use lindex_resolve to support for example: ledit lst end+1 end+1 h i set fidx [punk::lib::lindex_resolve [llength $l] $first] switch -exact -- $fidx { -Inf { #index below lower bound set pre [list] set fidx -1 } Inf { #first index position is greater than index of last element in the list set pre [lrange $l 0 end] set fidx [llength $l] } default { #set pre [lrange $l 0 $first-1] set pre [lrange $l 0 $fidx-1] } } set lidx [punk::lib::lindex_resolve [llength $l] $last] if {$lidx < $fidx} { #from ledit man page: #If last is less than first, then any specified elements will be inserted into the list before the element specified by first with no elements being deleted. set post [lrange $l $fidx end] } else { #set post [lrange $l $last+1 end] switch -exact -- $lidx { -Inf { #index below lower bound set post [lrange $l 0 end] } Inf { #index above upper bound set post [list] } default { set post [lrange $l $lidx+1 end] } } } #switch -exact -- $lidx { # -Inf { # #index below lower bound # set post [lrange $l 0 end] # } # Inf { # #index above upper bound # set post [list] # } # default { # if {$lidx < $fidx} { # #from ledit man page: # #If last is less than first, then any specified elements will be inserted into the list before the element specified by first with no elements being deleted. # set post [lrange $l $fidx end] # } else { # #set post [lrange $l $last+1 end] # set post [lrange $l $lidx+1 end] # } # } #} set l [list {*}$pre {*}$args {*}$post] } #slight isolation - varnames don't leak - but calling context vars can be affected proc lmaptcl2 {varnames list script} { set result [list] set values [list] foreach v $varnames { lappend values "\$$v" } set linkvars [uplevel 1 [list ::tcl::info::vars]] set nscaller [uplevel 1 [list ::tcl::namespace::current]] set apply_script "" foreach vname $linkvars { append apply_script [string map [list %vname% $vname]\ {upvar 2 %vname% %vname%}\ ] \n } append apply_script $script \n #puts "--> $apply_script" foreach $varnames $list { lappend result [apply {*}{ } [list {*}{ } $varnames {*}{ } $apply_script {*}{ } $nscaller {*}{ } ] {*}[subst $values] ] } return $result } if {"::lmap" ne [info commands ::lmap]} { #puts stderr "Warning - no built-in lpop" interp alias {} lmap {} ::punk::lib::compat::lmaptcl } #lmap came in Tcl 8.6 - so probably not much need for a tcl forward compatibility version - but here it is anyway proc lmaptcl {varnames list script} { set result [list] set varlist [list] foreach varname $varnames { upvar 1 $varname var_$varname ;#ensure no collisions with vars in this proc lappend varlist var_$varname } foreach $varlist $list { lappend result [uplevel 1 $script] } return $result } #tcl8.7/9 compatibility for 8.6 if {[info commands ::tcl::string::insert] eq ""} { #https://wiki.tcl-lang.org/page/string+insert # Pure Tcl implementation of [string insert] command. proc ::tcl::string::insert {string index insertString} { # Convert end-relative and TIP 176 indexes to simple integers. if {[regexp -expanded { ^(end(?![\t\n\v\f\r ]) # "end" is never followed by whitespace |[\t\n\v\f\r ]*[+-]?\d+) # m, with optional leading whitespace (?:([+-]) # op, omitted when index is "end" ([+-]?\d+))? # n, omitted when index is "end" [\t\n\v\f\r ]*$ # optional whitespace (unless "end") } $index _ m op n]} { # Convert first index to an integer. switch $m { end {set index [string length $string]} default {scan $m %d index} } # Add or subtract second index, if provided. switch $op { + {set index [expr {$index + $n}]} - {set index [expr {$index - $n}]} } } elseif {![string is integer -strict $index]} { # Reject invalid indexes. return -code error "bad index \"$index\": must be\ integer?\[+-\]integer? or end?\[+-\]integer?" } # Concatenate the pre-insert, insertion, and post-insert strings. string cat [string range $string 0 [expr {$index - 1}]] $insertString\ [string range $string $index end] } # Bind [string insert] to [::tcl::string::insert]. tcl::namespace::ensemble configure string -map [tcl::dict::replace\ [tcl::namespace::ensemble configure string -map]\ insert ::tcl::string::insert] } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::lib::compat ---}] } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # Base namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval punk::lib { variable PUNKARGS tcl::namespace::export * variable has_struct_list set has_struct_list [expr {![catch {package require struct::list}]}] variable has_struct_set set has_struct_set [expr {![catch {package require struct::set}]}] variable has_punk_ansi set has_punk_ansi [expr {![catch {package require punk::ansi}]}] set has_twapi 0 if {"windows" eq $::tcl_platform(platform)} { set has_twapi [expr {![catch {package require twapi}]}] } if {![llength [tcl::info::commands ::punk::lib::assert]]} { tcl::namespace::import ::punk::assertion::assert punk::assertion::active 1 } namespace eval argdoc { #non-colour SGR codes set I "\x1b\[3m" ;# [a+ italic] set NI "\x1b\[23m" ;# [a+ noitalic] set B "\x1b\[1m" ;# [a+ bold] set N "\x1b\[22m" ;# [a+ normal] set T "\x1b\[1\;4m" ;# [a+ bold underline] set NT "\x1b\[22\;24m\x1b\[4:0m" ;# [a+ normal nounderline] } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::is_main_script @cmd -name punk::lib::is_main_script\ -summary\ "Test if current script was launched directly."\ -help\ "The ${$B}main script${$N} is the primary script that is executed by the interpreter, e.g. tclsh or wish. (as opposed to being loaded by the 'source' command) see https://wiki.tcl-lang.org/page/main+script" @values -min 0 -max 0 }] } proc is_main_script {} { #see https://wiki.tcl-lang.org/page/main+script if {[info script] ne "" && [info exists ::argv0] && [file dirname [file normalize [file join [info script] ...]]] eq [file dirname [file normalize [file join $::argv0 ...]]] } { return true } else { return false } } # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == # Maintenance - This is the primary source for tm_version... functions # - certain packages script require these but without package dependency # - 1 punk boot script # - 2 packagetrace module # - These should be updated to sync with this # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == proc tm_version_isvalid {versionpart} { #Needs to be suitable for use with Tcl's 'package vcompare' if {![catch [list package vcompare $versionpart $versionpart]]} { return 1 } else { return 0 } } proc tm_version_major {version} { if {![tm_version_isvalid $version]} { error "Invalid version '$version' is not a proper Tcl module version number" } set firstpart [lindex [split $version .] 0] #check for a/b in first segment if {[string is integer -strict $firstpart]} { return $firstpart } if {[string first a $firstpart] > 0} { return [lindex [split $firstpart a] 0] } if {[string first b $firstpart] > 0} { return [lindex [split $firstpart b] 0] } error "tm_version_major unable to determine major version from version number '$version'" } proc tm_version_canonical {ver} { #accepts a single valid version only - not a bounded or unbounded spec if {![tm_version_isvalid $ver]} { error "tm_version_canonical version '$ver' is not valid for a package version" } set parts [split $ver .] set newparts [list] foreach o $parts { set trimmed [string trimleft $o 0] set firstnonzero [string index $trimmed 0] switch -exact -- $firstnonzero { "" { lappend newparts 0 } a - b { #e.g 000bnnnn -> bnnnnn set tailtrimmed [string trimleft [string range $trimmed 1 end] 0] if {$tailtrimmed eq ""} { set tailtrimmed 0 } lappend newparts 0$firstnonzero$tailtrimmed } default { #digit if {[string is integer -strict $trimmed]} { #e.g 0100 -> 100 lappend newparts $trimmed } else { #e.g 0100b003 -> 100b003 (still need to process tail) if {[set apos [string first a $trimmed]] > 0} { set lhs [string range $trimmed 0 $apos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch set rhs [string range $trimmed $apos+1 end] ;#assert rhs non-empty and only digits set rhs [string trimleft $rhs 0] if {$rhs eq ""} { set rhs 0 } lappend newparts ${lhs}a${rhs} } elseif {[set bpos [string first b $trimmed]] > 0} { set lhs [string range $trimmed 0 $bpos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch set rhs [string range $trimmed $bpos+1 end] ;#assert rhs non-empty and only digits set rhs [string trimleft $rhs 0] if {$rhs eq ""} { set rhs 0 } lappend newparts ${lhs}b${rhs} } else { #assert - shouldn't get here trimmed val should have been empty, an int or contained an a or b error "tm_version_canonical error - trimfail - unexpected" } } } } } return [join $newparts .] } proc tm_version_required_canonical {versionspec} { #also trim leading zero from any dottedpart? #Tcl *allows* leading zeros in any of the dotted parts - but they are not significant. #e.g 1.01 is equivalent to 1.1 and 01.001 #also 1b3 == 1b0003 if {[string trim $versionspec] eq ""} {return ""} ;#unspecified = any version set errmsg "tm_version_required_canonical - invalid version specification" if {[string first - $versionspec] < 0} { #no dash #looks like a minbounded version (ie a single version with no dash) convert to min-max form set from $versionspec if {![tm_version_isvalid $from]} { error "$errmsg '$versionpec'" } if {![catch {tm_version_major $from} majorv]} { set from [tm_version_canonical $from] return "${from}-[expr {$majorv +1}]" } else { error "$errmsg '$versionspec'" } } else { # min- or min-max #validation and canonicalisation (strip leading zeroes from each segment, including either side of a or b) set parts [split $versionspec -] ;#we expect only 2 parts lassign $parts from to if {![tm_version_isvalid $from]} { error "$errmsg '$versionspec'" } set from [tm_version_canonical $from] if {[llength $parts] == 2} { if {$to ne ""} { if {![tm_version_isvalid $to]} { error "$errmsg '$versionspec'" } set to [tm_version_canonical $to] return $from-$to } else { return $from- } } else { error "$errmsg '$versionspec'" } error "tm_version_required_canonical should have already returned a canonicalised versionspec - or produced an error with reason before this point" } } # end tm_version... functions # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == proc jtest {} { namespace eval jtest { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::lswap @cmd -name punk::lib::lswap\ -summary\ "Swap list values in-place"\ -help\ "Similar to struct::list swap, except it fully supports basic list index expressions such as 7-2 end-1 etc. struct::list swap doesn't support 'end' offsets, and only sometimes appears to support basic expressions, depending on the expression compared to the list length." @values -min 3 -max 3 lvar -type string -help\ "name of list variable" a -type indexexpression z -type indexexpression }] } } # -- --- #https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists #DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 #8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows # Review and retest as new versions come out. # -- --- proc list_multi_append1 {lvar1 lvar2} { #clear winner in 2024 upvar $lvar1 l1 $lvar2 l2 lappend l1 {*}$l2 return $l1 } proc list_multi_append2 {lvar1 lvar2} { upvar $lvar1 l1 $lvar2 l2 set l1 [list {*}$l1 {*}$l2] } proc list_multi_append3 {lvar1 lvar2} { upvar $lvar1 l1 $lvar2 l2 set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] } #testing e.g #set l1_reset {a b c} #set l2 {a b c d e f g} #set l1 $l1_reset #time {list_multi_append1 l1 l2} 1000 #set l1 $l1_reset #time {list_multi_append2 l1 l2} 1000 # -- --- namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::lswap @cmd -name punk::lib::lswap\ -summary\ "Swap list values in-place"\ -help\ "Similar to struct::list swap, except it fully supports basic list index expressions such as 7-2 end-1 etc. struct::list swap doesn't support 'end' offsets, and only sometimes appears to support basic expressions, depending on the expression compared to the list length." @values -min 3 -max 3 lvar -type string -help\ "name of list variable" a -type indexexpression z -type indexexpression }] } proc lswap {lvar a z} { upvar $lvar l set len [llength $l] if {[lindex_resolve_basic $len $a] < 0 || [lindex_resolve_basic $len $z] < 0} { #lindex_resolve_basic returns only -Inf if out of range at either bound #if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred #(e.g using: lswap mylist end-2 end on a two element list) #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report #use full 'lindex_resolve' which can report which side via -Inf and Inf special results being lower and upper bound breaches respectively set a_index [lindex_resolve $len $a] set a_msg "" switch -- $a_index { -Inf { set a_msg "1st supplied index $a is below the lower bound for the list (0)" } Inf { set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" } } set z_index [lindex_resolve $len $z] set z_msg "" switch -- $z_index { -Inf { set z_msg "2nd supplied index $z is below the lower bound for the list (0)" } Inf { set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" } } set errmsg "lswap cannot swap indices $a and $z" if {$a_msg ne ""} { append errmsg \n $a_msg } if {$z_msg ne ""} { append errmsg \n $z_msg } error $errmsg } set item2 [lindex $l $z] lset l $z [lindex $l $a] lset l $a $item2 return $l } #proc lswap2 {lvar a z} { # upvar $lvar l # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] #} proc lswap2 {lvar a z} { upvar $lvar l #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] } #an experimental test of swapping vars without intermediate variables #It's an interesting idea - but probably of little to no practical use # - the swap_intvars3 version using intermediate var is faster in Tcl # - This is probably unsurprising - as it's simpler code. # Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. #proc swap_intvars {swapv1 swapv2} { # upvar $swapv1 _x $swapv2 _y # set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] #} #proc swap_intvars2 {swapv1 swapv2} { # upvar $swapv1 _x $swapv2 _y # set _x [expr {$_x ^ $_y}] # set _y [expr {$_x ^ $_y}] # set _x [expr {$_x ^ $_y}] #} #proc swap_intvars3 {swapv1 swapv2} { # #using intermediate variable # upvar $swapv1 _x $swapv2 _y # set z $_x # set _x $_y # set _y $z #} #*** !doctools #[subsection {Namespace punk::lib}] #[para] Core API functions for punk::lib #[list_begin definitions] if {[info commands lseq] ne ""} { #tcl 8.7+ lseq significantly faster, especially for larger ranges #The internal rep can be an 'arithseries' with no string representation #support minimal set from to proc range {from to {by 1}} { #note inconsistency with lseq 1 10 by -9 vs lseq 1 10 by -10 #https://core.tcl-lang.org/tcl/tktview/999b6966b2 lseq $from $to by $by } } else { #lseq accepts basic expressions e.g 4-2 for both arguments #e.g we can do lseq 0 [llength $list]-1 #if range is to be consistent with the lseq version above - it should support that, even though we don't support most lseq functionality in either wrapper. #our range function doesn't support double like lseq does. (deliberate) review proc range {from to {by ""}} { if {$by eq "0"} { #as per lseq, step (by) zero always gives no result return [list] } set to [offset_expr $to] set from [offset_expr $from] if {$by ne ""} { set by [offset_expr $by] } #assert $by is now empty string or an integer if {$to > $from} { switch -- $by { "" - 1 { set count [expr {($to -$from) + 1}] if {$from == 0} { return [lsearch -all [lrepeat $count 0] *] } else { incr from -1 return [lmap v [lrepeat $count 0] {incr from}] } } default { set count [expr {($to - $from + $by) / $by}] if {$count <= 0} { #return [list] #https://core.tcl-lang.org/tcl/tktview/999b6966b2 return [list $from] ;#review } set result [list] for {set i $from} {$i <= $to} {incr i $by} { lappend result $i } return $result #if we don't have lseq, we probably don't have lsearch -stride, which would make things simpler. #set count [expr {($to -$from) + 1}] #if {$from == 0} { # set fullrange [lsearch -all [lrepeat $count 0] *] #} else { # incr from -1 # set fullrange [lmap v [lrepeat $count 0] {incr from}] #} #set result [list] #for {set i 0} {$i < $count} {incr i} { # if {$i % $by == 0} { # lappend result [lindex $fullrange $i] # } #} #return $result } } #slower methods. #2) #set i -1 #set L [lrepeat $count 0] #lmap v $L {lset L [incr i] [incr from];lindex {}} #return $L #3) #set L {} #for {set i 0} {$i < $count} {incr i} { # lappend L [incr from] #} #return $L } elseif {$from > $to} { switch -- $by { "" - -1 { set count [expr {$from - $to} + 1] if {$to == 0} { return [lreverse [lsearch -all [lrepeat $count 0] *]] } else { incr from return [lmap v [lrepeat $count 0] {incr from -1}] } } default { set count [expr {($to - $from + $by) / $by}] if {$count <= 0} { #return [list] return [list $from] ;#review } set result [list] for {set i $from} {$i >= $to} {incr i $by} { lappend result $i } return $result } } #2) #set i -1 #set L [lrepeat $count 0] #lmap v $L {lset L [incr i] [incr from -1];lindex {}} #return $L #3) #set L {} #for {set i 0} {$i < $count} {incr i} { # lappend L [incr from -1] #} #return $L } else { return [list $from] } } } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::lzip @cmd -name punk::lib::lzip\ -summary\ "zip any number of lists together."\ -help\ "Conceptually equivalent to converting a list of rows to a list of columns. The number of returned lists (columns) will be equal to the length of the longest supplied list (row). If lengths of supplied lists don't match, empty strings will be inserted in the resulting lists. e.g lzip {a b c d e} {1 2 3 4} {x y z} -> {a 1 x} {b 2 y} {c 3 z} {d 4 {}} {3 {} {}} " @values -min 0 -max -1 list -type list -multiple 1 -optional 1 }] } proc lzip {args} { switch -- [llength $args] { 0 {return {}} 1 {return [lindex $args 0]} 2 {return [::punk::lib::system::lzip2lists {*}$args]} 3 {return [::punk::lib::system::lzip3lists {*}$args]} 4 {return [::punk::lib::system::lzip4lists {*}$args]} 5 {return [::punk::lib::system::lzip5lists {*}$args]} 6 {return [::punk::lib::system::lzip6lists {*}$args]} 7 {return [::punk::lib::system::lzip7lists {*}$args]} 8 {return [::punk::lib::system::lzip8lists {*}$args]} 9 {return [::punk::lib::system::lzip9lists {*}$args]} 10 {return [::punk::lib::system::lzip10lists {*}$args]} 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 { set n [llength $args] if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { #puts "calling ::punk::lib::system::Build_lzipn $n" ::punk::lib::system::Build_lzipn $n } return [::punk::lib::system::lzip${n}lists {*}$args] } default { if {[llength $args] < 4000} { set n [llength $args] if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { #puts "calling ::punk::lib::system::Build_lzipn $n" ::punk::lib::system::Build_lzipn $n } return [::punk::lib::system::lzip${n}lists {*}$args] } else { return [::punk::lib::lzipn {*}$args] } } } } namespace eval system { proc Build_lzipn {n} { set arglist [list] #use punk::lib::range which defers to lseq if available set vars [lmap i [punk::lib::range 0 $n] {string cat v$i}] ;#v0 v1 v2.. (v0 ignored) set body "\nlmap " for {set i 1} {$i <= $n} {incr i} { lappend arglist l$i append body "[lindex $vars $i] \$l$i " } append body "\{list " for {set i 1} {$i <= $n} {incr i} { append body "\$[lindex $vars $i] " } append body "\}" \n #puts "proc punk::lib::system::lzip${n}lists {$arglist} \{" #puts "$body" #puts "\}" proc ::punk::lib::system::lzip${n}lists $arglist $body } #fastest is to know the number of lists to be zipped proc lzip2lists {l1 l2} { lmap a $l1 b $l2 {list $a $b} } proc lzip3lists {l1 l2 l3} { lmap a $l1 b $l2 c $l3 {list $a $b $c} } proc lzip4lists {l1 l2 l3 l4} { lmap a $l1 b $l2 c $l3 d $l4 {list $a $b $c $d} } proc lzip5lists {l1 l2 l3 l4 l5} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 {list $a $b $c $d $e} } proc lzip6lists {l1 l2 l3 l4 l5 l6} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 {list $a $b $c $d $e $f} } proc lzip7lists {l1 l2 l3 l4 l5 l6 l7} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 {list $a $b $c $d $e $f $g} } proc lzip8lists {l1 l2 l3 l4 l5 l6 l7 l8} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 {list $a $b $c $d $e $f $g $h} } proc lzip9lists {l1 l2 l3 l4 l5 l6 l7 l8 l9} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 {list $a $b $c $d $e $f $g $h $i} } proc lzip10lists {l1 l2 l3 l4 l5 l6 l7 l8 l9 l10} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 j $l10 {list $a $b $c $d $e $f $g $h $i $j} } #neat algorithm - but while lmap seems better than foreach - it seems the script is evaluated a little slowly # review - proc lzipn_alt args { #stackoverflow - courtesy glenn jackman (modified) foreach l $args { lappend vars [incr n] lappend lmap_args $n $l } lmap {*}$lmap_args {lmap v $vars {set $v}} } #2024 - outperforms lmap version - presumably because list sizes reduced as it goes(?) proc lzipn_tcl8 {args} { #For tcl pre 9 (without lsearch -stride) #wiki - courtesy JAL set list_l $args set zip_l [] while {1} { set cur [lmap a_l $list_l { lindex $a_l 0 }] set list_l [lmap a_l $list_l { lrange $a_l 1 end }] if {[join $cur {}] eq {}} { break } lappend zip_l $cur } return $zip_l } proc lzipn_tcl9a {args} { #For Tcl 9+ (with lsearch -stride) #compared to wiki version #comparable for lists len <3 or number of args < 3 #approx 2x faster for large lists or more lists #needs -stride single index bug fix to use empty string instead of NULL if {![llength $args]} {return {}} set lens [lmap l $args {llength $l}] set numcolumns [::tcl::mathfunc::max {*}$lens] set flatlist [lrepeat [expr {$numcolumns * [llength $args]}] {}] set outlist [lrepeat $numcolumns {}] set s 0 foreach len $lens list $args { #ledit flatlist $s $e {*}$l {*}[lrepeat [expr {($numcolumns -([llength $l] % $numcolumns)) % $numcolumns}] NULL] ledit flatlist $s [expr {$s + $len - 1}] {*}$list incr s $numcolumns } #needs single index lstride bugfix for {set c 0} {$c < $numcolumns} {incr c} { ledit outlist $c $c [lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *] } return $outlist } proc lzipn_tcl9b {args} { if {![llength $args]} {return {}} set lens [lmap l $args {llength $l}] set numcolumns [::tcl::mathfunc::max {*}$lens] set flatlist [list] foreach len $lens list $args { lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] } lmap c [lseq 0 $numcolumns-1] {lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *} } proc lzipn_tcl9c {args} { #SLOW if {![llength $args]} {return {}} set lens [lmap l $args {llength $l}] set numcolumns [::tcl::mathfunc::max {*}$lens] set flatlist [list] foreach len $lens list $args { lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] } set zip_l {} set cols_remaining $numcolumns for {set c 0} {$c < $numcolumns} {incr c} { if {$cols_remaining == 1} { return [list {*}$zip_l $flatlist] } lappend zip_l [lsearch -stride $cols_remaining -index 0 -inline -all -subindices $flatlist *] set flen [llength $flatlist] set flatlist [lremove $flatlist {*}[lseq 0 to $flen-1 by $cols_remaining]] incr cols_remaining -1 } return $zip_l } } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::lzipn @cmd -name punk::lib::lzipn\ -summary\ "zip any number of lists together (unoptimised)."\ -help\ "Conceptually equivalent to converting a list of rows to a list of columns. See lzip which provides the same functionality but with optimisations depending on the number of supplied lists. " @values -min 1 -max 1 lvar -type string -help\ "name of list variable" a -type indexexpression z -type indexexpression }] } #keep both lzipn_tclX functions available for side-by-side testing in Tcl versions where it's possible if {![package vsatisfies [package present Tcl] 9.0-] || [dict get [punk::lib::check::has_tclbug_lsearch_strideallinline] bug]} { #-stride either not available - or has bug preventing use of main algorithm below proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl8] } else { proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl9a] } namespace import ::punk::args::lib::tstr proc tclscript_to_commands script { #https://wiki.tcl-lang.org/page/cmdSplit namespace upvar [namespace current] commands_info report set report {} set commands {} set command {} set comment 0 set lineidx 0 set offset 0 foreach line [split $script \n] { set parts [split $line \;] set numparts [llength $parts] set partidx 0 while 1 { set parts [lassign $parts[set parts {}] part] if {[string length $command]} { if {$partidx} { append command \;$part } else { append command \n$part } } else { set partlength [string length $part] set command [string trimleft $part[set part {}] "\f\n\r\t\v "] incr offset [expr {$partlength - [string length $command]}] if {[string match #* $command]} { set comment 1 } } if {$command eq {}} { incr offset } elseif {(!$comment || ( $comment && (!$numparts || ![llength $parts]))) && [info complete $command\n]} { lappend commands $command set info [dict create character $offset line $lineidx] set offset [expr {$offset + [string length $command] + 1}] lappend report $info set command {} set comment 0 set info {} } incr partidx if {![llength $parts]} break } } incr lineidx if {$command ne {}} { error [list {incomplete command} $command] } return $commands } #expects a single command. (ie does not handle multiple commands separated by semicolons or newlines) proc cmd_words cmd { #https://wiki.tcl-lang.org/page/cmdSplit # (words2 PYK) if {![info complete $cmd]} { error [list {not a complete command} $cmd] } set words {} set logical {} set cmd [string trimleft $cmd[set cmd {}] "\f\n\r\t\v " ] while {[regexp {([^\f\n\r\t\v ]*)([\f\n\r\t\v ]+)(.*)} $cmd full first delim last]} { append logical $first if {[info complete $logical\n]} { lappend words $logical set logical {} } else { append logical $delim } set cmd $last[set last {}] } if {$cmd ne {}} { append logical $cmd } if {$logical ne {}} { lappend words $logical } return $words } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::tclscript_to_scriptlist @cmd -name punk::lib::tclscript_to_scriptlist\ -summary\ "Parse tcl script to toplevel list of lists."\ -help\ "Get topmost list of tcl language elements in script. produces a list of lists where each sublist is a commandlist or a comment string." @values -min 1 -max 1 script -type string }] } #review - consider returning dict with scriptlist and linerange info for each cmdlist/comment #- would be useful for error reporting and other use cases. #- a command can have multi-line arguments and can also be continued across lines with the line-continuation character (backslash). proc tclscript_to_scriptlist {script} { set scriptlist [list] set cmdlist [list] set scrlen [string length $script] set token "" set in_token 0 set in_cmdlist 0 set in_comment 0 set charmap [list \t TB \n LF \r CR \\ BSL] ;#for switch 'jump' preservation - review - may be slower than escapes in switch statement? for {set i 0} {$i < $scrlen} {incr i} { set ch [string index $script $i] set chswitch [string map $charmap $ch] if {!$in_token} { switch -- $chswitch { { } - TB { #ignore - continue being a non token } CR { if {[string index $script $i+1] eq "\n"} { if {$in_cmdlist} { #no active token - newline ends cmdlist set in_cmdlist 0 lappend scriptlist $cmdlist set cmdlist [list] } incr i } } LF - ";" { #no active token - newline or semicolon ends cmdlist if {$in_cmdlist} { set in_cmdlist 0 lappend scriptlist $cmdlist set cmdlist [list] } } BSL { if {[string index $script $i+1] eq "\n"} { #continuation of whitespace while no token - boring incr i } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { #continuation of whitespace while no token - boring incr i 2 } else { #an uncommon possibility, a command wth surrounding spaces called in an strange way #REVIEW # e.g \ cmdname\ arg set in_token 1 set token_startline $line set token "\\[string index $script $i+1]" incr i if {!$in_cmdlist} { set in_cmdlist 1 } } } # { if {$in_cmdlist} { #ordinary data set in_token 1 set token # } else { if {!$in_comment} { set in_token 1 set in_comment 1 set token # } else { #wnen in comment - all will be a single token until comment ends append token # } } } default { #for completeness.. we should exclude other possible whitespace chars if {![string is space $ch]} { set in_token 1 set token $ch if {!$in_cmdlist} { set in_cmdlist 1 #lset cmdlist_linerange 0 $line ;#start line of cmdlist } } } } } else { #if we're in a token, we must be in a cmdlist or a comment (single token) #review - not preserving whitespace in list of commands is ok, but for comments it should ideally be preserved #note that unbalanced curly in *toplevel* comment will still 'info complete' to true switch -- $chswitch { LF { if {!$in_comment} { if {[tcl::info::complete $token]} { #ends token and cmdlist lappend cmdlist $token lappend scriptlist $cmdlist set cmdlist "" set in_cmdlist 0 set token "" set in_token 0 } else { append token \n } } else { #ends a comment lappend scriptlist $token ;#single token for comment set token "" set in_token 0 set in_comment 0 set in_cmdlist 0 ;#shouldn't be necessary, but included for clarity } } ";" { if {!$in_comment} { if {[tcl::info::complete $token]} { #ends token and cmdlist lappend cmdlist $token lappend scriptlist $cmdlist set cmdlist "" set in_cmdlist 0 set token "" set in_token 0 } else { append token ";" } } else { #ordinary char for comment append token ";" } } CR { if {[string index $script $i+1] eq "\n"} { if {[tcl::info::complete $token]} { #ends token and commandlist lappend cmdlist $token lappend scriptlist $cmdlist set cmdlist "" set in_cmdlist 0 set token "" set in_token 0 } else { append token \r\n incr i } } else { append token \r } } BSL { if {[string index $script $i+1] eq "\n"} { #continuation - lf effectively becomes a space if {!$in_comment} { #token may end - but cmdlist goes on if {[tcl::info::complete $token]} { lappend cmdlist $token set token "" set in_token 0 } else { append token " " } } else { append token " " } incr i ;#skip LF } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { #continuation - cr-lf effectively becomes a space if {!$in_comment} { #token may end - but cmdlist goes on if {[tcl::info::complete $token]} { lappend cmdlist $token set token "" set in_token 0 } else { append token " " } } else { append token " " } incr i 2 ;#skip CRLF } else { append token "\\[string index $script $i+1]" incr i } } default { if {![string is space $ch]} { append token $ch } else { if {!$in_comment} { if {[tcl::info::complete $token]} { lappend cmdlist $token set token "" set in_token 0 } else { append token $ch } } else { append token $ch } } } } } } #eof if {!$in_comment} { if {$in_token} { if {[tcl::info::complete $token]} { lappend cmdlist $token lappend scriptlist $cmdlist } else { error "Eof reached whilst script incomplete. Unbalanced braces?\ntoken: '$token'" } } else { if {$in_cmdlist} { lappend scriptlist $cmdlist } } } else { lappend scriptlist $token } return $scriptlist } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::tclscript_to_toplevelinfo @cmd -name punk::lib::tclscript_to_toplevelinfo\ -summary\ "Parse tcl script to toplevel components and lineranges."\ -help\ "Get topmost tcl language elements in script. Produces a dictionary with keys 'scriptlist' and 'lineranges'. 'scriptlist' is a list of elements that either represent a commandlist (list of words) or a comment (string). 'lineranges' is a list of lists, where each sublist corresponds to an element in 'scriptlist' and contains the line range (start and end line numbers) for that element in the original script. For a comment line - the entire comment is treated as a single string element in 'scriptlist'. Only a single entry is included in 'lineranges' for the whole comment, covering all lines of the comment if it spans multiple lines via line continuation characters. For a commandlist - there will always be at least 2 entries in the corresponding sublist in 'lineranges' - the first entry is the line range for the whole commandlist, and then there is a line range for each word in the commandlist. Note that the members of scriptlist can be either a tcl list or a tcl string - but the type is not explicitly indicated in the output. The type can be inferred by checking the corresponding element in lineranges - if it's a single line range (llength 1), it's a comment string, if it's a list of line ranges with llength > 1, it's a commandlist. " @values -min 1 -max 1 script -type string }] } #return a dict with scriptlist and lineranges. #todo - we need a way to determine the start line for each word in each cmdlist - for error reporting and other use cases. #We can do this by tracking line numbers as we go, and then returning a list of line ranges for each cmdlist. #For comments we can just return the line range for the whole comment. #review: we may be able to re-implement this using the tclparser 'parse' command, # but it merges consecutive comments into a single comment range (which we could split back out) and also doesn't directly provide line number info. # we could still determine line number info by carefully checking the output of the parse command. #todo - make a test implementation using 'parse' and compare performance. The c implementation of 'parse' is likely to provide a performance benefit. #for now, we don't have a pure-tcl implementation of 'parse' available as a fallback, so we'll stick with this approach. proc tclscript_to_toplevelinfo {script} { set scriptlist [list] set lineranges [list] set cmdlist [list] #------------------------------------ #cmdlist_linerange is a list of lists #- each sublist is a pair of start and end line numbers for the entire current linelist, #followed by a pair of line numbers for each token in the cmdlist. set cmdlist_linerange [list [list 1 1]] ;#start and end line numbers for current cmdlist - 1-based. #------------------------------------ set scrlen [string length $script] set token "" set token_startline 1 set in_token 0 set in_cmdlist 0 set in_comment 0 set charmap [list \t TB \n LF \r CR \\ BSL] ;#for switch 'jump' preservation - review - may be slower than escapes in switch statement? set line 1 ;#1-based line numbers for {set i 0} {$i < $scrlen} {incr i} { set ch [string index $script $i] set chswitch [string map $charmap $ch] if {!$in_token} { switch -- $chswitch { { } - TB { #ignore - continue being a non token } CR { if {[string index $script $i+1] eq "\n"} { if {$in_cmdlist} { #no active token - newline ends cmdlist set in_cmdlist 0 lappend scriptlist $cmdlist lappend lineranges $cmdlist_linerange incr line set cmdlist [list] set cmdlist_linerange [list [list $line $line]] } else { incr line set cmdlist_linerange [list [list $line $line]] } incr i } } LF { #no active token - newline ends cmdlist if {$in_cmdlist} { set in_cmdlist 0 lappend scriptlist $cmdlist lappend lineranges $cmdlist_linerange incr line set cmdlist [list] set cmdlist_linerange [list [list $line $line]] } else { incr line #lset cmdlist_linerange 1 $line set cmdlist_linerange [list [list $line $line]] } } ";" { #no active token - semicolon ends cmdlist if {$in_cmdlist} { set in_cmdlist 0 lappend scriptlist $cmdlist lappend lineranges $cmdlist_linerange set cmdlist [list] set cmdlist_linerange [list [list $line $line]] } } BSL { if {[string index $script $i+1] eq "\n"} { #continuation of whitespace while no token - but backslash is acting as line continuation char. puts stderr "no-token backslash continuation with LF at line $line" incr i incr line if {$in_cmdlist} { lset cmdlist_linerange 0 1 $line ;#extend range of whole cmdlist } #review } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { #continuation of whitespace while no token - but backslash is acting as line continuation char. puts stderr "no-token backslash continuation with CRLF at line $line" incr i 2 incr line if {$in_cmdlist} { lset cmdlist_linerange 0 1 $line ;#extend } #review } else { #an uncommon possibility, a command wth surrounding spaces called in an strange way # e.g \ cmdname\ arg set in_token 1 set token_startline $line set token "\\[string index $script $i+1]" incr i if {!$in_cmdlist} { set in_cmdlist 1 #lset cmdlist_linerange 0 $line ;#start line of cmdlist set cmdlist_linerange [list [list $line $line]] } } } # { if {$in_cmdlist} { #ordinary data set in_token 1 set token_startline $line set token # #lappend cmdlist_linerange [list $line $line] ;#start line of token } else { if {!$in_comment} { set in_token 1 set token_startline $line set in_comment 1 set token # } else { #wnen in comment - all will be a single token until comment ends append token # } } } default { #for completeness.. we should exclude other possible whitespace chars if {![string is space $ch]} { set in_token 1 set token_startline $line set token $ch if {!$in_cmdlist} { set in_cmdlist 1 #lset cmdlist_linerange 0 $line ;#start line of cmdlist set cmdlist_linerange [list [list $line $line]] } } } } } else { #if we're in a token, we must be in a cmdlist or a comment (single token) #review - not preserving whitespace in list of commands is ok, but for comments it should ideally be preserved #note that unbalanced curly in *toplevel* comment will still 'info complete' to true switch -- $chswitch { LF { if {!$in_comment} { if {[tcl::info::complete $token]} { #ends token and cmdlist lappend cmdlist $token lappend scriptlist $cmdlist lappend cmdlist_linerange [list $token_startline $line] ;#end line of token lappend lineranges $cmdlist_linerange set in_cmdlist 0 set token "" set in_token 0 incr line set cmdlist "" set cmdlist_linerange [list [list $line $line]] } else { append token \n incr line lset cmdlist_linerange 0 1 $line } } else { #ends a comment lappend scriptlist $token ;#single token for comment #we don't need to track individual token line ranges for comments, as the whole comment is a single token - so we just append the line range for the whole comment lappend lineranges $cmdlist_linerange set token "" set in_token 0 set in_comment 0 set in_cmdlist 0 ;#shouldn't be necessary, but included for clarity incr line set cmdlist "" set cmdlist_linerange [list [list $line $line]] } } ";" { if {!$in_comment} { if {[tcl::info::complete $token]} { #ends token and cmdlist lappend cmdlist $token lappend scriptlist $cmdlist lappend cmdlist_linerange [list $token_startline $line] ;#end line of token lappend lineranges $cmdlist_linerange set in_cmdlist 0 set token "" set cmdlist "" set cmdlist_linerange [list [list $line $line]] set in_token 0 } else { append token ";" } } else { #ordinary char for comment append token ";" } } CR { if {[string index $script $i+1] eq "\n"} { if {[tcl::info::complete $token]} { #ends token and commandlist lappend cmdlist $token lappend scriptlist $cmdlist lappend cmdlist_linerange [list $token_startline $line] ;#end line of token lappend lineranges $cmdlist_linerange set in_cmdlist 0 set token "" incr line set cmdlist "" set cmdlist_linerange [list $line $line] set in_token 0 } else { append token \r\n incr i } } else { append token \r } } BSL { if {[string index $script $i+1] eq "\n"} { #continuation - lf effectively becomes a space incr line if {!$in_comment} { #token may end - but cmdlist goes on if {[tcl::info::complete $token]} { lappend cmdlist $token set token "" set in_token 0 lappend cmdlist_linerange [list $token_startline $line] ;#end line of token } else { append token " " } } else { append token " " } lset cmdlist_linerange 0 1 $line ;#extend incr i ;#skip LF } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { #continuation - cr-lf effectively becomes a space incr line if {!$in_comment} { #token may end - but cmdlist goes on if {[tcl::info::complete $token]} { lappend cmdlist $token set token "" set in_token 0 lappend cmdlist_linerange [list $token_startline $line] ;#end line of token } else { append token " " } } else { append token " " } lset cmdlist_linerange 0 1 $line ;#extend incr i 2 ;#skip CRLF } else { append token "\\[string index $script $i+1]" incr i } } default { if {![string is space $ch]} { append token $ch } else { if {!$in_comment} { if {[tcl::info::complete $token]} { lappend cmdlist $token set token "" set in_token 0 lappend cmdlist_linerange [list $token_startline $line] ;#end line of token } else { append token $ch } } else { append token $ch } } } } } } #eof if {!$in_comment} { if {$in_token} { if {[tcl::info::complete $token]} { lappend cmdlist $token lappend scriptlist $cmdlist #eof ends token and cmdlist lappend cmdlist_linerange [list $token_startline $line] ;#end line of token lappend lineranges $cmdlist_linerange } else { error "Eof reached whilst script incomplete. Unbalanced braces? linerange:$cmdlist_linerange\ntoken: '$token'" } } else { if {$in_cmdlist} { lappend scriptlist $cmdlist lappend lineranges $cmdlist_linerange } } } else { lappend scriptlist $token lappend lineranges $cmdlist_linerange } #assert {[llength $scriptlist] == [llength $lineranges]} ;#sanity check - each cmdlist/comment should have a corresponding linerange return [dict create scriptlist $scriptlist lineranges $lineranges] } #tclscript_info should be evaluated in the namespace where the script is to be analysed, or with an explicit nscontext argument, #so that cmdinfo gives correct results for commands in the script's context. # - this is necessary for correct handling of ensembles and subcommands and in determining cmd types, and whether arguments are scripts or opaque strings. #This function is concerned with the complexity and what we can determine about the script without running it, #ie it is more akin to static analysis than runtime/dynamic analysis, despite the fact that we will be using cmdinfo to get contextual information about commands. #--------------------------------- #e.g "a string {[puts e1]xxx}" #e.g "a string [list {[puts n]yyy}]" #e.g "a string [list [puts e2[puts e1]]xxx {[puts n]yyy} "[puts e3]zzz"]" #for each of the above strings we should get a command recognised for the 'puts e*' items as well as the 'list' item, but not for the 'puts n' items since they are within curly braces and not subject to command substitution. #--------------------------------- proc tclscript_info {script {nscontext ""}} { package require parser #if the script is ANSI highlighted - the square brackets within the ANSI will disrupt our parsing. if {[punk::ansi::ta::detect $script]} { #we will strip it - but be noisy on stderr since a) it's a bi inefficient to pass in ansi highlighted scripts. #b) perhaps there could be edge cases where there is ANSI in the source, but within protected strings or comments. #error "tclscript_info does not currently support ANSI highlighted scripts. Please remove ANSI highlighting before analysis." puts stderr "[a+ red]tclscript_info: unescaped ANSI codes detected in script. Stripping ANSI codes for analysis - this may cause inaccuracies in some edge cases, and is inefficient - consider removing ANSI highlighting before analysis.[a]" set script [punk::ansi::ta::strip $script] } if {$nscontext eq ""} { set nscontext [uplevel 1 {namespace current}] } #use tclscript_to_toplevelinfo to analyse the script structure and give a rough measure of complexity based on number of command blocks, commands and nesting levels. set resultd [dict create commands_parsefail {} commands_parseskip {} comment_count 0 maxnesting 0 commands_count 0 commands {} commands_proc {} commands_native {} commands_notfound {} commands_unknown {} commands_recursive {} blocklist {}] set scriptinfo [tclscript_to_toplevelinfo $script] set scriptlist [dict get $scriptinfo scriptlist] set lineranges [dict get $scriptinfo lineranges] set cinfo_cache [dict create] foreach cmdlist $scriptlist cmdlineranges $lineranges { #If the cmdlist starts with a # it's a comment, and cmdlist is a string not a list. # but rather than shimmer all the cmdlist elements to strings to determine this, # we can test the length of the corresponding linerange - if it's a single line range, it's a comment string, if it's a list of line ranges with llength > 1, it's a commandlist. if {[llength $cmdlineranges] == 1} { dict incr resultd comment_count continue } #if {[string match #* $cmdlist]} { # dict incr resultd comment_count # continue #} #assert: cmdlist is a proper Tcl list of words representing a command and its arguments, and cmdlineranges is a list of line ranges corresponding to the cmdlist elements. #Any command or argument could be constructed in a dynamic way, possibly with multiple command calls for the line to be resolved. #e.g #[getcmd 1] [getsub 1] [getsub 2] [getoptions [mycommand options]] {*}[getargs remaining] #for our initial run we will assume the initial command and subcommands are all literal and not dynamically generated - review this assumption. #todo - apply the same checks we use for arguments below (regarding quotedness and square brackets) to the command and subcommand words #- if they are not simple words, then we should probably analyse them for command substitution and count any commands found as non-nested commands at the same level. #consider also that some dynamic arguments can stop us from determining the nesting behaviour since we won't be able to run the command to see what it does. #e.g #if {*}[get_if_args] #normally we could apply the arguments of the if command to see how they parse with regards to literal 'then' and 'elseif' and 'else' words #to determine which arguments are script blocks. #we should probably record instances of un-analysable lines for commands that are known to have complex argument parsing behaviour - e.g if, switch, apply, eval etc. #- so that a complexity formula that is looking at our returned dict can count those as increasing complexity by some amount. #review - ensemble commands can dispatch based on different argument positions. set dispatchwords [list] #puts stderr "cmdlist: $cmdlist" set args_remaining $cmdlist set args_remaining_lineranges [lrange $cmdlineranges 1 end] ;#the first element of cmdlineranges is the line range for the whole commandlist, so we take the remaining elements as the line ranges for dispatch(es) + args lappend dispatchwords [lpop args_remaining 0] lappend dispatch_lineranges [lpop args_remaining_lineranges 0] #we don't just call cmdinfo with {*}$dispatchwords since there can be documented subcommands that are not valid commands in their own right. # e.g 'string is xdigit' resolves to cmdtype 'notfound' (but with documentation that shows it is a valid deeper subcommand of 'string is') #this is because xdigit etc happen to be implemented within the tcl::string::is command but not as an ensemble. #we want to stop at the point where the command is not resolvable to a deeper level documented subcommand. #we will need to manually track the remaining unconsumed arguments. (lpop them as we go) #for now we'll just do toplevel commands in the script and their argument command-substitutions. #todo - use our punk::args::parse facility to determine arguments that are scripts (or expressions?) and analyse those recursively. if {![dict exists $cinfo_cache $dispatchwords]} { set cinfo [namespace eval $nscontext [list cmdinfo {*}$dispatchwords]] dict set cinfo_cache $dispatchwords $cinfo } else { set cinfo [dict get $cinfo_cache $dispatchwords] } set ctype [dict get $cinfo cmdtype] set origin [dict get $cinfo origin] if {$ctype eq "ensemble"} { for {set i 0} {$i < [llength $cmdlist]} {incr i} { lappend dispatchwords [lpop args_remaining 0] lappend dispatch_lineranges [lpop args_remaining_lineranges 0] #also check and update the cache for subcommands. if {![dict exists $cinfo_cache $dispatchwords]} { set test_cinfo [namespace eval $nscontext [list cmdinfo {*}$dispatchwords]] dict set cinfo_cache $dispatchwords $test_cinfo } else { set test_cinfo [dict get $cinfo_cache $dispatchwords] } if {[dict get $test_cinfo origin] eq $origin} { #no change in origin - so we are into the arguments of the command, or have an invalid subcommand - stop looking for subcommands #todo - detect invalid subcommand and count as unknown command? break } elseif {[dict get $test_cinfo cmdtype] in {"proc" "native" "notfound"}} { #we have a subcommand that won't be introspectable at a deeper level. set cinfo $test_cinfo set ctype [dict get $cinfo cmdtype] break } else { #review - we should perhaps only continue for cmdtypes that we explicilty determine are more deeply introspectable. #e.g ensembles and ooobjects (but this is not yet implemented) -- revew. set cinfo $test_cinfo set ctype [dict get $cinfo cmdtype] } } } #examine the arguments (to determine command substitutions in arguments that will occur before the full commandline itself can be run) #for each command before we add the command itself to the list of commands found. #This matches the behaviour of the tcl interpreter where arguments are fully processed (with regards to substitutions) before the command is executed, # so that the order of our lists matches the order of execution in the tcl interpreter. puts "cmdlist: $cmdlist cmdlineranges: $cmdlineranges dispatchwords:[a+ green]'$dispatchwords'[a] args_remaining:[a+ cyan]'$args_remaining'[a] ctype: $ctype origin: $origin" set cmdlist_parseinfo [parse command [join $cmdlist] {}] #puts stderr "[punk::lib::showdict -roottype list $cmdlist_parseinfo 0 1 2 3/*]" assert {[llength $args_remaining] == [llength $args_remaining_lineranges]} ;#sanity check - each remaining arg should have a corresponding linerange foreach arg $args_remaining arglr $args_remaining_lineranges { #arg could be expanded with leading {*} #we don't care about the expansion itself - but we need to check the trailing part for quoting and command substitution. #strip leading {*} if present for the purposes of analysis #- but we don't need to do anything with it other than perhaps record that it is present for the purposes of complexity analysis - review this assumption # (expansion of arguments with {*} is pretty common and reasonable, but expansion of commands or subcommands could reasonably be considered a complexity increasing feature) #similarly substitution of arguments with command substitution is pretty common and reasonable, but substitution of commands or subcommands could reasonably be considered a complexity increasing feature. #even substitution of commands or subcommands with variable substitution could be considered a complexity increasing feature - but we won't attempt to analyse that for now. #puts stderr "checking arg '$arg' for command substitution" #-------------------------------------- set argtest "bogus1 " append argtest $arg #we need a leading command word so that arg such as #etc is not parsed as a comment. #we can't use the main cmdlist_parseinfo structure since it auto-expands simple {*} constructs. sometimes producing no elements e.g {*}{ } and sometimes producing multiple. #(for other constructs with more complex content after the {*} it produces a single element with the first element of the parse tree being 'expand') set argparseinfo [parse command $argtest {}] lassign $argparseinfo commentRange commandRange restRange parseTree #sanity check the restRange - we expect the whole arg to be parsed as a single element, with no remaining text after parsing. if {[lindex $restRange 1] != 0} { puts stderr "Warning: 'parse command {}' did not consume the whole argument. This may indicate a parsing error or an edge case that is not handled correctly. arg: '$arg' restRange: $restRange" error "tclscript_info: unexpected parse result for argument. arg: '$arg' restRange: $restRange" } #the parse command will expand *simple* {*} constructs to return a parse tree with length > 1 (multiple 3-element lists) #e.g {*}"a b c" or {*}{a b c}. #whilst in a single line this simple construct is unlikely to appear in source code, #it is commonly used in source code when the argument spans multiple lines - e.g for creating dicts without using line-continuation characters. #e.g #dict create {*}{ # key1 value1 # key2 value2 #} #the parseTree list will be a list of 3 3-element lists something like: # simple {4 1} {{text {4 1} {}}} # simple {6 8} {{text {6 8} {}}} # simple {15 1} {{text {15 1} {}}} #if {*} is followed by more complex constructs, the parse tree will be a list of 1 3-element list, with the first element being the keyword 'expand'. #more complex constructs include those with command or variable or backslash substitution within them, #e.g {*}[getargs] or {*}"a [subcmd] c" or {*}"a $var c" or {*}"a \t c" #similar to variables or commands, any backslash (in a subtitution context such as bareword or doublequoted part) is parsed # as it's own token 'backslash' that covers the backslash and the following character, rather than being treated as a 'text' token. #- these constructs will have the first element being 'expand' since the presence of command substitution prevents the tcl parser from doing the expansion at parse time. #e.g the parseTree list for {{*}[somecmd]} will be a list containing a single 3-element list like: # expand {0 12} {{command {3 9} {}}} #The redundant case of argument {*}simpleword will be parsed the same as simpleword #-------------------------------------- set args_parsed [lrange [lindex $argparseinfo 3] 1 end] ;#first element is our 'bogus1' word we added for parsing purposes. if {[llength $args_parsed] == 0} { #there must have been a literal expansion with {*} that produced no text, e.g {*}{ } or {*}" " (or more likely a multiline version of that) etc - we can ignore these for the purposes of command substitution. continue } if {[llength $args_parsed] > 1} { #there must have been a simple expansion with {*} that produced multiple words, e.g {*}"a b c" or {*}{a b c} - we can ignore these for the purposes of command substitution since they are simple expansions that don't involve command substitution. continue } set type [lindex $args_parsed 0 0] if {$type eq "simple"} { #simple text - no command substitution possible - we can ignore these for the purposes of command substitution analysis. continue } #assert type is now word or expand. if {[string range $arg 0 2] eq "{*}"} { #strip leading {*} for analysis - but we don't need to do anything with it otherwise set arg [string range $arg 3 end] } if {[string index $arg 0] eq "\{" && [string index $arg end] eq "\}"} { #no variable or command substitution possible if arg is fully enclosed in curly braces - so we can skip analysis of command substitution within it continue } #----------------------------------------------------- #we presumably have a bareword or double quoted string. #e.g myargument[getnumber] or "my argument with [subcommand] etc" or "my argument with {*}[subcommand]" #---------------------------------------------------------------- #the parsing needs to be carefully done to avoid getting confused by square brackets that are within double quotes or curly braces. #e.g "a string {[puts emitted]xxx}" #e.g "a string [list {[puts not-emitted]yyy}]" #e.g "a string [list [puts emitted]xxx {[puts not-emitted]yyy}]" #Note that simply matching square brackets is not sufficient - nor is simply checking if a square bracket is within double quotes or curly braces #- we need to do a full parse of the argument to determine which square brackets are actually command substitutions that need to be analysed for complexity, #and which are just literal characters within the argument. #---------------------------------------------------------------- #even just running 'info complete' on the entire argument is not sufficient to determine if we can ignore it. #e.g "a string \{[puts emitted]" # # ignore - balance above curly just for editor \} #if {![info complete $arg]} { # #not a valid way to test if the argument can be ignored for the purposes of command substitution analysis # - since it could be incomplete due to unbalanced braces or quotes but still contain valid command substitutions that we need to analyse for complexity, # and it could still be a valid string as far as the cmdlist is concerned. # continue #} #----------------------------------------------------- # #arg is either enclosed in double quotes or a bareword (maybe even with trailing double quote) - we need to check for command substitution within it. # #tclword_to_scriptlist uses the tclparser library to do this parsing for us. # set sub_scriptlist [tclscript_info::tclword_to_scriptlist $arg] # #puts stderr "sub_scriptlist: $sub_scriptlist" # #set sub_tclscript_info [tclscript_info [join $sub_scriptlist \n] $nscontext] ;#wrong - we need to join the sub_scriptlist back into a string for analysis with tclscript_info, but we need to be careful to preserve the original structure of the argument for correct parsing of command substitutions within it - review this. # set combined_sub_script "" # foreach sub_script $sub_scriptlist { # append combined_sub_script [concat {*}$sub_script] \n # } # set sub_tclscript_info [tclscript_info $combined_sub_script $nscontext] #e.g script # set text " # cwd is [pwd] # tcl patch level: [info patch] # a bad command [if 1] # a variable value: $myvar # stuff: [dostuff [info name]] #" #we want to report the bad command 'if 1' as a parse fail with the correct line number, #and we want to report the other commands 'pwd' and 'info patch' as non-nested commands at the same level as the main command that the argument is for. #the 'info name' call should show up in the list of commands prior to the dostuff command, since it will be substituted before it by Tcl. set arg_start [lindex $arglr 0] puts stderr "arg_linerange: $arglr arg_start: $arg_start" #set arg_info [lindex $args_parsed 0 2] set arg_parts [lindex $args_parsed 0 2] puts stderr "arg_parts: $arg_parts" set arg_part_start $arg_start foreach ap $arg_parts { set ap_type [lindex $ap 0] set ap_range [lindex $ap 1] #expect types text, command, variable #( {*} will be a literal string, not an expansion operator even if it's at the beginning of a word - as it's within the argument) switch -- $ap_type { "text" { #ordinary text - no command substitution #we need to examine it for newlines to update the line number. set ap_text_value [parse getstring $argtest $ap_range] #puts " > text part: '$ap_text_value'" incr arg_part_start [parse countnewline $ap_text_value {}] } "command" { set ap_value [parse getstring $argtest $ap_range] puts " > command part: '$ap_value'" set ap_inner [string range $ap_value 1 end-1] ;#strip the square brackets for analysis set sub_tclscript_info [tclscript_info $ap_inner $nscontext] incr arg_part_start [parse countnewline $ap_value {}] ###------------------------------------------------------------------------------------------------------------------------------------ #merge the results from the sub_tclscript_info into our main resultd dict #- we will want to keep track of the total number of commands and max nesting level across the whole script, and also keep a cumulative list of all commands found in the script for the purposes of complexity analysis. #we do not consider substitutions within the current cmdlist to be increasing the nesting level - they are non-nested commands at the same level as the current cmdlist - so we do not need to adjust the maxnesting value from the sub_tclscript_info when merging it in. dict incr resultd comment_count [dict get $sub_tclscript_info comment_count] #leave nesting level as is - we are not increasing nesting level for command substitutions within arguments dict incr resultd commands_count [dict get $sub_tclscript_info commands_count] foreach key {commands commands_proc commands_native commands_notfound commands_unknown} { set cmds_to_update [dict get $resultd $key] dict set resultd $key {} ;#unshare foreach cmd [dict get $sub_tclscript_info $key] { if {$cmd ni $cmds_to_update} { lappend cmds_to_update $cmd } } dict set resultd $key $cmds_to_update ;#restore } #todo ###------------------------------------------------------------------------------------------------------------------------------------ set parsefails [dict get $sub_tclscript_info commands_parsefail] set adjusted_parsefails {} foreach pf $parsefails { puts "pf: $pf" set pf_linerange [lindex $pf 0] set pf_cmdwords [lindex $pf 1] lassign $pf_linerange pf_start pf_end #$arg_start is line 1 of the script, so we need to add $arg_start - 1 to the pf_start and pf_end to get the correct linerange relative to the whole script. set adjusted_pf_linerange [list [expr {$pf_start + ($arg_part_start -1)}] [ expr {$pf_end + ($arg_part_start-1)}]] lappend adjusted_parsefails [list $adjusted_pf_linerange $pf_cmdwords] } dict lappend resultd commands_parsefail {*}$adjusted_parsefails set parseskips [dict get $sub_tclscript_info commands_parseskip] set adjusted_parseskips {} foreach ps $parseskips { puts "ps: $ps" set ps_linerange [lindex $ps 0] set ps_cmdwords [lindex $ps 1] lassign $ps_linerange ps_start ps_end set adjusted_ps_linerange [list [expr {$ps_start + ($arg_part_start -1)}] [ expr {$ps_end + ($arg_part_start-1)}]] lappend adjusted_parseskips [list $adjusted_ps_linerange $ps_cmdwords] } dict lappend resultd commands_parseskip {*}$adjusted_parseskips ###------------------------------------------------------------------------------------------------------------------------------------ } "variable" { #there can be nested command substitution within variable names when they are arrays e.g "val: $var([subcmd])]" set ap_value [parse getstring $argtest $ap_range] puts " > variable part: '$ap_value'" } default { puts stderr "Warning: unexpected arg part type '$ap_type' in argument parsing. This may indicate an edge case that is not handled correctly. arg: '$arg' arg_part: $ap" } } } #error temp } switch -- $ctype { "proc" { #dict incr resultd commands_proc if {$dispatchwords ni [dict get $resultd commands_proc]} { dict lappend resultd commands_proc $dispatchwords } } "native" { if {$dispatchwords ni [dict get $resultd commands_native]} { dict lappend resultd commands_native $dispatchwords } } "notfound" { if {$dispatchwords ni [dict get $resultd commands_notfound]} { dict lappend resultd commands_notfound $dispatchwords } } "ooclass" { #todo puts stderr "detected oo class command '$dispatchwords' - not currently analysing oo classes - review" } "ooobject" { #todo puts stderr "detected oo object command '$dispatchwords' - not currently analysing oo objects - review" } default { puts "[a+ red]tclscript_info: unhandled cmdtype '$ctype' for command '$dispatchwords' - treating as unknown command type for now - review[a]" if {$dispatchwords ni [dict get $resultd commands_unknown]} { dict lappend resultd commands_unknown $dispatchwords } } } dict incr resultd commands_count if {$dispatchwords ni [dict get $resultd commands]} { dict lappend resultd commands $dispatchwords } #we need to check each word of every command to check the quoting. #if unquoted or double quoted - we need to check any square brackets for command substitution and count those as non-nested commands at the same level. #puts "cinfo: $cinfo" #puts "cmdorigin: [dict get $cinfo origin] args_remaining: $args_remaining" #todo - nesting based on detection of if, while, for, foreach, switch, dict for etc. #consider what to do with eval, apply, uplevel. #---------------------------------------------- #namespace eval (and similar such as uplevel) #---------------------------------------------- #commonly we have structures like: # 'namespace eval ns [list cmdname arg1 arg2 ...]' #or 'namespace eval ns [linsert $args 0 cmdname]' #---------------------------------------------- #these will recognise the commands 'list' and 'linsert' as substituted commands, #but when we examine the arguments for 'namespace eval' and determine that there is a script argument, #we will miss the fact that 'cmdname' is being called. #- and we may miss detecting recursion. #e.g something more opaque 'namespace eval ns [getscript]' #in this case we won't be able to determine the actual script at all - but this is no different from # 'namespace eval ns [list nscommand]' where we would at most acknowledte the call to list but not the actual command being called - so we should at least be consistent in how we handle these cases. #In any case - the script is likely being evaluated in a different context to the current one, #so we *could* possibly not attempt to analyse the script even if it was provided in a way that is amenable to analysys: #- e.g 'namespace eval ns { # cmdname arg1 arg2 # etc ... # }' #It is arguable that if it is specified literally as a script then the complexity of the script should be counted as part of the complexity of the current script. #---------------------------------------------- #------------------------------------------------------------------------------------------------------------------------------------------------ #for this reason - we will test the commands if, while, for, foreach, switch, dict for etc for the presence of script arguments and analyse those #scripts if they are present as literal braced scripts but ignore them if they are not, since we have already analyzed command substitutions within arguments above. #this means a structure like: # namespace eval ns [list\ # if {condition} { # # }] #would not make the multiline script amenable to analysis despite it being coded directly in the analysed script - and could theoretically be used to hide complex scripts from analysis #- but it is not clear that this is a common or reasonable way to write code. #In a dynamic language like Tcl, there are commonly structures such as: # set script [string map $map {}] # namespace eval ns $script #These unfortunately are also not trivially analysable without implementing full data flow analysis to determine the possible values of 'script' at the point of the 'namespace eval' #- which is a non-trivial amount of work and is not on the roadmap for this implementation. #------------------------------------------------------------------------------------------------------------------------------------------------ #for apply, uplevel and namespace eval we will do similar, but attempt to provide the correct context for cmdinfo resolution. #This may not always be possible e.g if the namespace or level being evalled into is determined in a dynamic way #- but we will do our best to handle the common cases where the context can be determined from the script. #In practice, most 'script' accepting commands are resolved in the global scope and the context should often make little difference with regards, #to complexity analysis, but it is still worth trying to get it right where possible. #------------------------------------------------------------------------------------------------------------------------------------------------ set origin [dict get $cinfo origin] puts stderr "[a+ green]origin: $origin[a]" set parse_ok 1 ;#default assumption #we will attempt to parse the arguments with punk::args::parse to determine which arguments are scripts and analyse those as nested scripts #for commands where that is appropriate. set docid [dict get [cmdinfo $origin] docid] if {$docid ne ""} { puts stderr "[a+ green]detected command with id $origin - should be parsable with punk::args::parse for more detailed analysis of arguments[a]" #review - we have no way to substitute any variables or command substitutions within the arguments at this stage, #so we will just be parsing the raw arguments as they appear in the script #- this means that we may not be able to correctly parse some arguments if they are constructed in a complex way. # todo - a way to tell punk::args::parse to ignore type-checking for certain arguments would be useful. #In some cases the actual value of an argument may be needed to determine which 'form' of the command is being used. #e.g dynamically specified keywords can make it hard to determine the right form of the command to use for parsing - review. #get a copy of the resolved_def with overrides. #e.g for ::lrange #punk::args::resolved_def -override [list @id [list -id test-$origin] @cmd [list {-help ""}] first {-type any}] $origin #--------------------------------------- set form 0 #hack if {$origin eq "::switch"} { set form block ;#most common form of switch } else { #for other commands we will just use the default form 0 for now - review whether we want to attempt to detect other forms for other commands as well. set form 0 } #--------------------------------------- #review - a lot of what we're attempting to do here requires more serious analysis of the script structure than we are currently doing #we will need to get serious about proc/command return 'types' and variable tracking. #-------------------------- #These 3 lists should have same length and corresponding elements. set test_cmdargs [list] set test_cmdargs_quotestate [list] set test_cmdargs_linerange [list] #-------------------------- set has_expand_arg 0 set has_complex_arg 0 foreach a $args_remaining arglr $args_remaining_lineranges { #todo - avoid manually checking for leading {*} and instead use the parse tree from 'parse command' to determine if the argument includes an expansion. #consider: # {*}{a b c} - we should append a b c as separate elements - a literal list on a single line is unlikely but possible and a literal list/dict spanning multiple lines is reasonably common. #e.g {*}{ # k1 v1 # k2 v2 #} # {*}[my_getoptions x] - we don't know how many arguments (if any) this will expand to. #puts "[a+ blue]checking arg '$a' with linerange $arglr[a]" set argparseinfo [parse command $a {}] lassign $argparseinfo argCommentRange argCommandRange argRestRange argParseTree assert {[lindex $argRestRange 1] == 0} ;#we expect the parse command to consume the whole argument as a single element, so the restRange should start at 0. #if {[lindex $argRestRange 1] != 0} { # #failed sanity check. # #todo - just use assert? # puts stderr "Warning2: 'parse command {}' did not consume the whole argument. This may indicate a parsing error or an edge case that is not handled correctly. arg: '$a' restRange: $argRestRange" # error "tclscript_info: unexpected parse result for argument. arg: '$a' restRange: $argRestRange" #} if {[llength $argParseTree] == 0} { #no parse tree - This is likely for an empty argument with expansion e.g {*}{ } #This construct occurs when using {*} in place of line continuation for long lists or dicts, e.g #dict create {*}{ # } key1 $dynamic {*}{ # key2 value2 #} #review - the 'empty' argument will still have an entry in cmdlineranges - as although 'empty' in terms of how it expands it may be whitespace across multiple lines. #we will simply skip adding this to our test_ lists. continue } elseif {[llength $argParseTree] > 1} { #we have a simple {*} expansion with a parse tree that includes multiple elements for each word in the expansion, #e.g {*}{a b c} #e.g {*}{"a" b c} #% parse command {{*}{"a" {b} c}} {} # (result split into lines and whitespace added for readability - the actuall result is on a single line) # {0 0} {0 14} {14 0} { # {simple {4 3} { # {text {5 1} {}} # } # } # {simple {8 3} { # {text {9 1} {}} # } # } # {simple {12 1} { # {text {12 1} {}} # } # } # } #here we have an expansion to 3 arguments, dquoted, cquoted and bare foreach subarginfo $argParseTree { lassign $subarginfo subargtype subargRange subargParseTree #we expect a list of a single triplet in subargParseTree for each subarg, with the first element being 'text' - review this assumption. assert {$subargtype eq "simple"} assert {[llength $subargParseTree] == 1} set fullsubarg [parse getstring $a $subargRange] ;#arg possibly with quotes set textRange [lindex $subargParseTree 0 1] set textsubarg [parse getstring $a $textRange] ;#arg with quotes stripped lappend test_cmdargs $textsubarg if {[lindex $subargRange 1] - [lindex $textRange 1] == 2} { #was quoted with either {} or "" - we can determine the quotestate from the first and last character of the full argument string. if {[string index $fullsubarg 0] eq "\{" && [string index $fullsubarg end] eq "\}"} { lappend test_cmdargs_quotestate "cquoted" } elseif {[string index $fullsubarg 0] eq "\"" && [string index $fullsubarg end] eq "\""} { lappend test_cmdargs_quotestate "dquoted" } } else { lappend test_cmdargs_quotestate "bare" } lappend test_cmdargs_linerange $arglr } } else { #we have a single element in the parse tree - this could be a simple argument with no {*} expansion, or it could be a complex argument with {*} expansion that includes command substitution or variable substitution etc that prevents the tcl parser from doing the expansion at parse time, and results in a parse tree with a single element with the 'expand' keyword. if {[lindex $argParseTree 0 0] eq "expand"} { #we have an argument with leading {*} that is followed by more complex constructs such as command substitution, variable substitution or backslash substitution. #e.g {*}"a [subcmd] c" or {*}"a $var c" or {*}"a \t c" #or # {*}[ # #comment only # # #more comments #] #This comment example expands to an empty list. We should be able to determine that here and treat the same as 'llength $argparseTree == 0' case above - but for now we will just treat it as a complex expansion that we can't analyse. set has_expand_arg 1 break } if {[lindex $argParseTree 0 0] eq "simple"} { #we have a simple argument with no {*} expansion, so we can parse it as it is. set simpleRange [lindex $argParseTree 0 1] set simpletriplet [lindex $argParseTree 0 2 0] ;#expect only one text element in the parse tree for a simple argument assert {[lindex $simpletriplet 0] eq "text"} set simpletextRange [lindex $simpletriplet 1] set simpletext [parse getstring $a $simpletextRange] lappend test_cmdargs $simpletext if {[lindex $simpleRange 1] - [lindex $simpletextRange 1] == 2} { #was quoted with either {} or "" - we can determine the quotestate from the first and last character of the full argument string. set fullsimpletext [parse getstring $a $simpleRange] if {[string index $fullsimpletext 0] eq "\{" && [string index $fullsimpletext end] eq "\}"} { lappend test_cmdargs_quotestate "cquoted" } elseif {[string index $fullsimpletext 0] eq "\"" && [string index $fullsimpletext end] eq "\""} { lappend test_cmdargs_quotestate "dquoted" } } else { lappend test_cmdargs_quotestate "bare" } lappend test_cmdargs_linerange $arglr } elseif {[lindex $argParseTree 0 0] eq "word"} { #get wordparts #reconstruct the argument from the word parts - we need to do this to correctly handle cases such as "a string with [subcommand]" where the parse tree will split this into multiple #if the parts are a single 'command' or single 'variable' then we can (eventually) use type analysis on the variable or return value of the command to determine the structure of the argument for parsing with punk::args::parse - but for now we will just treat this as a complex case that we can't analyse. set wordRange [lindex $argParseTree 0 1] set wordLength [lindex $wordRange 1] ;#we compare this with the total length of the parts making up the word to determine if it was quoted or not. set wordparts [lindex $argParseTree 0 2] ;#list of triplets with types such as 'text', 'command', 'variable', 'backslash' etc. set wordparttypes [lmap wp $wordparts {lindex $wp 0}] #we know the wordparts have at least one of variable,command or backslash since otherwise it would have been parsed as a simple argument with a 'text' wordpart and not a 'word' with multiple wordparts - review this assumption. if {"variable" ni $wordparttypes && "command" ni $wordparttypes} { #we must have only text and/or backslash wordparts - so we can subst to get the final text of the argument for parsing with punk::args::parse - review this assumption. set escapedtext "" set partLengthSum 0 ;#to compare with wordLength to determine if the argument was quoted or not. foreach wp $wordparts { #we can just retrieve and join the parts and call subst on the whole thing to get the final text of the argument for parsing with punk::args::parse - review this assumption. set wptype [lindex $wp 0] set wpRange [lindex $wp 1] incr partLengthSum [lindex $wpRange 1] set wpParseTree [lindex $wp 2] set wpText [parse getstring $a $wpRange] append escapedtext $wpText } set finaltext [subst -nocommands -novariables $escapedtext] lappend test_cmdargs $finaltext if {$wordLength - $partLengthSum == 2} { #was quoted "". If it had been quoted with {} it would have been parsed as a simple argument with a 'text' wordpart and not a 'word' with multiple wordparts - review this assumption. lappend test_cmdargs_quotestate "dquoted" } else { lappend test_cmdargs_quotestate "bare" } lappend test_cmdargs_linerange $arglr } else { #we have a complex argument with command substitution or variable substitution etc that prevents the tcl parser from doing the expansion at parse time, and results in a parse tree with a single element with the 'word' keyword. #e.g "a [subcmd] c" or "a $var c" set has_complex_arg 1 break } } else { puts stderr "Warning: unexpected argParseTree element for argument '$a': [lindex $argParseTree 0] (expected type 'simple' or 'word'). This may indicate an edge case that is not handled correctly. argParseTree: $argParseTree" error "tclscript_info: unexpected argParseTree element for argument '$a': [lindex $argParseTree 0] (expected type 'simple' or 'word'). This may indicate an edge case that is not handled correctly. argParseTree: $argParseTree" } } } #temp hack for ::list if {$docid ne "::list" && ($has_expand_arg || $has_complex_arg)} { puts stderr "[a+ red]detected $origin command with arguments that include expansion and/or embedded commands/variables - this may cause inaccuracies in argument parsing since we either don't know how many arguments the expansion will produce or what type they will be. review[a]" set linerange [lindex $cmdlineranges 0] dict lappend resultd commands_parseskip [list $linerange $dispatchwords] set parse_ok 0 } else { #assert llength test_cmdargs == llength test_cmdargs_quotestate == llength args_remaining if {[catch {punk::args::parse $test_cmdargs -form $form -errorstyle minimal withid $docid} cmd_argd]} { set linerange [lindex $cmdlineranges 0] ;#we should have a linerange for the whole command at this point since we called punk::args::parse with the whole command line as the argument list. set errmsg $cmd_argd set argoutput "[a+ cyan]" foreach a $test_cmdargs { append argoutput " $a " \n } append argoutput "[a]" puts stderr "[a+ red]detected $origin command with arguments that we failed to parse with form '$form':\n$argoutput\n $errmsg[a]" dict lappend resultd commands_parsefail [list $linerange $dispatchwords] set parse_ok 0 } } } else { puts stderr "[a+ red]detected command $origin with no argdoc id - unable to use punk::args::parse for detailed analysis of arguments[a]" } if {$parse_ok} { switch -- $origin { "::if" { #script arguments that are nested are body and optionally multiple else_if clauses and an else clause. #Note that the expression argument can contain complex multiline scripts eg if {[catch {} val]}... #consider: #if {$condition} {return [getcode]} #vs #if {$condition} "return [getcode]" #vs #if {$conditions} [list return [getcode]] #vs #if {$condition} [getscript] #the difference is in when the substitution of $x happens #- in the first case it happens when the if command is executed, #and in the following cases it happens when the argument is parsed. # #if we were to (above during arg substutition analysis) replace command-substitutions with some token like , #we still wouldn't know the structure of a doublequoted argument such as "return " as we don't know how many words expands to. set expr1 [dict get $cmd_argd values expr1] #todo - use tclparser on expression #'parse expression $expr1 {0 end}' #todo - pull out any command substitutions within the expression and analyse those as well - review whether we want to count those as increasing complexity or not. #some complexity metrics are interested in the number of conditions in if statement conditions - review. #BODY1 - body of if command set scriptarg [dict get $cmd_argd values body1] set argindex [dict get $cmd_argd received body1] #no need to adjust for optional keywords, since body is always 2nd argument after the condition. #if {[string index $scriptarg 0] eq "\{" && [string index $scriptarg end] eq "\}"} {} if {[lindex $test_cmdargs_quotestate $argindex] in "cquoted dquoted"} { tclscript_info::update_resultd_for_script_arg $scriptarg $argindex $cmdlineranges $nscontext ;#we will upvar resultd } else { puts stderr "[a+ red]detected if command with unbraced body - not currently analysing for complexity - review\n $cmd_argd[a]" #todo - we should be able to analyse the body if it is a simple unbraced command e.g 'if {$condition} return' #this is functionally equivalent to 'if {$condition} {return}' } #elseif and else blocks should be processed similarly. if {[dict exists $cmd_argd values "elseif_clause"]} { set clauses [dict get $cmd_argd values elseif_clause] set elseif_clauses_argindices [list] dict for {k argidx} [dict get $cmd_argd received] { if {$k eq "elseif_clause"} { lappend elseif_clauses_argindices $argidx } } set elseif_number 0 foreach clause $clauses { lassign $clause _elseif elseif_cond _then elseif_script set scriptarg $elseif_script set cindex [lindex $elseif_clauses_argindices $elseif_number] #-------------------------------------------------------- #take into account optional keywords to determine the actual argindex of the scriptarg for this elseif clause. set argidx -1 set elseif_clauses [dict get $cmd_argd values "elseif_clause"] ;#list of elseif clauses in order of appearance in the arguments list foreach {clausename clauseindex} [dict get $cmd_argd received] { if {$clausename eq "elseif_clause"} { #determine length of any earlier (or current) elseif clause to adjust the argindex accordingly set prev_elseif_clause [lpop elseif_clauses 0] #clause of the form {elseif then