# -*- tcl -*- # Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt # module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm # # Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. # Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # (C) 2024 # # @@ Meta Begin # Application punk::lib 0.1.5 # Meta platform tcl # Meta license BSD # @@ Meta End # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # doctools header # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[manpage_begin punkshell_module_punk::lib 0 0.1.5] #[copyright "2024"] #[titledesc {punk general utility functions}] [comment {-- Name section and table of contents description --}] #[moddesc {punk library}] [comment {-- Description at end of page heading --}] #[require punk::lib] #[keywords module utility lib] #[description] #[para]This is a set of utility functions that are commonly used across punk modules or are just considered to be general-purpose functions. #[para]The base set includes string and math functions but has no specific theme # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section Overview] #[para] overview of punk::lib #[subsection Concepts] #[para]The punk::lib modules should have no strong dependencies other than Tcl #[para]Dependendencies that only affect display or additional functionality may be included - but should fail gracefully if not present, and only when a function is called that uses one of these soft dependencies. #[para]This requirement for no strong dependencies, means that many utility functions that might otherwise seem worthy of inclusion here are not present. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Requirements # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[subsection dependencies] #[para] packages used by punk::lib #[list_begin itemized] package require Tcl 8.6- package require punk::args #*** !doctools #[item] [package {Tcl 8.6-}] #[item] [package {punk::args}] # #package require frobz # #*** !doctools # #[item] [package {frobz}] #*** !doctools #[list_end] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section API] tcl::namespace::eval punk::lib::ensemble { #wiki.tcl-lang.org/page/ensemble+extend # extend an ensemble-like routine with the routines in some namespace #NOTE - the extension ns becomes the '-namespace ' for the original routine name, #with -unknown handling the original subcommands. #This makes the original ensemble harder to introspect! #e.g (the original -map or -namespace not visible) #In this specific case (which, being published on the wiki might be common in the wild) #we could call {*}[namespace ensemble configure $routine -unknown] $routine #and then detect that the first resulting word is an ensemble #For arbitrary '-unknown scripts' - sensible introspection is likely not possible proc extend {routine extension} { if {![string match ::* $routine]} { set resolved [uplevel 1 [list ::tcl::namespace::which $routine]] if {$resolved eq {}} { error [list {no such routine} $routine] } set routine $resolved } set routinens [tcl::namespace::qualifiers $routine] if {$routinens eq {::}} { set routinens {} } set routinetail [tcl::namespace::tail $routine] if {![string match ::* $extension]} { set extension [uplevel 1 [ list [tcl::namespace::which namespace] current]]::$extension } if {![tcl::namespace::exists $extension]} { error [list {no such namespace} $extension] } set extension [tcl::namespace::eval $extension [ list [tcl::namespace::which namespace] current]] tcl::namespace::eval $extension [ list [tcl::namespace::which namespace] export *] while 1 { set renamed ${routinens}::${routinetail}_[clock clicks] ;#clock clicks unlikely to collide when not directly consecutive such as: list [clock clicks] [clock clicks] if {[tcl::namespace::which $renamed] eq {}} break } rename $routine $renamed tcl::namespace::eval $extension [ list namespace ensemble create -command $routine -unknown [ list apply {{renamed ensemble routine args} { list $renamed $routine }} $renamed ] ] return $routine } } # some (?) tcl bug check procs needed to exist before main punk::lib namespaces are evaluated tcl::namespace::eval punk::lib::check { #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #Not any sort of comprehensive check of known tcl bugs. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases. proc has_tclbug_regexp_emptystring {} { #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, #but as an apparent violation of Tcl's normal parsing rules - was evidently seen as a bug and fixed in: #https://core.tcl-lang.org/tcl/info/cb03e57a (tcl 9.0.3+ ?) set bug [expr {![catch {regexp {} [error should_error]}]}] return [dict create bug $bug bugref cb03e57a description {regexp emptystring first argument over-optimised - difference in compiled vs traced behaviour.} level minor] } proc has_tclbug_script_var {} { set script {set j [list spud] ; list} append script \n uplevel #0 $script set rep1 [tcl::unsupported::representation $::j] set script "" set rep2 [tcl::unsupported::representation $::j] set nostring1 [string match "*no string" $rep1] set nostring2 [string match "*no string" $rep2] #we assume it should have no string rep in either case #Review: check Tcl versions for behaviour/consistency if {!$nostring2} { set bug true } else { set bug false } set description "string rep for list variable in script generated when script changed\n(not an acknowledged/reported bug)" return [dict create bug $bug bugref "" description $description level minor] } proc has_tclbug_lsearch_strideallinline {} { #bug only occurs with single -index value combined with -stride -all -inline -subindices #https://core.tcl-lang.org/tcl/tktview/5a1aaa201d if {[catch {lsearch -stride 3 -all -inline -index 1 -subindices {a1 a2 a3} *} result]} { #we aren't looking for an error result - error most likely indicates tcl too old to support -stride set bug 0 } else { set bug [expr {$result ne "a2"}] } set description "lsearch -stride with -subindices -inline -all and single index - incorrect results." return [dict create bug $bug bugref 5a1aaa201d description $description level major] } proc has_tclbug_lseq_sign {} { #https://core.tcl-lang.org/tcl/tktview/999b6966b2 if {[catch {lseq 1 10}]} { set bug 0 } else { set r1 [lseq 1 10 -9] set r2 [lseq 1 10 -10] set bug [expr {$r1 ne $r2}] } set description "lseq step sign not matching sequence direction - inconsistent results." return [dict create bug $bug bugref 999b6966b2 description $description level minor] } proc has_tclbug_list_quoting_emptyjoin {} { #https://core.tcl-lang.org/tcl/tktview/e38dce74e2 set v1 [list {*}[lindex #foo] {*}[]] ;#can return "#foo" instead of "{#foo}" under some beta 9 releases set v2 [list #foo] ;#normal tcl list quoting for 1st element that looks like a comment -> "{#foo}" set bug [expr {![string equal $v1 $v2]}] ;#if they're not equal - we have the bug. set description "lists elements not properly quoted in some cases. e.g 'list {*}[lindex {etc #foo} 1] {*}[list]' (#foo not braced)" return [dict create bug $bug bugref e38dc74e2 description $description level medium] } proc has_tclbug_safeinterp_compile {{show 0}} { #ensemble calls within safe interp not compiled #https://core.tcl-lang.org/tcl/tktview/1095bf7f756f9aed6bde namespace eval [namespace current]::testcompile { proc ensembletest {} {string index a 0} } set has_bug 0 set bytecode_outer [tcl::unsupported::disassemble proc [namespace current]::testcompile::ensembletest] if {$show} { puts outer: puts $bytecode_outer } if {![interp issafe]} { #test of safe subinterp only needed if we aren't already in a safe interp if {![catch { interp create x -safe } errMsg]} { x eval {proc ensembletest {} {string index a 0}} set bytecode_safe [x eval {tcl::unsupported::disassemble proc ::ensembletest}] if {$show} { puts safe: puts $bytecode_safe } interp delete x #mainly we expect the safe interp might contain invokeStk - indicating not byte compiled (or we would see strindex instead) #It's possible the interp we're running in is also not compiling ensembles. #we could then get a result of 2 - which still indicates a problem if {[string last "invokeStk" $bytecode_safe] >= 1} { incr has_bug } } else { #our failure to create a safe interp here doesn't necessarily mean the Tcl version doesn't have the problem - but we could end up returning zero if somehow safe interp can't be created from unsafe interp? #unlikely - but we should warn puts stderr "Unable to create a safe sub-interp to test - result only indicates status of current interpreter" } } namespace delete [namespace current]::testcompile if {[string last "invokeStk" $bytecode_outer] >= 1} { incr has_bug } set description "ensemble commands not compiled in safe interps - heavy performance impact in safe interps" return [dict create bug $has_bug bugref 1095bf7f756f9aed6bde description $description level major] } } tcl::namespace::eval punk::lib::compat { #*** !doctools #[subsection {Namespace punk::lib::compat}] #[para] compatibility functions for features that may not be available in earlier Tcl versions #[para] These are generally 'forward compatibility' functions ie allowing earlier versions to use later features/idioms by using a Tcl-only version of a missing builtin. #[para] Such Tcl-only versions will inevitably be less performant - perhaps significantly so. #*** !doctools #[list_begin definitions] if {"::lremove" ne [info commands ::lremove]} { #puts stderr "Warning - no built-in lremove" interp alias {} lremove {} ::punk::lib::compat::lremove } proc lremove {list args} { #*** !doctools #[call [fun lremove] [arg list] [opt {index ...}]] #[para] Forwards compatible lremove for versions 8.6 or less to support equivalent 8.7 lremove set data [lmap v $list {list data $v}] foreach doomed_index $args { if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value } set keep [lsearch -all -inline -not -exact $data x] return [lsearch -all -inline -index 1 -subindices $keep *] } #not significantly different in performance over test of 100 elements - getting somewhere near 10% for 1k integers proc lremove2 {list args} { set data [lmap v $list {list data $v}] foreach doomed_index $args { if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value } set keep [lsearch -all -inline -not -exact $data x] return [lmap v $keep {lindex $v 1}] } #outside of lmap - don't know of any particularly nice ways to flatten to subindex 1 of each element.. #flattening then lsearch with -stride and * would be nice - but it's not avail in 8.6 if {![info exists ::auto_index(readFile)]} { if {[info commands ::readFile] eq ""} { proc ::readFile {filename {mode text}} { #readFile not seen in auto_index or as command: installed by punk::lib # Parse the arguments set MODES {binary text} set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] # Read the file set f [open $filename [dict get {text r binary rb} $mode]] try { return [read $f] } finally { close $f } } } } if {![info exists ::auto_index(writeFile)]} { if {[info commands ::writeFile] eq ""} { proc ::writeFile {args} { #writeFile not seen in auto_index or as command: installed by punk::lib # Parse the arguments switch [llength $args] { 2 { lassign $args filename data set mode text } 3 { lassign $args filename mode data set MODES {binary text} set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] } default { set COMMAND [lindex [info level 0] 0] return -code error -errorcode {TCL WRONGARGS} "wrong # args: should be \"$COMMAND filename ?mode? data\"" } } # Write the File set f [open $filename [dict get {text w binary wb} $mode]] try { puts -nonewline $f $data } finally { close $f } } } } if {"::lpop" ne [info commands ::lpop]} { #puts stderr "Warning - no built-in lpop" interp alias {} lpop {} ::punk::lib::compat::lpop punk::args::set_idalias ::punk::lib::compat::lpop ::lpop ;#point to the definition of ::lpop defined in punk::args::tclcore } proc lpop {lvar args} { #*** !doctools #[call [fun lpop] [arg listvar] [opt {index}]] #[para] Forwards compatible lpop for versions 8.6 or less to support equivalent 8.7 lpop upvar $lvar l if {![llength $args]} { set args [list end] } set v [lindex $l {*}$args] set newlist $l set path [list] set subl $l for {set i 0} {$i < [llength $args]} {incr i} { set idx [lindex $args $i] if {![llength [lrange $subl $idx $idx]]} { error "tcl_lpop index \"$idx\" out of range" } lappend path [lindex $args $i] set subl [lindex $l {*}$path] } set sublist_path [lrange $args 0 end-1] set tailidx [lindex $args end] if {![llength $sublist_path]} { #set newlist [lremove $newlist $tailidx] set newlist [lreplace $newlist $tailidx $tailidx] } else { set sublist [lindex $newlist {*}$sublist_path] #set sublist [lremove $sublist $tailidx] set sublist [lreplace $sublist $tailidx $tailidx] lset newlist {*}$sublist_path $sublist } #puts "[set l] -> $newlist" set l $newlist return $v } if {"::ledit" ni [info commands ::ledit]} { interp alias {} ledit {} ::punk::lib::compat::ledit punk::args::set_idalias ::punk::lib::compat::ledit ::ledit } proc ledit {lvar first last args} { upvar $lvar l #use lindex_resolve to support for example: ledit lst end+1 end+1 h i set fidx [punk::lib::lindex_resolve [llength $l] $first] switch -exact -- $fidx { -Inf { #index below lower bound set pre [list] set fidx -1 } Inf { #first index position is greater than index of last element in the list set pre [lrange $l 0 end] set fidx [llength $l] } default { #set pre [lrange $l 0 $first-1] set pre [lrange $l 0 $fidx-1] } } set lidx [punk::lib::lindex_resolve [llength $l] $last] switch -exact -- $lidx { -Inf { #index below lower bound set post [lrange $l 0 end] } Inf { #index above upper bound set post [list] } default { if {$lidx < $fidx} { #from ledit man page: #If last is less than first, then any specified elements will be inserted into the list before the element specified by first with no elements being deleted. set post [lrange $l $fidx end] } else { #set post [lrange $l $last+1 end] set post [lrange $l $lidx+1 end] } } } set l [list {*}$pre {*}$args {*}$post] } #slight isolation - varnames don't leak - but calling context vars can be affected proc lmaptcl2 {varnames list script} { set result [list] set values [list] foreach v $varnames { lappend values "\$$v" } set linkvars [uplevel 1 [list ::tcl::info::vars]] set nscaller [uplevel 1 [list ::tcl::namespace::current]] set apply_script "" foreach vname $linkvars { append apply_script [string map [list %vname% $vname]\ {upvar 2 %vname% %vname%}\ ] \n } append apply_script $script \n #puts "--> $apply_script" foreach $varnames $list { lappend result [apply\ [list\ $varnames\ $apply_script\ $nscaller\ ] {*}[subst $values]\ ] } return $result } if {"::lmap" ne [info commands ::lmap]} { #puts stderr "Warning - no built-in lpop" interp alias {} lmap {} ::punk::lib::compat::lmaptcl } #lmap came in Tcl 8.6 - so probably not much need for a tcl forward compatibility version - but here it is anyway proc lmaptcl {varnames list script} { set result [list] set varlist [list] foreach varname $varnames { upvar 1 $varname var_$varname ;#ensure no collisions with vars in this proc lappend varlist var_$varname } foreach $varlist $list { lappend result [uplevel 1 $script] } return $result } #tcl8.7/9 compatibility for 8.6 if {[info commands ::tcl::string::insert] eq ""} { #https://wiki.tcl-lang.org/page/string+insert # Pure Tcl implementation of [string insert] command. proc ::tcl::string::insert {string index insertString} { # Convert end-relative and TIP 176 indexes to simple integers. if {[regexp -expanded { ^(end(?![\t\n\v\f\r ]) # "end" is never followed by whitespace |[\t\n\v\f\r ]*[+-]?\d+) # m, with optional leading whitespace (?:([+-]) # op, omitted when index is "end" ([+-]?\d+))? # n, omitted when index is "end" [\t\n\v\f\r ]*$ # optional whitespace (unless "end") } $index _ m op n]} { # Convert first index to an integer. switch $m { end {set index [string length $string]} default {scan $m %d index} } # Add or subtract second index, if provided. switch $op { + {set index [expr {$index + $n}]} - {set index [expr {$index - $n}]} } } elseif {![string is integer -strict $index]} { # Reject invalid indexes. return -code error "bad index \"$index\": must be\ integer?\[+-\]integer? or end?\[+-\]integer?" } # Concatenate the pre-insert, insertion, and post-insert strings. string cat [string range $string 0 [expr {$index - 1}]] $insertString\ [string range $string $index end] } # Bind [string insert] to [::tcl::string::insert]. tcl::namespace::ensemble configure string -map [tcl::dict::replace\ [tcl::namespace::ensemble configure string -map]\ insert ::tcl::string::insert] } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::lib::compat ---}] } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # Base namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval punk::lib { variable PUNKARGS tcl::namespace::export * variable has_struct_list set has_struct_list [expr {![catch {package require struct::list}]}] variable has_struct_set set has_struct_set [expr {![catch {package require struct::set}]}] variable has_punk_ansi set has_punk_ansi [expr {![catch {package require punk::ansi}]}] set has_twapi 0 if {"windows" eq $::tcl_platform(platform)} { set has_twapi [expr {![catch {package require twapi}]}] } namespace eval argdoc { #non-colour SGR codes set I "\x1b\[3m" ;# [a+ italic] set NI "\x1b\[23m" ;# [a+ noitalic] set B "\x1b\[1m" ;# [a+ bold] set N "\x1b\[22m" ;# [a+ normal] set T "\x1b\[1\;4m" ;# [a+ bold underline] set NT "\x1b\[22\;24m\x1b\[4:0m" ;# [a+ normal nounderline] } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::is_main_script @cmd -name punk::lib::is_main_script\ -summary\ "Test if current script was launched directly."\ -help\ "The ${$B}main script${$N} is the primary script that is executed by the interpreter, e.g. tclsh or wish. (as opposed to being loaded by the 'source' command) see https://wiki.tcl-lang.org/page/main+script" @values -min 0 -max 0 }] } proc is_main_script {} { #see https://wiki.tcl-lang.org/page/main+script if {[info script] ne "" && [info exists ::argv0] && [file dirname [file normalize [file join [info script] ...]]] eq [file dirname [file normalize [file join $::argv0 ...]]] } { return true } else { return false } } # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == # Maintenance - This is the primary source for tm_version... functions # - certain packages script require these but without package dependency # - 1 punk boot script # - 2 packagetrace module # - These should be updated to sync with this # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == proc tm_version_isvalid {versionpart} { #Needs to be suitable for use with Tcl's 'package vcompare' if {![catch [list package vcompare $versionpart $versionpart]]} { return 1 } else { return 0 } } proc tm_version_major {version} { if {![tm_version_isvalid $version]} { error "Invalid version '$version' is not a proper Tcl module version number" } set firstpart [lindex [split $version .] 0] #check for a/b in first segment if {[string is integer -strict $firstpart]} { return $firstpart } if {[string first a $firstpart] > 0} { return [lindex [split $firstpart a] 0] } if {[string first b $firstpart] > 0} { return [lindex [split $firstpart b] 0] } error "tm_version_major unable to determine major version from version number '$version'" } proc tm_version_canonical {ver} { #accepts a single valid version only - not a bounded or unbounded spec if {![tm_version_isvalid $ver]} { error "tm_version_canonical version '$ver' is not valid for a package version" } set parts [split $ver .] set newparts [list] foreach o $parts { set trimmed [string trimleft $o 0] set firstnonzero [string index $trimmed 0] switch -exact -- $firstnonzero { "" { lappend newparts 0 } a - b { #e.g 000bnnnn -> bnnnnn set tailtrimmed [string trimleft [string range $trimmed 1 end] 0] if {$tailtrimmed eq ""} { set tailtrimmed 0 } lappend newparts 0$firstnonzero$tailtrimmed } default { #digit if {[string is integer -strict $trimmed]} { #e.g 0100 -> 100 lappend newparts $trimmed } else { #e.g 0100b003 -> 100b003 (still need to process tail) if {[set apos [string first a $trimmed]] > 0} { set lhs [string range $trimmed 0 $apos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch set rhs [string range $trimmed $apos+1 end] ;#assert rhs non-empty and only digits set rhs [string trimleft $rhs 0] if {$rhs eq ""} { set rhs 0 } lappend newparts ${lhs}a${rhs} } elseif {[set bpos [string first b $trimmed]] > 0} { set lhs [string range $trimmed 0 $bpos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch set rhs [string range $trimmed $bpos+1 end] ;#assert rhs non-empty and only digits set rhs [string trimleft $rhs 0] if {$rhs eq ""} { set rhs 0 } lappend newparts ${lhs}b${rhs} } else { #assert - shouldn't get here trimmed val should have been empty, an int or contained an a or b error "tm_version_canonical error - trimfail - unexpected" } } } } } return [join $newparts .] } proc tm_version_required_canonical {versionspec} { #also trim leading zero from any dottedpart? #Tcl *allows* leading zeros in any of the dotted parts - but they are not significant. #e.g 1.01 is equivalent to 1.1 and 01.001 #also 1b3 == 1b0003 if {[string trim $versionspec] eq ""} {return ""} ;#unspecified = any version set errmsg "tm_version_required_canonical - invalid version specification" if {[string first - $versionspec] < 0} { #no dash #looks like a minbounded version (ie a single version with no dash) convert to min-max form set from $versionspec if {![tm_version_isvalid $from]} { error "$errmsg '$versionpec'" } if {![catch {tm_version_major $from} majorv]} { set from [tm_version_canonical $from] return "${from}-[expr {$majorv +1}]" } else { error "$errmsg '$versionspec'" } } else { # min- or min-max #validation and canonicalisation (strip leading zeroes from each segment, including either side of a or b) set parts [split $versionspec -] ;#we expect only 2 parts lassign $parts from to if {![tm_version_isvalid $from]} { error "$errmsg '$versionspec'" } set from [tm_version_canonical $from] if {[llength $parts] == 2} { if {$to ne ""} { if {![tm_version_isvalid $to]} { error "$errmsg '$versionspec'" } set to [tm_version_canonical $to] return $from-$to } else { return $from- } } else { error "$errmsg '$versionspec'" } error "tm_version_required_canonical should have already returned a canonicalised versionspec - or produced an error with reason before this point" } } # end tm_version... functions # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == # -- --- #https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists #DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 #8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows # Review and retest as new versions come out. # -- --- proc list_multi_append1 {lvar1 lvar2} { #clear winner in 2024 upvar $lvar1 l1 $lvar2 l2 lappend l1 {*}$l2 return $l1 } proc list_multi_append2 {lvar1 lvar2} { upvar $lvar1 l1 $lvar2 l2 set l1 [list {*}$l1 {*}$l2] } proc list_multi_append3 {lvar1 lvar2} { upvar $lvar1 l1 $lvar2 l2 set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] } #testing e.g #set l1_reset {a b c} #set l2 {a b c d e f g} #set l1 $l1_reset #time {list_multi_append1 l1 l2} 1000 #set l1 $l1_reset #time {list_multi_append2 l1 l2} 1000 # -- --- namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::lswap @cmd -name punk::lib::lswap\ -summary\ "Swap list values in-place"\ -help\ "Similar to struct::list swap, except it fully supports basic list index expressions such as 7-2 end-1 etc. struct::list swap doesn't support 'end' offsets, and only sometimes appears to support basic expressions, depending on the expression compared to the list length." @values -min 1 -max 1 lvar -type string -help\ "name of list variable" a -type indexexpression z -type indexexpression }] } proc lswap {lvar a z} { upvar $lvar l set len [llength $l] if {[lindex_resolve_basic $len $a] < 0 || [lindex_resolve_basic $len $z] < 0} { #lindex_resolve_basic returns only -Inf if out of range at either bound #if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred #(e.g using: lswap mylist end-2 end on a two element list) #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report #use full 'lindex_resolve' which can report which side via -Inf and Inf special results being lower and upper bound breaches respectively set a_index [lindex_resolve $len $a] set a_msg "" switch -- $a_index { -Inf { set a_msg "1st supplied index $a is below the lower bound for the list (0)" } Inf { set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" } } set z_index [lindex_resolve $len $z] set z_msg "" switch -- $z_index { -Inf { set z_msg "2nd supplied index $z is below the lower bound for the list (0)" } Inf { set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" } } set errmsg "lswap cannot swap indices $a and $z" if {$a_msg ne ""} { append errmsg \n $a_msg } if {$z_msg ne ""} { append errmsg \n $z_msg } error $errmsg } set item2 [lindex $l $z] lset l $z [lindex $l $a] lset l $a $item2 return $l } #proc lswap2 {lvar a z} { # upvar $lvar l # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] #} proc lswap2 {lvar a z} { upvar $lvar l #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] } #an experimental test of swapping vars without intermediate variables #It's an interesting idea - but probably of little to no practical use # - the swap_intvars3 version using intermediate var is faster in Tcl # - This is probably unsurprising - as it's simpler code. # Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. #proc swap_intvars {swapv1 swapv2} { # upvar $swapv1 _x $swapv2 _y # set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] #} #proc swap_intvars2 {swapv1 swapv2} { # upvar $swapv1 _x $swapv2 _y # set _x [expr {$_x ^ $_y}] # set _y [expr {$_x ^ $_y}] # set _x [expr {$_x ^ $_y}] #} #proc swap_intvars3 {swapv1 swapv2} { # #using intermediate variable # upvar $swapv1 _x $swapv2 _y # set z $_x # set _x $_y # set _y $z #} #*** !doctools #[subsection {Namespace punk::lib}] #[para] Core API functions for punk::lib #[list_begin definitions] if {[info commands lseq] ne ""} { #tcl 8.7+ lseq significantly faster, especially for larger ranges #The internal rep can be an 'arithseries' with no string representation #support minimal set from to proc range {from to {by 1}} { #note inconsistency with lseq 1 10 by -9 vs lseq 1 10 by -10 #https://core.tcl-lang.org/tcl/tktview/999b6966b2 lseq $from $to by $by } } else { #lseq accepts basic expressions e.g 4-2 for both arguments #e.g we can do lseq 0 [llength $list]-1 #if range is to be consistent with the lseq version above - it should support that, even though we don't support most lseq functionality in either wrapper. #our range function doesn't support double like lseq does. (deliberate) review proc range {from to {by ""}} { if {$by eq "0"} { #as per lseq, step (by) zero always gives no result return [list] } set to [offset_expr $to] set from [offset_expr $from] if {$by ne ""} { set by [offset_expr $by] } #assert $by is now empty string or an integer if {$to > $from} { switch -- $by { "" - 1 { set count [expr {($to -$from) + 1}] if {$from == 0} { return [lsearch -all [lrepeat $count 0] *] } else { incr from -1 return [lmap v [lrepeat $count 0] {incr from}] } } default { set count [expr {($to - $from + $by) / $by}] if {$count <= 0} { #return [list] #https://core.tcl-lang.org/tcl/tktview/999b6966b2 return [list $from] ;#review } set result [list] for {set i $from} {$i <= $to} {incr i $by} { lappend result $i } return $result #if we don't have lseq, we probably don't have lsearch -stride, which would make things simpler. #set count [expr {($to -$from) + 1}] #if {$from == 0} { # set fullrange [lsearch -all [lrepeat $count 0] *] #} else { # incr from -1 # set fullrange [lmap v [lrepeat $count 0] {incr from}] #} #set result [list] #for {set i 0} {$i < $count} {incr i} { # if {$i % $by == 0} { # lappend result [lindex $fullrange $i] # } #} #return $result } } #slower methods. #2) #set i -1 #set L [lrepeat $count 0] #lmap v $L {lset L [incr i] [incr from];lindex {}} #return $L #3) #set L {} #for {set i 0} {$i < $count} {incr i} { # lappend L [incr from] #} #return $L } elseif {$from > $to} { switch -- $by { "" - -1 { set count [expr {$from - $to} + 1] if {$to == 0} { return [lreverse [lsearch -all [lrepeat $count 0] *]] } else { incr from return [lmap v [lrepeat $count 0] {incr from -1}] } } default { set count [expr {($to - $from + $by) / $by}] if {$count <= 0} { #return [list] return [list $from] ;#review } set result [list] for {set i $from} {$i >= $to} {incr i $by} { lappend result $i } return $result } } #2) #set i -1 #set L [lrepeat $count 0] #lmap v $L {lset L [incr i] [incr from -1];lindex {}} #return $L #3) #set L {} #for {set i 0} {$i < $count} {incr i} { # lappend L [incr from -1] #} #return $L } else { return [list $from] } } } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::lzip @cmd -name punk::lib::lzip\ -summary\ "zip any number of lists together."\ -help\ "Conceptually equivalent to converting a list of rows to a list of columns. The number of returned lists (columns) will be equal to the length of the longest supplied list (row). If lengths of supplied lists don't match, empty strings will be inserted in the resulting lists. e.g lzip {a b c d e} {1 2 3 4} {x y z} -> {a 1 x} {b 2 y} {c 3 z} {d 4 {}} {3 {} {}} " @values -min 1 -max 1 lvar -type string -help\ "name of list variable" a -type indexexpression z -type indexexpression }] } proc lzip {args} { switch -- [llength $args] { 0 {return {}} 1 {return [lindex $args 0]} 2 {return [::punk::lib::system::lzip2lists {*}$args]} 3 {return [::punk::lib::system::lzip3lists {*}$args]} 4 {return [::punk::lib::system::lzip4lists {*}$args]} 5 {return [::punk::lib::system::lzip5lists {*}$args]} 6 {return [::punk::lib::system::lzip6lists {*}$args]} 7 {return [::punk::lib::system::lzip7lists {*}$args]} 8 {return [::punk::lib::system::lzip8lists {*}$args]} 9 {return [::punk::lib::system::lzip9lists {*}$args]} 10 {return [::punk::lib::system::lzip10lists {*}$args]} 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 { set n [llength $args] if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { #puts "calling ::punk::lib::system::Build_lzipn $n" ::punk::lib::system::Build_lzipn $n } return [::punk::lib::system::lzip${n}lists {*}$args] } default { if {[llength $args] < 4000} { set n [llength $args] if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { #puts "calling ::punk::lib::system::Build_lzipn $n" ::punk::lib::system::Build_lzipn $n } return [::punk::lib::system::lzip${n}lists {*}$args] } else { return [::punk::lib::lzipn {*}$args] } } } } namespace eval system { proc Build_lzipn {n} { set arglist [list] #use punk::lib::range which defers to lseq if available set vars [lmap i [punk::lib::range 0 $n] {string cat v$i}] ;#v0 v1 v2.. (v0 ignored) set body "\nlmap " for {set i 1} {$i <= $n} {incr i} { lappend arglist l$i append body "[lindex $vars $i] \$l$i " } append body "\{list " for {set i 1} {$i <= $n} {incr i} { append body "\$[lindex $vars $i] " } append body "\}" \n #puts "proc punk::lib::system::lzip${n}lists {$arglist} \{" #puts "$body" #puts "\}" proc ::punk::lib::system::lzip${n}lists $arglist $body } #fastest is to know the number of lists to be zipped proc lzip2lists {l1 l2} { lmap a $l1 b $l2 {list $a $b} } proc lzip3lists {l1 l2 l3} { lmap a $l1 b $l2 c $l3 {list $a $b $c} } proc lzip4lists {l1 l2 l3 l4} { lmap a $l1 b $l2 c $l3 d $l4 {list $a $b $c $d} } proc lzip5lists {l1 l2 l3 l4 l5} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 {list $a $b $c $d $e} } proc lzip6lists {l1 l2 l3 l4 l5 l6} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 {list $a $b $c $d $e $f} } proc lzip7lists {l1 l2 l3 l4 l5 l6 l7} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 {list $a $b $c $d $e $f $g} } proc lzip8lists {l1 l2 l3 l4 l5 l6 l7 l8} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 {list $a $b $c $d $e $f $g $h} } proc lzip9lists {l1 l2 l3 l4 l5 l6 l7 l8 l9} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 {list $a $b $c $d $e $f $g $h $i} } proc lzip10lists {l1 l2 l3 l4 l5 l6 l7 l8 l9 l10} { lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 j $l10 {list $a $b $c $d $e $f $g $h $i $j} } #neat algorithm - but while lmap seems better than foreach - it seems the script is evaluated a little slowly # review - proc lzipn_alt args { #stackoverflow - courtesy glenn jackman (modified) foreach l $args { lappend vars [incr n] lappend lmap_args $n $l } lmap {*}$lmap_args {lmap v $vars {set $v}} } #2024 - outperforms lmap version - presumably because list sizes reduced as it goes(?) proc lzipn_tcl8 {args} { #For tcl pre 9 (without lsearch -stride) #wiki - courtesy JAL set list_l $args set zip_l [] while {1} { set cur [lmap a_l $list_l { lindex $a_l 0 }] set list_l [lmap a_l $list_l { lrange $a_l 1 end }] if {[join $cur {}] eq {}} { break } lappend zip_l $cur } return $zip_l } proc lzipn_tcl9a {args} { #For Tcl 9+ (with lsearch -stride) #compared to wiki version #comparable for lists len <3 or number of args < 3 #approx 2x faster for large lists or more lists #needs -stride single index bug fix to use empty string instead of NULL if {![llength $args]} {return {}} set lens [lmap l $args {llength $l}] set numcolumns [::tcl::mathfunc::max {*}$lens] set flatlist [lrepeat [expr {$numcolumns * [llength $args]}] {}] set outlist [lrepeat $numcolumns {}] set s 0 foreach len $lens list $args { #ledit flatlist $s $e {*}$l {*}[lrepeat [expr {($numcolumns -([llength $l] % $numcolumns)) % $numcolumns}] NULL] ledit flatlist $s [expr {$s + $len - 1}] {*}$list incr s $numcolumns } #needs single index lstride bugfix for {set c 0} {$c < $numcolumns} {incr c} { ledit outlist $c $c [lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *] } return $outlist } proc lzipn_tcl9b {args} { if {![llength $args]} {return {}} set lens [lmap l $args {llength $l}] set numcolumns [::tcl::mathfunc::max {*}$lens] set flatlist [list] foreach len $lens list $args { lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] } lmap c [lseq 0 $numcolumns-1] {lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *} } proc lzipn_tcl9c {args} { #SLOW if {![llength $args]} {return {}} set lens [lmap l $args {llength $l}] set numcolumns [::tcl::mathfunc::max {*}$lens] set flatlist [list] foreach len $lens list $args { lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] } set zip_l {} set cols_remaining $numcolumns for {set c 0} {$c < $numcolumns} {incr c} { if {$cols_remaining == 1} { return [list {*}$zip_l $flatlist] } lappend zip_l [lsearch -stride $cols_remaining -index 0 -inline -all -subindices $flatlist *] set flen [llength $flatlist] set flatlist [lremove $flatlist {*}[lseq 0 to $flen-1 by $cols_remaining]] incr cols_remaining -1 } return $zip_l } } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::lzipn @cmd -name punk::lib::lzipn\ -summary\ "zip any number of lists together (unoptimised)."\ -help\ "Conceptually equivalent to converting a list of rows to a list of columns. See lzip which provides the same functionality but with optimisations depending on the number of supplied lists. " @values -min 1 -max 1 lvar -type string -help\ "name of list variable" a -type indexexpression z -type indexexpression }] } #keep both lzipn_tclX functions available for side-by-side testing in Tcl versions where it's possible if {![package vsatisfies [package present Tcl] 9.0-] || [dict get [punk::lib::check::has_tclbug_lsearch_strideallinline] bug]} { #-stride either not available - or has bug preventing use of main algorithm below proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl8] } else { proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl9a] } namespace import ::punk::args::lib::tstr namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::tclscript_to_scriptlist @cmd -name punk::lib::tclscript_to_scriptlist\ -summary\ "Parse tcl script to toplevel list of lists."\ -help\ "Get topmost list of tcl language elements in script. produces a list of lists where each sublist is a commandlist or a comment string." @values -min 1 -max 1 script -type string }] } proc tclscript_to_scriptlist {script} { set scriptlist [list] set cmdlist [list] set scrlen [string length $script] set token "" set in_token 0 set in_cmdlist 0 set in_comment 0 set charmap [list \t TB \n LF \r CR \\ BSL] ;#for switch 'jump' preservation - review - may be slower than escapes in switch statement? for {set i 0} {$i < $scrlen} {incr i} { set ch [string index $script $i] set chswitch [string map $charmap $ch] if {!$in_token} { switch -- $chswitch { { } - TB { #ignore - continue being a non token } CR { if {[string index $script $i+1] eq "\n"} { if {$in_cmdlist} { #no active token - newline ends cmdlist set in_cmdlist 0 lappend scriptlist $cmdlist set cmdlist [list] } incr i } } LF - ";" { #no active token - newline or semicolon ends cmdlist if {$in_cmdlist} { set in_cmdlist 0 lappend scriptlist $cmdlist set cmdlist [list] } } BSL { if {[string index $script $i+1] eq "\n"} { #continuation of whitespace while no token - boring incr i } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { #continuation of whitespace while no token - boring incr i 2 } else { #an uncommon possibility, a command wth surrounding spaces called in an strange way # e.g \ cmdname\ arg set in_token 1 set token "\\[string index $script $i+1]" incr i if {!$in_cmdlist} { set in_cmdlist 1 } } } # { if {$in_cmdlist} { #ordinary data set in_token 1 set token # } else { if {!$in_comment} { set in_token 1 set in_comment 1 set token # } else { #wnen in comment - all will be a single token until comment ends append token # } } } default { #for completeness.. we should exclude other possible whitespace chars if {![string is space $ch]} { set in_token 1 set token $ch if {!$in_cmdlist} { set in_cmdlist 1 } } } } } else { #if we're in a token, we must be in a cmdlist or a comment (single token) #review - not preserving whitespace in list of commands is ok, but for comments it should ideally be preserved #note that unbalanced curly in *toplevel* comment will still 'info complete' to true switch -- $chswitch { LF { if {!$in_comment} { if {[tcl::info::complete $token]} { #ends token and cmdlist lappend cmdlist $token lappend scriptlist $cmdlist set cmdlist "" set in_cmdlist 0 set token "" set in_token 0 } else { append token \n } } else { #ends a comment lappend scriptlist $token ;#single token for comment set token "" set in_token 0 set in_comment 0 set in_cmdlist 0 ;#shouldn't be necessary, but included for clarity } } ";" { if {!$in_comment} { if {[tcl::info::complete $token]} { #ends token and cmdlist lappend cmdlist $token lappend scriptlist $cmdlist set cmdlist "" set in_cmdlist 0 set token "" set in_token 0 } else { append token \n } } else { #ordinary char for comment append token ";" } } CR { if {[string index $script $i+1] eq "\n"} { if {[tcl::info::complete $token]} { #ends token and commandlist lappend cmdlist $token lappend scriptlist $cmdlist set cmdlist "" set in_cmdlist 0 set token "" set in_token 0 } else { append token \r\n incr i } } else { append token \r } } BSL { if {[string index $script $i+1] eq "\n"} { #continuation - lf effectively becomes a space if {!$in_comment} { #token may end - but cmdlist goes on if {[tcl::info::complete $token]} { lappend cmdlist $token set token "" set in_token 0 } else { append token " " } } else { append token " " } incr i ;#skip LF } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { #continuation - cr-lf effectively becomes a space if {!$in_comment} { #token may end - but cmdlist goes on if {[tcl::info::complete $token]} { lappend cmdlist $token set token "" set in_token 0 } else { append token " " } } else { append token " " } incr i 2 ;#skip CRLF } else { append token "\\[string index $script $i+1]" incr i } } default { if {![string is space $ch]} { append token $ch } else { if {!$in_comment} { if {[tcl::info::complete $token]} { lappend cmdlist $token set token "" set in_token 0 } else { append token $ch } } else { append token $ch } } } } } } #eof if {!$in_comment} { if {$in_token} { if {[tcl::info::complete $token]} { lappend cmdlist $token lappend scriptlist $cmdlist } else { error "Eof reached whilst script incomplete. Unbalanced braces?\ntoken: '$token'" } } else { if {$in_cmdlist} { lappend scriptlist $cmdlist } } } else { lappend scriptlist $token } return $scriptlist } proc invoke command { #*** !doctools #[call [fun invoke] [arg command]] #[para]Invoke an external command (using tcl open command) capturing stdout,stderr and the exitcode #[example { # set script { # puts stdout {hello on stdout} # puts stderr {hello on stderr} # exit 42 # } # invoke [list tclsh <<$script] #}] #see https://wiki.tcl-lang.org/page/open lassign [chan pipe] chanout chanin lappend command 2>@$chanin set fh [open |$command] set stdout [read $fh] close $chanin set stderr [read $chanout] close $chanout if {[catch {close $fh} cres e]} { dict with e {} lassign [set -errorcode] sysmsg pid exit if {$sysmsg eq {NONE}} { #output to stderr caused [close] to fail. Do nothing } elseif {$sysmsg eq {CHILDSTATUS}} { return [list $stdout $stderr $exit] } else { return -options $e $stderr } } return [list $stdout $stderr 0] } proc pdict {args} { package require punk::args variable has_punk_ansi if {!$has_punk_ansi} { set sep " = " } else { #set sep " [a+ Web-seagreen]=[a] " set sep " [punk::ansi::a+ Green]=[punk::ansi::a] " } set argspec [string map [list %sep% $sep] { @id -id ::punk::lib::pdict @cmd -name pdict -help\ "Print dict keys,values to channel The pdict function operates on variable names - passing the value to the showdict function which operates on values (see also showdict)" @opts -any 1 #default separator to provide similarity to tcl's parray function -separator -default "%sep%" -roottype -default "dict" -substructure -default {} -channel -default stdout -help\ "existing channel - or 'none' to return as string" @values -min 1 -max -1 dictvar -type string -help "name of variable. Can be a dict, list or array" patterns -type string -default "*" -multiple 1 -help {Multiple patterns can be specified as separate arguments. Each pattern consists of 1 or more segments separated by the hierarchy separator (forward slash) The system uses similar patterns to the punk pipeline pattern-matching system. The default assumed type is dict - but an array will automatically be extracted into key value pairs so will also work. Segments are classified into list,dict and string operations. Leading % indicates a string operation - e.g %# gives string length A segment with a single @ is a list operation e.g @0 gives first list element, @1-3 gives the lrange from 1 to 3 (todo - change to indexset syntax @1..3 @1..end-1 etc) A segment containing 2 @ symbols is a dict operation. e.g @@k1 retrieves the value for dict key 'k1' The operation type indicator is not always necessary if lower segments in the hierarchy are of the same type as the previous one. e.g1 pdict env */%# the pattern starts with default type dict, so * retrieves all keys & values, the next hierarchy switches to a string operation to get the length of each value. e.g2 pdict env W* S* Here we supply 2 patterns, each in default dict mode - to display keys and values where the keys match the glob patterns e.g3 pdict punk_testd */* This displays 2 levels of the dict hierarchy. Note that if the sublevel can't actually be interpreted as a dictionary (odd number of elements or not a list at all) - then the normal = separator will be replaced with a coloured (or underlined if colour off) 'mismatch' indicator. e.g4 set list {{k1 v1 k2 v2} {k1 vv1 k2 vv2}}; pdict list @0-end/@@k2 @*/@@k1 Here we supply 2 separate pattern hierarchies, where @0-end and @* are list operations and are equivalent The second level segment in each pattern switches to a dict operation to retrieve the value by key. When a list operation such as @* is used - integer list indexes are displayed on the left side of the = for that hierarchy level. } }] #puts stderr "$argspec" set argd [punk::args::parse $args withdef $argspec] set opts [dict get $argd opts] set dvar [dict get $argd values dictvar] set patterns [dict get $argd values patterns] set isarray [uplevel 1 [list ::tcl::array::exists $dvar]] if {$isarray} { set dvalue [uplevel 1 [list ::tcl::array::get $dvar]] if {![dict exists $opts -keytemplates]} { set arrdisplay [string map [list %dvar% $dvar] {${[if {[lindex $key 1] eq "query"} {val "%dvar% [lindex $key 0]"} {val "%dvar%($key)"}]}}] dict set opts -keytemplates [list $arrdisplay] } dict set opts -keysorttype dictionary } else { set dvalue [uplevel 1 [list set $dvar]] } showdict {*}$opts $dvalue {*}$patterns } #TODO - much. #showdict needs to be able to show different branches which share a root path #e.g show key a1/b* in its entirety along with a1/c* - (or even exact duplicates) # - specify ansi colour per pattern so different branches can be highlighted? # - ideally we want to be able to use all the dict & list patterns from the punk pipeline system eg @head @tail # (count) etc # - The current version is incomplete but passably usable. # - Copy proc and attempt rework so we can get back to this as a baseline for functionality proc showdict {args} { ;# analogous to parray (except that it takes the dict as a value) #set sep " [a+ Web-seagreen]=[a] " variable has_punk_ansi if {!$has_punk_ansi} { set RST "" set sep " = " #set sep_mismatch " mismatch " set sep \u2260 ;# equivalent [punk::ansi::convert_g0 [punk::ansi::g0 |]] (not equal symbol) } else { set RST [punk::ansi::a] set sep " [punk::ansi::a+ Green]=$RST " ;#stick to basic default colours for wider terminal support #set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]mismatch$RST " set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]\u2260$RST " } package require punk::pipe #package require punk ;#we need pipeline pattern matching features package require textblock set argd [punk::args::parse $args withdef [string map [list %sep% $sep %sep_mismatch% $sep_mismatch] { @id -id ::punk::lib::showdict @cmd -name punk::lib::showdict -help "display dictionary keys and values" #todo - table tableobject -return -default "tailtohead" -choices {tailtohead sidebyside} -channel -default none -trimright -default 1 -type boolean -help\ "Trim whitespace off rhs of each line. This can help prevent a single long line that wraps in terminal from making every line wrap due to long rhs padding." -separator -default {%sep%} -help\ "Separator column between keys and values" -separator_mismatch -default {%sep_mismatch%} -help\ "Separator to use when patterns mismatch" -roottype -default "dict" -help\ "list,dict,string" -ansibase_keys -default "" -help\ "ansi list for each level in -substructure. e.g \[list \[a+ red\] \[a+ web-green\]\]" -substructure -default {} -ansibase_values -default "" -keytemplates -default {\$\{$key\}} -type list -help\ "list of templates for keys at each level" -keysorttype -default "none" -choices {none dictionary ascii integer real} -keysortdirection -default increasing -choices {increasing decreasing} -debug -default 0 -type boolean -help\ "When enabled, produces some rudimentary debug output on stderr" -- -type none -optional 1 @values -min 1 -max -1 dictvalue -type list -help\ "dict or list value" patterns -default "*" -type string -multiple 1 -help\ "key or key glob pattern" }]] #for punk::lib - we want to reduce pkg dependencies. # - so we won't even use the tcllib debug pkg here set opt_debug [dict get $argd opts -debug] if {$opt_debug} { if {[info body debug::showdict] eq ""} { proc ::punk::lib::debug::showdict {args} { catch {puts stderr "punk::lib::showdict-> [string cat {*}$args]"} } } } else { if {[info body debug::showdict] ne ""} { proc ::punk::lib::debug::showdict {args} {} } } set opt_sep [dict get $argd opts -separator] set opt_mismatch_sep [dict get $argd opts -separator_mismatch] set opt_keysorttype [dict get $argd opts -keysorttype] set opt_keysortdirection [dict get $argd opts -keysortdirection] set opt_trimright [dict get $argd opts -trimright] set opt_keytemplates [dict get $argd opts -keytemplates] debug::showdict "keytemplates ---> $opt_keytemplates <---" set opt_ansibase_keys [dict get $argd opts -ansibase_keys] set opt_ansibase_values [dict get $argd opts -ansibase_values] set opt_return [dict get $argd opts -return] set opt_roottype [dict get $argd opts -roottype] set opt_structure [dict get $argd opts -substructure] set dval [dict get $argd values dictvalue] set patterns [dict get $argd values patterns] set result "" #pattern hierarchy # */@1/@0,%#,%str @0/@1 - patterns each one is a pattern or pattern_nest # * @1 @0,%#,%str - segments # a b 1 0 %# %str - keys set pattern_key_index [list] ;#list of pattern_nests, same length as number of keys generated set pattern_next_substructure [dict create] set pattern_this_structure [dict create] # -- --- --- --- #REVIEW #as much as possible we should pass the indices along as a query to the pipeline pattern matching system so we're not duplicating the work and introducing inconsistencies. #The main difference here is that sometimes we are treating the result as key-val pairs with the key being the query, other times the key is part of the query, or from the result itself (list/dict indices/keys). #todo - determine if there is a more consistent rule-based way to do this rather than adhoc #e.g pdict something * #we want the keys from the result as individual lines on lhs #e.g pdict something @@ #we want on lhs result on rhs # = v0 #e.g pdict something @0-2,@4 #we currently return: #0 = v0 #1 = v1 #2 = v2 #4 = v4 #This means we've effectively auto-expanded the first list - elements 0-2. (or equivalently stated: we've flattened the 3 element and 1 element lists into one list of 4 elements) #ie pdict is doing 'magic' compared to the normal pattern matching syntax, to make useage more convenient. #this is a tradeoff that could create surprises and make things messy and/or inconsistent. #todo - see if we can find a balance that gives consistency and logicality to the results whilst allowing still simplified matching syntax that is somewhat intuitive. #It may be a matter of documenting what type of indexes are used directly as keys, and which return sets of further keys #The solution for more consistency/predictability may involve being able to bracket some parts of the segment so for example we can apply an @join or %join within a segment #that involves more complex pattern syntax & parsing (to be added to the main pipeline pattern syntax) # -- --- --- --- set filtered_keys [list] if {$opt_roottype in {dict list string}} { #puts "getting keys for roottype:$opt_roottype" if {[llength $dval]} { #TODO - change to indexset notation 0..1,3..end-1 etc set re_numdashnum {^([-+]{0,1}\d+)-([-+]{0,1}\d+)$} set re_idxdashidx {^([-+]{0,1}\d+|end[-+]{1}\d+|end)-([-+]{0,1}\d+|end[-+]{1}\d+|end)$} foreach pattern_nest $patterns { set keyset [list] set keyset_structure [list] set segments [split $pattern_nest /] set levelpatterns [lindex $segments 0] ;#possibly comma separated patterns #we need to use _split_patterns to separate (e.g to protect commas that appear within quotes) set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] #puts stderr "showdict-->_split_patterns: $patterninfo" foreach v_idx $patterninfo { lassign $v_idx v idx #we don't support vars on lhs of index in this context - (because we support simplified glob patterns such as x* and literal dict keys such as kv which would otherwise be interpreted as vars with no index) set p $v$idx ;#_split_patterns has split too far in this context - the entire pattern is the index pattern if {[string index $p 0] eq "!"} { set get_not 1 set p [string range $p 1 end] } else { set get_not 0 } switch -exact -- $p { * - "" { if {$opt_roottype eq "list"} { set keys [punk::lib::range 0 [llength $dval]-1] ;#compat wrapper around subset of lseq functionality lappend keyset {*}$keys lappend keyset_structure {*}[lrepeat [llength $keys] list] dict set pattern_this_structure $p list } elseif {$opt_roottype eq "dict"} { set keys [dict keys $dval] lappend keyset {*}$keys lappend keyset_structure {*}[lrepeat [llength $keys] dict] dict set pattern_this_structure $p dict } else { lappend keyset %string lappend keyset_structure string dict set pattern_this_structure $p string } } %# { dict set pattern_this_structure $p string lappend keyset %# lappend keyset_structure string } # { #todo get_not !# is test for listiness (see punk) dict set pattern_this_structure $p list lappend keyset # lappend keyset_structure list } ## { dict set pattern_this_structure $p dict lappend keyset [list ## query] lappend keyset_structure dict } @* { #puts "showdict ---->@*<----" dict set pattern_this_structure $p list set keys [punk::lib::range 0 [llength $dval]-1] lappend keyset {*}$keys lappend keyset_structure {*}[lrepeat [llength $keys] list] } @@ { #get first k v from dict dict set pattern_this_structure $p dict lappend keyset [list @@ query] lappend keyset_structure dict } @*k@* - @*K@* { #returns keys only lappend keyset [list $p query] lappend keyset_structure dict dict set pattern_this_structure $p dict } @*.@* { set keys [dict keys $dval] lappend keyset {*}$keys lappend keyset_structure {*}[lrepeat [llength $keys] dict] dict set pattern_this_structure $p dict } default { #puts stderr "===p:$p" #the basic scheme also doesn't allow commas in dict keys access via the convenience @@key - which isn't great, especially for arrays where it is common practice! #we've already sacrificed whitespace in keys - so extra limitations should be reduced if it's to be passably useful #@@"key,etc" should allow any non-whitespace key switch -glob -- $p { {@k\*@*} - {@K\*@*} { #value glob return keys #set search [string range $p 4 end] #dict for {k v} $dval { # if {[string match $search $v]} { # lappend keyset $k # } #} if {$get_not} { lappend keyset [list !$p query] } else { lappend keyset [list $p query] } lappend keyset_structure dict dict set pattern_this_structure $p dict } @@* { #exact match key - review - should raise error to match punk pipe behaviour? set k [string range $p 2 end] if {$get_not} { if {[dict exists $dval $k]} { set keys [dict keys [dict remove $dval $k]] lappend keyset {*}$keys lappend keyset_structure {*}[lrepeat [llength $keys] dict] } else { lappend keyset {*}[dict keys $dval] lappend keyset_structure {*}[lrepeat [dict size $dval] dict] } } else { if {[dict exists $dval $k]} { lappend keyset $k lappend keyset_structure dict } } dict set pattern_this_structure $p dict } @k@* - @K@* { #TODO get_not set k [string range $p 3 end] if {[dict exists $dval $k]} { lappend keyset $k lappend keyset_structure dict } dict set pattern_this_structure $p dict } {@\*@*} { #return list of values #set k [string range $p 3 end] #lappend keyset {*}[dict keys $dval $k] if {$get_not} { lappend keyset [list !$p query] } else { lappend keyset [list $p query] } lappend keyset_structure dict dict set pattern_this_structure $p dict } {@\*.@*} { #TODO get_not set k [string range $p 4 end] set keys [dict keys $dval $k] lappend keyset {*}$keys lappend keyset_structure {*}[lrepeat [llength $keys] dict] dict set pattern_this_structure $p dict } {@v\*@*} - {@V\*@*} { #value-glob return value #error "dict value-glob value-return only not supported here - bad pattern '$p' in '$pattern_nest'" if {$get_not} { lappend keyset [list !$p query] } else { lappend keyset [list $p query] } lappend keyset_structure dict dict set pattern_this_structure $p dict } {@\*v@*} - {@\*V@*} { #key-glob return value lappend keyset [list $p query] lappend keyset_structure dict dict set pattern_this_structure $p dict } {@\*@*} - {@\*v@*} - {@\*V@} { #key glob return val lappend keyset [list $p query] lappend keyset_structure dict dict set pattern_this_structure $p dict } @??@* { #exact key match - no error lappend keyset [list $p query] lappend keyset_structure dict dict set pattern_this_structure $p dict } default { set this_type $opt_roottype if {[string match @* $p]} { #list mode - trim optional list specifier @ set p [string range $p 1 end] dict set pattern_this_structure $p list set this_type list } elseif {[string match %* $p]} { dict set pattern_this_structure $p string lappend keyset $p lappend keyset_structure string set this_type string } if {$this_type eq "list"} { dict set pattern_this_structure $p list if {[string is integer -strict $p]} { if {$get_not} { set keys [punk::lib::range 0 [llength $dval]-1] set keys [lremove $keys $p] lappend keyset {*}$keys lappend keyset_structure {*}[lrepeat [llength $keys] list] } else { lappend keyset $p lappend keyset_structure list } } elseif {[string match "?*-?*" $p]} { #could be either - don't change type #list indices with tcl8.7 underscores? be careful. Before 8.7 we could have used regexp \d on integers #now we should map _ to "" first set p [string map {_ {}} $p] #lassign [textutil::split::splitx $p {\.\.}] a b if {![regexp $re_idxdashidx $p _match a b]} { error "unrecognised pattern $p" } #TODO - fix terminology. 'lower_resolve' is confusing here as range can be in descending order #change to start/end terminology? set lower_resolve [punk::lib::lindex_resolve [llength $dval] $a] ;#-Inf for too low, Inf for too high #keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds if {${lower_resolve} == Inf} { ##x #lower bound is above upper list range #match with decreasing indices is still possible set lower [expr {[llength $dval]-1}] ;#set to max } elseif {$lower_resolve == -Inf} { ##x set lower 0 } else { set lower $lower_resolve } set upper [punk::lib::lindex_resolve [llength $dval] $b] if {$upper == -Inf} { ##x #upper bound is below list range - if {$lower_resolve > -Inf} { ##x set upper 0 } else { continue } } elseif {$upper == Inf} { #use max set upper [expr {[llength $dval]-1}] #assert - upper >=0 because we have ruled out empty lists } #note lower can legitimately be higher than upper - lib::range, like lseq can produce sequence in reverse order set keys [punk::lib::range $lower $upper] if {$get_not} { set fullrange [punk::lib::range 0 [llength $dval]-1] set keys [lremove $fullrange {*}$keys] if {$lower > $upper} { set keys [lreverse $keys] } } lappend keyset {*}$keys lappend keyset_structure {*}[lrepeat [llength $keys] list] } else { if {$get_not} { lappend keyset [list !@$p query] } else { lappend keyset [list @$p query] } lappend keyset_structure list } } elseif {$this_type eq "string"} { dict set pattern_this_structure $p string } elseif {$this_type eq "dict"} { #default equivalent to @\*@* dict set pattern_this_structure $p dict #puts "dict: appending keys from index '$p' keys: [dict keys $dval $p]" set keys [dict keys $dval $p] if {$get_not} { set keys [dict keys [dict remove $dval {*}$keys]] } lappend keyset {*}$keys lappend keyset_structure {*}[lrepeat [llength $keys] dict] } else { puts stderr "list: unrecognised pattern $p" } } } } } } # -- --- --- --- #check next pattern-segment for substructure type to use # -- --- --- --- set substructure "" set pnext [lindex $segments 1] set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] if {[llength $patterninfo] == 0} { # // ? -review - what does this mean? for xpath this would mean at any level set substructure [lindex $pattern_this_structure end] } elseif {[llength $patterninfo] == 1} { #ignore the NOT operator for purposes of query-type detection if {[string index $pnext 0] eq "!"} { set pnext [string range $pnext 1 end] } # single type in segment e.g /@@something/ switch -exact -- $pnext { "" { set substructure string } @*k@* - @*K@* - @*.@* - ## { set substructure dict } # { set substructure list } ## { set substructure dict } %# { set substructure string } * { #set substructure $opt_roottype #set substructure [dict get $pattern_this_structure $pattern_nest] set substructure [lindex $pattern_this_structure end] } default { switch -glob -- $pnext { @??@* - @?@* - @@* { #all 4 or 3 len prefixes bounded by @ are dict set substructure dict } default { if {[string match @* $pnext]} { set substructure list } elseif {[string match %* $pnext]} { set substructure string } else { #set substructure $opt_roottype #set substructure [dict get $pattern_this_structure $pattern_nest] set substructure [lindex $pattern_this_structure end] } } } } } } else { #e.g /@0,%str,.../ #doesn't matter what the individual types are - we have a list result set substructure list } #puts "--pattern_nest: $pattern_nest substructure: $substructure" dict set pattern_next_substructure $pattern_nest $substructure # -- --- --- --- if {$opt_keysorttype ne "none"} { set int_keyset 1 foreach k $keyset { if {![string is integer -strict $k]} { set int_keyset 0 break } } if {$int_keyset} { set sortindices [lsort -indices -integer $keyset] #set keyset [lsort -integer $keyset] } else { #set keyset [lsort -$opt_keysorttype $keyset] set sortindices [lsort -indices -$opt_keysorttype $keyset] } set keyset [lmap i $sortindices {lindex $keyset $i}] set keyset_structure [lmap i $sortindices {lindex $keyset_structure $i}] } foreach k $keyset { lappend pattern_key_index $pattern_nest } lappend filtered_keys {*}$keyset lappend all_keyset_structure {*}$keyset_structure #puts stderr "--->pattern_nest:$pattern_nest keyset:$keyset" } } #puts stderr "[dict get $pattern_this_structure $pattern_nest] keys: $filtered_keys" } else { puts stdout "unrecognised roottype: $opt_roottype" return $dval } if {[llength $filtered_keys]} { #both keys and values could have newline characters. #simple use of 'format' won't cut it for more complex dict keys/values #use block::width or our columns won't align in some cases switch -- $opt_return { "tailtohead" { #last line of key is side by side (possibly with separator) with first line of value #This is more intelligible when terminal wrapping occurs - and is closer to what happens with parray multiline keys and values #we still pad the key to max width so that the separator appears in the same column - which in the case of wide keys could cause that to wrap for all entries set kt [lindex $opt_keytemplates 0] if {$kt eq ""} { set kt {${$key}} } #set display_keys [lmap k $filtered_keys {tcl::string::map [list %k% $k] $kt}] set display_keys [lmap key $filtered_keys {tstr -ret string -allowcommands $kt}] set maxl [::tcl::mathfunc::max {*}[lmap v $display_keys {textblock::width $v}]] set kidx 0 set last_hidekey 0 foreach keydisplay $display_keys key $filtered_keys { set thisval "?" set hidekey 0 set pattern_nest [lindex $pattern_key_index $kidx] set pattern_nest_list [split $pattern_nest /] #set this_type [dict get $pattern_this_structure $pattern_nest] #set this_type [dict get $pattern_this_structure $key] set this_type [lindex $all_keyset_structure $kidx] #puts stderr "---> kidx:$kidx key:$key - pattern_nest:$pattern_nest this_type:$this_type" set is_match 1 ;#whether to display the normal separator or bad-match separator switch -- $this_type { dict { #todo? - slower lsearch if -dupes 1 flag set so we can display duplicate 'keys' if var not a proper dict but rather a dict-shaped list that we want to display as a dict # - default highlight dupes (ansi underline?) if {[lindex $key 1] eq "query"} { set qry [lindex $key 0] % thisval.= $qry= $dval } else { set thisval [tcl::dict::get $dval $key] } #set substructure [lrange $opt_structure 1 end] set nextpatterns [list] #which pattern nest applies to this branch set nextsub [dict get $pattern_next_substructure $pattern_nest] if {[llength $pattern_nest_list]} { set nest [lrange $pattern_nest_list 1 end] lappend nextpatterns {*}[join $nest /] } set nextopts [dict get $argd opts] set subansibasekeys [lrange $opt_ansibase_keys 1 end] set nextkeytemplates [lrange $opt_keytemplates 1 end] #dict set nextopts -substructure $nextsub dict set nextopts -keytemplates $nextkeytemplates dict set nextopts -ansibase_keys $subansibasekeys dict set nextopts -roottype $nextsub dict set nextopts -channel none #puts stderr "showdict {*}$nextopts $thisval [lindex $args end]" if {[llength $nextpatterns]} { if {[catch { set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] } errMsg]} { #puts stderr ">>> nextpatterns:'$nextpatterns' nextopts:'$nextopts'" set is_match 0 } } } list { if {[string is integer -strict $key]} { set thisval [lindex $dval $key] } else { if {[lindex $key 1] eq "query"} { set qry [lindex $key 0] } else { set qry $key } % thisval.= $qry= $dval } set nextpatterns [list] #which pattern nest applies to this branch set nextsub [dict get $pattern_next_substructure $pattern_nest] if {[llength $pattern_nest_list]} { set nest [lrange $pattern_nest_list 1 end] lappend nextpatterns {*}[join $nest /] } set nextopts [dict get $argd opts] dict set nextopts -roottype $nextsub dict set nextopts -channel none #if {![llength $nextpatterns]} { # set nextpatterns * #} if {[llength $nextpatterns]} { if {[catch { set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] } errMsg]} { set is_match 0 } } } string { set hidekey 1 if {$key eq "%string"} { set hidekey 1 set thisval $dval } elseif {$key eq "%ansiview"} { set thisval [ansistring VIEW -lf 1 $dval] } elseif {$key eq "%ansiviewstyle"} { set thisval [ansistring VIEWSTYLE -lf 1 $dval] } elseif {[string match *lpad-* $key]} { set hidekey 1 lassign [split $key -] _ extra set width [expr {[textblock::width $dval] + $extra}] set thisval [textblock::pad $dval -which left -width $width] } elseif {[string match *lpadstr-* $key]} { set hidekey 1 lassign [split $key -] _ extra set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] set thisval [textblock::pad $dval -which left -width $width -padchar $extra] } elseif {[string match *rpad-* $key]} { set hidekey 1 lassign [split $key -] _ extra set width [expr {[textblock::width $dval] + $extra}] set thisval [textblock::pad $dval -which right -width $width] } elseif {[string match *rpadstr-* $key]} { set hidekey 1 lassign [split $key -] _ extra set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] set thisval [textblock::pad $dval -which right -width $width -padchar $extra] } else { if {[lindex $key 1] eq "query"} { set qry [lindex $key 0] } else { set qry $key } set thisval $dval if {[string index $key 0] ne "%"} { set key %$key } % thisval.= $key= $thisval } set nextpatterns [list] #which pattern nest applies to this branch set nextsub [dict get $pattern_next_substructure $pattern_nest] if {[llength $pattern_nest_list]} { set nest [lrange $pattern_nest_list 1 end] lappend nextpatterns {*}[join $nest /] } #set nextopts [dict get $argd opts] dict set nextopts -roottype $nextsub dict set nextopts -channel none if {[llength $nextpatterns]} { set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] } } } if {$this_type eq "string" && $hidekey} { lassign [textblock::size $thisval] _vw vwidth _vh vheight #set blanks_above [string repeat \n [expr {$kheight -1}]] set vblock $opt_ansibase_values$thisval$RST #append result [textblock::join_basic -- $vblock] #review - we wouldn't need this space if we had a literal %sp %sp-x ?? append result " $vblock" } else { set ansibase_key [lindex $opt_ansibase_keys 0] lassign [textblock::size $keydisplay] _kw kwidth _kh kheight lassign [textblock::size $thisval] _vw vwidth _vh vheight set totalheight [expr {$kheight + $vheight -1}] set blanks_above [string repeat \n [expr {$kheight -1}]] set blanks_below [string repeat \n [expr {$vheight -1}]] if {$is_match} { set use_sep $opt_sep } else { set use_sep $opt_mismatch_sep } set sepwidth [textblock::width $use_sep] set kblock [textblock::pad $ansibase_key$keydisplay$RST$blanks_below -width $maxl] set sblock [textblock::pad $blanks_above$use_sep$blanks_below -width $sepwidth] set vblock $blanks_above$opt_ansibase_values$thisval$RST #only vblock is ragged - we can do a basic join because we don't care about rhs whitespace if {$last_hidekey} { append result \n } #append result [textblock::join_basic -- $kblock $sblock $vblock] \n append result [textblock::join_basic_raw $kblock $sblock $vblock] \n } set last_hidekey $hidekey incr kidx } } "sidebyside" { # TODO - fix #This is nice for multiline keys and values of reasonable length, will produce unintuitive results when line-wrapping occurs. #use ansibase_key etc to make the output more comprehensible in that situation. #This is why it is not the default. (review - terminal width detection and wrapping?) set maxl [::tcl::mathfunc::max {*}[lmap v $filtered_keys {textblock::width $v}]] foreach key $filtered_keys { set kt [lindex $opt_keytemplates 0] if {$kt eq ""} { set kt "%k%" } set keydisplay $opt_ansibase_keys[string map [list %k% $key] $kt]$RST #append result [format "%-*s = %s" $maxl $key [dict get $dval $key]] \n #differing height blocks (ie ragged) so we need a full textblock::join rather than join_basic append result [textblock::join -- [textblock::pad $keydisplay -width $maxl] $opt_sep "$opt_ansibase_values[dict get $dval $key]$RST"] \n } } } } if {$opt_trimright} { set result [::join [lines_as_list -line trimright $result] \n] } if {[string last \n $result] == [string length $result]-1} { set result [string range $result 0 end-1] } #stdout/stderr can exist but not be in 'chan names' (e.g when transforms in place) set chan [dict get $argd opts -channel] switch -- $chan { stderr - stdout { puts $chan $result } none { return $result } default { #review - check member of chan names? #just try outputting to the supplied channel for now puts $chan $result } } } proc is_list_all_in_list {small large} { if {[llength $small] > [llength $large]} {return 0} foreach x $large { ::set ($x) {} } foreach x $small { if {![info exists ($x)]} { return 0 } } return 1 } #v2 generally seems slower proc is_list_all_in_list2 {small large} { set small_in_large [lsort [struct::set intersect [lsort -unique $small] $large ]] return [struct::list equal [lsort $small] $small_in_large] } if {!$has_struct_list || !$has_struct_set} { set body { package require struct::list package require struct::set } append body [info body is_list_all_in_list2] proc is_list_all_in_list2 {small large} $body } proc is_list_all_ni_list {A B} { foreach x $B { ::set ($x) {} } foreach x $A { if {[info exists ($x)]} { return 0 } } return 1 } proc is_list_all_ni_list2 {a b} { set i [struct::set intersect $a $b] return [expr {[llength $i] == 0}] } if {!$has_struct_set} { set body { package require struct::list } append body [info body is_list_all_ni_list2] proc is_list_all_ni_list2 {a b} $body } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::ldiff @cmd -name punk::lib::ldiff\ -summary\ "Difference consisting of items with removeitems removed."\ -help\ "Somewhat like struct::set difference, but order preserving, and doesn't treat as a 'set' so preserves any duplicates in items. struct::set difference may happen to preserve ordering when items are integers, but order can't be relied on, especially as struct::set has 2 differening implementations (tcl vs critcl) which return results with different ordering to each other and different deduping behaviour in some cases (e.g when 2nd arg is empty)" @values -min 2 -max 2 items -type list removeitems -type list }] } proc ldiff {items removeitems} { if {[llength $removeitems] == 0} {return $items} set result {} foreach item $items { if {$item ni $removeitems} { lappend result $item } } return $result } #with ledit (also avail in 8.6 using punk::lib::compat::ledit proc ldiff2 {fromlist removeitems} { if {[llength $removeitems] == 0} {return $fromlist} foreach item $removeitems { set posns [lsearch -all -exact $fromlist $item] foreach p $posns {ledit fromlist $p $p} } return $fromlist } proc ldiff3 {fromlist removeitems} { set doomed [list] foreach item $removeitems { lappend doomed {*}[lsearch -all -exact $fromlist $item] } lremove $fromlist {*}$doomed } #fix for tcl impl of struct::set::diff which doesn't dedupe proc struct_set_diff_unique {A B} { package require struct::set ;#require even if tcl impl - so the dependency isn't missed accidentally due to Loaded state of programmers machine. if {[struct::set::Loaded] eq "tcl"} { return [punk::lib::setdiff $A $B] } else { #use (presumably critcl) implementation for speed return [struct::set difference $A $B] } } #non-dupe preserving - for consistency in dedupe behaviour we don't shortcircuit empty B #consistent dedupe and order-maintenance of remaining items in A differentiate this from struct::set difference (dedupes with critcl, doesn't with tcl implementation 2024) #also struct::set difference with critcl is faster proc setdiff {A B} { if {[llength $A] == 0} {return {}} set d [dict create] foreach x $A {dict set d $x {}} foreach x $B {dict unset d $x} return [dict keys $d] } #bulk dict remove is slower than a foreach with dict unset #proc setdiff2 {fromlist removeitems} { # #if {[llength $fromlist] == 0} {return {}} # set d [dict create] # foreach x $fromlist { # dict set d $x {} # } # return [dict keys [dict remove $d {*}$removeitems]] #} #array is about 15% faster than dict - but unordered. (which is ok for sets - but we have struct::set for that) proc setdiff_unordered {A B} { if {[llength $A] == 0} {return {}} array set tmp {} foreach x $A {::set tmp($x) .} foreach x $B {catch {unset tmp($x)}} return [array names tmp] } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::lunique_unordered @cmd -name punk::lib::lunique_unordered\ -summary\ "unique values in list"\ -help\ "Return unique values in provided list. This removes duplicates but *may* rearrange the order of the returned elements compared to the original list. When struct::set is available this will be used for the implementation, as it can be *slightly* faster if acceleration is present. When struct::set is not available it will fallback to lunique and provide the same functionality with order preserved." @values -min 1 -max 1 list -type list }] } #default/fallback implementation proc lunique_unordered {list} { lunique $list } if {$has_struct_set} { if {[struct::set equal [struct::set union {a a} {}] {a}]} { proc lunique_unordered {list} { struct::set union $list {} } } else { #struct::set union operates on a 'set' - so this probably won't change, and hopefully is #consistent across unacelerated versions and those implemented in accelerators, #but if it ever does change - be a little noisy about it. puts stderr "punk::lib WARNING: struct::set union no longer dedupes!" #we could also test a sequence of: struct::set add } } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::lunique @cmd -name punk::lib::lunique\ -summary\ "Order-preserving unique values in list"\ -help\ "Return unique values in provided list. This removes duplicates whilst preserving the original order of the provided list. When struct::set is available with acceleration, lunique_unordered may be slightly faster." @values -min 1 -max 1 list -type list }] } proc lunique {list} { set new {} foreach item $list { if {$item ni $new} { lappend new $item } } return $new } proc lunique2 {list} { set doomed [list] #expr 'in' probably faster than using a dict - for lists approx < 20,000 items. (wiki wisdom - url?) for {set i 0} {$i < [llength $list]} {} { set item [lindex $list $i] lappend doomed {*}[lrange [lsearch -all -exact -start $i $list $item] 1 end] while {[incr i] in $doomed} {} } lremove $list {*}$doomed } #The closure-like behaviour is *very* slow especially when called from a context such as the global namespace with lots of vars and large arrays such as ::env proc lmapflat_closure {varnames list script} { set result [list] set values [list] foreach v $varnames { lappend values "\$$v" } # -- --- --- #capture - use uplevel 1 or namespace eval depending on context set capture [uplevel 1 { apply { varnames { set capturevars [tcl::dict::create] set capturearrs [tcl::dict::create] foreach fullv $varnames { set v [tcl::namespace::tail $fullv] upvar 1 $v var if {[info exists var]} { if {(![array exists var])} { tcl::dict::set capturevars $v $var } else { tcl::dict::set capturearrs capturedarray_$v [array get var] } } else { #A variable can show in the results for 'info vars' but still not 'exist'. e.g a 'variable x' declaration in the namespace where the variable has never been set } } return [tcl::dict::create vars $capturevars arrs $capturearrs] } } [info vars] } ] # -- --- --- set cvars [tcl::dict::get $capture vars] set carrs [tcl::dict::get $capture arrs] set apply_script "" foreach arrayalias [tcl::dict::keys $carrs] { set realname [string range $arrayalias [string first _ $arrayalias]+1 end] append apply_script [string map [list %realname% $realname %arrayalias% $arrayalias] { array set %realname% [set %arrayalias%][unset %arrayalias%] }] } append apply_script [string map [list %script% $script] { #foreach arrayalias [info vars capturedarray_*] { # set realname [string range $arrayalias [string first _ $arrayalias]+1 end] # array set $realname [set $arrayalias][unset arrayalias] #} #return [eval %script%] %script% }] #puts "--> $apply_script" foreach $varnames $list { lappend result {*}[apply\ [list\ [concat $varnames [tcl::dict::keys $cvars] [tcl::dict::keys $carrs] ]\ $apply_script\ ] {*}[subst $values] {*}[tcl::dict::values $cvars] {*}[tcl::dict::values $carrs] ] } return $result } #link version - can write to vars in calling context - but keeps varnames themselves isolated #performance much better than capture version - but still a big price to pay for the isolation proc lmapflat_link {varnames list script} { set result [list] set values [list] foreach v $varnames { lappend values "\$$v" } set linkvars [uplevel 1 [list info vars]] set nscaller [uplevel 1 [list namespace current]] set apply_script "" foreach vname $linkvars { append apply_script [string map [list %vname% $vname]\ {upvar 2 %vname% %vname%}\ ] \n } append apply_script $script \n #puts "--> $apply_script" foreach $varnames $list { lappend result {*}[apply\ [list\ $varnames\ $apply_script\ $nscaller\ ] {*}[subst $values]\ ] } return $result } #proc lmapflat {varnames list script} { # concat {*}[uplevel 1 [list lmap $varnames $list $script]] #} #lmap can accept multiple var list pairs proc lmapflat {args} { concat {*}[uplevel 1 [list lmap {*}$args]] } proc lmapflat2 {args} { concat {*}[uplevel 1 lmap {*}$args] } #proc dict_getdef {dictValue args} { # if {[llength $args] < 1} { # error {wrong # args: should be "dict_getdef dictValue ?key ...? key default"} # } # set keys [lrange $args -1 end-1] # if {[tcl::dict::exists $dictValue {*}$keys]} { # return [tcl::dict::get $dictValue {*}$keys] # } else { # return [lindex $args end] # } #} if {[info commands ::tcl::dict::getdef] eq ""} { proc dict_getdef {dictValue args} { set keys [lrange $args 0 end-1] if {[tcl::dict::exists $dictValue {*}$keys]} { return [tcl::dict::get $dictValue {*}$keys] } else { return [lindex $args end] } } } else { #we pay a minor perf penalty for the wrap interp alias "" ::punk::lib::dict_getdef "" ::tcl::dict::getdef } #proc sample1 {p1 n args} { # #*** !doctools # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] # #[para]Description of sample1 # #[para] Arguments: # # [list_begin arguments] # # [arg_def tring p1] A description of string argument p1. # # [arg_def integer n] A description of integer argument n. # # [list_end] # return "ok" #} #supports *safe* ultra basic offset expressions as used by lindex etc, but without the 'end' features #safe in that we don't evaluate the expression as a string. proc offset_expr {expression} { set expression [tcl::string::map {_ {}} $expression] ;#review - this is for 8.6 to understand underscored ints if {[tcl::string::is integer -strict $expression]} { return [expr {$expression}] } if {[regexp {([^+-]*)([+-])(.*)} $expression _match a op b] && [tcl::string::is integer -strict $a] && [tcl::string::is integer -strict $b]} { if {$op eq "-"} { return [expr {$a - $b}] } else { return [expr {$a + $b}] } } else { error "bad expression '$expression': must be integer?\[+-\]integer?" } } punk::args::define { @id -id ::punk::lib::is_indexset @cmd -name punk::lib::is_indexset\ -summary\ "Validate string is a comma-delimited 'indexset'."\ -help\ "Validate that a string is an 'indexset' An indexset consists of a comma delimited list of indexes or index-ranges. No particular base is assumed for the purposes of validating an indexset here. While in Tcl, lists are zero-based - an indexset can be applied to lists of any base. e.g -10..-1 is an indexset that just won't resolve any results for a list with a base >= 0. To validate if an indexset is strictly within range, both the length of the data and the base would need to be considered. The normal 'range' specifier is .. but can be of the form .x. where x is the step value. The range specifier can appear at the beginning, middle or end, or even alone to indicate the entire range of valid values. e.g the following are all valid ranges 1.. (index 1 to 'max') ..10 (index 'base' to 10) 2..11 (index 2 to 11) .. (all indices) .3. (1st index and every 3rd index thereafter) Common whitespace elements space,tab,newlines are ignored. Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, e.g end-2 or 2+2. see indexset_resolve" @values -min 1 -max 1 indexset -type string } proc is_indexset {indexset} { #collapse internal whitespace (for basic whitespace set we allow) set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] if {![regexp {^[\-\+_end,\.0-9]*$} $indexset]} { return 0 } set ranges [split $indexset ,] foreach r $ranges { set validateindices [list] set rposn [string first .. $r] if {$rposn >= 0} { set sepsize 2 set step 1 #review - whitespace between ints? lappend validateindices {*}[string range $r 0 $rposn-1] {*}[string range $r $rposn+2 end] } elseif {[string first . $r] >= 0} { set stripped [string map {. ""} $r] if {[tcl::string::length $stripped] != [tcl::string::length $r]-2} { #if one dot exists - must be exactly 2 dots in total - possibly separated by positive/negative int (not zero) return 0 } #assert - we have exactly 2 dots separated by something. #check for .n. 'stepped' range set fdot [string first . $r] set ldot [string last . $r] set step [string range $r $fdot+1 $ldot-1] #todo - allow basic mathops for step: 2+1 2+-1 etc same as tcl lindex, lseq #1.0.10 should be valid but behave similarly to lseq 1 0 by 0 ie returns nothing #1.end.10 or similar shouldn't be valid - but we need to allow other basic index expressions. if {[string match *end* $step] || [catch {lindex {} $step}]} { return 0 } #if {![string is integer -strict $step] || $step == 0} { # return 0 #} lappend validateindices {*}[string range $r 0 $fdot-1] {*}[string range $r $ldot+1 end] } else { #'range' is just an index set validateindices [list $r] } foreach v $validateindices { if {$v eq "" || $v eq "end"} {continue} if {[string is integer -strict $v]} {continue} if {[catch {lindex {} $v}]} { return 0 } } } return 1 } #review - compare to IMAP4 methods of specifying ranges? #TODO add tests to test::punk::lib indexset_resolve is a little tricky punk::args::define { @id -id ::punk::lib::indexset_resolve @cmd -name punk::lib::indexset_resolve\ -summary\ "Resolve an indexset to a list of integers based on supplied list or string length."\ -help\ "Resolve an 'indexset' to a list of actual indices within the range of the provided numitems value. e.g in a basic case: for a list of 10 items, 'indexset_resolve 10 end' will return the index 9 An indexset consists of a comma delimited list of indexes or index-ranges. Ranges must be specified with a range-indicator such as .. as the separator, with an empty value at either side of the separator representing beginning and end of the index range respectively. The range-separator can be of the form .x. where x is an integer or basic expression (single +/- operation) that indicates the step value to use. This is equivalent to the 'by' value in the tcl9 lseq command. When the start index is lower than the end, the step value defaults to 1. ie indexset_resolve 0..7 is equivalent to indexset_resolve 0.1.7 When the start index is higher than the end, the step value defaults to -1. ie indexset_resolve 7..0 is equivalent to indexset_resolve 0.-1.7 If start and end are ommitted, increasing order is assumed if the step isn't specified. eg .. represents the range from the base to the end .-1. would represent end to base with step -1 If start is omitted and only the end is supplied: The default step is 1 indicating ascension and the missing start is equivalent to the base. indexset_resolve 5 ..2 -> 0 1 2 The default start is 'end' if the step is negative indexset_resolve 5 .-1.2 -> 4 3 2 If end is omitted and only the start is supplied: The default step is 1 indicating ascension and the missing end is equivalent to 'end' indexset_resolve 5 2.. -> 2 3 4 The default end is the base if the step is negative indexset_resolve 5 2.-1. -> 2 1 0 Like the tcl9 lseq command - a step (by) value of zero produces no results. The indexes are 0-based by default, but the base can be specified. indexset_resolve 7 .. -> 0 1 2 3 4 5 6 indexset_resolve -base -3 7 .. -> -3 -2 -1 0 1 2 3 Whitespace is ignored. Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, e.g end-2 or 2+2. end means the last item. end-1 means the second last item. 0.. is the same as 0..end indexset examples: These assume the default 0-based indices (-base 0) 1,3.. output the index 1 (2nd item) followed by all from index 3 to the end. indexset_resolve 4 1,3.. -> 1 3 indexset_resolve 10 1,3.. -> 1 3 4 5 6 7 8 9 0..2,end output the first 3 indices, and the last index. end-1..0 output the indexes in reverse order from 2nd last item to first item." @leaders -min 0 -max 0 @opts -base -type integer -prefix 1 -default 0 -help\ "This is the starting index. It can be positive, negative or zero. This affects the start and end calculations, limiting what indices will be returned. e.g with base 1 'end' will give a different value from base 0 for 10 items 'end' is 10 when 1-based for 10 items 'end' is 9 when 0-based For base 1, index 0 is considered to be below the range. ie indexset_resolve -base 1 10 0..3 -> 1 2 3 indexset_resolve -base 0 10 0..3 -> 0 1 2 3 It does not *convert* indexes within the range. indexset_resolve -base 1 10 5 -> 5 indexset_resolve -base 0 10 5 -> 5 ie if you ask for a 1-based resolution of an indexset the integers that are within the range will come out the same, so the result needs to be treated as a 1-based set of indices when performing further operations. " @values -min 2 -max 3 numitems -type integer indexset -type indexset -help "comma delimited specification for indices to return" } #limit punk::args parsing to unhappy paths where possible proc indexset_resolve {args} { # -------------------------------------------------- # Manual parsing of happy path args instead of using punk::args::parse $args withid ::punk::lib::indexset_resolve # This is because indexset_resolve is *somewhat* low level, has only a few args, and we don't want any overhead. # for the unhappy path - the punk::args::parse is fine to generate the usage/error information. # -------------------------------------------------- if {[llength $args] < 2} { punk::args::resolve $args withid ::punk::lib::indexset_resolve } set indexset [lindex $args end] set numitems [lindex $args end-1] if {![string is integer -strict $numitems] || ![is_indexset $indexset]} { #use parser on unhappy path only set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] } #assert we have 2 or more args set base 0 ;#default if {[llength $args] > 2} { #if more than just numitems and indexset - we expect only -base ie 4 args in total if {[llength $args] != 4} { set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] } set optname [lindex $args 0] set optval [lindex $args 1] set fulloptname [tcl::prefix::match -error "" -base $optname] if {$fulloptname ne "-base" || ![string is integer -strict $optval]} { set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] } set base $optval } # -------------------------------------------------- set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] ;#collapse basic whitespace set index_list [list] ;#list of actual indexes within the range set iparts [split $indexset ,] set based_max [expr {$numitems -1 + $base}] #we already did is_indexset check above, so we can make assumptions about well-formedness of each part foreach ipart $iparts { set ipart [string trim $ipart] #we need to cater for n..m as well as n.s.m where s is 'step' set rposn [string first . $ipart] if {$rposn>=0} { #if we found one dot - there must be exactly 2 dots in the ipart, separated by nothing, or a basic integer-expression set rposn2 [string last . $ipart] if {$rposn2 == $rposn+1} { #.. set step "default" ;#could be 1 or -1 } else { set step [tcl::string::range $ipart $rposn+1 $rposn2-1] } lassign [punk::lib::string_splitbefore_indices $ipart $rposn $rposn2+1] rawa _ rawb set rawa [string trim $rawa] set rawb [string trim $rawb] if {$rawa eq "" && $rawb eq ""} { if {$step eq "default"} { set step 1 ;#default ascending when no start and no end } if {$step < 0} { set rawa end set rawb $base } else { set rawa $base set rawb end } #if neither start nor end specified - we won't get out of range results from lindex_resolve set a [punk::lib::lindex_resolve $numitems $rawa $base] set b [punk::lib::lindex_resolve $numitems $rawb $base] } else { if {$rawa eq ""} { if {$step eq "default"} { #when start not specified, but end is - default direction always ascending #(even if end is base or below range) set step 1 } if {$step < 0} { set rawa end } else { set rawa $base } } set a [punk::lib::lindex_resolve $numitems $rawa $base] if {$a == -Inf} { #undershot - leave negative } elseif {$a == Inf} { #overshot set a [expr {$based_max + 1}] ;#put it outside the range on the upper side } #review - a may be -Inf if {$rawb eq ""} { if {$step eq "default"} { set step 1 } if {$step < 0} { if {$a < $base} { #make sure both #mathfunc::isinf is tcl9+ if {[catch { if {[::tcl::mathfunc::isinf $a]} { set a [expr {$base -1}] } }]} { if {[string match -nocase *inf* $a]} { set a [expr {$base -1}] } } set rawb $a } else { set rawb $base } } else { if {$a > $based_max} { set rawb $a ;#make sure .. doesn't return last item - should return nothing } else { set rawb end } } } set b [punk::lib::lindex_resolve $numitems $rawb $base] if {$b == -Inf} { #undershot - leave negative } elseif {$b == Inf} { #set b [expr {$numitems}] ;#overshot - put it outside the range on the upper side set b [expr {$based_max + 1}] ;#overshot - put it outside the range on the upper side } } #JJJ #e.g make sure .. doesn't return last item - should return nothing as both are above the range. if {$a >= $base && $a <= $based_max && $b >=$base && $b <= $based_max} { #assert a & b are integers within the range if {$step eq "default"} { #unspecified step - base direction on order of a & b if {$a <= $b} { set step 1 } else { set step -1 } } lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. } else { if {$a >= $base && $a <= $based_max} { #only a is in the range if {$b < $base} { set b $base } else { set b $based_max } if {$step eq "default"} { if {$a <= $b} { set step 1 } else { set step -1 } } lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. } elseif {$b >=$base && $b <= $based_max} { #only b is in the range if {$step eq "default"} { if {$a <= $b} { set step 1 } else { set step -1 } } if {$step < 0} { if {$a < $base} { #negative step from below - doesn't matter if b is in range - recast both to an int below $base #(a may be -Inf) set a [expr {$base -1}] set b $a set step 0 ;#we should return nothing } } else { if {$a < $base} { set a $base } else { set a $based_max } } lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. } else { #both outside the range if {$a < $base && $b > $base} { #spans the range in forward order set a $base set b $based_max if {$step eq "default"} { set step 1 } lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. } elseif {$a > $base && $b < $base} { #spans the range in reverse order set a $based_max set b $base if {$step eq "default"} { set step -1 } lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. } #both outside of range on same side } } } else { set idx [punk::lib::lindex_resolve_basic $numitems $ipart $base] #returns only -Inf for out of range at either end if {$idx >= $base} { #index within the range lappend index_list $idx } } } return $index_list } # showdict uses lindex_resolve results -Inf & Inf to determine whether index is out of bounds on lower vs upper side #This doesn't need the list itself - just the length suffices. punk::args::define { @id -id ::punk::lib::lindex_resolve @cmd -name punk::lib::lindex_resolve\ -summary\ "Resolve an indexexpression to an integer based on supplied list or string length."\ -help\ "Resolve an index which may be of the forms accepted by Tcl list or string commands such as end-2 or 2+2 to the actual integer index for the supplied list/string length, or to a negative value below -1 indicating whether the index was below or above the range of possible indices for the length supplied. Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. This means the proc may be called with something like $x+2 end-$y etc Sometimes the actual integer index is desired. We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. lindex_resolve will parse the index expression and return: a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) The similar function lindex_resolve_basic uses -Inf to denote out of range at either end of the list/string. Otherwise it will return an integer corresponding to the position in the data. This is in stark contrast to Tcl list/string function indices which will return empty strings for out of bounds indices, or in the case of lrange, return results anyway. Like Tcl list commands - it will produce an error if the form of the index is not acceptable. For empty lists/string (datalength 0), end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which can be resolved safely with expr " @values -min 2 -max 2 datalength -type integer index -type indexexpression } proc lindex_resolve {len index {base 0}} { #*** !doctools #[call [fun lindex_resolve] [arg len] [arg index]] #[para]Resolve an index which may be of the forms accepted by Tcl list commands such as end-2 or 2+2 to the actual integer index for the supplied list/string length #[para]Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. #[para]This means the proc may be called with something like $x+2 end-$y etc #[para]Sometimes the actual integer index is desired. #[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. #[para]lindex_resolve will parse the index expression and return: #[para] a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) #[para] b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) #[para] We don't return -1 - as the similar function lindex_resolve_basic uses this to denote out of range at either end of the list/string #[para]Otherwise it will return an integer corresponding to the position in the list. #[para]This is in stark contrast to Tcl list function indices which will return empty strings for out of bounds indices, or in the case of lrange, return results anyway. #[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable #[para]For empty lists, end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 #Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which can be resolved safely with expr #REVIEW - we need compat for 1_000 etc to handle things like toml even in 8.6? #A basic string map means we aren't properly validating #todo - be stricter about malformations such as 1000_ if {![string is integer -strict 1_0]} { #basic forward compatibility with integers such as 1_000 for 8.6.x set index [tcl::string::map {_ {}} $index] set len [tcl::string::map {_ {}} $len] } if {![string is integer -strict $len] || $len < 0} { error "lindex_resolve len must be a positive integer." } set based_max [expr {$len -1 + $base}] if {[string is integer -strict $index]} { #review - base? #can match +i -i if {$index < $base} { return -Inf } elseif {$index > $based_max} { return Inf } else { #integer may still have + sign - normalize with expr return [expr {$index}] } } else { if {[string match end* $index]} { if {$index ne "end"} { set op [string index $index 3] set offset [string range $index 4 end] #note - offset could have leading + or - # 'string is integer -strict +1' ==> true #e.g end+-1 is valid (end++-1 is not) if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} if {$offset == 0} { #(offset +0, -0 or 0 or 000 0_0 etc) #op either + or - is irrelevant #set index [expr {$len-1}] ;#+ base ? set index $based_max if {$index < $base} { #return -2 ;#special case - equivalent to 'end', with empty list - treat like a positive number out of bounds return Inf } else { return $index } } #set index [if {$op eq "+"} {expr {($len-1) + $offset}} else {expr {($len-1) - $offset}}] set index [if {$op eq "+"} {expr {$based_max + $offset}} else {expr {$based_max - $offset}}] if {$index < $base} { return -Inf } elseif {$index > $based_max} { return Inf } else { return $index } } else { #index is 'end' if {$len == 0} { #special case - 'end' with empty list - treat end like a positive number out of bounds return Inf } #return [expr {$len - 1 + $base}] return $based_max } } else { #plain +- already handled above. #we are trying to avoid evaluating unbraced expr of potentially insecure origin #regexp must split a++b to a + +b (not a+ + b) ie first +/- is the op if {[regexp {([^+-]*)([+-])(.*)} $index _match a op b]} { if {[string is integer -strict $a] && [string is integer -strict $b]} { if {$op eq "-"} { set index [expr {$a - $b}] } else { set index [expr {$a + $b}] } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } if {$index < $base} { return -Inf } elseif {$index > $based_max} { return Inf } return $index } } } proc lindex_resolve_basic {len index {base 0}} { #*** !doctools #[call [fun lindex_resolve_basic] [arg len] [arg index]] #[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) #[para] returns -Inf for out of range at either end, or a valid integer index #[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound #[para] This is only likely to be faster than average over lindex_resolve for small lists and for Tcl which has the builtin lseq command #[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 #[para] For pure integer indices the performance should be equivalent if {![string is integer -strict $len] || $len < 0} { error "lindex_resolve_basic len must be an integer greater than or equal to zero" } if {![string is integer -strict $base]} { #base can be negative error "lindex_resolve_basic base must be an integer" } set based_max [expr {$len -1 + $base}] set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 if {[string is integer -strict $index]} { #can match +i -i #avoid even the lseq overhead when the index is simple if {$index < $base || ($index > $based_max)} { #even though in this case we could return -Inf or Inf like lindex_resolve; #for consistency we don't return Inf for upper-boudn violation, #as which bound is violated is not always directly determinable for compound index expressions (such as end-x) using the lseq+lindex mechanism. return -Inf } else { #!NOTE! index within range is unchanged - no matter the base #integer may still have + sign - normalize with expr return [expr {$index}] } } if {$len > 0} { #For large len - this is a wasteful allocation if no true lseq available in Tcl version. #lseq produces an 'arithseries' object which we can index into without allocating an entire list (REVIEW) set testlist [punk::lib::range $base $based_max] ;# uses lseq if available, has fallback of creating a potentially large list of numbers. } else { set testlist [list] #we want to call 'lindex' even in this case - to get the appropriate error message } set idx [lindex $testlist $index] if {$idx eq ""} { #we have no way to determine if out of bounds is at lower vs upper end return -Inf } else { return $idx } } proc lindex_get {list index} { set resultlist [lrange $list $index $index] if {![llength $resultlist]} { return -1 } else { #we still don't know the actual integer index for an index such as end-x or int-int without parsing and evaluating ourself. #we can return the value - but only in a way that won't collide with our -1 out-of-range indicator return [tcl::dict::create value [lindex $resultlist 0]] } } proc string_splitbefore {str index} { if {![string is integer -strict $index]} { set index [punk::lib::lindex_resolve [string length $str] $index] switch -- $index { -Inf { return [list "" $str] } Inf { return [list $str ""] } } } return [list [string range $str 0 $index-1] [string range $str $index end]] #scan %s stops at whitespace - not useful here. #scan $s %${p}s%s } proc string_splitbefore_indices {str args} { set parts [list $str] set sizes [list [string length $str]] set s 0 foreach index $args { if {![string is integer -strict $index]} { set index [punk::lib::lindex_resolve [string length $str] $index] switch -- $index { -Inf { if {[lindex $sizes 0] != 0} { ledit parts 0 0 {} [lindex $parts 0] ledit sizes 0 0 0 [lindex $sizes 0] } continue } Inf { if {[lindex $sizes end] != 0} { ledit parts end end [lindex $parts end] {} ledit sizes end end [lindex $sizes end] 0 } continue } } } if {$index <= 0} { if {[lindex $sizes 0] != 0} { ledit parts 0 0 {} [lindex $parts 0] ledit sizes 0 0 0 [lindex $sizes 0] } continue } if {$index >= [string length $str]} { if {[lindex $sizes end] != 0} { ledit parts end end [lindex $parts end] {} ledit sizes end end [lindex $sizes end] 0 } continue } set i -1 set a 0 foreach sz $sizes { incr i if {$a + $sz > $index} { set p [lindex $parts $i] #puts "a:$a index:$index" if {$a == $index} { break } ledit parts $i $i [string range $p 0 [expr {$index -$a -1}]] [string range $p $index-$a end] ledit sizes $i $i [expr {$index - $a}] [expr {($a + $sz)-$index}] break } incr a $sz } #puts "->parts:$parts" #puts "->sizes:$sizes" } return $parts } proc K {x y} {return $x} #*** !doctools #[call [fun K] [arg x] [arg y]] #[para]The K-combinator function - returns the first argument, x and discards y #[para]see [uri https://wiki.tcl-lang.org/page/K] #[para]It is used in cases where command-substitution at the calling-point performs some desired effect. proc is_utf8_multibyteprefix {bytes} { #*** !doctools #[call [fun is_utf8_multibyteprefix] [arg str]] #[para] Returns a boolean if str is potentially a prefix for a multibyte utf-8 character #[para] ie - tests if it is possible that appending more data will result in a utf-8 codepoint #[para] Will return false for an already complete utf-8 codepoint #[para] It is assumed the incomplete sequence is at the beginning of the bytes argument #[para] Suitable input for this might be from the unreturned tail portion of get_utf8_leading $testbytes #[para] e.g using: set head [lb]get_utf8_leading $testbytes[rb] ; set tail [lb]string range $testbytes [lb]string length $head[rb] end[rb] regexp {(?x) ^ (?: [\xC0-\xDF] | #possible prefix for two-byte codepoint [\xE0-\xEF] [\x80-\xBF]{0,1} | #possible prefix for three-byte codepoint [\xF0-\xF4] [\x80-\xBF]{0,2} #possible prefix for ) $ } $bytes } proc is_utf8_first {str} { regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) ^ (?: [\x00-\x7F] | # Single-byte chars (ASCII range) [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) ) } $str } proc is_utf8_single {1234bytes} { #*** !doctools #[call [fun is_utf8_single] [arg 1234bytes]] #[para] Tests input of 1,2,3 or 4 bytes and responds with a boolean indicating if it is a valid utf-8 character (codepoint) regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) ^ (?: [\x00-\x7F] | # Single-byte chars (ASCII range) [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) ) $ } $1234bytes } proc get_utf8_leading {rawbytes} { #*** !doctools #[call [fun get_utf8_leading] [arg rawbytes]] #[para] return the leading portion of rawbytes that is a valid utf8 sequence. #[para] This will stop at the point at which the bytes can't be interpreted as a complete utf-8 codepoint #[para] e.g It will not return the first byte or 2 of a 3-byte utf-8 character if the last byte is missing, and will return only the valid utf-8 string from before the first byte of the incomplete character. #[para] It will also only return the prefix before any bytes that cannot be part of a utf-8 sequence at all. #[para] Note that while this will return valid utf8 - it has no knowledge of grapheme clusters or diacritics #[para] This means if it is being used to process bytes split at some arbitrary point - the trailing data that isn't returned could be part of a grapheme cluster that belongs with the last character of the leading string already returned #[para] The utf-8 BOM \xEF\xBB\xBF is a valid UTF8 3-byte sequence and so can also be returned as part of the leading utf8 bytes if {[regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) \A ( [\x00-\x7F] | # Single-byte chars (ASCII range) [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) ) + } $rawbytes completeChars]} { return $completeChars } return "" } proc hex2dec {args} { #*** !doctools #[call [fun hex2dec] [opt {option value...}] [arg list_largeHex]] #[para]Convert a list of (possibly large) unprefixed hex strings to their decimal values #[para]hex2dec accepts and ignores internal underscores in the same manner as Tcl 8.7+ numbers e.g hex2dec FF_FF returns 65535 #[para]Leading and trailing underscores are ignored as a matter of implementation convenience - but this shouldn't be relied upon. #[para]Leading or trailing whitespace in each list member is allowed e.g hex2dec " F" returns 15 #[para]Internal whitespace e.g "F F" is not permitted - but a completely empty element "" is allowed and will return 0 set list_largeHex [lindex $args end] set argopts [lrange $args 0 end-1] if {[llength $argopts]%2 !=0} { error "[namespace current]::hex2dec arguments prior to list_largeHex must be option/value pairs - received '$argopts'" } set opts [tcl::dict::create\ -validate 1\ -empty_as_hex "INVALID set -empty_as_hex to a hex string e.g FF if empty values should be replaced"\ ] set known_opts [tcl::dict::keys $opts] foreach {k v} $argopts { tcl::dict::set opts [tcl::prefix match -message "options for hex2dec. Unexpected option" $known_opts $k] $v } # -- --- --- --- set opt_validate [tcl::dict::get $opts -validate] set opt_empty [tcl::dict::get $opts -empty_as_hex] # -- --- --- --- set list_largeHex [lmap h $list_largeHex[unset list_largeHex] {string map {_ ""} [string trim $h]}] if {$opt_validate} { #Note appended F so that we accept list of empty strings as per the documentation if {![string is xdigit -strict [join $list_largeHex ""]F ]} { error "[namespace current]::hex2dec error: non-hex digits encountered after stripping underscores and leading/trailing whitespace for each element\n $list_largeHex" } } if {![string is xdigit -strict [string map {_ ""} $opt_empty]]} { #mapping empty string to a value destroys any advantage of -scanonly #todo - document that -scanonly has 2 restrictions - each element must be valid hex and less than 7 chars long #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {0} : {[set v]}}] if {[lsearch $list_largeHex ""] >=0} { error "[namespace current]::hex2dec error: empty values in list cannot be mapped to non-hex $opt_empty" } } else { set opt_empty [string trim [string map {_ ""} $opt_empty]] if {[set first_empty [lsearch $list_largeHex ""]] >= 0} { #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {$opt_empty} : {$v}}] set nonempty_head [lrange $list_largeHex 0 $first_empty-1] set list_largeHex [concat $nonempty_head [lmap v [lrange $list_largeHex $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] } } return [scan $list_largeHex [string repeat %llx [llength $list_largeHex]]] } proc dec2hex {args} { #*** !doctools #[call [fun dex2hex] [opt {option value...}] [arg list_decimals]] #[para]Convert a list of decimal integers to a list of hex values #[para] -width can be used to make each hex value at least int characters wide, with leading zeroes. #[para] -case upper|lower determines the case of the hex letters in the output set list_decimals [lindex $args end] set argopts [lrange $args 0 end-1] if {[llength $argopts]%2 !=0} { error "[namespace current]::dec2hex arguments prior to list_decimals must be option/value pairs - received '$argopts'" } set defaults [tcl::dict::create\ -width 1\ -case upper\ -empty_as_decimal "INVALID set -empty_as_decimal to a number if empty values should be replaced"\ ] set known_opts [tcl::dict::keys $defaults] set fullopts [tcl::dict::create] foreach {k v} $argopts { tcl::dict::set fullopts [tcl::prefix match -message "options for [tcl::namespace::current]::dec2hex. Unexpected option" $known_opts $k] $v } set opts [tcl::dict::merge $defaults $fullopts] # -- --- --- --- set opt_width [tcl::dict::get $opts -width] set opt_case [tcl::dict::get $opts -case] set opt_empty [tcl::dict::get $opts -empty_as_decimal] # -- --- --- --- set resultlist [list] switch -- [string tolower $opt_case] { upper { set spec X } lower { set spec x } default { error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower" } } set fmt "%${opt_width}.${opt_width}ll${spec}" set list_decimals [lmap d $list_decimals[unset list_decimals] {string map {_ ""} [string trim $d]}] if {![string is digit -strict [string map {_ ""} $opt_empty]]} { if {[lsearch $list_decimals ""] >=0} { error "[namespace current]::dec2hex error: empty values in list cannot be mapped to non-decimal $opt_empty" } } else { set opt_empty [string map {_ ""} $opt_empty] if {[set first_empty [lsearch $list_decimals ""]] >= 0} { set nonempty_head [lrange $list_decimals 0 $first_empty-1] set list_decimals [concat $nonempty_head [lmap v [lrange $list_decimals $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] } } return [format [lrepeat [llength $list_decimals] $fmt] {*}$list_decimals] } proc log2 x "expr {log(\$x)/[expr log(2)]}" #*** !doctools #[call [fun log2] [arg x]] #[para]log base2 of x #[para]This uses a 'live' proc body - the divisor for the change of base is computed once at definition time #[para](courtesy of RS [uri https://wiki.tcl-lang.org/page/Additional+math+functions]) proc logbase {b x} { #*** !doctools #[call [fun logbase] [arg b] [arg x]] #[para]log base b of x #[para]This function uses expr's natural log and the change of base division. #[para]This means for example that we can get results like: logbase 10 1000 = 2.9999999999999996 #[para]Use expr's log10() function or tcl::mathfunc::log10 for base 10 expr {log($x)/log($b)} } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::factors @cmd -name punk::lib::factors\ -summary\ "Sorted factors of positive integer x"\ -help\ "Return a sorted list of the positive factors of x where x > 0 For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. (including zero itself in this context)* This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers Comparisons were done with some numbers below 17 digits long For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers but has the disadvantage of being slower for 'small' numbers and using more memory. If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get there than computing the whole list, even for small values of x * Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py In other mathematical contexts zero may be considered not to divide anything." @values -min 1 -max 1 x -type integer }] } proc factors {x} { #*** !doctools #[call [fun factors] [arg x]] #[para]Return a sorted list of the positive factors of x where x > 0 #[para]For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. (including zero itself in this context)* #[para]This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors #[para]Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions #[para]See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers #[para]Comparisons were done with some numbers below 17 digits long #[para]For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. #[para]The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers #but has the disadvantage of being slower for 'small' numbers and using more memory. #[para]If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get there than computing the whole list, even for small values of x #[para]* Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py #[para] In other mathematical contexts zero may be considered not to divide anything. set factors [list 1] set j 2 set max [expr {sqrt($x)}] while {$j <= $max} { if {($x % $j) == 0} { lappend factors $j [expr {$x / $j}] } incr j } lappend factors $x return [lsort -unique -integer $factors] } proc oddFactors {x} { #*** !doctools #[call [fun oddFactors] [arg x]] #[para]Return a list of odd integer factors of x, sorted in ascending order set j 2 set max [expr {sqrt($x)}] set factors [list 1] while {$j <= $max} { if {$x % $j == 0} { set other [expr {$x / $j}] if {$other % 2} { if {$other ni $factors} { lappend factors $other } } if {$j % 2} { if {$j ni $factors} { lappend factors $j } } } incr j } return [lsort -integer -increasing $factors] } proc greatestFactorBelow {x} { #*** !doctools #[call [fun greatestFactorBelow] [arg x]] #[para]Return the largest factor of x excluding itself #[para]factor functions can be useful for console layout calculations #[para]See Tcllib math::numtheory for more extensive implementations if {$x % 2 == 0 || $x == 0} { return [expr {$x / 2}] } set j 3 set max [expr {sqrt($x)}] while {$j <= $max} { if {$x % $j == 0} { return [expr {$x / $j}] } incr j 2 } return 1 } proc greatestOddFactorBelow {x} { #*** !doctools #[call [fun greatestOddFactorBelow] [arg x]] #[para]Return the largest odd integer factor of x excluding x itself if {$x %2 == 0} { return [greatestOddFactor $x] } set j 3 #dumb brute force - time taken to compute is wildly variable on big numbers #todo - use a (memoized?) generator of primes to reduce the search space #tcllib math::numtheory has suitable functions - but do we want that dependency here? Testing shows brute-force often faster for small numbers. set god 1 set max [expr {sqrt($x)}] while { $j <= $max} { if {$x % $j == 0} { set other [expr {$x / $j}] if {$other % 2 == 0} { set god $j } else { set god [expr {$x / $j}] #lowest j - so other side must be highest break } } incr j 2 } return $god } proc greatestOddFactor {x} { #*** !doctools #[call [fun greatestOddFactor] [arg x]] #[para]Return the largest odd integer factor of x #[para]For an odd value of x - this will always return x if {$x % 2 != 0 || $x == 0} { return $x } set r [expr {$x / 2}] while {$r % 2 == 0} { set r [expr {$r / 2}] } return $r } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::gcd @cmd -name punk::lib::gcd\ -summary\ "Greatest common divisor of m and n."\ -help\ "Return the greatest common divisor of m and n. Straight from Lars Hellström's math::numtheory library in Tcllib Graphical use: An a by b rectangle can be covered with square tiles of side-length c, only if c is a common divisor of a and b" @values -min 2 -max 2 m -type integer n -type integer }] } proc gcd {n m} { #*** !doctools #[call [fun gcd] [arg n] [arg m]] #[para]Return the greatest common divisor of m and n #[para]Straight from Lars Hellström's math::numtheory library in Tcllib #[para]Graphical use: #[para]An a by b rectangle can be covered with square tiles of side-length c, #[para]only if c is a common divisor of a and b # # Apply Euclid's good old algorithm # if { $n > $m } { set t $n set n $m set m $t } while { $n > 0 } { set r [expr {$m % $n}] set m $n set n $r } return $m } namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @id -id ::punk::lib::lcm @cmd -name punk::lib::lcm\ -summary\ "Lowest common multiple of m and n."\ -help\ "Return the lowest common multiple of m and n. Straight from Lars Hellström's math::numtheory library in Tcllib" @values -min 2 -max 2 m -type integer n -type integer }] } proc lcm {n m} { set gcd [gcd $n $m] return [expr {$n*$m/$gcd}] } proc commonDivisors {x y} { #*** !doctools #[call [fun commonDivisors] [arg x] [arg y]] #[para]Return a list of all the common factors of x and y #[para](equivalent to factors of their gcd) return [factors [gcd $x $y]] } #experimental only - there are better/faster ways proc sieve n { set primes [list] if {$n < 2} {return $primes} set nums [tcl::dict::create] for {set i 2} {$i <= $n} {incr i} { tcl::dict::set nums $i "" } set next 2 set limit [expr {sqrt($n)}] while {$next <= $limit} { for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} lappend primes $next tcl::dict::for {next -} $nums break } return [concat $primes [tcl::dict::keys $nums]] } proc sieve2 n { set primes [list] if {$n < 2} {return $primes} set nums [tcl::dict::create] for {set i 2} {$i <= $n} {incr i} { tcl::dict::set nums $i "" } set next 2 set limit [expr {sqrt($n)}] while {$next <= $limit} { for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} lappend primes $next #dict for {next -} $nums break set next [lindex $nums 0] } return [concat $primes [tcl::dict::keys $nums]] } proc hasglobs {str} { #*** !doctools #[call [fun hasglobs] [arg str]] #[para]Return a boolean indicating whether str contains any of the glob characters: * ? [lb] [rb] #[para]hasglobs uses append to preserve Tcls internal representation for str - so it should help avoid shimmering in the few cases where this may matter. regexp {[*?\[\]]} [append obj2 $str {}] ;# int-rep preserving } proc trimzero {number} { #*** !doctools #[call [fun trimzero] [arg number]] #[para]Return number with left-hand-side zeros trimmed off - unless all zero #[para]If number is all zero - a single 0 is returned set trimmed [string trimleft $number 0] if {[string length $trimmed] == 0} { set trimmed 0 } return $trimmed } proc substring_count {str substring} { #*** !doctools #[call [fun substring_count] [arg str] [arg substring]] #[para]Search str and return number of occurrences of substring #faster than lsearch on split for str of a few K if {$substring eq ""} {return 0} set occurrences [expr {[string length $str]-[string length [string map [list $substring {}] $str]]}] return [expr {$occurrences / [string length $substring]}] } proc dict_merge_ordered {defaults main} { #*** !doctools #[call [fun dict_merge_ordered] [arg defaults] [arg main]] #[para]The standard dict merge accepts multiple dicts with values from dicts to the right (2nd argument) taking precedence. #[para]When merging with a dict of default values - this means that any default key/vals that weren't in the main dict appear in the output before the main data. #[para]This function merges the two dicts whilst maintaining the key order of main followed by defaults. #1st merge (inner merge) with wrong values taking precedence - but right key-order - then (outer merge) restore values return [tcl::dict::merge [tcl::dict::merge $main $defaults] $main] } proc askuser {question} { #*** !doctools #[call [fun askuser] [arg question]] #[para]A basic utility to read an answer from stdin #[para]The prompt is written to the terminal and then it waits for a user to type something #[para]stdin is temporarily configured to blocking and then put back in its original state in case it wasn't already so. #[para]If the terminal is using punk::console and is in raw mode - the terminal will temporarily be put in line mode. #[para](Generic terminal raw vs linemode detection not yet present) #[para]The user must hit enter to submit the response #[para]The return value is the string if any that was typed prior to hitting enter. #[para]The question argument can be manually colourised using the various punk::ansi funcitons #[example_begin] # set answer [lb]punk::lib::askuser "[lb]a+ green bold[rb]Do you want to proceed? (Y|N)[lb]a[rb]"[rb] # if {[lb]string match y* [lb]string tolower $answer[rb][rb]} { # puts "Proceeding" # } else { # puts "Cancelled by user" # } #[example_end] puts stdout $question flush stdout set stdin_state [chan configure stdin] if {[catch { package require punk::console set console_raw [tsv::get console is_raw] } err_console]} { #assume normal line mode set console_raw 0 } try { chan configure stdin -blocking 1 if {$console_raw} { punk::console::disableRaw set answer [gets stdin] punk::console::enableRaw } else { set answer [gets stdin] } } finally { chan configure stdin -blocking [tcl::dict::get $stdin_state -blocking] } return $answer } #like textutil::adjust::indent - but doesn't strip trailing lines, and doesn't implement skip parameter. proc indent {text {prefix " "}} { set result [list] foreach line [split $text \n] { if {[string trim $line] eq ""} { lappend result "" } else { lappend result $prefix[string trimright $line] } } return [join $result \n] } #dedent? proc undent {text {max -1}} { if {$text eq ""} { return "" } set lines [split $text \n] set nonblank [list] foreach ln $lines { if {[string trim $ln] eq ""} { continue } lappend nonblank $ln } set lcp [longestCommonPrefix $nonblank] if {$lcp eq ""} { return $text } regexp {^([\t ]*)} $lcp _m lcp if {$lcp eq ""} { return $text } set len [string length $lcp] if {$max != -1} { set len [expr {min($len,$max)}] } set result [list] foreach ln $lines { if {[string trim $ln] eq ""} { lappend result "" } else { lappend result [string range $ln $len end] } } return [join $result \n] } #A version of textutil::string::longestCommonPrefixList proc longestCommonPrefix {items} { if {[llength $items] <= 1} { return [lindex $items 0] } set items [lsort $items[unset items]] set min [lindex $items 0] set max [lindex $items end] #if first and last of sorted list share a prefix - then all do (first and last of sorted list are the most different in the list) #(sort order nothing to do with length - e.g min may be longer than max) if {[string length $min] > [string length $max]} { set temp $min set min $max set max $temp } set n [string length $min] set prefix "" set i -1 while {[incr i] < $n && ([set c [string index $min $i]] eq [string index $max $i])} { append prefix $c } return $prefix } #e.g linesort -decreasing $data proc linesort {args} { #*** !doctools #[call [fun linesort] [opt {sortoption ?val?...}] [arg textblock]] #[para]Sort lines in textblock #[para]Returns another textblock with lines sorted #[para]options are flags as accepted by lsort ie -ascii -command -decreasing -dictionary -index -indices -integer -nocase -real -stride -unique if {[llength $args] < 1} { error "linesort missing lines argument" } set lines [lindex $args end] set opts [lrange $args 0 end-1] #.= list $lines |@0,sortopts/1> linelist |> .=data>1,sortopts>1* lsort |> list_as_lines <| {*}$opts list_as_lines [lsort {*}$opts [linelist $lines]] } proc list_as_lines {args} { #*** !doctools #[call [fun list_as_lines] [opt {-joinchar char}] [arg linelist]] #[para]This simply joins the elements of the list with -joinchar #[para]It is mainly intended for use in pipelines where the primary argument comes at the end - but it can also be used as a general replacement for join $lines #[para]The sister function lines_as_list takes a block of text and splits it into lines - but with more options related to trimming the block and/or each line. if {[set eop [lsearch $args --]] == [llength $args]-2} { #end-of-opts not really necessary - except for consistency with lines_as_list set args [concat [lrange $args 0 $eop-1] [lrange $args $eop+1 end]] } if {[llength $args] == 3 && [lindex $args 0] eq "-joinchar"} { set joinchar [lindex $args 1] set lines [lindex $args 2] } elseif {[llength $args] == 1} { set joinchar "\n" set lines [lindex $args 0] } else { error "list_as_lines usage: list_as_lines ?-joinchar ? " } return [join $lines $joinchar] } proc list_as_lines2 {args} { #eat or own dogfood version - shows the implementation is simpler - but unfortunately not suitable for a simple function like this which should be as fast as possible? lassign [tcl::dict::values [punk::args::parse $args withdef { -joinchar -default \n @values -min 1 -max 1 }]] leaders opts values return [join [tcl::dict::get $values 0] [tcl::dict::get $opts -joinchar]] } proc lines_as_list {args} { #*** !doctools #[call [fun lines_as_list] [opt {option value ...}] [arg text]] #[para]Returns a list of possibly trimmed lines depeding on options #[para]The concept of lines is raw lines from splitting on newline after crlf is mapped to lf #[para]- not console lines which may be entirely different due to control characters such as vertical tabs or ANSI movements #The underlying function linelist has the validation code which gives nicer usage errors. #we can't use a dict merge here without either duplicating the underlying validation somewhat, or risking a default message from dict merge error #..because we don't know what to say if there are odd numbers of args #we can guess that it's ok to insert our default if no -block found in $args - but as a general principle this mightn't always work #e.g if -block is also a valid value for the textblock itself. Which in this case it is - although unlikely, and our -block {} default is irrelevant in that case anyway if {[lsearch $args "--"] == [llength $args]-2} { set opts [lrange $args 0 end-2] } else { set opts [lrange $args 0 end-1] } #set opts [tcl::dict::merge {-block {}} $opts] set bposn [lsearch $opts -block] if {$bposn < 0} { lappend opts -block {} } set text [lindex $args end] #tailcall linelist {*}$opts $text return [linelist {*}$opts $text] } #this demonstrates the ease of using an args processor - but as lines_as_list is heavily used in terminal output - we can't afford the extra microseconds proc lines_as_list2 {args} { #pass -anyopts 1 so we can let the next function decide what arguments are valid - but still pass our defaults #-anyopts 1 avoids having to know what to say if odd numbers of options passed etc #we don't have to decide what is an opt vs a value #even if the caller provides the argument -block without a value the next function's validation will report a reasonable error because there is now nothing in $values (consumed by -block) lassign [tcl::dict::values [punk::args::parse $args withdef { @opts -any 1 -block -default {} }]] leaderdict opts valuedict tailcall linelist {*}$opts {*}[tcl::dict::values $valuedict] } # important for pipeline & match_assign # -line trimline|trimleft|trimright -block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty -commandprefix {string length} ? # -block trimming only trims completely empty lines. use -line trimming to remove whitespace e.g -line trimright will clear empty lines without affecting leading whitespace on other lines that aren't pure whitespace set linelist_body { set usage "linelist ?-ansiresets auto|? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" if {[llength $args] == 0} { error "linelist missing textchunk argument usage:$usage" } set text [lindex $args end] set text [string map {\r\n \n} $text] ;#review - option? set arglist [lrange $args 0 end-1] set opts [tcl::dict::create\ -block {trimhead1 trimtail1}\ -line {}\ -commandprefix ""\ -ansiresets auto\ -ansireplays 0\ ] foreach {o v} $arglist { switch -- $o { -block - -line - -commandprefix - -ansiresets - -ansireplays { tcl::dict::set opts $o $v } default { error "linelist: Unrecognized option '$o' usage:$usage" } } } # -- --- --- --- --- --- set opt_block [tcl::dict::get $opts -block] if {[llength $opt_block]} { foreach bo $opt_block { switch -- $bo { trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} default { set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] error "linelist: unknown -block option value: $bo known values: $known_blockopts" } } } #normalize certain combos if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { set opt_block [lreplace $opt_block $posn $posn] } if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { set opt_block [lreplace $opt_block $posn $posn] } if {"trimall" in $opt_block} { #no other block options make sense in combination with this set opt_block [list "trimall"] } #TODO if {"triminner" in $opt_block } { error "linelist -block triminner not implemented - sorry" } } # -- --- --- --- --- --- set opt_line [tcl::dict::get $opts -line] set tl_left 0 set tl_right 0 set tl_both 0 foreach lo $opt_line { switch -- $lo { trimline { set tl_both 1 } trimleft { set tl_left 1 } trimright { set tl_right 1 } default { set known_lineopts [list trimline trimleft trimright] error "linelist: unknown -line option value: $lo known values: $known_lineopts" } } } #normalize trimleft trimright combo if {$tl_left && $tl_right} { set opt_line [list "trimline"] set tl_both 1 } # -- --- --- --- --- --- set opt_commandprefix [tcl::dict::get $opts -commandprefix] # -- --- --- --- --- --- set opt_ansiresets [tcl::dict::get $opts -ansiresets] # -- --- --- --- --- --- set opt_ansireplays [tcl::dict::get $opts -ansireplays] if {$opt_ansireplays} { if {$opt_ansiresets eq "auto"} { set opt_ansiresets 1 } } else { if {$opt_ansiresets eq "auto"} { set opt_ansiresets 0 } } # -- --- --- --- --- --- set linelist [list] set nlsplit [split $text \n] if {![llength $opt_line]} { set linelist $nlsplit #lappend linelist {*}$nlsplit } else { #already normalized trimleft+trimright to trimline if {$tl_both} { foreach ln $nlsplit { lappend linelist [string trim $ln] } } elseif {$tl_left} { foreach ln $nlsplit { lappend linelist [string trimleft $ln] } } elseif {$tl_right} { foreach ln $nlsplit { lappend linelist [string trimright $ln] } } } if {"collateempty" in $opt_block} { set inputlist $linelist[set linelist [list]] set last "-" foreach input $inputlist { if {$input ne ""} { lappend linelist $input set last "-" } else { if {$last ne ""} { lappend linelist "" } set last "" } } } if {"trimall" in $opt_block} { set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] } else { set start 0 if {"trimhead" in $opt_block} { set idx 0 set lastempty -1 foreach ln $linelist { if {[lindex $linelist $idx] ne ""} { break } else { set lastempty $idx } incr idx } if {$lastempty >=0} { set start [expr {$lastempty +1}] } } set linelist [lrange $linelist $start end] if {"trimtail" in $opt_block} { set revlinelist [lreverse $linelist][set linelist {}] set i 0 foreach ln $revlinelist { if {$ln ne ""} { set linelist [lreverse [lrange $revlinelist $i end]] break } incr i } } # --- --- set start 0 set end "end" if {"trimhead1" in $opt_block} { if {[lindex $linelist 0] eq ""} { set start 1 } } if {"trimtail1" in $opt_block} { if {[lindex $linelist end] eq ""} { set end "end-1" } } set linelist [lrange $linelist $start $end] } #review - we need to make sure ansiresets don't accumulate/grow on any line #Each resulting line should have a reset of some type at start and a pure-reset at end to stop #see if we can find an ST sequence that most terminals will not display for marking sections? if {$opt_ansireplays} { ;#package require punk::ansi if {$opt_ansiresets} { set RST "\x1b\[0m" } else { set RST "" } set replaycodes $RST ;#todo - default? set transformed [list] #shortcircuit common case of no ansi #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. #This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable #detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures) #we use detectcode_in_list instead of detect_in_list #detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message) # - but the main reason is it is slightly faster. if {![punk::ansi::ta::detectcode_in_list $linelist]} { if {$opt_ansiresets} { foreach ln $linelist { lappend transformed $RST$ln$RST } set linelist $transformed } } else { #INLINE punk::ansi::codetype::is_sgr_reset #regexp {\x1b\[0*m$} $code set re_is_sgr_reset {\x1b\[0*m$} #INLINE punk::ansi::codetype::is_sgr #regexp {\033\[[0-9;:]*m$} $code set re_is_sgr {\x1b\[[0-9;:]*m$} foreach ln $linelist { #set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable #set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. #get_codes_single lists only the codes. no plaintext or empty elements set ansisplits [punk::ansi::ta::get_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. if {[llength $ansisplits] == 0} { #plaintext only - no ansi codes in line lappend transformed [string cat $replaycodes $ln $RST] #leave replaycodes as is for next line set nextreplay $replaycodes } else { set tail $RST set lastcode [lindex $ansisplits end] ;#may or may not be SGR set lastcodeoffset [expr {[string length $lastcode]-1}] if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { if {[string range $ln end-$lastcodeoffset end] eq $lastcode} { #last plaintext is empty. So the line is already suffixed with a reset set tail "" set nextreplay $RST } else { #trailing text has been reset within line - but no tail reset present #we normalize by putting a tail reset on anyway set tail $RST set nextreplay $RST } } elseif {[string range $ln end-$lastcodeoffset end] eq $lastcode && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { #code is at tail (no trailing plaintext) #No tail reset - and no need to examine whole line to determine stack that is in effect set tail $RST set nextreplay $lastcode } else { #last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect #last codeset doesn't end in a pure-reset #whether code was at very end or not - add a reset tail set tail $RST #determine effective replay for line set codestack [list start] foreach code $ansisplits { if {[punk::ansi::codetype::is_sgr_reset $code]} { set codestack [list] ;#different from 'start' marked - this means we've had a reset } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] } else { if {[punk::ansi::codetype::is_sgr $code]} { #todo - proper test of each code - so we only take latest background/foreground etc. #requires handling codes with varying numbers of parameters. #basic simplification - remove straight dupes. set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. set codestack [lremove $codestack {*}$dup_posns] lappend codestack $code } ;#else gx0 or other code - we don't want to stack it with SGR codes } } if {$codestack eq [list start]} { #No SGRs - may have been other codes set line_has_sgr 0 } else { #list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes set line_has_sgr 1 if {[lindex $codestack 0] eq "start"} { set codestack [lrange $codestack 1 end] } } #set newreplay [join $codestack ""] set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] if {$line_has_sgr && $newreplay ne $replaycodes} { #adjust if it doesn't already does a reset at start if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { set nextreplay $newreplay } else { set nextreplay $RST$newreplay } } else { set nextreplay $replaycodes } } if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { #no point attaching any replay lappend transformed [string cat $ln $tail] } else { lappend transformed [string cat $replaycodes $ln $tail] } } set replaycodes $nextreplay } set linelist $transformed } } if {[llength $opt_commandprefix]} { set transformed [list] foreach ln $linelist { lappend transformed [{*}$opt_commandprefix $ln] } set linelist $transformed } return $linelist } if {$has_punk_ansi} { #optimise linelist as much as possible set linelist_body [string map { ""} $linelist_body] } else { #punk ansi not avail at time of package load. #by putting in calls to punk::ansi the user will get appropriate error messages set linelist_body [string map { "package require punk::ansi"} $linelist_body] } set linelist_body_original { set usage "linelist ?-ansiresets auto|? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" if {[llength $args] == 0} { error "linelist missing textchunk argument usage:$usage" } set text [lindex $args end] set text [string map {\r\n \n} $text] ;#review - option? set arglist [lrange $args 0 end-1] set opts [tcl::dict::create\ -block {trimhead1 trimtail1}\ -line {}\ -commandprefix ""\ -ansiresets auto\ -ansireplays 0\ ] foreach {o v} $arglist { switch -- $o { -block - -line - -commandprefix - -ansiresets - -ansireplays { tcl::dict::set opts $o $v } default { error "linelist: Unrecognized option '$o' usage:$usage" } } } # -- --- --- --- --- --- set opt_block [tcl::dict::get $opts -block] if {[llength $opt_block]} { foreach bo $opt_block { switch -- $bo { trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} default { set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] error "linelist: unknown -block option value: $bo known values: $known_blockopts" } } } #normalize certain combos if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { set opt_block [lreplace $opt_block $posn $posn] } if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { set opt_block [lreplace $opt_block $posn $posn] } if {"trimall" in $opt_block} { #no other block options make sense in combination with this set opt_block [list "trimall"] } #TODO if {"triminner" in $opt_block } { error "linelist -block triminner not implemented - sorry" } } # -- --- --- --- --- --- set opt_line [tcl::dict::get $opts -line] set tl_left 0 set tl_right 0 set tl_both 0 foreach lo $opt_line { switch -- $lo { trimline { set tl_both 1 } trimleft { set tl_left 1 } trimright { set tl_right 1 } default { set known_lineopts [list trimline trimleft trimright] error "linelist: unknown -line option value: $lo known values: $known_lineopts" } } } #normalize trimleft trimright combo if {$tl_left && $tl_right} { set opt_line [list "trimline"] set tl_both 1 } # -- --- --- --- --- --- set opt_commandprefix [tcl::dict::get $opts -commandprefix] # -- --- --- --- --- --- set opt_ansiresets [tcl::dict::get $opts -ansiresets] # -- --- --- --- --- --- set opt_ansireplays [tcl::dict::get $opts -ansireplays] if {$opt_ansireplays} { if {$opt_ansiresets eq "auto"} { set opt_ansiresets 1 } } else { if {$opt_ansiresets eq "auto"} { set opt_ansiresets 0 } } # -- --- --- --- --- --- set linelist [list] set nlsplit [split $text \n] if {![llength $opt_line]} { set linelist $nlsplit #lappend linelist {*}$nlsplit } else { #already normalized trimleft+trimright to trimline if {$tl_both} { foreach ln $nlsplit { lappend linelist [string trim $ln] } } elseif {$tl_left} { foreach ln $nlsplit { lappend linelist [string trimleft $ln] } } elseif {$tl_right} { foreach ln $nlsplit { lappend linelist [string trimright $ln] } } } if {"collateempty" in $opt_block} { set inputlist $linelist[set linelist [list]] set last "-" foreach input $inputlist { if {$input ne ""} { lappend linelist $input set last "-" } else { if {$last ne ""} { lappend linelist "" } set last "" } } } if {"trimall" in $opt_block} { set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] } else { set start 0 if {"trimhead" in $opt_block} { set idx 0 set lastempty -1 foreach ln $linelist { if {[lindex $linelist $idx] ne ""} { break } else { set lastempty $idx } incr idx } if {$lastempty >=0} { set start [expr {$lastempty +1}] } } set linelist [lrange $linelist $start end] if {"trimtail" in $opt_block} { set revlinelist [lreverse $linelist][set linelist {}] set i 0 foreach ln $revlinelist { if {$ln ne ""} { set linelist [lreverse [lrange $revlinelist $i end]] break } incr i } } # --- --- set start 0 set end "end" if {"trimhead1" in $opt_block} { if {[lindex $linelist 0] eq ""} { set start 1 } } if {"trimtail1" in $opt_block} { if {[lindex $linelist end] eq ""} { set end "end-1" } } set linelist [lrange $linelist $start $end] } #review - we need to make sure ansiresets don't accumulate/grow on any line #Each resulting line should have a reset of some type at start and a pure-reset at end to stop #see if we can find an ST sequence that most terminals will not display for marking sections? if {$opt_ansireplays} { #package require punk::ansi if {$opt_ansiresets} { set RST "\x1b\[0m" } else { set RST "" } set replaycodes $RST ;#todo - default? set transformed [list] #shortcircuit common case of no ansi #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. #This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable #detect_in_list will check at first level. (not intended for detecting ansi in deeper structures) if {![punk::ansi::ta::detect_in_list $linelist]} { if {$opt_ansiresets} { foreach ln $linelist { lappend transformed $RST$ln$RST } set linelist $transformed } } else { #INLINE punk::ansi::codetype::is_sgr_reset #regexp {\x1b\[0*m$} $code set re_is_sgr_reset {\x1b\[0*m$} #INLINE punk::ansi::codetype::is_sgr #regexp {\033\[[0-9;:]*m$} $code set re_is_sgr {\x1b\[[0-9;:]*m$} foreach ln $linelist { #set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. if {[llength $ansisplits]<= 1} { #plaintext only - no ansi codes in line lappend transformed [string cat $replaycodes $ln $RST] #leave replaycodes as is for next line set nextreplay $replaycodes } else { set tail $RST set lastcode [lindex $ansisplits end-1] ;#may or may not be SGR if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { if {[lindex $ansisplits end] eq ""} { #last plaintext is empty. So the line is already suffixed with a reset set tail "" set nextreplay $RST } else { #trailing text has been reset within line - but no tail reset present #we normalize by putting a tail reset on anyway set tail $RST set nextreplay $RST } } elseif {[lindex $ansisplits end] ne "" && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { #No tail reset - and no need to examine whole line to determine stack that is in effect set tail $RST set nextreplay $lastcode } else { #last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect #last codeset doesn't end in a pure-reset #whether code was at very end or not - add a reset tail set tail $RST #determine effective replay for line set codestack [list start] foreach {pt code} $ansisplits { if {[punk::ansi::codetype::is_sgr_reset $code]} { set codestack [list] ;#different from 'start' marked - this means we've had a reset } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] } else { if {[punk::ansi::codetype::is_sgr $code]} { #todo - proper test of each code - so we only take latest background/foreground etc. #requires handling codes with varying numbers of parameters. #basic simplification - remove straight dupes. set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. set codestack [lremove $codestack {*}$dup_posns] lappend codestack $code } ;#else gx0 or other code - we don't want to stack it with SGR codes } } if {$codestack eq [list start]} { #No SGRs - may have been other codes set line_has_sgr 0 } else { #list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes set line_has_sgr 1 if {[lindex $codestack 0] eq "start"} { set codestack [lrange $codestack 1 end] } } #set newreplay [join $codestack ""] set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] if {$line_has_sgr && $newreplay ne $replaycodes} { #adjust if it doesn't already does a reset at start if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { set nextreplay $newreplay } else { set nextreplay $RST$newreplay } } else { set nextreplay $replaycodes } } if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { #no point attaching any replay lappend transformed [string cat $ln $tail] } else { lappend transformed [string cat $replaycodes $ln $tail] } } set replaycodes $nextreplay } set linelist $transformed } } if {[llength $opt_commandprefix]} { set transformed [list] foreach ln $linelist { lappend transformed [{*}$opt_commandprefix $ln] } set linelist $transformed } return $linelist } if {$has_punk_ansi} { #optimise linelist as much as possible set linelist_body [string map { ""} $linelist_body] } else { #punk ansi not avail at time of package load. #by putting in calls to punk::ansi the user will get appropriate error messages set linelist_body [string map { "package require punk::ansi"} $linelist_body] } proc linelist {args} $linelist_body interp alias {} errortime {} punk::lib::errortime proc errortime {script groupsize {iters 2}} { #by use MAK from https://wiki.tcl-lang.org/page/How+to+Measure+Performance set i 0 set times {} if {$iters < 2} {set iters 2} for {set i 0} {$i < $iters} {incr i} { set result [uplevel [list time $script $groupsize]] lappend times [lindex $result 0] } set average 0.0 set s2 0.0 foreach time $times { set average [expr {$average + double($time)/$iters}] } foreach time $times { set s2 [expr {$s2 + (($time-$average)*($time-$average) / ($iters-1))}] } set sigma [expr {int(sqrt($s2))}] set average [expr {int($average)}] return "$average +/- $sigma microseconds per iteration" } #test function to use with show_jump_tables #todo - check if switch compilation to jump tables differs by Tcl version proc switch_char_test {c} { set dec [scan $c %c] foreach t [list 1 2 3] { switch -- $c { x { return [list $dec x $t] } y { return [list $dec y $t] } z { return [list $dec z $t] } } } #tcl 8.6/8.7 (at least) #curlies must be unescaped and unbraced to work as literals in switch and enable it to compile to jumpTable switch -- $c { a { return [list $dec a] } {"} { return [list $dec dquote] } {[} {return [list $dec lb]} {]} {return [list $dec rb]} "{" { return [list $dec lbrace] } "}" { return [list $dec rbrace] } default { return [list $dec $c] } } } #we are interested in seeing jumpTable line and following lines up until next line starting with "Command" or bracketed number e.g (164) proc show_jump_tables {args} { #avoiding use of 'info cmdtype' as unavaliable in safe interps as at 2024-06. if {[llength $args] == 1} { set data [tcl::unsupported::disassemble proc [lindex $args 0]] } elseif {[llength $args] == 2} { #review - this looks for direct methods on the supplied object/class, and then tries to disassemble method on the supplied class or class of supplied object if it isn't a class itself. #not sure if this handles more complex hierarchies or mixins etc. lassign $args obj method if {![info object isa object $obj]} { error "show_jump_tables unable to examine '$args'. $obj is not an oo object" } #classes are objects too and can have direct methods if {$method in [info object methods $obj]} { set data [tcl::unsupported::disassemble objmethod $obj $method] } else { if {![info object isa class $obj]} { set obj [info object class $obj] } set data [tcl::unsupported::disassemble method $obj $method] } } else { error "show_jump_tables expected a procname or a class/object and method" } set result "" set in_jt 0 foreach ln [split $data \n] { set tln [::tcl::string::trim $ln] if {!$in_jt} { if {[::tcl::string::match *jumpTable* $ln]} { punk::ns::call_frame append result $ln \n set in_jt 1 } } else { if {[::tcl::string::match Command* $tln] || [::tcl::string::match "(*) *" $tln]} { set in_jt 0 } else { append result $ln \n } } } return $result } #a test # punk::ns::cmdtracereturn punk::lib::disassemble ::punk::ns::test_switch4 # Note the different disassemble result when trace is running. proc disassemble {procname} { tcl::unsupported::disassemble proc $procname } proc temperature_f_to_c {deg_fahrenheit} { return [expr {($deg_fahrenheit -32) * (5/9.0)}] } proc temperature_c_to_f {deg_celsius} { return [expr {($deg_celsius * (9/5.0)) + 32}] } proc interp_sync_package_paths {interp} { if {![interp exists $interp]} { error "interp_sync_package_paths error. interp '$interp' not found. Create it first with \[interp create $interp\]" } interp eval $interp [list set ::auto_path $::auto_path] interp eval $interp {tcl::tm::remove {*}[tcl::tm::list]} interp eval $interp [list tcl::tm::add {*}[lreverse [tcl::tm::list]]] } proc valcopy {obj} { append obj2 $obj {} } proc set_valcopy {varname obj} { #used by repl's codeinterp. Maintains internal rep, easier to call e.g interp eval code [list punk::set_valcopy varnmame $val] append obj2 $obj {} uplevel 1 [list set $varname $obj2] } proc format_number {numbers_or_commaformattednumbers {delim ""} {groupsize ""}} { variable has_twapi if {$has_twapi} { if {$delim eq "" && $groupsize eq ""} { set localeid [twapi::get_system_default_lcid] } } #when using twapi we currently only get the localeid - not the specific defaults #when not using twapi, or on non-windows platforms - we don't currently have a mechanism to look up user preferences for this set default_delim "," set default_groupsize 3 set results [list] set nums [valcopy $numbers_or_commaformattednumbers] ;#stops single num from getting internal rep of list foreach inputnum $nums { set number [valcopy $inputnum] #also handle tcl 8.7+ underscores in numbers set number [string map [list _ "" , ""] $number] #normalize e.g 2e4 -> 20000.0 set number [expr {$number}] if {$has_twapi} { if {$delim eq "" && $groupsize eq ""} { lappend results [twapi::format_number $number $localeid -idigits -1] continue } else { #setting just one of delim or groupsize means we don't get the user's localeid based default for the non-set one #todo - document it? Find a way to lookup localeid based defaults whenever either is unspecified? if {$delim eq ""} {set delim $default_delim} if {$groupsize eq ""} {set groupsize $default_groupsize} lappend results [twapi::format_number $number 0 -idigits -1 -sthousand $delim -sgrouping $groupsize] continue } } #todo - get configured user defaults if {$delim eq ""} { set delim $default_delim } if {$groupsize eq ""} { set groupsize $default_groupsize } lappend results [delimit_number $number $delim $groupsize] } if {[llength $results] == 1} { #keep intrep as string rather than list return [lindex $results 0] } return $results } #from wiki https://wiki.tcl-lang.org/page/Delimiting+Numberse # Given a number represented as a string, insert delimiters to break it up for # readability. Normally, the delimiter will be a comma which will be inserted every # three digits. However, the delimiter and groupsize are optional arguments, # permitting use in other locales. # # The string is assumed to consist of digits, possibly preceded by spaces, # and possibly containing a decimal point, i.e.: [:space:]*[:digit:]*\.[:digit:]* proc delimit_number {unformattednumber {delim ","} {GroupSize 3}} { set number [valcopy $unformattednumber] set number [string map {_ ""} $number] #normalize using expr - e.g 2e4 -> 20000.0 set number [expr {$number}] # First, extract right hand part of number, up to and including decimal point set point [string last "." $number]; if {$point >= 0} { set PostDecimal [string range $number $point+1 end]; set PostDecimalP 1; } else { set point [expr {[string length $number] + 1}] set PostDecimal ""; set PostDecimalP 0; } # Now extract any leading spaces. review - regex for whitespace instead of just ascii space? set ind 0; while {[string equal [string index $number $ind] \u0020]} { incr ind; } set FirstNonSpace $ind; set LastSpace [expr {$FirstNonSpace - 1}]; set LeadingSpaces [string range $number 0 $LastSpace]; # Now extract the non-fractional part of the number, omitting leading spaces. set MainNumber [string range $number $FirstNonSpace $point-1]; # Insert commas into the non-fractional part. set Length [string length $MainNumber]; set Phase [expr {$Length % $GroupSize}] set PhaseMinusOne [expr {$Phase -1}]; set DelimitedMain ""; #First we deal with the extra stuff. if {$Phase > 0} { append DelimitedMain [string range $MainNumber 0 $PhaseMinusOne]; } set FirstInGroup $Phase; set LastInGroup [expr {$FirstInGroup + $GroupSize -1}]; while {$LastInGroup < $Length} { if {$FirstInGroup > 0} { append DelimitedMain $delim; } append DelimitedMain [string range $MainNumber $FirstInGroup $LastInGroup]; incr FirstInGroup $GroupSize incr LastInGroup $GroupSize } # Reassemble the number. if {$PostDecimalP} { return [format "%s%s.%s" $LeadingSpaces $DelimitedMain $PostDecimal]; } else { return [format "%s%s" $LeadingSpaces $DelimitedMain]; } } #review - there are various type of uuid - we should use something consistent across platforms #twapi is used on windows because it's about 5 times faster - but is this more important than consistency? #twapi is much slower to load in the first place (e.g 75ms vs 6ms if package names already loaded) - so for oneshots tcllib uuid is better anyway #(counterpoint: in the case of punk - we currently need twapi anyway on windows) #does tcllib's uuid use the same mechanisms on different platforms anyway? if {$has_twapi} { interp alias "" ::punk::lib::uuid "" twapi::new_uuid } else { catch {package require uuid} interp alias "" ::punk::lib::uuid "" uuid::uuid generate } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::lib ---}] } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ tcl::namespace::eval punk::lib::flatgrid { namespace export filler_count rows cols col row block #WARNING - requires lseq and 'lsearch -stride' #WARNING - lsearch -stride oddity with empty strings https://core.tcl-lang.org/tcl/tktview/edebb6a4 #todo - 8.6 fallback? proc filler_count {listlen numcolumns} { #if {$numcolumns <= 0} {error "filler_count requires 1 or more numcolumns"} ;#or allow divide by zero error #if {$listlen == 0} {return $numcolumns} ;#an option - but returning zero might make more sense expr {($numcolumns - ($listlen % $numcolumns)) % $numcolumns} } proc rows {list numcolumns {blank NULL}} { set numblanks [filler_count [llength $list] $numcolumns] set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] set splits [lseq 0 to [llength $padded_list] by $numcolumns] set rows [list] set i 1 foreach s [lrange $splits 0 end-1] { lappend rows [lrange $padded_list $s [lindex $splits $i]-1] incr i } return $rows } proc cols {list numcolumns {blank NULL}} { set cols [list] foreach colindex [lseq 0 $numcolumns-1] { lappend cols [lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] *] } return $cols } proc cols2 {list numcolumns {blank NULL}} { set cols [list] foreach colindex [lseq 0 $numcolumns-1] { lappend cols [col2 $list $numcolumns $colindex $blank] } return $cols } proc col {list numcolumns colindex {blank NULL}} { lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] * } proc col2 {list numcolumns colindex {blank NULL}} { set numblanks [filler_count [llength $list] $numcolumns] set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] set splits [lseq 0 to [llength $padded_list] by $numcolumns] set col [list] foreach s [lrange $splits 0 end-1] { lappend col [lindex $padded_list $s+$colindex] } return $col } proc col3 {list numcolumns colindex {blank NULL}} { set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] lmap s [lrange [lseq 0 to [llength $padded_list] by $numcolumns] 0 end-1] {lindex $padded_list $s+$colindex} } proc col4 {list numcolumns colindex {blank NULL}} { #slow set vars [lrepeat $numcolumns _] lset vars $colindex v if {$blank eq ""} { return [lmap $vars $list {set v}] } set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] lmap $vars [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] {set v} } proc block {list numcolumns {blank NULL}} { set colblocks [list] foreach c [cols $list $numcolumns $blank] { lappend colblocks [join $c \n] " " } textblock::join -- {*}$colblocks } proc block2 {list numcolumns {blank NULL}} { set colblocks [list] foreach c [cols2 $list $numcolumns $blank] { lappend colblocks [join $c \n] " " } textblock::join -- {*}$colblocks } } tcl::namespace::eval punk::lib::test { } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #todo - way to generate 'internal' docs separately? #*** !doctools #[section Internal] tcl::namespace::eval punk::lib::system { #*** !doctools #[subsection {Namespace punk::lib::system}] #[para] Internal functions that are not part of the API #[list_begin definitions] proc mostFactorsBelow {n} { ##*** !doctools #[call [fun mostFactorsBelow] [arg n]] #[para]Find the number below $n which has the greatest number of factors #[para]This will get slow quickly as n increases (100K = 1s+ 2024) set most 0 set mostcount 0 for {set i 1} {$i < $n} {incr i} { set fc [llength [punk::lib::factors $i]] if {$fc > $mostcount} { set most $i set mostcount $fc } } return [list number $most numfactors $mostcount] } proc factorCountBelow_punk {n} { ##*** !doctools #[call [fun factorCountBelow] [arg n]] #[para]For numbers 1 to n - keep a tally of the total count of factors #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result #[para]and as a rudimentary performance comparison #[para]gets slow quickly! set tally 0 for {set i 1} {$i <= $n} {incr i} { incr tally [llength [punk::lib::factors $i]] } return $tally } proc factorCountBelow_numtheory {n} { ##*** !doctools #[call [fun factorCountBelow] [arg n]] #[para]For numbers 1 to n - keep a tally of the total count of factors #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result #[para]and as a rudimentary performance comparison #[para]gets slow quickly! (significantly slower than factorCountBelow_punk) package require math::numtheory set tally 0 for {set i 1} {$i <= $n} {incr i} { incr tally [llength [math::numtheory::factors $i]] } return $tally } proc factors2 {x} { ##*** !doctools #[call [fun factors2] [arg x]] #[para]Return a sorted list of factors of x #[para]A similar brute-force mechanism to factors - but keeps result ordering as we go. set smallfactors [list 1] set j 2 set max [expr {sqrt($x)}] while {$j < $max} { if {($x % $j) == 0} { lappend smallfactors $j lappend largefactors [expr {$x / $j}] } incr j } #handle sqrt outside loop so we don't have to sort/dedup or check list membership in main loop if {($x % $j) == 0} { if {$j == ($x / $j)} { lappend smallfactors $j } } return [concat $smallfactors [lreverse $largefactors] $x] } # incomplete - report which is the innermost bracket/quote etc awaiting completion for a Tcl command #important - used by punk::repl proc incomplete {partial} { #we can apparently get away without concatenating current innerpartial to previous in list - REVIEW. if {[info complete $partial]} { return [list] } set clist [split $partial ""] #puts stderr "-->$clist<--" set waiting [list ""] set innerpartials [list ""] set escaped 0 set i 0 foreach c $clist { if {$c eq "\\"} { set escaped [expr {!$escaped}] incr i continue } ;# set escaped 0 at end set p [lindex $innerpartials end] if {$escaped == 0} { #NOTE - curly braces as switch arm keys must be unescaped and balanced. (escapes stop byte-compilation to jumpTable for switch statements for tcl8.6/8.7 at least) switch -- $c { {"} { if {![info complete ${p}]} { lappend waiting {"} lappend innerpartials "" } else { if {[lindex $waiting end] eq {"}} { #this quote is endquote set waiting [lrange $waiting 0 end-1] set innerpartials [lrange $innerpartials 0 end-1] } else { if {![info complete ${p}$c]} { lappend waiting {"} lappend innerpartials "" } else { set p ${p}${c} lset innerpartials end $p } } } } {[} { if {![info complete ${p}$c]} { lappend waiting "\]" lappend innerpartials "" } else { set p ${p}${c} lset innerpartials end $p } } "{" { if {![info complete ${p}$c]} { lappend waiting "\}" lappend innerpartials "" } else { set p ${p}${c} lset innerpartials end $p } } "}" - default { set waitingfor [lindex $waiting end] if {$c eq "$waitingfor"} { set waiting [lrange $waiting 0 end-1] set innerpartials [lrange $innerpartials 0 end-1] } else { set p ${p}${c} lset innerpartials end $p } } } } else { set p ${p}${c} lset innerpartials end $p } set escaped 0 incr i } set incomplete [list] foreach w $waiting { #to be treated as literals - curly braces must be unescaped here - and balanced - hence the left-curly empty arm. switch -- $w { {"} { lappend incomplete $w } {]} { lappend incomplete "\[" } "{" {} "}" { lappend incomplete "\{" } } } set debug 0 if {$debug} { foreach w $waiting p $innerpartials { puts stderr "->awaiting:'$w' partial: $p" } } return $incomplete } #This only works for very simple cases will get confused with for example: # {set x "a["""} proc incomplete_naive {partial} { if {[info complete $partial]} { return [list] } set clist [split $partial ""] set waiting [list] set escaped 0 foreach c $clist { if {$c eq "\\"} { set escaped [expr {!$escaped}] continue } if {!$escaped} { if {$c eq {"}} { if {[lindex $waiting end] eq {"}} { set waiting [lrange $waiting 0 end-1] } else { lappend waiting {"} } } elseif {$c eq "\["} { lappend waiting "\]" } elseif {$c eq "\{"} { lappend waiting "\}" } else { set waitingfor [lindex $waiting end] if {$c eq "$waitingfor"} { set waiting [lrange $waiting 0 end-1] } } } } set incomplete [list] foreach w $waiting { if {$w eq {"}} { lappend incomplete $w } elseif {$w eq "\]"} { lappend incomplete "\[" } elseif {$w eq "\}"} { lappend incomplete "\{" } } return $incomplete } #get info about punk nestindex key ie type: list,dict,undetermined # pdict devel proc nestindex_info {args} { set argd [punk::args::parse $args withdef { -parent -default "" nestindex }] set opt_parent [dict get $argd opts -parent] if {$opt_parent eq ""} { set parent_type undetermined } else { set parent_type [nestindex_info -parent "" $opt_parent] ;#make sure we explicitly set parent of parent to empty so we don't just recurse forever doing nothing } #??? } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::lib::system ---}] } tcl::namespace::eval punk::lib::debug { proc showdict {args} {} } namespace eval ::punk::args::register { #use fully qualified so 8.6 doesn't find existing var in global namespace lappend ::punk::args::register::NAMESPACES ::punk::lib } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Ready package provide punk::lib [tcl::namespace::eval punk::lib { variable pkg punk::lib variable version set version 0.1.5 }] return #*** !doctools #[manpage_end]