From 78b8ad803e8ff18ff115aacd6853ba872737d62b Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Sun, 15 Mar 2026 19:11:37 +1100 Subject: [PATCH] punk::winlnk and punk patternmatchin fixes --- src/bootsupport/modules/punk-0.1.tm | 72 +- src/bootsupport/modules/punk/lib-0.1.6.tm | 5488 +++++++++++++++++ src/bootsupport/modules/punk/nav/fs-0.1.0.tm | 391 +- src/bootsupport/modules/punk/pipe-1.0.tm | 6 +- src/bootsupport/modules/punk/winlnk-0.1.1.tm | 1014 +++ src/bootsupport/modules/punk/winpath-0.1.0.tm | 11 +- src/modules/punk-0.1.tm | 72 +- src/modules/punk/lib-999999.0a1.0.tm | 123 +- src/modules/punk/lib-buildversion.txt | 2 +- src/modules/punk/nav/fs-999999.0a1.0.tm | 391 +- src/modules/punk/pipe-999999.0a1.0.tm | 6 +- src/modules/punk/winlnk-999999.0a1.0.tm | 206 +- src/modules/punk/winlnk-buildversion.txt | 2 +- src/modules/punk/winpath-999999.0a1.0.tm | 11 +- .../src/bootsupport/modules/punk-0.1.tm | 72 +- .../src/bootsupport/modules/punk/lib-0.1.6.tm | 5488 +++++++++++++++++ .../bootsupport/modules/punk/nav/fs-0.1.0.tm | 391 +- .../src/bootsupport/modules/punk/pipe-1.0.tm | 6 +- .../bootsupport/modules/punk/winlnk-0.1.1.tm | 1014 +++ .../bootsupport/modules/punk/winpath-0.1.0.tm | 11 +- .../src/bootsupport/modules/punk-0.1.tm | 72 +- .../src/bootsupport/modules/punk/lib-0.1.6.tm | 5488 +++++++++++++++++ .../bootsupport/modules/punk/nav/fs-0.1.0.tm | 391 +- .../src/bootsupport/modules/punk/pipe-1.0.tm | 6 +- .../bootsupport/modules/punk/winlnk-0.1.1.tm | 1014 +++ .../bootsupport/modules/punk/winpath-0.1.0.tm | 11 +- src/vfs/_vfscommon.vfs/modules/punk-0.1.tm | 72 +- .../_vfscommon.vfs/modules/punk/lib-0.1.6.tm | 5488 +++++++++++++++++ .../modules/punk/nav/fs-0.1.0.tm | 391 +- .../_vfscommon.vfs/modules/punk/pipe-1.0.tm | 6 +- .../modules/punk/winlnk-0.1.1.tm | 1014 +++ .../modules/punk/winpath-0.1.0.tm | 11 +- 32 files changed, 28116 insertions(+), 625 deletions(-) create mode 100644 src/bootsupport/modules/punk/lib-0.1.6.tm create mode 100644 src/bootsupport/modules/punk/winlnk-0.1.1.tm create mode 100644 src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm create mode 100644 src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winlnk-0.1.1.tm create mode 100644 src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm create mode 100644 src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winlnk-0.1.1.tm create mode 100644 src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm create mode 100644 src/vfs/_vfscommon.vfs/modules/punk/winlnk-0.1.1.tm diff --git a/src/bootsupport/modules/punk-0.1.tm b/src/bootsupport/modules/punk-0.1.tm index 53cb4067..ea72ad1c 100644 --- a/src/bootsupport/modules/punk-0.1.tm +++ b/src/bootsupport/modules/punk-0.1.tm @@ -1422,7 +1422,7 @@ namespace eval punk { } if {[string is digit -strict [join $subindices ""]]} { - #review tip 551 (tcl9+?) + #review tip 551 (underscores in numerical literals) (tcl9+) #puts stderr ">>>>>>>>>>>>>>>> data: $leveldata selector: $selector subindices: $subindices" #pure numeric keylist - put straight to lindex # @@ -2650,6 +2650,76 @@ namespace eval punk { } }] } + } elseif {[punk::lib::is_indexset $index]} { + #review - a basic math statement such as 5-1 is also a valid member of an indexset + #see punk::lib::is_indexset and punk::lib::indexset_resolve + #single element of an indexset - e.g @..3 or @1..5 or @..end or @.. or @end..0 or @end-5..8 etc + set is_range [expr {[string first ".." $index] >= 0}] + if {$get_not} { + if {$is_range} { + lappend INDEX_OPERATIONS list-range-not + } else { + lappend INDEX_OPERATIONS listindex-not + } + set assign_script { + set assigned [lremove $assigned {*}[punk::lib::indexset_resolve [llength $leveldata] ]] + } + } else { + if {$is_range} { + lappend INDEX_OPERATIONS list-range + } else { + lappend INDEX_OPERATIONS listindex + } + set assign_script { + set assigned [lmap i [punk::lib::indexset_resolve [llength $leveldata] ] {lindex $leveldata $i}] + } + } + + if {$do_bounds_check} { + #bounds check each element of the resolved indexset - if any are out of bounds, return mismatch-list-index-out-of-range + if {$is_range} { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + lassign [split ..] idx1 _ idx2 + set v2 [punk::lib::lindex_resolve_basic $len $idx2] + if {isinf($v2)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + set v1 [punk::lib::lindex_resolve_basic $len $idx1] + if {isinf($v1)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + ${$assign_script} + } + }] + } else { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + set v1 [punk::lib::lindex_resolve_basic $len ] + if {isinf($v1)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + ${$assign_script} + } + }] + } + } else { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + ${$assign_script} + } + }] + } + set script [string map [list $index] $script] } elseif {[string first "end" $index] >=0} { if {[regexp {^end([-+]{1,2}[0-9]+)$} $index _match endspec]} { diff --git a/src/bootsupport/modules/punk/lib-0.1.6.tm b/src/bootsupport/modules/punk/lib-0.1.6.tm new file mode 100644 index 00000000..6a7b79d6 --- /dev/null +++ b/src/bootsupport/modules/punk/lib-0.1.6.tm @@ -0,0 +1,5488 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::lib 0.1.6 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::lib 0 0.1.6] +#[copyright "2024"] +#[titledesc {punk general utility functions}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk library}] [comment {-- Description at end of page heading --}] +#[require punk::lib] +#[keywords module utility lib] +#[description] +#[para]This is a set of utility functions that are commonly used across punk modules or are just considered to be general-purpose functions. +#[para]The base set includes string and math functions but has no specific theme + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::lib +#[subsection Concepts] +#[para]The punk::lib modules should have no strong dependencies other than Tcl +#[para]Dependendencies that only affect display or additional functionality may be included - but should fail gracefully if not present, and only when a function is called that uses one of these soft dependencies. +#[para]This requirement for no strong dependencies, means that many utility functions that might otherwise seem worthy of inclusion here are not present. + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::lib +#[list_begin itemized] + +package require Tcl 8.6- +package require punk::args +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {punk::args}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + + +tcl::namespace::eval punk::lib::ensemble { + #wiki.tcl-lang.org/page/ensemble+extend + # extend an ensemble-like routine with the routines in some namespace + + #NOTE - the extension ns becomes the '-namespace ' for the original routine name, + #with -unknown handling the original subcommands. + #This makes the original ensemble harder to introspect! + #e.g (the original -map or -namespace not visible) + #In this specific case (which, being published on the wiki might be common in the wild) + #we could call {*}[namespace ensemble configure $routine -unknown] $routine + #and then detect that the first resulting word is an ensemble + #For arbitrary '-unknown scripts' - sensible introspection is likely not possible + + proc extend {routine extension} { + if {![string match ::* $routine]} { + set resolved [uplevel 1 [list ::tcl::namespace::which $routine]] + if {$resolved eq {}} { + error [list {no such routine} $routine] + } + set routine $resolved + } + set routinens [tcl::namespace::qualifiers $routine] + if {$routinens eq {::}} { + set routinens {} + } + set routinetail [tcl::namespace::tail $routine] + + if {![string match ::* $extension]} { + set extension [uplevel 1 [ + list [tcl::namespace::which namespace] current]]::$extension + } + + if {![tcl::namespace::exists $extension]} { + error [list {no such namespace} $extension] + } + + set extension [tcl::namespace::eval $extension [ + list [tcl::namespace::which namespace] current]] + + tcl::namespace::eval $extension [ + list [tcl::namespace::which namespace] export *] + + while 1 { + set renamed ${routinens}::${routinetail}_[clock clicks] ;#clock clicks unlikely to collide when not directly consecutive such as: list [clock clicks] [clock clicks] + if {[tcl::namespace::which $renamed] eq {}} break + } + + rename $routine $renamed + + tcl::namespace::eval $extension [ + list namespace ensemble create -command $routine -unknown [ + list apply {{renamed ensemble routine args} { + list $renamed $routine + }} $renamed + ] + ] + + return $routine + } +} + +# some (?) tcl bug check procs needed to exist before main punk::lib namespaces are evaluated +tcl::namespace::eval punk::lib::check { + #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) + #Not any sort of comprehensive check of known tcl bugs. + #These are reported in warning output of 'help tcl' - or used for workarounds in some cases. + proc has_tclbug_regexp_emptystring {} { + #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces + #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, + #but as an apparent violation of Tcl's normal parsing rules - was evidently seen as a bug and fixed in: + #https://core.tcl-lang.org/tcl/info/cb03e57a (tcl 9.0.3+ ?) + set bug [expr {![catch {regexp {} [error should_error]}]}] + return [dict create bug $bug bugref cb03e57a description {regexp emptystring first argument over-optimised - difference in compiled vs traced behaviour.} level minor] + } + proc has_tclbug_lsearch_sorted_inline_subindices {} { + if {[catch {lsearch -sorted -subindices -inline -index 0 {{a 1} {a 2} {b 3} {c 4} {c 5}} b} result]} { + #probably tcl version doesn't support all options + set bug 0 + } else { + set bug [expr {$result ne "b"}] + } + set description "lsearch -sorted with -subindices -inline - incorrect result." + return [dict create bug $bug bugref bc4ac0 description $description level minor] + } + proc has_tclbug_script_var {} { + + set script {set j [list spud] ; list} + append script \n + uplevel #0 $script + set rep1 [tcl::unsupported::representation $::j] + set script "" + set rep2 [tcl::unsupported::representation $::j] + + set nostring1 [string match "*no string" $rep1] + set nostring2 [string match "*no string" $rep2] + + #we assume it should have no string rep in either case + #Review: check Tcl versions for behaviour/consistency + if {!$nostring2} { + set bug true + } else { + set bug false + } + set description "string rep for list variable in script generated when script changed\n(not an acknowledged/reported bug)" + return [dict create bug $bug bugref "" description $description level minor] + } + proc has_tclbug_lsearch_strideallinline {} { + #bug only occurs with single -index value combined with -stride -all -inline -subindices + #https://core.tcl-lang.org/tcl/tktview/5a1aaa201d + if {[catch {lsearch -stride 3 -all -inline -index 1 -subindices {a1 a2 a3} *} result]} { + #we aren't looking for an error result - error most likely indicates tcl too old to support -stride + set bug 0 + } else { + set bug [expr {$result ne "a2"}] + } + set description "lsearch -stride with -subindices -inline -all and single index - incorrect results." + return [dict create bug $bug bugref 5a1aaa201d description $description level major] + } + proc has_tclbug_lseq_sign {} { + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + if {[catch {lseq 1 10}]} { + set bug 0 + } else { + set r1 [lseq 1 10 -9] + set r2 [lseq 1 10 -10] + set bug [expr {$r1 ne $r2}] + } + set description "lseq step sign not matching sequence direction - inconsistent results." + return [dict create bug $bug bugref 999b6966b2 description $description level minor] + } + + proc has_tclbug_list_quoting_emptyjoin {} { + #https://core.tcl-lang.org/tcl/tktview/e38dce74e2 + set v1 [list {*}[lindex #foo] {*}[]] ;#can return "#foo" instead of "{#foo}" under some beta 9 releases + set v2 [list #foo] ;#normal tcl list quoting for 1st element that looks like a comment -> "{#foo}" + set bug [expr {![string equal $v1 $v2]}] ;#if they're not equal - we have the bug. + set description "lists elements not properly quoted in some cases. e.g 'list {*}[lindex {etc #foo} 1] {*}[list]' (#foo not braced)" + return [dict create bug $bug bugref e38dc74e2 description $description level medium] + } + + proc has_tclbug_safeinterp_compile {{show 0}} { + #ensemble calls within safe interp not compiled + #https://core.tcl-lang.org/tcl/tktview/1095bf7f756f9aed6bde + namespace eval [namespace current]::testcompile { + proc ensembletest {} {string index a 0} + } + + set has_bug 0 + + set bytecode_outer [tcl::unsupported::disassemble proc [namespace current]::testcompile::ensembletest] + if {$show} { + puts outer: + puts $bytecode_outer + } + if {![interp issafe]} { + #test of safe subinterp only needed if we aren't already in a safe interp + if {![catch { + interp create x -safe + } errMsg]} { + x eval {proc ensembletest {} {string index a 0}} + set bytecode_safe [x eval {tcl::unsupported::disassemble proc ::ensembletest}] + if {$show} { + puts safe: + puts $bytecode_safe + } + interp delete x + #mainly we expect the safe interp might contain invokeStk - indicating not byte compiled (or we would see strindex instead) + #It's possible the interp we're running in is also not compiling ensembles. + #we could then get a result of 2 - which still indicates a problem + if {[string last "invokeStk" $bytecode_safe] >= 1} { + incr has_bug + } + } else { + #our failure to create a safe interp here doesn't necessarily mean the Tcl version doesn't have the problem - but we could end up returning zero if somehow safe interp can't be created from unsafe interp? + #unlikely - but we should warn + puts stderr "Unable to create a safe sub-interp to test - result only indicates status of current interpreter" + } + } + + namespace delete [namespace current]::testcompile + + if {[string last "invokeStk" $bytecode_outer] >= 1} { + incr has_bug + } + set description "ensemble commands not compiled in safe interps - heavy performance impact in safe interps" + return [dict create bug $has_bug bugref 1095bf7f756f9aed6bde description $description level major] + } +} + +tcl::namespace::eval punk::lib::compat { + #*** !doctools + #[subsection {Namespace punk::lib::compat}] + #[para] compatibility functions for features that may not be available in earlier Tcl versions + #[para] These are generally 'forward compatibility' functions ie allowing earlier versions to use later features/idioms by using a Tcl-only version of a missing builtin. + #[para] Such Tcl-only versions will inevitably be less performant - perhaps significantly so. + + #*** !doctools + #[list_begin definitions] + + + + + if {"::lremove" ne [info commands ::lremove]} { + #puts stderr "Warning - no built-in lremove" + interp alias {} lremove {} ::punk::lib::compat::lremove + } + proc lremove {list args} { + #*** !doctools + #[call [fun lremove] [arg list] [opt {index ...}]] + #[para] Forwards compatible lremove for versions 8.6 or less to support equivalent 8.7 lremove + + set data [lmap v $list {list data $v}] + foreach doomed_index $args { + if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} + lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value + } + set keep [lsearch -all -inline -not -exact $data x] + return [lsearch -all -inline -index 1 -subindices $keep *] + } + #not significantly different in performance over test of 100 elements - getting somewhere near 10% for 1k integers + proc lremove2 {list args} { + set data [lmap v $list {list data $v}] + foreach doomed_index $args { + if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} + lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value + } + set keep [lsearch -all -inline -not -exact $data x] + return [lmap v $keep {lindex $v 1}] + } + #outside of lmap - don't know of any particularly nice ways to flatten to subindex 1 of each element.. + #flattening then lsearch with -stride and * would be nice - but it's not avail in 8.6 + + if {![info exists ::auto_index(readFile)]} { + if {[info commands ::readFile] eq ""} { + proc ::readFile {filename {mode text}} { + #readFile not seen in auto_index or as command: installed by punk::lib + # Parse the arguments + set MODES {binary text} + set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] + set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] + + # Read the file + set f [open $filename [dict get {text r binary rb} $mode]] + try { + return [read $f] + } finally { + close $f + } + } + } + } + if {![info exists ::auto_index(writeFile)]} { + if {[info commands ::writeFile] eq ""} { + proc ::writeFile {args} { + #writeFile not seen in auto_index or as command: installed by punk::lib + # Parse the arguments + switch [llength $args] { + 2 { + lassign $args filename data + set mode text + } + 3 { + lassign $args filename mode data + set MODES {binary text} + set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] + set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] + } + default { + set COMMAND [lindex [info level 0] 0] + return -code error -errorcode {TCL WRONGARGS} "wrong # args: should be \"$COMMAND filename ?mode? data\"" + } + } + + # Write the File + set f [open $filename [dict get {text w binary wb} $mode]] + try { + puts -nonewline $f $data + } finally { + close $f + } + } + } + } + + if {"::lpop" ne [info commands ::lpop]} { + #puts stderr "Warning - no built-in lpop" + interp alias {} lpop {} ::punk::lib::compat::lpop + punk::args::set_idalias ::punk::lib::compat::lpop ::lpop ;#point to the definition of ::lpop defined in punk::args::tclcore + } + proc lpop {lvar args} { + #*** !doctools + #[call [fun lpop] [arg listvar] [opt {index}]] + #[para] Forwards compatible lpop for versions 8.6 or less to support equivalent 8.7 lpop + upvar $lvar l + if {![llength $args]} { + set args [list end] + } + set v [lindex $l {*}$args] + set newlist $l + + set path [list] + set subl $l + for {set i 0} {$i < [llength $args]} {incr i} { + set idx [lindex $args $i] + if {![llength [lrange $subl $idx $idx]]} { + error "tcl_lpop index \"$idx\" out of range" + } + lappend path [lindex $args $i] + set subl [lindex $l {*}$path] + } + + set sublist_path [lrange $args 0 end-1] + set tailidx [lindex $args end] + if {![llength $sublist_path]} { + #set newlist [lremove $newlist $tailidx] + set newlist [lreplace $newlist $tailidx $tailidx] + } else { + set sublist [lindex $newlist {*}$sublist_path] + #set sublist [lremove $sublist $tailidx] + set sublist [lreplace $sublist $tailidx $tailidx] + lset newlist {*}$sublist_path $sublist + } + #puts "[set l] -> $newlist" + set l $newlist + return $v + } + if {"::ledit" ni [info commands ::ledit]} { + interp alias {} ledit {} ::punk::lib::compat::ledit + punk::args::set_idalias ::punk::lib::compat::ledit ::ledit + } + proc ledit {lvar first last args} { + upvar $lvar l + #use lindex_resolve to support for example: ledit lst end+1 end+1 h i + set fidx [punk::lib::lindex_resolve [llength $l] $first] + switch -exact -- $fidx { + -Inf { + #index below lower bound + set pre [list] + set fidx -1 + } + Inf { + #first index position is greater than index of last element in the list + set pre [lrange $l 0 end] + set fidx [llength $l] + } + default { + #set pre [lrange $l 0 $first-1] + set pre [lrange $l 0 $fidx-1] + } + } + set lidx [punk::lib::lindex_resolve [llength $l] $last] + switch -exact -- $lidx { + -Inf { + #index below lower bound + set post [lrange $l 0 end] + } + Inf { + #index above upper bound + set post [list] + } + default { + if {$lidx < $fidx} { + #from ledit man page: + #If last is less than first, then any specified elements will be inserted into the list before the element specified by first with no elements being deleted. + set post [lrange $l $fidx end] + } else { + #set post [lrange $l $last+1 end] + set post [lrange $l $lidx+1 end] + } + } + } + set l [list {*}$pre {*}$args {*}$post] + } + + + #slight isolation - varnames don't leak - but calling context vars can be affected + proc lmaptcl2 {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + set linkvars [uplevel 1 [list ::tcl::info::vars]] + set nscaller [uplevel 1 [list ::tcl::namespace::current]] + + set apply_script "" + foreach vname $linkvars { + append apply_script [string map [list %vname% $vname]\ + {upvar 2 %vname% %vname%}\ + ] \n + } + append apply_script $script \n + + #puts "--> $apply_script" + foreach $varnames $list { + lappend result [apply\ + [list\ + $varnames\ + $apply_script\ + $nscaller\ + ] {*}[subst $values]\ + ] + } + return $result + } + + if {"::lmap" ne [info commands ::lmap]} { + #puts stderr "Warning - no built-in lpop" + interp alias {} lmap {} ::punk::lib::compat::lmaptcl + } + #lmap came in Tcl 8.6 - so probably not much need for a tcl forward compatibility version - but here it is anyway + proc lmaptcl {varnames list script} { + set result [list] + set varlist [list] + foreach varname $varnames { + upvar 1 $varname var_$varname ;#ensure no collisions with vars in this proc + lappend varlist var_$varname + } + foreach $varlist $list { + lappend result [uplevel 1 $script] + } + return $result + } + + #tcl8.7/9 compatibility for 8.6 + if {[info commands ::tcl::string::insert] eq ""} { + #https://wiki.tcl-lang.org/page/string+insert + # Pure Tcl implementation of [string insert] command. + proc ::tcl::string::insert {string index insertString} { + # Convert end-relative and TIP 176 indexes to simple integers. + if {[regexp -expanded { + ^(end(?![\t\n\v\f\r ]) # "end" is never followed by whitespace + |[\t\n\v\f\r ]*[+-]?\d+) # m, with optional leading whitespace + (?:([+-]) # op, omitted when index is "end" + ([+-]?\d+))? # n, omitted when index is "end" + [\t\n\v\f\r ]*$ # optional whitespace (unless "end") + } $index _ m op n]} { + # Convert first index to an integer. + switch $m { + end {set index [string length $string]} + default {scan $m %d index} + } + + # Add or subtract second index, if provided. + switch $op { + + {set index [expr {$index + $n}]} + - {set index [expr {$index - $n}]} + } + } elseif {![string is integer -strict $index]} { + # Reject invalid indexes. + return -code error "bad index \"$index\": must be\ + integer?\[+-\]integer? or end?\[+-\]integer?" + } + + # Concatenate the pre-insert, insertion, and post-insert strings. + string cat [string range $string 0 [expr {$index - 1}]] $insertString\ + [string range $string $index end] + } + + # Bind [string insert] to [::tcl::string::insert]. + tcl::namespace::ensemble configure string -map [tcl::dict::replace\ + [tcl::namespace::ensemble configure string -map]\ + insert ::tcl::string::insert] + } + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib::compat ---}] +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::lib { + variable PUNKARGS + tcl::namespace::export * + variable has_struct_list + set has_struct_list [expr {![catch {package require struct::list}]}] + variable has_struct_set + set has_struct_set [expr {![catch {package require struct::set}]}] + variable has_punk_ansi + set has_punk_ansi [expr {![catch {package require punk::ansi}]}] + set has_twapi 0 + if {"windows" eq $::tcl_platform(platform)} { + set has_twapi [expr {![catch {package require twapi}]}] + } + + namespace eval argdoc { + #non-colour SGR codes + set I "\x1b\[3m" ;# [a+ italic] + set NI "\x1b\[23m" ;# [a+ noitalic] + set B "\x1b\[1m" ;# [a+ bold] + set N "\x1b\[22m" ;# [a+ normal] + set T "\x1b\[1\;4m" ;# [a+ bold underline] + set NT "\x1b\[22\;24m\x1b\[4:0m" ;# [a+ normal nounderline] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::is_main_script + @cmd -name punk::lib::is_main_script\ + -summary\ + "Test if current script was launched directly."\ + -help\ + "The ${$B}main script${$N} is the primary script that is executed + by the interpreter, e.g. tclsh or wish. + (as opposed to being loaded by the 'source' command) + + see https://wiki.tcl-lang.org/page/main+script" + @values -min 0 -max 0 + }] + } + proc is_main_script {} { + #see https://wiki.tcl-lang.org/page/main+script + if {[info script] ne "" && [info exists ::argv0] + && + [file dirname [file normalize [file join [info script] ...]]] + eq + [file dirname [file normalize [file join $::argv0 ...]]] + } { + return true + } else { + return false + } + } + + + + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + # Maintenance - This is the primary source for tm_version... functions + # - certain packages script require these but without package dependency + # - 1 punk boot script + # - 2 packagetrace module + # - These should be updated to sync with this + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + proc tm_version_isvalid {versionpart} { + #Needs to be suitable for use with Tcl's 'package vcompare' + if {![catch [list package vcompare $versionpart $versionpart]]} { + return 1 + } else { + return 0 + } + } + proc tm_version_major {version} { + if {![tm_version_isvalid $version]} { + error "Invalid version '$version' is not a proper Tcl module version number" + } + set firstpart [lindex [split $version .] 0] + #check for a/b in first segment + if {[string is integer -strict $firstpart]} { + return $firstpart + } + if {[string first a $firstpart] > 0} { + return [lindex [split $firstpart a] 0] + } + if {[string first b $firstpart] > 0} { + return [lindex [split $firstpart b] 0] + } + error "tm_version_major unable to determine major version from version number '$version'" + } + proc tm_version_canonical {ver} { + #accepts a single valid version only - not a bounded or unbounded spec + if {![tm_version_isvalid $ver]} { + error "tm_version_canonical version '$ver' is not valid for a package version" + } + set parts [split $ver .] + set newparts [list] + foreach o $parts { + set trimmed [string trimleft $o 0] + set firstnonzero [string index $trimmed 0] + switch -exact -- $firstnonzero { + "" { + lappend newparts 0 + } + a - b { + #e.g 000bnnnn -> bnnnnn + set tailtrimmed [string trimleft [string range $trimmed 1 end] 0] + if {$tailtrimmed eq ""} { + set tailtrimmed 0 + } + lappend newparts 0$firstnonzero$tailtrimmed + } + default { + #digit + if {[string is integer -strict $trimmed]} { + #e.g 0100 -> 100 + lappend newparts $trimmed + } else { + #e.g 0100b003 -> 100b003 (still need to process tail) + if {[set apos [string first a $trimmed]] > 0} { + set lhs [string range $trimmed 0 $apos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch + set rhs [string range $trimmed $apos+1 end] ;#assert rhs non-empty and only digits + set rhs [string trimleft $rhs 0] + if {$rhs eq ""} { + set rhs 0 + } + lappend newparts ${lhs}a${rhs} + } elseif {[set bpos [string first b $trimmed]] > 0} { + set lhs [string range $trimmed 0 $bpos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch + set rhs [string range $trimmed $bpos+1 end] ;#assert rhs non-empty and only digits + set rhs [string trimleft $rhs 0] + if {$rhs eq ""} { + set rhs 0 + } + lappend newparts ${lhs}b${rhs} + } else { + #assert - shouldn't get here trimmed val should have been empty, an int or contained an a or b + error "tm_version_canonical error - trimfail - unexpected" + } + } + } + } + } + return [join $newparts .] + } + proc tm_version_required_canonical {versionspec} { + #also trim leading zero from any dottedpart? + #Tcl *allows* leading zeros in any of the dotted parts - but they are not significant. + #e.g 1.01 is equivalent to 1.1 and 01.001 + #also 1b3 == 1b0003 + + if {[string trim $versionspec] eq ""} {return ""} ;#unspecified = any version + set errmsg "tm_version_required_canonical - invalid version specification" + if {[string first - $versionspec] < 0} { + #no dash + #looks like a minbounded version (ie a single version with no dash) convert to min-max form + set from $versionspec + if {![tm_version_isvalid $from]} { + error "$errmsg '$versionpec'" + } + if {![catch {tm_version_major $from} majorv]} { + set from [tm_version_canonical $from] + return "${from}-[expr {$majorv +1}]" + } else { + error "$errmsg '$versionspec'" + } + } else { + # min- or min-max + #validation and canonicalisation (strip leading zeroes from each segment, including either side of a or b) + set parts [split $versionspec -] ;#we expect only 2 parts + lassign $parts from to + if {![tm_version_isvalid $from]} { + error "$errmsg '$versionspec'" + } + set from [tm_version_canonical $from] + if {[llength $parts] == 2} { + if {$to ne ""} { + if {![tm_version_isvalid $to]} { + error "$errmsg '$versionspec'" + } + set to [tm_version_canonical $to] + return $from-$to + } else { + return $from- + } + } else { + error "$errmsg '$versionspec'" + } + error "tm_version_required_canonical should have already returned a canonicalised versionspec - or produced an error with reason before this point" + } + } + # end tm_version... functions + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + + + + # -- --- + #https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists + #DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 + #8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows + # Review and retest as new versions come out. + # -- --- + proc list_multi_append1 {lvar1 lvar2} { + #clear winner in 2024 + upvar $lvar1 l1 $lvar2 l2 + lappend l1 {*}$l2 + return $l1 + } + proc list_multi_append2 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [list {*}$l1 {*}$l2] + } + proc list_multi_append3 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] + } + #testing e.g + #set l1_reset {a b c} + #set l2 {a b c d e f g} + #set l1 $l1_reset + #time {list_multi_append1 l1 l2} 1000 + #set l1 $l1_reset + #time {list_multi_append2 l1 l2} 1000 + # -- --- + + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lswap + @cmd -name punk::lib::lswap\ + -summary\ + "Swap list values in-place"\ + -help\ + "Similar to struct::list swap, except it fully supports basic + list index expressions such as 7-2 end-1 etc. + + struct::list swap doesn't support 'end' offsets, and only + sometimes appears to support basic expressions, depending on the + expression compared to the list length." + @values -min 3 -max 3 + lvar -type string -help\ + "name of list variable" + a -type indexexpression + z -type indexexpression + }] + } + proc lswap {lvar a z} { + upvar $lvar l + set len [llength $l] + if {[lindex_resolve_basic $len $a] < 0 || [lindex_resolve_basic $len $z] < 0} { + #lindex_resolve_basic returns only -Inf if out of range at either bound + #if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred + #(e.g using: lswap mylist end-2 end on a two element list) + + #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + #use full 'lindex_resolve' which can report which side via -Inf and Inf special results being lower and upper bound breaches respectively + set a_index [lindex_resolve $len $a] + set a_msg "" + switch -- $a_index { + -Inf { + set a_msg "1st supplied index $a is below the lower bound for the list (0)" + } + Inf { + set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" + } + } + set z_index [lindex_resolve $len $z] + set z_msg "" + switch -- $z_index { + -Inf { + set z_msg "2nd supplied index $z is below the lower bound for the list (0)" + } + Inf { + set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" + } + } + set errmsg "lswap cannot swap indices $a and $z" + if {$a_msg ne ""} { + append errmsg \n $a_msg + } + if {$z_msg ne ""} { + append errmsg \n $z_msg + } + error $errmsg + } + set item2 [lindex $l $z] + lset l $z [lindex $l $a] + lset l $a $item2 + return $l + } + #proc lswap2 {lvar a z} { + # upvar $lvar l + # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] + #} + + proc lswap2 {lvar a z} { + upvar $lvar l + #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] + } + + #an experimental test of swapping vars without intermediate variables + #It's an interesting idea - but probably of little to no practical use + # - the swap_intvars3 version using intermediate var is faster in Tcl + # - This is probably unsurprising - as it's simpler code. + # Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. + #proc swap_intvars {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] + #} + #proc swap_intvars2 {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {$_x ^ $_y}] + # set _y [expr {$_x ^ $_y}] + # set _x [expr {$_x ^ $_y}] + #} + #proc swap_intvars3 {swapv1 swapv2} { + # #using intermediate variable + # upvar $swapv1 _x $swapv2 _y + # set z $_x + # set _x $_y + # set _y $z + #} + + #*** !doctools + #[subsection {Namespace punk::lib}] + #[para] Core API functions for punk::lib + #[list_begin definitions] + + if {[info commands lseq] ne ""} { + #tcl 8.7+ lseq significantly faster, especially for larger ranges + #The internal rep can be an 'arithseries' with no string representation + #support minimal set from to + proc range {from to {by 1}} { + #note inconsistency with lseq 1 10 by -9 vs lseq 1 10 by -10 + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + lseq $from $to by $by + } + } else { + #lseq accepts basic expressions e.g 4-2 for both arguments + #e.g we can do lseq 0 [llength $list]-1 + #if range is to be consistent with the lseq version above - it should support that, even though we don't support most lseq functionality in either wrapper. + #our range function doesn't support double like lseq does. (deliberate) review + proc range {from to {by ""}} { + if {$by eq "0"} { + #as per lseq, step (by) zero always gives no result + return [list] + } + set to [offset_expr $to] + set from [offset_expr $from] + if {$by ne ""} { + set by [offset_expr $by] + } + #assert $by is now empty string or an integer + if {$to > $from} { + switch -- $by { + "" - 1 { + set count [expr {($to -$from) + 1}] + if {$from == 0} { + return [lsearch -all [lrepeat $count 0] *] + } else { + incr from -1 + return [lmap v [lrepeat $count 0] {incr from}] + } + } + default { + set count [expr {($to - $from + $by) / $by}] + if {$count <= 0} { + #return [list] + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + return [list $from] ;#review + } + set result [list] + for {set i $from} {$i <= $to} {incr i $by} { + lappend result $i + } + return $result + + #if we don't have lseq, we probably don't have lsearch -stride, which would make things simpler. + #set count [expr {($to -$from) + 1}] + #if {$from == 0} { + # set fullrange [lsearch -all [lrepeat $count 0] *] + #} else { + # incr from -1 + # set fullrange [lmap v [lrepeat $count 0] {incr from}] + #} + #set result [list] + #for {set i 0} {$i < $count} {incr i} { + # if {$i % $by == 0} { + # lappend result [lindex $fullrange $i] + # } + #} + #return $result + } + } + #slower methods. + #2) + #set i -1 + #set L [lrepeat $count 0] + #lmap v $L {lset L [incr i] [incr from];lindex {}} + #return $L + #3) + #set L {} + #for {set i 0} {$i < $count} {incr i} { + # lappend L [incr from] + #} + #return $L + } elseif {$from > $to} { + switch -- $by { + "" - -1 { + set count [expr {$from - $to} + 1] + if {$to == 0} { + return [lreverse [lsearch -all [lrepeat $count 0] *]] + } else { + incr from + return [lmap v [lrepeat $count 0] {incr from -1}] + } + } + default { + set count [expr {($to - $from + $by) / $by}] + if {$count <= 0} { + #return [list] + return [list $from] ;#review + } + set result [list] + for {set i $from} {$i >= $to} {incr i $by} { + lappend result $i + } + return $result + } + } + + #2) + #set i -1 + #set L [lrepeat $count 0] + #lmap v $L {lset L [incr i] [incr from -1];lindex {}} + #return $L + #3) + #set L {} + #for {set i 0} {$i < $count} {incr i} { + # lappend L [incr from -1] + #} + #return $L + } else { + return [list $from] + } + } + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lzip + @cmd -name punk::lib::lzip\ + -summary\ + "zip any number of lists together."\ + -help\ + "Conceptually equivalent to converting a list of rows + to a list of columns. + + The number of returned lists (columns) will be equal to + the length of the longest supplied list (row). + If lengths of supplied lists don't match, empty strings + will be inserted in the resulting lists. + + e.g lzip {a b c d e} {1 2 3 4} {x y z} + -> {a 1 x} {b 2 y} {c 3 z} {d 4 {}} {3 {} {}} + " + @values -min 0 -max -1 + list -type list -multiple 1 -optional 1 + }] + } + proc lzip {args} { + switch -- [llength $args] { + 0 {return {}} + 1 {return [lindex $args 0]} + 2 {return [::punk::lib::system::lzip2lists {*}$args]} + 3 {return [::punk::lib::system::lzip3lists {*}$args]} + 4 {return [::punk::lib::system::lzip4lists {*}$args]} + 5 {return [::punk::lib::system::lzip5lists {*}$args]} + 6 {return [::punk::lib::system::lzip6lists {*}$args]} + 7 {return [::punk::lib::system::lzip7lists {*}$args]} + 8 {return [::punk::lib::system::lzip8lists {*}$args]} + 9 {return [::punk::lib::system::lzip9lists {*}$args]} + 10 {return [::punk::lib::system::lzip10lists {*}$args]} + 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 { + set n [llength $args] + if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { + #puts "calling ::punk::lib::system::Build_lzipn $n" + ::punk::lib::system::Build_lzipn $n + } + return [::punk::lib::system::lzip${n}lists {*}$args] + } + default { + if {[llength $args] < 4000} { + set n [llength $args] + if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { + #puts "calling ::punk::lib::system::Build_lzipn $n" + ::punk::lib::system::Build_lzipn $n + } + return [::punk::lib::system::lzip${n}lists {*}$args] + } else { + return [::punk::lib::lzipn {*}$args] + } + } + } + } + + namespace eval system { + proc Build_lzipn {n} { + set arglist [list] + #use punk::lib::range which defers to lseq if available + set vars [lmap i [punk::lib::range 0 $n] {string cat v$i}] ;#v0 v1 v2.. (v0 ignored) + set body "\nlmap " + for {set i 1} {$i <= $n} {incr i} { + lappend arglist l$i + append body "[lindex $vars $i] \$l$i " + } + append body "\{list " + for {set i 1} {$i <= $n} {incr i} { + append body "\$[lindex $vars $i] " + } + append body "\}" \n + #puts "proc punk::lib::system::lzip${n}lists {$arglist} \{" + #puts "$body" + #puts "\}" + proc ::punk::lib::system::lzip${n}lists $arglist $body + } + + #fastest is to know the number of lists to be zipped + proc lzip2lists {l1 l2} { + lmap a $l1 b $l2 {list $a $b} + } + proc lzip3lists {l1 l2 l3} { + lmap a $l1 b $l2 c $l3 {list $a $b $c} + } + proc lzip4lists {l1 l2 l3 l4} { + lmap a $l1 b $l2 c $l3 d $l4 {list $a $b $c $d} + } + proc lzip5lists {l1 l2 l3 l4 l5} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 {list $a $b $c $d $e} + } + proc lzip6lists {l1 l2 l3 l4 l5 l6} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 {list $a $b $c $d $e $f} + } + proc lzip7lists {l1 l2 l3 l4 l5 l6 l7} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 {list $a $b $c $d $e $f $g} + } + proc lzip8lists {l1 l2 l3 l4 l5 l6 l7 l8} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 {list $a $b $c $d $e $f $g $h} + } + proc lzip9lists {l1 l2 l3 l4 l5 l6 l7 l8 l9} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 {list $a $b $c $d $e $f $g $h $i} + } + proc lzip10lists {l1 l2 l3 l4 l5 l6 l7 l8 l9 l10} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 j $l10 {list $a $b $c $d $e $f $g $h $i $j} + } + + #neat algorithm - but while lmap seems better than foreach - it seems the script is evaluated a little slowly + # review - + proc lzipn_alt args { + #stackoverflow - courtesy glenn jackman (modified) + foreach l $args { + lappend vars [incr n] + lappend lmap_args $n $l + } + lmap {*}$lmap_args {lmap v $vars {set $v}} + } + + #2024 - outperforms lmap version - presumably because list sizes reduced as it goes(?) + proc lzipn_tcl8 {args} { + #For tcl pre 9 (without lsearch -stride) + #wiki - courtesy JAL + set list_l $args + set zip_l [] + while {1} { + set cur [lmap a_l $list_l { lindex $a_l 0 }] + set list_l [lmap a_l $list_l { lrange $a_l 1 end }] + + if {[join $cur {}] eq {}} { + break + } + lappend zip_l $cur + } + return $zip_l + } + proc lzipn_tcl9a {args} { + #For Tcl 9+ (with lsearch -stride) + #compared to wiki version + #comparable for lists len <3 or number of args < 3 + #approx 2x faster for large lists or more lists + #needs -stride single index bug fix to use empty string instead of NULL + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [lrepeat [expr {$numcolumns * [llength $args]}] {}] + set outlist [lrepeat $numcolumns {}] + set s 0 + foreach len $lens list $args { + #ledit flatlist $s $e {*}$l {*}[lrepeat [expr {($numcolumns -([llength $l] % $numcolumns)) % $numcolumns}] NULL] + ledit flatlist $s [expr {$s + $len - 1}] {*}$list + incr s $numcolumns + } + #needs single index lstride bugfix + for {set c 0} {$c < $numcolumns} {incr c} { + ledit outlist $c $c [lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *] + } + return $outlist + } + proc lzipn_tcl9b {args} { + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [list] + foreach len $lens list $args { + lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] + } + lmap c [lseq 0 $numcolumns-1] {lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *} + } + proc lzipn_tcl9c {args} { + #SLOW + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [list] + foreach len $lens list $args { + lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] + } + set zip_l {} + set cols_remaining $numcolumns + for {set c 0} {$c < $numcolumns} {incr c} { + if {$cols_remaining == 1} { + return [list {*}$zip_l $flatlist] + } + lappend zip_l [lsearch -stride $cols_remaining -index 0 -inline -all -subindices $flatlist *] + set flen [llength $flatlist] + set flatlist [lremove $flatlist {*}[lseq 0 to $flen-1 by $cols_remaining]] + incr cols_remaining -1 + } + return $zip_l + } + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lzipn + @cmd -name punk::lib::lzipn\ + -summary\ + "zip any number of lists together (unoptimised)."\ + -help\ + "Conceptually equivalent to converting a list of rows + to a list of columns. + + See lzip which provides the same functionality but with + optimisations depending on the number of supplied lists. + " + @values -min 1 -max 1 + lvar -type string -help\ + "name of list variable" + a -type indexexpression + z -type indexexpression + }] + } + #keep both lzipn_tclX functions available for side-by-side testing in Tcl versions where it's possible + if {![package vsatisfies [package present Tcl] 9.0-] || [dict get [punk::lib::check::has_tclbug_lsearch_strideallinline] bug]} { + #-stride either not available - or has bug preventing use of main algorithm below + proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl8] + } else { + proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl9a] + } + + + + namespace import ::punk::args::lib::tstr + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::tclscript_to_scriptlist + @cmd -name punk::lib::tclscript_to_scriptlist\ + -summary\ + "Parse tcl script to toplevel list of lists."\ + -help\ + "Get topmost list of tcl language elements in script. + produces a list of lists where each sublist is a commandlist or + a comment string." + @values -min 1 -max 1 + script -type string + }] + } + proc tclscript_to_scriptlist {script} { + set scriptlist [list] + set cmdlist [list] + set scrlen [string length $script] + set token "" + set in_token 0 + set in_cmdlist 0 + set in_comment 0 + set charmap [list \t TB \n LF \r CR \\ BSL] ;#for switch 'jump' preservation - review - may be slower than escapes in switch statement? + for {set i 0} {$i < $scrlen} {incr i} { + set ch [string index $script $i] + set chswitch [string map $charmap $ch] + if {!$in_token} { + switch -- $chswitch { + { } - TB { + #ignore - continue being a non token + } + CR { + if {[string index $script $i+1] eq "\n"} { + if {$in_cmdlist} { + #no active token - newline ends cmdlist + set in_cmdlist 0 + lappend scriptlist $cmdlist + set cmdlist [list] + } + incr i + } + } + LF - ";" { + #no active token - newline or semicolon ends cmdlist + if {$in_cmdlist} { + set in_cmdlist 0 + lappend scriptlist $cmdlist + set cmdlist [list] + } + } + BSL { + if {[string index $script $i+1] eq "\n"} { + #continuation of whitespace while no token - boring + incr i + } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { + #continuation of whitespace while no token - boring + incr i 2 + } else { + #an uncommon possibility, a command wth surrounding spaces called in an strange way + # e.g \ cmdname\ arg + set in_token 1 + set token "\\[string index $script $i+1]" + incr i + if {!$in_cmdlist} { + set in_cmdlist 1 + } + } + } + # { + if {$in_cmdlist} { + #ordinary data + set in_token 1 + set token # + } else { + if {!$in_comment} { + set in_token 1 + set in_comment 1 + set token # + } else { + #wnen in comment - all will be a single token until comment ends + append token # + } + } + } + default { + #for completeness.. we should exclude other possible whitespace chars + if {![string is space $ch]} { + set in_token 1 + set token $ch + if {!$in_cmdlist} { + set in_cmdlist 1 + } + } + } + } + } else { + #if we're in a token, we must be in a cmdlist or a comment (single token) + #review - not preserving whitespace in list of commands is ok, but for comments it should ideally be preserved + #note that unbalanced curly in *toplevel* comment will still 'info complete' to true + switch -- $chswitch { + LF { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + #ends token and cmdlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \n + } + } else { + #ends a comment + lappend scriptlist $token ;#single token for comment + set token "" + set in_token 0 + set in_comment 0 + set in_cmdlist 0 ;#shouldn't be necessary, but included for clarity + } + } + ";" { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + #ends token and cmdlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \n + } + } else { + #ordinary char for comment + append token ";" + } + } + CR { + if {[string index $script $i+1] eq "\n"} { + if {[tcl::info::complete $token]} { + #ends token and commandlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \r\n + incr i + } + } else { + append token \r + } + } + BSL { + if {[string index $script $i+1] eq "\n"} { + #continuation - lf effectively becomes a space + if {!$in_comment} { + #token may end - but cmdlist goes on + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token " " + } + } else { + append token " " + } + incr i ;#skip LF + } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { + #continuation - cr-lf effectively becomes a space + if {!$in_comment} { + #token may end - but cmdlist goes on + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token " " + } + } else { + append token " " + } + incr i 2 ;#skip CRLF + } else { + append token "\\[string index $script $i+1]" + incr i + } + } + default { + if {![string is space $ch]} { + append token $ch + } else { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token $ch + } + } else { + append token $ch + } + } + } + } + } + } + #eof + if {!$in_comment} { + if {$in_token} { + if {[tcl::info::complete $token]} { + lappend cmdlist $token + lappend scriptlist $cmdlist + } else { + error "Eof reached whilst script incomplete. Unbalanced braces?\ntoken: '$token'" + } + } else { + if {$in_cmdlist} { + lappend scriptlist $cmdlist + } + } + } else { + lappend scriptlist $token + } + return $scriptlist + } + + + proc invoke command { + #*** !doctools + #[call [fun invoke] [arg command]] + #[para]Invoke an external command (using tcl open command) capturing stdout,stderr and the exitcode + #[example { + # set script { + # puts stdout {hello on stdout} + # puts stderr {hello on stderr} + # exit 42 + # } + # invoke [list tclsh <<$script] + #}] + + #see https://wiki.tcl-lang.org/page/open + lassign [chan pipe] chanout chanin + lappend command 2>@$chanin + set fh [open |$command] + set stdout [read $fh] + close $chanin + set stderr [read $chanout] + close $chanout + if {[catch {close $fh} cres e]} { + dict with e {} + lassign [set -errorcode] sysmsg pid exit + if {$sysmsg eq {NONE}} { + #output to stderr caused [close] to fail. Do nothing + } elseif {$sysmsg eq {CHILDSTATUS}} { + return [list $stdout $stderr $exit] + } else { + return -options $e $stderr + } + } + return [list $stdout $stderr 0] + } + + proc pdict {args} { + package require punk::args + variable has_punk_ansi + if {!$has_punk_ansi} { + set sep " = " + } else { + #set sep " [a+ Web-seagreen]=[a] " + set sep " [punk::ansi::a+ Green]=[punk::ansi::a] " + } + set argspec [string map [list %sep% $sep] { + @id -id ::punk::lib::pdict + @cmd -name pdict -help\ + "Print dict keys,values to channel + The pdict function operates on variable names - passing the value to the showdict function which operates on values + (see also showdict)" + + @opts -any 1 + + #default separator to provide similarity to tcl's parray function + -separator -default "%sep%" + -roottype -default "dict" + -substructure -default {} + -channel -default stdout -help\ + "existing channel - or 'none' to return as string" + + @values -min 1 -max -1 + + dictvar -type string -help "name of variable. Can be a dict, list or array" + + patterns -type string -default "*" -multiple 1 -help {Multiple patterns can be specified as separate arguments. + Each pattern consists of 1 or more segments separated by the hierarchy separator (forward slash) + The system uses similar patterns to the punk pipeline pattern-matching system. + The default assumed type is dict - but an array will automatically be extracted into key value pairs so will also work. + Segments are classified into list,dict and string operations. + Leading % indicates a string operation - e.g %# gives string length + A segment with a single @ is a list operation e.g @0 gives first list element, @1-3 gives the lrange from 1 to 3 + (todo - change to indexset syntax @1..3 @1..end-1 etc) + A segment containing 2 @ symbols is a dict operation. e.g @@k1 retrieves the value for dict key 'k1' + The operation type indicator is not always necessary if lower segments in the hierarchy are of the same type as the previous one. + e.g1 pdict env */%# + the pattern starts with default type dict, so * retrieves all keys & values, + the next hierarchy switches to a string operation to get the length of each value. + e.g2 pdict env W* S* + Here we supply 2 patterns, each in default dict mode - to display keys and values where the keys match the glob patterns + e.g3 pdict punk_testd */* + This displays 2 levels of the dict hierarchy. + Note that if the sublevel can't actually be interpreted as a dictionary (odd number of elements or not a list at all) + - then the normal = separator will be replaced with a coloured (or underlined if colour off) 'mismatch' indicator. + e.g4 set list {{k1 v1 k2 v2} {k1 vv1 k2 vv2}}; pdict list @0-end/@@k2 @*/@@k1 + Here we supply 2 separate pattern hierarchies, where @0-end and @* are list operations and are equivalent + The second level segment in each pattern switches to a dict operation to retrieve the value by key. + When a list operation such as @* is used - integer list indexes are displayed on the left side of the = for that hierarchy level. + } + }] + #puts stderr "$argspec" + set argd [punk::args::parse $args withdef $argspec] + + set opts [dict get $argd opts] + set dvar [dict get $argd values dictvar] + set patterns [dict get $argd values patterns] + set isarray [uplevel 1 [list ::tcl::array::exists $dvar]] + if {$isarray} { + set dvalue [uplevel 1 [list ::tcl::array::get $dvar]] + if {![dict exists $opts -keytemplates]} { + set arrdisplay [string map [list %dvar% $dvar] {${[if {[lindex $key 1] eq "query"} {val "%dvar% [lindex $key 0]"} {val "%dvar%($key)"}]}}] + dict set opts -keytemplates [list $arrdisplay] + } + dict set opts -keysorttype dictionary + } else { + set dvalue [uplevel 1 [list set $dvar]] + } + showdict {*}$opts $dvalue {*}$patterns + } + + #TODO - much. + #showdict needs to be able to show different branches which share a root path + #e.g show key a1/b* in its entirety along with a1/c* - (or even exact duplicates) + # - specify ansi colour per pattern so different branches can be highlighted? + # - ideally we want to be able to use all the dict & list patterns from the punk pipeline system eg @head @tail # (count) etc + # - The current version is incomplete but passably usable. + # - Copy proc and attempt rework so we can get back to this as a baseline for functionality + proc showdict {args} { ;# analogous to parray (except that it takes the dict as a value) + #set sep " [a+ Web-seagreen]=[a] " + variable has_punk_ansi + if {!$has_punk_ansi} { + set RST "" + set sep " = " + #set sep_mismatch " mismatch " + set sep \u2260 ;# equivalent [punk::ansi::convert_g0 [punk::ansi::g0 |]] (not equal symbol) + } else { + set RST [punk::ansi::a] + set sep " [punk::ansi::a+ Green]=$RST " ;#stick to basic default colours for wider terminal support + #set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]mismatch$RST " + set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]\u2260$RST " + } + package require punk::pipe + #package require punk ;#we need pipeline pattern matching features + package require textblock + + set argd [punk::args::parse $args withdef [string map [list %sep% $sep %sep_mismatch% $sep_mismatch] { + @id -id ::punk::lib::showdict + @cmd -name punk::lib::showdict -help "display dictionary keys and values" + #todo - table tableobject + -return -default "tailtohead" -choices {tailtohead sidebyside} + -channel -default none + -trimright -default 1 -type boolean -help\ + "Trim whitespace off rhs of each line. + This can help prevent a single long line that wraps in terminal from making + every line wrap due to long rhs padding." + -separator -default {%sep%} -help\ + "Separator column between keys and values" + -separator_mismatch -default {%sep_mismatch%} -help\ + "Separator to use when patterns mismatch" + -roottype -default "dict" -help\ + "list,dict,string" + -ansibase_keys -default "" -help\ + "ansi list for each level in -substructure. e.g \[list \[a+ red\] \[a+ web-green\]\]" + -substructure -default {} + -ansibase_values -default "" + -keytemplates -default {\$\{$key\}} -type list -help\ + "list of templates for keys at each level" + -keysorttype -default "none" -choices {none dictionary ascii integer real} + -keysortdirection -default increasing -choices {increasing decreasing} + -debug -default 0 -type boolean -help\ + "When enabled, produces some rudimentary debug output on stderr" + -- -type none -optional 1 + @values -min 1 -max -1 + dictvalue -type list -help\ + "dict or list value" + patterns -default "*" -type string -multiple 1 -help\ + "key or key glob pattern" + }]] + + #for punk::lib - we want to reduce pkg dependencies. + # - so we won't even use the tcllib debug pkg here + set opt_debug [dict get $argd opts -debug] + if {$opt_debug} { + if {[info body debug::showdict] eq ""} { + proc ::punk::lib::debug::showdict {args} { + catch {puts stderr "punk::lib::showdict-> [string cat {*}$args]"} + } + } + } else { + if {[info body debug::showdict] ne ""} { + proc ::punk::lib::debug::showdict {args} {} + } + } + + set opt_sep [dict get $argd opts -separator] + set opt_mismatch_sep [dict get $argd opts -separator_mismatch] + set opt_keysorttype [dict get $argd opts -keysorttype] + set opt_keysortdirection [dict get $argd opts -keysortdirection] + set opt_trimright [dict get $argd opts -trimright] + set opt_keytemplates [dict get $argd opts -keytemplates] + debug::showdict "keytemplates ---> $opt_keytemplates <---" + set opt_ansibase_keys [dict get $argd opts -ansibase_keys] + set opt_ansibase_values [dict get $argd opts -ansibase_values] + set opt_return [dict get $argd opts -return] + set opt_roottype [dict get $argd opts -roottype] + set opt_structure [dict get $argd opts -substructure] + + set dval [dict get $argd values dictvalue] + set patterns [dict get $argd values patterns] + + set result "" + + #pattern hierarchy + # */@1/@0,%#,%str @0/@1 - patterns each one is a pattern or pattern_nest + # * @1 @0,%#,%str - segments + # a b 1 0 %# %str - keys + + set pattern_key_index [list] ;#list of pattern_nests, same length as number of keys generated + set pattern_next_substructure [dict create] + set pattern_this_structure [dict create] + + # -- --- --- --- + #REVIEW + #as much as possible we should pass the indices along as a query to the pipeline pattern matching system so we're not duplicating the work and introducing inconsistencies. + #The main difference here is that sometimes we are treating the result as key-val pairs with the key being the query, other times the key is part of the query, or from the result itself (list/dict indices/keys). + #todo - determine if there is a more consistent rule-based way to do this rather than adhoc + #e.g pdict something * + #we want the keys from the result as individual lines on lhs + #e.g pdict something @@ + #we want on lhs result on rhs + # = v0 + #e.g pdict something @0-2,@4 + #we currently return: + #0 = v0 + #1 = v1 + #2 = v2 + #4 = v4 + #This means we've effectively auto-expanded the first list - elements 0-2. (or equivalently stated: we've flattened the 3 element and 1 element lists into one list of 4 elements) + #ie pdict is doing 'magic' compared to the normal pattern matching syntax, to make useage more convenient. + #this is a tradeoff that could create surprises and make things messy and/or inconsistent. + #todo - see if we can find a balance that gives consistency and logicality to the results whilst allowing still simplified matching syntax that is somewhat intuitive. + #It may be a matter of documenting what type of indexes are used directly as keys, and which return sets of further keys + #The solution for more consistency/predictability may involve being able to bracket some parts of the segment so for example we can apply an @join or %join within a segment + #that involves more complex pattern syntax & parsing (to be added to the main pipeline pattern syntax) + # -- --- --- --- + + set filtered_keys [list] + if {$opt_roottype in {dict list string}} { + #puts "getting keys for roottype:$opt_roottype" + if {[llength $dval]} { + + #TODO - change to indexset notation 0..1,3..end-1 etc + + set re_numdashnum {^([-+]{0,1}\d+)-([-+]{0,1}\d+)$} + set re_idxdashidx {^([-+]{0,1}\d+|end[-+]{1}\d+|end)-([-+]{0,1}\d+|end[-+]{1}\d+|end)$} + foreach pattern_nest $patterns { + set keyset [list] + set keyset_structure [list] + + set segments [split $pattern_nest /] + set levelpatterns [lindex $segments 0] ;#possibly comma separated patterns + #we need to use _split_patterns to separate (e.g to protect commas that appear within quotes) + set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] + #puts stderr "showdict-->_split_patterns: $patterninfo" + foreach v_idx $patterninfo { + lassign $v_idx v idx + #we don't support vars on lhs of index in this context - (because we support simplified glob patterns such as x* and literal dict keys such as kv which would otherwise be interpreted as vars with no index) + set p $v$idx ;#_split_patterns has split too far in this context - the entire pattern is the index pattern + if {[string index $p 0] eq "!"} { + set get_not 1 + set p [string range $p 1 end] + } else { + set get_not 0 + } + switch -exact -- $p { + * - "" { + if {$opt_roottype eq "list"} { + set keys [punk::lib::range 0 [llength $dval]-1] ;#compat wrapper around subset of lseq functionality + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + dict set pattern_this_structure $p list + } elseif {$opt_roottype eq "dict"} { + set keys [dict keys $dval] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } else { + lappend keyset %string + lappend keyset_structure string + dict set pattern_this_structure $p string + } + } + %# { + dict set pattern_this_structure $p string + lappend keyset %# + lappend keyset_structure string + } + # { + #todo get_not !# is test for listiness (see punk) + dict set pattern_this_structure $p list + lappend keyset # + lappend keyset_structure list + } + ## { + dict set pattern_this_structure $p dict + lappend keyset [list ## query] + lappend keyset_structure dict + } + @* { + #puts "showdict ---->@*<----" + dict set pattern_this_structure $p list + set keys [punk::lib::range 0 [llength $dval]-1] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } + @@ { + #get first k v from dict + dict set pattern_this_structure $p dict + lappend keyset [list @@ query] + lappend keyset_structure dict + } + @*k@* - @*K@* { + #returns keys only + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @*.@* { + set keys [dict keys $dval] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } + default { + #puts stderr "===p:$p" + #the basic scheme also doesn't allow commas in dict keys access via the convenience @@key - which isn't great, especially for arrays where it is common practice! + #we've already sacrificed whitespace in keys - so extra limitations should be reduced if it's to be passably useful + #@@"key,etc" should allow any non-whitespace key + switch -glob -- $p { + {@k\*@*} - {@K\*@*} { + #value glob return keys + #set search [string range $p 4 end] + #dict for {k v} $dval { + # if {[string match $search $v]} { + # lappend keyset $k + # } + #} + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @@* { + #exact match key - review - should raise error to match punk pipe behaviour? + set k [string range $p 2 end] + if {$get_not} { + if {[dict exists $dval $k]} { + set keys [dict keys [dict remove $dval $k]] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + } else { + lappend keyset {*}[dict keys $dval] + lappend keyset_structure {*}[lrepeat [dict size $dval] dict] + } + } else { + if {[dict exists $dval $k]} { + lappend keyset $k + lappend keyset_structure dict + } + } + dict set pattern_this_structure $p dict + } + @k@* - @K@* { + #TODO get_not + set k [string range $p 3 end] + if {[dict exists $dval $k]} { + lappend keyset $k + lappend keyset_structure dict + } + dict set pattern_this_structure $p dict + } + {@\*@*} { + #return list of values + #set k [string range $p 3 end] + #lappend keyset {*}[dict keys $dval $k] + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*.@*} { + #TODO get_not + set k [string range $p 4 end] + set keys [dict keys $dval $k] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } + {@v\*@*} - {@V\*@*} { + #value-glob return value + #error "dict value-glob value-return only not supported here - bad pattern '$p' in '$pattern_nest'" + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*v@*} - {@\*V@*} { + #key-glob return value + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*@*} - {@\*v@*} - {@\*V@} { + #key glob return val + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @??@* { + #exact key match - no error + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + default { + set this_type $opt_roottype + if {[string match @* $p]} { + #list mode - trim optional list specifier @ + set p [string range $p 1 end] + dict set pattern_this_structure $p list + set this_type list + } elseif {[string match %* $p]} { + dict set pattern_this_structure $p string + lappend keyset $p + lappend keyset_structure string + set this_type string + } + if {$this_type eq "list"} { + dict set pattern_this_structure $p list + if {[string is integer -strict $p]} { + if {$get_not} { + set keys [punk::lib::range 0 [llength $dval]-1] + set keys [lremove $keys $p] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } else { + lappend keyset $p + lappend keyset_structure list + } + } elseif {[punk::lib::is_indexset $p]} { + set keys [punk::lib::indexset_resolve [llength $dval] $p] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } elseif {[string match "?*-?*" $p]} { + #could be either - don't change type + #list indices with tcl8.7 underscores? be careful. Before 8.7 we could have used regexp \d on integers + #now we should map _ to "" first + set p [string map {_ {}} $p] + #lassign [textutil::split::splitx $p {\.\.}] a b + if {![regexp $re_idxdashidx $p _match a b]} { + error "unrecognised pattern $p" + } + #TODO - fix terminology. 'lower_resolve' is confusing here as range can be in descending order + #change to start/end terminology? + + set lower_resolve [punk::lib::lindex_resolve [llength $dval] $a] ;#-Inf for too low, Inf for too high + #keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds + if {${lower_resolve} == Inf} { + ##x + #lower bound is above upper list range + #match with decreasing indices is still possible + set lower [expr {[llength $dval]-1}] ;#set to max + } elseif {$lower_resolve == -Inf} { + ##x + set lower 0 + } else { + set lower $lower_resolve + } + set upper [punk::lib::lindex_resolve [llength $dval] $b] + if {$upper == -Inf} { + ##x + #upper bound is below list range - + if {$lower_resolve > -Inf} { + ##x + set upper 0 + } else { + continue + } + } elseif {$upper == Inf} { + #use max + set upper [expr {[llength $dval]-1}] + #assert - upper >=0 because we have ruled out empty lists + } + #note lower can legitimately be higher than upper - lib::range, like lseq can produce sequence in reverse order + set keys [punk::lib::range $lower $upper] + if {$get_not} { + set fullrange [punk::lib::range 0 [llength $dval]-1] + set keys [lremove $fullrange {*}$keys] + if {$lower > $upper} { + set keys [lreverse $keys] + } + } + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } else { + if {$get_not} { + lappend keyset [list !@$p query] + } else { + lappend keyset [list @$p query] + } + lappend keyset_structure list + } + } elseif {$this_type eq "string"} { + dict set pattern_this_structure $p string + } elseif {$this_type eq "dict"} { + #default equivalent to @\*@* + dict set pattern_this_structure $p dict + #puts "dict: appending keys from index '$p' keys: [dict keys $dval $p]" + set keys [dict keys $dval $p] + if {$get_not} { + set keys [dict keys [dict remove $dval {*}$keys]] + } + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + } else { + puts stderr "list: unrecognised pattern $p" + } + } + } + } + } + } + + # -- --- --- --- + #check next pattern-segment for substructure type to use + # -- --- --- --- + set substructure "" + set pnext [lindex $segments 1] + set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] + if {[llength $patterninfo] == 0} { + # // ? -review - what does this mean? for xpath this would mean at any level + set substructure [lindex $pattern_this_structure end] + } elseif {[llength $patterninfo] == 1} { + #ignore the NOT operator for purposes of query-type detection + if {[string index $pnext 0] eq "!"} { + set pnext [string range $pnext 1 end] + } + # single type in segment e.g /@@something/ + switch -exact -- $pnext { + "" { + set substructure string + } + @*k@* - @*K@* - @*.@* - ## { + set substructure dict + } + # { + set substructure list + } + ## { + set substructure dict + } + %# { + set substructure string + } + * { + #set substructure $opt_roottype + #set substructure [dict get $pattern_this_structure $pattern_nest] + set substructure [lindex $pattern_this_structure end] + } + default { + switch -glob -- $pnext { + @??@* - @?@* - @@* { + #all 4 or 3 len prefixes bounded by @ are dict + set substructure dict + } + default { + if {[string match @* $pnext]} { + set substructure list + } elseif {[string match %* $pnext]} { + set substructure string + } else { + #set substructure $opt_roottype + #set substructure [dict get $pattern_this_structure $pattern_nest] + set substructure [lindex $pattern_this_structure end] + } + } + } + } + } + } else { + #e.g /@0,%str,.../ + #doesn't matter what the individual types are - we have a list result + set substructure list + } + #puts "--pattern_nest: $pattern_nest substructure: $substructure" + dict set pattern_next_substructure $pattern_nest $substructure + # -- --- --- --- + + if {$opt_keysorttype ne "none"} { + set int_keyset 1 + foreach k $keyset { + if {![string is integer -strict $k]} { + set int_keyset 0 + break + } + } + if {$int_keyset} { + set sortindices [lsort -indices -integer $keyset] + #set keyset [lsort -integer $keyset] + } else { + #set keyset [lsort -$opt_keysorttype $keyset] + set sortindices [lsort -indices -$opt_keysorttype $keyset] + } + set keyset [lmap i $sortindices {lindex $keyset $i}] + set keyset_structure [lmap i $sortindices {lindex $keyset_structure $i}] + } + + foreach k $keyset { + lappend pattern_key_index $pattern_nest + } + + lappend filtered_keys {*}$keyset + lappend all_keyset_structure {*}$keyset_structure + + #puts stderr "--->pattern_nest:$pattern_nest keyset:$keyset" + } + } + #puts stderr "[dict get $pattern_this_structure $pattern_nest] keys: $filtered_keys" + } else { + puts stdout "unrecognised roottype: $opt_roottype" + return $dval + } + + if {[llength $filtered_keys]} { + #both keys and values could have newline characters. + #simple use of 'format' won't cut it for more complex dict keys/values + #use block::width or our columns won't align in some cases + switch -- $opt_return { + "tailtohead" { + #last line of key is side by side (possibly with separator) with first line of value + #This is more intelligible when terminal wrapping occurs - and is closer to what happens with parray multiline keys and values + #we still pad the key to max width so that the separator appears in the same column - which in the case of wide keys could cause that to wrap for all entries + + set kt [lindex $opt_keytemplates 0] + if {$kt eq ""} { + set kt {${$key}} + } + #set display_keys [lmap k $filtered_keys {tcl::string::map [list %k% $k] $kt}] + set display_keys [lmap key $filtered_keys {tstr -ret string -allowcommands $kt}] + set maxl [::tcl::mathfunc::max {*}[lmap v $display_keys {textblock::width $v}]] + + set kidx 0 + set last_hidekey 0 + foreach keydisplay $display_keys key $filtered_keys { + set thisval "?" + set hidekey 0 + set pattern_nest [lindex $pattern_key_index $kidx] + set pattern_nest_list [split $pattern_nest /] + #set this_type [dict get $pattern_this_structure $pattern_nest] + #set this_type [dict get $pattern_this_structure $key] + set this_type [lindex $all_keyset_structure $kidx] + #puts stderr "---> kidx:$kidx key:$key - pattern_nest:$pattern_nest this_type:$this_type" + + set is_match 1 ;#whether to display the normal separator or bad-match separator + switch -- $this_type { + dict { + #todo? - slower lsearch if -dupes 1 flag set so we can display duplicate 'keys' if var not a proper dict but rather a dict-shaped list that we want to display as a dict + # - default highlight dupes (ansi underline?) + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + % thisval.= $qry= $dval + } else { + set thisval [tcl::dict::get $dval $key] + } + + #set substructure [lrange $opt_structure 1 end] + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + set nextopts [dict get $argd opts] + + + set subansibasekeys [lrange $opt_ansibase_keys 1 end] + set nextkeytemplates [lrange $opt_keytemplates 1 end] + #dict set nextopts -substructure $nextsub + dict set nextopts -keytemplates $nextkeytemplates + dict set nextopts -ansibase_keys $subansibasekeys + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + #puts stderr "showdict {*}$nextopts $thisval [lindex $args end]" + + if {[llength $nextpatterns]} { + if {[catch { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } errMsg]} { + #puts stderr ">>> nextpatterns:'$nextpatterns' nextopts:'$nextopts'" + set is_match 0 + } + } + } + list { + if {[string is integer -strict $key]} { + set thisval [lindex $dval $key] + } else { + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + } else { + set qry $key + } + % thisval.= $qry= $dval + } + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + set nextopts [dict get $argd opts] + + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + + #if {![llength $nextpatterns]} { + # set nextpatterns * + #} + if {[llength $nextpatterns]} { + if {[catch { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } errMsg]} { + set is_match 0 + } + } + } + string { + set hidekey 1 + switch -- $key { + "%string" { + set hidekey 1 + set thisval $dval + } + "%ansiview" { + set thisval [ansistring VIEW -lf 1 $dval] + } + "%ansiviewstyle" { + set thisval [ansistring VIEWSTYLE -lf 1 $dval] + } + default { + switch -glob -- $key { + *lpad-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which left -width $width] + } + %lpadstr-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which left -width $width -padchar $extra] + } + %rpad-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which right -width $width] + } + %rpadstr-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which right -width $width -padchar $extra] + } + %split-* { + #split on one or more chars - review + set hidekey 1 + lassign [split $key -] _ splitchars + set thisval [split $dval $splitchars] + } + default { + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + } else { + set qry $key + } + set thisval $dval + if {[string index $key 0] ne "%"} { + set key %$key + } + #pipeline + % thisval.= $key= $thisval + } + } + + } + } + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + #set nextopts [dict get $argd opts] + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + + if {[llength $nextpatterns]} { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } + + } + } + if {$this_type eq "string" && $hidekey} { + lassign [textblock::size $thisval] _vw vwidth _vh vheight + #set blanks_above [string repeat \n [expr {$kheight -1}]] + set vblock $opt_ansibase_values$thisval$RST + #append result [textblock::join_basic -- $vblock] + #review - we wouldn't need this space if we had a literal %sp %sp-x ?? + append result " $vblock" + } else { + set ansibase_key [lindex $opt_ansibase_keys 0] + + lassign [textblock::size $keydisplay] _kw kwidth _kh kheight + lassign [textblock::size $thisval] _vw vwidth _vh vheight + + set totalheight [expr {$kheight + $vheight -1}] + set blanks_above [string repeat \n [expr {$kheight -1}]] + set blanks_below [string repeat \n [expr {$vheight -1}]] + + if {$is_match} { + set use_sep $opt_sep + } else { + set use_sep $opt_mismatch_sep + } + + + set sepwidth [textblock::width $use_sep] + set kblock [textblock::pad $ansibase_key$keydisplay$RST$blanks_below -width $maxl] + set sblock [textblock::pad $blanks_above$use_sep$blanks_below -width $sepwidth] + set vblock $blanks_above$opt_ansibase_values$thisval$RST + #only vblock is ragged - we can do a basic join because we don't care about rhs whitespace + if {$last_hidekey} { + append result \n + } + #append result [textblock::join_basic -- $kblock $sblock $vblock] \n + append result [textblock::join_basic_raw $kblock $sblock $vblock] \n + } + set last_hidekey $hidekey + incr kidx + } + } + "sidebyside" { + # TODO - fix + #This is nice for multiline keys and values of reasonable length, will produce unintuitive results when line-wrapping occurs. + #use ansibase_key etc to make the output more comprehensible in that situation. + #This is why it is not the default. (review - terminal width detection and wrapping?) + set maxl [::tcl::mathfunc::max {*}[lmap v $filtered_keys {textblock::width $v}]] + foreach key $filtered_keys { + set kt [lindex $opt_keytemplates 0] + if {$kt eq ""} { + set kt "%k%" + } + set keydisplay $opt_ansibase_keys[string map [list %k% $key] $kt]$RST + #append result [format "%-*s = %s" $maxl $key [dict get $dval $key]] \n + #differing height blocks (ie ragged) so we need a full textblock::join rather than join_basic + append result [textblock::join -- [textblock::pad $keydisplay -width $maxl] $opt_sep "$opt_ansibase_values[dict get $dval $key]$RST"] \n + } + } + } + } + if {$opt_trimright} { + set result [::join [lines_as_list -line trimright $result] \n] + } + if {[string last \n $result] == [string length $result]-1} { + set result [string range $result 0 end-1] + } + #stdout/stderr can exist but not be in 'chan names' (e.g when transforms in place) + set chan [dict get $argd opts -channel] + switch -- $chan { + stderr - stdout { + puts $chan $result + } + none { + return $result + } + default { + #review - check member of chan names? + #just try outputting to the supplied channel for now + puts $chan $result + } + } + } + + proc is_list_all_in_list {small large} { + if {[llength $small] > [llength $large]} {return 0} + foreach x $large { + ::set ($x) {} + } + foreach x $small { + if {![info exists ($x)]} { + return 0 + } + } + return 1 + } + #v2 generally seems slower + proc is_list_all_in_list2 {small large} { + set small_in_large [lsort [struct::set intersect [lsort -unique $small] $large ]] + return [struct::list equal [lsort $small] $small_in_large] + } + if {!$has_struct_list || !$has_struct_set} { + set body { + package require struct::list + package require struct::set + } + append body [info body is_list_all_in_list2] + proc is_list_all_in_list2 {small large} $body + } + + proc is_list_all_ni_list {A B} { + foreach x $B { + ::set ($x) {} + } + foreach x $A { + if {[info exists ($x)]} { + return 0 + } + } + return 1 + } + proc is_list_all_ni_list2 {a b} { + set i [struct::set intersect $a $b] + return [expr {[llength $i] == 0}] + } + if {!$has_struct_set} { + set body { + package require struct::list + } + append body [info body is_list_all_ni_list2] + proc is_list_all_ni_list2 {a b} $body + } + proc is_cachedlist_all_ni_list {a b} { + upvar 0 ::punk::lib::caches::funcs_ni_list funcs + if {[info exists funcs($a)]} { + return [[set funcs($a)] $b] + } + set keybytes [encoding convertto utf-8 $a] + set key [binary encode base64 $keybytes] ;#one single-line base64 string + + set expression "" + foreach t $a { + #append expression "({$t} ni \$b) && " + append expression "{$t} ni \$b && " + } + set expression [string trimright $expression " &"] ;#trim trailing spaces and ampersands + proc ::punk::lib::caches::ni_list_$key {b} [string map [list @expression@ $expression] { + return [expr {@expression@}] + }] + + set funcs($a) ::punk::lib::caches::ni_list_$key + return [punk::lib::caches::ni_list_$key $b] + } + proc is_cachedlist_all_ni_list2 {a b} { + upvar 0 ::punk::lib::caches::funcs_ni_list funcs + if {[info exists funcs($a)]} { + return [[set funcs($a)] $b] + } + set keybytes [encoding convertto utf-8 $a] + set key [binary encode base64 $keybytes] ;#one single-line base64 string + + set d [dict create] + foreach x $a { + dict set d $x "" + } + #constructing a switch statement could be an option + # - but would need to avoid using escapes in order to get a jump-table + # - this would need runtime mapping of values - unlikely to be a win + proc ::punk::lib::caches::ni_list_$key {b} [string map [list @d@ $d] { + foreach x $b { + if {[::tcl::dict::exists {@d@} $x]} { + return 0 + } + } + return 1 + }] + + set funcs($a) ::punk::lib::caches::ni_list_$key + return [punk::lib::caches::ni_list_$key $b] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::ldiff + @cmd -name punk::lib::ldiff\ + -summary\ + "Difference consisting of items with removeitems removed."\ + -help\ + "Somewhat like struct::set difference, but order preserving, and doesn't + treat as a 'set' so preserves any duplicates in items. + + struct::set difference may happen to preserve ordering when items are + integers, but order can't be relied on, especially as struct::set has + 2 differening implementations (tcl vs critcl) which return results with + different ordering to each other and different deduping behaviour in + some cases (e.g when 2nd arg is empty)" + @values -min 2 -max 2 + items -type list + removeitems -type list + }] + } + proc ldiff {items removeitems} { + if {[llength $removeitems] == 0} {return $items} + set result {} + foreach item $items { + if {$item ni $removeitems} { + lappend result $item + } + } + return $result + } + #with ledit (also avail in 8.6 using punk::lib::compat::ledit + proc ldiff2 {fromlist removeitems} { + if {[llength $removeitems] == 0} {return $fromlist} + foreach item $removeitems { + set posns [lsearch -all -exact $fromlist $item] + foreach p $posns {ledit fromlist $p $p} + } + return $fromlist + } + proc ldiff3 {fromlist removeitems} { + set doomed [list] + foreach item $removeitems { + lappend doomed {*}[lsearch -all -exact $fromlist $item] + } + lremove $fromlist {*}$doomed + } + + #fix for tcl impl of struct::set::diff which doesn't dedupe + proc struct_set_diff_unique {A B} { + package require struct::set ;#require even if tcl impl - so the dependency isn't missed accidentally due to Loaded state of programmers machine. + if {[struct::set::Loaded] eq "tcl"} { + return [punk::lib::setdiff $A $B] + } else { + #use (presumably critcl) implementation for speed + return [struct::set difference $A $B] + } + } + + + #non-dupe preserving - for consistency in dedupe behaviour we don't shortcircuit empty B + #consistent dedupe and order-maintenance of remaining items in A differentiate this from struct::set difference (dedupes with critcl, doesn't with tcl implementation 2024) + #also struct::set difference with critcl is faster + proc setdiff {A B} { + if {[llength $A] == 0} {return {}} + set d [dict create] + foreach x $A {dict set d $x {}} + foreach x $B {dict unset d $x} + return [dict keys $d] + } + #bulk dict remove is slower than a foreach with dict unset + #proc setdiff2 {fromlist removeitems} { + # #if {[llength $fromlist] == 0} {return {}} + # set d [dict create] + # foreach x $fromlist { + # dict set d $x {} + # } + # return [dict keys [dict remove $d {*}$removeitems]] + #} + #array is about 15% faster than dict - but unordered. (which is ok for sets - but we have struct::set for that) + proc setdiff_unordered {A B} { + if {[llength $A] == 0} {return {}} + array set tmp {} + foreach x $A {::set tmp($x) .} + foreach x $B {catch {unset tmp($x)}} + return [array names tmp] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lunique_unordered + @cmd -name punk::lib::lunique_unordered\ + -summary\ + "unique values in list"\ + -help\ + "Return unique values in provided list. + This removes duplicates but *may* rearrange the + order of the returned elements compared to the + original list. + + When struct::set is available this will be used + for the implementation, as it can be *slightly* + faster if acceleration is present. When struct::set + is not available it will fallback to lunique and + provide the same functionality with order preserved." + @values -min 1 -max 1 + list -type list + }] + } + #default/fallback implementation + proc lunique_unordered {list} { + lunique $list + } + if {$has_struct_set} { + if {[struct::set equal [struct::set union {a a} {}] {a}]} { + proc lunique_unordered {list} { + struct::set union $list {} + } + } else { + #struct::set union operates on a 'set' - so this probably won't change, and hopefully is + #consistent across unacelerated versions and those implemented in accelerators, + #but if it ever does change - be a little noisy about it. + puts stderr "punk::lib WARNING: struct::set union no longer dedupes!" + #we could also test a sequence of: struct::set add + } + } + + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lunique + @cmd -name punk::lib::lunique\ + -summary\ + "Order-preserving unique values in list"\ + -help\ + "Return unique values in provided list. + This removes duplicates whilst preserving the + original order of the provided list. + + When struct::set is available with acceleration, + lunique_unordered may be slightly faster." + @values -min 1 -max 1 + list -type list + }] + } + proc lunique {list} { + set new {} + foreach item $list { + if {$item ni $new} { + lappend new $item + } + } + return $new + } + proc lunique2 {list} { + set doomed [list] + #expr 'in' probably faster than using a dict - for lists approx < 20,000 items. (wiki wisdom - url?) + for {set i 0} {$i < [llength $list]} {} { + set item [lindex $list $i] + lappend doomed {*}[lrange [lsearch -all -exact -start $i $list $item] 1 end] + while {[incr i] in $doomed} {} + } + lremove $list {*}$doomed + } + #The closure-like behaviour is *very* slow especially when called from a context such as the global namespace with lots of vars and large arrays such as ::env + proc lmapflat_closure {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + # -- --- --- + #capture - use uplevel 1 or namespace eval depending on context + set capture [uplevel 1 { + apply { varnames { + set capturevars [tcl::dict::create] + set capturearrs [tcl::dict::create] + foreach fullv $varnames { + set v [tcl::namespace::tail $fullv] + upvar 1 $v var + if {[info exists var]} { + if {(![array exists var])} { + tcl::dict::set capturevars $v $var + } else { + tcl::dict::set capturearrs capturedarray_$v [array get var] + } + } else { + #A variable can show in the results for 'info vars' but still not 'exist'. e.g a 'variable x' declaration in the namespace where the variable has never been set + } + } + return [tcl::dict::create vars $capturevars arrs $capturearrs] + } } [info vars] + } ] + # -- --- --- + set cvars [tcl::dict::get $capture vars] + set carrs [tcl::dict::get $capture arrs] + set apply_script "" + foreach arrayalias [tcl::dict::keys $carrs] { + set realname [string range $arrayalias [string first _ $arrayalias]+1 end] + append apply_script [string map [list %realname% $realname %arrayalias% $arrayalias] { + array set %realname% [set %arrayalias%][unset %arrayalias%] + }] + } + + append apply_script [string map [list %script% $script] { + #foreach arrayalias [info vars capturedarray_*] { + # set realname [string range $arrayalias [string first _ $arrayalias]+1 end] + # array set $realname [set $arrayalias][unset arrayalias] + #} + #return [eval %script%] + %script% + }] + #puts "--> $apply_script" + foreach $varnames $list { + lappend result {*}[apply\ + [list\ + [concat $varnames [tcl::dict::keys $cvars] [tcl::dict::keys $carrs] ]\ + $apply_script\ + ] {*}[subst $values] {*}[tcl::dict::values $cvars] {*}[tcl::dict::values $carrs] ] + } + return $result + } + #link version - can write to vars in calling context - but keeps varnames themselves isolated + #performance much better than capture version - but still a big price to pay for the isolation + proc lmapflat_link {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + set linkvars [uplevel 1 [list info vars]] + set nscaller [uplevel 1 [list namespace current]] + + set apply_script "" + foreach vname $linkvars { + append apply_script [string map [list %vname% $vname]\ + {upvar 2 %vname% %vname%}\ + ] \n + } + append apply_script $script \n + + #puts "--> $apply_script" + foreach $varnames $list { + lappend result {*}[apply\ + [list\ + $varnames\ + $apply_script\ + $nscaller\ + ] {*}[subst $values]\ + ] + } + return $result + } + + #proc lmapflat {varnames list script} { + # concat {*}[uplevel 1 [list lmap $varnames $list $script]] + #} + #lmap can accept multiple var list pairs + proc lmapflat {args} { + concat {*}[uplevel 1 [list lmap {*}$args]] + } + proc lmapflat2 {args} { + concat {*}[uplevel 1 lmap {*}$args] + } + + #proc dict_getdef {dictValue args} { + # if {[llength $args] < 1} { + # error {wrong # args: should be "dict_getdef dictValue ?key ...? key default"} + # } + # set keys [lrange $args -1 end-1] + # if {[tcl::dict::exists $dictValue {*}$keys]} { + # return [tcl::dict::get $dictValue {*}$keys] + # } else { + # return [lindex $args end] + # } + #} + if {[info commands ::tcl::dict::getdef] eq ""} { + proc dict_getdef {dictValue args} { + set keys [lrange $args 0 end-1] + if {[tcl::dict::exists $dictValue {*}$keys]} { + return [tcl::dict::get $dictValue {*}$keys] + } else { + return [lindex $args end] + } + } + } else { + #we pay a minor perf penalty for the wrap + interp alias "" ::punk::lib::dict_getdef "" ::tcl::dict::getdef + } + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + #supports *safe* ultra basic offset expressions as used by lindex etc, but without the 'end' features + #safe in that we don't evaluate the expression as a string. + proc offset_expr {expression} { + set expression [tcl::string::map {_ {}} $expression] ;#review - this is for 8.6 to understand underscored ints + if {[tcl::string::is integer -strict $expression]} { + return [expr {$expression}] + } + if {[regexp {([^+-]*)([+-])(.*)} $expression _match a op b] && [tcl::string::is integer -strict $a] && [tcl::string::is integer -strict $b]} { + if {$op eq "-"} { + return [expr {$a - $b}] + } else { + return [expr {$a + $b}] + } + } else { + error "bad expression '$expression': must be integer?\[+-\]integer?" + } + } + + punk::args::define { + @id -id ::punk::lib::is_indexset + @cmd -name punk::lib::is_indexset\ + -summary\ + "Validate string is a comma-delimited 'indexset'."\ + -help\ + "Validate that a string is an 'indexset' + + An indexset consists of a comma delimited list of indexes or index-ranges. + No particular base is assumed for the purposes of validating an indexset here. + While in Tcl, lists are zero-based - an indexset can be applied to lists of any base. + e.g -10..-1 is an indexset that just won't resolve any results for a list with a base >= 0. + To validate if an indexset is strictly within range, both the length of the data and the base would + need to be considered. + + The normal 'range' specifier is .. but can be of the form .x. where x is the step value. + The range specifier can appear at the beginning, middle or end, or even alone to indicate the entire + range of valid values. + e.g the following are all valid ranges + 1.. + (index 1 to 'max') + ..10 + (index 'base' to 10) + 2..11 + (index 2 to 11) + .. + (all indices) + .3. + (1st index and every 3rd index thereafter) + + Common whitespace elements space,tab,newlines are ignored. + Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, + e.g end-2 or 2+2. + + see indexset_resolve" + @values -min 1 -max 1 + indexset -type string + } + proc is_indexset {indexset} { + #collapse internal whitespace (for basic whitespace set we allow) + set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] + if {![regexp {^[\-\+_end,\.0-9]*$} $indexset]} { + return 0 + } + set ranges [split $indexset ,] + foreach r $ranges { + set validateindices [list] + set rposn [string first .. $r] + if {$rposn >= 0} { + set sepsize 2 + set step 1 + #review - whitespace between ints? + lappend validateindices {*}[string range $r 0 $rposn-1] {*}[string range $r $rposn+2 end] + } elseif {[string first . $r] >= 0} { + set stripped [string map {. ""} $r] + if {[tcl::string::length $stripped] != [tcl::string::length $r]-2} { + #if one dot exists - must be exactly 2 dots in total - possibly separated by positive/negative int (not zero) + return 0 + } + #assert - we have exactly 2 dots separated by something. + #check for .n. 'stepped' range + set fdot [string first . $r] + set ldot [string last . $r] + set step [string range $r $fdot+1 $ldot-1] + #todo - allow basic mathops for step: 2+1 2+-1 etc same as tcl lindex, lseq + #1.0.10 should be valid but behave similarly to lseq 1 0 by 0 ie returns nothing + + #1.end.10 or similar shouldn't be valid - but we need to allow other basic index expressions. + if {[string match *end* $step] || [catch {lindex {} $step}]} { + return 0 + } + #if {![string is integer -strict $step] || $step == 0} { + # return 0 + #} + lappend validateindices {*}[string range $r 0 $fdot-1] {*}[string range $r $ldot+1 end] + } else { + #'range' is just an index + set validateindices [list $r] + } + + foreach v $validateindices { + if {$v eq "" || $v eq "end"} {continue} + if {[string is integer -strict $v]} {continue} + if {[catch {lindex {} $v}]} { + return 0 + } + } + } + return 1 + } + #review - compare to IMAP4 methods of specifying ranges? + #TODO add tests to test::punk::lib indexset_resolve is a little tricky + punk::args::define { + @id -id ::punk::lib::indexset_resolve + @cmd -name punk::lib::indexset_resolve\ + -summary\ + "Resolve an indexset to a list of integers based on supplied list or string length."\ + -help\ + "Resolve an 'indexset' to a list of actual indices within the range of the provided numitems value. + e.g in a basic case: for a list of 10 items, 'indexset_resolve 10 end' will return the index 9 + + An indexset consists of a comma delimited list of indexes or index-ranges. + Ranges must be specified with a range-indicator such as .. as the separator, with an empty value at + either side of the separator representing beginning and end of the index range respectively. + The range-separator can be of the form .x. where x is an integer or basic expression + (single +/- operation) that indicates the step value to use. This is equivalent to the 'by' value + in the tcl9 lseq command. + + When the start index is lower than the end, the step value defaults to 1. + ie indexset_resolve 0..7 is equivalent to indexset_resolve 0.1.7 + When the start index is higher than the end, the step value defaults to -1. + ie indexset_resolve 7..0 is equivalent to indexset_resolve 0.-1.7 + + If start and end are ommitted, increasing order is assumed if the step isn't specified. + eg + .. represents the range from the base to the end + .-1. would represent end to base with step -1 + + If start is omitted and only the end is supplied: + The default step is 1 indicating ascension and the missing start is equivalent to the base. + indexset_resolve 5 ..2 + -> 0 1 2 + The default start is 'end' if the step is negative + indexset_resolve 5 .-1.2 + -> 4 3 2 + If end is omitted and only the start is supplied: + The default step is 1 indicating ascension and the missing end is equivalent to 'end' + indexset_resolve 5 2.. + -> 2 3 4 + The default end is the base if the step is negative + indexset_resolve 5 2.-1. + -> 2 1 0 + + Like the tcl9 lseq command - a step (by) value of zero produces no results. + + The indexes are 0-based by default, but the base can be specified. + indexset_resolve 7 .. + -> 0 1 2 3 4 5 6 + indexset_resolve -base -3 7 .. + -> -3 -2 -1 0 1 2 3 + + Whitespace is ignored. + Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, + e.g end-2 or 2+2. + + end means the last item. + end-1 means the second last item. + 0.. is the same as 0..end + + indexset examples: + + These assume the default 0-based indices (-base 0) + + 1,3.. + output the index 1 (2nd item) followed by all from index 3 to the end. + indexset_resolve 4 1,3.. + -> 1 3 + indexset_resolve 10 1,3.. + -> 1 3 4 5 6 7 8 9 + 0..2,end + output the first 3 indices, and the last index. + end-1..0 + output the indexes in reverse order from 2nd last item to first item." + @leaders -min 0 -max 0 + @opts + -base -type integer -prefix 1 -default 0 -help\ + "This is the starting index. It can be positive, negative or zero. + This affects the start and end calculations, limiting what indices will be + returned. + e.g with base 1 'end' will give a different value from base 0 + + for 10 items 'end' is 10 when 1-based + for 10 items 'end' is 9 when 0-based + + For base 1, index 0 is considered to be below the range. + ie + indexset_resolve -base 1 10 0..3 + -> 1 2 3 + indexset_resolve -base 0 10 0..3 + -> 0 1 2 3 + + It does not *convert* indexes within the range. + + indexset_resolve -base 1 10 5 + -> 5 + indexset_resolve -base 0 10 5 + -> 5 + + ie if you ask for a 1-based resolution of an indexset the integers that are within + the range will come out the same, so the result needs to be treated as a 1-based + set of indices when performing further operations. + " + @values -min 2 -max 3 + numitems -type integer + indexset -type indexset -help "comma delimited specification for indices to return" + } + + #limit punk::args parsing to unhappy paths where possible + proc indexset_resolve {args} { + # -------------------------------------------------- + # Manual parsing of happy path args instead of using punk::args::parse $args withid ::punk::lib::indexset_resolve + # This is because indexset_resolve is *somewhat* low level, has only a few args, and we don't want any overhead. + # for the unhappy path - the punk::args::parse is fine to generate the usage/error information. + # -------------------------------------------------- + if {[llength $args] < 2} { + punk::args::resolve $args withid ::punk::lib::indexset_resolve + } + set indexset [lindex $args end] + set numitems [lindex $args end-1] + if {![string is integer -strict $numitems] || ![is_indexset $indexset]} { + #use parser on unhappy path only + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + #assert we have 2 or more args + set base 0 ;#default + if {[llength $args] > 2} { + #if more than just numitems and indexset - we expect only -base ie 4 args in total + if {[llength $args] != 4} { + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + set optname [lindex $args 0] + set optval [lindex $args 1] + set fulloptname [tcl::prefix::match -error "" -base $optname] + if {$fulloptname ne "-base" || ![string is integer -strict $optval]} { + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + set base $optval + } + # -------------------------------------------------- + + + set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] ;#collapse basic whitespace + set index_list [list] ;#list of actual indexes within the range + set iparts [split $indexset ,] + set based_max [expr {$numitems -1 + $base}] + + #we already did is_indexset check above, so we can make assumptions about well-formedness of each part + foreach ipart $iparts { + set ipart [string trim $ipart] + #we need to cater for n..m as well as n.s.m where s is 'step' + set rposn [string first . $ipart] + if {$rposn>=0} { + #if we found one dot - there must be exactly 2 dots in the ipart, separated by nothing, or a basic integer-expression + set rposn2 [string last . $ipart] + if {$rposn2 == $rposn+1} { + #.. + set step "default" ;#could be 1 or -1 + } else { + set step [tcl::string::range $ipart $rposn+1 $rposn2-1] + } + lassign [punk::lib::string_splitbefore_indices $ipart $rposn $rposn2+1] rawa _ rawb + + set rawa [string trim $rawa] + set rawb [string trim $rawb] + if {$rawa eq "" && $rawb eq ""} { + if {$step eq "default"} { + set step 1 ;#default ascending when no start and no end + } + if {$step < 0} { + set rawa end + set rawb $base + } else { + set rawa $base + set rawb end + } + #if neither start nor end specified - we won't get out of range results from lindex_resolve + set a [punk::lib::lindex_resolve $numitems $rawa $base] + set b [punk::lib::lindex_resolve $numitems $rawb $base] + } else { + if {$rawa eq ""} { + if {$step eq "default"} { + #when start not specified, but end is - default direction always ascending + #(even if end is base or below range) + set step 1 + } + if {$step < 0} { + set rawa end + } else { + set rawa $base + } + } + set a [punk::lib::lindex_resolve $numitems $rawa $base] + if {$a == -Inf} { + #undershot - leave negative + } elseif {$a == Inf} { + #overshot + set a [expr {$based_max + 1}] ;#put it outside the range on the upper side + } + #review - a may be -Inf + + if {$rawb eq ""} { + if {$step eq "default"} { + set step 1 + } + if {$step < 0} { + if {$a < $base} { + #make sure both + #mathfunc::isinf is tcl9+ + if {[catch { + if {[::tcl::mathfunc::isinf $a]} { + set a [expr {$base -1}] + } + }]} { + if {[string match -nocase *inf* $a]} { + set a [expr {$base -1}] + } + } + set rawb $a + } else { + set rawb $base + } + } else { + if {$a > $based_max} { + set rawb $a ;#make sure .. doesn't return last item - should return nothing + } else { + set rawb end + } + } + } + set b [punk::lib::lindex_resolve $numitems $rawb $base] + if {$b == -Inf} { + #undershot - leave negative + } elseif {$b == Inf} { + #set b [expr {$numitems}] ;#overshot - put it outside the range on the upper side + set b [expr {$based_max + 1}] ;#overshot - put it outside the range on the upper side + } + } + + #JJJ + + #e.g make sure .. doesn't return last item - should return nothing as both are above the range. + if {$a >= $base && $a <= $based_max && $b >=$base && $b <= $based_max} { + #assert a & b are integers within the range + if {$step eq "default"} { + #unspecified step - base direction on order of a & b + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } else { + if {$a >= $base && $a <= $based_max} { + #only a is in the range + if {$b < $base} { + set b $base + } else { + set b $based_max + } + if {$step eq "default"} { + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } elseif {$b >=$base && $b <= $based_max} { + #only b is in the range + if {$step eq "default"} { + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + if {$step < 0} { + if {$a < $base} { + #negative step from below - doesn't matter if b is in range - recast both to an int below $base + #(a may be -Inf) + set a [expr {$base -1}] + set b $a + set step 0 ;#we should return nothing + } + } else { + if {$a < $base} { + set a $base + } else { + set a $based_max + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } else { + #both outside the range + if {$a < $base && $b > $base} { + #spans the range in forward order + set a $base + set b $based_max + if {$step eq "default"} { + set step 1 + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } elseif {$a > $base && $b < $base} { + #spans the range in reverse order + set a $based_max + set b $base + if {$step eq "default"} { + set step -1 + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } + #both outside of range on same side + } + } + } else { + set idx [punk::lib::lindex_resolve_basic $numitems $ipart $base] + #returns only -Inf for out of range at either end + if {$idx >= $base} { + #index within the range + lappend index_list $idx + } + } + } + return $index_list + } + # showdict uses lindex_resolve results -Inf & Inf to determine whether index is out of bounds on lower vs upper side + #This doesn't need the list itself - just the length suffices. + punk::args::define { + @id -id ::punk::lib::lindex_resolve + @cmd -name punk::lib::lindex_resolve\ + -summary\ + "Resolve an indexexpression to an integer based on supplied list or string length."\ + -help\ + "Resolve an index which may be of the forms accepted by Tcl list or string commands such as end-2 or 2+2 + to the actual integer index for the supplied list/string length, or to a negative value below -1 indicating + whether the index was below or above the range of possible indices for the length supplied. + + Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. + This means the proc may be called with something like $x+2 end-$y etc + Sometimes the actual integer index is desired. + + We want to resolve the index used, without passing arbitrary expressions into the 'expr' function + - which could have security risks. + lindex_resolve will parse the index expression and return: + a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) + b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) + The similar function lindex_resolve_basic uses -Inf to denote + out of range at either end of the list/string. + Otherwise it will return an integer corresponding to the position in the data. + This is in stark contrast to Tcl list/string function indices which will return empty strings for out of + bounds indices, or in the case of lrange, return results anyway. + Like Tcl list commands - it will produce an error if the form of the index is not acceptable. + For empty lists/string (datalength 0), end and end+x indices are considered to be out of bounds on the upper side + - thus returning -2 + + Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. + We will get something like 10+1 - which can be resolved safely with expr + " + @values -min 2 -max 2 + datalength -type integer + index -type indexexpression + } + proc lindex_resolve {len index {base 0}} { + #*** !doctools + #[call [fun lindex_resolve] [arg len] [arg index]] + #[para]Resolve an index which may be of the forms accepted by Tcl list commands such as end-2 or 2+2 to the actual integer index for the supplied list/string length + #[para]Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. + #[para]This means the proc may be called with something like $x+2 end-$y etc + #[para]Sometimes the actual integer index is desired. + #[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. + #[para]lindex_resolve will parse the index expression and return: + #[para] a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) + #[para] b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] We don't return -1 - as the similar function lindex_resolve_basic uses this to denote out of range at either end of the list/string + #[para]Otherwise it will return an integer corresponding to the position in the list. + #[para]This is in stark contrast to Tcl list function indices which will return empty strings for out of bounds indices, or in the case of lrange, return results anyway. + #[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable + #[para]For empty lists, end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 + + #Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which can be resolved safely with expr + + + #REVIEW - we need compat for 1_000 etc to handle things like toml even in 8.6? + #A basic string map means we aren't properly validating + #todo - be stricter about malformations such as 1000_ + if {![string is integer -strict 1_0]} { + #basic forward compatibility with integers such as 1_000 for 8.6.x + set index [tcl::string::map {_ {}} $index] + set len [tcl::string::map {_ {}} $len] + } + + if {![string is integer -strict $len] || $len < 0} { + error "lindex_resolve len must be a positive integer." + } + set based_max [expr {$len -1 + $base}] + + if {[string is integer -strict $index]} { + #review - base? + #can match +i -i + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } else { + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } else { + if {[string match end* $index]} { + if {$index ne "end"} { + set op [string index $index 3] + set offset [string range $index 4 end] + #note - offset could have leading + or - + # 'string is integer -strict +1' ==> true + #e.g end+-1 is valid (end++-1 is not) + if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} + if {$offset == 0} { + #(offset +0, -0 or 0 or 000 0_0 etc) + #op either + or - is irrelevant + #set index [expr {$len-1}] ;#+ base ? + set index $based_max + if {$index < $base} { + #return -2 ;#special case - equivalent to 'end', with empty list - treat like a positive number out of bounds + return Inf + } else { + return $index + } + } + + #set index [if {$op eq "+"} {expr {($len-1) + $offset}} else {expr {($len-1) - $offset}}] + set index [if {$op eq "+"} {expr {$based_max + $offset}} else {expr {$based_max - $offset}}] + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } else { + return $index + } + } else { + #index is 'end' + if {$len == 0} { + #special case - 'end' with empty list - treat end like a positive number out of bounds + return Inf + } + #return [expr {$len - 1 + $base}] + return $based_max + } + } else { + #plain +- already handled above. + #we are trying to avoid evaluating unbraced expr of potentially insecure origin + #regexp must split a++b to a + +b (not a+ + b) ie first +/- is the op + if {[regexp {([^+-]*)([+-])(.*)} $index _match a op b]} { + if {[string is integer -strict $a] && [string is integer -strict $b]} { + if {$op eq "-"} { + set index [expr {$a - $b}] + } else { + set index [expr {$a + $b}] + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } + return $index + } + } + } + proc lindex_resolve_basic {len index {base 0}} { + #*** !doctools + #[call [fun lindex_resolve_basic] [arg len] [arg index]] + #[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) + #[para] returns -Inf for out of range at either end, or a valid integer index + #[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound + #[para] This is only likely to be faster than average over lindex_resolve for small lists and for Tcl which has the builtin lseq command + #[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 + #[para] For pure integer indices the performance should be equivalent + + if {![string is integer -strict $len] || $len < 0} { + error "lindex_resolve_basic len must be an integer greater than or equal to zero" + } + if {![string is integer -strict $base]} { + #base can be negative + error "lindex_resolve_basic base must be an integer" + } + set based_max [expr {$len -1 + $base}] + + set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 + if {[string is integer -strict $index]} { + #can match +i -i + #avoid even the lseq overhead when the index is simple + if {$index < $base || ($index > $based_max)} { + #even though in this case we could return -Inf or Inf like lindex_resolve; + #for consistency we don't return Inf for upper-boudn violation, + #as which bound is violated is not always directly determinable for compound index expressions (such as end-x) using the lseq+lindex mechanism. + return -Inf + } else { + #!NOTE! index within range is unchanged - no matter the base + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } + if {$len > 0} { + #For large len - this is a wasteful allocation if no true lseq available in Tcl version. + #lseq produces an 'arithseries' object which we can index into without allocating an entire list (REVIEW) + set testlist [punk::lib::range $base $based_max] ;# uses lseq if available, has fallback of creating a potentially large list of numbers. + } else { + set testlist [list] + #we want to call 'lindex' even in this case - to get the appropriate error message + } + set idx [lindex $testlist $index] + if {$idx eq ""} { + #we have no way to determine if out of bounds is at lower vs upper end + return -Inf + } else { + return $idx + } + } + proc lindex_get {list index} { + set resultlist [lrange $list $index $index] + if {![llength $resultlist]} { + return -1 + } else { + #we still don't know the actual integer index for an index such as end-x or int-int without parsing and evaluating ourself. + #we can return the value - but only in a way that won't collide with our -1 out-of-range indicator + return [tcl::dict::create value [lindex $resultlist 0]] + } + } + + proc string_splitbefore {str index} { + if {![string is integer -strict $index]} { + set index [punk::lib::lindex_resolve [string length $str] $index] + switch -- $index { + -Inf { + return [list "" $str] + } + Inf { + return [list $str ""] + } + } + } + return [list [string range $str 0 $index-1] [string range $str $index end]] + #scan %s stops at whitespace - not useful here. + #scan $s %${p}s%s + } + proc string_splitbefore_indices {str args} { + set parts [list $str] + set sizes [list [string length $str]] + set s 0 + foreach index $args { + if {![string is integer -strict $index]} { + set index [punk::lib::lindex_resolve [string length $str] $index] + switch -- $index { + -Inf { + if {[lindex $sizes 0] != 0} { + ledit parts 0 0 {} [lindex $parts 0] + ledit sizes 0 0 0 [lindex $sizes 0] + } + continue + } + Inf { + if {[lindex $sizes end] != 0} { + ledit parts end end [lindex $parts end] {} + ledit sizes end end [lindex $sizes end] 0 + } + continue + } + } + } + if {$index <= 0} { + if {[lindex $sizes 0] != 0} { + ledit parts 0 0 {} [lindex $parts 0] + ledit sizes 0 0 0 [lindex $sizes 0] + } + continue + } + if {$index >= [string length $str]} { + if {[lindex $sizes end] != 0} { + ledit parts end end [lindex $parts end] {} + ledit sizes end end [lindex $sizes end] 0 + } + continue + } + set i -1 + set a 0 + foreach sz $sizes { + incr i + if {$a + $sz > $index} { + set p [lindex $parts $i] + #puts "a:$a index:$index" + if {$a == $index} { + break + } + ledit parts $i $i [string range $p 0 [expr {$index -$a -1}]] [string range $p $index-$a end] + ledit sizes $i $i [expr {$index - $a}] [expr {($a + $sz)-$index}] + break + } + incr a $sz + } + #puts "->parts:$parts" + #puts "->sizes:$sizes" + } + return $parts + } + + proc K {x y} {return $x} + #*** !doctools + #[call [fun K] [arg x] [arg y]] + #[para]The K-combinator function - returns the first argument, x and discards y + #[para]see [uri https://wiki.tcl-lang.org/page/K] + #[para]It is used in cases where command-substitution at the calling-point performs some desired effect. + + + proc is_utf8_multibyteprefix {bytes} { + #*** !doctools + #[call [fun is_utf8_multibyteprefix] [arg str]] + #[para] Returns a boolean if str is potentially a prefix for a multibyte utf-8 character + #[para] ie - tests if it is possible that appending more data will result in a utf-8 codepoint + #[para] Will return false for an already complete utf-8 codepoint + #[para] It is assumed the incomplete sequence is at the beginning of the bytes argument + #[para] Suitable input for this might be from the unreturned tail portion of get_utf8_leading $testbytes + #[para] e.g using: set head [lb]get_utf8_leading $testbytes[rb] ; set tail [lb]string range $testbytes [lb]string length $head[rb] end[rb] + regexp {(?x) + ^ + (?: + [\xC0-\xDF] | #possible prefix for two-byte codepoint + [\xE0-\xEF] [\x80-\xBF]{0,1} | #possible prefix for three-byte codepoint + [\xF0-\xF4] [\x80-\xBF]{0,2} #possible prefix for + ) + $ + } $bytes + } + + proc is_utf8_first {str} { + regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + ^ + (?: + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + } $str + } + proc is_utf8_single {1234bytes} { + #*** !doctools + #[call [fun is_utf8_single] [arg 1234bytes]] + #[para] Tests input of 1,2,3 or 4 bytes and responds with a boolean indicating if it is a valid utf-8 character (codepoint) + regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + ^ + (?: + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + $ + } $1234bytes + } + proc get_utf8_leading {rawbytes} { + #*** !doctools + #[call [fun get_utf8_leading] [arg rawbytes]] + #[para] return the leading portion of rawbytes that is a valid utf8 sequence. + #[para] This will stop at the point at which the bytes can't be interpreted as a complete utf-8 codepoint + #[para] e.g It will not return the first byte or 2 of a 3-byte utf-8 character if the last byte is missing, and will return only the valid utf-8 string from before the first byte of the incomplete character. + #[para] It will also only return the prefix before any bytes that cannot be part of a utf-8 sequence at all. + #[para] Note that while this will return valid utf8 - it has no knowledge of grapheme clusters or diacritics + #[para] This means if it is being used to process bytes split at some arbitrary point - the trailing data that isn't returned could be part of a grapheme cluster that belongs with the last character of the leading string already returned + #[para] The utf-8 BOM \xEF\xBB\xBF is a valid UTF8 3-byte sequence and so can also be returned as part of the leading utf8 bytes + if {[regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + \A ( + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + + } $rawbytes completeChars]} { + return $completeChars + } + return "" + } + proc hex2dec {args} { + #*** !doctools + #[call [fun hex2dec] [opt {option value...}] [arg list_largeHex]] + #[para]Convert a list of (possibly large) unprefixed hex strings to their decimal values + #[para]hex2dec accepts and ignores internal underscores in the same manner as Tcl 8.7+ numbers e.g hex2dec FF_FF returns 65535 + #[para]Leading and trailing underscores are ignored as a matter of implementation convenience - but this shouldn't be relied upon. + #[para]Leading or trailing whitespace in each list member is allowed e.g hex2dec " F" returns 15 + #[para]Internal whitespace e.g "F F" is not permitted - but a completely empty element "" is allowed and will return 0 + + set list_largeHex [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::hex2dec arguments prior to list_largeHex must be option/value pairs - received '$argopts'" + } + set opts [tcl::dict::create\ + -validate 1\ + -empty_as_hex "INVALID set -empty_as_hex to a hex string e.g FF if empty values should be replaced"\ + ] + set known_opts [tcl::dict::keys $opts] + foreach {k v} $argopts { + tcl::dict::set opts [tcl::prefix match -message "options for hex2dec. Unexpected option" $known_opts $k] $v + } + # -- --- --- --- + set opt_validate [tcl::dict::get $opts -validate] + set opt_empty [tcl::dict::get $opts -empty_as_hex] + # -- --- --- --- + + set list_largeHex [lmap h $list_largeHex[unset list_largeHex] {string map {_ ""} [string trim $h]}] + if {$opt_validate} { + #Note appended F so that we accept list of empty strings as per the documentation + if {![string is xdigit -strict [join $list_largeHex ""]F ]} { + error "[namespace current]::hex2dec error: non-hex digits encountered after stripping underscores and leading/trailing whitespace for each element\n $list_largeHex" + } + } + if {![string is xdigit -strict [string map {_ ""} $opt_empty]]} { + #mapping empty string to a value destroys any advantage of -scanonly + #todo - document that -scanonly has 2 restrictions - each element must be valid hex and less than 7 chars long + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {0} : {[set v]}}] + if {[lsearch $list_largeHex ""] >=0} { + error "[namespace current]::hex2dec error: empty values in list cannot be mapped to non-hex $opt_empty" + } + } else { + set opt_empty [string trim [string map {_ ""} $opt_empty]] + if {[set first_empty [lsearch $list_largeHex ""]] >= 0} { + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {$opt_empty} : {$v}}] + set nonempty_head [lrange $list_largeHex 0 $first_empty-1] + set list_largeHex [concat $nonempty_head [lmap v [lrange $list_largeHex $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [scan $list_largeHex [string repeat %llx [llength $list_largeHex]]] + } + + proc dec2hex {args} { + #*** !doctools + #[call [fun dex2hex] [opt {option value...}] [arg list_decimals]] + #[para]Convert a list of decimal integers to a list of hex values + #[para] -width can be used to make each hex value at least int characters wide, with leading zeroes. + #[para] -case upper|lower determines the case of the hex letters in the output + set list_decimals [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::dec2hex arguments prior to list_decimals must be option/value pairs - received '$argopts'" + } + set defaults [tcl::dict::create\ + -width 1\ + -case upper\ + -empty_as_decimal "INVALID set -empty_as_decimal to a number if empty values should be replaced"\ + ] + set known_opts [tcl::dict::keys $defaults] + set fullopts [tcl::dict::create] + foreach {k v} $argopts { + tcl::dict::set fullopts [tcl::prefix match -message "options for [tcl::namespace::current]::dec2hex. Unexpected option" $known_opts $k] $v + } + set opts [tcl::dict::merge $defaults $fullopts] + # -- --- --- --- + set opt_width [tcl::dict::get $opts -width] + set opt_case [tcl::dict::get $opts -case] + set opt_empty [tcl::dict::get $opts -empty_as_decimal] + # -- --- --- --- + + + set resultlist [list] + switch -- [string tolower $opt_case] { + upper { + set spec X + } + lower { + set spec x + } + default { + error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower" + } + } + set fmt "%${opt_width}.${opt_width}ll${spec}" + + set list_decimals [lmap d $list_decimals[unset list_decimals] {string map {_ ""} [string trim $d]}] + if {![string is digit -strict [string map {_ ""} $opt_empty]]} { + if {[lsearch $list_decimals ""] >=0} { + error "[namespace current]::dec2hex error: empty values in list cannot be mapped to non-decimal $opt_empty" + } + } else { + set opt_empty [string map {_ ""} $opt_empty] + if {[set first_empty [lsearch $list_decimals ""]] >= 0} { + set nonempty_head [lrange $list_decimals 0 $first_empty-1] + set list_decimals [concat $nonempty_head [lmap v [lrange $list_decimals $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [format [lrepeat [llength $list_decimals] $fmt] {*}$list_decimals] + } + + proc log2 x "expr {log(\$x)/[expr log(2)]}" + #*** !doctools + #[call [fun log2] [arg x]] + #[para]log base2 of x + #[para]This uses a 'live' proc body - the divisor for the change of base is computed once at definition time + #[para](courtesy of RS [uri https://wiki.tcl-lang.org/page/Additional+math+functions]) + + proc logbase {b x} { + #*** !doctools + #[call [fun logbase] [arg b] [arg x]] + #[para]log base b of x + #[para]This function uses expr's natural log and the change of base division. + #[para]This means for example that we can get results like: logbase 10 1000 = 2.9999999999999996 + #[para]Use expr's log10() function or tcl::mathfunc::log10 for base 10 + expr {log($x)/log($b)} + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::factors + @cmd -name punk::lib::factors\ + -summary\ + "Sorted factors of positive integer x"\ + -help\ + "Return a sorted list of the positive factors of x where x > 0 + For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. + (including zero itself in this context)* + + This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors + Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions + See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers + Comparisons were done with some numbers below 17 digits long + For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. + The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers + but has the disadvantage of being slower for 'small' numbers and using more memory. + If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get + there than computing the whole list, even for small values of x + * Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py + In other mathematical contexts zero may be considered not to divide anything." + @values -min 1 -max 1 + x -type integer + }] + } + proc factors {x} { + #*** !doctools + #[call [fun factors] [arg x]] + #[para]Return a sorted list of the positive factors of x where x > 0 + #[para]For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. (including zero itself in this context)* + #[para]This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors + #[para]Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions + #[para]See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers + #[para]Comparisons were done with some numbers below 17 digits long + #[para]For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. + #[para]The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers + #but has the disadvantage of being slower for 'small' numbers and using more memory. + #[para]If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get there than computing the whole list, even for small values of x + #[para]* Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py + #[para] In other mathematical contexts zero may be considered not to divide anything. + set factors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {($x % $j) == 0} { + lappend factors $j [expr {$x / $j}] + } + incr j + } + lappend factors $x + return [lsort -unique -integer $factors] + } + proc oddFactors {x} { + #*** !doctools + #[call [fun oddFactors] [arg x]] + #[para]Return a list of odd integer factors of x, sorted in ascending order + set j 2 + set max [expr {sqrt($x)}] + set factors [list 1] + while {$j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2} { + if {$other ni $factors} { + lappend factors $other + } + } + if {$j % 2} { + if {$j ni $factors} { + lappend factors $j + } + } + } + incr j + } + return [lsort -integer -increasing $factors] + } + proc greatestFactorBelow {x} { + #*** !doctools + #[call [fun greatestFactorBelow] [arg x]] + #[para]Return the largest factor of x excluding itself + #[para]factor functions can be useful for console layout calculations + #[para]See Tcllib math::numtheory for more extensive implementations + if {$x % 2 == 0 || $x == 0} { + return [expr {$x / 2}] + } + set j 3 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {$x % $j == 0} { + return [expr {$x / $j}] + } + incr j 2 + } + return 1 + } + proc greatestOddFactorBelow {x} { + #*** !doctools + #[call [fun greatestOddFactorBelow] [arg x]] + #[para]Return the largest odd integer factor of x excluding x itself + if {$x %2 == 0} { + return [greatestOddFactor $x] + } + set j 3 + #dumb brute force - time taken to compute is wildly variable on big numbers + #todo - use a (memoized?) generator of primes to reduce the search space + #tcllib math::numtheory has suitable functions - but do we want that dependency here? Testing shows brute-force often faster for small numbers. + set god 1 + set max [expr {sqrt($x)}] + while { $j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2 == 0} { + set god $j + } else { + set god [expr {$x / $j}] + #lowest j - so other side must be highest + break + } + } + incr j 2 + } + return $god + } + proc greatestOddFactor {x} { + #*** !doctools + #[call [fun greatestOddFactor] [arg x]] + #[para]Return the largest odd integer factor of x + #[para]For an odd value of x - this will always return x + if {$x % 2 != 0 || $x == 0} { + return $x + } + set r [expr {$x / 2}] + while {$r % 2 == 0} { + set r [expr {$r / 2}] + } + return $r + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::gcd + @cmd -name punk::lib::gcd\ + -summary\ + "Greatest common divisor of m and n."\ + -help\ + "Return the greatest common divisor of m and n. + Straight from Lars Hellström's math::numtheory library in Tcllib + + Graphical use: + An a by b rectangle can be covered with square tiles of side-length c, + only if c is a common divisor of a and b" + @values -min 2 -max 2 + m -type integer + n -type integer + }] + } + proc gcd {n m} { + #*** !doctools + #[call [fun gcd] [arg n] [arg m]] + #[para]Return the greatest common divisor of m and n + #[para]Straight from Lars Hellström's math::numtheory library in Tcllib + #[para]Graphical use: + #[para]An a by b rectangle can be covered with square tiles of side-length c, + #[para]only if c is a common divisor of a and b + + # + # Apply Euclid's good old algorithm + # + if { $n > $m } { + set t $n + set n $m + set m $t + } + + while { $n > 0 } { + set r [expr {$m % $n}] + set m $n + set n $r + } + + return $m + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lcm + @cmd -name punk::lib::lcm\ + -summary\ + "Lowest common multiple of m and n."\ + -help\ + "Return the lowest common multiple of m and n. + Straight from Lars Hellström's math::numtheory library in Tcllib" + @values -min 2 -max 2 + m -type integer + n -type integer + }] + } + proc lcm {n m} { + set gcd [gcd $n $m] + return [expr {$n*$m/$gcd}] + } + proc commonDivisors {x y} { + #*** !doctools + #[call [fun commonDivisors] [arg x] [arg y]] + #[para]Return a list of all the common factors of x and y + #[para](equivalent to factors of their gcd) + return [factors [gcd $x $y]] + } + + #experimental only - there are better/faster ways + proc sieve n { + set primes [list] + if {$n < 2} {return $primes} + set nums [tcl::dict::create] + for {set i 2} {$i <= $n} {incr i} { + tcl::dict::set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} + lappend primes $next + tcl::dict::for {next -} $nums break + } + return [concat $primes [tcl::dict::keys $nums]] + } + proc sieve2 n { + set primes [list] + if {$n < 2} {return $primes} + set nums [tcl::dict::create] + for {set i 2} {$i <= $n} {incr i} { + tcl::dict::set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} + lappend primes $next + #dict for {next -} $nums break + set next [lindex $nums 0] + } + return [concat $primes [tcl::dict::keys $nums]] + } + + proc hasglobs {str} { + #*** !doctools + #[call [fun hasglobs] [arg str]] + #[para]Return a boolean indicating whether str contains any of the glob characters: * ? [lb] [rb] + #[para]hasglobs uses append to preserve Tcls internal representation for str - so it should help avoid shimmering in the few cases where this may matter. + regexp {[*?\[\]]} [append obj2 $str {}] ;# int-rep preserving + } + + proc trimzero {number} { + #*** !doctools + #[call [fun trimzero] [arg number]] + #[para]Return number with left-hand-side zeros trimmed off - unless all zero + #[para]If number is all zero - a single 0 is returned + set trimmed [string trimleft $number 0] + if {[string length $trimmed] == 0} { + set trimmed 0 + } + return $trimmed + } + proc substring_count {str substring} { + #*** !doctools + #[call [fun substring_count] [arg str] [arg substring]] + #[para]Search str and return number of occurrences of substring + + #faster than lsearch on split for str of a few K + if {$substring eq ""} {return 0} + set occurrences [expr {[string length $str]-[string length [string map [list $substring {}] $str]]}] + return [expr {$occurrences / [string length $substring]}] + } + + proc dict_merge_ordered {defaults main} { + #*** !doctools + #[call [fun dict_merge_ordered] [arg defaults] [arg main]] + #[para]The standard dict merge accepts multiple dicts with values from dicts to the right (2nd argument) taking precedence. + #[para]When merging with a dict of default values - this means that any default key/vals that weren't in the main dict appear in the output before the main data. + #[para]This function merges the two dicts whilst maintaining the key order of main followed by defaults. + + #1st merge (inner merge) with wrong values taking precedence - but right key-order - then (outer merge) restore values + return [tcl::dict::merge [tcl::dict::merge $main $defaults] $main] + } + + proc askuser {question} { + #*** !doctools + #[call [fun askuser] [arg question]] + #[para]A basic utility to read an answer from stdin + #[para]The prompt is written to the terminal and then it waits for a user to type something + #[para]stdin is temporarily configured to blocking and then put back in its original state in case it wasn't already so. + #[para]If the terminal is using punk::console and is in raw mode - the terminal will temporarily be put in line mode. + #[para](Generic terminal raw vs linemode detection not yet present) + #[para]The user must hit enter to submit the response + #[para]The return value is the string if any that was typed prior to hitting enter. + #[para]The question argument can be manually colourised using the various punk::ansi funcitons + #[example_begin] + # set answer [lb]punk::lib::askuser "[lb]a+ green bold[rb]Do you want to proceed? (Y|N)[lb]a[rb]"[rb] + # if {[lb]string match y* [lb]string tolower $answer[rb][rb]} { + # puts "Proceeding" + # } else { + # puts "Cancelled by user" + # } + #[example_end] + puts stdout $question + flush stdout + set stdin_state [chan configure stdin] + if {[catch { + package require punk::console + set console_raw [tsv::get console is_raw] + } err_console]} { + #assume normal line mode + set console_raw 0 + } + try { + chan configure stdin -blocking 1 + if {$console_raw} { + punk::console::disableRaw + set answer [gets stdin] + punk::console::enableRaw + } else { + set answer [gets stdin] + } + } finally { + chan configure stdin -blocking [tcl::dict::get $stdin_state -blocking] + } + return $answer + } + + #like textutil::adjust::indent - but doesn't strip trailing lines, and doesn't implement skip parameter. + proc indent {text {prefix " "}} { + set result [list] + foreach line [split $text \n] { + if {[string trim $line] eq ""} { + lappend result "" + } else { + lappend result $prefix[string trimright $line] + } + } + return [join $result \n] + } + #dedent? + proc undent {text {max -1}} { + if {$text eq ""} { + return "" + } + set lines [split $text \n] + set nonblank [list] + foreach ln $lines { + if {[string trim $ln] eq ""} { + continue + } + lappend nonblank $ln + } + set lcp [longestCommonPrefix $nonblank] + if {$lcp eq ""} { + return $text + } + regexp {^([\t ]*)} $lcp _m lcp + if {$lcp eq ""} { + return $text + } + set len [string length $lcp] + if {$max != -1} { + set len [expr {min($len,$max)}] + } + set result [list] + foreach ln $lines { + if {[string trim $ln] eq ""} { + lappend result "" + } else { + lappend result [string range $ln $len end] + } + } + return [join $result \n] + } + #A version of textutil::string::longestCommonPrefixList + proc longestCommonPrefix {items} { + if {[llength $items] <= 1} { + return [lindex $items 0] + } + set items [lsort $items[unset items]] + set min [lindex $items 0] + set max [lindex $items end] + #if first and last of sorted list share a prefix - then all do (first and last of sorted list are the most different in the list) + #(sort order nothing to do with length - e.g min may be longer than max) + if {[string length $min] > [string length $max]} { + set temp $min + set min $max + set max $temp + } + set n [string length $min] + set prefix "" + set i -1 + while {[incr i] < $n && ([set c [string index $min $i]] eq [string index $max $i])} { + append prefix $c + } + return $prefix + } + + #e.g linesort -decreasing $data + proc linesort {args} { + #*** !doctools + #[call [fun linesort] [opt {sortoption ?val?...}] [arg textblock]] + #[para]Sort lines in textblock + #[para]Returns another textblock with lines sorted + #[para]options are flags as accepted by lsort ie -ascii -command -decreasing -dictionary -index -indices -integer -nocase -real -stride -unique + if {[llength $args] < 1} { + error "linesort missing lines argument" + } + set lines [lindex $args end] + set opts [lrange $args 0 end-1] + #.= list $lines |@0,sortopts/1> linelist |> .=data>1,sortopts>1* lsort |> list_as_lines <| {*}$opts + list_as_lines [lsort {*}$opts [linelist $lines]] + } + + proc list_as_lines {args} { + #*** !doctools + #[call [fun list_as_lines] [opt {-joinchar char}] [arg linelist]] + #[para]This simply joins the elements of the list with -joinchar + #[para]It is mainly intended for use in pipelines where the primary argument comes at the end - but it can also be used as a general replacement for join $lines + #[para]The sister function lines_as_list takes a block of text and splits it into lines - but with more options related to trimming the block and/or each line. + if {[set eop [lsearch $args --]] == [llength $args]-2} { + #end-of-opts not really necessary - except for consistency with lines_as_list + set args [concat [lrange $args 0 $eop-1] [lrange $args $eop+1 end]] + } + if {[llength $args] == 3 && [lindex $args 0] eq "-joinchar"} { + set joinchar [lindex $args 1] + set lines [lindex $args 2] + } elseif {[llength $args] == 1} { + set joinchar "\n" + set lines [lindex $args 0] + } else { + error "list_as_lines usage: list_as_lines ?-joinchar ? " + } + return [join $lines $joinchar] + } + proc list_as_lines2 {args} { + #eat or own dogfood version - shows the implementation is simpler - but unfortunately not suitable for a simple function like this which should be as fast as possible? + lassign [tcl::dict::values [punk::args::parse $args withdef { + -joinchar -default \n + @values -min 1 -max 1 + }]] leaders opts values + + return [join [tcl::dict::get $values 0] [tcl::dict::get $opts -joinchar]] + } + + proc lines_as_list {args} { + #*** !doctools + #[call [fun lines_as_list] [opt {option value ...}] [arg text]] + #[para]Returns a list of possibly trimmed lines depeding on options + #[para]The concept of lines is raw lines from splitting on newline after crlf is mapped to lf + #[para]- not console lines which may be entirely different due to control characters such as vertical tabs or ANSI movements + + #The underlying function linelist has the validation code which gives nicer usage errors. + #we can't use a dict merge here without either duplicating the underlying validation somewhat, or risking a default message from dict merge error + #..because we don't know what to say if there are odd numbers of args + #we can guess that it's ok to insert our default if no -block found in $args - but as a general principle this mightn't always work + #e.g if -block is also a valid value for the textblock itself. Which in this case it is - although unlikely, and our -block {} default is irrelevant in that case anyway + + if {[lsearch $args "--"] == [llength $args]-2} { + set opts [lrange $args 0 end-2] + } else { + set opts [lrange $args 0 end-1] + } + #set opts [tcl::dict::merge {-block {}} $opts] + set bposn [lsearch $opts -block] + if {$bposn < 0} { + lappend opts -block {} + } + set text [lindex $args end] + #tailcall linelist {*}$opts $text + return [linelist {*}$opts $text] + } + #this demonstrates the ease of using an args processor - but as lines_as_list is heavily used in terminal output - we can't afford the extra microseconds + proc lines_as_list2 {args} { + #pass -anyopts 1 so we can let the next function decide what arguments are valid - but still pass our defaults + #-anyopts 1 avoids having to know what to say if odd numbers of options passed etc + #we don't have to decide what is an opt vs a value + #even if the caller provides the argument -block without a value the next function's validation will report a reasonable error because there is now nothing in $values (consumed by -block) + lassign [tcl::dict::values [punk::args::parse $args withdef { + @opts -any 1 + -block -default {} + }]] leaderdict opts valuedict + tailcall linelist {*}$opts {*}[tcl::dict::values $valuedict] + } + + # important for pipeline & match_assign + # -line trimline|trimleft|trimright -block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty -commandprefix {string length} ? + # -block trimming only trims completely empty lines. use -line trimming to remove whitespace e.g -line trimright will clear empty lines without affecting leading whitespace on other lines that aren't pure whitespace + set linelist_body { + set usage "linelist ?-ansiresets auto|? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" + if {[llength $args] == 0} { + error "linelist missing textchunk argument usage:$usage" + } + set text [lindex $args end] + set text [string map {\r\n \n} $text] ;#review - option? + + set arglist [lrange $args 0 end-1] + set opts [tcl::dict::create\ + -block {trimhead1 trimtail1}\ + -line {}\ + -commandprefix ""\ + -ansiresets auto\ + -ansireplays 0\ + ] + foreach {o v} $arglist { + switch -- $o { + -block - -line - -commandprefix - -ansiresets - -ansireplays { + tcl::dict::set opts $o $v + } + default { + error "linelist: Unrecognized option '$o' usage:$usage" + } + } + } + # -- --- --- --- --- --- + set opt_block [tcl::dict::get $opts -block] + if {[llength $opt_block]} { + foreach bo $opt_block { + switch -- $bo { + trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} + default { + set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] + error "linelist: unknown -block option value: $bo known values: $known_blockopts" + } + } + } + #normalize certain combos + if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimall" in $opt_block} { + #no other block options make sense in combination with this + set opt_block [list "trimall"] + } + + #TODO + if {"triminner" in $opt_block } { + error "linelist -block triminner not implemented - sorry" + } + + } + + + # -- --- --- --- --- --- + set opt_line [tcl::dict::get $opts -line] + set tl_left 0 + set tl_right 0 + set tl_both 0 + foreach lo $opt_line { + switch -- $lo { + trimline { + set tl_both 1 + } + trimleft { + set tl_left 1 + } + trimright { + set tl_right 1 + } + default { + set known_lineopts [list trimline trimleft trimright] + error "linelist: unknown -line option value: $lo known values: $known_lineopts" + } + } + } + #normalize trimleft trimright combo + if {$tl_left && $tl_right} { + set opt_line [list "trimline"] + set tl_both 1 + } + # -- --- --- --- --- --- + set opt_commandprefix [tcl::dict::get $opts -commandprefix] + # -- --- --- --- --- --- + set opt_ansiresets [tcl::dict::get $opts -ansiresets] + # -- --- --- --- --- --- + set opt_ansireplays [tcl::dict::get $opts -ansireplays] + if {$opt_ansireplays} { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 1 + } + } else { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 0 + } + } + # -- --- --- --- --- --- + set linelist [list] + set nlsplit [split $text \n] + if {![llength $opt_line]} { + set linelist $nlsplit + #lappend linelist {*}$nlsplit + } else { + #already normalized trimleft+trimright to trimline + if {$tl_both} { + foreach ln $nlsplit { + lappend linelist [string trim $ln] + } + } elseif {$tl_left} { + foreach ln $nlsplit { + lappend linelist [string trimleft $ln] + } + } elseif {$tl_right} { + foreach ln $nlsplit { + lappend linelist [string trimright $ln] + } + } + } + + if {"collateempty" in $opt_block} { + set inputlist $linelist[set linelist [list]] + set last "-" + foreach input $inputlist { + if {$input ne ""} { + lappend linelist $input + set last "-" + } else { + if {$last ne ""} { + lappend linelist "" + } + set last "" + } + } + } + + if {"trimall" in $opt_block} { + set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] + } else { + set start 0 + if {"trimhead" in $opt_block} { + set idx 0 + set lastempty -1 + foreach ln $linelist { + if {[lindex $linelist $idx] ne ""} { + break + } else { + set lastempty $idx + } + incr idx + } + if {$lastempty >=0} { + set start [expr {$lastempty +1}] + } + } + set linelist [lrange $linelist $start end] + + if {"trimtail" in $opt_block} { + set revlinelist [lreverse $linelist][set linelist {}] + set i 0 + foreach ln $revlinelist { + if {$ln ne ""} { + set linelist [lreverse [lrange $revlinelist $i end]] + break + } + incr i + } + } + + # --- --- + set start 0 + set end "end" + if {"trimhead1" in $opt_block} { + if {[lindex $linelist 0] eq ""} { + set start 1 + } + } + if {"trimtail1" in $opt_block} { + if {[lindex $linelist end] eq ""} { + set end "end-1" + } + } + set linelist [lrange $linelist $start $end] + } + + #review - we need to make sure ansiresets don't accumulate/grow on any line + #Each resulting line should have a reset of some type at start and a pure-reset at end to stop + #see if we can find an ST sequence that most terminals will not display for marking sections? + if {$opt_ansireplays} { + ;#package require punk::ansi + if {$opt_ansiresets} { + set RST "\x1b\[0m" + } else { + set RST "" + } + set replaycodes $RST ;#todo - default? + set transformed [list] + #shortcircuit common case of no ansi + #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. + #This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) + #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable + #detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures) + + #we use detectcode_in_list instead of detect_in_list + #detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message) + # - but the main reason is it is slightly faster. + if {![punk::ansi::ta::detectcode_in_list $linelist]} { + if {$opt_ansiresets} { + foreach ln $linelist { + lappend transformed $RST$ln$RST + } + set linelist $transformed + } + } else { + + #INLINE punk::ansi::codetype::is_sgr_reset + #regexp {\x1b\[0*m$} $code + set re_is_sgr_reset {\x1b\[0*m$} + #INLINE punk::ansi::codetype::is_sgr + #regexp {\033\[[0-9;:]*m$} $code + set re_is_sgr {\x1b\[[0-9;:]*m$} + + foreach ln $linelist { + #set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable + + #set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + #get_codes_single lists only the codes. no plaintext or empty elements + set ansisplits [punk::ansi::ta::get_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + if {[llength $ansisplits] == 0} { + #plaintext only - no ansi codes in line + lappend transformed [string cat $replaycodes $ln $RST] + #leave replaycodes as is for next line + set nextreplay $replaycodes + } else { + set tail $RST + set lastcode [lindex $ansisplits end] ;#may or may not be SGR + set lastcodeoffset [expr {[string length $lastcode]-1}] + if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { + if {[string range $ln end-$lastcodeoffset end] eq $lastcode} { + #last plaintext is empty. So the line is already suffixed with a reset + set tail "" + set nextreplay $RST + } else { + #trailing text has been reset within line - but no tail reset present + #we normalize by putting a tail reset on anyway + set tail $RST + set nextreplay $RST + } + } elseif {[string range $ln end-$lastcodeoffset end] eq $lastcode && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { + #code is at tail (no trailing plaintext) + #No tail reset - and no need to examine whole line to determine stack that is in effect + set tail $RST + set nextreplay $lastcode + } else { + #last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect + #last codeset doesn't end in a pure-reset + #whether code was at very end or not - add a reset tail + set tail $RST + #determine effective replay for line + set codestack [list start] + foreach code $ansisplits { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list] ;#different from 'start' marked - this means we've had a reset + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + if {[punk::ansi::codetype::is_sgr $code]} { + #todo - proper test of each code - so we only take latest background/foreground etc. + #requires handling codes with varying numbers of parameters. + #basic simplification - remove straight dupes. + set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + } ;#else gx0 or other code - we don't want to stack it with SGR codes + } + } + if {$codestack eq [list start]} { + #No SGRs - may have been other codes + set line_has_sgr 0 + } else { + #list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes + set line_has_sgr 1 + if {[lindex $codestack 0] eq "start"} { + set codestack [lrange $codestack 1 end] + } + } + + #set newreplay [join $codestack ""] + set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] + + if {$line_has_sgr && $newreplay ne $replaycodes} { + #adjust if it doesn't already does a reset at start + if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { + set nextreplay $newreplay + } else { + set nextreplay $RST$newreplay + } + } else { + set nextreplay $replaycodes + } + } + if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { + #no point attaching any replay + lappend transformed [string cat $ln $tail] + } else { + lappend transformed [string cat $replaycodes $ln $tail] + } + } + set replaycodes $nextreplay + } + set linelist $transformed + } + } + + if {[llength $opt_commandprefix]} { + set transformed [list] + foreach ln $linelist { + lappend transformed [{*}$opt_commandprefix $ln] + } + set linelist $transformed + } + + return $linelist + } + if {$has_punk_ansi} { + #optimise linelist as much as possible + set linelist_body [string map { ""} $linelist_body] + } else { + #punk ansi not avail at time of package load. + #by putting in calls to punk::ansi the user will get appropriate error messages + set linelist_body [string map { "package require punk::ansi"} $linelist_body] + } + + set linelist_body_original { + set usage "linelist ?-ansiresets auto|? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" + if {[llength $args] == 0} { + error "linelist missing textchunk argument usage:$usage" + } + set text [lindex $args end] + set text [string map {\r\n \n} $text] ;#review - option? + + set arglist [lrange $args 0 end-1] + set opts [tcl::dict::create\ + -block {trimhead1 trimtail1}\ + -line {}\ + -commandprefix ""\ + -ansiresets auto\ + -ansireplays 0\ + ] + foreach {o v} $arglist { + switch -- $o { + -block - -line - -commandprefix - -ansiresets - -ansireplays { + tcl::dict::set opts $o $v + } + default { + error "linelist: Unrecognized option '$o' usage:$usage" + } + } + } + # -- --- --- --- --- --- + set opt_block [tcl::dict::get $opts -block] + if {[llength $opt_block]} { + foreach bo $opt_block { + switch -- $bo { + trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} + default { + set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] + error "linelist: unknown -block option value: $bo known values: $known_blockopts" + } + } + } + #normalize certain combos + if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimall" in $opt_block} { + #no other block options make sense in combination with this + set opt_block [list "trimall"] + } + + #TODO + if {"triminner" in $opt_block } { + error "linelist -block triminner not implemented - sorry" + } + + } + + + # -- --- --- --- --- --- + set opt_line [tcl::dict::get $opts -line] + set tl_left 0 + set tl_right 0 + set tl_both 0 + foreach lo $opt_line { + switch -- $lo { + trimline { + set tl_both 1 + } + trimleft { + set tl_left 1 + } + trimright { + set tl_right 1 + } + default { + set known_lineopts [list trimline trimleft trimright] + error "linelist: unknown -line option value: $lo known values: $known_lineopts" + } + } + } + #normalize trimleft trimright combo + if {$tl_left && $tl_right} { + set opt_line [list "trimline"] + set tl_both 1 + } + # -- --- --- --- --- --- + set opt_commandprefix [tcl::dict::get $opts -commandprefix] + # -- --- --- --- --- --- + set opt_ansiresets [tcl::dict::get $opts -ansiresets] + # -- --- --- --- --- --- + set opt_ansireplays [tcl::dict::get $opts -ansireplays] + if {$opt_ansireplays} { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 1 + } + } else { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 0 + } + } + # -- --- --- --- --- --- + set linelist [list] + set nlsplit [split $text \n] + if {![llength $opt_line]} { + set linelist $nlsplit + #lappend linelist {*}$nlsplit + } else { + #already normalized trimleft+trimright to trimline + if {$tl_both} { + foreach ln $nlsplit { + lappend linelist [string trim $ln] + } + } elseif {$tl_left} { + foreach ln $nlsplit { + lappend linelist [string trimleft $ln] + } + } elseif {$tl_right} { + foreach ln $nlsplit { + lappend linelist [string trimright $ln] + } + } + } + + if {"collateempty" in $opt_block} { + set inputlist $linelist[set linelist [list]] + set last "-" + foreach input $inputlist { + if {$input ne ""} { + lappend linelist $input + set last "-" + } else { + if {$last ne ""} { + lappend linelist "" + } + set last "" + } + } + } + + if {"trimall" in $opt_block} { + set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] + } else { + set start 0 + if {"trimhead" in $opt_block} { + set idx 0 + set lastempty -1 + foreach ln $linelist { + if {[lindex $linelist $idx] ne ""} { + break + } else { + set lastempty $idx + } + incr idx + } + if {$lastempty >=0} { + set start [expr {$lastempty +1}] + } + } + set linelist [lrange $linelist $start end] + + if {"trimtail" in $opt_block} { + set revlinelist [lreverse $linelist][set linelist {}] + set i 0 + foreach ln $revlinelist { + if {$ln ne ""} { + set linelist [lreverse [lrange $revlinelist $i end]] + break + } + incr i + } + } + + # --- --- + set start 0 + set end "end" + if {"trimhead1" in $opt_block} { + if {[lindex $linelist 0] eq ""} { + set start 1 + } + } + if {"trimtail1" in $opt_block} { + if {[lindex $linelist end] eq ""} { + set end "end-1" + } + } + set linelist [lrange $linelist $start $end] + } + + #review - we need to make sure ansiresets don't accumulate/grow on any line + #Each resulting line should have a reset of some type at start and a pure-reset at end to stop + #see if we can find an ST sequence that most terminals will not display for marking sections? + if {$opt_ansireplays} { + #package require punk::ansi + + if {$opt_ansiresets} { + set RST "\x1b\[0m" + } else { + set RST "" + } + set replaycodes $RST ;#todo - default? + set transformed [list] + #shortcircuit common case of no ansi + #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. + #This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) + #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable + #detect_in_list will check at first level. (not intended for detecting ansi in deeper structures) + if {![punk::ansi::ta::detect_in_list $linelist]} { + if {$opt_ansiresets} { + foreach ln $linelist { + lappend transformed $RST$ln$RST + } + set linelist $transformed + } + } else { + + #INLINE punk::ansi::codetype::is_sgr_reset + #regexp {\x1b\[0*m$} $code + set re_is_sgr_reset {\x1b\[0*m$} + #INLINE punk::ansi::codetype::is_sgr + #regexp {\033\[[0-9;:]*m$} $code + set re_is_sgr {\x1b\[[0-9;:]*m$} + + foreach ln $linelist { + #set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable + + set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + if {[llength $ansisplits]<= 1} { + #plaintext only - no ansi codes in line + lappend transformed [string cat $replaycodes $ln $RST] + #leave replaycodes as is for next line + set nextreplay $replaycodes + } else { + set tail $RST + set lastcode [lindex $ansisplits end-1] ;#may or may not be SGR + if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { + if {[lindex $ansisplits end] eq ""} { + #last plaintext is empty. So the line is already suffixed with a reset + set tail "" + set nextreplay $RST + } else { + #trailing text has been reset within line - but no tail reset present + #we normalize by putting a tail reset on anyway + set tail $RST + set nextreplay $RST + } + } elseif {[lindex $ansisplits end] ne "" && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { + #No tail reset - and no need to examine whole line to determine stack that is in effect + set tail $RST + set nextreplay $lastcode + } else { + #last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect + #last codeset doesn't end in a pure-reset + #whether code was at very end or not - add a reset tail + set tail $RST + #determine effective replay for line + set codestack [list start] + foreach {pt code} $ansisplits { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list] ;#different from 'start' marked - this means we've had a reset + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + if {[punk::ansi::codetype::is_sgr $code]} { + #todo - proper test of each code - so we only take latest background/foreground etc. + #requires handling codes with varying numbers of parameters. + #basic simplification - remove straight dupes. + set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + } ;#else gx0 or other code - we don't want to stack it with SGR codes + } + } + if {$codestack eq [list start]} { + #No SGRs - may have been other codes + set line_has_sgr 0 + } else { + #list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes + set line_has_sgr 1 + if {[lindex $codestack 0] eq "start"} { + set codestack [lrange $codestack 1 end] + } + } + + #set newreplay [join $codestack ""] + set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] + + if {$line_has_sgr && $newreplay ne $replaycodes} { + #adjust if it doesn't already does a reset at start + if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { + set nextreplay $newreplay + } else { + set nextreplay $RST$newreplay + } + } else { + set nextreplay $replaycodes + } + } + if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { + #no point attaching any replay + lappend transformed [string cat $ln $tail] + } else { + lappend transformed [string cat $replaycodes $ln $tail] + } + } + set replaycodes $nextreplay + } + set linelist $transformed + } + } + + if {[llength $opt_commandprefix]} { + set transformed [list] + foreach ln $linelist { + lappend transformed [{*}$opt_commandprefix $ln] + } + set linelist $transformed + } + + return $linelist + } + if {$has_punk_ansi} { + #optimise linelist as much as possible + set linelist_body [string map { ""} $linelist_body] + } else { + #punk ansi not avail at time of package load. + #by putting in calls to punk::ansi the user will get appropriate error messages + set linelist_body [string map { "package require punk::ansi"} $linelist_body] + } + proc linelist {args} $linelist_body + + + interp alias {} errortime {} punk::lib::errortime + proc errortime {script groupsize {iters 2}} { + #by use MAK from https://wiki.tcl-lang.org/page/How+to+Measure+Performance + set i 0 + set times {} + if {$iters < 2} {set iters 2} + + for {set i 0} {$i < $iters} {incr i} { + set result [uplevel [list time $script $groupsize]] + lappend times [lindex $result 0] + } + + set average 0.0 + set s2 0.0 + + foreach time $times { + set average [expr {$average + double($time)/$iters}] + } + + foreach time $times { + set s2 [expr {$s2 + (($time-$average)*($time-$average) / ($iters-1))}] + } + + set sigma [expr {int(sqrt($s2))}] + set average [expr {int($average)}] + + return "$average +/- $sigma microseconds per iteration" + } + + #test function to use with show_jump_tables + #todo - check if switch compilation to jump tables differs by Tcl version + proc switch_char_test {c} { + set dec [scan $c %c] + foreach t [list 1 2 3] { + switch -- $c { + x { + return [list $dec x $t] + } + y { + return [list $dec y $t] + } + z { + return [list $dec z $t] + } + } + } + + #tcl 8.6/8.7 (at least) + #curlies must be unescaped and unbraced to work as literals in switch and enable it to compile to jumpTable + switch -- $c { + a { + return [list $dec a] + } + {"} { + return [list $dec dquote] + } + {[} {return [list $dec lb]} + {]} {return [list $dec rb]} + "{" { + return [list $dec lbrace] + } + "}" { + return [list $dec rbrace] + } + default { + return [list $dec $c] + } + } + + + + } + + #we are interested in seeing jumpTable line and following lines up until next line starting with "Command" or bracketed number e.g (164) + proc show_jump_tables {args} { + #avoiding use of 'info cmdtype' as unavaliable in safe interps as at 2024-06. + if {[llength $args] == 1} { + set data [tcl::unsupported::disassemble proc [lindex $args 0]] + } elseif {[llength $args] == 2} { + #review - this looks for direct methods on the supplied object/class, and then tries to disassemble method on the supplied class or class of supplied object if it isn't a class itself. + #not sure if this handles more complex hierarchies or mixins etc. + lassign $args obj method + if {![info object isa object $obj]} { + error "show_jump_tables unable to examine '$args'. $obj is not an oo object" + } + #classes are objects too and can have direct methods + if {$method in [info object methods $obj]} { + set data [tcl::unsupported::disassemble objmethod $obj $method] + } else { + if {![info object isa class $obj]} { + set obj [info object class $obj] + } + set data [tcl::unsupported::disassemble method $obj $method] + } + } else { + error "show_jump_tables expected a procname or a class/object and method" + } + set result "" + set in_jt 0 + foreach ln [split $data \n] { + set tln [::tcl::string::trim $ln] + if {!$in_jt} { + if {[::tcl::string::match *jumpTable* $ln]} { + punk::ns::call_frame + append result $ln \n + set in_jt 1 + } + } else { + if {[::tcl::string::match Command* $tln] || [::tcl::string::match "(*) *" $tln]} { + set in_jt 0 + } else { + append result $ln \n + } + } + } + return $result + } + + #a test + # punk::ns::cmdtracereturn punk::lib::disassemble ::punk::ns::test_switch4 + # Note the different disassemble result when trace is running. + proc disassemble {procname} { + tcl::unsupported::disassemble proc $procname + } + + proc temperature_f_to_c {deg_fahrenheit} { + return [expr {($deg_fahrenheit -32) * (5/9.0)}] + } + proc temperature_c_to_f {deg_celsius} { + return [expr {($deg_celsius * (9/5.0)) + 32}] + } + + proc interp_sync_package_paths {interp} { + if {![interp exists $interp]} { + error "interp_sync_package_paths error. interp '$interp' not found. Create it first with \[interp create $interp\]" + } + interp eval $interp [list set ::auto_path $::auto_path] + interp eval $interp {tcl::tm::remove {*}[tcl::tm::list]} + interp eval $interp [list tcl::tm::add {*}[lreverse [tcl::tm::list]]] + } + + proc valcopy {obj} { + append obj2 $obj {} + } + proc set_valcopy {varname obj} { + #used by repl's codeinterp. Maintains internal rep, easier to call e.g interp eval code [list punk::set_valcopy varnmame $val] + append obj2 $obj {} + uplevel 1 [list set $varname $obj2] + } + + + + proc format_number {numbers_or_commaformattednumbers {delim ""} {groupsize ""}} { + variable has_twapi + if {$has_twapi} { + if {$delim eq "" && $groupsize eq ""} { + set localeid [twapi::get_system_default_lcid] + } + } + #when using twapi we currently only get the localeid - not the specific defaults + #when not using twapi, or on non-windows platforms - we don't currently have a mechanism to look up user preferences for this + set default_delim "," + set default_groupsize 3 + + set results [list] + set nums [valcopy $numbers_or_commaformattednumbers] ;#stops single num from getting internal rep of list + foreach inputnum $nums { + set number [valcopy $inputnum] + #also handle tcl 8.7+ underscores in numbers + set number [string map [list _ "" , ""] $number] + #normalize e.g 2e4 -> 20000.0 + set number [expr {$number}] + + if {$has_twapi} { + if {$delim eq "" && $groupsize eq ""} { + lappend results [twapi::format_number $number $localeid -idigits -1] + continue + } else { + #setting just one of delim or groupsize means we don't get the user's localeid based default for the non-set one + #todo - document it? Find a way to lookup localeid based defaults whenever either is unspecified? + if {$delim eq ""} {set delim $default_delim} + if {$groupsize eq ""} {set groupsize $default_groupsize} + lappend results [twapi::format_number $number 0 -idigits -1 -sthousand $delim -sgrouping $groupsize] + continue + } + } + #todo - get configured user defaults + if {$delim eq ""} { + set delim $default_delim + } + if {$groupsize eq ""} { + set groupsize $default_groupsize + } + + lappend results [delimit_number $number $delim $groupsize] + } + + if {[llength $results] == 1} { + #keep intrep as string rather than list + return [lindex $results 0] + } + return $results + } + + + #from wiki https://wiki.tcl-lang.org/page/Delimiting+Numberse + # Given a number represented as a string, insert delimiters to break it up for + # readability. Normally, the delimiter will be a comma which will be inserted every + # three digits. However, the delimiter and groupsize are optional arguments, + # permitting use in other locales. + # + # The string is assumed to consist of digits, possibly preceded by spaces, + # and possibly containing a decimal point, i.e.: [:space:]*[:digit:]*\.[:digit:]* + + proc delimit_number {unformattednumber {delim ","} {GroupSize 3}} { + set number [valcopy $unformattednumber] + set number [string map {_ ""} $number] + #normalize using expr - e.g 2e4 -> 20000.0 + set number [expr {$number}] + # First, extract right hand part of number, up to and including decimal point + set point [string last "." $number]; + if {$point >= 0} { + set PostDecimal [string range $number $point+1 end]; + set PostDecimalP 1; + } else { + set point [expr {[string length $number] + 1}] + set PostDecimal ""; + set PostDecimalP 0; + } + + # Now extract any leading spaces. review - regex for whitespace instead of just ascii space? + set ind 0; + while {[string equal [string index $number $ind] \u0020]} { + incr ind; + } + set FirstNonSpace $ind; + set LastSpace [expr {$FirstNonSpace - 1}]; + set LeadingSpaces [string range $number 0 $LastSpace]; + + # Now extract the non-fractional part of the number, omitting leading spaces. + set MainNumber [string range $number $FirstNonSpace $point-1]; + + # Insert commas into the non-fractional part. + set Length [string length $MainNumber]; + set Phase [expr {$Length % $GroupSize}] + set PhaseMinusOne [expr {$Phase -1}]; + set DelimitedMain ""; + + #First we deal with the extra stuff. + if {$Phase > 0} { + append DelimitedMain [string range $MainNumber 0 $PhaseMinusOne]; + } + set FirstInGroup $Phase; + set LastInGroup [expr {$FirstInGroup + $GroupSize -1}]; + while {$LastInGroup < $Length} { + if {$FirstInGroup > 0} { + append DelimitedMain $delim; + } + append DelimitedMain [string range $MainNumber $FirstInGroup $LastInGroup]; + incr FirstInGroup $GroupSize + incr LastInGroup $GroupSize + } + + # Reassemble the number. + if {$PostDecimalP} { + return [format "%s%s.%s" $LeadingSpaces $DelimitedMain $PostDecimal]; + } else { + return [format "%s%s" $LeadingSpaces $DelimitedMain]; + } + } + + #review - there are various type of uuid - we should use something consistent across platforms + #twapi is used on windows because it's about 5 times faster - but is this more important than consistency? + #twapi is much slower to load in the first place (e.g 75ms vs 6ms if package names already loaded) - so for oneshots tcllib uuid is better anyway + #(counterpoint: in the case of punk - we currently need twapi anyway on windows) + #does tcllib's uuid use the same mechanisms on different platforms anyway? + if {$has_twapi} { + interp alias "" ::punk::lib::uuid "" twapi::new_uuid + } else { + catch {package require uuid} + interp alias "" ::punk::lib::uuid "" uuid::uuid generate + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +tcl::namespace::eval punk::lib::flatgrid { + namespace export filler_count rows cols col row block + + #WARNING - requires lseq and 'lsearch -stride' + #WARNING - lsearch -stride oddity with empty strings https://core.tcl-lang.org/tcl/tktview/edebb6a4 + #todo - 8.6 fallback? + + proc filler_count {listlen numcolumns} { + #if {$numcolumns <= 0} {error "filler_count requires 1 or more numcolumns"} ;#or allow divide by zero error + #if {$listlen == 0} {return $numcolumns} ;#an option - but returning zero might make more sense + expr {($numcolumns - ($listlen % $numcolumns)) % $numcolumns} + } + proc rows {list numcolumns {blank NULL}} { + set numblanks [filler_count [llength $list] $numcolumns] + set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] + set splits [lseq 0 to [llength $padded_list] by $numcolumns] + set rows [list] + set i 1 + foreach s [lrange $splits 0 end-1] { + lappend rows [lrange $padded_list $s [lindex $splits $i]-1] + incr i + } + return $rows + } + proc cols {list numcolumns {blank NULL}} { + set cols [list] + foreach colindex [lseq 0 $numcolumns-1] { + lappend cols [lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] *] + } + return $cols + } + proc cols2 {list numcolumns {blank NULL}} { + set cols [list] + foreach colindex [lseq 0 $numcolumns-1] { + lappend cols [col2 $list $numcolumns $colindex $blank] + } + return $cols + } + proc col {list numcolumns colindex {blank NULL}} { + lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] * + } + proc col2 {list numcolumns colindex {blank NULL}} { + set numblanks [filler_count [llength $list] $numcolumns] + set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] + set splits [lseq 0 to [llength $padded_list] by $numcolumns] + set col [list] + foreach s [lrange $splits 0 end-1] { + lappend col [lindex $padded_list $s+$colindex] + } + return $col + } + proc col3 {list numcolumns colindex {blank NULL}} { + set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] + lmap s [lrange [lseq 0 to [llength $padded_list] by $numcolumns] 0 end-1] {lindex $padded_list $s+$colindex} + } + proc col4 {list numcolumns colindex {blank NULL}} { + #slow + set vars [lrepeat $numcolumns _] + lset vars $colindex v + if {$blank eq ""} { + return [lmap $vars $list {set v}] + } + set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] + lmap $vars [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] {set v} + } + + proc block {list numcolumns {blank NULL}} { + set colblocks [list] + foreach c [cols $list $numcolumns $blank] { + lappend colblocks [join $c \n] " " + } + textblock::join -- {*}$colblocks + } + proc block2 {list numcolumns {blank NULL}} { + set colblocks [list] + foreach c [cols2 $list $numcolumns $blank] { + lappend colblocks [join $c \n] " " + } + textblock::join -- {*}$colblocks + } +} + +tcl::namespace::eval punk::lib::test { + + + +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#todo - way to generate 'internal' docs separately? +#*** !doctools +#[section Internal] +tcl::namespace::eval punk::lib::system { + #*** !doctools + #[subsection {Namespace punk::lib::system}] + #[para] Internal functions that are not part of the API + #[list_begin definitions] + + + proc mostFactorsBelow {n} { + ##*** !doctools + #[call [fun mostFactorsBelow] [arg n]] + #[para]Find the number below $n which has the greatest number of factors + #[para]This will get slow quickly as n increases (100K = 1s+ 2024) + set most 0 + set mostcount 0 + for {set i 1} {$i < $n} {incr i} { + set fc [llength [punk::lib::factors $i]] + if {$fc > $mostcount} { + set most $i + set mostcount $fc + } + } + return [list number $most numfactors $mostcount] + } + proc factorCountBelow_punk {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [punk::lib::factors $i]] + } + return $tally + } + proc factorCountBelow_numtheory {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! (significantly slower than factorCountBelow_punk) + package require math::numtheory + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [math::numtheory::factors $i]] + } + return $tally + } + + proc factors2 {x} { + ##*** !doctools + #[call [fun factors2] [arg x]] + #[para]Return a sorted list of factors of x + #[para]A similar brute-force mechanism to factors - but keeps result ordering as we go. + set smallfactors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j < $max} { + if {($x % $j) == 0} { + lappend smallfactors $j + lappend largefactors [expr {$x / $j}] + } + incr j + } + #handle sqrt outside loop so we don't have to sort/dedup or check list membership in main loop + if {($x % $j) == 0} { + if {$j == ($x / $j)} { + lappend smallfactors $j + } + } + return [concat $smallfactors [lreverse $largefactors] $x] + } + + + + # incomplete - report which is the innermost bracket/quote etc awaiting completion for a Tcl command + #important - used by punk::repl + proc incomplete {partial} { + #we can apparently get away without concatenating current innerpartial to previous in list - REVIEW. + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + #puts stderr "-->$clist<--" + set waiting [list ""] + set innerpartials [list ""] + set escaped 0 + set i 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + incr i + continue + } ;# set escaped 0 at end + set p [lindex $innerpartials end] + if {$escaped == 0} { + #NOTE - curly braces as switch arm keys must be unescaped and balanced. (escapes stop byte-compilation to jumpTable for switch statements for tcl8.6/8.7 at least) + switch -- $c { + {"} { + if {![info complete ${p}]} { + lappend waiting {"} + lappend innerpartials "" + } else { + if {[lindex $waiting end] eq {"}} { + #this quote is endquote + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + if {![info complete ${p}$c]} { + lappend waiting {"} + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } + } + {[} { + if {![info complete ${p}$c]} { + lappend waiting "\]" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + "{" { + if {![info complete ${p}$c]} { + lappend waiting "\}" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + "}" - + default { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } + } else { + set p ${p}${c} + lset innerpartials end $p + } + set escaped 0 + incr i + } + set incomplete [list] + foreach w $waiting { + #to be treated as literals - curly braces must be unescaped here - and balanced - hence the left-curly empty arm. + switch -- $w { + {"} { + lappend incomplete $w + } + {]} { + lappend incomplete "\[" + } + "{" {} + "}" { + lappend incomplete "\{" + } + } + } + set debug 0 + if {$debug} { + foreach w $waiting p $innerpartials { + puts stderr "->awaiting:'$w' partial: $p" + } + } + return $incomplete + } + #This only works for very simple cases will get confused with for example: + # {set x "a["""} + proc incomplete_naive {partial} { + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + set waiting [list] + set escaped 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + continue + } + if {!$escaped} { + if {$c eq {"}} { + if {[lindex $waiting end] eq {"}} { + set waiting [lrange $waiting 0 end-1] + } else { + lappend waiting {"} + } + } elseif {$c eq "\["} { + lappend waiting "\]" + } elseif {$c eq "\{"} { + lappend waiting "\}" + } else { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + } + } + } + } + set incomplete [list] + foreach w $waiting { + if {$w eq {"}} { + lappend incomplete $w + } elseif {$w eq "\]"} { + lappend incomplete "\[" + } elseif {$w eq "\}"} { + lappend incomplete "\{" + } + } + return $incomplete + } + + #get info about punk nestindex key ie type: list,dict,undetermined + # pdict devel + proc nestindex_info {args} { + set argd [punk::args::parse $args withdef { + -parent -default "" + nestindex + }] + set opt_parent [dict get $argd opts -parent] + if {$opt_parent eq ""} { + set parent_type undetermined + } else { + set parent_type [nestindex_info -parent "" $opt_parent] ;#make sure we explicitly set parent of parent to empty so we don't just recurse forever doing nothing + } + + #??? + + } + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib::system ---}] +} + +tcl::namespace::eval punk::lib::caches { + +} + +tcl::namespace::eval punk::lib::debug { + proc showdict {args} {} +} + +namespace eval ::punk::args::register { + #use fully qualified so 8.6 doesn't find existing var in global namespace + lappend ::punk::args::register::NAMESPACES ::punk::lib +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::lib [tcl::namespace::eval punk::lib { + variable pkg punk::lib + variable version + set version 0.1.6 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/bootsupport/modules/punk/nav/fs-0.1.0.tm b/src/bootsupport/modules/punk/nav/fs-0.1.0.tm index 06c7ddf3..741d9fc0 100644 --- a/src/bootsupport/modules/punk/nav/fs-0.1.0.tm +++ b/src/bootsupport/modules/punk/nav/fs-0.1.0.tm @@ -229,12 +229,16 @@ tcl::namespace::eval punk::nav::fs { } else { set stripbase 1 } - if {$v eq "/"} { - #hack - dict set matchinfo files {} - dict set matchinfo filesizes {} - } - set out [dirfiles_dict_as_lines -stripbase $stripbase $matchinfo] + + #we need to pass matchinfo that includes files even when only doing a directory listing (d/ /) + #This is because we want to display links/shortcuts that point to directories as directories. + #( ./ listing needs to show navigable items) + #if {$v eq "/"} { + # #dodgy hack that doesn't give proper display of all links/shortcuts that are pointing to directories. + # dict set matchinfo files {} + # dict set matchinfo filesizes {} + #} + set out [dirfiles_dict_as_lines -listing $v -stripbase $stripbase $matchinfo] #set chunklist [list] #lappend chunklist [list stdout "[punk::ansi::a+ brightwhite]$out[punk::ansi::a]\n"] set result "[punk::ansi::a+ brightwhite]$out[punk::ansi::a]\n" @@ -258,10 +262,10 @@ tcl::namespace::eval punk::nav::fs { #puts stdout "-->[ansistring VIEW $result]" return $result } else { - set atail [lassign $args a1] + set atail [lassign $args cdtarget] if {[llength $args] == 1} { - set a1 [lindex $args 0] - switch -exact -- $a1 { + set cdtarget [lindex $args 0] + switch -exact -- $cdtarget { . - ./ { tailcall punk::nav::fs::d/ } @@ -286,43 +290,88 @@ tcl::namespace::eval punk::nav::fs { } } else { cd $up1 - #set VIRTUAL_CWD [file normalize $a1] + #set VIRTUAL_CWD [file normalize $cdtarget] } tailcall punk::nav::fs::d/ $v } } - if {![regexp {[*?]} $a1] && [file pathtype $a1] ne "relative"} { + set cdtarget_copy [punk::nav::fs::system::valcopy $cdtarget] + set cdtarget_copy [string map {\\ /} $cdtarget_copy] + if {[string range $cdtarget_copy 0 3] eq "//?/"} { + #handle dos device paths - convert to normal path for glob testing + set glob_test [string range $cdtarget_copy 3 end] + set cdtarget_is_glob [regexp {[*?]} $glob_test] + } else { + set cdtarget_is_glob [regexp {[*?]} $cdtarget] + } + if {!$cdtarget_is_glob} { + set cdtarget_file_type [file type $cdtarget] + #e.g may be a link - whilst the type returned in the 'file stat' info reflects the type of the link target + } else { + set cdtarget_file_type "glob" + } + + if {!$cdtarget_is_glob && [file pathtype $cdtarget] ne "relative"} { #non-relative non-glob - if { ![string match //zipfs:/* $a1]} { - if {[file type $a1] eq "directory"} { - cd $a1 - #set VIRTUAL_CWD $a1 - tailcall punk::nav::fs::d/ $v + if {![string match //zipfs:/* $cdtarget]} { + switch -- $cdtarget_file_type { + link { + file stat $cdtarget cdtargetinfo + set linktarget_file_type $cdtargetinfo(type) + if {$linktarget_file_type eq "directory"} { + set linktarget [file readlink $cdtarget] + cd $linktarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } + } + directory { + cd $cdtarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } } } } - if {![regexp {[*?]} $a1] && ![string match //zipfs:/* $a1] && ![string match "//zipfs:/*" $VIRTUAL_CWD]} { - if {[file type $a1] eq "directory"} { - cd $a1 - #set VIRTUAL_CWD [file normalize $a1] - tailcall punk::nav::fs::d/ $v + if {!$cdtarget_is_glob && ![string match //zipfs:/* $cdtarget] && ![string match "//zipfs:/*" $VIRTUAL_CWD]} { + switch -- $cdtarget_file_type { + link { + file stat $cdtarget cdtargetinfo + set linktarget_file_type $cdtargetinfo(type) + set linktarget [file readlink $cdtarget] + if {$linktarget_file_type eq "directory"} { + cd $linktarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } + } + directory { + cd $cdtarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } } + #if {[file type $cdtarget] eq "directory"} { + # cd $cdtarget + # #set VIRTUAL_CWD [file normalize $cdtarget] + # tailcall punk::nav::fs::d/ $v + #} } - if {![regexp {[*?]} $a1]} { + if {!$cdtarget_is_glob} { #NON-Glob target #review - if {[string match //zipfs:/* $a1]} { - if {[Zipfs_path_within_zipfs_mounts $a1]} { - commandstack::basecall cd $a1 + if {[string match //zipfs:/* $cdtarget]} { + if {[Zipfs_path_within_zipfs_mounts $cdtarget]} { + commandstack::basecall cd $cdtarget } - set VIRTUAL_CWD $a1 - set curdir $a1 + set VIRTUAL_CWD $cdtarget + set curdir $cdtarget } else { - set target [punk::path::normjoin $VIRTUAL_CWD $a1] + set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] if {[string match //zipfs:/* $VIRTUAL_CWD]} { if {[Zipfs_path_within_zipfs_mounts $target]} { commandstack::basecall cd $target @@ -521,20 +570,93 @@ tcl::namespace::eval punk::nav::fs { return $result } + punk::args::define { + @id -id ::punk::nav::fs::d/new + -nonportable -type none -help\ + "Allow creation of directories which may not be portable across platforms. + Use with caution and only when you know what you are doing. + This allows creation of directories with names that may be invalid on some + platforms, or that may have special meanings on some platforms + (e.g reserved device names on windows). + If -nonportable is not supplied, then an error will be raised if any supplied + path is non-portable as defined by punk::winpath::illegalname_test. + + Regardless of whether -nonportable is supplied or not, some characters are not + suitable for windows or most other platforms and will be rejected with an error. + An example of this is the null character (\0)." + @values -min 1 -max -1 -type string + path -type string -multiple 1 -help\ + "Path(s) to create. Can be absolute or relative. + + If any path is rejected due to -nonportable or other invalid characters, + or because a parent directory is not writable, then no directories will be created. + + If a path already exists, then it will be left as-is and no error will be raised. + + If despite passing the name tests or writability tests, a directory cannot be + created for some reason (e.g other filesystem error) then an error will be raised + and processing of any remaining paths will be aborted." + } + #todo - synchronize overall behaviour of d/new with that of n/new (for namespaces) proc d/new {args} { - if {![llength $args]} { - error "usage: d/new \[ ...\]" - } - set a1 [lindex $args 0] + set argd [punk::args::parse $args withid ::punk::nav::fs::d/new] + lassign [dict values $argd] leaders opts values received + set paths [dict get $values path] + set allow_nonportable [dict exists $received -nonportable] + set curdir [pwd] - set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] - set fullpath [file join $path1 {*}[lrange $args 1 end]] + set fullpath_list [list] + set error_paths [list] + foreach p $paths { + if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { + #error "punk::nav::fs::d/new Path '$p' is not portable and may not be created without -nonportable option" + lappend error_paths [list $p "Path '$p' is not portable and may not be created without -nonportable option"] + continue + } + if {[string first \0 $p] != -1} { + #error "punk::nav::fs::d/new Path '$p' contains null character which is not allowed" + lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] + continue + } + set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] + #e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. + set fullpath [file join $path1 {*}[lrange $args 1 end]] + #Some subpaths of the supplied paths to create may already exist. + #we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. + set parent [file dirname $fullpath] + while {![file exists $parent]} { + set parent [file dirname $parent] + } + if {![file writable $parent]} { + #error "punk::nav::fs::d/new Cannot create directory '$fullpath' as parent '$parent' is not writable" + lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] + continue + } + lappend fullpath_list $fullpath + } + if {[llength $fullpath_list] != [llength $paths]} { + set path_error_display "" + foreach e $error_paths { + set p [lindex $e 0] + set m [lindex $e 1] + append path_error_display " Path: '$p' Error: $m\n" + } + error "punk::nav::fs::d/new One or more supplied paths were invalid or not writable:\n$path_error_display" + } - if {[file exists $fullpath]} { - error "Folder $fullpath already exists" + set num_created 0 + set error_string "" + foreach fullpath $fullpath_list { + if {[catch {file mkdir $fullpath}]} { + set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." + break + } + incr num_created } - file mkdir $fullpath - d/ $fullpath + if {$error_string ne ""} { + error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." + } + d/ $curdir } #todo use unknown to allow d/~c:/etc ?? @@ -849,11 +971,11 @@ tcl::namespace::eval punk::nav::fs { #file attr //cookit:/ returns {-vfs 1 -handle {}} #we will treat it differently for now - use generic handler REVIEW - set in_vfs 0 ;#we use this only for a vfs which is reported to have a mountpoint by vfs::filesystem info - not all that have -vfs 1 attr like cookit. + set is_in_vfs 0 ;#we use this only for a vfs which is reported to have a mountpoint by vfs::filesystem info - not all that have -vfs 1 attr like cookit. if {[llength [package provide vfs]]} { foreach mount [vfs::filesystem info] { if {[punk::mix::base::lib::path_a_atorbelow_b $location $mount]} { - set in_vfs 1 + set is_in_vfs 1 break } } @@ -871,27 +993,27 @@ tcl::namespace::eval punk::nav::fs { } else { set next_opt_with_times [list -with_times $opt_with_times] } - if {$in_vfs} { + if {$is_in_vfs} { set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] } else { - set in_zipfs 0 - set in_cookit 1 - set in_other_pseudovol 1 + set invfs "" switch -glob -- $location { //zipfs:/* { if {[info commands ::tcl::zipfs::mount] ne ""} { - set in_zipfs 1 + set invfs zipfs } } //cookit:/* { - set in_cookit 1 + set invfs cookit } default { #handle 'other/unknown' that mounts at a volume-like path //pseudovol:/ + #(intentionally will not match a dos device path such as //?/c:/) if {[regexp {//((?:(?!:|/).)+):/.*} $location _match pseudovol]} { #pseudovol probably more than one char long #we don't really expect something like //c:/ , but anyway, it's not the same as c:/ and for all we know someone could use that as a volume name? - set in_other_pseudovol 1 ;#flag so we don't use twapi - hope generic can handle it (uses tcl glob) + #flag so we don't use twapi - hope generic can handle it (uses tcl glob) + set invfs pseudovol } else { #we could use 'file attr' here to test if {-vfs 1} #but it's an extra filesystem hit on all normal paths too (which can be expensive on some systems) @@ -900,20 +1022,24 @@ tcl::namespace::eval punk::nav::fs { } } - - if {$in_zipfs} { - #relative vs absolute? review - cwd valid for //zipfs:/ ?? - set listing [punk::du::lib::du_dirlisting_zipfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } elseif {$in_cookit} { - #seems to be a vfs - except it mounts on a pseudo-volume path //cookit:/ - #don't use twapi - #could possibly use du_dirlisting_tclvfs REVIEW - #files and folders are all returned with the -types hidden option for glob on windows - set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } elseif {$in_other} { - set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } else { - set listing [punk::du::dirlisting $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + switch -- $invfs { + zipfs { + #relative vs absolute? review - cwd valid for //zipfs:/ ?? + set listing [punk::du::lib::du_dirlisting_zipfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + cookit { + #seems to be a vfs - except it mounts on a pseudo-volume path //cookit:/ + #don't use twapi + #could possibly use du_dirlisting_tclvfs REVIEW + #files and folders are all returned with the -types hidden option for glob on windows + set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + pseudovol { + set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + default { + set listing [punk::du::dirlisting $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } } } @@ -1018,11 +1144,13 @@ tcl::namespace::eval punk::nav::fs { @id -id ::punk::nav::fs::dirfiles_dict_as_lines -stripbase -default 0 -type boolean -formatsizes -default 1 -type boolean + -listing -default "/" -choices {/ // //} @values -min 1 -max -1 -type dict -unnamed true } #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? proc dirfiles_dict_as_lines {args} { + set ts1 [clock milliseconds] package require overtype set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] lassign [dict values $argd] leaders opts vals @@ -1031,9 +1159,12 @@ tcl::namespace::eval punk::nav::fs { # -- --- --- --- --- --- --- --- --- --- --- --- - set opt_stripbase [dict get $opts -stripbase] + set opt_stripbase [dict get $opts -stripbase] set opt_formatsizes [dict get $opts -formatsizes] + set opt_listing [dict get $opts -listing] # -- --- --- --- --- --- --- --- --- --- --- --- + #we still need to examine files for -listing / which means show only directories, + # because we want to display links/shortcuts that point to directories as directories #if multiple dicts and -stripbase = 1 - we can only strip the longest common part of the searchbases supplied set common_base "" @@ -1074,7 +1205,6 @@ tcl::namespace::eval punk::nav::fs { foreach fileset [list dirs files links underlayfiles flaggedhidden flaggedreadonly flaggedsystem nonportable vfsmounts] { set $fileset [list] } - #set contents [lindex $list_of_dicts 0] foreach contents $list_of_dicts { lappend dirs {*}[dict get $contents dirs] @@ -1090,6 +1220,7 @@ tcl::namespace::eval punk::nav::fs { lappend vfsmounts {*}[dict get $contents vfsmounts] } + set fkeys [dict create] ;#avoid some file normalize calls.. if {$opt_stripbase && $common_base ne ""} { set filetails [list] @@ -1224,27 +1355,41 @@ tcl::namespace::eval punk::nav::fs { #review - symlink to shortcut? hopefully will just work #classify as file or directory - fallback to file if unknown/undeterminable set finfo_plus [list] + set ts2 [clock milliseconds] foreach fdict $finfo { set fname [dict get $fdict file] if {[file extension $fname] eq ".lnk"} { if {![catch {package require punk::winlnk}]} { - set shortcutinfo [punk::winlnk::resolve $fname] set target_type "file" ;#default/fallback + + set shortcutinfo [punk::winlnk::resolve $fname] if {[dict exists $shortcutinfo link_target]} { set is_valid_lnk 1 set tgt [dict get $shortcutinfo link_target] - if {[file exists $tgt]} { - #file type could return 'link' - we will use isfile/isdirectory - if {[file isfile $tgt]} { - set target_type file - } elseif {[file isdirectory $tgt]} { - set target_type directory - } else { - set target_type file ;## ? + set link_target_type [dict get $shortcutinfo target_type] + switch -- $link_target_type { + file { + set target_type "file" + } + directory - "local disk" { + set target_type "directory" + } + unknown { + #fall back to checking attributes and filesystem if we have a link_target but no target_type + if {[file exists $tgt]} { + #file type could return 'link' - we will use isfile/isdirectory + if {[file isfile $tgt]} { + set target_type file + } elseif {[file isdirectory $tgt]} { + set target_type directory + } else { + set target_type file ;## ? + } + } else { + #todo - see if punk::winlnk has info about the type at the time of linking + #for now - treat as file + } } - } else { - #todo - see if punk::winlnk has info about the type at the time of linking - #for now - treat as file } } else { #no link_target - probably an ordinary file - but there could have been some other error in reading the binary windows lnk format. @@ -1295,6 +1440,8 @@ tcl::namespace::eval punk::nav::fs { } unset finfo + puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" + puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] @@ -1304,58 +1451,82 @@ tcl::namespace::eval punk::nav::fs { set displaylist [list] set col1 [string repeat " " [expr {$widest1 + 2}]] set RST [punk::ansi::a] + if {$opt_listing eq "/"} { + #disply directories only (including items that were actually files that were links/shortcuts to directories) + set finfo_plus [list] + } foreach d $dirs filerec $finfo_plus { - set d1 [punk::ansi::a+ cyan bold] - set d2 [punk::ansi::a+ defaultfg defaultbg normal] - #set f1 [punk::ansi::a+ white bold] - set f1 [punk::ansi::a+ white] - set f2 [punk::ansi::a+ defaultfg defaultbg normal] + set d1 [punk::ansi::a+ cyan normal] + set d1_overrides [list] + #set d2 [punk::ansi::a+ defaultfg defaultbg normal] + set f1 [punk::ansi::a+ white normal] + set f1_overrides [list] + #set f2 [punk::ansi::a+ defaultfg defaultbg normal] set fdisp "" if {[string length $d]} { if {$d in $flaggedhidden} { - set d1 [punk::ansi::a+ cyan normal] + #set d1 [punk::ansi::a+ Term-grey50 normal] + lappend d1_overrides term-grey50 } if {$d in $vfsmounts} { - if {$d in $flaggedhidden} { - #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indicatio of 'hidden' status - REVIEW - #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) - #mark it differently for now.. (todo bug report?) - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red Yellow bold] - } else { - set d1 [punk::ansi::a+ green Purple bold] - } - } else { - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red White bold] - } else { - set d1 [punk::ansi::a+ green bold] - } - } - } else { - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red bold] - } + lappend d1_overrides Green + } + if {$d in $nonportable} { + #lappend d1_overrides underline undt-red ;#we use underlins to indicate symlinks and shortcuts, so we shouldn't use underlines here if possible. + lappend d1_overrides italic bold } + #if {$d in $vfsmounts} { + # if {$d in $flaggedhidden} { + # #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indication of 'hidden' status - REVIEW + # #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) + # #mark it differently for now.. (todo bug report?) + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red Yellow bold] + # } else { + # set d1 [punk::ansi::a+ green Purple bold] + # } + # } else { + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red White bold] + # } else { + # set d1 [punk::ansi::a+ green bold] + # } + # } + #} else { + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red bold] + # } + #} #dlink-style & dshortcut_style are for underlines - can be added with colours already set + + if {[llength $d1_overrides]} { + set d1 [punk::ansi::a+ {*}$d1_overrides] + } if {$d in $dir_symlinks} { append d1 $dlink_style } elseif {$d in $dir_shortcuts} { append d1 $dshortcut_style } } + if {[llength $filerec]} { set fname [dict get $filerec file] set fdisp [dict get $filerec display] if {$fname in $flaggedhidden} { - set f1 [punk::ansi::a+ Purple] - } else { - if {$fname in $nonportable} { - set f1 [punk::ansi::a+ red bold] - } + #set f1 [punk::ansi::a+ Term-grey50] + lappend f1_overrides term-grey50 + } + if {$fname in $nonportable} { + lappend f1_overrides italic bold } + if {[llength $f1_overrides]} { + set f1 [punk::ansi::a+ {*}$f1_overrides] + } + lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST + } else { + #either there are no files or opt_listing is / = show dirs only (some of which may have actually been files that were links/shortcuts to directories) + lappend displaylist [overtype::left $col1 $d1$d$RST] } - lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST } return [punk::lib::list_as_lines $displaylist] @@ -1469,6 +1640,12 @@ tcl::namespace::eval punk::nav::fs::system { #[subsection {Namespace punk::nav::fs::system}] #[para] Internal functions that are not part of the API + #utility function to copy values from one variable to another without sharing the reference. + #Useful for example to avoid some issues with possible shimmering of the underlying type of file paths. + proc valcopy {obj} { + append obj2 $obj {} + } + #ordinary emission of chunklist when no repl proc emit_chunklist {chunklist} { set result "" diff --git a/src/bootsupport/modules/punk/pipe-1.0.tm b/src/bootsupport/modules/punk/pipe-1.0.tm index eac7df81..034fae01 100644 --- a/src/bootsupport/modules/punk/pipe-1.0.tm +++ b/src/bootsupport/modules/punk/pipe-1.0.tm @@ -326,12 +326,12 @@ tcl::namespace::eval punk::pipe::lib { set in_atom 1 } ( { - incr in_brackets + incr in_brackets } default { if {$end_var_posn == -1 && (($c in $var_terminals) && ($prevc ni $protect_terminals))} { - set end_var_posn $token_index - } + set end_var_posn $token_index + } } } } diff --git a/src/bootsupport/modules/punk/winlnk-0.1.1.tm b/src/bootsupport/modules/punk/winlnk-0.1.1.tm new file mode 100644 index 00000000..f283348f --- /dev/null +++ b/src/bootsupport/modules/punk/winlnk-0.1.1.tm @@ -0,0 +1,1014 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'dev make' or bin/punkmake to update from -buildversion.txt +# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::winlnk 0.1.1 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::winlnk 0 0.1.1] +#[copyright "2024"] +#[titledesc {windows shortcut .lnk library}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk::winlnk}] [comment {-- Description at end of page heading --}] +#[require punk::winlnk] +#[keywords module shortcut lnk parse windows crossplatform] +#[description] +#[para] Tools for reading windows shortcuts (.lnk files) on any platform + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::winlnk +#[subsection Concepts] +#[para] Windows shortcuts are a binary format file with a .lnk extension +#[para] Shell Link (.LNK) Binary File Format is documented in [lb]MS_SHLLINK[rb].pdf published by Microsoft. +#[para] Revision 8.0 published 2024-04-23 + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::winlnk +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6}] + +#TODO - logger + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + #variable xyz + + #*** !doctools + #[subsection {Namespace punk::winlnk}] + #[para] Core API functions for punk::winlnk + #[list_begin definitions] + + + variable magic_HeaderSize "0000004C" ;#HeaderSize MUST equal this + variable magic_LinkCLSID "00021401-0000-0000-C000-000000000046" ;#LinkCLSID MUST equal this + + proc Get_contents {path {bytes all}} { + if {![file exists $path] || [file type $path] ne "file"} { + error "punk::winlnk::get_contents cannot find a filesystem object of type 'file' at location: $path" + } + set fd [open $path r] + chan configure $fd -translation binary -encoding iso8859-1 + if {$bytes eq "all"} { + set data [read $fd] + } else { + set data [read $fd $bytes] + } + close $fd + return $data + } + proc Contents_check_header {contents} { + variable magic_HeaderSize + variable magic_LinkCLSID + expr {[Header_Get_HeaderSize $contents] eq $magic_HeaderSize && [Header_Get_LinkCLSID $contents] eq $magic_LinkCLSID} + } + + #LinkFlags - 4 bytes - specifies information about the shell link and the presence of optional portions of the structure. + proc Show_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + puts "val: $val" + set declist [scan [string reverse $4bytes] %c%c%c%c] + set fmt [string repeat %08b 4] + puts "LinkFlags:[format $fmt {*}$declist]" + + set r [binary scan $4bytes b32 val] + puts "bscan-le: $val" + set r [binary scan [string reverse $4bytes] b32 val] + puts "bscan-2 : $val" + } + variable LinkFlags + set LinkFlags [dict create\ + HasLinkTargetIDList 1\ + HasLinkInfo 2\ + HasName 4\ + HasRelativePath 8\ + HasWorkingDir 16\ + HasArguments 32\ + HasIconLocation 64\ + IsUnicode 128\ + ForceNoLinkInfo 256\ + HasExpString 512\ + RunInSeparateProcess 1024\ + Unused1 2048\ + HasDarwinID 4096\ + RunAsUser 8192\ + HasExpIcon 16394\ + NoPidlAlias 32768\ + Unused2 65536\ + RunWithShimLayer 131072\ + ForceNoLinkTrack 262144\ + EnableTargetMetadata 524288\ + DisableLinkPathTracking 1048576\ + DisableKnownFolderTracking 2097152\ + DisableKnownFolderAlias 4194304\ + AllowLinkToLink 8388608\ + UnaliasOnSave 16777216\ + PreferEnvironmentPath 33554432\ + KeepLocalIDListForUNCTarget 67108864\ + ] + variable LinkFlagLetters [list A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA] + proc Header_Has_LinkFlag {contents flagname} { + variable LinkFlags + variable LinkFlagLetters + if {[string length $flagname] <= 2} { + set idx [lsearch $LinkFlagLetters $flagname] + if {$idx < 0} { + error "punk::winlnk::Header_Has_LinkFlag error - flagname $flagname not known" + } + set binflag [expr {2**$idx}] + set allflags [Header_Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } + if {[dict exists $LinkFlags $flagname]} { + set binflag [dict get $LinkFlags $flagname] + set allflags [Header_Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } else { + error "punk::winlnk::Header_Has_LinkFlag error - flagname $flagname not known" + } + } + + #MS-SHLLINK.pdf documents the .lnk file format in detail, but here is a brief overview of the structure of a .lnk file: + #protocol revision 10.0 (November 2025) https://winprotocoldocs-bhdugrdyduf5h2e4.b02.azurefd.net/MS-SHLLINK/%5bMS-SHLLINK%5d.pdf + + + #SHELL_LINK_HEADER structure is 76 bytes long and starts at the beginning of the file + #offset hex:0x00 dec:0 4 bytes + #Header size (HeaderSize) (must be 0x0000004C for .lnk files) + proc Header_Get_HeaderSize {contents} { + set 4bytes [split [string range $contents 0 3] ""] + set hex4 "" + foreach b [lreverse $4bytes] { + set dec [scan $b %c] ;# 0-255 decimal + set HH [format %2.2llX $dec] + append hex4 $HH + } + return $hex4 + } + + + #offset hex:0x04 dec:4 16 bytes + #LinkCLSID (must be 00021401-0000-0000-C000-000000000046 for .lnk files) + proc Header_Get_LinkCLSID {contents} { + set 16bytes [string range $contents 4 19] + #CLSID hex textual representation is split as 4-2-2-2-6 bytes(hex pairs) + #e.g We expect 00021401-0000-0000-C000-000000000046 for .lnk files + #for endianness - it is little endian all the way but the split is 4-2-2-1-1-1-1-1-1-1-1 REVIEW + #(so it can appear as mixed endianness if you don't know the splits) + #https://devblogs.microsoft.com/oldnewthing/20220928-00/?p=107221 + #This is based on COM textual representation of GUIDS + #Apparently a CLSID is a GUID that identifies a COM object + set clsid "" + set s1 [tcl::string::range $16bytes 0 3] + set declist [scan [string reverse $s1] %c%c%c%c] + set fmt "%02X%02X%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s2 [tcl::string::range $16bytes 4 5] + set declist [scan [string reverse $s2] %c%c] + set fmt "%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s3 [tcl::string::range $16bytes 6 7] + set declist [scan [string reverse $s3] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + #now treat bytes individually - so no endianness conversion + set declist [scan [tcl::string::range $16bytes 8 9] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + set scan [string repeat %c 6] + set fmt [string repeat %02X 6] + set declist [scan [tcl::string::range $16bytes 10 15] $scan] + append clsid [format $fmt {*}$declist] + + return $clsid + } + + + #offset hex:0x14 dec:20 4 bytes + #Link flags (LinkFlags) - bit field specifying information about the shell link and the presence of optional portions of the structure. + #HasLinkTargetIDList bit 0 (0x00000001) - if set, a LinkTargetIDList structure is present immediately following the header + #HasLinkInfo bit 1 (0x00000002) - if set, a LinkInfo structure is present immediately following the header (or the LinkTargetIDList if that is present) + #HasName bit 2 (0x00000004) - if set, a null-terminated string containing the name of the link is present immediately following the header (or the LinkTargetIDList and LinkInfo if they are present) + #HasRelativePath bit 3 (0x00000008) - if set, a null-terminated string containing the relative path of the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo and Name if they are present) + #HasWorkingDir bit 4 (0x00000010) - if set, a null-terminated string containing the working directory of the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name and Relative Path if they are present) + #HasArguments bit 5 (0x00000020) - if set, a null-terminated string containing the command line arguments for the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path and Working Dir if they are present) + #HasIconLocation bit 6 (0x00000040) - if set, a null-terminated string containing the location of the icon for the link is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir and Arguments if they are present) + #IsUnicode bit 7 (0x00000080) - if set, the strings in the link are stored in Unicode (UTF-16LE) format; if not set, the strings are stored in ANSI format (usually the system's default code page) + #ForceNoLinkInfo bit 8 (0x00000100) - if set, the LinkInfo structure is not stored in the file even if the HasLinkInfo bit is set; this can be used to force the link to be resolved using only the information in the header and the optional strings, without using the LinkInfo structure + #HasExpString bit 9 (0x00000200) - if set, a null-terminated string containing an "environment variable" style string is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments and Icon Location if they are present); this string can contain environment variable references (e.g. %USERPROFILE%) that can be expanded to obtain the actual path of the link target + #RunInSeparateProcess bit 10 (0x00000400) - if set, the link target should be run in a separate process; if not set, the link target may be run in the same process as the caller + #Unused1 bit 11 (0x00000800) - reserved for future use; should be set to 0 + #HasDarwinID bit 12 (0x00001000) - if set, a null-terminated string containing a "Darwin ID" is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments, Icon Location and ExpString if they are present); this string can be used to identify the link target in a way that is independent of the file system (e.g. for links to Control Panel items or special folders) + #RunAsUser bit 13 (0x00002000) - if set, the link target should be run with the permissions of the user specified in the HasDarwinID string; if not set, the link target should be run with the permissions of the caller + #HasExpIcon bit 14 (0x00004000) - if set, a null-terminated string containing an "environment variable" style string for the icon location is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments, Icon Location, ExpString and DarwinID if they are present); this string can contain environment variable references that can be expanded to obtain the actual path of the icon for the link + #NoPidlAlias bit 15 (0x00008000) - if set, the link target should not be resolved using the PIDL alias mechanism; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed + #Unused2 bit 16 (0x00010000) - reserved for future use; should be set to 0 + #RunWithShimLayer bit 17 (0x00020000) - if set, the link target should be run with the application compatibility shim layer; if not set, the link target should be run without the shim layer + #ForceNoLinkTrack bit 18 (0x00040000) - if set, the link target should not be tracked by the shell's link tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed + #EnableTargetMetadata bit 19 (0x00080000) - if set, the link target should have metadata enabled; this can be used to allow the link to store additional information about the target (e.g. for links to files, the link can store the file's attributes, creation time, access time and modification time) + #DisableLinkPathTracking bit 20 (0x00100000) - if set, the link target should not be tracked by the shell's link path tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed based on its path + #DisableKnownFolderTracking bit 21 (0x00200000) - if set, the link target should not be tracked by the shell's known folder tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed based on its known folder ID + #DisableKnownFolderAlias bit 22 (0x00400000) - if set, the link target should not be aliased to a known folder; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on its known folder ID + #AllowLinkToLink bit 23 (0x00800000) - if set, the link target can be another link; if not set, the link target should not be another link (i.e. it should be a file or directory); this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on the fact that it is a link + #UnaliasOnSave bit 24 (0x01000000) - if set, the link should be unaliased when it is saved; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on the fact that it is a link + #PreferEnvironmentPath bit 25 (0x02000000) - if set, the link should prefer to resolve the target using environment variable references; this can be used to allow the link to be resolved correctly even if the target is moved or renamed, as long as the environment variable references still point to the correct location + #KeepLocalIDListForUNCTarget bit 26 (0x04000000) - if set, the link should keep the local ID list for UNC targets; this can be used to allow the link to be resolved correctly even if the target is moved or renamed, as long as the local ID list still points to the correct location + # - the presence of these flags indicates the presence of optional structures in the .lnk file and also provides information about how to interpret the data in the file + proc Header_Get_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #offset hex:0x18 dec:24 4 bytes + #File attributes (FileAttributes) - bit field specifying the file attributes of the link target (if the EnableTargetMetadata flag is set in the LinkFlags field); this field is a bitwise combination of the following values: + proc Header_Get_FileAttributes {contents} { + if {![Header_Has_LinkFlag $contents "EnableTargetMetadata"]} { + return {} + } + set 4bytes [string range $contents 24 27] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + set attrlist {} + if {$val & 0x00000001} {lappend attrlist "READONLY"} + if {$val & 0x00000002} {lappend attrlist "HIDDEN"} + if {$val & 0x00000004} {lappend attrlist "SYSTEM"} + if {$val & 0x00000010} {lappend attrlist "DIRECTORY"} + if {$val & 0x00000020} {lappend attrlist "ARCHIVE"} + if {$val & 0x00000040} {lappend attrlist "DEVICE"} + if {$val & 0x00000080} {lappend attrlist "NORMAL"} + if {$val & 0x00000100} {lappend attrlist "TEMPORARY"} + if {$val & 0x00000200} {lappend attrlist "SPARSE_FILE"} + if {$val & 0x00000400} {lappend attrlist "REPARSE_POINT"} + if {$val & 0x00000800} {lappend attrlist "COMPRESSED"} + if {$val & 0x00001000} {lappend attrlist "OFFLINE"} + if {$val & 0x00002000} {lappend attrlist "NOT_CONTENT_INDEXED"} + if {$val & 0x00004000} {lappend attrlist "ENCRYPTED"} + return $attrlist + } + proc Header_Get_FileAttributes_Raw {contents} { + if {![Header_Has_LinkFlag $contents "EnableTargetMetadata"]} { + return 0 + } + set 4bytes [string range $contents 24 27] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + + + + #offset hex:0x1C dec:28 8 bytes + #creation date and time (CreationTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_CreationTime {contents} { + set 8bytes [string range $contents 28 35] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_CreationTime_Raw {contents} { + set 8bytes [string range $contents 28 35] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset 36 8 bytes + #last access date and time (AccessTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_AccessTime {contents} { + set 8bytes [string range $contents 36 43] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_AccessTime_Raw {contents} { + set 8bytes [string range $contents 36 43] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset hex:0x2C dec:44 8 bytes + #last modification date and time (WriteTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_WriteTime {contents} { + set 8bytes [string range $contents 44 51] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_WriteTime_Raw {contents} { + set 8bytes [string range $contents 44 51] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset hex:0x34 dec:52 Bytes:4 - unsigned int + #file size in bytes (of target - low 32 bits if >4GB) + proc Header_Get_FileSize {contents} { + set 4bytes [string range $contents 52 55] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x38 dec:56 Bytes:4 - signed integer + #icon index value + proc Header_Get_IconIndex {contents} { + set 4bytes [string range $contents 56 59] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x3C dec:60 Bytes:4 - unsigned integer + #SW_SHOWNORMAL 0x00000001 + #SW_SHOWMAXIMIZED 0x00000001 + #SW_SHOWMINNOACTIVE 0x00000007 + # - all other values MUST be treated as SW_SHOWNORMAL + proc Header_Get_ShowCommand {contents} { + set 4bytes [string range $contents 60 63] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x40 dec:64 Bytes:2 + #Hot key + proc Header_Get_HotKey {contents} { + # Existing code that extracts the raw 16‑bit hotkey value: + set raw [Header_Get_HotKey_Raw $contents] + # The low byte holds the virtual‑key, high byte holds modifier flags + set vk [expr {$raw & 0xFF}] + set mods [expr {($raw >> 8) & 0xFF}] + set name [_vk_to_name $vk] + set modStr [_modifiers_to_string $mods] + if {$modStr eq ""} { + return $name + } else { + return "${modStr}+${name}" + } + } + proc Header_Get_HotKey_Raw {contents} { + set 2bytes [string range $contents 64 65] + set r [binary scan $2bytes s val] ;#short + return $val + } + proc _modifiers_to_string {mods} { + set parts {} + if {$mods & 0x01} {lappend parts "Shift"} + if {$mods & 0x02} {lappend parts "Ctrl"} + if {$mods & 0x04} {lappend parts "Alt"} + if {$mods & 0x08} {lappend parts "Win"} ;# optional + return [join $parts "+"] + } + proc _vk_to_name {vk} { + # Minimal map – extend as needed + array set vkMap { + 0x00 "No key assigned" + 0x08 Backspace 0x09 Tab 0x0D Return + 0x10 Shift 0x11 Control 0x12 Alt + 0x20 Space 0x21 PageUp 0x22 PageDown + 0x23 End 0x24 Home 0x25 Left + 0x26 Up 0x27 Right 0x28 Down + 0x2D Insert 0x2E Delete + 0x70 F1 0x71 F2 0x72 F3 + 0x73 F4 0x74 F5 0x75 F6 + 0x76 F7 0x77 F8 0x78 F9 + 0x79 F10 0x7A F11 0x7B F12 + 0x7c F13 0x7d F14 0x7e F15 + 0x7f F16 0x80 F17 0x81 F18 + 0x82 F19 0x83 F20 0x84 F21 + 0x85 F22 0x86 F23 0x87 F24 + 0x90 "NUM LOCK" 0x91 "SCROLL LOCK" + } + if {[info exists vkMap($vk)]} { + return $vkMap($vk) + } else { + if {$vk >= 0x30 && $vk <= 0x39} { + return [format "%c" $vk] ;# 0-9 + } elseif {$vk >= 0x41 && $vk <= 0x5A} { + return [format "%c" $vk] ;# A-Z + } + # fallback: hex representation + return [format "0x%02X" $vk] + } + } + + #offset hex:0x42 dec:66 Bytes:2 - reserved1 + proc Header_Get_Reserved1 {contents} { + set 2bytes [string range $contents 66 67] + set r [binary scan $2bytes s val] ;#short + return $val + } + + #offset hex:0x44 dec:68 Bytes:4 - reserved2 + proc Header_Get_Reserved2 {contents} { + set 4bytes [string range $contents 68 71] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #offset hex:0x48 dec:72 Bytes:4 - reserved3 + proc Header_Get_Reserved3 {contents} { + set 4bytes [string range $contents 72 75] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #end of 76 byte header + + proc Get_LinkTargetIDList_size {contents} { + if {[Header_Has_LinkFlag $contents "A"]} { + set 2bytes [string range $contents 76 77] + set r [binary scan $2bytes s val] ;#short + #logger + #puts stderr "LinkTargetIDList_size: $val" + return $val + } else { + return 0 + } + } + proc Get_LinkTargetIDList_content {contents} { + set idlist_size [Get_LinkTargetIDList_size $contents] + if {$idlist_size == 0} { + return "" + } else { + set idlist_content [string range $contents 78 [expr {78 + $idlist_size -1}]] + return $idlist_content + } + } + + #some clues on the structure of the IDList content and how to parse it can be found in the analysis of CVE-2020-0729, + #which is a remote code execution vulnerability in Windows that can be exploited through specially crafted .lnk files that contain malicious IDList content. + #The analysis of this vulnerability provides insights into how the IDList content is structured and how it can be parsed to extract information about the link target and potentially execute code. + #https://www.zerodayinitiative.com/blog/2020/3/25/cve-2020-0729-remote-code-execution-through-lnk-files + + proc Get_LinkTargetIDList_iteminfo {contents} { + set idlist_content [Get_LinkTargetIDList_content $contents] + set result {} + set offset 0 + while {$offset < [string length $idlist_content]} { + if {[string length $idlist_content] - $offset < 2} break + set size_bytes [string range $idlist_content $offset [expr {$offset + 1}]] ;#size including these 2 bytes + binary scan $size_bytes su size + if {$size == 0} break + if {$size < 2} { + # Invalid size, abort + error "punk::winlnk::Get_LinkTargetIDList_iteminfo: Invalid ItemID size: $size at offset $offset" + } + if {$offset + $size > [string length $idlist_content]} { + # ItemID extends beyond content, stop parsing + puts stderr "punk::winlnk::Get_LinkTargetIDList_iteminfo: ItemID at offset $offset with size $size extends beyond content length, stopping parse" + break + } + set itemid [string range $idlist_content $offset [expr {$offset + $size - 1}]] + set itemid_bytes [string range $itemid 0 1] + binary scan $itemid_bytes su itemid_size + #in *general* byte 3 of the ItemID structure can be used to determine the type of the item + #(e.g. file, folder, network location, etc.) but this is not always reliable and can vary + #based on the specific structure of the ItemID and the context in which it is used + set itemid_type_byte [string index $itemid 2] + #puts stderr "ItemID size: $itemid_size, type byte: [format %02X [scan $itemid_type_byte %c]]" + set maybe_type [format %02X [scan $itemid_type_byte %c]] + lappend result [dict create size $itemid_size type $maybe_type rawcontent $itemid] + + incr offset $size + } + return $result + } + proc Get_LinkInfo_content {contents} { + set idlist_size [Get_LinkTargetIDList_size $contents] + if {$idlist_size == 0} { + set offset 0 + } else { + set offset [expr {2 + $idlist_size}] ;#LinkTargetIdList IDListSize field + value + } + set linkinfo_start [expr {76 + $offset}] + if {[Header_Has_LinkFlag $contents "B"]} { + #puts stderr "linkinfo_start: $linkinfo_start" + set 4bytes [string range $contents $linkinfo_start $linkinfo_start+3] + binary scan $4bytes i val ;#size *including* these 4 bytes + set linkinfo_content [string range $contents $linkinfo_start [expr {$linkinfo_start + $val -1}]] + return [dict create linkinfo_start $linkinfo_start size $val next_start [expr {$linkinfo_start + $val}] content $linkinfo_content] + } else { + return [dict create linkinfo_start $linkinfo_start size 0 next_start $linkinfo_start content ""] + } + } + + proc LinkInfo_get_fields {linkinfocontent} { + #TODO - finish parsing of LinkInfo - add support + #Link location information + #present if data flag HasLinkInfo exists. + + set 4bytes [string range $linkinfocontent 0 3] + binary scan $4bytes i val ;#size *including* these 4 bytes + + set bytes_linkinfoheadersize [string range $linkinfocontent 4 7] + binary scan $bytes_linkinfoheadersize i headersize + + set bytes_linkinfoflags [string range $linkinfocontent 8 11] + set r [binary scan $bytes_linkinfoflags i flags] ;# i for little endian 32-bit signed int + #puts "linkinfoflags: $flags" + + set localbasepath "" + set commonpathsuffix "" + + #REVIEW - flags problem? + if {$flags & 1} { + #VolumeIDAndLocalBasePath + #logger + #puts stderr "VolumeIDAndLocalBasePath" + } + if {$flags & 2} { + #logger + #puts stderr "CommonNetworkRelativeLinkAndPathSuffix" + } + set bytes_volumeid_offset [string range $linkinfocontent 12 15] + set bytes_localbasepath_offset [string range $linkinfocontent 16 19] + set bytes_commonnetworkrelativelinkoffset [string range $linkinfocontent 20 23] + set bytes_commonpathsuffix_offset [string range $linkinfocontent 24 27] + + binary scan $bytes_localbasepath_offset i bp_offset + if {$bp_offset > 0} { + set tail [string range $linkinfocontent $bp_offset end] + set stringterminator 0 + set i 0 + set localbasepath "" + #TODO + while {!$stringterminator & $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append localbasepath $c + } + incr i + } + } + binary scan $bytes_commonpathsuffix_offset i cps_offset + if {$cps_offset > 0} { + set tail [string range $linkinfocontent $cps_offset end] + set stringterminator 0 + set i 0 + set commonpathsuffix "" + #TODO + while {!$stringterminator && $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append commonpathsuffix $c + } + incr i + } + } + + + return [dict create localbasepath $localbasepath commonpathsuffix $commonpathsuffix note ] + } + + proc Contents_Get_Info {contents} { + + + #todo - return something like the perl lnk-parse-1.0.pl script? + + #Link File: C:/repo/jn/tclmodules/tomlish/src/modules/test/#modpod-tomlish-0.1.1/suites/all/arrays_1.toml#roundtrip+roundtrip_files+arrays_1.toml.fauxlink.lnk + #Link Flags: HAS SHELLIDLIST | POINTS TO FILE/DIR | NO DESCRIPTION | HAS RELATIVE PATH STRING | HAS WORKING DIRECTORY | NO CMD LINE ARGS | NO CUSTOM ICON | + #File Attributes: ARCHIVE + #Create Time: Sun Jul 14 2024 10:41:34 + #Last Accessed time: Sat Sept 21 2024 02:46:10 + #Last Modified Time: Tue Sept 10 2024 17:16:07 + #Target Length: 479 + #Icon Index: 0 + #ShowWnd: 1 SW_NORMAL + #HotKey: 0 + #(App Path:) Remaining Path: repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-0.1.1\suites\roundtrip\roundtrip_files\arrays_1.toml + #Relative Path: ..\roundtrip\roundtrip_files\arrays_1.toml + #Working Dir: C:\repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-0.1.1\suites\roundtrip\roundtrip_files + + variable LinkFlags + set flags_enabled [list] + dict for {k v} $LinkFlags { + if {[Header_Has_LinkFlag $contents $k] > 0} { + lappend flags_enabled $k + } + } + + set showcommand_val [Header_Get_ShowCommand $contents] + switch -- $showcommand_val { + 1 { + set showwnd [list 1 SW_SHOWNORMAL] + } + 3 { + set showwnd [list 3 SW_SHOWMAXIMIZED] + } + 7 { + set showwnd [list 7 SW_SHOWMINNOACTIVE] + } + default { + set showwnd [list $showcommand_val SW_SHOWNORMAL-effective] + } + } + + set linkinfo_content_dict [Get_LinkInfo_content $contents] + set localbase_path "" + set suffix_path "" + set linkinfocontent [dict get $linkinfo_content_dict content] + set next_start [dict get $linkinfo_content_dict next_start] ;#location of section following LinkInfo (Location information) - this will be the Data Strings. + set link_target "" + set linkfields [dict create] + if {$linkinfocontent ne ""} { + set linkfields [LinkInfo_get_fields $linkinfocontent] + set localbase_path [dict get $linkfields localbasepath] + set suffix_path [dict get $linkfields commonpathsuffix] + if {"windows" eq $::tcl_platform(platform)} { + set link_target [file join $localbase_path $suffix_path] + } else { + set suffix_path [string trimleft [string map {\\ /} $suffix_path] /] + if {[regexp {([a-zA-Z]):\\(.*)} $localbase_path _match drive_letter tail]} { + set localbase_path [string map {\\ /} $localbase_path] + set tail [string trimleft [string map {\\ /} $tail] /] + set link_target "" + #shortcut basepath is a windows path with drive letter - try to resolve it on unix by looking for a corresponding mount from fstab or a point under /mnt + set mountinfo [exec mount] + foreach line [split $mountinfo "\n"] { + #review - a more specific mount target might exist that includes the drive letter as part of the mount point name and is a longer prefix of the localbase_path + #- we should probably look for the longest prefix match rather than just the drive letter + if {[regexp -nocase -- [string cat ^$drive_letter {:\\\s+on\s+(\S+)}] $line _match mount_point]} { + set link_target [file join $mount_point $tail $suffix_path] + break + } + } + if {$link_target eq ""} { + #review - under what circumstances could this happen? If the drive letter doesn't match any mount points, then /mnt/drive_letter should generally already have been found above above + # - However, it may be possible for /mnt/drive_Letter to still exist even if it's not reflected in the output of mount or the output of mount is in an unexpected format. + + #nothing in mount result matches the drive letter - try looking for a mount point under /mnt with the drive letter as the name + if {[file exists /mnt/$drive_letter]} { + set link_target [file join /mnt/$drive_letter $tail $suffix_path] + } else { + if {$drive_letter eq [string tolower $drive_letter]]} { + set op_drive_letter [string toupper $drive_letter] + } else { + set op_drive_letter [string tolower $drive_letter] + } + if {[file exists /mnt/$op_drive_letter]} { + set link_target [file join /mnt/$op_drive_letter $tail $suffix_path] + } else { + #leave as is except for backslashes converted to forward + #- probably won't resolve correctly unless the unix system has a folder named drive_letter: in the current folder with a copy of the original filestructure. + set link_target [file join $localbase_path $suffix_path] + } + } + } else { + #shortcut basepath is a windows path with drive letter and we found a matching mount point - link_target is set to the resolved path + } + } else { + #shortcut basepath doesn't match expected windows path format - just join it with the suffix and hope for the best + #could be something like a network path or it could be something else entirely + set link_target [file join $localbase_path $suffix_path] + } + } + } + + # ---------------------------------------------------------------------- + #todo - get Data strings by parsing contents starting at $next_start + #stored in following order: + # description + # relative path + # working directory + # command line arguments + # icon location + + #Data strings format: + # 2 bytes: number of characters in the string + # following: The string. ASCII or UTF-16 little-endian string + + set datastring_dict [Contents_Get_DataStrings $contents $next_start] + + # ---------------------------------------------------------------------- + + set file_attributes [Header_Get_FileAttributes $contents] + set linktargetidlist [Get_LinkTargetIDList_iteminfo $contents] + + set target_type_info [Get_target_type $contents $file_attributes] + set target_type [dict get $target_type_info type] + set target_type_mech [dict get $target_type_info mechanism] + if {$target_type eq "unknown"} { + if {[file exists $link_target]} { + set target_type [file type $link_target] + set target_type_mech "filesystem" + } + } + + set result [dict create\ + link_target $link_target\ + link_flags $flags_enabled\ + file_attributes $file_attributes\ + creation_time [Header_Get_CreationTime $contents]\ + access_time [Header_Get_AccessTime $contents]\ + write_time [Header_Get_WriteTime $contents]\ + target_length [Header_Get_FileSize $contents]\ + icon_index ""\ + showwnd "$showwnd"\ + hotkey [Header_Get_HotKey $contents]\ + target_type $target_type\ + target_type_mech $target_type_mech\ + idlist $linktargetidlist\ + linkinfo $linkfields\ + ] + #relative_path "?" + } + + proc file_check_header {path} { + #*** !doctools + #[call [fun file_check_header] [arg path] ] + #[para]Return 0|1 + #[para]Determines if the .lnk file specified in path has a valid header for a windows shortcut + set c [Get_contents $path 20] + return [Contents_check_header $c] + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::resolve + @cmd -name punk::winlnk::resolve\ + -summary\ + "Return information about a .lnk file (windows shortcut)"\ + -help\ + "Return a dict of info obtained by parsing the binary data in a windows .lnk file. + If the .lnk header check fails, then the .lnk file probably isn't really a shortcut + file and the dictionary will contain an 'error' key." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc resolve {path} { + #*** !doctools + #[call [fun resolve] [arg path] ] + #[para] Return a dict of info obtained by parsing the binary data in a windows .lnk file + #[para] If the .lnk header check fails, then the .lnk file probably isn't really a shortcut file and the dictionary will contain an 'error' key + set c [Get_contents $path] + if {[Contents_check_header $c]} { + return [Contents_Get_Info $c] + } else { + return [dict create error "lnk_header_check_failed"] + } + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::file_show_info + @cmd -name punk::winlnk::file_show_info\ + -summary\ + "Show information about a .lnk file (windows shortcut)"\ + -help\ + "Print to stdout the information obtained by parsing the binary data in a windows .lnk file, in a human readable format. + If the .lnk header check fails, then the .lnk file probably isn't really a shortcut file and an error message will be printed." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc file_show_info {path} { + package require punk::lib + #punk::lib::showdict [resolve $path] */@* + set field_queries [dict create\ + link_target link_target\ + link_flags link_flags/@*\ + file_attributes file_attributes\ + creation_time creation_time\ + access_time access_time\ + write_time write_time\ + target_length target_length\ + icon_index icon_index\ + showwnd showwnd\ + hotkey hotkey\ + target_type target_type\ + idlist idlist/@*/@*.@*\ + linkinfo linkinfo/@*.@*\ + ] + set info [resolve $path] + if {[dict exists $info error]} { + return "Error: [dict get $info error]" + } else { + set querystring "" + foreach field [dict keys $info] { + if {[dict exists $field_queries $field]} { + append querystring "[dict get $field_queries $field] " + } else { + append querystring "$field " + } + } + puts "querystring: $querystring" + return [punk::lib::showdict $info {*}$querystring] + } + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::target + @cmd -name punk::winlnk::target\ + -summary\ + "Return the target path of a .lnk file (windows shortcut)"\ + -help\ + "Return the target path of the .lnk file specified in path. + This is a convenience function that extracts the target path from the .lnk file and returns it directly, + without all the additional information that resolve provides. If the .lnk header check fails, then + the .lnk file probably isn't really a shortcut file and an error message will be returned." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc target {path} { + #*** !doctools + #[call [fun target] [arg path] ] + #[para]Return the target path of the .lnk file specified in path + set info [resolve $path] + if {[dict exists $info error]} { + error [dict get $info error] + } else { + return [dict get $info link_target] + } + } + + proc target_type {path} { + set content [Get_contents $path] + if {![Contents_check_header $content]} { + error "lnk_header_check_failed" + } + set info [Contents_Get_Info $content] + return [dict get $info target_type] + } + + proc Get_target_type {content file_attributes} { + #determine type based on info in the .lnk file, such as file attributes and link flags + + if {"DIRECTORY" in $file_attributes} { + return [dict create type directory mechanism file_attributes]" + } elseif {"ARCHIVE" in $file_attributes} { + return [dict create type file mechanism file_attributes] + } else { + set iteminfo [Get_LinkTargetIDList_iteminfo $content] + if {[llength $iteminfo] > 0} { + set first_item [lindex $iteminfo 0] + set first_item_type [dict get $first_item type] + set saw_2f 0 + switch -- $first_item_type { + "1F" { + #plain files and folders always seem to have a first item type of 1F + #so does "local disk" + set type_so_far "unknown" + #For a file, we may first see multiple items of type 32 (directory) as we go through the folder structure, + #and then finally an item of type 31 (file) at the end. + #For a network location, we may see an item of type 2F. + #So we need to loop through all the items and keep track of what we've seen so far. + foreach item $iteminfo { + set item_type [dict get $item type] + if {$item_type eq "31"} { + set type_so_far "directory" + } elseif {$item_type eq "32"} { + return [dict create type file mechanism idlist] + } elseif {$item_type eq "2F"} { + set saw_2f 1 + } + } + if {$type_so_far eq "unknown" && $saw_2f} { + return [dict create type "local disk" mechanism idlist] + } + return [dict create type $type_so_far mechanism idlist] + } + } + return [dict create type "unknown" mechanism idlist] + } else { + return [dict create type "unknown" mechanism idlist] + } + } + } + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk::lib { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::winlnk::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +#tcl::namespace::eval punk::winlnk::system { + #*** !doctools + #[subsection {Namespace punk::winlnk::system}] + #[para] Internal functions that are not part of the API + + + +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +namespace eval ::punk::args::register { + #use fully qualified so 8.6 doesn't find existing var in global namespace + lappend ::punk::args::register::NAMESPACES ::punk::winlnk +} +## Ready +package provide punk::winlnk [tcl::namespace::eval punk::winlnk { + variable pkg punk::winlnk + variable version + set version 0.1.1 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/bootsupport/modules/punk/winpath-0.1.0.tm b/src/bootsupport/modules/punk/winpath-0.1.0.tm index a876d781..9079dbbc 100644 --- a/src/bootsupport/modules/punk/winpath-0.1.0.tm +++ b/src/bootsupport/modules/punk/winpath-0.1.0.tm @@ -196,7 +196,8 @@ namespace eval punk::winpath { #https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file #according to the above: Use any character in the current code page for a name, including Unicode characters and characters in the extended character set (128–255), except for the following: set reserved [list < > : \" / \\ | ? *] - + #embedded nulls (\0) are also disallowed - but these are also disallowed on unix-like platforms. + set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] #we need to exclude things like path/.. path/. foreach seg [file split $path] { @@ -208,6 +209,14 @@ namespace eval punk::winpath { #/./ /../ segments don't require protection - keep checking. continue } + if {[string toupper [file rootname $seg]] in $windows_reserved_names} { + #windows reserved names + #there are reports that these names aren't usable even with file extension - e.g that CON.txt is reserved and can't be created by some standard tools. + #In practice on windows 11 in 2026, cmd.exe,notepad,explorer and powershell seem to handle creation and access of CON.txt and PRN.txt etc without issue. + # the windows documentation reference above however still states that these names with an extension should be avoided. + #For this reason - we will still treat these as reserved and require protection with dos device syntax - even though in practice they seem to be usable without it. + return 1 + } #only check for actual space as other whitespace seems to work without being stripped #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph diff --git a/src/modules/punk-0.1.tm b/src/modules/punk-0.1.tm index 53cb4067..ea72ad1c 100644 --- a/src/modules/punk-0.1.tm +++ b/src/modules/punk-0.1.tm @@ -1422,7 +1422,7 @@ namespace eval punk { } if {[string is digit -strict [join $subindices ""]]} { - #review tip 551 (tcl9+?) + #review tip 551 (underscores in numerical literals) (tcl9+) #puts stderr ">>>>>>>>>>>>>>>> data: $leveldata selector: $selector subindices: $subindices" #pure numeric keylist - put straight to lindex # @@ -2650,6 +2650,76 @@ namespace eval punk { } }] } + } elseif {[punk::lib::is_indexset $index]} { + #review - a basic math statement such as 5-1 is also a valid member of an indexset + #see punk::lib::is_indexset and punk::lib::indexset_resolve + #single element of an indexset - e.g @..3 or @1..5 or @..end or @.. or @end..0 or @end-5..8 etc + set is_range [expr {[string first ".." $index] >= 0}] + if {$get_not} { + if {$is_range} { + lappend INDEX_OPERATIONS list-range-not + } else { + lappend INDEX_OPERATIONS listindex-not + } + set assign_script { + set assigned [lremove $assigned {*}[punk::lib::indexset_resolve [llength $leveldata] ]] + } + } else { + if {$is_range} { + lappend INDEX_OPERATIONS list-range + } else { + lappend INDEX_OPERATIONS listindex + } + set assign_script { + set assigned [lmap i [punk::lib::indexset_resolve [llength $leveldata] ] {lindex $leveldata $i}] + } + } + + if {$do_bounds_check} { + #bounds check each element of the resolved indexset - if any are out of bounds, return mismatch-list-index-out-of-range + if {$is_range} { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + lassign [split ..] idx1 _ idx2 + set v2 [punk::lib::lindex_resolve_basic $len $idx2] + if {isinf($v2)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + set v1 [punk::lib::lindex_resolve_basic $len $idx1] + if {isinf($v1)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + ${$assign_script} + } + }] + } else { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + set v1 [punk::lib::lindex_resolve_basic $len ] + if {isinf($v1)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + ${$assign_script} + } + }] + } + } else { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + ${$assign_script} + } + }] + } + set script [string map [list $index] $script] } elseif {[string first "end" $index] >=0} { if {[regexp {^end([-+]{1,2}[0-9]+)$} $index _match endspec]} { diff --git a/src/modules/punk/lib-999999.0a1.0.tm b/src/modules/punk/lib-999999.0a1.0.tm index 97cb9ada..c2273df7 100644 --- a/src/modules/punk/lib-999999.0a1.0.tm +++ b/src/modules/punk/lib-999999.0a1.0.tm @@ -1895,6 +1895,10 @@ namespace eval punk::lib { lappend keyset $p lappend keyset_structure list } + } elseif {[punk::lib::is_indexset $p]} { + set keys [punk::lib::indexset_resolve [llength $dval] $p] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] } elseif {[string match "?*-?*" $p]} { #could be either - don't change type #list indices with tcl8.7 underscores? be careful. Before 8.7 we could have used regexp \d on integers @@ -2184,60 +2188,81 @@ namespace eval punk::lib { } string { set hidekey 1 - if {$key eq "%string"} { - set hidekey 1 - set thisval $dval - } elseif {$key eq "%ansiview"} { - set thisval [ansistring VIEW -lf 1 $dval] - } elseif {$key eq "%ansiviewstyle"} { - set thisval [ansistring VIEWSTYLE -lf 1 $dval] - } elseif {[string match *lpad-* $key]} { - set hidekey 1 - lassign [split $key -] _ extra - set width [expr {[textblock::width $dval] + $extra}] - set thisval [textblock::pad $dval -which left -width $width] - } elseif {[string match *lpadstr-* $key]} { - set hidekey 1 - lassign [split $key -] _ extra - set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] - set thisval [textblock::pad $dval -which left -width $width -padchar $extra] - } elseif {[string match *rpad-* $key]} { - set hidekey 1 - lassign [split $key -] _ extra - set width [expr {[textblock::width $dval] + $extra}] - set thisval [textblock::pad $dval -which right -width $width] - } elseif {[string match *rpadstr-* $key]} { - set hidekey 1 - lassign [split $key -] _ extra - set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] - set thisval [textblock::pad $dval -which right -width $width -padchar $extra] - } else { - if {[lindex $key 1] eq "query"} { - set qry [lindex $key 0] - } else { - set qry $key + switch -- $key { + "%string" { + set hidekey 1 + set thisval $dval } - set thisval $dval - if {[string index $key 0] ne "%"} { - set key %$key + "%ansiview" { + set thisval [ansistring VIEW -lf 1 $dval] } - % thisval.= $key= $thisval - } - - set nextpatterns [list] - #which pattern nest applies to this branch - set nextsub [dict get $pattern_next_substructure $pattern_nest] - if {[llength $pattern_nest_list]} { - set nest [lrange $pattern_nest_list 1 end] - lappend nextpatterns {*}[join $nest /] + "%ansiviewstyle" { + set thisval [ansistring VIEWSTYLE -lf 1 $dval] } - #set nextopts [dict get $argd opts] - dict set nextopts -roottype $nextsub - dict set nextopts -channel none + default { + switch -glob -- $key { + *lpad-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which left -width $width] + } + %lpadstr-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which left -width $width -padchar $extra] + } + %rpad-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which right -width $width] + } + %rpadstr-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which right -width $width -padchar $extra] + } + %split-* { + #split on one or more chars - review + set hidekey 1 + lassign [split $key -] _ splitchars + set thisval [split $dval $splitchars] + } + default { + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + } else { + set qry $key + } + set thisval $dval + if {[string index $key 0] ne "%"} { + set key %$key + } + #pipeline + % thisval.= $key= $thisval + } + } - if {[llength $nextpatterns]} { - set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] } + } + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + #set nextopts [dict get $argd opts] + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + + if {[llength $nextpatterns]} { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } } } diff --git a/src/modules/punk/lib-buildversion.txt b/src/modules/punk/lib-buildversion.txt index 97365480..a969a498 100644 --- a/src/modules/punk/lib-buildversion.txt +++ b/src/modules/punk/lib-buildversion.txt @@ -1,3 +1,3 @@ -0.1.5 +0.1.6 #First line must be a semantic version number #all other lines are ignored. diff --git a/src/modules/punk/nav/fs-999999.0a1.0.tm b/src/modules/punk/nav/fs-999999.0a1.0.tm index 076b1de2..eb4026e9 100644 --- a/src/modules/punk/nav/fs-999999.0a1.0.tm +++ b/src/modules/punk/nav/fs-999999.0a1.0.tm @@ -229,12 +229,16 @@ tcl::namespace::eval punk::nav::fs { } else { set stripbase 1 } - if {$v eq "/"} { - #hack - dict set matchinfo files {} - dict set matchinfo filesizes {} - } - set out [dirfiles_dict_as_lines -stripbase $stripbase $matchinfo] + + #we need to pass matchinfo that includes files even when only doing a directory listing (d/ /) + #This is because we want to display links/shortcuts that point to directories as directories. + #( ./ listing needs to show navigable items) + #if {$v eq "/"} { + # #dodgy hack that doesn't give proper display of all links/shortcuts that are pointing to directories. + # dict set matchinfo files {} + # dict set matchinfo filesizes {} + #} + set out [dirfiles_dict_as_lines -listing $v -stripbase $stripbase $matchinfo] #set chunklist [list] #lappend chunklist [list stdout "[punk::ansi::a+ brightwhite]$out[punk::ansi::a]\n"] set result "[punk::ansi::a+ brightwhite]$out[punk::ansi::a]\n" @@ -258,10 +262,10 @@ tcl::namespace::eval punk::nav::fs { #puts stdout "-->[ansistring VIEW $result]" return $result } else { - set atail [lassign $args a1] + set atail [lassign $args cdtarget] if {[llength $args] == 1} { - set a1 [lindex $args 0] - switch -exact -- $a1 { + set cdtarget [lindex $args 0] + switch -exact -- $cdtarget { . - ./ { tailcall punk::nav::fs::d/ } @@ -286,43 +290,88 @@ tcl::namespace::eval punk::nav::fs { } } else { cd $up1 - #set VIRTUAL_CWD [file normalize $a1] + #set VIRTUAL_CWD [file normalize $cdtarget] } tailcall punk::nav::fs::d/ $v } } - if {![regexp {[*?]} $a1] && [file pathtype $a1] ne "relative"} { + set cdtarget_copy [punk::nav::fs::system::valcopy $cdtarget] + set cdtarget_copy [string map {\\ /} $cdtarget_copy] + if {[string range $cdtarget_copy 0 3] eq "//?/"} { + #handle dos device paths - convert to normal path for glob testing + set glob_test [string range $cdtarget_copy 3 end] + set cdtarget_is_glob [regexp {[*?]} $glob_test] + } else { + set cdtarget_is_glob [regexp {[*?]} $cdtarget] + } + if {!$cdtarget_is_glob} { + set cdtarget_file_type [file type $cdtarget] + #e.g may be a link - whilst the type returned in the 'file stat' info reflects the type of the link target + } else { + set cdtarget_file_type "glob" + } + + if {!$cdtarget_is_glob && [file pathtype $cdtarget] ne "relative"} { #non-relative non-glob - if { ![string match //zipfs:/* $a1]} { - if {[file type $a1] eq "directory"} { - cd $a1 - #set VIRTUAL_CWD $a1 - tailcall punk::nav::fs::d/ $v + if {![string match //zipfs:/* $cdtarget]} { + switch -- $cdtarget_file_type { + link { + file stat $cdtarget cdtargetinfo + set linktarget_file_type $cdtargetinfo(type) + if {$linktarget_file_type eq "directory"} { + set linktarget [file readlink $cdtarget] + cd $linktarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } + } + directory { + cd $cdtarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } } } } - if {![regexp {[*?]} $a1] && ![string match //zipfs:/* $a1] && ![string match "//zipfs:/*" $VIRTUAL_CWD]} { - if {[file type $a1] eq "directory"} { - cd $a1 - #set VIRTUAL_CWD [file normalize $a1] - tailcall punk::nav::fs::d/ $v + if {!$cdtarget_is_glob && ![string match //zipfs:/* $cdtarget] && ![string match "//zipfs:/*" $VIRTUAL_CWD]} { + switch -- $cdtarget_file_type { + link { + file stat $cdtarget cdtargetinfo + set linktarget_file_type $cdtargetinfo(type) + set linktarget [file readlink $cdtarget] + if {$linktarget_file_type eq "directory"} { + cd $linktarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } + } + directory { + cd $cdtarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } } + #if {[file type $cdtarget] eq "directory"} { + # cd $cdtarget + # #set VIRTUAL_CWD [file normalize $cdtarget] + # tailcall punk::nav::fs::d/ $v + #} } - if {![regexp {[*?]} $a1]} { + if {!$cdtarget_is_glob} { #NON-Glob target #review - if {[string match //zipfs:/* $a1]} { - if {[Zipfs_path_within_zipfs_mounts $a1]} { - commandstack::basecall cd $a1 + if {[string match //zipfs:/* $cdtarget]} { + if {[Zipfs_path_within_zipfs_mounts $cdtarget]} { + commandstack::basecall cd $cdtarget } - set VIRTUAL_CWD $a1 - set curdir $a1 + set VIRTUAL_CWD $cdtarget + set curdir $cdtarget } else { - set target [punk::path::normjoin $VIRTUAL_CWD $a1] + set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] if {[string match //zipfs:/* $VIRTUAL_CWD]} { if {[Zipfs_path_within_zipfs_mounts $target]} { commandstack::basecall cd $target @@ -521,20 +570,93 @@ tcl::namespace::eval punk::nav::fs { return $result } + punk::args::define { + @id -id ::punk::nav::fs::d/new + -nonportable -type none -help\ + "Allow creation of directories which may not be portable across platforms. + Use with caution and only when you know what you are doing. + This allows creation of directories with names that may be invalid on some + platforms, or that may have special meanings on some platforms + (e.g reserved device names on windows). + If -nonportable is not supplied, then an error will be raised if any supplied + path is non-portable as defined by punk::winpath::illegalname_test. + + Regardless of whether -nonportable is supplied or not, some characters are not + suitable for windows or most other platforms and will be rejected with an error. + An example of this is the null character (\0)." + @values -min 1 -max -1 -type string + path -type string -multiple 1 -help\ + "Path(s) to create. Can be absolute or relative. + + If any path is rejected due to -nonportable or other invalid characters, + or because a parent directory is not writable, then no directories will be created. + + If a path already exists, then it will be left as-is and no error will be raised. + + If despite passing the name tests or writability tests, a directory cannot be + created for some reason (e.g other filesystem error) then an error will be raised + and processing of any remaining paths will be aborted." + } + #todo - synchronize overall behaviour of d/new with that of n/new (for namespaces) proc d/new {args} { - if {![llength $args]} { - error "usage: d/new \[ ...\]" - } - set a1 [lindex $args 0] + set argd [punk::args::parse $args withid ::punk::nav::fs::d/new] + lassign [dict values $argd] leaders opts values received + set paths [dict get $values path] + set allow_nonportable [dict exists $received -nonportable] + set curdir [pwd] - set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] - set fullpath [file join $path1 {*}[lrange $args 1 end]] + set fullpath_list [list] + set error_paths [list] + foreach p $paths { + if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { + #error "punk::nav::fs::d/new Path '$p' is not portable and may not be created without -nonportable option" + lappend error_paths [list $p "Path '$p' is not portable and may not be created without -nonportable option"] + continue + } + if {[string first \0 $p] != -1} { + #error "punk::nav::fs::d/new Path '$p' contains null character which is not allowed" + lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] + continue + } + set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] + #e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. + set fullpath [file join $path1 {*}[lrange $args 1 end]] + #Some subpaths of the supplied paths to create may already exist. + #we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. + set parent [file dirname $fullpath] + while {![file exists $parent]} { + set parent [file dirname $parent] + } + if {![file writable $parent]} { + #error "punk::nav::fs::d/new Cannot create directory '$fullpath' as parent '$parent' is not writable" + lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] + continue + } + lappend fullpath_list $fullpath + } + if {[llength $fullpath_list] != [llength $paths]} { + set path_error_display "" + foreach e $error_paths { + set p [lindex $e 0] + set m [lindex $e 1] + append path_error_display " Path: '$p' Error: $m\n" + } + error "punk::nav::fs::d/new One or more supplied paths were invalid or not writable:\n$path_error_display" + } - if {[file exists $fullpath]} { - error "Folder $fullpath already exists" + set num_created 0 + set error_string "" + foreach fullpath $fullpath_list { + if {[catch {file mkdir $fullpath}]} { + set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." + break + } + incr num_created } - file mkdir $fullpath - d/ $fullpath + if {$error_string ne ""} { + error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." + } + d/ $curdir } #todo use unknown to allow d/~c:/etc ?? @@ -849,11 +971,11 @@ tcl::namespace::eval punk::nav::fs { #file attr //cookit:/ returns {-vfs 1 -handle {}} #we will treat it differently for now - use generic handler REVIEW - set in_vfs 0 ;#we use this only for a vfs which is reported to have a mountpoint by vfs::filesystem info - not all that have -vfs 1 attr like cookit. + set is_in_vfs 0 ;#we use this only for a vfs which is reported to have a mountpoint by vfs::filesystem info - not all that have -vfs 1 attr like cookit. if {[llength [package provide vfs]]} { foreach mount [vfs::filesystem info] { if {[punk::mix::base::lib::path_a_atorbelow_b $location $mount]} { - set in_vfs 1 + set is_in_vfs 1 break } } @@ -871,27 +993,27 @@ tcl::namespace::eval punk::nav::fs { } else { set next_opt_with_times [list -with_times $opt_with_times] } - if {$in_vfs} { + if {$is_in_vfs} { set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] } else { - set in_zipfs 0 - set in_cookit 1 - set in_other_pseudovol 1 + set invfs "" switch -glob -- $location { //zipfs:/* { if {[info commands ::tcl::zipfs::mount] ne ""} { - set in_zipfs 1 + set invfs zipfs } } //cookit:/* { - set in_cookit 1 + set invfs cookit } default { #handle 'other/unknown' that mounts at a volume-like path //pseudovol:/ + #(intentionally will not match a dos device path such as //?/c:/) if {[regexp {//((?:(?!:|/).)+):/.*} $location _match pseudovol]} { #pseudovol probably more than one char long #we don't really expect something like //c:/ , but anyway, it's not the same as c:/ and for all we know someone could use that as a volume name? - set in_other_pseudovol 1 ;#flag so we don't use twapi - hope generic can handle it (uses tcl glob) + #flag so we don't use twapi - hope generic can handle it (uses tcl glob) + set invfs pseudovol } else { #we could use 'file attr' here to test if {-vfs 1} #but it's an extra filesystem hit on all normal paths too (which can be expensive on some systems) @@ -900,20 +1022,24 @@ tcl::namespace::eval punk::nav::fs { } } - - if {$in_zipfs} { - #relative vs absolute? review - cwd valid for //zipfs:/ ?? - set listing [punk::du::lib::du_dirlisting_zipfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } elseif {$in_cookit} { - #seems to be a vfs - except it mounts on a pseudo-volume path //cookit:/ - #don't use twapi - #could possibly use du_dirlisting_tclvfs REVIEW - #files and folders are all returned with the -types hidden option for glob on windows - set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } elseif {$in_other} { - set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } else { - set listing [punk::du::dirlisting $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + switch -- $invfs { + zipfs { + #relative vs absolute? review - cwd valid for //zipfs:/ ?? + set listing [punk::du::lib::du_dirlisting_zipfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + cookit { + #seems to be a vfs - except it mounts on a pseudo-volume path //cookit:/ + #don't use twapi + #could possibly use du_dirlisting_tclvfs REVIEW + #files and folders are all returned with the -types hidden option for glob on windows + set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + pseudovol { + set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + default { + set listing [punk::du::dirlisting $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } } } @@ -1018,11 +1144,13 @@ tcl::namespace::eval punk::nav::fs { @id -id ::punk::nav::fs::dirfiles_dict_as_lines -stripbase -default 0 -type boolean -formatsizes -default 1 -type boolean + -listing -default "/" -choices {/ // //} @values -min 1 -max -1 -type dict -unnamed true } #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? proc dirfiles_dict_as_lines {args} { + set ts1 [clock milliseconds] package require overtype set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] lassign [dict values $argd] leaders opts vals @@ -1031,9 +1159,12 @@ tcl::namespace::eval punk::nav::fs { # -- --- --- --- --- --- --- --- --- --- --- --- - set opt_stripbase [dict get $opts -stripbase] + set opt_stripbase [dict get $opts -stripbase] set opt_formatsizes [dict get $opts -formatsizes] + set opt_listing [dict get $opts -listing] # -- --- --- --- --- --- --- --- --- --- --- --- + #we still need to examine files for -listing / which means show only directories, + # because we want to display links/shortcuts that point to directories as directories #if multiple dicts and -stripbase = 1 - we can only strip the longest common part of the searchbases supplied set common_base "" @@ -1074,7 +1205,6 @@ tcl::namespace::eval punk::nav::fs { foreach fileset [list dirs files links underlayfiles flaggedhidden flaggedreadonly flaggedsystem nonportable vfsmounts] { set $fileset [list] } - #set contents [lindex $list_of_dicts 0] foreach contents $list_of_dicts { lappend dirs {*}[dict get $contents dirs] @@ -1090,6 +1220,7 @@ tcl::namespace::eval punk::nav::fs { lappend vfsmounts {*}[dict get $contents vfsmounts] } + set fkeys [dict create] ;#avoid some file normalize calls.. if {$opt_stripbase && $common_base ne ""} { set filetails [list] @@ -1224,27 +1355,41 @@ tcl::namespace::eval punk::nav::fs { #review - symlink to shortcut? hopefully will just work #classify as file or directory - fallback to file if unknown/undeterminable set finfo_plus [list] + set ts2 [clock milliseconds] foreach fdict $finfo { set fname [dict get $fdict file] if {[file extension $fname] eq ".lnk"} { if {![catch {package require punk::winlnk}]} { - set shortcutinfo [punk::winlnk::resolve $fname] set target_type "file" ;#default/fallback + + set shortcutinfo [punk::winlnk::resolve $fname] if {[dict exists $shortcutinfo link_target]} { set is_valid_lnk 1 set tgt [dict get $shortcutinfo link_target] - if {[file exists $tgt]} { - #file type could return 'link' - we will use isfile/isdirectory - if {[file isfile $tgt]} { - set target_type file - } elseif {[file isdirectory $tgt]} { - set target_type directory - } else { - set target_type file ;## ? + set link_target_type [dict get $shortcutinfo target_type] + switch -- $link_target_type { + file { + set target_type "file" + } + directory - "local disk" { + set target_type "directory" + } + unknown { + #fall back to checking attributes and filesystem if we have a link_target but no target_type + if {[file exists $tgt]} { + #file type could return 'link' - we will use isfile/isdirectory + if {[file isfile $tgt]} { + set target_type file + } elseif {[file isdirectory $tgt]} { + set target_type directory + } else { + set target_type file ;## ? + } + } else { + #todo - see if punk::winlnk has info about the type at the time of linking + #for now - treat as file + } } - } else { - #todo - see if punk::winlnk has info about the type at the time of linking - #for now - treat as file } } else { #no link_target - probably an ordinary file - but there could have been some other error in reading the binary windows lnk format. @@ -1295,6 +1440,8 @@ tcl::namespace::eval punk::nav::fs { } unset finfo + puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" + puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] @@ -1304,58 +1451,82 @@ tcl::namespace::eval punk::nav::fs { set displaylist [list] set col1 [string repeat " " [expr {$widest1 + 2}]] set RST [punk::ansi::a] + if {$opt_listing eq "/"} { + #disply directories only (including items that were actually files that were links/shortcuts to directories) + set finfo_plus [list] + } foreach d $dirs filerec $finfo_plus { - set d1 [punk::ansi::a+ cyan bold] - set d2 [punk::ansi::a+ defaultfg defaultbg normal] - #set f1 [punk::ansi::a+ white bold] - set f1 [punk::ansi::a+ white] - set f2 [punk::ansi::a+ defaultfg defaultbg normal] + set d1 [punk::ansi::a+ cyan normal] + set d1_overrides [list] + #set d2 [punk::ansi::a+ defaultfg defaultbg normal] + set f1 [punk::ansi::a+ white normal] + set f1_overrides [list] + #set f2 [punk::ansi::a+ defaultfg defaultbg normal] set fdisp "" if {[string length $d]} { if {$d in $flaggedhidden} { - set d1 [punk::ansi::a+ cyan normal] + #set d1 [punk::ansi::a+ Term-grey50 normal] + lappend d1_overrides term-grey50 } if {$d in $vfsmounts} { - if {$d in $flaggedhidden} { - #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indicatio of 'hidden' status - REVIEW - #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) - #mark it differently for now.. (todo bug report?) - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red Yellow bold] - } else { - set d1 [punk::ansi::a+ green Purple bold] - } - } else { - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red White bold] - } else { - set d1 [punk::ansi::a+ green bold] - } - } - } else { - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red bold] - } + lappend d1_overrides Green + } + if {$d in $nonportable} { + #lappend d1_overrides underline undt-red ;#we use underlins to indicate symlinks and shortcuts, so we shouldn't use underlines here if possible. + lappend d1_overrides italic bold } + #if {$d in $vfsmounts} { + # if {$d in $flaggedhidden} { + # #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indication of 'hidden' status - REVIEW + # #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) + # #mark it differently for now.. (todo bug report?) + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red Yellow bold] + # } else { + # set d1 [punk::ansi::a+ green Purple bold] + # } + # } else { + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red White bold] + # } else { + # set d1 [punk::ansi::a+ green bold] + # } + # } + #} else { + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red bold] + # } + #} #dlink-style & dshortcut_style are for underlines - can be added with colours already set + + if {[llength $d1_overrides]} { + set d1 [punk::ansi::a+ {*}$d1_overrides] + } if {$d in $dir_symlinks} { append d1 $dlink_style } elseif {$d in $dir_shortcuts} { append d1 $dshortcut_style } } + if {[llength $filerec]} { set fname [dict get $filerec file] set fdisp [dict get $filerec display] if {$fname in $flaggedhidden} { - set f1 [punk::ansi::a+ Purple] - } else { - if {$fname in $nonportable} { - set f1 [punk::ansi::a+ red bold] - } + #set f1 [punk::ansi::a+ Term-grey50] + lappend f1_overrides term-grey50 + } + if {$fname in $nonportable} { + lappend f1_overrides italic bold } + if {[llength $f1_overrides]} { + set f1 [punk::ansi::a+ {*}$f1_overrides] + } + lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST + } else { + #either there are no files or opt_listing is / = show dirs only (some of which may have actually been files that were links/shortcuts to directories) + lappend displaylist [overtype::left $col1 $d1$d$RST] } - lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST } return [punk::lib::list_as_lines $displaylist] @@ -1469,6 +1640,12 @@ tcl::namespace::eval punk::nav::fs::system { #[subsection {Namespace punk::nav::fs::system}] #[para] Internal functions that are not part of the API + #utility function to copy values from one variable to another without sharing the reference. + #Useful for example to avoid some issues with possible shimmering of the underlying type of file paths. + proc valcopy {obj} { + append obj2 $obj {} + } + #ordinary emission of chunklist when no repl proc emit_chunklist {chunklist} { set result "" diff --git a/src/modules/punk/pipe-999999.0a1.0.tm b/src/modules/punk/pipe-999999.0a1.0.tm index d97f45f7..0ad82b57 100644 --- a/src/modules/punk/pipe-999999.0a1.0.tm +++ b/src/modules/punk/pipe-999999.0a1.0.tm @@ -326,12 +326,12 @@ tcl::namespace::eval punk::pipe::lib { set in_atom 1 } ( { - incr in_brackets + incr in_brackets } default { if {$end_var_posn == -1 && (($c in $var_terminals) && ($prevc ni $protect_terminals))} { - set end_var_posn $token_index - } + set end_var_posn $token_index + } } } } diff --git a/src/modules/punk/winlnk-999999.0a1.0.tm b/src/modules/punk/winlnk-999999.0a1.0.tm index bb26f63b..3387cfe3 100644 --- a/src/modules/punk/winlnk-999999.0a1.0.tm +++ b/src/modules/punk/winlnk-999999.0a1.0.tm @@ -115,7 +115,7 @@ tcl::namespace::eval punk::winlnk { } variable LinkFlags set LinkFlags [dict create\ - hasLinkTargetIDList 1\ + HasLinkTargetIDList 1\ HasLinkInfo 2\ HasName 4\ HasRelativePath 8\ @@ -477,6 +477,54 @@ tcl::namespace::eval punk::winlnk { return 0 } } + proc Get_LinkTargetIDList_content {contents} { + set idlist_size [Get_LinkTargetIDList_size $contents] + if {$idlist_size == 0} { + return "" + } else { + set idlist_content [string range $contents 78 [expr {78 + $idlist_size -1}]] + return $idlist_content + } + } + + #some clues on the structure of the IDList content and how to parse it can be found in the analysis of CVE-2020-0729, + #which is a remote code execution vulnerability in Windows that can be exploited through specially crafted .lnk files that contain malicious IDList content. + #The analysis of this vulnerability provides insights into how the IDList content is structured and how it can be parsed to extract information about the link target and potentially execute code. + #https://www.zerodayinitiative.com/blog/2020/3/25/cve-2020-0729-remote-code-execution-through-lnk-files + + proc Get_LinkTargetIDList_iteminfo {contents} { + set idlist_content [Get_LinkTargetIDList_content $contents] + set result {} + set offset 0 + while {$offset < [string length $idlist_content]} { + if {[string length $idlist_content] - $offset < 2} break + set size_bytes [string range $idlist_content $offset [expr {$offset + 1}]] ;#size including these 2 bytes + binary scan $size_bytes su size + if {$size == 0} break + if {$size < 2} { + # Invalid size, abort + error "punk::winlnk::Get_LinkTargetIDList_iteminfo: Invalid ItemID size: $size at offset $offset" + } + if {$offset + $size > [string length $idlist_content]} { + # ItemID extends beyond content, stop parsing + puts stderr "punk::winlnk::Get_LinkTargetIDList_iteminfo: ItemID at offset $offset with size $size extends beyond content length, stopping parse" + break + } + set itemid [string range $idlist_content $offset [expr {$offset + $size - 1}]] + set itemid_bytes [string range $itemid 0 1] + binary scan $itemid_bytes su itemid_size + #in *general* byte 3 of the ItemID structure can be used to determine the type of the item + #(e.g. file, folder, network location, etc.) but this is not always reliable and can vary + #based on the specific structure of the ItemID and the context in which it is used + set itemid_type_byte [string index $itemid 2] + #puts stderr "ItemID size: $itemid_size, type byte: [format %02X [scan $itemid_type_byte %c]]" + set maybe_type [format %02X [scan $itemid_type_byte %c]] + lappend result [dict create size $itemid_size type $maybe_type rawcontent $itemid] + + incr offset $size + } + return $result + } proc Get_LinkInfo_content {contents} { set idlist_size [Get_LinkTargetIDList_size $contents] if {$idlist_size == 0} { @@ -497,11 +545,18 @@ tcl::namespace::eval punk::winlnk { } proc LinkInfo_get_fields {linkinfocontent} { - set 4bytes [string range $linkinfocontent 0 3] + #TODO - finish parsing of LinkInfo - add support + #Link location information + #present if data flag HasLinkInfo exists. + + set 4bytes [string range $linkinfocontent 0 3] binary scan $4bytes i val ;#size *including* these 4 bytes + set bytes_linkinfoheadersize [string range $linkinfocontent 4 7] + binary scan $bytes_linkinfoheadersize i headersize + set bytes_linkinfoflags [string range $linkinfocontent 8 11] - set r [binary scan $4bytes i flags] ;# i for little endian 32-bit signed int + set r [binary scan $bytes_linkinfoflags i flags] ;# i for little endian 32-bit signed int #puts "linkinfoflags: $flags" set localbasepath "" @@ -517,10 +572,10 @@ tcl::namespace::eval punk::winlnk { #logger #puts stderr "CommonNetworkRelativeLinkAndPathSuffix" } - set bytes_volumeid_offset [string range $linkinfocontent 12 15] - set bytes_localbasepath_offset [string range $linkinfocontent 16 19] ;# a - set bytes_commonnetworkrelativelinkoffset [string range $linkinfocontent 20 23] - set bytes_commonpathsuffix_offset [string range $linkinfocontent 24 27] ;# a + set bytes_volumeid_offset [string range $linkinfocontent 12 15] + set bytes_localbasepath_offset [string range $linkinfocontent 16 19] + set bytes_commonnetworkrelativelinkoffset [string range $linkinfocontent 20 23] + set bytes_commonpathsuffix_offset [string range $linkinfocontent 24 27] binary scan $bytes_localbasepath_offset i bp_offset if {$bp_offset > 0} { @@ -558,10 +613,11 @@ tcl::namespace::eval punk::winlnk { } - return [dict create localbasepath $localbasepath commonpathsuffix $commonpathsuffix] + return [dict create localbasepath $localbasepath commonpathsuffix $commonpathsuffix note ] } - - proc contents_get_info {contents} { + + proc Contents_Get_Info {contents} { + #todo - return something like the perl lnk-parse-1.0.pl script? @@ -607,7 +663,9 @@ tcl::namespace::eval punk::winlnk { set localbase_path "" set suffix_path "" set linkinfocontent [dict get $linkinfo_content_dict content] + set next_start [dict get $linkinfo_content_dict next_start] ;#location of section following LinkInfo (Location information) - this will be the Data Strings. set link_target "" + set linkfields [dict create] if {$linkinfocontent ne ""} { set linkfields [LinkInfo_get_fields $linkinfocontent] set localbase_path [dict get $linkfields localbasepath] @@ -662,10 +720,40 @@ tcl::namespace::eval punk::winlnk { } } + # ---------------------------------------------------------------------- + #todo - get Data strings by parsing contents starting at $next_start + #stored in following order: + # description + # relative path + # working directory + # command line arguments + # icon location + + #Data strings format: + # 2 bytes: number of characters in the string + # following: The string. ASCII or UTF-16 little-endian string + + set datastring_dict [Contents_Get_DataStrings $contents $next_start] + + # ---------------------------------------------------------------------- + + set file_attributes [Header_Get_FileAttributes $contents] + set linktargetidlist [Get_LinkTargetIDList_iteminfo $contents] + + set target_type_info [Get_target_type $contents $file_attributes] + set target_type [dict get $target_type_info type] + set target_type_mech [dict get $target_type_info mechanism] + if {$target_type eq "unknown"} { + if {[file exists $link_target]} { + set target_type [file type $link_target] + set target_type_mech "filesystem" + } + } + set result [dict create\ link_target $link_target\ link_flags $flags_enabled\ - file_attributes [Header_Get_FileAttributes $contents]\ + file_attributes $file_attributes\ creation_time [Header_Get_CreationTime $contents]\ access_time [Header_Get_AccessTime $contents]\ write_time [Header_Get_WriteTime $contents]\ @@ -673,8 +761,12 @@ tcl::namespace::eval punk::winlnk { icon_index ""\ showwnd "$showwnd"\ hotkey [Header_Get_HotKey $contents]\ - relative_path "?"\ + target_type $target_type\ + target_type_mech $target_type_mech\ + idlist $linktargetidlist\ + linkinfo $linkfields\ ] + #relative_path "?" } proc file_check_header {path} { @@ -707,7 +799,7 @@ tcl::namespace::eval punk::winlnk { #[para] If the .lnk header check fails, then the .lnk file probably isn't really a shortcut file and the dictionary will contain an 'error' key set c [Get_contents $path] if {[Contents_check_header $c]} { - return [contents_get_info $c] + return [Contents_Get_Info $c] } else { return [dict create error "lnk_header_check_failed"] } @@ -728,8 +820,39 @@ tcl::namespace::eval punk::winlnk { } proc file_show_info {path} { package require punk::lib - punk::lib::showdict [resolve $path] * + #punk::lib::showdict [resolve $path] */@* + set field_queries [dict create\ + link_target link_target\ + link_flags link_flags/@*\ + file_attributes file_attributes\ + creation_time creation_time\ + access_time access_time\ + write_time write_time\ + target_length target_length\ + icon_index icon_index\ + showwnd showwnd\ + hotkey hotkey\ + target_type target_type\ + idlist idlist/@*/@*.@*\ + linkinfo linkinfo/@*.@*\ + ] + set info [resolve $path] + if {[dict exists $info error]} { + return "Error: [dict get $info error]" + } else { + set querystring "" + foreach field [dict keys $info] { + if {[dict exists $field_queries $field]} { + append querystring "[dict get $field_queries $field] " + } else { + append querystring "$field " + } + } + puts "querystring: $querystring" + return [punk::lib::showdict $info {*}$querystring] + } } + namespace eval argdoc { variable PUNKARGS lappend PUNKARGS [list { @@ -758,6 +881,61 @@ tcl::namespace::eval punk::winlnk { } } + proc target_type {path} { + set content [Get_contents $path] + if {![Contents_check_header $content]} { + error "lnk_header_check_failed" + } + set info [Contents_Get_Info $content] + return [dict get $info target_type] + } + + proc Get_target_type {content file_attributes} { + #determine type based on info in the .lnk file, such as file attributes and link flags + + if {"DIRECTORY" in $file_attributes} { + return [dict create type directory mechanism file_attributes]" + } elseif {"ARCHIVE" in $file_attributes} { + return [dict create type file mechanism file_attributes] + } else { + set iteminfo [Get_LinkTargetIDList_iteminfo $content] + if {[llength $iteminfo] > 0} { + set first_item [lindex $iteminfo 0] + set first_item_type [dict get $first_item type] + set saw_2f 0 + switch -- $first_item_type { + "1F" { + #plain files and folders always seem to have a first item type of 1F + #so does "local disk" + set type_so_far "unknown" + #For a file, we may first see multiple items of type 32 (directory) as we go through the folder structure, + #and then finally an item of type 31 (file) at the end. + #For a network location, we may see an item of type 2F. + #So we need to loop through all the items and keep track of what we've seen so far. + foreach item $iteminfo { + set item_type [dict get $item type] + if {$item_type eq "31"} { + set type_so_far "directory" + } elseif {$item_type eq "32"} { + return [dict create type file mechanism idlist] + } elseif {$item_type eq "2F"} { + set saw_2f 1 + } + } + if {$type_so_far eq "unknown" && $saw_2f} { + return [dict create type "local disk" mechanism idlist] + } + return [dict create type $type_so_far mechanism idlist] + } + } + return [dict create type "unknown" mechanism idlist] + } else { + return [dict create type "unknown" mechanism idlist] + } + } + } + + #proc sample1 {p1 n args} { # #*** !doctools # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] diff --git a/src/modules/punk/winlnk-buildversion.txt b/src/modules/punk/winlnk-buildversion.txt index f47d01c8..781c895b 100644 --- a/src/modules/punk/winlnk-buildversion.txt +++ b/src/modules/punk/winlnk-buildversion.txt @@ -1,3 +1,3 @@ -0.1.0 +0.1.1 #First line must be a semantic version number #all other lines are ignored. diff --git a/src/modules/punk/winpath-999999.0a1.0.tm b/src/modules/punk/winpath-999999.0a1.0.tm index 334a19c3..71e7ee92 100644 --- a/src/modules/punk/winpath-999999.0a1.0.tm +++ b/src/modules/punk/winpath-999999.0a1.0.tm @@ -196,7 +196,8 @@ namespace eval punk::winpath { #https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file #according to the above: Use any character in the current code page for a name, including Unicode characters and characters in the extended character set (128–255), except for the following: set reserved [list < > : \" / \\ | ? *] - + #embedded nulls (\0) are also disallowed - but these are also disallowed on unix-like platforms. + set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] #we need to exclude things like path/.. path/. foreach seg [file split $path] { @@ -208,6 +209,14 @@ namespace eval punk::winpath { #/./ /../ segments don't require protection - keep checking. continue } + if {[string toupper [file rootname $seg]] in $windows_reserved_names} { + #windows reserved names + #there are reports that these names aren't usable even with file extension - e.g that CON.txt is reserved and can't be created by some standard tools. + #In practice on windows 11 in 2026, cmd.exe,notepad,explorer and powershell seem to handle creation and access of CON.txt and PRN.txt etc without issue. + # the windows documentation reference above however still states that these names with an extension should be avoided. + #For this reason - we will still treat these as reserved and require protection with dos device syntax - even though in practice they seem to be usable without it. + return 1 + } #only check for actual space as other whitespace seems to work without being stripped #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm index 53cb4067..ea72ad1c 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm @@ -1422,7 +1422,7 @@ namespace eval punk { } if {[string is digit -strict [join $subindices ""]]} { - #review tip 551 (tcl9+?) + #review tip 551 (underscores in numerical literals) (tcl9+) #puts stderr ">>>>>>>>>>>>>>>> data: $leveldata selector: $selector subindices: $subindices" #pure numeric keylist - put straight to lindex # @@ -2650,6 +2650,76 @@ namespace eval punk { } }] } + } elseif {[punk::lib::is_indexset $index]} { + #review - a basic math statement such as 5-1 is also a valid member of an indexset + #see punk::lib::is_indexset and punk::lib::indexset_resolve + #single element of an indexset - e.g @..3 or @1..5 or @..end or @.. or @end..0 or @end-5..8 etc + set is_range [expr {[string first ".." $index] >= 0}] + if {$get_not} { + if {$is_range} { + lappend INDEX_OPERATIONS list-range-not + } else { + lappend INDEX_OPERATIONS listindex-not + } + set assign_script { + set assigned [lremove $assigned {*}[punk::lib::indexset_resolve [llength $leveldata] ]] + } + } else { + if {$is_range} { + lappend INDEX_OPERATIONS list-range + } else { + lappend INDEX_OPERATIONS listindex + } + set assign_script { + set assigned [lmap i [punk::lib::indexset_resolve [llength $leveldata] ] {lindex $leveldata $i}] + } + } + + if {$do_bounds_check} { + #bounds check each element of the resolved indexset - if any are out of bounds, return mismatch-list-index-out-of-range + if {$is_range} { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + lassign [split ..] idx1 _ idx2 + set v2 [punk::lib::lindex_resolve_basic $len $idx2] + if {isinf($v2)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + set v1 [punk::lib::lindex_resolve_basic $len $idx1] + if {isinf($v1)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + ${$assign_script} + } + }] + } else { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + set v1 [punk::lib::lindex_resolve_basic $len ] + if {isinf($v1)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + ${$assign_script} + } + }] + } + } else { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + ${$assign_script} + } + }] + } + set script [string map [list $index] $script] } elseif {[string first "end" $index] >=0} { if {[regexp {^end([-+]{1,2}[0-9]+)$} $index _match endspec]} { diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm new file mode 100644 index 00000000..6a7b79d6 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm @@ -0,0 +1,5488 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::lib 0.1.6 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::lib 0 0.1.6] +#[copyright "2024"] +#[titledesc {punk general utility functions}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk library}] [comment {-- Description at end of page heading --}] +#[require punk::lib] +#[keywords module utility lib] +#[description] +#[para]This is a set of utility functions that are commonly used across punk modules or are just considered to be general-purpose functions. +#[para]The base set includes string and math functions but has no specific theme + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::lib +#[subsection Concepts] +#[para]The punk::lib modules should have no strong dependencies other than Tcl +#[para]Dependendencies that only affect display or additional functionality may be included - but should fail gracefully if not present, and only when a function is called that uses one of these soft dependencies. +#[para]This requirement for no strong dependencies, means that many utility functions that might otherwise seem worthy of inclusion here are not present. + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::lib +#[list_begin itemized] + +package require Tcl 8.6- +package require punk::args +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {punk::args}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + + +tcl::namespace::eval punk::lib::ensemble { + #wiki.tcl-lang.org/page/ensemble+extend + # extend an ensemble-like routine with the routines in some namespace + + #NOTE - the extension ns becomes the '-namespace ' for the original routine name, + #with -unknown handling the original subcommands. + #This makes the original ensemble harder to introspect! + #e.g (the original -map or -namespace not visible) + #In this specific case (which, being published on the wiki might be common in the wild) + #we could call {*}[namespace ensemble configure $routine -unknown] $routine + #and then detect that the first resulting word is an ensemble + #For arbitrary '-unknown scripts' - sensible introspection is likely not possible + + proc extend {routine extension} { + if {![string match ::* $routine]} { + set resolved [uplevel 1 [list ::tcl::namespace::which $routine]] + if {$resolved eq {}} { + error [list {no such routine} $routine] + } + set routine $resolved + } + set routinens [tcl::namespace::qualifiers $routine] + if {$routinens eq {::}} { + set routinens {} + } + set routinetail [tcl::namespace::tail $routine] + + if {![string match ::* $extension]} { + set extension [uplevel 1 [ + list [tcl::namespace::which namespace] current]]::$extension + } + + if {![tcl::namespace::exists $extension]} { + error [list {no such namespace} $extension] + } + + set extension [tcl::namespace::eval $extension [ + list [tcl::namespace::which namespace] current]] + + tcl::namespace::eval $extension [ + list [tcl::namespace::which namespace] export *] + + while 1 { + set renamed ${routinens}::${routinetail}_[clock clicks] ;#clock clicks unlikely to collide when not directly consecutive such as: list [clock clicks] [clock clicks] + if {[tcl::namespace::which $renamed] eq {}} break + } + + rename $routine $renamed + + tcl::namespace::eval $extension [ + list namespace ensemble create -command $routine -unknown [ + list apply {{renamed ensemble routine args} { + list $renamed $routine + }} $renamed + ] + ] + + return $routine + } +} + +# some (?) tcl bug check procs needed to exist before main punk::lib namespaces are evaluated +tcl::namespace::eval punk::lib::check { + #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) + #Not any sort of comprehensive check of known tcl bugs. + #These are reported in warning output of 'help tcl' - or used for workarounds in some cases. + proc has_tclbug_regexp_emptystring {} { + #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces + #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, + #but as an apparent violation of Tcl's normal parsing rules - was evidently seen as a bug and fixed in: + #https://core.tcl-lang.org/tcl/info/cb03e57a (tcl 9.0.3+ ?) + set bug [expr {![catch {regexp {} [error should_error]}]}] + return [dict create bug $bug bugref cb03e57a description {regexp emptystring first argument over-optimised - difference in compiled vs traced behaviour.} level minor] + } + proc has_tclbug_lsearch_sorted_inline_subindices {} { + if {[catch {lsearch -sorted -subindices -inline -index 0 {{a 1} {a 2} {b 3} {c 4} {c 5}} b} result]} { + #probably tcl version doesn't support all options + set bug 0 + } else { + set bug [expr {$result ne "b"}] + } + set description "lsearch -sorted with -subindices -inline - incorrect result." + return [dict create bug $bug bugref bc4ac0 description $description level minor] + } + proc has_tclbug_script_var {} { + + set script {set j [list spud] ; list} + append script \n + uplevel #0 $script + set rep1 [tcl::unsupported::representation $::j] + set script "" + set rep2 [tcl::unsupported::representation $::j] + + set nostring1 [string match "*no string" $rep1] + set nostring2 [string match "*no string" $rep2] + + #we assume it should have no string rep in either case + #Review: check Tcl versions for behaviour/consistency + if {!$nostring2} { + set bug true + } else { + set bug false + } + set description "string rep for list variable in script generated when script changed\n(not an acknowledged/reported bug)" + return [dict create bug $bug bugref "" description $description level minor] + } + proc has_tclbug_lsearch_strideallinline {} { + #bug only occurs with single -index value combined with -stride -all -inline -subindices + #https://core.tcl-lang.org/tcl/tktview/5a1aaa201d + if {[catch {lsearch -stride 3 -all -inline -index 1 -subindices {a1 a2 a3} *} result]} { + #we aren't looking for an error result - error most likely indicates tcl too old to support -stride + set bug 0 + } else { + set bug [expr {$result ne "a2"}] + } + set description "lsearch -stride with -subindices -inline -all and single index - incorrect results." + return [dict create bug $bug bugref 5a1aaa201d description $description level major] + } + proc has_tclbug_lseq_sign {} { + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + if {[catch {lseq 1 10}]} { + set bug 0 + } else { + set r1 [lseq 1 10 -9] + set r2 [lseq 1 10 -10] + set bug [expr {$r1 ne $r2}] + } + set description "lseq step sign not matching sequence direction - inconsistent results." + return [dict create bug $bug bugref 999b6966b2 description $description level minor] + } + + proc has_tclbug_list_quoting_emptyjoin {} { + #https://core.tcl-lang.org/tcl/tktview/e38dce74e2 + set v1 [list {*}[lindex #foo] {*}[]] ;#can return "#foo" instead of "{#foo}" under some beta 9 releases + set v2 [list #foo] ;#normal tcl list quoting for 1st element that looks like a comment -> "{#foo}" + set bug [expr {![string equal $v1 $v2]}] ;#if they're not equal - we have the bug. + set description "lists elements not properly quoted in some cases. e.g 'list {*}[lindex {etc #foo} 1] {*}[list]' (#foo not braced)" + return [dict create bug $bug bugref e38dc74e2 description $description level medium] + } + + proc has_tclbug_safeinterp_compile {{show 0}} { + #ensemble calls within safe interp not compiled + #https://core.tcl-lang.org/tcl/tktview/1095bf7f756f9aed6bde + namespace eval [namespace current]::testcompile { + proc ensembletest {} {string index a 0} + } + + set has_bug 0 + + set bytecode_outer [tcl::unsupported::disassemble proc [namespace current]::testcompile::ensembletest] + if {$show} { + puts outer: + puts $bytecode_outer + } + if {![interp issafe]} { + #test of safe subinterp only needed if we aren't already in a safe interp + if {![catch { + interp create x -safe + } errMsg]} { + x eval {proc ensembletest {} {string index a 0}} + set bytecode_safe [x eval {tcl::unsupported::disassemble proc ::ensembletest}] + if {$show} { + puts safe: + puts $bytecode_safe + } + interp delete x + #mainly we expect the safe interp might contain invokeStk - indicating not byte compiled (or we would see strindex instead) + #It's possible the interp we're running in is also not compiling ensembles. + #we could then get a result of 2 - which still indicates a problem + if {[string last "invokeStk" $bytecode_safe] >= 1} { + incr has_bug + } + } else { + #our failure to create a safe interp here doesn't necessarily mean the Tcl version doesn't have the problem - but we could end up returning zero if somehow safe interp can't be created from unsafe interp? + #unlikely - but we should warn + puts stderr "Unable to create a safe sub-interp to test - result only indicates status of current interpreter" + } + } + + namespace delete [namespace current]::testcompile + + if {[string last "invokeStk" $bytecode_outer] >= 1} { + incr has_bug + } + set description "ensemble commands not compiled in safe interps - heavy performance impact in safe interps" + return [dict create bug $has_bug bugref 1095bf7f756f9aed6bde description $description level major] + } +} + +tcl::namespace::eval punk::lib::compat { + #*** !doctools + #[subsection {Namespace punk::lib::compat}] + #[para] compatibility functions for features that may not be available in earlier Tcl versions + #[para] These are generally 'forward compatibility' functions ie allowing earlier versions to use later features/idioms by using a Tcl-only version of a missing builtin. + #[para] Such Tcl-only versions will inevitably be less performant - perhaps significantly so. + + #*** !doctools + #[list_begin definitions] + + + + + if {"::lremove" ne [info commands ::lremove]} { + #puts stderr "Warning - no built-in lremove" + interp alias {} lremove {} ::punk::lib::compat::lremove + } + proc lremove {list args} { + #*** !doctools + #[call [fun lremove] [arg list] [opt {index ...}]] + #[para] Forwards compatible lremove for versions 8.6 or less to support equivalent 8.7 lremove + + set data [lmap v $list {list data $v}] + foreach doomed_index $args { + if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} + lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value + } + set keep [lsearch -all -inline -not -exact $data x] + return [lsearch -all -inline -index 1 -subindices $keep *] + } + #not significantly different in performance over test of 100 elements - getting somewhere near 10% for 1k integers + proc lremove2 {list args} { + set data [lmap v $list {list data $v}] + foreach doomed_index $args { + if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} + lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value + } + set keep [lsearch -all -inline -not -exact $data x] + return [lmap v $keep {lindex $v 1}] + } + #outside of lmap - don't know of any particularly nice ways to flatten to subindex 1 of each element.. + #flattening then lsearch with -stride and * would be nice - but it's not avail in 8.6 + + if {![info exists ::auto_index(readFile)]} { + if {[info commands ::readFile] eq ""} { + proc ::readFile {filename {mode text}} { + #readFile not seen in auto_index or as command: installed by punk::lib + # Parse the arguments + set MODES {binary text} + set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] + set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] + + # Read the file + set f [open $filename [dict get {text r binary rb} $mode]] + try { + return [read $f] + } finally { + close $f + } + } + } + } + if {![info exists ::auto_index(writeFile)]} { + if {[info commands ::writeFile] eq ""} { + proc ::writeFile {args} { + #writeFile not seen in auto_index or as command: installed by punk::lib + # Parse the arguments + switch [llength $args] { + 2 { + lassign $args filename data + set mode text + } + 3 { + lassign $args filename mode data + set MODES {binary text} + set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] + set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] + } + default { + set COMMAND [lindex [info level 0] 0] + return -code error -errorcode {TCL WRONGARGS} "wrong # args: should be \"$COMMAND filename ?mode? data\"" + } + } + + # Write the File + set f [open $filename [dict get {text w binary wb} $mode]] + try { + puts -nonewline $f $data + } finally { + close $f + } + } + } + } + + if {"::lpop" ne [info commands ::lpop]} { + #puts stderr "Warning - no built-in lpop" + interp alias {} lpop {} ::punk::lib::compat::lpop + punk::args::set_idalias ::punk::lib::compat::lpop ::lpop ;#point to the definition of ::lpop defined in punk::args::tclcore + } + proc lpop {lvar args} { + #*** !doctools + #[call [fun lpop] [arg listvar] [opt {index}]] + #[para] Forwards compatible lpop for versions 8.6 or less to support equivalent 8.7 lpop + upvar $lvar l + if {![llength $args]} { + set args [list end] + } + set v [lindex $l {*}$args] + set newlist $l + + set path [list] + set subl $l + for {set i 0} {$i < [llength $args]} {incr i} { + set idx [lindex $args $i] + if {![llength [lrange $subl $idx $idx]]} { + error "tcl_lpop index \"$idx\" out of range" + } + lappend path [lindex $args $i] + set subl [lindex $l {*}$path] + } + + set sublist_path [lrange $args 0 end-1] + set tailidx [lindex $args end] + if {![llength $sublist_path]} { + #set newlist [lremove $newlist $tailidx] + set newlist [lreplace $newlist $tailidx $tailidx] + } else { + set sublist [lindex $newlist {*}$sublist_path] + #set sublist [lremove $sublist $tailidx] + set sublist [lreplace $sublist $tailidx $tailidx] + lset newlist {*}$sublist_path $sublist + } + #puts "[set l] -> $newlist" + set l $newlist + return $v + } + if {"::ledit" ni [info commands ::ledit]} { + interp alias {} ledit {} ::punk::lib::compat::ledit + punk::args::set_idalias ::punk::lib::compat::ledit ::ledit + } + proc ledit {lvar first last args} { + upvar $lvar l + #use lindex_resolve to support for example: ledit lst end+1 end+1 h i + set fidx [punk::lib::lindex_resolve [llength $l] $first] + switch -exact -- $fidx { + -Inf { + #index below lower bound + set pre [list] + set fidx -1 + } + Inf { + #first index position is greater than index of last element in the list + set pre [lrange $l 0 end] + set fidx [llength $l] + } + default { + #set pre [lrange $l 0 $first-1] + set pre [lrange $l 0 $fidx-1] + } + } + set lidx [punk::lib::lindex_resolve [llength $l] $last] + switch -exact -- $lidx { + -Inf { + #index below lower bound + set post [lrange $l 0 end] + } + Inf { + #index above upper bound + set post [list] + } + default { + if {$lidx < $fidx} { + #from ledit man page: + #If last is less than first, then any specified elements will be inserted into the list before the element specified by first with no elements being deleted. + set post [lrange $l $fidx end] + } else { + #set post [lrange $l $last+1 end] + set post [lrange $l $lidx+1 end] + } + } + } + set l [list {*}$pre {*}$args {*}$post] + } + + + #slight isolation - varnames don't leak - but calling context vars can be affected + proc lmaptcl2 {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + set linkvars [uplevel 1 [list ::tcl::info::vars]] + set nscaller [uplevel 1 [list ::tcl::namespace::current]] + + set apply_script "" + foreach vname $linkvars { + append apply_script [string map [list %vname% $vname]\ + {upvar 2 %vname% %vname%}\ + ] \n + } + append apply_script $script \n + + #puts "--> $apply_script" + foreach $varnames $list { + lappend result [apply\ + [list\ + $varnames\ + $apply_script\ + $nscaller\ + ] {*}[subst $values]\ + ] + } + return $result + } + + if {"::lmap" ne [info commands ::lmap]} { + #puts stderr "Warning - no built-in lpop" + interp alias {} lmap {} ::punk::lib::compat::lmaptcl + } + #lmap came in Tcl 8.6 - so probably not much need for a tcl forward compatibility version - but here it is anyway + proc lmaptcl {varnames list script} { + set result [list] + set varlist [list] + foreach varname $varnames { + upvar 1 $varname var_$varname ;#ensure no collisions with vars in this proc + lappend varlist var_$varname + } + foreach $varlist $list { + lappend result [uplevel 1 $script] + } + return $result + } + + #tcl8.7/9 compatibility for 8.6 + if {[info commands ::tcl::string::insert] eq ""} { + #https://wiki.tcl-lang.org/page/string+insert + # Pure Tcl implementation of [string insert] command. + proc ::tcl::string::insert {string index insertString} { + # Convert end-relative and TIP 176 indexes to simple integers. + if {[regexp -expanded { + ^(end(?![\t\n\v\f\r ]) # "end" is never followed by whitespace + |[\t\n\v\f\r ]*[+-]?\d+) # m, with optional leading whitespace + (?:([+-]) # op, omitted when index is "end" + ([+-]?\d+))? # n, omitted when index is "end" + [\t\n\v\f\r ]*$ # optional whitespace (unless "end") + } $index _ m op n]} { + # Convert first index to an integer. + switch $m { + end {set index [string length $string]} + default {scan $m %d index} + } + + # Add or subtract second index, if provided. + switch $op { + + {set index [expr {$index + $n}]} + - {set index [expr {$index - $n}]} + } + } elseif {![string is integer -strict $index]} { + # Reject invalid indexes. + return -code error "bad index \"$index\": must be\ + integer?\[+-\]integer? or end?\[+-\]integer?" + } + + # Concatenate the pre-insert, insertion, and post-insert strings. + string cat [string range $string 0 [expr {$index - 1}]] $insertString\ + [string range $string $index end] + } + + # Bind [string insert] to [::tcl::string::insert]. + tcl::namespace::ensemble configure string -map [tcl::dict::replace\ + [tcl::namespace::ensemble configure string -map]\ + insert ::tcl::string::insert] + } + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib::compat ---}] +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::lib { + variable PUNKARGS + tcl::namespace::export * + variable has_struct_list + set has_struct_list [expr {![catch {package require struct::list}]}] + variable has_struct_set + set has_struct_set [expr {![catch {package require struct::set}]}] + variable has_punk_ansi + set has_punk_ansi [expr {![catch {package require punk::ansi}]}] + set has_twapi 0 + if {"windows" eq $::tcl_platform(platform)} { + set has_twapi [expr {![catch {package require twapi}]}] + } + + namespace eval argdoc { + #non-colour SGR codes + set I "\x1b\[3m" ;# [a+ italic] + set NI "\x1b\[23m" ;# [a+ noitalic] + set B "\x1b\[1m" ;# [a+ bold] + set N "\x1b\[22m" ;# [a+ normal] + set T "\x1b\[1\;4m" ;# [a+ bold underline] + set NT "\x1b\[22\;24m\x1b\[4:0m" ;# [a+ normal nounderline] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::is_main_script + @cmd -name punk::lib::is_main_script\ + -summary\ + "Test if current script was launched directly."\ + -help\ + "The ${$B}main script${$N} is the primary script that is executed + by the interpreter, e.g. tclsh or wish. + (as opposed to being loaded by the 'source' command) + + see https://wiki.tcl-lang.org/page/main+script" + @values -min 0 -max 0 + }] + } + proc is_main_script {} { + #see https://wiki.tcl-lang.org/page/main+script + if {[info script] ne "" && [info exists ::argv0] + && + [file dirname [file normalize [file join [info script] ...]]] + eq + [file dirname [file normalize [file join $::argv0 ...]]] + } { + return true + } else { + return false + } + } + + + + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + # Maintenance - This is the primary source for tm_version... functions + # - certain packages script require these but without package dependency + # - 1 punk boot script + # - 2 packagetrace module + # - These should be updated to sync with this + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + proc tm_version_isvalid {versionpart} { + #Needs to be suitable for use with Tcl's 'package vcompare' + if {![catch [list package vcompare $versionpart $versionpart]]} { + return 1 + } else { + return 0 + } + } + proc tm_version_major {version} { + if {![tm_version_isvalid $version]} { + error "Invalid version '$version' is not a proper Tcl module version number" + } + set firstpart [lindex [split $version .] 0] + #check for a/b in first segment + if {[string is integer -strict $firstpart]} { + return $firstpart + } + if {[string first a $firstpart] > 0} { + return [lindex [split $firstpart a] 0] + } + if {[string first b $firstpart] > 0} { + return [lindex [split $firstpart b] 0] + } + error "tm_version_major unable to determine major version from version number '$version'" + } + proc tm_version_canonical {ver} { + #accepts a single valid version only - not a bounded or unbounded spec + if {![tm_version_isvalid $ver]} { + error "tm_version_canonical version '$ver' is not valid for a package version" + } + set parts [split $ver .] + set newparts [list] + foreach o $parts { + set trimmed [string trimleft $o 0] + set firstnonzero [string index $trimmed 0] + switch -exact -- $firstnonzero { + "" { + lappend newparts 0 + } + a - b { + #e.g 000bnnnn -> bnnnnn + set tailtrimmed [string trimleft [string range $trimmed 1 end] 0] + if {$tailtrimmed eq ""} { + set tailtrimmed 0 + } + lappend newparts 0$firstnonzero$tailtrimmed + } + default { + #digit + if {[string is integer -strict $trimmed]} { + #e.g 0100 -> 100 + lappend newparts $trimmed + } else { + #e.g 0100b003 -> 100b003 (still need to process tail) + if {[set apos [string first a $trimmed]] > 0} { + set lhs [string range $trimmed 0 $apos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch + set rhs [string range $trimmed $apos+1 end] ;#assert rhs non-empty and only digits + set rhs [string trimleft $rhs 0] + if {$rhs eq ""} { + set rhs 0 + } + lappend newparts ${lhs}a${rhs} + } elseif {[set bpos [string first b $trimmed]] > 0} { + set lhs [string range $trimmed 0 $bpos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch + set rhs [string range $trimmed $bpos+1 end] ;#assert rhs non-empty and only digits + set rhs [string trimleft $rhs 0] + if {$rhs eq ""} { + set rhs 0 + } + lappend newparts ${lhs}b${rhs} + } else { + #assert - shouldn't get here trimmed val should have been empty, an int or contained an a or b + error "tm_version_canonical error - trimfail - unexpected" + } + } + } + } + } + return [join $newparts .] + } + proc tm_version_required_canonical {versionspec} { + #also trim leading zero from any dottedpart? + #Tcl *allows* leading zeros in any of the dotted parts - but they are not significant. + #e.g 1.01 is equivalent to 1.1 and 01.001 + #also 1b3 == 1b0003 + + if {[string trim $versionspec] eq ""} {return ""} ;#unspecified = any version + set errmsg "tm_version_required_canonical - invalid version specification" + if {[string first - $versionspec] < 0} { + #no dash + #looks like a minbounded version (ie a single version with no dash) convert to min-max form + set from $versionspec + if {![tm_version_isvalid $from]} { + error "$errmsg '$versionpec'" + } + if {![catch {tm_version_major $from} majorv]} { + set from [tm_version_canonical $from] + return "${from}-[expr {$majorv +1}]" + } else { + error "$errmsg '$versionspec'" + } + } else { + # min- or min-max + #validation and canonicalisation (strip leading zeroes from each segment, including either side of a or b) + set parts [split $versionspec -] ;#we expect only 2 parts + lassign $parts from to + if {![tm_version_isvalid $from]} { + error "$errmsg '$versionspec'" + } + set from [tm_version_canonical $from] + if {[llength $parts] == 2} { + if {$to ne ""} { + if {![tm_version_isvalid $to]} { + error "$errmsg '$versionspec'" + } + set to [tm_version_canonical $to] + return $from-$to + } else { + return $from- + } + } else { + error "$errmsg '$versionspec'" + } + error "tm_version_required_canonical should have already returned a canonicalised versionspec - or produced an error with reason before this point" + } + } + # end tm_version... functions + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + + + + # -- --- + #https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists + #DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 + #8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows + # Review and retest as new versions come out. + # -- --- + proc list_multi_append1 {lvar1 lvar2} { + #clear winner in 2024 + upvar $lvar1 l1 $lvar2 l2 + lappend l1 {*}$l2 + return $l1 + } + proc list_multi_append2 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [list {*}$l1 {*}$l2] + } + proc list_multi_append3 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] + } + #testing e.g + #set l1_reset {a b c} + #set l2 {a b c d e f g} + #set l1 $l1_reset + #time {list_multi_append1 l1 l2} 1000 + #set l1 $l1_reset + #time {list_multi_append2 l1 l2} 1000 + # -- --- + + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lswap + @cmd -name punk::lib::lswap\ + -summary\ + "Swap list values in-place"\ + -help\ + "Similar to struct::list swap, except it fully supports basic + list index expressions such as 7-2 end-1 etc. + + struct::list swap doesn't support 'end' offsets, and only + sometimes appears to support basic expressions, depending on the + expression compared to the list length." + @values -min 3 -max 3 + lvar -type string -help\ + "name of list variable" + a -type indexexpression + z -type indexexpression + }] + } + proc lswap {lvar a z} { + upvar $lvar l + set len [llength $l] + if {[lindex_resolve_basic $len $a] < 0 || [lindex_resolve_basic $len $z] < 0} { + #lindex_resolve_basic returns only -Inf if out of range at either bound + #if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred + #(e.g using: lswap mylist end-2 end on a two element list) + + #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + #use full 'lindex_resolve' which can report which side via -Inf and Inf special results being lower and upper bound breaches respectively + set a_index [lindex_resolve $len $a] + set a_msg "" + switch -- $a_index { + -Inf { + set a_msg "1st supplied index $a is below the lower bound for the list (0)" + } + Inf { + set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" + } + } + set z_index [lindex_resolve $len $z] + set z_msg "" + switch -- $z_index { + -Inf { + set z_msg "2nd supplied index $z is below the lower bound for the list (0)" + } + Inf { + set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" + } + } + set errmsg "lswap cannot swap indices $a and $z" + if {$a_msg ne ""} { + append errmsg \n $a_msg + } + if {$z_msg ne ""} { + append errmsg \n $z_msg + } + error $errmsg + } + set item2 [lindex $l $z] + lset l $z [lindex $l $a] + lset l $a $item2 + return $l + } + #proc lswap2 {lvar a z} { + # upvar $lvar l + # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] + #} + + proc lswap2 {lvar a z} { + upvar $lvar l + #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] + } + + #an experimental test of swapping vars without intermediate variables + #It's an interesting idea - but probably of little to no practical use + # - the swap_intvars3 version using intermediate var is faster in Tcl + # - This is probably unsurprising - as it's simpler code. + # Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. + #proc swap_intvars {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] + #} + #proc swap_intvars2 {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {$_x ^ $_y}] + # set _y [expr {$_x ^ $_y}] + # set _x [expr {$_x ^ $_y}] + #} + #proc swap_intvars3 {swapv1 swapv2} { + # #using intermediate variable + # upvar $swapv1 _x $swapv2 _y + # set z $_x + # set _x $_y + # set _y $z + #} + + #*** !doctools + #[subsection {Namespace punk::lib}] + #[para] Core API functions for punk::lib + #[list_begin definitions] + + if {[info commands lseq] ne ""} { + #tcl 8.7+ lseq significantly faster, especially for larger ranges + #The internal rep can be an 'arithseries' with no string representation + #support minimal set from to + proc range {from to {by 1}} { + #note inconsistency with lseq 1 10 by -9 vs lseq 1 10 by -10 + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + lseq $from $to by $by + } + } else { + #lseq accepts basic expressions e.g 4-2 for both arguments + #e.g we can do lseq 0 [llength $list]-1 + #if range is to be consistent with the lseq version above - it should support that, even though we don't support most lseq functionality in either wrapper. + #our range function doesn't support double like lseq does. (deliberate) review + proc range {from to {by ""}} { + if {$by eq "0"} { + #as per lseq, step (by) zero always gives no result + return [list] + } + set to [offset_expr $to] + set from [offset_expr $from] + if {$by ne ""} { + set by [offset_expr $by] + } + #assert $by is now empty string or an integer + if {$to > $from} { + switch -- $by { + "" - 1 { + set count [expr {($to -$from) + 1}] + if {$from == 0} { + return [lsearch -all [lrepeat $count 0] *] + } else { + incr from -1 + return [lmap v [lrepeat $count 0] {incr from}] + } + } + default { + set count [expr {($to - $from + $by) / $by}] + if {$count <= 0} { + #return [list] + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + return [list $from] ;#review + } + set result [list] + for {set i $from} {$i <= $to} {incr i $by} { + lappend result $i + } + return $result + + #if we don't have lseq, we probably don't have lsearch -stride, which would make things simpler. + #set count [expr {($to -$from) + 1}] + #if {$from == 0} { + # set fullrange [lsearch -all [lrepeat $count 0] *] + #} else { + # incr from -1 + # set fullrange [lmap v [lrepeat $count 0] {incr from}] + #} + #set result [list] + #for {set i 0} {$i < $count} {incr i} { + # if {$i % $by == 0} { + # lappend result [lindex $fullrange $i] + # } + #} + #return $result + } + } + #slower methods. + #2) + #set i -1 + #set L [lrepeat $count 0] + #lmap v $L {lset L [incr i] [incr from];lindex {}} + #return $L + #3) + #set L {} + #for {set i 0} {$i < $count} {incr i} { + # lappend L [incr from] + #} + #return $L + } elseif {$from > $to} { + switch -- $by { + "" - -1 { + set count [expr {$from - $to} + 1] + if {$to == 0} { + return [lreverse [lsearch -all [lrepeat $count 0] *]] + } else { + incr from + return [lmap v [lrepeat $count 0] {incr from -1}] + } + } + default { + set count [expr {($to - $from + $by) / $by}] + if {$count <= 0} { + #return [list] + return [list $from] ;#review + } + set result [list] + for {set i $from} {$i >= $to} {incr i $by} { + lappend result $i + } + return $result + } + } + + #2) + #set i -1 + #set L [lrepeat $count 0] + #lmap v $L {lset L [incr i] [incr from -1];lindex {}} + #return $L + #3) + #set L {} + #for {set i 0} {$i < $count} {incr i} { + # lappend L [incr from -1] + #} + #return $L + } else { + return [list $from] + } + } + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lzip + @cmd -name punk::lib::lzip\ + -summary\ + "zip any number of lists together."\ + -help\ + "Conceptually equivalent to converting a list of rows + to a list of columns. + + The number of returned lists (columns) will be equal to + the length of the longest supplied list (row). + If lengths of supplied lists don't match, empty strings + will be inserted in the resulting lists. + + e.g lzip {a b c d e} {1 2 3 4} {x y z} + -> {a 1 x} {b 2 y} {c 3 z} {d 4 {}} {3 {} {}} + " + @values -min 0 -max -1 + list -type list -multiple 1 -optional 1 + }] + } + proc lzip {args} { + switch -- [llength $args] { + 0 {return {}} + 1 {return [lindex $args 0]} + 2 {return [::punk::lib::system::lzip2lists {*}$args]} + 3 {return [::punk::lib::system::lzip3lists {*}$args]} + 4 {return [::punk::lib::system::lzip4lists {*}$args]} + 5 {return [::punk::lib::system::lzip5lists {*}$args]} + 6 {return [::punk::lib::system::lzip6lists {*}$args]} + 7 {return [::punk::lib::system::lzip7lists {*}$args]} + 8 {return [::punk::lib::system::lzip8lists {*}$args]} + 9 {return [::punk::lib::system::lzip9lists {*}$args]} + 10 {return [::punk::lib::system::lzip10lists {*}$args]} + 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 { + set n [llength $args] + if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { + #puts "calling ::punk::lib::system::Build_lzipn $n" + ::punk::lib::system::Build_lzipn $n + } + return [::punk::lib::system::lzip${n}lists {*}$args] + } + default { + if {[llength $args] < 4000} { + set n [llength $args] + if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { + #puts "calling ::punk::lib::system::Build_lzipn $n" + ::punk::lib::system::Build_lzipn $n + } + return [::punk::lib::system::lzip${n}lists {*}$args] + } else { + return [::punk::lib::lzipn {*}$args] + } + } + } + } + + namespace eval system { + proc Build_lzipn {n} { + set arglist [list] + #use punk::lib::range which defers to lseq if available + set vars [lmap i [punk::lib::range 0 $n] {string cat v$i}] ;#v0 v1 v2.. (v0 ignored) + set body "\nlmap " + for {set i 1} {$i <= $n} {incr i} { + lappend arglist l$i + append body "[lindex $vars $i] \$l$i " + } + append body "\{list " + for {set i 1} {$i <= $n} {incr i} { + append body "\$[lindex $vars $i] " + } + append body "\}" \n + #puts "proc punk::lib::system::lzip${n}lists {$arglist} \{" + #puts "$body" + #puts "\}" + proc ::punk::lib::system::lzip${n}lists $arglist $body + } + + #fastest is to know the number of lists to be zipped + proc lzip2lists {l1 l2} { + lmap a $l1 b $l2 {list $a $b} + } + proc lzip3lists {l1 l2 l3} { + lmap a $l1 b $l2 c $l3 {list $a $b $c} + } + proc lzip4lists {l1 l2 l3 l4} { + lmap a $l1 b $l2 c $l3 d $l4 {list $a $b $c $d} + } + proc lzip5lists {l1 l2 l3 l4 l5} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 {list $a $b $c $d $e} + } + proc lzip6lists {l1 l2 l3 l4 l5 l6} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 {list $a $b $c $d $e $f} + } + proc lzip7lists {l1 l2 l3 l4 l5 l6 l7} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 {list $a $b $c $d $e $f $g} + } + proc lzip8lists {l1 l2 l3 l4 l5 l6 l7 l8} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 {list $a $b $c $d $e $f $g $h} + } + proc lzip9lists {l1 l2 l3 l4 l5 l6 l7 l8 l9} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 {list $a $b $c $d $e $f $g $h $i} + } + proc lzip10lists {l1 l2 l3 l4 l5 l6 l7 l8 l9 l10} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 j $l10 {list $a $b $c $d $e $f $g $h $i $j} + } + + #neat algorithm - but while lmap seems better than foreach - it seems the script is evaluated a little slowly + # review - + proc lzipn_alt args { + #stackoverflow - courtesy glenn jackman (modified) + foreach l $args { + lappend vars [incr n] + lappend lmap_args $n $l + } + lmap {*}$lmap_args {lmap v $vars {set $v}} + } + + #2024 - outperforms lmap version - presumably because list sizes reduced as it goes(?) + proc lzipn_tcl8 {args} { + #For tcl pre 9 (without lsearch -stride) + #wiki - courtesy JAL + set list_l $args + set zip_l [] + while {1} { + set cur [lmap a_l $list_l { lindex $a_l 0 }] + set list_l [lmap a_l $list_l { lrange $a_l 1 end }] + + if {[join $cur {}] eq {}} { + break + } + lappend zip_l $cur + } + return $zip_l + } + proc lzipn_tcl9a {args} { + #For Tcl 9+ (with lsearch -stride) + #compared to wiki version + #comparable for lists len <3 or number of args < 3 + #approx 2x faster for large lists or more lists + #needs -stride single index bug fix to use empty string instead of NULL + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [lrepeat [expr {$numcolumns * [llength $args]}] {}] + set outlist [lrepeat $numcolumns {}] + set s 0 + foreach len $lens list $args { + #ledit flatlist $s $e {*}$l {*}[lrepeat [expr {($numcolumns -([llength $l] % $numcolumns)) % $numcolumns}] NULL] + ledit flatlist $s [expr {$s + $len - 1}] {*}$list + incr s $numcolumns + } + #needs single index lstride bugfix + for {set c 0} {$c < $numcolumns} {incr c} { + ledit outlist $c $c [lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *] + } + return $outlist + } + proc lzipn_tcl9b {args} { + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [list] + foreach len $lens list $args { + lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] + } + lmap c [lseq 0 $numcolumns-1] {lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *} + } + proc lzipn_tcl9c {args} { + #SLOW + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [list] + foreach len $lens list $args { + lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] + } + set zip_l {} + set cols_remaining $numcolumns + for {set c 0} {$c < $numcolumns} {incr c} { + if {$cols_remaining == 1} { + return [list {*}$zip_l $flatlist] + } + lappend zip_l [lsearch -stride $cols_remaining -index 0 -inline -all -subindices $flatlist *] + set flen [llength $flatlist] + set flatlist [lremove $flatlist {*}[lseq 0 to $flen-1 by $cols_remaining]] + incr cols_remaining -1 + } + return $zip_l + } + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lzipn + @cmd -name punk::lib::lzipn\ + -summary\ + "zip any number of lists together (unoptimised)."\ + -help\ + "Conceptually equivalent to converting a list of rows + to a list of columns. + + See lzip which provides the same functionality but with + optimisations depending on the number of supplied lists. + " + @values -min 1 -max 1 + lvar -type string -help\ + "name of list variable" + a -type indexexpression + z -type indexexpression + }] + } + #keep both lzipn_tclX functions available for side-by-side testing in Tcl versions where it's possible + if {![package vsatisfies [package present Tcl] 9.0-] || [dict get [punk::lib::check::has_tclbug_lsearch_strideallinline] bug]} { + #-stride either not available - or has bug preventing use of main algorithm below + proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl8] + } else { + proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl9a] + } + + + + namespace import ::punk::args::lib::tstr + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::tclscript_to_scriptlist + @cmd -name punk::lib::tclscript_to_scriptlist\ + -summary\ + "Parse tcl script to toplevel list of lists."\ + -help\ + "Get topmost list of tcl language elements in script. + produces a list of lists where each sublist is a commandlist or + a comment string." + @values -min 1 -max 1 + script -type string + }] + } + proc tclscript_to_scriptlist {script} { + set scriptlist [list] + set cmdlist [list] + set scrlen [string length $script] + set token "" + set in_token 0 + set in_cmdlist 0 + set in_comment 0 + set charmap [list \t TB \n LF \r CR \\ BSL] ;#for switch 'jump' preservation - review - may be slower than escapes in switch statement? + for {set i 0} {$i < $scrlen} {incr i} { + set ch [string index $script $i] + set chswitch [string map $charmap $ch] + if {!$in_token} { + switch -- $chswitch { + { } - TB { + #ignore - continue being a non token + } + CR { + if {[string index $script $i+1] eq "\n"} { + if {$in_cmdlist} { + #no active token - newline ends cmdlist + set in_cmdlist 0 + lappend scriptlist $cmdlist + set cmdlist [list] + } + incr i + } + } + LF - ";" { + #no active token - newline or semicolon ends cmdlist + if {$in_cmdlist} { + set in_cmdlist 0 + lappend scriptlist $cmdlist + set cmdlist [list] + } + } + BSL { + if {[string index $script $i+1] eq "\n"} { + #continuation of whitespace while no token - boring + incr i + } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { + #continuation of whitespace while no token - boring + incr i 2 + } else { + #an uncommon possibility, a command wth surrounding spaces called in an strange way + # e.g \ cmdname\ arg + set in_token 1 + set token "\\[string index $script $i+1]" + incr i + if {!$in_cmdlist} { + set in_cmdlist 1 + } + } + } + # { + if {$in_cmdlist} { + #ordinary data + set in_token 1 + set token # + } else { + if {!$in_comment} { + set in_token 1 + set in_comment 1 + set token # + } else { + #wnen in comment - all will be a single token until comment ends + append token # + } + } + } + default { + #for completeness.. we should exclude other possible whitespace chars + if {![string is space $ch]} { + set in_token 1 + set token $ch + if {!$in_cmdlist} { + set in_cmdlist 1 + } + } + } + } + } else { + #if we're in a token, we must be in a cmdlist or a comment (single token) + #review - not preserving whitespace in list of commands is ok, but for comments it should ideally be preserved + #note that unbalanced curly in *toplevel* comment will still 'info complete' to true + switch -- $chswitch { + LF { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + #ends token and cmdlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \n + } + } else { + #ends a comment + lappend scriptlist $token ;#single token for comment + set token "" + set in_token 0 + set in_comment 0 + set in_cmdlist 0 ;#shouldn't be necessary, but included for clarity + } + } + ";" { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + #ends token and cmdlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \n + } + } else { + #ordinary char for comment + append token ";" + } + } + CR { + if {[string index $script $i+1] eq "\n"} { + if {[tcl::info::complete $token]} { + #ends token and commandlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \r\n + incr i + } + } else { + append token \r + } + } + BSL { + if {[string index $script $i+1] eq "\n"} { + #continuation - lf effectively becomes a space + if {!$in_comment} { + #token may end - but cmdlist goes on + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token " " + } + } else { + append token " " + } + incr i ;#skip LF + } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { + #continuation - cr-lf effectively becomes a space + if {!$in_comment} { + #token may end - but cmdlist goes on + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token " " + } + } else { + append token " " + } + incr i 2 ;#skip CRLF + } else { + append token "\\[string index $script $i+1]" + incr i + } + } + default { + if {![string is space $ch]} { + append token $ch + } else { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token $ch + } + } else { + append token $ch + } + } + } + } + } + } + #eof + if {!$in_comment} { + if {$in_token} { + if {[tcl::info::complete $token]} { + lappend cmdlist $token + lappend scriptlist $cmdlist + } else { + error "Eof reached whilst script incomplete. Unbalanced braces?\ntoken: '$token'" + } + } else { + if {$in_cmdlist} { + lappend scriptlist $cmdlist + } + } + } else { + lappend scriptlist $token + } + return $scriptlist + } + + + proc invoke command { + #*** !doctools + #[call [fun invoke] [arg command]] + #[para]Invoke an external command (using tcl open command) capturing stdout,stderr and the exitcode + #[example { + # set script { + # puts stdout {hello on stdout} + # puts stderr {hello on stderr} + # exit 42 + # } + # invoke [list tclsh <<$script] + #}] + + #see https://wiki.tcl-lang.org/page/open + lassign [chan pipe] chanout chanin + lappend command 2>@$chanin + set fh [open |$command] + set stdout [read $fh] + close $chanin + set stderr [read $chanout] + close $chanout + if {[catch {close $fh} cres e]} { + dict with e {} + lassign [set -errorcode] sysmsg pid exit + if {$sysmsg eq {NONE}} { + #output to stderr caused [close] to fail. Do nothing + } elseif {$sysmsg eq {CHILDSTATUS}} { + return [list $stdout $stderr $exit] + } else { + return -options $e $stderr + } + } + return [list $stdout $stderr 0] + } + + proc pdict {args} { + package require punk::args + variable has_punk_ansi + if {!$has_punk_ansi} { + set sep " = " + } else { + #set sep " [a+ Web-seagreen]=[a] " + set sep " [punk::ansi::a+ Green]=[punk::ansi::a] " + } + set argspec [string map [list %sep% $sep] { + @id -id ::punk::lib::pdict + @cmd -name pdict -help\ + "Print dict keys,values to channel + The pdict function operates on variable names - passing the value to the showdict function which operates on values + (see also showdict)" + + @opts -any 1 + + #default separator to provide similarity to tcl's parray function + -separator -default "%sep%" + -roottype -default "dict" + -substructure -default {} + -channel -default stdout -help\ + "existing channel - or 'none' to return as string" + + @values -min 1 -max -1 + + dictvar -type string -help "name of variable. Can be a dict, list or array" + + patterns -type string -default "*" -multiple 1 -help {Multiple patterns can be specified as separate arguments. + Each pattern consists of 1 or more segments separated by the hierarchy separator (forward slash) + The system uses similar patterns to the punk pipeline pattern-matching system. + The default assumed type is dict - but an array will automatically be extracted into key value pairs so will also work. + Segments are classified into list,dict and string operations. + Leading % indicates a string operation - e.g %# gives string length + A segment with a single @ is a list operation e.g @0 gives first list element, @1-3 gives the lrange from 1 to 3 + (todo - change to indexset syntax @1..3 @1..end-1 etc) + A segment containing 2 @ symbols is a dict operation. e.g @@k1 retrieves the value for dict key 'k1' + The operation type indicator is not always necessary if lower segments in the hierarchy are of the same type as the previous one. + e.g1 pdict env */%# + the pattern starts with default type dict, so * retrieves all keys & values, + the next hierarchy switches to a string operation to get the length of each value. + e.g2 pdict env W* S* + Here we supply 2 patterns, each in default dict mode - to display keys and values where the keys match the glob patterns + e.g3 pdict punk_testd */* + This displays 2 levels of the dict hierarchy. + Note that if the sublevel can't actually be interpreted as a dictionary (odd number of elements or not a list at all) + - then the normal = separator will be replaced with a coloured (or underlined if colour off) 'mismatch' indicator. + e.g4 set list {{k1 v1 k2 v2} {k1 vv1 k2 vv2}}; pdict list @0-end/@@k2 @*/@@k1 + Here we supply 2 separate pattern hierarchies, where @0-end and @* are list operations and are equivalent + The second level segment in each pattern switches to a dict operation to retrieve the value by key. + When a list operation such as @* is used - integer list indexes are displayed on the left side of the = for that hierarchy level. + } + }] + #puts stderr "$argspec" + set argd [punk::args::parse $args withdef $argspec] + + set opts [dict get $argd opts] + set dvar [dict get $argd values dictvar] + set patterns [dict get $argd values patterns] + set isarray [uplevel 1 [list ::tcl::array::exists $dvar]] + if {$isarray} { + set dvalue [uplevel 1 [list ::tcl::array::get $dvar]] + if {![dict exists $opts -keytemplates]} { + set arrdisplay [string map [list %dvar% $dvar] {${[if {[lindex $key 1] eq "query"} {val "%dvar% [lindex $key 0]"} {val "%dvar%($key)"}]}}] + dict set opts -keytemplates [list $arrdisplay] + } + dict set opts -keysorttype dictionary + } else { + set dvalue [uplevel 1 [list set $dvar]] + } + showdict {*}$opts $dvalue {*}$patterns + } + + #TODO - much. + #showdict needs to be able to show different branches which share a root path + #e.g show key a1/b* in its entirety along with a1/c* - (or even exact duplicates) + # - specify ansi colour per pattern so different branches can be highlighted? + # - ideally we want to be able to use all the dict & list patterns from the punk pipeline system eg @head @tail # (count) etc + # - The current version is incomplete but passably usable. + # - Copy proc and attempt rework so we can get back to this as a baseline for functionality + proc showdict {args} { ;# analogous to parray (except that it takes the dict as a value) + #set sep " [a+ Web-seagreen]=[a] " + variable has_punk_ansi + if {!$has_punk_ansi} { + set RST "" + set sep " = " + #set sep_mismatch " mismatch " + set sep \u2260 ;# equivalent [punk::ansi::convert_g0 [punk::ansi::g0 |]] (not equal symbol) + } else { + set RST [punk::ansi::a] + set sep " [punk::ansi::a+ Green]=$RST " ;#stick to basic default colours for wider terminal support + #set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]mismatch$RST " + set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]\u2260$RST " + } + package require punk::pipe + #package require punk ;#we need pipeline pattern matching features + package require textblock + + set argd [punk::args::parse $args withdef [string map [list %sep% $sep %sep_mismatch% $sep_mismatch] { + @id -id ::punk::lib::showdict + @cmd -name punk::lib::showdict -help "display dictionary keys and values" + #todo - table tableobject + -return -default "tailtohead" -choices {tailtohead sidebyside} + -channel -default none + -trimright -default 1 -type boolean -help\ + "Trim whitespace off rhs of each line. + This can help prevent a single long line that wraps in terminal from making + every line wrap due to long rhs padding." + -separator -default {%sep%} -help\ + "Separator column between keys and values" + -separator_mismatch -default {%sep_mismatch%} -help\ + "Separator to use when patterns mismatch" + -roottype -default "dict" -help\ + "list,dict,string" + -ansibase_keys -default "" -help\ + "ansi list for each level in -substructure. e.g \[list \[a+ red\] \[a+ web-green\]\]" + -substructure -default {} + -ansibase_values -default "" + -keytemplates -default {\$\{$key\}} -type list -help\ + "list of templates for keys at each level" + -keysorttype -default "none" -choices {none dictionary ascii integer real} + -keysortdirection -default increasing -choices {increasing decreasing} + -debug -default 0 -type boolean -help\ + "When enabled, produces some rudimentary debug output on stderr" + -- -type none -optional 1 + @values -min 1 -max -1 + dictvalue -type list -help\ + "dict or list value" + patterns -default "*" -type string -multiple 1 -help\ + "key or key glob pattern" + }]] + + #for punk::lib - we want to reduce pkg dependencies. + # - so we won't even use the tcllib debug pkg here + set opt_debug [dict get $argd opts -debug] + if {$opt_debug} { + if {[info body debug::showdict] eq ""} { + proc ::punk::lib::debug::showdict {args} { + catch {puts stderr "punk::lib::showdict-> [string cat {*}$args]"} + } + } + } else { + if {[info body debug::showdict] ne ""} { + proc ::punk::lib::debug::showdict {args} {} + } + } + + set opt_sep [dict get $argd opts -separator] + set opt_mismatch_sep [dict get $argd opts -separator_mismatch] + set opt_keysorttype [dict get $argd opts -keysorttype] + set opt_keysortdirection [dict get $argd opts -keysortdirection] + set opt_trimright [dict get $argd opts -trimright] + set opt_keytemplates [dict get $argd opts -keytemplates] + debug::showdict "keytemplates ---> $opt_keytemplates <---" + set opt_ansibase_keys [dict get $argd opts -ansibase_keys] + set opt_ansibase_values [dict get $argd opts -ansibase_values] + set opt_return [dict get $argd opts -return] + set opt_roottype [dict get $argd opts -roottype] + set opt_structure [dict get $argd opts -substructure] + + set dval [dict get $argd values dictvalue] + set patterns [dict get $argd values patterns] + + set result "" + + #pattern hierarchy + # */@1/@0,%#,%str @0/@1 - patterns each one is a pattern or pattern_nest + # * @1 @0,%#,%str - segments + # a b 1 0 %# %str - keys + + set pattern_key_index [list] ;#list of pattern_nests, same length as number of keys generated + set pattern_next_substructure [dict create] + set pattern_this_structure [dict create] + + # -- --- --- --- + #REVIEW + #as much as possible we should pass the indices along as a query to the pipeline pattern matching system so we're not duplicating the work and introducing inconsistencies. + #The main difference here is that sometimes we are treating the result as key-val pairs with the key being the query, other times the key is part of the query, or from the result itself (list/dict indices/keys). + #todo - determine if there is a more consistent rule-based way to do this rather than adhoc + #e.g pdict something * + #we want the keys from the result as individual lines on lhs + #e.g pdict something @@ + #we want on lhs result on rhs + # = v0 + #e.g pdict something @0-2,@4 + #we currently return: + #0 = v0 + #1 = v1 + #2 = v2 + #4 = v4 + #This means we've effectively auto-expanded the first list - elements 0-2. (or equivalently stated: we've flattened the 3 element and 1 element lists into one list of 4 elements) + #ie pdict is doing 'magic' compared to the normal pattern matching syntax, to make useage more convenient. + #this is a tradeoff that could create surprises and make things messy and/or inconsistent. + #todo - see if we can find a balance that gives consistency and logicality to the results whilst allowing still simplified matching syntax that is somewhat intuitive. + #It may be a matter of documenting what type of indexes are used directly as keys, and which return sets of further keys + #The solution for more consistency/predictability may involve being able to bracket some parts of the segment so for example we can apply an @join or %join within a segment + #that involves more complex pattern syntax & parsing (to be added to the main pipeline pattern syntax) + # -- --- --- --- + + set filtered_keys [list] + if {$opt_roottype in {dict list string}} { + #puts "getting keys for roottype:$opt_roottype" + if {[llength $dval]} { + + #TODO - change to indexset notation 0..1,3..end-1 etc + + set re_numdashnum {^([-+]{0,1}\d+)-([-+]{0,1}\d+)$} + set re_idxdashidx {^([-+]{0,1}\d+|end[-+]{1}\d+|end)-([-+]{0,1}\d+|end[-+]{1}\d+|end)$} + foreach pattern_nest $patterns { + set keyset [list] + set keyset_structure [list] + + set segments [split $pattern_nest /] + set levelpatterns [lindex $segments 0] ;#possibly comma separated patterns + #we need to use _split_patterns to separate (e.g to protect commas that appear within quotes) + set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] + #puts stderr "showdict-->_split_patterns: $patterninfo" + foreach v_idx $patterninfo { + lassign $v_idx v idx + #we don't support vars on lhs of index in this context - (because we support simplified glob patterns such as x* and literal dict keys such as kv which would otherwise be interpreted as vars with no index) + set p $v$idx ;#_split_patterns has split too far in this context - the entire pattern is the index pattern + if {[string index $p 0] eq "!"} { + set get_not 1 + set p [string range $p 1 end] + } else { + set get_not 0 + } + switch -exact -- $p { + * - "" { + if {$opt_roottype eq "list"} { + set keys [punk::lib::range 0 [llength $dval]-1] ;#compat wrapper around subset of lseq functionality + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + dict set pattern_this_structure $p list + } elseif {$opt_roottype eq "dict"} { + set keys [dict keys $dval] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } else { + lappend keyset %string + lappend keyset_structure string + dict set pattern_this_structure $p string + } + } + %# { + dict set pattern_this_structure $p string + lappend keyset %# + lappend keyset_structure string + } + # { + #todo get_not !# is test for listiness (see punk) + dict set pattern_this_structure $p list + lappend keyset # + lappend keyset_structure list + } + ## { + dict set pattern_this_structure $p dict + lappend keyset [list ## query] + lappend keyset_structure dict + } + @* { + #puts "showdict ---->@*<----" + dict set pattern_this_structure $p list + set keys [punk::lib::range 0 [llength $dval]-1] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } + @@ { + #get first k v from dict + dict set pattern_this_structure $p dict + lappend keyset [list @@ query] + lappend keyset_structure dict + } + @*k@* - @*K@* { + #returns keys only + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @*.@* { + set keys [dict keys $dval] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } + default { + #puts stderr "===p:$p" + #the basic scheme also doesn't allow commas in dict keys access via the convenience @@key - which isn't great, especially for arrays where it is common practice! + #we've already sacrificed whitespace in keys - so extra limitations should be reduced if it's to be passably useful + #@@"key,etc" should allow any non-whitespace key + switch -glob -- $p { + {@k\*@*} - {@K\*@*} { + #value glob return keys + #set search [string range $p 4 end] + #dict for {k v} $dval { + # if {[string match $search $v]} { + # lappend keyset $k + # } + #} + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @@* { + #exact match key - review - should raise error to match punk pipe behaviour? + set k [string range $p 2 end] + if {$get_not} { + if {[dict exists $dval $k]} { + set keys [dict keys [dict remove $dval $k]] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + } else { + lappend keyset {*}[dict keys $dval] + lappend keyset_structure {*}[lrepeat [dict size $dval] dict] + } + } else { + if {[dict exists $dval $k]} { + lappend keyset $k + lappend keyset_structure dict + } + } + dict set pattern_this_structure $p dict + } + @k@* - @K@* { + #TODO get_not + set k [string range $p 3 end] + if {[dict exists $dval $k]} { + lappend keyset $k + lappend keyset_structure dict + } + dict set pattern_this_structure $p dict + } + {@\*@*} { + #return list of values + #set k [string range $p 3 end] + #lappend keyset {*}[dict keys $dval $k] + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*.@*} { + #TODO get_not + set k [string range $p 4 end] + set keys [dict keys $dval $k] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } + {@v\*@*} - {@V\*@*} { + #value-glob return value + #error "dict value-glob value-return only not supported here - bad pattern '$p' in '$pattern_nest'" + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*v@*} - {@\*V@*} { + #key-glob return value + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*@*} - {@\*v@*} - {@\*V@} { + #key glob return val + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @??@* { + #exact key match - no error + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + default { + set this_type $opt_roottype + if {[string match @* $p]} { + #list mode - trim optional list specifier @ + set p [string range $p 1 end] + dict set pattern_this_structure $p list + set this_type list + } elseif {[string match %* $p]} { + dict set pattern_this_structure $p string + lappend keyset $p + lappend keyset_structure string + set this_type string + } + if {$this_type eq "list"} { + dict set pattern_this_structure $p list + if {[string is integer -strict $p]} { + if {$get_not} { + set keys [punk::lib::range 0 [llength $dval]-1] + set keys [lremove $keys $p] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } else { + lappend keyset $p + lappend keyset_structure list + } + } elseif {[punk::lib::is_indexset $p]} { + set keys [punk::lib::indexset_resolve [llength $dval] $p] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } elseif {[string match "?*-?*" $p]} { + #could be either - don't change type + #list indices with tcl8.7 underscores? be careful. Before 8.7 we could have used regexp \d on integers + #now we should map _ to "" first + set p [string map {_ {}} $p] + #lassign [textutil::split::splitx $p {\.\.}] a b + if {![regexp $re_idxdashidx $p _match a b]} { + error "unrecognised pattern $p" + } + #TODO - fix terminology. 'lower_resolve' is confusing here as range can be in descending order + #change to start/end terminology? + + set lower_resolve [punk::lib::lindex_resolve [llength $dval] $a] ;#-Inf for too low, Inf for too high + #keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds + if {${lower_resolve} == Inf} { + ##x + #lower bound is above upper list range + #match with decreasing indices is still possible + set lower [expr {[llength $dval]-1}] ;#set to max + } elseif {$lower_resolve == -Inf} { + ##x + set lower 0 + } else { + set lower $lower_resolve + } + set upper [punk::lib::lindex_resolve [llength $dval] $b] + if {$upper == -Inf} { + ##x + #upper bound is below list range - + if {$lower_resolve > -Inf} { + ##x + set upper 0 + } else { + continue + } + } elseif {$upper == Inf} { + #use max + set upper [expr {[llength $dval]-1}] + #assert - upper >=0 because we have ruled out empty lists + } + #note lower can legitimately be higher than upper - lib::range, like lseq can produce sequence in reverse order + set keys [punk::lib::range $lower $upper] + if {$get_not} { + set fullrange [punk::lib::range 0 [llength $dval]-1] + set keys [lremove $fullrange {*}$keys] + if {$lower > $upper} { + set keys [lreverse $keys] + } + } + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } else { + if {$get_not} { + lappend keyset [list !@$p query] + } else { + lappend keyset [list @$p query] + } + lappend keyset_structure list + } + } elseif {$this_type eq "string"} { + dict set pattern_this_structure $p string + } elseif {$this_type eq "dict"} { + #default equivalent to @\*@* + dict set pattern_this_structure $p dict + #puts "dict: appending keys from index '$p' keys: [dict keys $dval $p]" + set keys [dict keys $dval $p] + if {$get_not} { + set keys [dict keys [dict remove $dval {*}$keys]] + } + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + } else { + puts stderr "list: unrecognised pattern $p" + } + } + } + } + } + } + + # -- --- --- --- + #check next pattern-segment for substructure type to use + # -- --- --- --- + set substructure "" + set pnext [lindex $segments 1] + set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] + if {[llength $patterninfo] == 0} { + # // ? -review - what does this mean? for xpath this would mean at any level + set substructure [lindex $pattern_this_structure end] + } elseif {[llength $patterninfo] == 1} { + #ignore the NOT operator for purposes of query-type detection + if {[string index $pnext 0] eq "!"} { + set pnext [string range $pnext 1 end] + } + # single type in segment e.g /@@something/ + switch -exact -- $pnext { + "" { + set substructure string + } + @*k@* - @*K@* - @*.@* - ## { + set substructure dict + } + # { + set substructure list + } + ## { + set substructure dict + } + %# { + set substructure string + } + * { + #set substructure $opt_roottype + #set substructure [dict get $pattern_this_structure $pattern_nest] + set substructure [lindex $pattern_this_structure end] + } + default { + switch -glob -- $pnext { + @??@* - @?@* - @@* { + #all 4 or 3 len prefixes bounded by @ are dict + set substructure dict + } + default { + if {[string match @* $pnext]} { + set substructure list + } elseif {[string match %* $pnext]} { + set substructure string + } else { + #set substructure $opt_roottype + #set substructure [dict get $pattern_this_structure $pattern_nest] + set substructure [lindex $pattern_this_structure end] + } + } + } + } + } + } else { + #e.g /@0,%str,.../ + #doesn't matter what the individual types are - we have a list result + set substructure list + } + #puts "--pattern_nest: $pattern_nest substructure: $substructure" + dict set pattern_next_substructure $pattern_nest $substructure + # -- --- --- --- + + if {$opt_keysorttype ne "none"} { + set int_keyset 1 + foreach k $keyset { + if {![string is integer -strict $k]} { + set int_keyset 0 + break + } + } + if {$int_keyset} { + set sortindices [lsort -indices -integer $keyset] + #set keyset [lsort -integer $keyset] + } else { + #set keyset [lsort -$opt_keysorttype $keyset] + set sortindices [lsort -indices -$opt_keysorttype $keyset] + } + set keyset [lmap i $sortindices {lindex $keyset $i}] + set keyset_structure [lmap i $sortindices {lindex $keyset_structure $i}] + } + + foreach k $keyset { + lappend pattern_key_index $pattern_nest + } + + lappend filtered_keys {*}$keyset + lappend all_keyset_structure {*}$keyset_structure + + #puts stderr "--->pattern_nest:$pattern_nest keyset:$keyset" + } + } + #puts stderr "[dict get $pattern_this_structure $pattern_nest] keys: $filtered_keys" + } else { + puts stdout "unrecognised roottype: $opt_roottype" + return $dval + } + + if {[llength $filtered_keys]} { + #both keys and values could have newline characters. + #simple use of 'format' won't cut it for more complex dict keys/values + #use block::width or our columns won't align in some cases + switch -- $opt_return { + "tailtohead" { + #last line of key is side by side (possibly with separator) with first line of value + #This is more intelligible when terminal wrapping occurs - and is closer to what happens with parray multiline keys and values + #we still pad the key to max width so that the separator appears in the same column - which in the case of wide keys could cause that to wrap for all entries + + set kt [lindex $opt_keytemplates 0] + if {$kt eq ""} { + set kt {${$key}} + } + #set display_keys [lmap k $filtered_keys {tcl::string::map [list %k% $k] $kt}] + set display_keys [lmap key $filtered_keys {tstr -ret string -allowcommands $kt}] + set maxl [::tcl::mathfunc::max {*}[lmap v $display_keys {textblock::width $v}]] + + set kidx 0 + set last_hidekey 0 + foreach keydisplay $display_keys key $filtered_keys { + set thisval "?" + set hidekey 0 + set pattern_nest [lindex $pattern_key_index $kidx] + set pattern_nest_list [split $pattern_nest /] + #set this_type [dict get $pattern_this_structure $pattern_nest] + #set this_type [dict get $pattern_this_structure $key] + set this_type [lindex $all_keyset_structure $kidx] + #puts stderr "---> kidx:$kidx key:$key - pattern_nest:$pattern_nest this_type:$this_type" + + set is_match 1 ;#whether to display the normal separator or bad-match separator + switch -- $this_type { + dict { + #todo? - slower lsearch if -dupes 1 flag set so we can display duplicate 'keys' if var not a proper dict but rather a dict-shaped list that we want to display as a dict + # - default highlight dupes (ansi underline?) + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + % thisval.= $qry= $dval + } else { + set thisval [tcl::dict::get $dval $key] + } + + #set substructure [lrange $opt_structure 1 end] + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + set nextopts [dict get $argd opts] + + + set subansibasekeys [lrange $opt_ansibase_keys 1 end] + set nextkeytemplates [lrange $opt_keytemplates 1 end] + #dict set nextopts -substructure $nextsub + dict set nextopts -keytemplates $nextkeytemplates + dict set nextopts -ansibase_keys $subansibasekeys + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + #puts stderr "showdict {*}$nextopts $thisval [lindex $args end]" + + if {[llength $nextpatterns]} { + if {[catch { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } errMsg]} { + #puts stderr ">>> nextpatterns:'$nextpatterns' nextopts:'$nextopts'" + set is_match 0 + } + } + } + list { + if {[string is integer -strict $key]} { + set thisval [lindex $dval $key] + } else { + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + } else { + set qry $key + } + % thisval.= $qry= $dval + } + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + set nextopts [dict get $argd opts] + + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + + #if {![llength $nextpatterns]} { + # set nextpatterns * + #} + if {[llength $nextpatterns]} { + if {[catch { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } errMsg]} { + set is_match 0 + } + } + } + string { + set hidekey 1 + switch -- $key { + "%string" { + set hidekey 1 + set thisval $dval + } + "%ansiview" { + set thisval [ansistring VIEW -lf 1 $dval] + } + "%ansiviewstyle" { + set thisval [ansistring VIEWSTYLE -lf 1 $dval] + } + default { + switch -glob -- $key { + *lpad-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which left -width $width] + } + %lpadstr-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which left -width $width -padchar $extra] + } + %rpad-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which right -width $width] + } + %rpadstr-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which right -width $width -padchar $extra] + } + %split-* { + #split on one or more chars - review + set hidekey 1 + lassign [split $key -] _ splitchars + set thisval [split $dval $splitchars] + } + default { + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + } else { + set qry $key + } + set thisval $dval + if {[string index $key 0] ne "%"} { + set key %$key + } + #pipeline + % thisval.= $key= $thisval + } + } + + } + } + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + #set nextopts [dict get $argd opts] + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + + if {[llength $nextpatterns]} { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } + + } + } + if {$this_type eq "string" && $hidekey} { + lassign [textblock::size $thisval] _vw vwidth _vh vheight + #set blanks_above [string repeat \n [expr {$kheight -1}]] + set vblock $opt_ansibase_values$thisval$RST + #append result [textblock::join_basic -- $vblock] + #review - we wouldn't need this space if we had a literal %sp %sp-x ?? + append result " $vblock" + } else { + set ansibase_key [lindex $opt_ansibase_keys 0] + + lassign [textblock::size $keydisplay] _kw kwidth _kh kheight + lassign [textblock::size $thisval] _vw vwidth _vh vheight + + set totalheight [expr {$kheight + $vheight -1}] + set blanks_above [string repeat \n [expr {$kheight -1}]] + set blanks_below [string repeat \n [expr {$vheight -1}]] + + if {$is_match} { + set use_sep $opt_sep + } else { + set use_sep $opt_mismatch_sep + } + + + set sepwidth [textblock::width $use_sep] + set kblock [textblock::pad $ansibase_key$keydisplay$RST$blanks_below -width $maxl] + set sblock [textblock::pad $blanks_above$use_sep$blanks_below -width $sepwidth] + set vblock $blanks_above$opt_ansibase_values$thisval$RST + #only vblock is ragged - we can do a basic join because we don't care about rhs whitespace + if {$last_hidekey} { + append result \n + } + #append result [textblock::join_basic -- $kblock $sblock $vblock] \n + append result [textblock::join_basic_raw $kblock $sblock $vblock] \n + } + set last_hidekey $hidekey + incr kidx + } + } + "sidebyside" { + # TODO - fix + #This is nice for multiline keys and values of reasonable length, will produce unintuitive results when line-wrapping occurs. + #use ansibase_key etc to make the output more comprehensible in that situation. + #This is why it is not the default. (review - terminal width detection and wrapping?) + set maxl [::tcl::mathfunc::max {*}[lmap v $filtered_keys {textblock::width $v}]] + foreach key $filtered_keys { + set kt [lindex $opt_keytemplates 0] + if {$kt eq ""} { + set kt "%k%" + } + set keydisplay $opt_ansibase_keys[string map [list %k% $key] $kt]$RST + #append result [format "%-*s = %s" $maxl $key [dict get $dval $key]] \n + #differing height blocks (ie ragged) so we need a full textblock::join rather than join_basic + append result [textblock::join -- [textblock::pad $keydisplay -width $maxl] $opt_sep "$opt_ansibase_values[dict get $dval $key]$RST"] \n + } + } + } + } + if {$opt_trimright} { + set result [::join [lines_as_list -line trimright $result] \n] + } + if {[string last \n $result] == [string length $result]-1} { + set result [string range $result 0 end-1] + } + #stdout/stderr can exist but not be in 'chan names' (e.g when transforms in place) + set chan [dict get $argd opts -channel] + switch -- $chan { + stderr - stdout { + puts $chan $result + } + none { + return $result + } + default { + #review - check member of chan names? + #just try outputting to the supplied channel for now + puts $chan $result + } + } + } + + proc is_list_all_in_list {small large} { + if {[llength $small] > [llength $large]} {return 0} + foreach x $large { + ::set ($x) {} + } + foreach x $small { + if {![info exists ($x)]} { + return 0 + } + } + return 1 + } + #v2 generally seems slower + proc is_list_all_in_list2 {small large} { + set small_in_large [lsort [struct::set intersect [lsort -unique $small] $large ]] + return [struct::list equal [lsort $small] $small_in_large] + } + if {!$has_struct_list || !$has_struct_set} { + set body { + package require struct::list + package require struct::set + } + append body [info body is_list_all_in_list2] + proc is_list_all_in_list2 {small large} $body + } + + proc is_list_all_ni_list {A B} { + foreach x $B { + ::set ($x) {} + } + foreach x $A { + if {[info exists ($x)]} { + return 0 + } + } + return 1 + } + proc is_list_all_ni_list2 {a b} { + set i [struct::set intersect $a $b] + return [expr {[llength $i] == 0}] + } + if {!$has_struct_set} { + set body { + package require struct::list + } + append body [info body is_list_all_ni_list2] + proc is_list_all_ni_list2 {a b} $body + } + proc is_cachedlist_all_ni_list {a b} { + upvar 0 ::punk::lib::caches::funcs_ni_list funcs + if {[info exists funcs($a)]} { + return [[set funcs($a)] $b] + } + set keybytes [encoding convertto utf-8 $a] + set key [binary encode base64 $keybytes] ;#one single-line base64 string + + set expression "" + foreach t $a { + #append expression "({$t} ni \$b) && " + append expression "{$t} ni \$b && " + } + set expression [string trimright $expression " &"] ;#trim trailing spaces and ampersands + proc ::punk::lib::caches::ni_list_$key {b} [string map [list @expression@ $expression] { + return [expr {@expression@}] + }] + + set funcs($a) ::punk::lib::caches::ni_list_$key + return [punk::lib::caches::ni_list_$key $b] + } + proc is_cachedlist_all_ni_list2 {a b} { + upvar 0 ::punk::lib::caches::funcs_ni_list funcs + if {[info exists funcs($a)]} { + return [[set funcs($a)] $b] + } + set keybytes [encoding convertto utf-8 $a] + set key [binary encode base64 $keybytes] ;#one single-line base64 string + + set d [dict create] + foreach x $a { + dict set d $x "" + } + #constructing a switch statement could be an option + # - but would need to avoid using escapes in order to get a jump-table + # - this would need runtime mapping of values - unlikely to be a win + proc ::punk::lib::caches::ni_list_$key {b} [string map [list @d@ $d] { + foreach x $b { + if {[::tcl::dict::exists {@d@} $x]} { + return 0 + } + } + return 1 + }] + + set funcs($a) ::punk::lib::caches::ni_list_$key + return [punk::lib::caches::ni_list_$key $b] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::ldiff + @cmd -name punk::lib::ldiff\ + -summary\ + "Difference consisting of items with removeitems removed."\ + -help\ + "Somewhat like struct::set difference, but order preserving, and doesn't + treat as a 'set' so preserves any duplicates in items. + + struct::set difference may happen to preserve ordering when items are + integers, but order can't be relied on, especially as struct::set has + 2 differening implementations (tcl vs critcl) which return results with + different ordering to each other and different deduping behaviour in + some cases (e.g when 2nd arg is empty)" + @values -min 2 -max 2 + items -type list + removeitems -type list + }] + } + proc ldiff {items removeitems} { + if {[llength $removeitems] == 0} {return $items} + set result {} + foreach item $items { + if {$item ni $removeitems} { + lappend result $item + } + } + return $result + } + #with ledit (also avail in 8.6 using punk::lib::compat::ledit + proc ldiff2 {fromlist removeitems} { + if {[llength $removeitems] == 0} {return $fromlist} + foreach item $removeitems { + set posns [lsearch -all -exact $fromlist $item] + foreach p $posns {ledit fromlist $p $p} + } + return $fromlist + } + proc ldiff3 {fromlist removeitems} { + set doomed [list] + foreach item $removeitems { + lappend doomed {*}[lsearch -all -exact $fromlist $item] + } + lremove $fromlist {*}$doomed + } + + #fix for tcl impl of struct::set::diff which doesn't dedupe + proc struct_set_diff_unique {A B} { + package require struct::set ;#require even if tcl impl - so the dependency isn't missed accidentally due to Loaded state of programmers machine. + if {[struct::set::Loaded] eq "tcl"} { + return [punk::lib::setdiff $A $B] + } else { + #use (presumably critcl) implementation for speed + return [struct::set difference $A $B] + } + } + + + #non-dupe preserving - for consistency in dedupe behaviour we don't shortcircuit empty B + #consistent dedupe and order-maintenance of remaining items in A differentiate this from struct::set difference (dedupes with critcl, doesn't with tcl implementation 2024) + #also struct::set difference with critcl is faster + proc setdiff {A B} { + if {[llength $A] == 0} {return {}} + set d [dict create] + foreach x $A {dict set d $x {}} + foreach x $B {dict unset d $x} + return [dict keys $d] + } + #bulk dict remove is slower than a foreach with dict unset + #proc setdiff2 {fromlist removeitems} { + # #if {[llength $fromlist] == 0} {return {}} + # set d [dict create] + # foreach x $fromlist { + # dict set d $x {} + # } + # return [dict keys [dict remove $d {*}$removeitems]] + #} + #array is about 15% faster than dict - but unordered. (which is ok for sets - but we have struct::set for that) + proc setdiff_unordered {A B} { + if {[llength $A] == 0} {return {}} + array set tmp {} + foreach x $A {::set tmp($x) .} + foreach x $B {catch {unset tmp($x)}} + return [array names tmp] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lunique_unordered + @cmd -name punk::lib::lunique_unordered\ + -summary\ + "unique values in list"\ + -help\ + "Return unique values in provided list. + This removes duplicates but *may* rearrange the + order of the returned elements compared to the + original list. + + When struct::set is available this will be used + for the implementation, as it can be *slightly* + faster if acceleration is present. When struct::set + is not available it will fallback to lunique and + provide the same functionality with order preserved." + @values -min 1 -max 1 + list -type list + }] + } + #default/fallback implementation + proc lunique_unordered {list} { + lunique $list + } + if {$has_struct_set} { + if {[struct::set equal [struct::set union {a a} {}] {a}]} { + proc lunique_unordered {list} { + struct::set union $list {} + } + } else { + #struct::set union operates on a 'set' - so this probably won't change, and hopefully is + #consistent across unacelerated versions and those implemented in accelerators, + #but if it ever does change - be a little noisy about it. + puts stderr "punk::lib WARNING: struct::set union no longer dedupes!" + #we could also test a sequence of: struct::set add + } + } + + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lunique + @cmd -name punk::lib::lunique\ + -summary\ + "Order-preserving unique values in list"\ + -help\ + "Return unique values in provided list. + This removes duplicates whilst preserving the + original order of the provided list. + + When struct::set is available with acceleration, + lunique_unordered may be slightly faster." + @values -min 1 -max 1 + list -type list + }] + } + proc lunique {list} { + set new {} + foreach item $list { + if {$item ni $new} { + lappend new $item + } + } + return $new + } + proc lunique2 {list} { + set doomed [list] + #expr 'in' probably faster than using a dict - for lists approx < 20,000 items. (wiki wisdom - url?) + for {set i 0} {$i < [llength $list]} {} { + set item [lindex $list $i] + lappend doomed {*}[lrange [lsearch -all -exact -start $i $list $item] 1 end] + while {[incr i] in $doomed} {} + } + lremove $list {*}$doomed + } + #The closure-like behaviour is *very* slow especially when called from a context such as the global namespace with lots of vars and large arrays such as ::env + proc lmapflat_closure {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + # -- --- --- + #capture - use uplevel 1 or namespace eval depending on context + set capture [uplevel 1 { + apply { varnames { + set capturevars [tcl::dict::create] + set capturearrs [tcl::dict::create] + foreach fullv $varnames { + set v [tcl::namespace::tail $fullv] + upvar 1 $v var + if {[info exists var]} { + if {(![array exists var])} { + tcl::dict::set capturevars $v $var + } else { + tcl::dict::set capturearrs capturedarray_$v [array get var] + } + } else { + #A variable can show in the results for 'info vars' but still not 'exist'. e.g a 'variable x' declaration in the namespace where the variable has never been set + } + } + return [tcl::dict::create vars $capturevars arrs $capturearrs] + } } [info vars] + } ] + # -- --- --- + set cvars [tcl::dict::get $capture vars] + set carrs [tcl::dict::get $capture arrs] + set apply_script "" + foreach arrayalias [tcl::dict::keys $carrs] { + set realname [string range $arrayalias [string first _ $arrayalias]+1 end] + append apply_script [string map [list %realname% $realname %arrayalias% $arrayalias] { + array set %realname% [set %arrayalias%][unset %arrayalias%] + }] + } + + append apply_script [string map [list %script% $script] { + #foreach arrayalias [info vars capturedarray_*] { + # set realname [string range $arrayalias [string first _ $arrayalias]+1 end] + # array set $realname [set $arrayalias][unset arrayalias] + #} + #return [eval %script%] + %script% + }] + #puts "--> $apply_script" + foreach $varnames $list { + lappend result {*}[apply\ + [list\ + [concat $varnames [tcl::dict::keys $cvars] [tcl::dict::keys $carrs] ]\ + $apply_script\ + ] {*}[subst $values] {*}[tcl::dict::values $cvars] {*}[tcl::dict::values $carrs] ] + } + return $result + } + #link version - can write to vars in calling context - but keeps varnames themselves isolated + #performance much better than capture version - but still a big price to pay for the isolation + proc lmapflat_link {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + set linkvars [uplevel 1 [list info vars]] + set nscaller [uplevel 1 [list namespace current]] + + set apply_script "" + foreach vname $linkvars { + append apply_script [string map [list %vname% $vname]\ + {upvar 2 %vname% %vname%}\ + ] \n + } + append apply_script $script \n + + #puts "--> $apply_script" + foreach $varnames $list { + lappend result {*}[apply\ + [list\ + $varnames\ + $apply_script\ + $nscaller\ + ] {*}[subst $values]\ + ] + } + return $result + } + + #proc lmapflat {varnames list script} { + # concat {*}[uplevel 1 [list lmap $varnames $list $script]] + #} + #lmap can accept multiple var list pairs + proc lmapflat {args} { + concat {*}[uplevel 1 [list lmap {*}$args]] + } + proc lmapflat2 {args} { + concat {*}[uplevel 1 lmap {*}$args] + } + + #proc dict_getdef {dictValue args} { + # if {[llength $args] < 1} { + # error {wrong # args: should be "dict_getdef dictValue ?key ...? key default"} + # } + # set keys [lrange $args -1 end-1] + # if {[tcl::dict::exists $dictValue {*}$keys]} { + # return [tcl::dict::get $dictValue {*}$keys] + # } else { + # return [lindex $args end] + # } + #} + if {[info commands ::tcl::dict::getdef] eq ""} { + proc dict_getdef {dictValue args} { + set keys [lrange $args 0 end-1] + if {[tcl::dict::exists $dictValue {*}$keys]} { + return [tcl::dict::get $dictValue {*}$keys] + } else { + return [lindex $args end] + } + } + } else { + #we pay a minor perf penalty for the wrap + interp alias "" ::punk::lib::dict_getdef "" ::tcl::dict::getdef + } + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + #supports *safe* ultra basic offset expressions as used by lindex etc, but without the 'end' features + #safe in that we don't evaluate the expression as a string. + proc offset_expr {expression} { + set expression [tcl::string::map {_ {}} $expression] ;#review - this is for 8.6 to understand underscored ints + if {[tcl::string::is integer -strict $expression]} { + return [expr {$expression}] + } + if {[regexp {([^+-]*)([+-])(.*)} $expression _match a op b] && [tcl::string::is integer -strict $a] && [tcl::string::is integer -strict $b]} { + if {$op eq "-"} { + return [expr {$a - $b}] + } else { + return [expr {$a + $b}] + } + } else { + error "bad expression '$expression': must be integer?\[+-\]integer?" + } + } + + punk::args::define { + @id -id ::punk::lib::is_indexset + @cmd -name punk::lib::is_indexset\ + -summary\ + "Validate string is a comma-delimited 'indexset'."\ + -help\ + "Validate that a string is an 'indexset' + + An indexset consists of a comma delimited list of indexes or index-ranges. + No particular base is assumed for the purposes of validating an indexset here. + While in Tcl, lists are zero-based - an indexset can be applied to lists of any base. + e.g -10..-1 is an indexset that just won't resolve any results for a list with a base >= 0. + To validate if an indexset is strictly within range, both the length of the data and the base would + need to be considered. + + The normal 'range' specifier is .. but can be of the form .x. where x is the step value. + The range specifier can appear at the beginning, middle or end, or even alone to indicate the entire + range of valid values. + e.g the following are all valid ranges + 1.. + (index 1 to 'max') + ..10 + (index 'base' to 10) + 2..11 + (index 2 to 11) + .. + (all indices) + .3. + (1st index and every 3rd index thereafter) + + Common whitespace elements space,tab,newlines are ignored. + Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, + e.g end-2 or 2+2. + + see indexset_resolve" + @values -min 1 -max 1 + indexset -type string + } + proc is_indexset {indexset} { + #collapse internal whitespace (for basic whitespace set we allow) + set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] + if {![regexp {^[\-\+_end,\.0-9]*$} $indexset]} { + return 0 + } + set ranges [split $indexset ,] + foreach r $ranges { + set validateindices [list] + set rposn [string first .. $r] + if {$rposn >= 0} { + set sepsize 2 + set step 1 + #review - whitespace between ints? + lappend validateindices {*}[string range $r 0 $rposn-1] {*}[string range $r $rposn+2 end] + } elseif {[string first . $r] >= 0} { + set stripped [string map {. ""} $r] + if {[tcl::string::length $stripped] != [tcl::string::length $r]-2} { + #if one dot exists - must be exactly 2 dots in total - possibly separated by positive/negative int (not zero) + return 0 + } + #assert - we have exactly 2 dots separated by something. + #check for .n. 'stepped' range + set fdot [string first . $r] + set ldot [string last . $r] + set step [string range $r $fdot+1 $ldot-1] + #todo - allow basic mathops for step: 2+1 2+-1 etc same as tcl lindex, lseq + #1.0.10 should be valid but behave similarly to lseq 1 0 by 0 ie returns nothing + + #1.end.10 or similar shouldn't be valid - but we need to allow other basic index expressions. + if {[string match *end* $step] || [catch {lindex {} $step}]} { + return 0 + } + #if {![string is integer -strict $step] || $step == 0} { + # return 0 + #} + lappend validateindices {*}[string range $r 0 $fdot-1] {*}[string range $r $ldot+1 end] + } else { + #'range' is just an index + set validateindices [list $r] + } + + foreach v $validateindices { + if {$v eq "" || $v eq "end"} {continue} + if {[string is integer -strict $v]} {continue} + if {[catch {lindex {} $v}]} { + return 0 + } + } + } + return 1 + } + #review - compare to IMAP4 methods of specifying ranges? + #TODO add tests to test::punk::lib indexset_resolve is a little tricky + punk::args::define { + @id -id ::punk::lib::indexset_resolve + @cmd -name punk::lib::indexset_resolve\ + -summary\ + "Resolve an indexset to a list of integers based on supplied list or string length."\ + -help\ + "Resolve an 'indexset' to a list of actual indices within the range of the provided numitems value. + e.g in a basic case: for a list of 10 items, 'indexset_resolve 10 end' will return the index 9 + + An indexset consists of a comma delimited list of indexes or index-ranges. + Ranges must be specified with a range-indicator such as .. as the separator, with an empty value at + either side of the separator representing beginning and end of the index range respectively. + The range-separator can be of the form .x. where x is an integer or basic expression + (single +/- operation) that indicates the step value to use. This is equivalent to the 'by' value + in the tcl9 lseq command. + + When the start index is lower than the end, the step value defaults to 1. + ie indexset_resolve 0..7 is equivalent to indexset_resolve 0.1.7 + When the start index is higher than the end, the step value defaults to -1. + ie indexset_resolve 7..0 is equivalent to indexset_resolve 0.-1.7 + + If start and end are ommitted, increasing order is assumed if the step isn't specified. + eg + .. represents the range from the base to the end + .-1. would represent end to base with step -1 + + If start is omitted and only the end is supplied: + The default step is 1 indicating ascension and the missing start is equivalent to the base. + indexset_resolve 5 ..2 + -> 0 1 2 + The default start is 'end' if the step is negative + indexset_resolve 5 .-1.2 + -> 4 3 2 + If end is omitted and only the start is supplied: + The default step is 1 indicating ascension and the missing end is equivalent to 'end' + indexset_resolve 5 2.. + -> 2 3 4 + The default end is the base if the step is negative + indexset_resolve 5 2.-1. + -> 2 1 0 + + Like the tcl9 lseq command - a step (by) value of zero produces no results. + + The indexes are 0-based by default, but the base can be specified. + indexset_resolve 7 .. + -> 0 1 2 3 4 5 6 + indexset_resolve -base -3 7 .. + -> -3 -2 -1 0 1 2 3 + + Whitespace is ignored. + Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, + e.g end-2 or 2+2. + + end means the last item. + end-1 means the second last item. + 0.. is the same as 0..end + + indexset examples: + + These assume the default 0-based indices (-base 0) + + 1,3.. + output the index 1 (2nd item) followed by all from index 3 to the end. + indexset_resolve 4 1,3.. + -> 1 3 + indexset_resolve 10 1,3.. + -> 1 3 4 5 6 7 8 9 + 0..2,end + output the first 3 indices, and the last index. + end-1..0 + output the indexes in reverse order from 2nd last item to first item." + @leaders -min 0 -max 0 + @opts + -base -type integer -prefix 1 -default 0 -help\ + "This is the starting index. It can be positive, negative or zero. + This affects the start and end calculations, limiting what indices will be + returned. + e.g with base 1 'end' will give a different value from base 0 + + for 10 items 'end' is 10 when 1-based + for 10 items 'end' is 9 when 0-based + + For base 1, index 0 is considered to be below the range. + ie + indexset_resolve -base 1 10 0..3 + -> 1 2 3 + indexset_resolve -base 0 10 0..3 + -> 0 1 2 3 + + It does not *convert* indexes within the range. + + indexset_resolve -base 1 10 5 + -> 5 + indexset_resolve -base 0 10 5 + -> 5 + + ie if you ask for a 1-based resolution of an indexset the integers that are within + the range will come out the same, so the result needs to be treated as a 1-based + set of indices when performing further operations. + " + @values -min 2 -max 3 + numitems -type integer + indexset -type indexset -help "comma delimited specification for indices to return" + } + + #limit punk::args parsing to unhappy paths where possible + proc indexset_resolve {args} { + # -------------------------------------------------- + # Manual parsing of happy path args instead of using punk::args::parse $args withid ::punk::lib::indexset_resolve + # This is because indexset_resolve is *somewhat* low level, has only a few args, and we don't want any overhead. + # for the unhappy path - the punk::args::parse is fine to generate the usage/error information. + # -------------------------------------------------- + if {[llength $args] < 2} { + punk::args::resolve $args withid ::punk::lib::indexset_resolve + } + set indexset [lindex $args end] + set numitems [lindex $args end-1] + if {![string is integer -strict $numitems] || ![is_indexset $indexset]} { + #use parser on unhappy path only + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + #assert we have 2 or more args + set base 0 ;#default + if {[llength $args] > 2} { + #if more than just numitems and indexset - we expect only -base ie 4 args in total + if {[llength $args] != 4} { + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + set optname [lindex $args 0] + set optval [lindex $args 1] + set fulloptname [tcl::prefix::match -error "" -base $optname] + if {$fulloptname ne "-base" || ![string is integer -strict $optval]} { + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + set base $optval + } + # -------------------------------------------------- + + + set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] ;#collapse basic whitespace + set index_list [list] ;#list of actual indexes within the range + set iparts [split $indexset ,] + set based_max [expr {$numitems -1 + $base}] + + #we already did is_indexset check above, so we can make assumptions about well-formedness of each part + foreach ipart $iparts { + set ipart [string trim $ipart] + #we need to cater for n..m as well as n.s.m where s is 'step' + set rposn [string first . $ipart] + if {$rposn>=0} { + #if we found one dot - there must be exactly 2 dots in the ipart, separated by nothing, or a basic integer-expression + set rposn2 [string last . $ipart] + if {$rposn2 == $rposn+1} { + #.. + set step "default" ;#could be 1 or -1 + } else { + set step [tcl::string::range $ipart $rposn+1 $rposn2-1] + } + lassign [punk::lib::string_splitbefore_indices $ipart $rposn $rposn2+1] rawa _ rawb + + set rawa [string trim $rawa] + set rawb [string trim $rawb] + if {$rawa eq "" && $rawb eq ""} { + if {$step eq "default"} { + set step 1 ;#default ascending when no start and no end + } + if {$step < 0} { + set rawa end + set rawb $base + } else { + set rawa $base + set rawb end + } + #if neither start nor end specified - we won't get out of range results from lindex_resolve + set a [punk::lib::lindex_resolve $numitems $rawa $base] + set b [punk::lib::lindex_resolve $numitems $rawb $base] + } else { + if {$rawa eq ""} { + if {$step eq "default"} { + #when start not specified, but end is - default direction always ascending + #(even if end is base or below range) + set step 1 + } + if {$step < 0} { + set rawa end + } else { + set rawa $base + } + } + set a [punk::lib::lindex_resolve $numitems $rawa $base] + if {$a == -Inf} { + #undershot - leave negative + } elseif {$a == Inf} { + #overshot + set a [expr {$based_max + 1}] ;#put it outside the range on the upper side + } + #review - a may be -Inf + + if {$rawb eq ""} { + if {$step eq "default"} { + set step 1 + } + if {$step < 0} { + if {$a < $base} { + #make sure both + #mathfunc::isinf is tcl9+ + if {[catch { + if {[::tcl::mathfunc::isinf $a]} { + set a [expr {$base -1}] + } + }]} { + if {[string match -nocase *inf* $a]} { + set a [expr {$base -1}] + } + } + set rawb $a + } else { + set rawb $base + } + } else { + if {$a > $based_max} { + set rawb $a ;#make sure .. doesn't return last item - should return nothing + } else { + set rawb end + } + } + } + set b [punk::lib::lindex_resolve $numitems $rawb $base] + if {$b == -Inf} { + #undershot - leave negative + } elseif {$b == Inf} { + #set b [expr {$numitems}] ;#overshot - put it outside the range on the upper side + set b [expr {$based_max + 1}] ;#overshot - put it outside the range on the upper side + } + } + + #JJJ + + #e.g make sure .. doesn't return last item - should return nothing as both are above the range. + if {$a >= $base && $a <= $based_max && $b >=$base && $b <= $based_max} { + #assert a & b are integers within the range + if {$step eq "default"} { + #unspecified step - base direction on order of a & b + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } else { + if {$a >= $base && $a <= $based_max} { + #only a is in the range + if {$b < $base} { + set b $base + } else { + set b $based_max + } + if {$step eq "default"} { + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } elseif {$b >=$base && $b <= $based_max} { + #only b is in the range + if {$step eq "default"} { + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + if {$step < 0} { + if {$a < $base} { + #negative step from below - doesn't matter if b is in range - recast both to an int below $base + #(a may be -Inf) + set a [expr {$base -1}] + set b $a + set step 0 ;#we should return nothing + } + } else { + if {$a < $base} { + set a $base + } else { + set a $based_max + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } else { + #both outside the range + if {$a < $base && $b > $base} { + #spans the range in forward order + set a $base + set b $based_max + if {$step eq "default"} { + set step 1 + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } elseif {$a > $base && $b < $base} { + #spans the range in reverse order + set a $based_max + set b $base + if {$step eq "default"} { + set step -1 + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } + #both outside of range on same side + } + } + } else { + set idx [punk::lib::lindex_resolve_basic $numitems $ipart $base] + #returns only -Inf for out of range at either end + if {$idx >= $base} { + #index within the range + lappend index_list $idx + } + } + } + return $index_list + } + # showdict uses lindex_resolve results -Inf & Inf to determine whether index is out of bounds on lower vs upper side + #This doesn't need the list itself - just the length suffices. + punk::args::define { + @id -id ::punk::lib::lindex_resolve + @cmd -name punk::lib::lindex_resolve\ + -summary\ + "Resolve an indexexpression to an integer based on supplied list or string length."\ + -help\ + "Resolve an index which may be of the forms accepted by Tcl list or string commands such as end-2 or 2+2 + to the actual integer index for the supplied list/string length, or to a negative value below -1 indicating + whether the index was below or above the range of possible indices for the length supplied. + + Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. + This means the proc may be called with something like $x+2 end-$y etc + Sometimes the actual integer index is desired. + + We want to resolve the index used, without passing arbitrary expressions into the 'expr' function + - which could have security risks. + lindex_resolve will parse the index expression and return: + a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) + b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) + The similar function lindex_resolve_basic uses -Inf to denote + out of range at either end of the list/string. + Otherwise it will return an integer corresponding to the position in the data. + This is in stark contrast to Tcl list/string function indices which will return empty strings for out of + bounds indices, or in the case of lrange, return results anyway. + Like Tcl list commands - it will produce an error if the form of the index is not acceptable. + For empty lists/string (datalength 0), end and end+x indices are considered to be out of bounds on the upper side + - thus returning -2 + + Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. + We will get something like 10+1 - which can be resolved safely with expr + " + @values -min 2 -max 2 + datalength -type integer + index -type indexexpression + } + proc lindex_resolve {len index {base 0}} { + #*** !doctools + #[call [fun lindex_resolve] [arg len] [arg index]] + #[para]Resolve an index which may be of the forms accepted by Tcl list commands such as end-2 or 2+2 to the actual integer index for the supplied list/string length + #[para]Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. + #[para]This means the proc may be called with something like $x+2 end-$y etc + #[para]Sometimes the actual integer index is desired. + #[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. + #[para]lindex_resolve will parse the index expression and return: + #[para] a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) + #[para] b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] We don't return -1 - as the similar function lindex_resolve_basic uses this to denote out of range at either end of the list/string + #[para]Otherwise it will return an integer corresponding to the position in the list. + #[para]This is in stark contrast to Tcl list function indices which will return empty strings for out of bounds indices, or in the case of lrange, return results anyway. + #[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable + #[para]For empty lists, end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 + + #Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which can be resolved safely with expr + + + #REVIEW - we need compat for 1_000 etc to handle things like toml even in 8.6? + #A basic string map means we aren't properly validating + #todo - be stricter about malformations such as 1000_ + if {![string is integer -strict 1_0]} { + #basic forward compatibility with integers such as 1_000 for 8.6.x + set index [tcl::string::map {_ {}} $index] + set len [tcl::string::map {_ {}} $len] + } + + if {![string is integer -strict $len] || $len < 0} { + error "lindex_resolve len must be a positive integer." + } + set based_max [expr {$len -1 + $base}] + + if {[string is integer -strict $index]} { + #review - base? + #can match +i -i + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } else { + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } else { + if {[string match end* $index]} { + if {$index ne "end"} { + set op [string index $index 3] + set offset [string range $index 4 end] + #note - offset could have leading + or - + # 'string is integer -strict +1' ==> true + #e.g end+-1 is valid (end++-1 is not) + if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} + if {$offset == 0} { + #(offset +0, -0 or 0 or 000 0_0 etc) + #op either + or - is irrelevant + #set index [expr {$len-1}] ;#+ base ? + set index $based_max + if {$index < $base} { + #return -2 ;#special case - equivalent to 'end', with empty list - treat like a positive number out of bounds + return Inf + } else { + return $index + } + } + + #set index [if {$op eq "+"} {expr {($len-1) + $offset}} else {expr {($len-1) - $offset}}] + set index [if {$op eq "+"} {expr {$based_max + $offset}} else {expr {$based_max - $offset}}] + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } else { + return $index + } + } else { + #index is 'end' + if {$len == 0} { + #special case - 'end' with empty list - treat end like a positive number out of bounds + return Inf + } + #return [expr {$len - 1 + $base}] + return $based_max + } + } else { + #plain +- already handled above. + #we are trying to avoid evaluating unbraced expr of potentially insecure origin + #regexp must split a++b to a + +b (not a+ + b) ie first +/- is the op + if {[regexp {([^+-]*)([+-])(.*)} $index _match a op b]} { + if {[string is integer -strict $a] && [string is integer -strict $b]} { + if {$op eq "-"} { + set index [expr {$a - $b}] + } else { + set index [expr {$a + $b}] + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } + return $index + } + } + } + proc lindex_resolve_basic {len index {base 0}} { + #*** !doctools + #[call [fun lindex_resolve_basic] [arg len] [arg index]] + #[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) + #[para] returns -Inf for out of range at either end, or a valid integer index + #[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound + #[para] This is only likely to be faster than average over lindex_resolve for small lists and for Tcl which has the builtin lseq command + #[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 + #[para] For pure integer indices the performance should be equivalent + + if {![string is integer -strict $len] || $len < 0} { + error "lindex_resolve_basic len must be an integer greater than or equal to zero" + } + if {![string is integer -strict $base]} { + #base can be negative + error "lindex_resolve_basic base must be an integer" + } + set based_max [expr {$len -1 + $base}] + + set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 + if {[string is integer -strict $index]} { + #can match +i -i + #avoid even the lseq overhead when the index is simple + if {$index < $base || ($index > $based_max)} { + #even though in this case we could return -Inf or Inf like lindex_resolve; + #for consistency we don't return Inf for upper-boudn violation, + #as which bound is violated is not always directly determinable for compound index expressions (such as end-x) using the lseq+lindex mechanism. + return -Inf + } else { + #!NOTE! index within range is unchanged - no matter the base + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } + if {$len > 0} { + #For large len - this is a wasteful allocation if no true lseq available in Tcl version. + #lseq produces an 'arithseries' object which we can index into without allocating an entire list (REVIEW) + set testlist [punk::lib::range $base $based_max] ;# uses lseq if available, has fallback of creating a potentially large list of numbers. + } else { + set testlist [list] + #we want to call 'lindex' even in this case - to get the appropriate error message + } + set idx [lindex $testlist $index] + if {$idx eq ""} { + #we have no way to determine if out of bounds is at lower vs upper end + return -Inf + } else { + return $idx + } + } + proc lindex_get {list index} { + set resultlist [lrange $list $index $index] + if {![llength $resultlist]} { + return -1 + } else { + #we still don't know the actual integer index for an index such as end-x or int-int without parsing and evaluating ourself. + #we can return the value - but only in a way that won't collide with our -1 out-of-range indicator + return [tcl::dict::create value [lindex $resultlist 0]] + } + } + + proc string_splitbefore {str index} { + if {![string is integer -strict $index]} { + set index [punk::lib::lindex_resolve [string length $str] $index] + switch -- $index { + -Inf { + return [list "" $str] + } + Inf { + return [list $str ""] + } + } + } + return [list [string range $str 0 $index-1] [string range $str $index end]] + #scan %s stops at whitespace - not useful here. + #scan $s %${p}s%s + } + proc string_splitbefore_indices {str args} { + set parts [list $str] + set sizes [list [string length $str]] + set s 0 + foreach index $args { + if {![string is integer -strict $index]} { + set index [punk::lib::lindex_resolve [string length $str] $index] + switch -- $index { + -Inf { + if {[lindex $sizes 0] != 0} { + ledit parts 0 0 {} [lindex $parts 0] + ledit sizes 0 0 0 [lindex $sizes 0] + } + continue + } + Inf { + if {[lindex $sizes end] != 0} { + ledit parts end end [lindex $parts end] {} + ledit sizes end end [lindex $sizes end] 0 + } + continue + } + } + } + if {$index <= 0} { + if {[lindex $sizes 0] != 0} { + ledit parts 0 0 {} [lindex $parts 0] + ledit sizes 0 0 0 [lindex $sizes 0] + } + continue + } + if {$index >= [string length $str]} { + if {[lindex $sizes end] != 0} { + ledit parts end end [lindex $parts end] {} + ledit sizes end end [lindex $sizes end] 0 + } + continue + } + set i -1 + set a 0 + foreach sz $sizes { + incr i + if {$a + $sz > $index} { + set p [lindex $parts $i] + #puts "a:$a index:$index" + if {$a == $index} { + break + } + ledit parts $i $i [string range $p 0 [expr {$index -$a -1}]] [string range $p $index-$a end] + ledit sizes $i $i [expr {$index - $a}] [expr {($a + $sz)-$index}] + break + } + incr a $sz + } + #puts "->parts:$parts" + #puts "->sizes:$sizes" + } + return $parts + } + + proc K {x y} {return $x} + #*** !doctools + #[call [fun K] [arg x] [arg y]] + #[para]The K-combinator function - returns the first argument, x and discards y + #[para]see [uri https://wiki.tcl-lang.org/page/K] + #[para]It is used in cases where command-substitution at the calling-point performs some desired effect. + + + proc is_utf8_multibyteprefix {bytes} { + #*** !doctools + #[call [fun is_utf8_multibyteprefix] [arg str]] + #[para] Returns a boolean if str is potentially a prefix for a multibyte utf-8 character + #[para] ie - tests if it is possible that appending more data will result in a utf-8 codepoint + #[para] Will return false for an already complete utf-8 codepoint + #[para] It is assumed the incomplete sequence is at the beginning of the bytes argument + #[para] Suitable input for this might be from the unreturned tail portion of get_utf8_leading $testbytes + #[para] e.g using: set head [lb]get_utf8_leading $testbytes[rb] ; set tail [lb]string range $testbytes [lb]string length $head[rb] end[rb] + regexp {(?x) + ^ + (?: + [\xC0-\xDF] | #possible prefix for two-byte codepoint + [\xE0-\xEF] [\x80-\xBF]{0,1} | #possible prefix for three-byte codepoint + [\xF0-\xF4] [\x80-\xBF]{0,2} #possible prefix for + ) + $ + } $bytes + } + + proc is_utf8_first {str} { + regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + ^ + (?: + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + } $str + } + proc is_utf8_single {1234bytes} { + #*** !doctools + #[call [fun is_utf8_single] [arg 1234bytes]] + #[para] Tests input of 1,2,3 or 4 bytes and responds with a boolean indicating if it is a valid utf-8 character (codepoint) + regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + ^ + (?: + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + $ + } $1234bytes + } + proc get_utf8_leading {rawbytes} { + #*** !doctools + #[call [fun get_utf8_leading] [arg rawbytes]] + #[para] return the leading portion of rawbytes that is a valid utf8 sequence. + #[para] This will stop at the point at which the bytes can't be interpreted as a complete utf-8 codepoint + #[para] e.g It will not return the first byte or 2 of a 3-byte utf-8 character if the last byte is missing, and will return only the valid utf-8 string from before the first byte of the incomplete character. + #[para] It will also only return the prefix before any bytes that cannot be part of a utf-8 sequence at all. + #[para] Note that while this will return valid utf8 - it has no knowledge of grapheme clusters or diacritics + #[para] This means if it is being used to process bytes split at some arbitrary point - the trailing data that isn't returned could be part of a grapheme cluster that belongs with the last character of the leading string already returned + #[para] The utf-8 BOM \xEF\xBB\xBF is a valid UTF8 3-byte sequence and so can also be returned as part of the leading utf8 bytes + if {[regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + \A ( + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + + } $rawbytes completeChars]} { + return $completeChars + } + return "" + } + proc hex2dec {args} { + #*** !doctools + #[call [fun hex2dec] [opt {option value...}] [arg list_largeHex]] + #[para]Convert a list of (possibly large) unprefixed hex strings to their decimal values + #[para]hex2dec accepts and ignores internal underscores in the same manner as Tcl 8.7+ numbers e.g hex2dec FF_FF returns 65535 + #[para]Leading and trailing underscores are ignored as a matter of implementation convenience - but this shouldn't be relied upon. + #[para]Leading or trailing whitespace in each list member is allowed e.g hex2dec " F" returns 15 + #[para]Internal whitespace e.g "F F" is not permitted - but a completely empty element "" is allowed and will return 0 + + set list_largeHex [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::hex2dec arguments prior to list_largeHex must be option/value pairs - received '$argopts'" + } + set opts [tcl::dict::create\ + -validate 1\ + -empty_as_hex "INVALID set -empty_as_hex to a hex string e.g FF if empty values should be replaced"\ + ] + set known_opts [tcl::dict::keys $opts] + foreach {k v} $argopts { + tcl::dict::set opts [tcl::prefix match -message "options for hex2dec. Unexpected option" $known_opts $k] $v + } + # -- --- --- --- + set opt_validate [tcl::dict::get $opts -validate] + set opt_empty [tcl::dict::get $opts -empty_as_hex] + # -- --- --- --- + + set list_largeHex [lmap h $list_largeHex[unset list_largeHex] {string map {_ ""} [string trim $h]}] + if {$opt_validate} { + #Note appended F so that we accept list of empty strings as per the documentation + if {![string is xdigit -strict [join $list_largeHex ""]F ]} { + error "[namespace current]::hex2dec error: non-hex digits encountered after stripping underscores and leading/trailing whitespace for each element\n $list_largeHex" + } + } + if {![string is xdigit -strict [string map {_ ""} $opt_empty]]} { + #mapping empty string to a value destroys any advantage of -scanonly + #todo - document that -scanonly has 2 restrictions - each element must be valid hex and less than 7 chars long + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {0} : {[set v]}}] + if {[lsearch $list_largeHex ""] >=0} { + error "[namespace current]::hex2dec error: empty values in list cannot be mapped to non-hex $opt_empty" + } + } else { + set opt_empty [string trim [string map {_ ""} $opt_empty]] + if {[set first_empty [lsearch $list_largeHex ""]] >= 0} { + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {$opt_empty} : {$v}}] + set nonempty_head [lrange $list_largeHex 0 $first_empty-1] + set list_largeHex [concat $nonempty_head [lmap v [lrange $list_largeHex $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [scan $list_largeHex [string repeat %llx [llength $list_largeHex]]] + } + + proc dec2hex {args} { + #*** !doctools + #[call [fun dex2hex] [opt {option value...}] [arg list_decimals]] + #[para]Convert a list of decimal integers to a list of hex values + #[para] -width can be used to make each hex value at least int characters wide, with leading zeroes. + #[para] -case upper|lower determines the case of the hex letters in the output + set list_decimals [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::dec2hex arguments prior to list_decimals must be option/value pairs - received '$argopts'" + } + set defaults [tcl::dict::create\ + -width 1\ + -case upper\ + -empty_as_decimal "INVALID set -empty_as_decimal to a number if empty values should be replaced"\ + ] + set known_opts [tcl::dict::keys $defaults] + set fullopts [tcl::dict::create] + foreach {k v} $argopts { + tcl::dict::set fullopts [tcl::prefix match -message "options for [tcl::namespace::current]::dec2hex. Unexpected option" $known_opts $k] $v + } + set opts [tcl::dict::merge $defaults $fullopts] + # -- --- --- --- + set opt_width [tcl::dict::get $opts -width] + set opt_case [tcl::dict::get $opts -case] + set opt_empty [tcl::dict::get $opts -empty_as_decimal] + # -- --- --- --- + + + set resultlist [list] + switch -- [string tolower $opt_case] { + upper { + set spec X + } + lower { + set spec x + } + default { + error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower" + } + } + set fmt "%${opt_width}.${opt_width}ll${spec}" + + set list_decimals [lmap d $list_decimals[unset list_decimals] {string map {_ ""} [string trim $d]}] + if {![string is digit -strict [string map {_ ""} $opt_empty]]} { + if {[lsearch $list_decimals ""] >=0} { + error "[namespace current]::dec2hex error: empty values in list cannot be mapped to non-decimal $opt_empty" + } + } else { + set opt_empty [string map {_ ""} $opt_empty] + if {[set first_empty [lsearch $list_decimals ""]] >= 0} { + set nonempty_head [lrange $list_decimals 0 $first_empty-1] + set list_decimals [concat $nonempty_head [lmap v [lrange $list_decimals $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [format [lrepeat [llength $list_decimals] $fmt] {*}$list_decimals] + } + + proc log2 x "expr {log(\$x)/[expr log(2)]}" + #*** !doctools + #[call [fun log2] [arg x]] + #[para]log base2 of x + #[para]This uses a 'live' proc body - the divisor for the change of base is computed once at definition time + #[para](courtesy of RS [uri https://wiki.tcl-lang.org/page/Additional+math+functions]) + + proc logbase {b x} { + #*** !doctools + #[call [fun logbase] [arg b] [arg x]] + #[para]log base b of x + #[para]This function uses expr's natural log and the change of base division. + #[para]This means for example that we can get results like: logbase 10 1000 = 2.9999999999999996 + #[para]Use expr's log10() function or tcl::mathfunc::log10 for base 10 + expr {log($x)/log($b)} + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::factors + @cmd -name punk::lib::factors\ + -summary\ + "Sorted factors of positive integer x"\ + -help\ + "Return a sorted list of the positive factors of x where x > 0 + For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. + (including zero itself in this context)* + + This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors + Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions + See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers + Comparisons were done with some numbers below 17 digits long + For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. + The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers + but has the disadvantage of being slower for 'small' numbers and using more memory. + If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get + there than computing the whole list, even for small values of x + * Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py + In other mathematical contexts zero may be considered not to divide anything." + @values -min 1 -max 1 + x -type integer + }] + } + proc factors {x} { + #*** !doctools + #[call [fun factors] [arg x]] + #[para]Return a sorted list of the positive factors of x where x > 0 + #[para]For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. (including zero itself in this context)* + #[para]This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors + #[para]Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions + #[para]See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers + #[para]Comparisons were done with some numbers below 17 digits long + #[para]For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. + #[para]The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers + #but has the disadvantage of being slower for 'small' numbers and using more memory. + #[para]If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get there than computing the whole list, even for small values of x + #[para]* Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py + #[para] In other mathematical contexts zero may be considered not to divide anything. + set factors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {($x % $j) == 0} { + lappend factors $j [expr {$x / $j}] + } + incr j + } + lappend factors $x + return [lsort -unique -integer $factors] + } + proc oddFactors {x} { + #*** !doctools + #[call [fun oddFactors] [arg x]] + #[para]Return a list of odd integer factors of x, sorted in ascending order + set j 2 + set max [expr {sqrt($x)}] + set factors [list 1] + while {$j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2} { + if {$other ni $factors} { + lappend factors $other + } + } + if {$j % 2} { + if {$j ni $factors} { + lappend factors $j + } + } + } + incr j + } + return [lsort -integer -increasing $factors] + } + proc greatestFactorBelow {x} { + #*** !doctools + #[call [fun greatestFactorBelow] [arg x]] + #[para]Return the largest factor of x excluding itself + #[para]factor functions can be useful for console layout calculations + #[para]See Tcllib math::numtheory for more extensive implementations + if {$x % 2 == 0 || $x == 0} { + return [expr {$x / 2}] + } + set j 3 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {$x % $j == 0} { + return [expr {$x / $j}] + } + incr j 2 + } + return 1 + } + proc greatestOddFactorBelow {x} { + #*** !doctools + #[call [fun greatestOddFactorBelow] [arg x]] + #[para]Return the largest odd integer factor of x excluding x itself + if {$x %2 == 0} { + return [greatestOddFactor $x] + } + set j 3 + #dumb brute force - time taken to compute is wildly variable on big numbers + #todo - use a (memoized?) generator of primes to reduce the search space + #tcllib math::numtheory has suitable functions - but do we want that dependency here? Testing shows brute-force often faster for small numbers. + set god 1 + set max [expr {sqrt($x)}] + while { $j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2 == 0} { + set god $j + } else { + set god [expr {$x / $j}] + #lowest j - so other side must be highest + break + } + } + incr j 2 + } + return $god + } + proc greatestOddFactor {x} { + #*** !doctools + #[call [fun greatestOddFactor] [arg x]] + #[para]Return the largest odd integer factor of x + #[para]For an odd value of x - this will always return x + if {$x % 2 != 0 || $x == 0} { + return $x + } + set r [expr {$x / 2}] + while {$r % 2 == 0} { + set r [expr {$r / 2}] + } + return $r + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::gcd + @cmd -name punk::lib::gcd\ + -summary\ + "Greatest common divisor of m and n."\ + -help\ + "Return the greatest common divisor of m and n. + Straight from Lars Hellström's math::numtheory library in Tcllib + + Graphical use: + An a by b rectangle can be covered with square tiles of side-length c, + only if c is a common divisor of a and b" + @values -min 2 -max 2 + m -type integer + n -type integer + }] + } + proc gcd {n m} { + #*** !doctools + #[call [fun gcd] [arg n] [arg m]] + #[para]Return the greatest common divisor of m and n + #[para]Straight from Lars Hellström's math::numtheory library in Tcllib + #[para]Graphical use: + #[para]An a by b rectangle can be covered with square tiles of side-length c, + #[para]only if c is a common divisor of a and b + + # + # Apply Euclid's good old algorithm + # + if { $n > $m } { + set t $n + set n $m + set m $t + } + + while { $n > 0 } { + set r [expr {$m % $n}] + set m $n + set n $r + } + + return $m + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lcm + @cmd -name punk::lib::lcm\ + -summary\ + "Lowest common multiple of m and n."\ + -help\ + "Return the lowest common multiple of m and n. + Straight from Lars Hellström's math::numtheory library in Tcllib" + @values -min 2 -max 2 + m -type integer + n -type integer + }] + } + proc lcm {n m} { + set gcd [gcd $n $m] + return [expr {$n*$m/$gcd}] + } + proc commonDivisors {x y} { + #*** !doctools + #[call [fun commonDivisors] [arg x] [arg y]] + #[para]Return a list of all the common factors of x and y + #[para](equivalent to factors of their gcd) + return [factors [gcd $x $y]] + } + + #experimental only - there are better/faster ways + proc sieve n { + set primes [list] + if {$n < 2} {return $primes} + set nums [tcl::dict::create] + for {set i 2} {$i <= $n} {incr i} { + tcl::dict::set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} + lappend primes $next + tcl::dict::for {next -} $nums break + } + return [concat $primes [tcl::dict::keys $nums]] + } + proc sieve2 n { + set primes [list] + if {$n < 2} {return $primes} + set nums [tcl::dict::create] + for {set i 2} {$i <= $n} {incr i} { + tcl::dict::set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} + lappend primes $next + #dict for {next -} $nums break + set next [lindex $nums 0] + } + return [concat $primes [tcl::dict::keys $nums]] + } + + proc hasglobs {str} { + #*** !doctools + #[call [fun hasglobs] [arg str]] + #[para]Return a boolean indicating whether str contains any of the glob characters: * ? [lb] [rb] + #[para]hasglobs uses append to preserve Tcls internal representation for str - so it should help avoid shimmering in the few cases where this may matter. + regexp {[*?\[\]]} [append obj2 $str {}] ;# int-rep preserving + } + + proc trimzero {number} { + #*** !doctools + #[call [fun trimzero] [arg number]] + #[para]Return number with left-hand-side zeros trimmed off - unless all zero + #[para]If number is all zero - a single 0 is returned + set trimmed [string trimleft $number 0] + if {[string length $trimmed] == 0} { + set trimmed 0 + } + return $trimmed + } + proc substring_count {str substring} { + #*** !doctools + #[call [fun substring_count] [arg str] [arg substring]] + #[para]Search str and return number of occurrences of substring + + #faster than lsearch on split for str of a few K + if {$substring eq ""} {return 0} + set occurrences [expr {[string length $str]-[string length [string map [list $substring {}] $str]]}] + return [expr {$occurrences / [string length $substring]}] + } + + proc dict_merge_ordered {defaults main} { + #*** !doctools + #[call [fun dict_merge_ordered] [arg defaults] [arg main]] + #[para]The standard dict merge accepts multiple dicts with values from dicts to the right (2nd argument) taking precedence. + #[para]When merging with a dict of default values - this means that any default key/vals that weren't in the main dict appear in the output before the main data. + #[para]This function merges the two dicts whilst maintaining the key order of main followed by defaults. + + #1st merge (inner merge) with wrong values taking precedence - but right key-order - then (outer merge) restore values + return [tcl::dict::merge [tcl::dict::merge $main $defaults] $main] + } + + proc askuser {question} { + #*** !doctools + #[call [fun askuser] [arg question]] + #[para]A basic utility to read an answer from stdin + #[para]The prompt is written to the terminal and then it waits for a user to type something + #[para]stdin is temporarily configured to blocking and then put back in its original state in case it wasn't already so. + #[para]If the terminal is using punk::console and is in raw mode - the terminal will temporarily be put in line mode. + #[para](Generic terminal raw vs linemode detection not yet present) + #[para]The user must hit enter to submit the response + #[para]The return value is the string if any that was typed prior to hitting enter. + #[para]The question argument can be manually colourised using the various punk::ansi funcitons + #[example_begin] + # set answer [lb]punk::lib::askuser "[lb]a+ green bold[rb]Do you want to proceed? (Y|N)[lb]a[rb]"[rb] + # if {[lb]string match y* [lb]string tolower $answer[rb][rb]} { + # puts "Proceeding" + # } else { + # puts "Cancelled by user" + # } + #[example_end] + puts stdout $question + flush stdout + set stdin_state [chan configure stdin] + if {[catch { + package require punk::console + set console_raw [tsv::get console is_raw] + } err_console]} { + #assume normal line mode + set console_raw 0 + } + try { + chan configure stdin -blocking 1 + if {$console_raw} { + punk::console::disableRaw + set answer [gets stdin] + punk::console::enableRaw + } else { + set answer [gets stdin] + } + } finally { + chan configure stdin -blocking [tcl::dict::get $stdin_state -blocking] + } + return $answer + } + + #like textutil::adjust::indent - but doesn't strip trailing lines, and doesn't implement skip parameter. + proc indent {text {prefix " "}} { + set result [list] + foreach line [split $text \n] { + if {[string trim $line] eq ""} { + lappend result "" + } else { + lappend result $prefix[string trimright $line] + } + } + return [join $result \n] + } + #dedent? + proc undent {text {max -1}} { + if {$text eq ""} { + return "" + } + set lines [split $text \n] + set nonblank [list] + foreach ln $lines { + if {[string trim $ln] eq ""} { + continue + } + lappend nonblank $ln + } + set lcp [longestCommonPrefix $nonblank] + if {$lcp eq ""} { + return $text + } + regexp {^([\t ]*)} $lcp _m lcp + if {$lcp eq ""} { + return $text + } + set len [string length $lcp] + if {$max != -1} { + set len [expr {min($len,$max)}] + } + set result [list] + foreach ln $lines { + if {[string trim $ln] eq ""} { + lappend result "" + } else { + lappend result [string range $ln $len end] + } + } + return [join $result \n] + } + #A version of textutil::string::longestCommonPrefixList + proc longestCommonPrefix {items} { + if {[llength $items] <= 1} { + return [lindex $items 0] + } + set items [lsort $items[unset items]] + set min [lindex $items 0] + set max [lindex $items end] + #if first and last of sorted list share a prefix - then all do (first and last of sorted list are the most different in the list) + #(sort order nothing to do with length - e.g min may be longer than max) + if {[string length $min] > [string length $max]} { + set temp $min + set min $max + set max $temp + } + set n [string length $min] + set prefix "" + set i -1 + while {[incr i] < $n && ([set c [string index $min $i]] eq [string index $max $i])} { + append prefix $c + } + return $prefix + } + + #e.g linesort -decreasing $data + proc linesort {args} { + #*** !doctools + #[call [fun linesort] [opt {sortoption ?val?...}] [arg textblock]] + #[para]Sort lines in textblock + #[para]Returns another textblock with lines sorted + #[para]options are flags as accepted by lsort ie -ascii -command -decreasing -dictionary -index -indices -integer -nocase -real -stride -unique + if {[llength $args] < 1} { + error "linesort missing lines argument" + } + set lines [lindex $args end] + set opts [lrange $args 0 end-1] + #.= list $lines |@0,sortopts/1> linelist |> .=data>1,sortopts>1* lsort |> list_as_lines <| {*}$opts + list_as_lines [lsort {*}$opts [linelist $lines]] + } + + proc list_as_lines {args} { + #*** !doctools + #[call [fun list_as_lines] [opt {-joinchar char}] [arg linelist]] + #[para]This simply joins the elements of the list with -joinchar + #[para]It is mainly intended for use in pipelines where the primary argument comes at the end - but it can also be used as a general replacement for join $lines + #[para]The sister function lines_as_list takes a block of text and splits it into lines - but with more options related to trimming the block and/or each line. + if {[set eop [lsearch $args --]] == [llength $args]-2} { + #end-of-opts not really necessary - except for consistency with lines_as_list + set args [concat [lrange $args 0 $eop-1] [lrange $args $eop+1 end]] + } + if {[llength $args] == 3 && [lindex $args 0] eq "-joinchar"} { + set joinchar [lindex $args 1] + set lines [lindex $args 2] + } elseif {[llength $args] == 1} { + set joinchar "\n" + set lines [lindex $args 0] + } else { + error "list_as_lines usage: list_as_lines ?-joinchar ? " + } + return [join $lines $joinchar] + } + proc list_as_lines2 {args} { + #eat or own dogfood version - shows the implementation is simpler - but unfortunately not suitable for a simple function like this which should be as fast as possible? + lassign [tcl::dict::values [punk::args::parse $args withdef { + -joinchar -default \n + @values -min 1 -max 1 + }]] leaders opts values + + return [join [tcl::dict::get $values 0] [tcl::dict::get $opts -joinchar]] + } + + proc lines_as_list {args} { + #*** !doctools + #[call [fun lines_as_list] [opt {option value ...}] [arg text]] + #[para]Returns a list of possibly trimmed lines depeding on options + #[para]The concept of lines is raw lines from splitting on newline after crlf is mapped to lf + #[para]- not console lines which may be entirely different due to control characters such as vertical tabs or ANSI movements + + #The underlying function linelist has the validation code which gives nicer usage errors. + #we can't use a dict merge here without either duplicating the underlying validation somewhat, or risking a default message from dict merge error + #..because we don't know what to say if there are odd numbers of args + #we can guess that it's ok to insert our default if no -block found in $args - but as a general principle this mightn't always work + #e.g if -block is also a valid value for the textblock itself. Which in this case it is - although unlikely, and our -block {} default is irrelevant in that case anyway + + if {[lsearch $args "--"] == [llength $args]-2} { + set opts [lrange $args 0 end-2] + } else { + set opts [lrange $args 0 end-1] + } + #set opts [tcl::dict::merge {-block {}} $opts] + set bposn [lsearch $opts -block] + if {$bposn < 0} { + lappend opts -block {} + } + set text [lindex $args end] + #tailcall linelist {*}$opts $text + return [linelist {*}$opts $text] + } + #this demonstrates the ease of using an args processor - but as lines_as_list is heavily used in terminal output - we can't afford the extra microseconds + proc lines_as_list2 {args} { + #pass -anyopts 1 so we can let the next function decide what arguments are valid - but still pass our defaults + #-anyopts 1 avoids having to know what to say if odd numbers of options passed etc + #we don't have to decide what is an opt vs a value + #even if the caller provides the argument -block without a value the next function's validation will report a reasonable error because there is now nothing in $values (consumed by -block) + lassign [tcl::dict::values [punk::args::parse $args withdef { + @opts -any 1 + -block -default {} + }]] leaderdict opts valuedict + tailcall linelist {*}$opts {*}[tcl::dict::values $valuedict] + } + + # important for pipeline & match_assign + # -line trimline|trimleft|trimright -block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty -commandprefix {string length} ? + # -block trimming only trims completely empty lines. use -line trimming to remove whitespace e.g -line trimright will clear empty lines without affecting leading whitespace on other lines that aren't pure whitespace + set linelist_body { + set usage "linelist ?-ansiresets auto|? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" + if {[llength $args] == 0} { + error "linelist missing textchunk argument usage:$usage" + } + set text [lindex $args end] + set text [string map {\r\n \n} $text] ;#review - option? + + set arglist [lrange $args 0 end-1] + set opts [tcl::dict::create\ + -block {trimhead1 trimtail1}\ + -line {}\ + -commandprefix ""\ + -ansiresets auto\ + -ansireplays 0\ + ] + foreach {o v} $arglist { + switch -- $o { + -block - -line - -commandprefix - -ansiresets - -ansireplays { + tcl::dict::set opts $o $v + } + default { + error "linelist: Unrecognized option '$o' usage:$usage" + } + } + } + # -- --- --- --- --- --- + set opt_block [tcl::dict::get $opts -block] + if {[llength $opt_block]} { + foreach bo $opt_block { + switch -- $bo { + trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} + default { + set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] + error "linelist: unknown -block option value: $bo known values: $known_blockopts" + } + } + } + #normalize certain combos + if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimall" in $opt_block} { + #no other block options make sense in combination with this + set opt_block [list "trimall"] + } + + #TODO + if {"triminner" in $opt_block } { + error "linelist -block triminner not implemented - sorry" + } + + } + + + # -- --- --- --- --- --- + set opt_line [tcl::dict::get $opts -line] + set tl_left 0 + set tl_right 0 + set tl_both 0 + foreach lo $opt_line { + switch -- $lo { + trimline { + set tl_both 1 + } + trimleft { + set tl_left 1 + } + trimright { + set tl_right 1 + } + default { + set known_lineopts [list trimline trimleft trimright] + error "linelist: unknown -line option value: $lo known values: $known_lineopts" + } + } + } + #normalize trimleft trimright combo + if {$tl_left && $tl_right} { + set opt_line [list "trimline"] + set tl_both 1 + } + # -- --- --- --- --- --- + set opt_commandprefix [tcl::dict::get $opts -commandprefix] + # -- --- --- --- --- --- + set opt_ansiresets [tcl::dict::get $opts -ansiresets] + # -- --- --- --- --- --- + set opt_ansireplays [tcl::dict::get $opts -ansireplays] + if {$opt_ansireplays} { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 1 + } + } else { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 0 + } + } + # -- --- --- --- --- --- + set linelist [list] + set nlsplit [split $text \n] + if {![llength $opt_line]} { + set linelist $nlsplit + #lappend linelist {*}$nlsplit + } else { + #already normalized trimleft+trimright to trimline + if {$tl_both} { + foreach ln $nlsplit { + lappend linelist [string trim $ln] + } + } elseif {$tl_left} { + foreach ln $nlsplit { + lappend linelist [string trimleft $ln] + } + } elseif {$tl_right} { + foreach ln $nlsplit { + lappend linelist [string trimright $ln] + } + } + } + + if {"collateempty" in $opt_block} { + set inputlist $linelist[set linelist [list]] + set last "-" + foreach input $inputlist { + if {$input ne ""} { + lappend linelist $input + set last "-" + } else { + if {$last ne ""} { + lappend linelist "" + } + set last "" + } + } + } + + if {"trimall" in $opt_block} { + set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] + } else { + set start 0 + if {"trimhead" in $opt_block} { + set idx 0 + set lastempty -1 + foreach ln $linelist { + if {[lindex $linelist $idx] ne ""} { + break + } else { + set lastempty $idx + } + incr idx + } + if {$lastempty >=0} { + set start [expr {$lastempty +1}] + } + } + set linelist [lrange $linelist $start end] + + if {"trimtail" in $opt_block} { + set revlinelist [lreverse $linelist][set linelist {}] + set i 0 + foreach ln $revlinelist { + if {$ln ne ""} { + set linelist [lreverse [lrange $revlinelist $i end]] + break + } + incr i + } + } + + # --- --- + set start 0 + set end "end" + if {"trimhead1" in $opt_block} { + if {[lindex $linelist 0] eq ""} { + set start 1 + } + } + if {"trimtail1" in $opt_block} { + if {[lindex $linelist end] eq ""} { + set end "end-1" + } + } + set linelist [lrange $linelist $start $end] + } + + #review - we need to make sure ansiresets don't accumulate/grow on any line + #Each resulting line should have a reset of some type at start and a pure-reset at end to stop + #see if we can find an ST sequence that most terminals will not display for marking sections? + if {$opt_ansireplays} { + ;#package require punk::ansi + if {$opt_ansiresets} { + set RST "\x1b\[0m" + } else { + set RST "" + } + set replaycodes $RST ;#todo - default? + set transformed [list] + #shortcircuit common case of no ansi + #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. + #This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) + #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable + #detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures) + + #we use detectcode_in_list instead of detect_in_list + #detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message) + # - but the main reason is it is slightly faster. + if {![punk::ansi::ta::detectcode_in_list $linelist]} { + if {$opt_ansiresets} { + foreach ln $linelist { + lappend transformed $RST$ln$RST + } + set linelist $transformed + } + } else { + + #INLINE punk::ansi::codetype::is_sgr_reset + #regexp {\x1b\[0*m$} $code + set re_is_sgr_reset {\x1b\[0*m$} + #INLINE punk::ansi::codetype::is_sgr + #regexp {\033\[[0-9;:]*m$} $code + set re_is_sgr {\x1b\[[0-9;:]*m$} + + foreach ln $linelist { + #set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable + + #set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + #get_codes_single lists only the codes. no plaintext or empty elements + set ansisplits [punk::ansi::ta::get_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + if {[llength $ansisplits] == 0} { + #plaintext only - no ansi codes in line + lappend transformed [string cat $replaycodes $ln $RST] + #leave replaycodes as is for next line + set nextreplay $replaycodes + } else { + set tail $RST + set lastcode [lindex $ansisplits end] ;#may or may not be SGR + set lastcodeoffset [expr {[string length $lastcode]-1}] + if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { + if {[string range $ln end-$lastcodeoffset end] eq $lastcode} { + #last plaintext is empty. So the line is already suffixed with a reset + set tail "" + set nextreplay $RST + } else { + #trailing text has been reset within line - but no tail reset present + #we normalize by putting a tail reset on anyway + set tail $RST + set nextreplay $RST + } + } elseif {[string range $ln end-$lastcodeoffset end] eq $lastcode && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { + #code is at tail (no trailing plaintext) + #No tail reset - and no need to examine whole line to determine stack that is in effect + set tail $RST + set nextreplay $lastcode + } else { + #last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect + #last codeset doesn't end in a pure-reset + #whether code was at very end or not - add a reset tail + set tail $RST + #determine effective replay for line + set codestack [list start] + foreach code $ansisplits { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list] ;#different from 'start' marked - this means we've had a reset + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + if {[punk::ansi::codetype::is_sgr $code]} { + #todo - proper test of each code - so we only take latest background/foreground etc. + #requires handling codes with varying numbers of parameters. + #basic simplification - remove straight dupes. + set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + } ;#else gx0 or other code - we don't want to stack it with SGR codes + } + } + if {$codestack eq [list start]} { + #No SGRs - may have been other codes + set line_has_sgr 0 + } else { + #list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes + set line_has_sgr 1 + if {[lindex $codestack 0] eq "start"} { + set codestack [lrange $codestack 1 end] + } + } + + #set newreplay [join $codestack ""] + set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] + + if {$line_has_sgr && $newreplay ne $replaycodes} { + #adjust if it doesn't already does a reset at start + if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { + set nextreplay $newreplay + } else { + set nextreplay $RST$newreplay + } + } else { + set nextreplay $replaycodes + } + } + if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { + #no point attaching any replay + lappend transformed [string cat $ln $tail] + } else { + lappend transformed [string cat $replaycodes $ln $tail] + } + } + set replaycodes $nextreplay + } + set linelist $transformed + } + } + + if {[llength $opt_commandprefix]} { + set transformed [list] + foreach ln $linelist { + lappend transformed [{*}$opt_commandprefix $ln] + } + set linelist $transformed + } + + return $linelist + } + if {$has_punk_ansi} { + #optimise linelist as much as possible + set linelist_body [string map { ""} $linelist_body] + } else { + #punk ansi not avail at time of package load. + #by putting in calls to punk::ansi the user will get appropriate error messages + set linelist_body [string map { "package require punk::ansi"} $linelist_body] + } + + set linelist_body_original { + set usage "linelist ?-ansiresets auto|? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" + if {[llength $args] == 0} { + error "linelist missing textchunk argument usage:$usage" + } + set text [lindex $args end] + set text [string map {\r\n \n} $text] ;#review - option? + + set arglist [lrange $args 0 end-1] + set opts [tcl::dict::create\ + -block {trimhead1 trimtail1}\ + -line {}\ + -commandprefix ""\ + -ansiresets auto\ + -ansireplays 0\ + ] + foreach {o v} $arglist { + switch -- $o { + -block - -line - -commandprefix - -ansiresets - -ansireplays { + tcl::dict::set opts $o $v + } + default { + error "linelist: Unrecognized option '$o' usage:$usage" + } + } + } + # -- --- --- --- --- --- + set opt_block [tcl::dict::get $opts -block] + if {[llength $opt_block]} { + foreach bo $opt_block { + switch -- $bo { + trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} + default { + set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] + error "linelist: unknown -block option value: $bo known values: $known_blockopts" + } + } + } + #normalize certain combos + if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimall" in $opt_block} { + #no other block options make sense in combination with this + set opt_block [list "trimall"] + } + + #TODO + if {"triminner" in $opt_block } { + error "linelist -block triminner not implemented - sorry" + } + + } + + + # -- --- --- --- --- --- + set opt_line [tcl::dict::get $opts -line] + set tl_left 0 + set tl_right 0 + set tl_both 0 + foreach lo $opt_line { + switch -- $lo { + trimline { + set tl_both 1 + } + trimleft { + set tl_left 1 + } + trimright { + set tl_right 1 + } + default { + set known_lineopts [list trimline trimleft trimright] + error "linelist: unknown -line option value: $lo known values: $known_lineopts" + } + } + } + #normalize trimleft trimright combo + if {$tl_left && $tl_right} { + set opt_line [list "trimline"] + set tl_both 1 + } + # -- --- --- --- --- --- + set opt_commandprefix [tcl::dict::get $opts -commandprefix] + # -- --- --- --- --- --- + set opt_ansiresets [tcl::dict::get $opts -ansiresets] + # -- --- --- --- --- --- + set opt_ansireplays [tcl::dict::get $opts -ansireplays] + if {$opt_ansireplays} { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 1 + } + } else { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 0 + } + } + # -- --- --- --- --- --- + set linelist [list] + set nlsplit [split $text \n] + if {![llength $opt_line]} { + set linelist $nlsplit + #lappend linelist {*}$nlsplit + } else { + #already normalized trimleft+trimright to trimline + if {$tl_both} { + foreach ln $nlsplit { + lappend linelist [string trim $ln] + } + } elseif {$tl_left} { + foreach ln $nlsplit { + lappend linelist [string trimleft $ln] + } + } elseif {$tl_right} { + foreach ln $nlsplit { + lappend linelist [string trimright $ln] + } + } + } + + if {"collateempty" in $opt_block} { + set inputlist $linelist[set linelist [list]] + set last "-" + foreach input $inputlist { + if {$input ne ""} { + lappend linelist $input + set last "-" + } else { + if {$last ne ""} { + lappend linelist "" + } + set last "" + } + } + } + + if {"trimall" in $opt_block} { + set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] + } else { + set start 0 + if {"trimhead" in $opt_block} { + set idx 0 + set lastempty -1 + foreach ln $linelist { + if {[lindex $linelist $idx] ne ""} { + break + } else { + set lastempty $idx + } + incr idx + } + if {$lastempty >=0} { + set start [expr {$lastempty +1}] + } + } + set linelist [lrange $linelist $start end] + + if {"trimtail" in $opt_block} { + set revlinelist [lreverse $linelist][set linelist {}] + set i 0 + foreach ln $revlinelist { + if {$ln ne ""} { + set linelist [lreverse [lrange $revlinelist $i end]] + break + } + incr i + } + } + + # --- --- + set start 0 + set end "end" + if {"trimhead1" in $opt_block} { + if {[lindex $linelist 0] eq ""} { + set start 1 + } + } + if {"trimtail1" in $opt_block} { + if {[lindex $linelist end] eq ""} { + set end "end-1" + } + } + set linelist [lrange $linelist $start $end] + } + + #review - we need to make sure ansiresets don't accumulate/grow on any line + #Each resulting line should have a reset of some type at start and a pure-reset at end to stop + #see if we can find an ST sequence that most terminals will not display for marking sections? + if {$opt_ansireplays} { + #package require punk::ansi + + if {$opt_ansiresets} { + set RST "\x1b\[0m" + } else { + set RST "" + } + set replaycodes $RST ;#todo - default? + set transformed [list] + #shortcircuit common case of no ansi + #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. + #This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) + #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable + #detect_in_list will check at first level. (not intended for detecting ansi in deeper structures) + if {![punk::ansi::ta::detect_in_list $linelist]} { + if {$opt_ansiresets} { + foreach ln $linelist { + lappend transformed $RST$ln$RST + } + set linelist $transformed + } + } else { + + #INLINE punk::ansi::codetype::is_sgr_reset + #regexp {\x1b\[0*m$} $code + set re_is_sgr_reset {\x1b\[0*m$} + #INLINE punk::ansi::codetype::is_sgr + #regexp {\033\[[0-9;:]*m$} $code + set re_is_sgr {\x1b\[[0-9;:]*m$} + + foreach ln $linelist { + #set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable + + set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + if {[llength $ansisplits]<= 1} { + #plaintext only - no ansi codes in line + lappend transformed [string cat $replaycodes $ln $RST] + #leave replaycodes as is for next line + set nextreplay $replaycodes + } else { + set tail $RST + set lastcode [lindex $ansisplits end-1] ;#may or may not be SGR + if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { + if {[lindex $ansisplits end] eq ""} { + #last plaintext is empty. So the line is already suffixed with a reset + set tail "" + set nextreplay $RST + } else { + #trailing text has been reset within line - but no tail reset present + #we normalize by putting a tail reset on anyway + set tail $RST + set nextreplay $RST + } + } elseif {[lindex $ansisplits end] ne "" && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { + #No tail reset - and no need to examine whole line to determine stack that is in effect + set tail $RST + set nextreplay $lastcode + } else { + #last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect + #last codeset doesn't end in a pure-reset + #whether code was at very end or not - add a reset tail + set tail $RST + #determine effective replay for line + set codestack [list start] + foreach {pt code} $ansisplits { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list] ;#different from 'start' marked - this means we've had a reset + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + if {[punk::ansi::codetype::is_sgr $code]} { + #todo - proper test of each code - so we only take latest background/foreground etc. + #requires handling codes with varying numbers of parameters. + #basic simplification - remove straight dupes. + set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + } ;#else gx0 or other code - we don't want to stack it with SGR codes + } + } + if {$codestack eq [list start]} { + #No SGRs - may have been other codes + set line_has_sgr 0 + } else { + #list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes + set line_has_sgr 1 + if {[lindex $codestack 0] eq "start"} { + set codestack [lrange $codestack 1 end] + } + } + + #set newreplay [join $codestack ""] + set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] + + if {$line_has_sgr && $newreplay ne $replaycodes} { + #adjust if it doesn't already does a reset at start + if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { + set nextreplay $newreplay + } else { + set nextreplay $RST$newreplay + } + } else { + set nextreplay $replaycodes + } + } + if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { + #no point attaching any replay + lappend transformed [string cat $ln $tail] + } else { + lappend transformed [string cat $replaycodes $ln $tail] + } + } + set replaycodes $nextreplay + } + set linelist $transformed + } + } + + if {[llength $opt_commandprefix]} { + set transformed [list] + foreach ln $linelist { + lappend transformed [{*}$opt_commandprefix $ln] + } + set linelist $transformed + } + + return $linelist + } + if {$has_punk_ansi} { + #optimise linelist as much as possible + set linelist_body [string map { ""} $linelist_body] + } else { + #punk ansi not avail at time of package load. + #by putting in calls to punk::ansi the user will get appropriate error messages + set linelist_body [string map { "package require punk::ansi"} $linelist_body] + } + proc linelist {args} $linelist_body + + + interp alias {} errortime {} punk::lib::errortime + proc errortime {script groupsize {iters 2}} { + #by use MAK from https://wiki.tcl-lang.org/page/How+to+Measure+Performance + set i 0 + set times {} + if {$iters < 2} {set iters 2} + + for {set i 0} {$i < $iters} {incr i} { + set result [uplevel [list time $script $groupsize]] + lappend times [lindex $result 0] + } + + set average 0.0 + set s2 0.0 + + foreach time $times { + set average [expr {$average + double($time)/$iters}] + } + + foreach time $times { + set s2 [expr {$s2 + (($time-$average)*($time-$average) / ($iters-1))}] + } + + set sigma [expr {int(sqrt($s2))}] + set average [expr {int($average)}] + + return "$average +/- $sigma microseconds per iteration" + } + + #test function to use with show_jump_tables + #todo - check if switch compilation to jump tables differs by Tcl version + proc switch_char_test {c} { + set dec [scan $c %c] + foreach t [list 1 2 3] { + switch -- $c { + x { + return [list $dec x $t] + } + y { + return [list $dec y $t] + } + z { + return [list $dec z $t] + } + } + } + + #tcl 8.6/8.7 (at least) + #curlies must be unescaped and unbraced to work as literals in switch and enable it to compile to jumpTable + switch -- $c { + a { + return [list $dec a] + } + {"} { + return [list $dec dquote] + } + {[} {return [list $dec lb]} + {]} {return [list $dec rb]} + "{" { + return [list $dec lbrace] + } + "}" { + return [list $dec rbrace] + } + default { + return [list $dec $c] + } + } + + + + } + + #we are interested in seeing jumpTable line and following lines up until next line starting with "Command" or bracketed number e.g (164) + proc show_jump_tables {args} { + #avoiding use of 'info cmdtype' as unavaliable in safe interps as at 2024-06. + if {[llength $args] == 1} { + set data [tcl::unsupported::disassemble proc [lindex $args 0]] + } elseif {[llength $args] == 2} { + #review - this looks for direct methods on the supplied object/class, and then tries to disassemble method on the supplied class or class of supplied object if it isn't a class itself. + #not sure if this handles more complex hierarchies or mixins etc. + lassign $args obj method + if {![info object isa object $obj]} { + error "show_jump_tables unable to examine '$args'. $obj is not an oo object" + } + #classes are objects too and can have direct methods + if {$method in [info object methods $obj]} { + set data [tcl::unsupported::disassemble objmethod $obj $method] + } else { + if {![info object isa class $obj]} { + set obj [info object class $obj] + } + set data [tcl::unsupported::disassemble method $obj $method] + } + } else { + error "show_jump_tables expected a procname or a class/object and method" + } + set result "" + set in_jt 0 + foreach ln [split $data \n] { + set tln [::tcl::string::trim $ln] + if {!$in_jt} { + if {[::tcl::string::match *jumpTable* $ln]} { + punk::ns::call_frame + append result $ln \n + set in_jt 1 + } + } else { + if {[::tcl::string::match Command* $tln] || [::tcl::string::match "(*) *" $tln]} { + set in_jt 0 + } else { + append result $ln \n + } + } + } + return $result + } + + #a test + # punk::ns::cmdtracereturn punk::lib::disassemble ::punk::ns::test_switch4 + # Note the different disassemble result when trace is running. + proc disassemble {procname} { + tcl::unsupported::disassemble proc $procname + } + + proc temperature_f_to_c {deg_fahrenheit} { + return [expr {($deg_fahrenheit -32) * (5/9.0)}] + } + proc temperature_c_to_f {deg_celsius} { + return [expr {($deg_celsius * (9/5.0)) + 32}] + } + + proc interp_sync_package_paths {interp} { + if {![interp exists $interp]} { + error "interp_sync_package_paths error. interp '$interp' not found. Create it first with \[interp create $interp\]" + } + interp eval $interp [list set ::auto_path $::auto_path] + interp eval $interp {tcl::tm::remove {*}[tcl::tm::list]} + interp eval $interp [list tcl::tm::add {*}[lreverse [tcl::tm::list]]] + } + + proc valcopy {obj} { + append obj2 $obj {} + } + proc set_valcopy {varname obj} { + #used by repl's codeinterp. Maintains internal rep, easier to call e.g interp eval code [list punk::set_valcopy varnmame $val] + append obj2 $obj {} + uplevel 1 [list set $varname $obj2] + } + + + + proc format_number {numbers_or_commaformattednumbers {delim ""} {groupsize ""}} { + variable has_twapi + if {$has_twapi} { + if {$delim eq "" && $groupsize eq ""} { + set localeid [twapi::get_system_default_lcid] + } + } + #when using twapi we currently only get the localeid - not the specific defaults + #when not using twapi, or on non-windows platforms - we don't currently have a mechanism to look up user preferences for this + set default_delim "," + set default_groupsize 3 + + set results [list] + set nums [valcopy $numbers_or_commaformattednumbers] ;#stops single num from getting internal rep of list + foreach inputnum $nums { + set number [valcopy $inputnum] + #also handle tcl 8.7+ underscores in numbers + set number [string map [list _ "" , ""] $number] + #normalize e.g 2e4 -> 20000.0 + set number [expr {$number}] + + if {$has_twapi} { + if {$delim eq "" && $groupsize eq ""} { + lappend results [twapi::format_number $number $localeid -idigits -1] + continue + } else { + #setting just one of delim or groupsize means we don't get the user's localeid based default for the non-set one + #todo - document it? Find a way to lookup localeid based defaults whenever either is unspecified? + if {$delim eq ""} {set delim $default_delim} + if {$groupsize eq ""} {set groupsize $default_groupsize} + lappend results [twapi::format_number $number 0 -idigits -1 -sthousand $delim -sgrouping $groupsize] + continue + } + } + #todo - get configured user defaults + if {$delim eq ""} { + set delim $default_delim + } + if {$groupsize eq ""} { + set groupsize $default_groupsize + } + + lappend results [delimit_number $number $delim $groupsize] + } + + if {[llength $results] == 1} { + #keep intrep as string rather than list + return [lindex $results 0] + } + return $results + } + + + #from wiki https://wiki.tcl-lang.org/page/Delimiting+Numberse + # Given a number represented as a string, insert delimiters to break it up for + # readability. Normally, the delimiter will be a comma which will be inserted every + # three digits. However, the delimiter and groupsize are optional arguments, + # permitting use in other locales. + # + # The string is assumed to consist of digits, possibly preceded by spaces, + # and possibly containing a decimal point, i.e.: [:space:]*[:digit:]*\.[:digit:]* + + proc delimit_number {unformattednumber {delim ","} {GroupSize 3}} { + set number [valcopy $unformattednumber] + set number [string map {_ ""} $number] + #normalize using expr - e.g 2e4 -> 20000.0 + set number [expr {$number}] + # First, extract right hand part of number, up to and including decimal point + set point [string last "." $number]; + if {$point >= 0} { + set PostDecimal [string range $number $point+1 end]; + set PostDecimalP 1; + } else { + set point [expr {[string length $number] + 1}] + set PostDecimal ""; + set PostDecimalP 0; + } + + # Now extract any leading spaces. review - regex for whitespace instead of just ascii space? + set ind 0; + while {[string equal [string index $number $ind] \u0020]} { + incr ind; + } + set FirstNonSpace $ind; + set LastSpace [expr {$FirstNonSpace - 1}]; + set LeadingSpaces [string range $number 0 $LastSpace]; + + # Now extract the non-fractional part of the number, omitting leading spaces. + set MainNumber [string range $number $FirstNonSpace $point-1]; + + # Insert commas into the non-fractional part. + set Length [string length $MainNumber]; + set Phase [expr {$Length % $GroupSize}] + set PhaseMinusOne [expr {$Phase -1}]; + set DelimitedMain ""; + + #First we deal with the extra stuff. + if {$Phase > 0} { + append DelimitedMain [string range $MainNumber 0 $PhaseMinusOne]; + } + set FirstInGroup $Phase; + set LastInGroup [expr {$FirstInGroup + $GroupSize -1}]; + while {$LastInGroup < $Length} { + if {$FirstInGroup > 0} { + append DelimitedMain $delim; + } + append DelimitedMain [string range $MainNumber $FirstInGroup $LastInGroup]; + incr FirstInGroup $GroupSize + incr LastInGroup $GroupSize + } + + # Reassemble the number. + if {$PostDecimalP} { + return [format "%s%s.%s" $LeadingSpaces $DelimitedMain $PostDecimal]; + } else { + return [format "%s%s" $LeadingSpaces $DelimitedMain]; + } + } + + #review - there are various type of uuid - we should use something consistent across platforms + #twapi is used on windows because it's about 5 times faster - but is this more important than consistency? + #twapi is much slower to load in the first place (e.g 75ms vs 6ms if package names already loaded) - so for oneshots tcllib uuid is better anyway + #(counterpoint: in the case of punk - we currently need twapi anyway on windows) + #does tcllib's uuid use the same mechanisms on different platforms anyway? + if {$has_twapi} { + interp alias "" ::punk::lib::uuid "" twapi::new_uuid + } else { + catch {package require uuid} + interp alias "" ::punk::lib::uuid "" uuid::uuid generate + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +tcl::namespace::eval punk::lib::flatgrid { + namespace export filler_count rows cols col row block + + #WARNING - requires lseq and 'lsearch -stride' + #WARNING - lsearch -stride oddity with empty strings https://core.tcl-lang.org/tcl/tktview/edebb6a4 + #todo - 8.6 fallback? + + proc filler_count {listlen numcolumns} { + #if {$numcolumns <= 0} {error "filler_count requires 1 or more numcolumns"} ;#or allow divide by zero error + #if {$listlen == 0} {return $numcolumns} ;#an option - but returning zero might make more sense + expr {($numcolumns - ($listlen % $numcolumns)) % $numcolumns} + } + proc rows {list numcolumns {blank NULL}} { + set numblanks [filler_count [llength $list] $numcolumns] + set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] + set splits [lseq 0 to [llength $padded_list] by $numcolumns] + set rows [list] + set i 1 + foreach s [lrange $splits 0 end-1] { + lappend rows [lrange $padded_list $s [lindex $splits $i]-1] + incr i + } + return $rows + } + proc cols {list numcolumns {blank NULL}} { + set cols [list] + foreach colindex [lseq 0 $numcolumns-1] { + lappend cols [lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] *] + } + return $cols + } + proc cols2 {list numcolumns {blank NULL}} { + set cols [list] + foreach colindex [lseq 0 $numcolumns-1] { + lappend cols [col2 $list $numcolumns $colindex $blank] + } + return $cols + } + proc col {list numcolumns colindex {blank NULL}} { + lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] * + } + proc col2 {list numcolumns colindex {blank NULL}} { + set numblanks [filler_count [llength $list] $numcolumns] + set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] + set splits [lseq 0 to [llength $padded_list] by $numcolumns] + set col [list] + foreach s [lrange $splits 0 end-1] { + lappend col [lindex $padded_list $s+$colindex] + } + return $col + } + proc col3 {list numcolumns colindex {blank NULL}} { + set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] + lmap s [lrange [lseq 0 to [llength $padded_list] by $numcolumns] 0 end-1] {lindex $padded_list $s+$colindex} + } + proc col4 {list numcolumns colindex {blank NULL}} { + #slow + set vars [lrepeat $numcolumns _] + lset vars $colindex v + if {$blank eq ""} { + return [lmap $vars $list {set v}] + } + set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] + lmap $vars [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] {set v} + } + + proc block {list numcolumns {blank NULL}} { + set colblocks [list] + foreach c [cols $list $numcolumns $blank] { + lappend colblocks [join $c \n] " " + } + textblock::join -- {*}$colblocks + } + proc block2 {list numcolumns {blank NULL}} { + set colblocks [list] + foreach c [cols2 $list $numcolumns $blank] { + lappend colblocks [join $c \n] " " + } + textblock::join -- {*}$colblocks + } +} + +tcl::namespace::eval punk::lib::test { + + + +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#todo - way to generate 'internal' docs separately? +#*** !doctools +#[section Internal] +tcl::namespace::eval punk::lib::system { + #*** !doctools + #[subsection {Namespace punk::lib::system}] + #[para] Internal functions that are not part of the API + #[list_begin definitions] + + + proc mostFactorsBelow {n} { + ##*** !doctools + #[call [fun mostFactorsBelow] [arg n]] + #[para]Find the number below $n which has the greatest number of factors + #[para]This will get slow quickly as n increases (100K = 1s+ 2024) + set most 0 + set mostcount 0 + for {set i 1} {$i < $n} {incr i} { + set fc [llength [punk::lib::factors $i]] + if {$fc > $mostcount} { + set most $i + set mostcount $fc + } + } + return [list number $most numfactors $mostcount] + } + proc factorCountBelow_punk {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [punk::lib::factors $i]] + } + return $tally + } + proc factorCountBelow_numtheory {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! (significantly slower than factorCountBelow_punk) + package require math::numtheory + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [math::numtheory::factors $i]] + } + return $tally + } + + proc factors2 {x} { + ##*** !doctools + #[call [fun factors2] [arg x]] + #[para]Return a sorted list of factors of x + #[para]A similar brute-force mechanism to factors - but keeps result ordering as we go. + set smallfactors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j < $max} { + if {($x % $j) == 0} { + lappend smallfactors $j + lappend largefactors [expr {$x / $j}] + } + incr j + } + #handle sqrt outside loop so we don't have to sort/dedup or check list membership in main loop + if {($x % $j) == 0} { + if {$j == ($x / $j)} { + lappend smallfactors $j + } + } + return [concat $smallfactors [lreverse $largefactors] $x] + } + + + + # incomplete - report which is the innermost bracket/quote etc awaiting completion for a Tcl command + #important - used by punk::repl + proc incomplete {partial} { + #we can apparently get away without concatenating current innerpartial to previous in list - REVIEW. + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + #puts stderr "-->$clist<--" + set waiting [list ""] + set innerpartials [list ""] + set escaped 0 + set i 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + incr i + continue + } ;# set escaped 0 at end + set p [lindex $innerpartials end] + if {$escaped == 0} { + #NOTE - curly braces as switch arm keys must be unescaped and balanced. (escapes stop byte-compilation to jumpTable for switch statements for tcl8.6/8.7 at least) + switch -- $c { + {"} { + if {![info complete ${p}]} { + lappend waiting {"} + lappend innerpartials "" + } else { + if {[lindex $waiting end] eq {"}} { + #this quote is endquote + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + if {![info complete ${p}$c]} { + lappend waiting {"} + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } + } + {[} { + if {![info complete ${p}$c]} { + lappend waiting "\]" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + "{" { + if {![info complete ${p}$c]} { + lappend waiting "\}" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + "}" - + default { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } + } else { + set p ${p}${c} + lset innerpartials end $p + } + set escaped 0 + incr i + } + set incomplete [list] + foreach w $waiting { + #to be treated as literals - curly braces must be unescaped here - and balanced - hence the left-curly empty arm. + switch -- $w { + {"} { + lappend incomplete $w + } + {]} { + lappend incomplete "\[" + } + "{" {} + "}" { + lappend incomplete "\{" + } + } + } + set debug 0 + if {$debug} { + foreach w $waiting p $innerpartials { + puts stderr "->awaiting:'$w' partial: $p" + } + } + return $incomplete + } + #This only works for very simple cases will get confused with for example: + # {set x "a["""} + proc incomplete_naive {partial} { + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + set waiting [list] + set escaped 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + continue + } + if {!$escaped} { + if {$c eq {"}} { + if {[lindex $waiting end] eq {"}} { + set waiting [lrange $waiting 0 end-1] + } else { + lappend waiting {"} + } + } elseif {$c eq "\["} { + lappend waiting "\]" + } elseif {$c eq "\{"} { + lappend waiting "\}" + } else { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + } + } + } + } + set incomplete [list] + foreach w $waiting { + if {$w eq {"}} { + lappend incomplete $w + } elseif {$w eq "\]"} { + lappend incomplete "\[" + } elseif {$w eq "\}"} { + lappend incomplete "\{" + } + } + return $incomplete + } + + #get info about punk nestindex key ie type: list,dict,undetermined + # pdict devel + proc nestindex_info {args} { + set argd [punk::args::parse $args withdef { + -parent -default "" + nestindex + }] + set opt_parent [dict get $argd opts -parent] + if {$opt_parent eq ""} { + set parent_type undetermined + } else { + set parent_type [nestindex_info -parent "" $opt_parent] ;#make sure we explicitly set parent of parent to empty so we don't just recurse forever doing nothing + } + + #??? + + } + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib::system ---}] +} + +tcl::namespace::eval punk::lib::caches { + +} + +tcl::namespace::eval punk::lib::debug { + proc showdict {args} {} +} + +namespace eval ::punk::args::register { + #use fully qualified so 8.6 doesn't find existing var in global namespace + lappend ::punk::args::register::NAMESPACES ::punk::lib +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::lib [tcl::namespace::eval punk::lib { + variable pkg punk::lib + variable version + set version 0.1.6 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm index 06c7ddf3..741d9fc0 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm @@ -229,12 +229,16 @@ tcl::namespace::eval punk::nav::fs { } else { set stripbase 1 } - if {$v eq "/"} { - #hack - dict set matchinfo files {} - dict set matchinfo filesizes {} - } - set out [dirfiles_dict_as_lines -stripbase $stripbase $matchinfo] + + #we need to pass matchinfo that includes files even when only doing a directory listing (d/ /) + #This is because we want to display links/shortcuts that point to directories as directories. + #( ./ listing needs to show navigable items) + #if {$v eq "/"} { + # #dodgy hack that doesn't give proper display of all links/shortcuts that are pointing to directories. + # dict set matchinfo files {} + # dict set matchinfo filesizes {} + #} + set out [dirfiles_dict_as_lines -listing $v -stripbase $stripbase $matchinfo] #set chunklist [list] #lappend chunklist [list stdout "[punk::ansi::a+ brightwhite]$out[punk::ansi::a]\n"] set result "[punk::ansi::a+ brightwhite]$out[punk::ansi::a]\n" @@ -258,10 +262,10 @@ tcl::namespace::eval punk::nav::fs { #puts stdout "-->[ansistring VIEW $result]" return $result } else { - set atail [lassign $args a1] + set atail [lassign $args cdtarget] if {[llength $args] == 1} { - set a1 [lindex $args 0] - switch -exact -- $a1 { + set cdtarget [lindex $args 0] + switch -exact -- $cdtarget { . - ./ { tailcall punk::nav::fs::d/ } @@ -286,43 +290,88 @@ tcl::namespace::eval punk::nav::fs { } } else { cd $up1 - #set VIRTUAL_CWD [file normalize $a1] + #set VIRTUAL_CWD [file normalize $cdtarget] } tailcall punk::nav::fs::d/ $v } } - if {![regexp {[*?]} $a1] && [file pathtype $a1] ne "relative"} { + set cdtarget_copy [punk::nav::fs::system::valcopy $cdtarget] + set cdtarget_copy [string map {\\ /} $cdtarget_copy] + if {[string range $cdtarget_copy 0 3] eq "//?/"} { + #handle dos device paths - convert to normal path for glob testing + set glob_test [string range $cdtarget_copy 3 end] + set cdtarget_is_glob [regexp {[*?]} $glob_test] + } else { + set cdtarget_is_glob [regexp {[*?]} $cdtarget] + } + if {!$cdtarget_is_glob} { + set cdtarget_file_type [file type $cdtarget] + #e.g may be a link - whilst the type returned in the 'file stat' info reflects the type of the link target + } else { + set cdtarget_file_type "glob" + } + + if {!$cdtarget_is_glob && [file pathtype $cdtarget] ne "relative"} { #non-relative non-glob - if { ![string match //zipfs:/* $a1]} { - if {[file type $a1] eq "directory"} { - cd $a1 - #set VIRTUAL_CWD $a1 - tailcall punk::nav::fs::d/ $v + if {![string match //zipfs:/* $cdtarget]} { + switch -- $cdtarget_file_type { + link { + file stat $cdtarget cdtargetinfo + set linktarget_file_type $cdtargetinfo(type) + if {$linktarget_file_type eq "directory"} { + set linktarget [file readlink $cdtarget] + cd $linktarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } + } + directory { + cd $cdtarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } } } } - if {![regexp {[*?]} $a1] && ![string match //zipfs:/* $a1] && ![string match "//zipfs:/*" $VIRTUAL_CWD]} { - if {[file type $a1] eq "directory"} { - cd $a1 - #set VIRTUAL_CWD [file normalize $a1] - tailcall punk::nav::fs::d/ $v + if {!$cdtarget_is_glob && ![string match //zipfs:/* $cdtarget] && ![string match "//zipfs:/*" $VIRTUAL_CWD]} { + switch -- $cdtarget_file_type { + link { + file stat $cdtarget cdtargetinfo + set linktarget_file_type $cdtargetinfo(type) + set linktarget [file readlink $cdtarget] + if {$linktarget_file_type eq "directory"} { + cd $linktarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } + } + directory { + cd $cdtarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } } + #if {[file type $cdtarget] eq "directory"} { + # cd $cdtarget + # #set VIRTUAL_CWD [file normalize $cdtarget] + # tailcall punk::nav::fs::d/ $v + #} } - if {![regexp {[*?]} $a1]} { + if {!$cdtarget_is_glob} { #NON-Glob target #review - if {[string match //zipfs:/* $a1]} { - if {[Zipfs_path_within_zipfs_mounts $a1]} { - commandstack::basecall cd $a1 + if {[string match //zipfs:/* $cdtarget]} { + if {[Zipfs_path_within_zipfs_mounts $cdtarget]} { + commandstack::basecall cd $cdtarget } - set VIRTUAL_CWD $a1 - set curdir $a1 + set VIRTUAL_CWD $cdtarget + set curdir $cdtarget } else { - set target [punk::path::normjoin $VIRTUAL_CWD $a1] + set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] if {[string match //zipfs:/* $VIRTUAL_CWD]} { if {[Zipfs_path_within_zipfs_mounts $target]} { commandstack::basecall cd $target @@ -521,20 +570,93 @@ tcl::namespace::eval punk::nav::fs { return $result } + punk::args::define { + @id -id ::punk::nav::fs::d/new + -nonportable -type none -help\ + "Allow creation of directories which may not be portable across platforms. + Use with caution and only when you know what you are doing. + This allows creation of directories with names that may be invalid on some + platforms, or that may have special meanings on some platforms + (e.g reserved device names on windows). + If -nonportable is not supplied, then an error will be raised if any supplied + path is non-portable as defined by punk::winpath::illegalname_test. + + Regardless of whether -nonportable is supplied or not, some characters are not + suitable for windows or most other platforms and will be rejected with an error. + An example of this is the null character (\0)." + @values -min 1 -max -1 -type string + path -type string -multiple 1 -help\ + "Path(s) to create. Can be absolute or relative. + + If any path is rejected due to -nonportable or other invalid characters, + or because a parent directory is not writable, then no directories will be created. + + If a path already exists, then it will be left as-is and no error will be raised. + + If despite passing the name tests or writability tests, a directory cannot be + created for some reason (e.g other filesystem error) then an error will be raised + and processing of any remaining paths will be aborted." + } + #todo - synchronize overall behaviour of d/new with that of n/new (for namespaces) proc d/new {args} { - if {![llength $args]} { - error "usage: d/new \[ ...\]" - } - set a1 [lindex $args 0] + set argd [punk::args::parse $args withid ::punk::nav::fs::d/new] + lassign [dict values $argd] leaders opts values received + set paths [dict get $values path] + set allow_nonportable [dict exists $received -nonportable] + set curdir [pwd] - set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] - set fullpath [file join $path1 {*}[lrange $args 1 end]] + set fullpath_list [list] + set error_paths [list] + foreach p $paths { + if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { + #error "punk::nav::fs::d/new Path '$p' is not portable and may not be created without -nonportable option" + lappend error_paths [list $p "Path '$p' is not portable and may not be created without -nonportable option"] + continue + } + if {[string first \0 $p] != -1} { + #error "punk::nav::fs::d/new Path '$p' contains null character which is not allowed" + lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] + continue + } + set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] + #e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. + set fullpath [file join $path1 {*}[lrange $args 1 end]] + #Some subpaths of the supplied paths to create may already exist. + #we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. + set parent [file dirname $fullpath] + while {![file exists $parent]} { + set parent [file dirname $parent] + } + if {![file writable $parent]} { + #error "punk::nav::fs::d/new Cannot create directory '$fullpath' as parent '$parent' is not writable" + lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] + continue + } + lappend fullpath_list $fullpath + } + if {[llength $fullpath_list] != [llength $paths]} { + set path_error_display "" + foreach e $error_paths { + set p [lindex $e 0] + set m [lindex $e 1] + append path_error_display " Path: '$p' Error: $m\n" + } + error "punk::nav::fs::d/new One or more supplied paths were invalid or not writable:\n$path_error_display" + } - if {[file exists $fullpath]} { - error "Folder $fullpath already exists" + set num_created 0 + set error_string "" + foreach fullpath $fullpath_list { + if {[catch {file mkdir $fullpath}]} { + set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." + break + } + incr num_created } - file mkdir $fullpath - d/ $fullpath + if {$error_string ne ""} { + error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." + } + d/ $curdir } #todo use unknown to allow d/~c:/etc ?? @@ -849,11 +971,11 @@ tcl::namespace::eval punk::nav::fs { #file attr //cookit:/ returns {-vfs 1 -handle {}} #we will treat it differently for now - use generic handler REVIEW - set in_vfs 0 ;#we use this only for a vfs which is reported to have a mountpoint by vfs::filesystem info - not all that have -vfs 1 attr like cookit. + set is_in_vfs 0 ;#we use this only for a vfs which is reported to have a mountpoint by vfs::filesystem info - not all that have -vfs 1 attr like cookit. if {[llength [package provide vfs]]} { foreach mount [vfs::filesystem info] { if {[punk::mix::base::lib::path_a_atorbelow_b $location $mount]} { - set in_vfs 1 + set is_in_vfs 1 break } } @@ -871,27 +993,27 @@ tcl::namespace::eval punk::nav::fs { } else { set next_opt_with_times [list -with_times $opt_with_times] } - if {$in_vfs} { + if {$is_in_vfs} { set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] } else { - set in_zipfs 0 - set in_cookit 1 - set in_other_pseudovol 1 + set invfs "" switch -glob -- $location { //zipfs:/* { if {[info commands ::tcl::zipfs::mount] ne ""} { - set in_zipfs 1 + set invfs zipfs } } //cookit:/* { - set in_cookit 1 + set invfs cookit } default { #handle 'other/unknown' that mounts at a volume-like path //pseudovol:/ + #(intentionally will not match a dos device path such as //?/c:/) if {[regexp {//((?:(?!:|/).)+):/.*} $location _match pseudovol]} { #pseudovol probably more than one char long #we don't really expect something like //c:/ , but anyway, it's not the same as c:/ and for all we know someone could use that as a volume name? - set in_other_pseudovol 1 ;#flag so we don't use twapi - hope generic can handle it (uses tcl glob) + #flag so we don't use twapi - hope generic can handle it (uses tcl glob) + set invfs pseudovol } else { #we could use 'file attr' here to test if {-vfs 1} #but it's an extra filesystem hit on all normal paths too (which can be expensive on some systems) @@ -900,20 +1022,24 @@ tcl::namespace::eval punk::nav::fs { } } - - if {$in_zipfs} { - #relative vs absolute? review - cwd valid for //zipfs:/ ?? - set listing [punk::du::lib::du_dirlisting_zipfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } elseif {$in_cookit} { - #seems to be a vfs - except it mounts on a pseudo-volume path //cookit:/ - #don't use twapi - #could possibly use du_dirlisting_tclvfs REVIEW - #files and folders are all returned with the -types hidden option for glob on windows - set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } elseif {$in_other} { - set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } else { - set listing [punk::du::dirlisting $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + switch -- $invfs { + zipfs { + #relative vs absolute? review - cwd valid for //zipfs:/ ?? + set listing [punk::du::lib::du_dirlisting_zipfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + cookit { + #seems to be a vfs - except it mounts on a pseudo-volume path //cookit:/ + #don't use twapi + #could possibly use du_dirlisting_tclvfs REVIEW + #files and folders are all returned with the -types hidden option for glob on windows + set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + pseudovol { + set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + default { + set listing [punk::du::dirlisting $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } } } @@ -1018,11 +1144,13 @@ tcl::namespace::eval punk::nav::fs { @id -id ::punk::nav::fs::dirfiles_dict_as_lines -stripbase -default 0 -type boolean -formatsizes -default 1 -type boolean + -listing -default "/" -choices {/ // //} @values -min 1 -max -1 -type dict -unnamed true } #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? proc dirfiles_dict_as_lines {args} { + set ts1 [clock milliseconds] package require overtype set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] lassign [dict values $argd] leaders opts vals @@ -1031,9 +1159,12 @@ tcl::namespace::eval punk::nav::fs { # -- --- --- --- --- --- --- --- --- --- --- --- - set opt_stripbase [dict get $opts -stripbase] + set opt_stripbase [dict get $opts -stripbase] set opt_formatsizes [dict get $opts -formatsizes] + set opt_listing [dict get $opts -listing] # -- --- --- --- --- --- --- --- --- --- --- --- + #we still need to examine files for -listing / which means show only directories, + # because we want to display links/shortcuts that point to directories as directories #if multiple dicts and -stripbase = 1 - we can only strip the longest common part of the searchbases supplied set common_base "" @@ -1074,7 +1205,6 @@ tcl::namespace::eval punk::nav::fs { foreach fileset [list dirs files links underlayfiles flaggedhidden flaggedreadonly flaggedsystem nonportable vfsmounts] { set $fileset [list] } - #set contents [lindex $list_of_dicts 0] foreach contents $list_of_dicts { lappend dirs {*}[dict get $contents dirs] @@ -1090,6 +1220,7 @@ tcl::namespace::eval punk::nav::fs { lappend vfsmounts {*}[dict get $contents vfsmounts] } + set fkeys [dict create] ;#avoid some file normalize calls.. if {$opt_stripbase && $common_base ne ""} { set filetails [list] @@ -1224,27 +1355,41 @@ tcl::namespace::eval punk::nav::fs { #review - symlink to shortcut? hopefully will just work #classify as file or directory - fallback to file if unknown/undeterminable set finfo_plus [list] + set ts2 [clock milliseconds] foreach fdict $finfo { set fname [dict get $fdict file] if {[file extension $fname] eq ".lnk"} { if {![catch {package require punk::winlnk}]} { - set shortcutinfo [punk::winlnk::resolve $fname] set target_type "file" ;#default/fallback + + set shortcutinfo [punk::winlnk::resolve $fname] if {[dict exists $shortcutinfo link_target]} { set is_valid_lnk 1 set tgt [dict get $shortcutinfo link_target] - if {[file exists $tgt]} { - #file type could return 'link' - we will use isfile/isdirectory - if {[file isfile $tgt]} { - set target_type file - } elseif {[file isdirectory $tgt]} { - set target_type directory - } else { - set target_type file ;## ? + set link_target_type [dict get $shortcutinfo target_type] + switch -- $link_target_type { + file { + set target_type "file" + } + directory - "local disk" { + set target_type "directory" + } + unknown { + #fall back to checking attributes and filesystem if we have a link_target but no target_type + if {[file exists $tgt]} { + #file type could return 'link' - we will use isfile/isdirectory + if {[file isfile $tgt]} { + set target_type file + } elseif {[file isdirectory $tgt]} { + set target_type directory + } else { + set target_type file ;## ? + } + } else { + #todo - see if punk::winlnk has info about the type at the time of linking + #for now - treat as file + } } - } else { - #todo - see if punk::winlnk has info about the type at the time of linking - #for now - treat as file } } else { #no link_target - probably an ordinary file - but there could have been some other error in reading the binary windows lnk format. @@ -1295,6 +1440,8 @@ tcl::namespace::eval punk::nav::fs { } unset finfo + puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" + puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] @@ -1304,58 +1451,82 @@ tcl::namespace::eval punk::nav::fs { set displaylist [list] set col1 [string repeat " " [expr {$widest1 + 2}]] set RST [punk::ansi::a] + if {$opt_listing eq "/"} { + #disply directories only (including items that were actually files that were links/shortcuts to directories) + set finfo_plus [list] + } foreach d $dirs filerec $finfo_plus { - set d1 [punk::ansi::a+ cyan bold] - set d2 [punk::ansi::a+ defaultfg defaultbg normal] - #set f1 [punk::ansi::a+ white bold] - set f1 [punk::ansi::a+ white] - set f2 [punk::ansi::a+ defaultfg defaultbg normal] + set d1 [punk::ansi::a+ cyan normal] + set d1_overrides [list] + #set d2 [punk::ansi::a+ defaultfg defaultbg normal] + set f1 [punk::ansi::a+ white normal] + set f1_overrides [list] + #set f2 [punk::ansi::a+ defaultfg defaultbg normal] set fdisp "" if {[string length $d]} { if {$d in $flaggedhidden} { - set d1 [punk::ansi::a+ cyan normal] + #set d1 [punk::ansi::a+ Term-grey50 normal] + lappend d1_overrides term-grey50 } if {$d in $vfsmounts} { - if {$d in $flaggedhidden} { - #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indicatio of 'hidden' status - REVIEW - #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) - #mark it differently for now.. (todo bug report?) - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red Yellow bold] - } else { - set d1 [punk::ansi::a+ green Purple bold] - } - } else { - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red White bold] - } else { - set d1 [punk::ansi::a+ green bold] - } - } - } else { - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red bold] - } + lappend d1_overrides Green + } + if {$d in $nonportable} { + #lappend d1_overrides underline undt-red ;#we use underlins to indicate symlinks and shortcuts, so we shouldn't use underlines here if possible. + lappend d1_overrides italic bold } + #if {$d in $vfsmounts} { + # if {$d in $flaggedhidden} { + # #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indication of 'hidden' status - REVIEW + # #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) + # #mark it differently for now.. (todo bug report?) + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red Yellow bold] + # } else { + # set d1 [punk::ansi::a+ green Purple bold] + # } + # } else { + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red White bold] + # } else { + # set d1 [punk::ansi::a+ green bold] + # } + # } + #} else { + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red bold] + # } + #} #dlink-style & dshortcut_style are for underlines - can be added with colours already set + + if {[llength $d1_overrides]} { + set d1 [punk::ansi::a+ {*}$d1_overrides] + } if {$d in $dir_symlinks} { append d1 $dlink_style } elseif {$d in $dir_shortcuts} { append d1 $dshortcut_style } } + if {[llength $filerec]} { set fname [dict get $filerec file] set fdisp [dict get $filerec display] if {$fname in $flaggedhidden} { - set f1 [punk::ansi::a+ Purple] - } else { - if {$fname in $nonportable} { - set f1 [punk::ansi::a+ red bold] - } + #set f1 [punk::ansi::a+ Term-grey50] + lappend f1_overrides term-grey50 + } + if {$fname in $nonportable} { + lappend f1_overrides italic bold } + if {[llength $f1_overrides]} { + set f1 [punk::ansi::a+ {*}$f1_overrides] + } + lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST + } else { + #either there are no files or opt_listing is / = show dirs only (some of which may have actually been files that were links/shortcuts to directories) + lappend displaylist [overtype::left $col1 $d1$d$RST] } - lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST } return [punk::lib::list_as_lines $displaylist] @@ -1469,6 +1640,12 @@ tcl::namespace::eval punk::nav::fs::system { #[subsection {Namespace punk::nav::fs::system}] #[para] Internal functions that are not part of the API + #utility function to copy values from one variable to another without sharing the reference. + #Useful for example to avoid some issues with possible shimmering of the underlying type of file paths. + proc valcopy {obj} { + append obj2 $obj {} + } + #ordinary emission of chunklist when no repl proc emit_chunklist {chunklist} { set result "" diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/pipe-1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/pipe-1.0.tm index eac7df81..034fae01 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/pipe-1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/pipe-1.0.tm @@ -326,12 +326,12 @@ tcl::namespace::eval punk::pipe::lib { set in_atom 1 } ( { - incr in_brackets + incr in_brackets } default { if {$end_var_posn == -1 && (($c in $var_terminals) && ($prevc ni $protect_terminals))} { - set end_var_posn $token_index - } + set end_var_posn $token_index + } } } } diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winlnk-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winlnk-0.1.1.tm new file mode 100644 index 00000000..f283348f --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winlnk-0.1.1.tm @@ -0,0 +1,1014 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'dev make' or bin/punkmake to update from -buildversion.txt +# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::winlnk 0.1.1 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::winlnk 0 0.1.1] +#[copyright "2024"] +#[titledesc {windows shortcut .lnk library}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk::winlnk}] [comment {-- Description at end of page heading --}] +#[require punk::winlnk] +#[keywords module shortcut lnk parse windows crossplatform] +#[description] +#[para] Tools for reading windows shortcuts (.lnk files) on any platform + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::winlnk +#[subsection Concepts] +#[para] Windows shortcuts are a binary format file with a .lnk extension +#[para] Shell Link (.LNK) Binary File Format is documented in [lb]MS_SHLLINK[rb].pdf published by Microsoft. +#[para] Revision 8.0 published 2024-04-23 + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::winlnk +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6}] + +#TODO - logger + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + #variable xyz + + #*** !doctools + #[subsection {Namespace punk::winlnk}] + #[para] Core API functions for punk::winlnk + #[list_begin definitions] + + + variable magic_HeaderSize "0000004C" ;#HeaderSize MUST equal this + variable magic_LinkCLSID "00021401-0000-0000-C000-000000000046" ;#LinkCLSID MUST equal this + + proc Get_contents {path {bytes all}} { + if {![file exists $path] || [file type $path] ne "file"} { + error "punk::winlnk::get_contents cannot find a filesystem object of type 'file' at location: $path" + } + set fd [open $path r] + chan configure $fd -translation binary -encoding iso8859-1 + if {$bytes eq "all"} { + set data [read $fd] + } else { + set data [read $fd $bytes] + } + close $fd + return $data + } + proc Contents_check_header {contents} { + variable magic_HeaderSize + variable magic_LinkCLSID + expr {[Header_Get_HeaderSize $contents] eq $magic_HeaderSize && [Header_Get_LinkCLSID $contents] eq $magic_LinkCLSID} + } + + #LinkFlags - 4 bytes - specifies information about the shell link and the presence of optional portions of the structure. + proc Show_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + puts "val: $val" + set declist [scan [string reverse $4bytes] %c%c%c%c] + set fmt [string repeat %08b 4] + puts "LinkFlags:[format $fmt {*}$declist]" + + set r [binary scan $4bytes b32 val] + puts "bscan-le: $val" + set r [binary scan [string reverse $4bytes] b32 val] + puts "bscan-2 : $val" + } + variable LinkFlags + set LinkFlags [dict create\ + HasLinkTargetIDList 1\ + HasLinkInfo 2\ + HasName 4\ + HasRelativePath 8\ + HasWorkingDir 16\ + HasArguments 32\ + HasIconLocation 64\ + IsUnicode 128\ + ForceNoLinkInfo 256\ + HasExpString 512\ + RunInSeparateProcess 1024\ + Unused1 2048\ + HasDarwinID 4096\ + RunAsUser 8192\ + HasExpIcon 16394\ + NoPidlAlias 32768\ + Unused2 65536\ + RunWithShimLayer 131072\ + ForceNoLinkTrack 262144\ + EnableTargetMetadata 524288\ + DisableLinkPathTracking 1048576\ + DisableKnownFolderTracking 2097152\ + DisableKnownFolderAlias 4194304\ + AllowLinkToLink 8388608\ + UnaliasOnSave 16777216\ + PreferEnvironmentPath 33554432\ + KeepLocalIDListForUNCTarget 67108864\ + ] + variable LinkFlagLetters [list A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA] + proc Header_Has_LinkFlag {contents flagname} { + variable LinkFlags + variable LinkFlagLetters + if {[string length $flagname] <= 2} { + set idx [lsearch $LinkFlagLetters $flagname] + if {$idx < 0} { + error "punk::winlnk::Header_Has_LinkFlag error - flagname $flagname not known" + } + set binflag [expr {2**$idx}] + set allflags [Header_Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } + if {[dict exists $LinkFlags $flagname]} { + set binflag [dict get $LinkFlags $flagname] + set allflags [Header_Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } else { + error "punk::winlnk::Header_Has_LinkFlag error - flagname $flagname not known" + } + } + + #MS-SHLLINK.pdf documents the .lnk file format in detail, but here is a brief overview of the structure of a .lnk file: + #protocol revision 10.0 (November 2025) https://winprotocoldocs-bhdugrdyduf5h2e4.b02.azurefd.net/MS-SHLLINK/%5bMS-SHLLINK%5d.pdf + + + #SHELL_LINK_HEADER structure is 76 bytes long and starts at the beginning of the file + #offset hex:0x00 dec:0 4 bytes + #Header size (HeaderSize) (must be 0x0000004C for .lnk files) + proc Header_Get_HeaderSize {contents} { + set 4bytes [split [string range $contents 0 3] ""] + set hex4 "" + foreach b [lreverse $4bytes] { + set dec [scan $b %c] ;# 0-255 decimal + set HH [format %2.2llX $dec] + append hex4 $HH + } + return $hex4 + } + + + #offset hex:0x04 dec:4 16 bytes + #LinkCLSID (must be 00021401-0000-0000-C000-000000000046 for .lnk files) + proc Header_Get_LinkCLSID {contents} { + set 16bytes [string range $contents 4 19] + #CLSID hex textual representation is split as 4-2-2-2-6 bytes(hex pairs) + #e.g We expect 00021401-0000-0000-C000-000000000046 for .lnk files + #for endianness - it is little endian all the way but the split is 4-2-2-1-1-1-1-1-1-1-1 REVIEW + #(so it can appear as mixed endianness if you don't know the splits) + #https://devblogs.microsoft.com/oldnewthing/20220928-00/?p=107221 + #This is based on COM textual representation of GUIDS + #Apparently a CLSID is a GUID that identifies a COM object + set clsid "" + set s1 [tcl::string::range $16bytes 0 3] + set declist [scan [string reverse $s1] %c%c%c%c] + set fmt "%02X%02X%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s2 [tcl::string::range $16bytes 4 5] + set declist [scan [string reverse $s2] %c%c] + set fmt "%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s3 [tcl::string::range $16bytes 6 7] + set declist [scan [string reverse $s3] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + #now treat bytes individually - so no endianness conversion + set declist [scan [tcl::string::range $16bytes 8 9] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + set scan [string repeat %c 6] + set fmt [string repeat %02X 6] + set declist [scan [tcl::string::range $16bytes 10 15] $scan] + append clsid [format $fmt {*}$declist] + + return $clsid + } + + + #offset hex:0x14 dec:20 4 bytes + #Link flags (LinkFlags) - bit field specifying information about the shell link and the presence of optional portions of the structure. + #HasLinkTargetIDList bit 0 (0x00000001) - if set, a LinkTargetIDList structure is present immediately following the header + #HasLinkInfo bit 1 (0x00000002) - if set, a LinkInfo structure is present immediately following the header (or the LinkTargetIDList if that is present) + #HasName bit 2 (0x00000004) - if set, a null-terminated string containing the name of the link is present immediately following the header (or the LinkTargetIDList and LinkInfo if they are present) + #HasRelativePath bit 3 (0x00000008) - if set, a null-terminated string containing the relative path of the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo and Name if they are present) + #HasWorkingDir bit 4 (0x00000010) - if set, a null-terminated string containing the working directory of the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name and Relative Path if they are present) + #HasArguments bit 5 (0x00000020) - if set, a null-terminated string containing the command line arguments for the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path and Working Dir if they are present) + #HasIconLocation bit 6 (0x00000040) - if set, a null-terminated string containing the location of the icon for the link is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir and Arguments if they are present) + #IsUnicode bit 7 (0x00000080) - if set, the strings in the link are stored in Unicode (UTF-16LE) format; if not set, the strings are stored in ANSI format (usually the system's default code page) + #ForceNoLinkInfo bit 8 (0x00000100) - if set, the LinkInfo structure is not stored in the file even if the HasLinkInfo bit is set; this can be used to force the link to be resolved using only the information in the header and the optional strings, without using the LinkInfo structure + #HasExpString bit 9 (0x00000200) - if set, a null-terminated string containing an "environment variable" style string is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments and Icon Location if they are present); this string can contain environment variable references (e.g. %USERPROFILE%) that can be expanded to obtain the actual path of the link target + #RunInSeparateProcess bit 10 (0x00000400) - if set, the link target should be run in a separate process; if not set, the link target may be run in the same process as the caller + #Unused1 bit 11 (0x00000800) - reserved for future use; should be set to 0 + #HasDarwinID bit 12 (0x00001000) - if set, a null-terminated string containing a "Darwin ID" is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments, Icon Location and ExpString if they are present); this string can be used to identify the link target in a way that is independent of the file system (e.g. for links to Control Panel items or special folders) + #RunAsUser bit 13 (0x00002000) - if set, the link target should be run with the permissions of the user specified in the HasDarwinID string; if not set, the link target should be run with the permissions of the caller + #HasExpIcon bit 14 (0x00004000) - if set, a null-terminated string containing an "environment variable" style string for the icon location is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments, Icon Location, ExpString and DarwinID if they are present); this string can contain environment variable references that can be expanded to obtain the actual path of the icon for the link + #NoPidlAlias bit 15 (0x00008000) - if set, the link target should not be resolved using the PIDL alias mechanism; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed + #Unused2 bit 16 (0x00010000) - reserved for future use; should be set to 0 + #RunWithShimLayer bit 17 (0x00020000) - if set, the link target should be run with the application compatibility shim layer; if not set, the link target should be run without the shim layer + #ForceNoLinkTrack bit 18 (0x00040000) - if set, the link target should not be tracked by the shell's link tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed + #EnableTargetMetadata bit 19 (0x00080000) - if set, the link target should have metadata enabled; this can be used to allow the link to store additional information about the target (e.g. for links to files, the link can store the file's attributes, creation time, access time and modification time) + #DisableLinkPathTracking bit 20 (0x00100000) - if set, the link target should not be tracked by the shell's link path tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed based on its path + #DisableKnownFolderTracking bit 21 (0x00200000) - if set, the link target should not be tracked by the shell's known folder tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed based on its known folder ID + #DisableKnownFolderAlias bit 22 (0x00400000) - if set, the link target should not be aliased to a known folder; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on its known folder ID + #AllowLinkToLink bit 23 (0x00800000) - if set, the link target can be another link; if not set, the link target should not be another link (i.e. it should be a file or directory); this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on the fact that it is a link + #UnaliasOnSave bit 24 (0x01000000) - if set, the link should be unaliased when it is saved; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on the fact that it is a link + #PreferEnvironmentPath bit 25 (0x02000000) - if set, the link should prefer to resolve the target using environment variable references; this can be used to allow the link to be resolved correctly even if the target is moved or renamed, as long as the environment variable references still point to the correct location + #KeepLocalIDListForUNCTarget bit 26 (0x04000000) - if set, the link should keep the local ID list for UNC targets; this can be used to allow the link to be resolved correctly even if the target is moved or renamed, as long as the local ID list still points to the correct location + # - the presence of these flags indicates the presence of optional structures in the .lnk file and also provides information about how to interpret the data in the file + proc Header_Get_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #offset hex:0x18 dec:24 4 bytes + #File attributes (FileAttributes) - bit field specifying the file attributes of the link target (if the EnableTargetMetadata flag is set in the LinkFlags field); this field is a bitwise combination of the following values: + proc Header_Get_FileAttributes {contents} { + if {![Header_Has_LinkFlag $contents "EnableTargetMetadata"]} { + return {} + } + set 4bytes [string range $contents 24 27] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + set attrlist {} + if {$val & 0x00000001} {lappend attrlist "READONLY"} + if {$val & 0x00000002} {lappend attrlist "HIDDEN"} + if {$val & 0x00000004} {lappend attrlist "SYSTEM"} + if {$val & 0x00000010} {lappend attrlist "DIRECTORY"} + if {$val & 0x00000020} {lappend attrlist "ARCHIVE"} + if {$val & 0x00000040} {lappend attrlist "DEVICE"} + if {$val & 0x00000080} {lappend attrlist "NORMAL"} + if {$val & 0x00000100} {lappend attrlist "TEMPORARY"} + if {$val & 0x00000200} {lappend attrlist "SPARSE_FILE"} + if {$val & 0x00000400} {lappend attrlist "REPARSE_POINT"} + if {$val & 0x00000800} {lappend attrlist "COMPRESSED"} + if {$val & 0x00001000} {lappend attrlist "OFFLINE"} + if {$val & 0x00002000} {lappend attrlist "NOT_CONTENT_INDEXED"} + if {$val & 0x00004000} {lappend attrlist "ENCRYPTED"} + return $attrlist + } + proc Header_Get_FileAttributes_Raw {contents} { + if {![Header_Has_LinkFlag $contents "EnableTargetMetadata"]} { + return 0 + } + set 4bytes [string range $contents 24 27] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + + + + #offset hex:0x1C dec:28 8 bytes + #creation date and time (CreationTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_CreationTime {contents} { + set 8bytes [string range $contents 28 35] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_CreationTime_Raw {contents} { + set 8bytes [string range $contents 28 35] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset 36 8 bytes + #last access date and time (AccessTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_AccessTime {contents} { + set 8bytes [string range $contents 36 43] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_AccessTime_Raw {contents} { + set 8bytes [string range $contents 36 43] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset hex:0x2C dec:44 8 bytes + #last modification date and time (WriteTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_WriteTime {contents} { + set 8bytes [string range $contents 44 51] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_WriteTime_Raw {contents} { + set 8bytes [string range $contents 44 51] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset hex:0x34 dec:52 Bytes:4 - unsigned int + #file size in bytes (of target - low 32 bits if >4GB) + proc Header_Get_FileSize {contents} { + set 4bytes [string range $contents 52 55] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x38 dec:56 Bytes:4 - signed integer + #icon index value + proc Header_Get_IconIndex {contents} { + set 4bytes [string range $contents 56 59] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x3C dec:60 Bytes:4 - unsigned integer + #SW_SHOWNORMAL 0x00000001 + #SW_SHOWMAXIMIZED 0x00000001 + #SW_SHOWMINNOACTIVE 0x00000007 + # - all other values MUST be treated as SW_SHOWNORMAL + proc Header_Get_ShowCommand {contents} { + set 4bytes [string range $contents 60 63] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x40 dec:64 Bytes:2 + #Hot key + proc Header_Get_HotKey {contents} { + # Existing code that extracts the raw 16‑bit hotkey value: + set raw [Header_Get_HotKey_Raw $contents] + # The low byte holds the virtual‑key, high byte holds modifier flags + set vk [expr {$raw & 0xFF}] + set mods [expr {($raw >> 8) & 0xFF}] + set name [_vk_to_name $vk] + set modStr [_modifiers_to_string $mods] + if {$modStr eq ""} { + return $name + } else { + return "${modStr}+${name}" + } + } + proc Header_Get_HotKey_Raw {contents} { + set 2bytes [string range $contents 64 65] + set r [binary scan $2bytes s val] ;#short + return $val + } + proc _modifiers_to_string {mods} { + set parts {} + if {$mods & 0x01} {lappend parts "Shift"} + if {$mods & 0x02} {lappend parts "Ctrl"} + if {$mods & 0x04} {lappend parts "Alt"} + if {$mods & 0x08} {lappend parts "Win"} ;# optional + return [join $parts "+"] + } + proc _vk_to_name {vk} { + # Minimal map – extend as needed + array set vkMap { + 0x00 "No key assigned" + 0x08 Backspace 0x09 Tab 0x0D Return + 0x10 Shift 0x11 Control 0x12 Alt + 0x20 Space 0x21 PageUp 0x22 PageDown + 0x23 End 0x24 Home 0x25 Left + 0x26 Up 0x27 Right 0x28 Down + 0x2D Insert 0x2E Delete + 0x70 F1 0x71 F2 0x72 F3 + 0x73 F4 0x74 F5 0x75 F6 + 0x76 F7 0x77 F8 0x78 F9 + 0x79 F10 0x7A F11 0x7B F12 + 0x7c F13 0x7d F14 0x7e F15 + 0x7f F16 0x80 F17 0x81 F18 + 0x82 F19 0x83 F20 0x84 F21 + 0x85 F22 0x86 F23 0x87 F24 + 0x90 "NUM LOCK" 0x91 "SCROLL LOCK" + } + if {[info exists vkMap($vk)]} { + return $vkMap($vk) + } else { + if {$vk >= 0x30 && $vk <= 0x39} { + return [format "%c" $vk] ;# 0-9 + } elseif {$vk >= 0x41 && $vk <= 0x5A} { + return [format "%c" $vk] ;# A-Z + } + # fallback: hex representation + return [format "0x%02X" $vk] + } + } + + #offset hex:0x42 dec:66 Bytes:2 - reserved1 + proc Header_Get_Reserved1 {contents} { + set 2bytes [string range $contents 66 67] + set r [binary scan $2bytes s val] ;#short + return $val + } + + #offset hex:0x44 dec:68 Bytes:4 - reserved2 + proc Header_Get_Reserved2 {contents} { + set 4bytes [string range $contents 68 71] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #offset hex:0x48 dec:72 Bytes:4 - reserved3 + proc Header_Get_Reserved3 {contents} { + set 4bytes [string range $contents 72 75] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #end of 76 byte header + + proc Get_LinkTargetIDList_size {contents} { + if {[Header_Has_LinkFlag $contents "A"]} { + set 2bytes [string range $contents 76 77] + set r [binary scan $2bytes s val] ;#short + #logger + #puts stderr "LinkTargetIDList_size: $val" + return $val + } else { + return 0 + } + } + proc Get_LinkTargetIDList_content {contents} { + set idlist_size [Get_LinkTargetIDList_size $contents] + if {$idlist_size == 0} { + return "" + } else { + set idlist_content [string range $contents 78 [expr {78 + $idlist_size -1}]] + return $idlist_content + } + } + + #some clues on the structure of the IDList content and how to parse it can be found in the analysis of CVE-2020-0729, + #which is a remote code execution vulnerability in Windows that can be exploited through specially crafted .lnk files that contain malicious IDList content. + #The analysis of this vulnerability provides insights into how the IDList content is structured and how it can be parsed to extract information about the link target and potentially execute code. + #https://www.zerodayinitiative.com/blog/2020/3/25/cve-2020-0729-remote-code-execution-through-lnk-files + + proc Get_LinkTargetIDList_iteminfo {contents} { + set idlist_content [Get_LinkTargetIDList_content $contents] + set result {} + set offset 0 + while {$offset < [string length $idlist_content]} { + if {[string length $idlist_content] - $offset < 2} break + set size_bytes [string range $idlist_content $offset [expr {$offset + 1}]] ;#size including these 2 bytes + binary scan $size_bytes su size + if {$size == 0} break + if {$size < 2} { + # Invalid size, abort + error "punk::winlnk::Get_LinkTargetIDList_iteminfo: Invalid ItemID size: $size at offset $offset" + } + if {$offset + $size > [string length $idlist_content]} { + # ItemID extends beyond content, stop parsing + puts stderr "punk::winlnk::Get_LinkTargetIDList_iteminfo: ItemID at offset $offset with size $size extends beyond content length, stopping parse" + break + } + set itemid [string range $idlist_content $offset [expr {$offset + $size - 1}]] + set itemid_bytes [string range $itemid 0 1] + binary scan $itemid_bytes su itemid_size + #in *general* byte 3 of the ItemID structure can be used to determine the type of the item + #(e.g. file, folder, network location, etc.) but this is not always reliable and can vary + #based on the specific structure of the ItemID and the context in which it is used + set itemid_type_byte [string index $itemid 2] + #puts stderr "ItemID size: $itemid_size, type byte: [format %02X [scan $itemid_type_byte %c]]" + set maybe_type [format %02X [scan $itemid_type_byte %c]] + lappend result [dict create size $itemid_size type $maybe_type rawcontent $itemid] + + incr offset $size + } + return $result + } + proc Get_LinkInfo_content {contents} { + set idlist_size [Get_LinkTargetIDList_size $contents] + if {$idlist_size == 0} { + set offset 0 + } else { + set offset [expr {2 + $idlist_size}] ;#LinkTargetIdList IDListSize field + value + } + set linkinfo_start [expr {76 + $offset}] + if {[Header_Has_LinkFlag $contents "B"]} { + #puts stderr "linkinfo_start: $linkinfo_start" + set 4bytes [string range $contents $linkinfo_start $linkinfo_start+3] + binary scan $4bytes i val ;#size *including* these 4 bytes + set linkinfo_content [string range $contents $linkinfo_start [expr {$linkinfo_start + $val -1}]] + return [dict create linkinfo_start $linkinfo_start size $val next_start [expr {$linkinfo_start + $val}] content $linkinfo_content] + } else { + return [dict create linkinfo_start $linkinfo_start size 0 next_start $linkinfo_start content ""] + } + } + + proc LinkInfo_get_fields {linkinfocontent} { + #TODO - finish parsing of LinkInfo - add support + #Link location information + #present if data flag HasLinkInfo exists. + + set 4bytes [string range $linkinfocontent 0 3] + binary scan $4bytes i val ;#size *including* these 4 bytes + + set bytes_linkinfoheadersize [string range $linkinfocontent 4 7] + binary scan $bytes_linkinfoheadersize i headersize + + set bytes_linkinfoflags [string range $linkinfocontent 8 11] + set r [binary scan $bytes_linkinfoflags i flags] ;# i for little endian 32-bit signed int + #puts "linkinfoflags: $flags" + + set localbasepath "" + set commonpathsuffix "" + + #REVIEW - flags problem? + if {$flags & 1} { + #VolumeIDAndLocalBasePath + #logger + #puts stderr "VolumeIDAndLocalBasePath" + } + if {$flags & 2} { + #logger + #puts stderr "CommonNetworkRelativeLinkAndPathSuffix" + } + set bytes_volumeid_offset [string range $linkinfocontent 12 15] + set bytes_localbasepath_offset [string range $linkinfocontent 16 19] + set bytes_commonnetworkrelativelinkoffset [string range $linkinfocontent 20 23] + set bytes_commonpathsuffix_offset [string range $linkinfocontent 24 27] + + binary scan $bytes_localbasepath_offset i bp_offset + if {$bp_offset > 0} { + set tail [string range $linkinfocontent $bp_offset end] + set stringterminator 0 + set i 0 + set localbasepath "" + #TODO + while {!$stringterminator & $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append localbasepath $c + } + incr i + } + } + binary scan $bytes_commonpathsuffix_offset i cps_offset + if {$cps_offset > 0} { + set tail [string range $linkinfocontent $cps_offset end] + set stringterminator 0 + set i 0 + set commonpathsuffix "" + #TODO + while {!$stringterminator && $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append commonpathsuffix $c + } + incr i + } + } + + + return [dict create localbasepath $localbasepath commonpathsuffix $commonpathsuffix note ] + } + + proc Contents_Get_Info {contents} { + + + #todo - return something like the perl lnk-parse-1.0.pl script? + + #Link File: C:/repo/jn/tclmodules/tomlish/src/modules/test/#modpod-tomlish-0.1.1/suites/all/arrays_1.toml#roundtrip+roundtrip_files+arrays_1.toml.fauxlink.lnk + #Link Flags: HAS SHELLIDLIST | POINTS TO FILE/DIR | NO DESCRIPTION | HAS RELATIVE PATH STRING | HAS WORKING DIRECTORY | NO CMD LINE ARGS | NO CUSTOM ICON | + #File Attributes: ARCHIVE + #Create Time: Sun Jul 14 2024 10:41:34 + #Last Accessed time: Sat Sept 21 2024 02:46:10 + #Last Modified Time: Tue Sept 10 2024 17:16:07 + #Target Length: 479 + #Icon Index: 0 + #ShowWnd: 1 SW_NORMAL + #HotKey: 0 + #(App Path:) Remaining Path: repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-0.1.1\suites\roundtrip\roundtrip_files\arrays_1.toml + #Relative Path: ..\roundtrip\roundtrip_files\arrays_1.toml + #Working Dir: C:\repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-0.1.1\suites\roundtrip\roundtrip_files + + variable LinkFlags + set flags_enabled [list] + dict for {k v} $LinkFlags { + if {[Header_Has_LinkFlag $contents $k] > 0} { + lappend flags_enabled $k + } + } + + set showcommand_val [Header_Get_ShowCommand $contents] + switch -- $showcommand_val { + 1 { + set showwnd [list 1 SW_SHOWNORMAL] + } + 3 { + set showwnd [list 3 SW_SHOWMAXIMIZED] + } + 7 { + set showwnd [list 7 SW_SHOWMINNOACTIVE] + } + default { + set showwnd [list $showcommand_val SW_SHOWNORMAL-effective] + } + } + + set linkinfo_content_dict [Get_LinkInfo_content $contents] + set localbase_path "" + set suffix_path "" + set linkinfocontent [dict get $linkinfo_content_dict content] + set next_start [dict get $linkinfo_content_dict next_start] ;#location of section following LinkInfo (Location information) - this will be the Data Strings. + set link_target "" + set linkfields [dict create] + if {$linkinfocontent ne ""} { + set linkfields [LinkInfo_get_fields $linkinfocontent] + set localbase_path [dict get $linkfields localbasepath] + set suffix_path [dict get $linkfields commonpathsuffix] + if {"windows" eq $::tcl_platform(platform)} { + set link_target [file join $localbase_path $suffix_path] + } else { + set suffix_path [string trimleft [string map {\\ /} $suffix_path] /] + if {[regexp {([a-zA-Z]):\\(.*)} $localbase_path _match drive_letter tail]} { + set localbase_path [string map {\\ /} $localbase_path] + set tail [string trimleft [string map {\\ /} $tail] /] + set link_target "" + #shortcut basepath is a windows path with drive letter - try to resolve it on unix by looking for a corresponding mount from fstab or a point under /mnt + set mountinfo [exec mount] + foreach line [split $mountinfo "\n"] { + #review - a more specific mount target might exist that includes the drive letter as part of the mount point name and is a longer prefix of the localbase_path + #- we should probably look for the longest prefix match rather than just the drive letter + if {[regexp -nocase -- [string cat ^$drive_letter {:\\\s+on\s+(\S+)}] $line _match mount_point]} { + set link_target [file join $mount_point $tail $suffix_path] + break + } + } + if {$link_target eq ""} { + #review - under what circumstances could this happen? If the drive letter doesn't match any mount points, then /mnt/drive_letter should generally already have been found above above + # - However, it may be possible for /mnt/drive_Letter to still exist even if it's not reflected in the output of mount or the output of mount is in an unexpected format. + + #nothing in mount result matches the drive letter - try looking for a mount point under /mnt with the drive letter as the name + if {[file exists /mnt/$drive_letter]} { + set link_target [file join /mnt/$drive_letter $tail $suffix_path] + } else { + if {$drive_letter eq [string tolower $drive_letter]]} { + set op_drive_letter [string toupper $drive_letter] + } else { + set op_drive_letter [string tolower $drive_letter] + } + if {[file exists /mnt/$op_drive_letter]} { + set link_target [file join /mnt/$op_drive_letter $tail $suffix_path] + } else { + #leave as is except for backslashes converted to forward + #- probably won't resolve correctly unless the unix system has a folder named drive_letter: in the current folder with a copy of the original filestructure. + set link_target [file join $localbase_path $suffix_path] + } + } + } else { + #shortcut basepath is a windows path with drive letter and we found a matching mount point - link_target is set to the resolved path + } + } else { + #shortcut basepath doesn't match expected windows path format - just join it with the suffix and hope for the best + #could be something like a network path or it could be something else entirely + set link_target [file join $localbase_path $suffix_path] + } + } + } + + # ---------------------------------------------------------------------- + #todo - get Data strings by parsing contents starting at $next_start + #stored in following order: + # description + # relative path + # working directory + # command line arguments + # icon location + + #Data strings format: + # 2 bytes: number of characters in the string + # following: The string. ASCII or UTF-16 little-endian string + + set datastring_dict [Contents_Get_DataStrings $contents $next_start] + + # ---------------------------------------------------------------------- + + set file_attributes [Header_Get_FileAttributes $contents] + set linktargetidlist [Get_LinkTargetIDList_iteminfo $contents] + + set target_type_info [Get_target_type $contents $file_attributes] + set target_type [dict get $target_type_info type] + set target_type_mech [dict get $target_type_info mechanism] + if {$target_type eq "unknown"} { + if {[file exists $link_target]} { + set target_type [file type $link_target] + set target_type_mech "filesystem" + } + } + + set result [dict create\ + link_target $link_target\ + link_flags $flags_enabled\ + file_attributes $file_attributes\ + creation_time [Header_Get_CreationTime $contents]\ + access_time [Header_Get_AccessTime $contents]\ + write_time [Header_Get_WriteTime $contents]\ + target_length [Header_Get_FileSize $contents]\ + icon_index ""\ + showwnd "$showwnd"\ + hotkey [Header_Get_HotKey $contents]\ + target_type $target_type\ + target_type_mech $target_type_mech\ + idlist $linktargetidlist\ + linkinfo $linkfields\ + ] + #relative_path "?" + } + + proc file_check_header {path} { + #*** !doctools + #[call [fun file_check_header] [arg path] ] + #[para]Return 0|1 + #[para]Determines if the .lnk file specified in path has a valid header for a windows shortcut + set c [Get_contents $path 20] + return [Contents_check_header $c] + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::resolve + @cmd -name punk::winlnk::resolve\ + -summary\ + "Return information about a .lnk file (windows shortcut)"\ + -help\ + "Return a dict of info obtained by parsing the binary data in a windows .lnk file. + If the .lnk header check fails, then the .lnk file probably isn't really a shortcut + file and the dictionary will contain an 'error' key." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc resolve {path} { + #*** !doctools + #[call [fun resolve] [arg path] ] + #[para] Return a dict of info obtained by parsing the binary data in a windows .lnk file + #[para] If the .lnk header check fails, then the .lnk file probably isn't really a shortcut file and the dictionary will contain an 'error' key + set c [Get_contents $path] + if {[Contents_check_header $c]} { + return [Contents_Get_Info $c] + } else { + return [dict create error "lnk_header_check_failed"] + } + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::file_show_info + @cmd -name punk::winlnk::file_show_info\ + -summary\ + "Show information about a .lnk file (windows shortcut)"\ + -help\ + "Print to stdout the information obtained by parsing the binary data in a windows .lnk file, in a human readable format. + If the .lnk header check fails, then the .lnk file probably isn't really a shortcut file and an error message will be printed." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc file_show_info {path} { + package require punk::lib + #punk::lib::showdict [resolve $path] */@* + set field_queries [dict create\ + link_target link_target\ + link_flags link_flags/@*\ + file_attributes file_attributes\ + creation_time creation_time\ + access_time access_time\ + write_time write_time\ + target_length target_length\ + icon_index icon_index\ + showwnd showwnd\ + hotkey hotkey\ + target_type target_type\ + idlist idlist/@*/@*.@*\ + linkinfo linkinfo/@*.@*\ + ] + set info [resolve $path] + if {[dict exists $info error]} { + return "Error: [dict get $info error]" + } else { + set querystring "" + foreach field [dict keys $info] { + if {[dict exists $field_queries $field]} { + append querystring "[dict get $field_queries $field] " + } else { + append querystring "$field " + } + } + puts "querystring: $querystring" + return [punk::lib::showdict $info {*}$querystring] + } + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::target + @cmd -name punk::winlnk::target\ + -summary\ + "Return the target path of a .lnk file (windows shortcut)"\ + -help\ + "Return the target path of the .lnk file specified in path. + This is a convenience function that extracts the target path from the .lnk file and returns it directly, + without all the additional information that resolve provides. If the .lnk header check fails, then + the .lnk file probably isn't really a shortcut file and an error message will be returned." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc target {path} { + #*** !doctools + #[call [fun target] [arg path] ] + #[para]Return the target path of the .lnk file specified in path + set info [resolve $path] + if {[dict exists $info error]} { + error [dict get $info error] + } else { + return [dict get $info link_target] + } + } + + proc target_type {path} { + set content [Get_contents $path] + if {![Contents_check_header $content]} { + error "lnk_header_check_failed" + } + set info [Contents_Get_Info $content] + return [dict get $info target_type] + } + + proc Get_target_type {content file_attributes} { + #determine type based on info in the .lnk file, such as file attributes and link flags + + if {"DIRECTORY" in $file_attributes} { + return [dict create type directory mechanism file_attributes]" + } elseif {"ARCHIVE" in $file_attributes} { + return [dict create type file mechanism file_attributes] + } else { + set iteminfo [Get_LinkTargetIDList_iteminfo $content] + if {[llength $iteminfo] > 0} { + set first_item [lindex $iteminfo 0] + set first_item_type [dict get $first_item type] + set saw_2f 0 + switch -- $first_item_type { + "1F" { + #plain files and folders always seem to have a first item type of 1F + #so does "local disk" + set type_so_far "unknown" + #For a file, we may first see multiple items of type 32 (directory) as we go through the folder structure, + #and then finally an item of type 31 (file) at the end. + #For a network location, we may see an item of type 2F. + #So we need to loop through all the items and keep track of what we've seen so far. + foreach item $iteminfo { + set item_type [dict get $item type] + if {$item_type eq "31"} { + set type_so_far "directory" + } elseif {$item_type eq "32"} { + return [dict create type file mechanism idlist] + } elseif {$item_type eq "2F"} { + set saw_2f 1 + } + } + if {$type_so_far eq "unknown" && $saw_2f} { + return [dict create type "local disk" mechanism idlist] + } + return [dict create type $type_so_far mechanism idlist] + } + } + return [dict create type "unknown" mechanism idlist] + } else { + return [dict create type "unknown" mechanism idlist] + } + } + } + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk::lib { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::winlnk::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +#tcl::namespace::eval punk::winlnk::system { + #*** !doctools + #[subsection {Namespace punk::winlnk::system}] + #[para] Internal functions that are not part of the API + + + +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +namespace eval ::punk::args::register { + #use fully qualified so 8.6 doesn't find existing var in global namespace + lappend ::punk::args::register::NAMESPACES ::punk::winlnk +} +## Ready +package provide punk::winlnk [tcl::namespace::eval punk::winlnk { + variable pkg punk::winlnk + variable version + set version 0.1.1 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm index a876d781..9079dbbc 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm @@ -196,7 +196,8 @@ namespace eval punk::winpath { #https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file #according to the above: Use any character in the current code page for a name, including Unicode characters and characters in the extended character set (128–255), except for the following: set reserved [list < > : \" / \\ | ? *] - + #embedded nulls (\0) are also disallowed - but these are also disallowed on unix-like platforms. + set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] #we need to exclude things like path/.. path/. foreach seg [file split $path] { @@ -208,6 +209,14 @@ namespace eval punk::winpath { #/./ /../ segments don't require protection - keep checking. continue } + if {[string toupper [file rootname $seg]] in $windows_reserved_names} { + #windows reserved names + #there are reports that these names aren't usable even with file extension - e.g that CON.txt is reserved and can't be created by some standard tools. + #In practice on windows 11 in 2026, cmd.exe,notepad,explorer and powershell seem to handle creation and access of CON.txt and PRN.txt etc without issue. + # the windows documentation reference above however still states that these names with an extension should be avoided. + #For this reason - we will still treat these as reserved and require protection with dos device syntax - even though in practice they seem to be usable without it. + return 1 + } #only check for actual space as other whitespace seems to work without being stripped #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm index 53cb4067..ea72ad1c 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm @@ -1422,7 +1422,7 @@ namespace eval punk { } if {[string is digit -strict [join $subindices ""]]} { - #review tip 551 (tcl9+?) + #review tip 551 (underscores in numerical literals) (tcl9+) #puts stderr ">>>>>>>>>>>>>>>> data: $leveldata selector: $selector subindices: $subindices" #pure numeric keylist - put straight to lindex # @@ -2650,6 +2650,76 @@ namespace eval punk { } }] } + } elseif {[punk::lib::is_indexset $index]} { + #review - a basic math statement such as 5-1 is also a valid member of an indexset + #see punk::lib::is_indexset and punk::lib::indexset_resolve + #single element of an indexset - e.g @..3 or @1..5 or @..end or @.. or @end..0 or @end-5..8 etc + set is_range [expr {[string first ".." $index] >= 0}] + if {$get_not} { + if {$is_range} { + lappend INDEX_OPERATIONS list-range-not + } else { + lappend INDEX_OPERATIONS listindex-not + } + set assign_script { + set assigned [lremove $assigned {*}[punk::lib::indexset_resolve [llength $leveldata] ]] + } + } else { + if {$is_range} { + lappend INDEX_OPERATIONS list-range + } else { + lappend INDEX_OPERATIONS listindex + } + set assign_script { + set assigned [lmap i [punk::lib::indexset_resolve [llength $leveldata] ] {lindex $leveldata $i}] + } + } + + if {$do_bounds_check} { + #bounds check each element of the resolved indexset - if any are out of bounds, return mismatch-list-index-out-of-range + if {$is_range} { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + lassign [split ..] idx1 _ idx2 + set v2 [punk::lib::lindex_resolve_basic $len $idx2] + if {isinf($v2)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + set v1 [punk::lib::lindex_resolve_basic $len $idx1] + if {isinf($v1)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + ${$assign_script} + } + }] + } else { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + set v1 [punk::lib::lindex_resolve_basic $len ] + if {isinf($v1)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + ${$assign_script} + } + }] + } + } else { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + ${$assign_script} + } + }] + } + set script [string map [list $index] $script] } elseif {[string first "end" $index] >=0} { if {[regexp {^end([-+]{1,2}[0-9]+)$} $index _match endspec]} { diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm new file mode 100644 index 00000000..6a7b79d6 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm @@ -0,0 +1,5488 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::lib 0.1.6 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::lib 0 0.1.6] +#[copyright "2024"] +#[titledesc {punk general utility functions}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk library}] [comment {-- Description at end of page heading --}] +#[require punk::lib] +#[keywords module utility lib] +#[description] +#[para]This is a set of utility functions that are commonly used across punk modules or are just considered to be general-purpose functions. +#[para]The base set includes string and math functions but has no specific theme + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::lib +#[subsection Concepts] +#[para]The punk::lib modules should have no strong dependencies other than Tcl +#[para]Dependendencies that only affect display or additional functionality may be included - but should fail gracefully if not present, and only when a function is called that uses one of these soft dependencies. +#[para]This requirement for no strong dependencies, means that many utility functions that might otherwise seem worthy of inclusion here are not present. + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::lib +#[list_begin itemized] + +package require Tcl 8.6- +package require punk::args +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {punk::args}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + + +tcl::namespace::eval punk::lib::ensemble { + #wiki.tcl-lang.org/page/ensemble+extend + # extend an ensemble-like routine with the routines in some namespace + + #NOTE - the extension ns becomes the '-namespace ' for the original routine name, + #with -unknown handling the original subcommands. + #This makes the original ensemble harder to introspect! + #e.g (the original -map or -namespace not visible) + #In this specific case (which, being published on the wiki might be common in the wild) + #we could call {*}[namespace ensemble configure $routine -unknown] $routine + #and then detect that the first resulting word is an ensemble + #For arbitrary '-unknown scripts' - sensible introspection is likely not possible + + proc extend {routine extension} { + if {![string match ::* $routine]} { + set resolved [uplevel 1 [list ::tcl::namespace::which $routine]] + if {$resolved eq {}} { + error [list {no such routine} $routine] + } + set routine $resolved + } + set routinens [tcl::namespace::qualifiers $routine] + if {$routinens eq {::}} { + set routinens {} + } + set routinetail [tcl::namespace::tail $routine] + + if {![string match ::* $extension]} { + set extension [uplevel 1 [ + list [tcl::namespace::which namespace] current]]::$extension + } + + if {![tcl::namespace::exists $extension]} { + error [list {no such namespace} $extension] + } + + set extension [tcl::namespace::eval $extension [ + list [tcl::namespace::which namespace] current]] + + tcl::namespace::eval $extension [ + list [tcl::namespace::which namespace] export *] + + while 1 { + set renamed ${routinens}::${routinetail}_[clock clicks] ;#clock clicks unlikely to collide when not directly consecutive such as: list [clock clicks] [clock clicks] + if {[tcl::namespace::which $renamed] eq {}} break + } + + rename $routine $renamed + + tcl::namespace::eval $extension [ + list namespace ensemble create -command $routine -unknown [ + list apply {{renamed ensemble routine args} { + list $renamed $routine + }} $renamed + ] + ] + + return $routine + } +} + +# some (?) tcl bug check procs needed to exist before main punk::lib namespaces are evaluated +tcl::namespace::eval punk::lib::check { + #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) + #Not any sort of comprehensive check of known tcl bugs. + #These are reported in warning output of 'help tcl' - or used for workarounds in some cases. + proc has_tclbug_regexp_emptystring {} { + #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces + #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, + #but as an apparent violation of Tcl's normal parsing rules - was evidently seen as a bug and fixed in: + #https://core.tcl-lang.org/tcl/info/cb03e57a (tcl 9.0.3+ ?) + set bug [expr {![catch {regexp {} [error should_error]}]}] + return [dict create bug $bug bugref cb03e57a description {regexp emptystring first argument over-optimised - difference in compiled vs traced behaviour.} level minor] + } + proc has_tclbug_lsearch_sorted_inline_subindices {} { + if {[catch {lsearch -sorted -subindices -inline -index 0 {{a 1} {a 2} {b 3} {c 4} {c 5}} b} result]} { + #probably tcl version doesn't support all options + set bug 0 + } else { + set bug [expr {$result ne "b"}] + } + set description "lsearch -sorted with -subindices -inline - incorrect result." + return [dict create bug $bug bugref bc4ac0 description $description level minor] + } + proc has_tclbug_script_var {} { + + set script {set j [list spud] ; list} + append script \n + uplevel #0 $script + set rep1 [tcl::unsupported::representation $::j] + set script "" + set rep2 [tcl::unsupported::representation $::j] + + set nostring1 [string match "*no string" $rep1] + set nostring2 [string match "*no string" $rep2] + + #we assume it should have no string rep in either case + #Review: check Tcl versions for behaviour/consistency + if {!$nostring2} { + set bug true + } else { + set bug false + } + set description "string rep for list variable in script generated when script changed\n(not an acknowledged/reported bug)" + return [dict create bug $bug bugref "" description $description level minor] + } + proc has_tclbug_lsearch_strideallinline {} { + #bug only occurs with single -index value combined with -stride -all -inline -subindices + #https://core.tcl-lang.org/tcl/tktview/5a1aaa201d + if {[catch {lsearch -stride 3 -all -inline -index 1 -subindices {a1 a2 a3} *} result]} { + #we aren't looking for an error result - error most likely indicates tcl too old to support -stride + set bug 0 + } else { + set bug [expr {$result ne "a2"}] + } + set description "lsearch -stride with -subindices -inline -all and single index - incorrect results." + return [dict create bug $bug bugref 5a1aaa201d description $description level major] + } + proc has_tclbug_lseq_sign {} { + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + if {[catch {lseq 1 10}]} { + set bug 0 + } else { + set r1 [lseq 1 10 -9] + set r2 [lseq 1 10 -10] + set bug [expr {$r1 ne $r2}] + } + set description "lseq step sign not matching sequence direction - inconsistent results." + return [dict create bug $bug bugref 999b6966b2 description $description level minor] + } + + proc has_tclbug_list_quoting_emptyjoin {} { + #https://core.tcl-lang.org/tcl/tktview/e38dce74e2 + set v1 [list {*}[lindex #foo] {*}[]] ;#can return "#foo" instead of "{#foo}" under some beta 9 releases + set v2 [list #foo] ;#normal tcl list quoting for 1st element that looks like a comment -> "{#foo}" + set bug [expr {![string equal $v1 $v2]}] ;#if they're not equal - we have the bug. + set description "lists elements not properly quoted in some cases. e.g 'list {*}[lindex {etc #foo} 1] {*}[list]' (#foo not braced)" + return [dict create bug $bug bugref e38dc74e2 description $description level medium] + } + + proc has_tclbug_safeinterp_compile {{show 0}} { + #ensemble calls within safe interp not compiled + #https://core.tcl-lang.org/tcl/tktview/1095bf7f756f9aed6bde + namespace eval [namespace current]::testcompile { + proc ensembletest {} {string index a 0} + } + + set has_bug 0 + + set bytecode_outer [tcl::unsupported::disassemble proc [namespace current]::testcompile::ensembletest] + if {$show} { + puts outer: + puts $bytecode_outer + } + if {![interp issafe]} { + #test of safe subinterp only needed if we aren't already in a safe interp + if {![catch { + interp create x -safe + } errMsg]} { + x eval {proc ensembletest {} {string index a 0}} + set bytecode_safe [x eval {tcl::unsupported::disassemble proc ::ensembletest}] + if {$show} { + puts safe: + puts $bytecode_safe + } + interp delete x + #mainly we expect the safe interp might contain invokeStk - indicating not byte compiled (or we would see strindex instead) + #It's possible the interp we're running in is also not compiling ensembles. + #we could then get a result of 2 - which still indicates a problem + if {[string last "invokeStk" $bytecode_safe] >= 1} { + incr has_bug + } + } else { + #our failure to create a safe interp here doesn't necessarily mean the Tcl version doesn't have the problem - but we could end up returning zero if somehow safe interp can't be created from unsafe interp? + #unlikely - but we should warn + puts stderr "Unable to create a safe sub-interp to test - result only indicates status of current interpreter" + } + } + + namespace delete [namespace current]::testcompile + + if {[string last "invokeStk" $bytecode_outer] >= 1} { + incr has_bug + } + set description "ensemble commands not compiled in safe interps - heavy performance impact in safe interps" + return [dict create bug $has_bug bugref 1095bf7f756f9aed6bde description $description level major] + } +} + +tcl::namespace::eval punk::lib::compat { + #*** !doctools + #[subsection {Namespace punk::lib::compat}] + #[para] compatibility functions for features that may not be available in earlier Tcl versions + #[para] These are generally 'forward compatibility' functions ie allowing earlier versions to use later features/idioms by using a Tcl-only version of a missing builtin. + #[para] Such Tcl-only versions will inevitably be less performant - perhaps significantly so. + + #*** !doctools + #[list_begin definitions] + + + + + if {"::lremove" ne [info commands ::lremove]} { + #puts stderr "Warning - no built-in lremove" + interp alias {} lremove {} ::punk::lib::compat::lremove + } + proc lremove {list args} { + #*** !doctools + #[call [fun lremove] [arg list] [opt {index ...}]] + #[para] Forwards compatible lremove for versions 8.6 or less to support equivalent 8.7 lremove + + set data [lmap v $list {list data $v}] + foreach doomed_index $args { + if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} + lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value + } + set keep [lsearch -all -inline -not -exact $data x] + return [lsearch -all -inline -index 1 -subindices $keep *] + } + #not significantly different in performance over test of 100 elements - getting somewhere near 10% for 1k integers + proc lremove2 {list args} { + set data [lmap v $list {list data $v}] + foreach doomed_index $args { + if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} + lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value + } + set keep [lsearch -all -inline -not -exact $data x] + return [lmap v $keep {lindex $v 1}] + } + #outside of lmap - don't know of any particularly nice ways to flatten to subindex 1 of each element.. + #flattening then lsearch with -stride and * would be nice - but it's not avail in 8.6 + + if {![info exists ::auto_index(readFile)]} { + if {[info commands ::readFile] eq ""} { + proc ::readFile {filename {mode text}} { + #readFile not seen in auto_index or as command: installed by punk::lib + # Parse the arguments + set MODES {binary text} + set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] + set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] + + # Read the file + set f [open $filename [dict get {text r binary rb} $mode]] + try { + return [read $f] + } finally { + close $f + } + } + } + } + if {![info exists ::auto_index(writeFile)]} { + if {[info commands ::writeFile] eq ""} { + proc ::writeFile {args} { + #writeFile not seen in auto_index or as command: installed by punk::lib + # Parse the arguments + switch [llength $args] { + 2 { + lassign $args filename data + set mode text + } + 3 { + lassign $args filename mode data + set MODES {binary text} + set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] + set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] + } + default { + set COMMAND [lindex [info level 0] 0] + return -code error -errorcode {TCL WRONGARGS} "wrong # args: should be \"$COMMAND filename ?mode? data\"" + } + } + + # Write the File + set f [open $filename [dict get {text w binary wb} $mode]] + try { + puts -nonewline $f $data + } finally { + close $f + } + } + } + } + + if {"::lpop" ne [info commands ::lpop]} { + #puts stderr "Warning - no built-in lpop" + interp alias {} lpop {} ::punk::lib::compat::lpop + punk::args::set_idalias ::punk::lib::compat::lpop ::lpop ;#point to the definition of ::lpop defined in punk::args::tclcore + } + proc lpop {lvar args} { + #*** !doctools + #[call [fun lpop] [arg listvar] [opt {index}]] + #[para] Forwards compatible lpop for versions 8.6 or less to support equivalent 8.7 lpop + upvar $lvar l + if {![llength $args]} { + set args [list end] + } + set v [lindex $l {*}$args] + set newlist $l + + set path [list] + set subl $l + for {set i 0} {$i < [llength $args]} {incr i} { + set idx [lindex $args $i] + if {![llength [lrange $subl $idx $idx]]} { + error "tcl_lpop index \"$idx\" out of range" + } + lappend path [lindex $args $i] + set subl [lindex $l {*}$path] + } + + set sublist_path [lrange $args 0 end-1] + set tailidx [lindex $args end] + if {![llength $sublist_path]} { + #set newlist [lremove $newlist $tailidx] + set newlist [lreplace $newlist $tailidx $tailidx] + } else { + set sublist [lindex $newlist {*}$sublist_path] + #set sublist [lremove $sublist $tailidx] + set sublist [lreplace $sublist $tailidx $tailidx] + lset newlist {*}$sublist_path $sublist + } + #puts "[set l] -> $newlist" + set l $newlist + return $v + } + if {"::ledit" ni [info commands ::ledit]} { + interp alias {} ledit {} ::punk::lib::compat::ledit + punk::args::set_idalias ::punk::lib::compat::ledit ::ledit + } + proc ledit {lvar first last args} { + upvar $lvar l + #use lindex_resolve to support for example: ledit lst end+1 end+1 h i + set fidx [punk::lib::lindex_resolve [llength $l] $first] + switch -exact -- $fidx { + -Inf { + #index below lower bound + set pre [list] + set fidx -1 + } + Inf { + #first index position is greater than index of last element in the list + set pre [lrange $l 0 end] + set fidx [llength $l] + } + default { + #set pre [lrange $l 0 $first-1] + set pre [lrange $l 0 $fidx-1] + } + } + set lidx [punk::lib::lindex_resolve [llength $l] $last] + switch -exact -- $lidx { + -Inf { + #index below lower bound + set post [lrange $l 0 end] + } + Inf { + #index above upper bound + set post [list] + } + default { + if {$lidx < $fidx} { + #from ledit man page: + #If last is less than first, then any specified elements will be inserted into the list before the element specified by first with no elements being deleted. + set post [lrange $l $fidx end] + } else { + #set post [lrange $l $last+1 end] + set post [lrange $l $lidx+1 end] + } + } + } + set l [list {*}$pre {*}$args {*}$post] + } + + + #slight isolation - varnames don't leak - but calling context vars can be affected + proc lmaptcl2 {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + set linkvars [uplevel 1 [list ::tcl::info::vars]] + set nscaller [uplevel 1 [list ::tcl::namespace::current]] + + set apply_script "" + foreach vname $linkvars { + append apply_script [string map [list %vname% $vname]\ + {upvar 2 %vname% %vname%}\ + ] \n + } + append apply_script $script \n + + #puts "--> $apply_script" + foreach $varnames $list { + lappend result [apply\ + [list\ + $varnames\ + $apply_script\ + $nscaller\ + ] {*}[subst $values]\ + ] + } + return $result + } + + if {"::lmap" ne [info commands ::lmap]} { + #puts stderr "Warning - no built-in lpop" + interp alias {} lmap {} ::punk::lib::compat::lmaptcl + } + #lmap came in Tcl 8.6 - so probably not much need for a tcl forward compatibility version - but here it is anyway + proc lmaptcl {varnames list script} { + set result [list] + set varlist [list] + foreach varname $varnames { + upvar 1 $varname var_$varname ;#ensure no collisions with vars in this proc + lappend varlist var_$varname + } + foreach $varlist $list { + lappend result [uplevel 1 $script] + } + return $result + } + + #tcl8.7/9 compatibility for 8.6 + if {[info commands ::tcl::string::insert] eq ""} { + #https://wiki.tcl-lang.org/page/string+insert + # Pure Tcl implementation of [string insert] command. + proc ::tcl::string::insert {string index insertString} { + # Convert end-relative and TIP 176 indexes to simple integers. + if {[regexp -expanded { + ^(end(?![\t\n\v\f\r ]) # "end" is never followed by whitespace + |[\t\n\v\f\r ]*[+-]?\d+) # m, with optional leading whitespace + (?:([+-]) # op, omitted when index is "end" + ([+-]?\d+))? # n, omitted when index is "end" + [\t\n\v\f\r ]*$ # optional whitespace (unless "end") + } $index _ m op n]} { + # Convert first index to an integer. + switch $m { + end {set index [string length $string]} + default {scan $m %d index} + } + + # Add or subtract second index, if provided. + switch $op { + + {set index [expr {$index + $n}]} + - {set index [expr {$index - $n}]} + } + } elseif {![string is integer -strict $index]} { + # Reject invalid indexes. + return -code error "bad index \"$index\": must be\ + integer?\[+-\]integer? or end?\[+-\]integer?" + } + + # Concatenate the pre-insert, insertion, and post-insert strings. + string cat [string range $string 0 [expr {$index - 1}]] $insertString\ + [string range $string $index end] + } + + # Bind [string insert] to [::tcl::string::insert]. + tcl::namespace::ensemble configure string -map [tcl::dict::replace\ + [tcl::namespace::ensemble configure string -map]\ + insert ::tcl::string::insert] + } + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib::compat ---}] +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::lib { + variable PUNKARGS + tcl::namespace::export * + variable has_struct_list + set has_struct_list [expr {![catch {package require struct::list}]}] + variable has_struct_set + set has_struct_set [expr {![catch {package require struct::set}]}] + variable has_punk_ansi + set has_punk_ansi [expr {![catch {package require punk::ansi}]}] + set has_twapi 0 + if {"windows" eq $::tcl_platform(platform)} { + set has_twapi [expr {![catch {package require twapi}]}] + } + + namespace eval argdoc { + #non-colour SGR codes + set I "\x1b\[3m" ;# [a+ italic] + set NI "\x1b\[23m" ;# [a+ noitalic] + set B "\x1b\[1m" ;# [a+ bold] + set N "\x1b\[22m" ;# [a+ normal] + set T "\x1b\[1\;4m" ;# [a+ bold underline] + set NT "\x1b\[22\;24m\x1b\[4:0m" ;# [a+ normal nounderline] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::is_main_script + @cmd -name punk::lib::is_main_script\ + -summary\ + "Test if current script was launched directly."\ + -help\ + "The ${$B}main script${$N} is the primary script that is executed + by the interpreter, e.g. tclsh or wish. + (as opposed to being loaded by the 'source' command) + + see https://wiki.tcl-lang.org/page/main+script" + @values -min 0 -max 0 + }] + } + proc is_main_script {} { + #see https://wiki.tcl-lang.org/page/main+script + if {[info script] ne "" && [info exists ::argv0] + && + [file dirname [file normalize [file join [info script] ...]]] + eq + [file dirname [file normalize [file join $::argv0 ...]]] + } { + return true + } else { + return false + } + } + + + + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + # Maintenance - This is the primary source for tm_version... functions + # - certain packages script require these but without package dependency + # - 1 punk boot script + # - 2 packagetrace module + # - These should be updated to sync with this + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + proc tm_version_isvalid {versionpart} { + #Needs to be suitable for use with Tcl's 'package vcompare' + if {![catch [list package vcompare $versionpart $versionpart]]} { + return 1 + } else { + return 0 + } + } + proc tm_version_major {version} { + if {![tm_version_isvalid $version]} { + error "Invalid version '$version' is not a proper Tcl module version number" + } + set firstpart [lindex [split $version .] 0] + #check for a/b in first segment + if {[string is integer -strict $firstpart]} { + return $firstpart + } + if {[string first a $firstpart] > 0} { + return [lindex [split $firstpart a] 0] + } + if {[string first b $firstpart] > 0} { + return [lindex [split $firstpart b] 0] + } + error "tm_version_major unable to determine major version from version number '$version'" + } + proc tm_version_canonical {ver} { + #accepts a single valid version only - not a bounded or unbounded spec + if {![tm_version_isvalid $ver]} { + error "tm_version_canonical version '$ver' is not valid for a package version" + } + set parts [split $ver .] + set newparts [list] + foreach o $parts { + set trimmed [string trimleft $o 0] + set firstnonzero [string index $trimmed 0] + switch -exact -- $firstnonzero { + "" { + lappend newparts 0 + } + a - b { + #e.g 000bnnnn -> bnnnnn + set tailtrimmed [string trimleft [string range $trimmed 1 end] 0] + if {$tailtrimmed eq ""} { + set tailtrimmed 0 + } + lappend newparts 0$firstnonzero$tailtrimmed + } + default { + #digit + if {[string is integer -strict $trimmed]} { + #e.g 0100 -> 100 + lappend newparts $trimmed + } else { + #e.g 0100b003 -> 100b003 (still need to process tail) + if {[set apos [string first a $trimmed]] > 0} { + set lhs [string range $trimmed 0 $apos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch + set rhs [string range $trimmed $apos+1 end] ;#assert rhs non-empty and only digits + set rhs [string trimleft $rhs 0] + if {$rhs eq ""} { + set rhs 0 + } + lappend newparts ${lhs}a${rhs} + } elseif {[set bpos [string first b $trimmed]] > 0} { + set lhs [string range $trimmed 0 $bpos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch + set rhs [string range $trimmed $bpos+1 end] ;#assert rhs non-empty and only digits + set rhs [string trimleft $rhs 0] + if {$rhs eq ""} { + set rhs 0 + } + lappend newparts ${lhs}b${rhs} + } else { + #assert - shouldn't get here trimmed val should have been empty, an int or contained an a or b + error "tm_version_canonical error - trimfail - unexpected" + } + } + } + } + } + return [join $newparts .] + } + proc tm_version_required_canonical {versionspec} { + #also trim leading zero from any dottedpart? + #Tcl *allows* leading zeros in any of the dotted parts - but they are not significant. + #e.g 1.01 is equivalent to 1.1 and 01.001 + #also 1b3 == 1b0003 + + if {[string trim $versionspec] eq ""} {return ""} ;#unspecified = any version + set errmsg "tm_version_required_canonical - invalid version specification" + if {[string first - $versionspec] < 0} { + #no dash + #looks like a minbounded version (ie a single version with no dash) convert to min-max form + set from $versionspec + if {![tm_version_isvalid $from]} { + error "$errmsg '$versionpec'" + } + if {![catch {tm_version_major $from} majorv]} { + set from [tm_version_canonical $from] + return "${from}-[expr {$majorv +1}]" + } else { + error "$errmsg '$versionspec'" + } + } else { + # min- or min-max + #validation and canonicalisation (strip leading zeroes from each segment, including either side of a or b) + set parts [split $versionspec -] ;#we expect only 2 parts + lassign $parts from to + if {![tm_version_isvalid $from]} { + error "$errmsg '$versionspec'" + } + set from [tm_version_canonical $from] + if {[llength $parts] == 2} { + if {$to ne ""} { + if {![tm_version_isvalid $to]} { + error "$errmsg '$versionspec'" + } + set to [tm_version_canonical $to] + return $from-$to + } else { + return $from- + } + } else { + error "$errmsg '$versionspec'" + } + error "tm_version_required_canonical should have already returned a canonicalised versionspec - or produced an error with reason before this point" + } + } + # end tm_version... functions + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + + + + # -- --- + #https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists + #DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 + #8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows + # Review and retest as new versions come out. + # -- --- + proc list_multi_append1 {lvar1 lvar2} { + #clear winner in 2024 + upvar $lvar1 l1 $lvar2 l2 + lappend l1 {*}$l2 + return $l1 + } + proc list_multi_append2 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [list {*}$l1 {*}$l2] + } + proc list_multi_append3 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] + } + #testing e.g + #set l1_reset {a b c} + #set l2 {a b c d e f g} + #set l1 $l1_reset + #time {list_multi_append1 l1 l2} 1000 + #set l1 $l1_reset + #time {list_multi_append2 l1 l2} 1000 + # -- --- + + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lswap + @cmd -name punk::lib::lswap\ + -summary\ + "Swap list values in-place"\ + -help\ + "Similar to struct::list swap, except it fully supports basic + list index expressions such as 7-2 end-1 etc. + + struct::list swap doesn't support 'end' offsets, and only + sometimes appears to support basic expressions, depending on the + expression compared to the list length." + @values -min 3 -max 3 + lvar -type string -help\ + "name of list variable" + a -type indexexpression + z -type indexexpression + }] + } + proc lswap {lvar a z} { + upvar $lvar l + set len [llength $l] + if {[lindex_resolve_basic $len $a] < 0 || [lindex_resolve_basic $len $z] < 0} { + #lindex_resolve_basic returns only -Inf if out of range at either bound + #if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred + #(e.g using: lswap mylist end-2 end on a two element list) + + #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + #use full 'lindex_resolve' which can report which side via -Inf and Inf special results being lower and upper bound breaches respectively + set a_index [lindex_resolve $len $a] + set a_msg "" + switch -- $a_index { + -Inf { + set a_msg "1st supplied index $a is below the lower bound for the list (0)" + } + Inf { + set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" + } + } + set z_index [lindex_resolve $len $z] + set z_msg "" + switch -- $z_index { + -Inf { + set z_msg "2nd supplied index $z is below the lower bound for the list (0)" + } + Inf { + set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" + } + } + set errmsg "lswap cannot swap indices $a and $z" + if {$a_msg ne ""} { + append errmsg \n $a_msg + } + if {$z_msg ne ""} { + append errmsg \n $z_msg + } + error $errmsg + } + set item2 [lindex $l $z] + lset l $z [lindex $l $a] + lset l $a $item2 + return $l + } + #proc lswap2 {lvar a z} { + # upvar $lvar l + # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] + #} + + proc lswap2 {lvar a z} { + upvar $lvar l + #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] + } + + #an experimental test of swapping vars without intermediate variables + #It's an interesting idea - but probably of little to no practical use + # - the swap_intvars3 version using intermediate var is faster in Tcl + # - This is probably unsurprising - as it's simpler code. + # Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. + #proc swap_intvars {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] + #} + #proc swap_intvars2 {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {$_x ^ $_y}] + # set _y [expr {$_x ^ $_y}] + # set _x [expr {$_x ^ $_y}] + #} + #proc swap_intvars3 {swapv1 swapv2} { + # #using intermediate variable + # upvar $swapv1 _x $swapv2 _y + # set z $_x + # set _x $_y + # set _y $z + #} + + #*** !doctools + #[subsection {Namespace punk::lib}] + #[para] Core API functions for punk::lib + #[list_begin definitions] + + if {[info commands lseq] ne ""} { + #tcl 8.7+ lseq significantly faster, especially for larger ranges + #The internal rep can be an 'arithseries' with no string representation + #support minimal set from to + proc range {from to {by 1}} { + #note inconsistency with lseq 1 10 by -9 vs lseq 1 10 by -10 + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + lseq $from $to by $by + } + } else { + #lseq accepts basic expressions e.g 4-2 for both arguments + #e.g we can do lseq 0 [llength $list]-1 + #if range is to be consistent with the lseq version above - it should support that, even though we don't support most lseq functionality in either wrapper. + #our range function doesn't support double like lseq does. (deliberate) review + proc range {from to {by ""}} { + if {$by eq "0"} { + #as per lseq, step (by) zero always gives no result + return [list] + } + set to [offset_expr $to] + set from [offset_expr $from] + if {$by ne ""} { + set by [offset_expr $by] + } + #assert $by is now empty string or an integer + if {$to > $from} { + switch -- $by { + "" - 1 { + set count [expr {($to -$from) + 1}] + if {$from == 0} { + return [lsearch -all [lrepeat $count 0] *] + } else { + incr from -1 + return [lmap v [lrepeat $count 0] {incr from}] + } + } + default { + set count [expr {($to - $from + $by) / $by}] + if {$count <= 0} { + #return [list] + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + return [list $from] ;#review + } + set result [list] + for {set i $from} {$i <= $to} {incr i $by} { + lappend result $i + } + return $result + + #if we don't have lseq, we probably don't have lsearch -stride, which would make things simpler. + #set count [expr {($to -$from) + 1}] + #if {$from == 0} { + # set fullrange [lsearch -all [lrepeat $count 0] *] + #} else { + # incr from -1 + # set fullrange [lmap v [lrepeat $count 0] {incr from}] + #} + #set result [list] + #for {set i 0} {$i < $count} {incr i} { + # if {$i % $by == 0} { + # lappend result [lindex $fullrange $i] + # } + #} + #return $result + } + } + #slower methods. + #2) + #set i -1 + #set L [lrepeat $count 0] + #lmap v $L {lset L [incr i] [incr from];lindex {}} + #return $L + #3) + #set L {} + #for {set i 0} {$i < $count} {incr i} { + # lappend L [incr from] + #} + #return $L + } elseif {$from > $to} { + switch -- $by { + "" - -1 { + set count [expr {$from - $to} + 1] + if {$to == 0} { + return [lreverse [lsearch -all [lrepeat $count 0] *]] + } else { + incr from + return [lmap v [lrepeat $count 0] {incr from -1}] + } + } + default { + set count [expr {($to - $from + $by) / $by}] + if {$count <= 0} { + #return [list] + return [list $from] ;#review + } + set result [list] + for {set i $from} {$i >= $to} {incr i $by} { + lappend result $i + } + return $result + } + } + + #2) + #set i -1 + #set L [lrepeat $count 0] + #lmap v $L {lset L [incr i] [incr from -1];lindex {}} + #return $L + #3) + #set L {} + #for {set i 0} {$i < $count} {incr i} { + # lappend L [incr from -1] + #} + #return $L + } else { + return [list $from] + } + } + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lzip + @cmd -name punk::lib::lzip\ + -summary\ + "zip any number of lists together."\ + -help\ + "Conceptually equivalent to converting a list of rows + to a list of columns. + + The number of returned lists (columns) will be equal to + the length of the longest supplied list (row). + If lengths of supplied lists don't match, empty strings + will be inserted in the resulting lists. + + e.g lzip {a b c d e} {1 2 3 4} {x y z} + -> {a 1 x} {b 2 y} {c 3 z} {d 4 {}} {3 {} {}} + " + @values -min 0 -max -1 + list -type list -multiple 1 -optional 1 + }] + } + proc lzip {args} { + switch -- [llength $args] { + 0 {return {}} + 1 {return [lindex $args 0]} + 2 {return [::punk::lib::system::lzip2lists {*}$args]} + 3 {return [::punk::lib::system::lzip3lists {*}$args]} + 4 {return [::punk::lib::system::lzip4lists {*}$args]} + 5 {return [::punk::lib::system::lzip5lists {*}$args]} + 6 {return [::punk::lib::system::lzip6lists {*}$args]} + 7 {return [::punk::lib::system::lzip7lists {*}$args]} + 8 {return [::punk::lib::system::lzip8lists {*}$args]} + 9 {return [::punk::lib::system::lzip9lists {*}$args]} + 10 {return [::punk::lib::system::lzip10lists {*}$args]} + 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 { + set n [llength $args] + if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { + #puts "calling ::punk::lib::system::Build_lzipn $n" + ::punk::lib::system::Build_lzipn $n + } + return [::punk::lib::system::lzip${n}lists {*}$args] + } + default { + if {[llength $args] < 4000} { + set n [llength $args] + if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { + #puts "calling ::punk::lib::system::Build_lzipn $n" + ::punk::lib::system::Build_lzipn $n + } + return [::punk::lib::system::lzip${n}lists {*}$args] + } else { + return [::punk::lib::lzipn {*}$args] + } + } + } + } + + namespace eval system { + proc Build_lzipn {n} { + set arglist [list] + #use punk::lib::range which defers to lseq if available + set vars [lmap i [punk::lib::range 0 $n] {string cat v$i}] ;#v0 v1 v2.. (v0 ignored) + set body "\nlmap " + for {set i 1} {$i <= $n} {incr i} { + lappend arglist l$i + append body "[lindex $vars $i] \$l$i " + } + append body "\{list " + for {set i 1} {$i <= $n} {incr i} { + append body "\$[lindex $vars $i] " + } + append body "\}" \n + #puts "proc punk::lib::system::lzip${n}lists {$arglist} \{" + #puts "$body" + #puts "\}" + proc ::punk::lib::system::lzip${n}lists $arglist $body + } + + #fastest is to know the number of lists to be zipped + proc lzip2lists {l1 l2} { + lmap a $l1 b $l2 {list $a $b} + } + proc lzip3lists {l1 l2 l3} { + lmap a $l1 b $l2 c $l3 {list $a $b $c} + } + proc lzip4lists {l1 l2 l3 l4} { + lmap a $l1 b $l2 c $l3 d $l4 {list $a $b $c $d} + } + proc lzip5lists {l1 l2 l3 l4 l5} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 {list $a $b $c $d $e} + } + proc lzip6lists {l1 l2 l3 l4 l5 l6} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 {list $a $b $c $d $e $f} + } + proc lzip7lists {l1 l2 l3 l4 l5 l6 l7} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 {list $a $b $c $d $e $f $g} + } + proc lzip8lists {l1 l2 l3 l4 l5 l6 l7 l8} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 {list $a $b $c $d $e $f $g $h} + } + proc lzip9lists {l1 l2 l3 l4 l5 l6 l7 l8 l9} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 {list $a $b $c $d $e $f $g $h $i} + } + proc lzip10lists {l1 l2 l3 l4 l5 l6 l7 l8 l9 l10} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 j $l10 {list $a $b $c $d $e $f $g $h $i $j} + } + + #neat algorithm - but while lmap seems better than foreach - it seems the script is evaluated a little slowly + # review - + proc lzipn_alt args { + #stackoverflow - courtesy glenn jackman (modified) + foreach l $args { + lappend vars [incr n] + lappend lmap_args $n $l + } + lmap {*}$lmap_args {lmap v $vars {set $v}} + } + + #2024 - outperforms lmap version - presumably because list sizes reduced as it goes(?) + proc lzipn_tcl8 {args} { + #For tcl pre 9 (without lsearch -stride) + #wiki - courtesy JAL + set list_l $args + set zip_l [] + while {1} { + set cur [lmap a_l $list_l { lindex $a_l 0 }] + set list_l [lmap a_l $list_l { lrange $a_l 1 end }] + + if {[join $cur {}] eq {}} { + break + } + lappend zip_l $cur + } + return $zip_l + } + proc lzipn_tcl9a {args} { + #For Tcl 9+ (with lsearch -stride) + #compared to wiki version + #comparable for lists len <3 or number of args < 3 + #approx 2x faster for large lists or more lists + #needs -stride single index bug fix to use empty string instead of NULL + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [lrepeat [expr {$numcolumns * [llength $args]}] {}] + set outlist [lrepeat $numcolumns {}] + set s 0 + foreach len $lens list $args { + #ledit flatlist $s $e {*}$l {*}[lrepeat [expr {($numcolumns -([llength $l] % $numcolumns)) % $numcolumns}] NULL] + ledit flatlist $s [expr {$s + $len - 1}] {*}$list + incr s $numcolumns + } + #needs single index lstride bugfix + for {set c 0} {$c < $numcolumns} {incr c} { + ledit outlist $c $c [lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *] + } + return $outlist + } + proc lzipn_tcl9b {args} { + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [list] + foreach len $lens list $args { + lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] + } + lmap c [lseq 0 $numcolumns-1] {lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *} + } + proc lzipn_tcl9c {args} { + #SLOW + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [list] + foreach len $lens list $args { + lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] + } + set zip_l {} + set cols_remaining $numcolumns + for {set c 0} {$c < $numcolumns} {incr c} { + if {$cols_remaining == 1} { + return [list {*}$zip_l $flatlist] + } + lappend zip_l [lsearch -stride $cols_remaining -index 0 -inline -all -subindices $flatlist *] + set flen [llength $flatlist] + set flatlist [lremove $flatlist {*}[lseq 0 to $flen-1 by $cols_remaining]] + incr cols_remaining -1 + } + return $zip_l + } + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lzipn + @cmd -name punk::lib::lzipn\ + -summary\ + "zip any number of lists together (unoptimised)."\ + -help\ + "Conceptually equivalent to converting a list of rows + to a list of columns. + + See lzip which provides the same functionality but with + optimisations depending on the number of supplied lists. + " + @values -min 1 -max 1 + lvar -type string -help\ + "name of list variable" + a -type indexexpression + z -type indexexpression + }] + } + #keep both lzipn_tclX functions available for side-by-side testing in Tcl versions where it's possible + if {![package vsatisfies [package present Tcl] 9.0-] || [dict get [punk::lib::check::has_tclbug_lsearch_strideallinline] bug]} { + #-stride either not available - or has bug preventing use of main algorithm below + proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl8] + } else { + proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl9a] + } + + + + namespace import ::punk::args::lib::tstr + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::tclscript_to_scriptlist + @cmd -name punk::lib::tclscript_to_scriptlist\ + -summary\ + "Parse tcl script to toplevel list of lists."\ + -help\ + "Get topmost list of tcl language elements in script. + produces a list of lists where each sublist is a commandlist or + a comment string." + @values -min 1 -max 1 + script -type string + }] + } + proc tclscript_to_scriptlist {script} { + set scriptlist [list] + set cmdlist [list] + set scrlen [string length $script] + set token "" + set in_token 0 + set in_cmdlist 0 + set in_comment 0 + set charmap [list \t TB \n LF \r CR \\ BSL] ;#for switch 'jump' preservation - review - may be slower than escapes in switch statement? + for {set i 0} {$i < $scrlen} {incr i} { + set ch [string index $script $i] + set chswitch [string map $charmap $ch] + if {!$in_token} { + switch -- $chswitch { + { } - TB { + #ignore - continue being a non token + } + CR { + if {[string index $script $i+1] eq "\n"} { + if {$in_cmdlist} { + #no active token - newline ends cmdlist + set in_cmdlist 0 + lappend scriptlist $cmdlist + set cmdlist [list] + } + incr i + } + } + LF - ";" { + #no active token - newline or semicolon ends cmdlist + if {$in_cmdlist} { + set in_cmdlist 0 + lappend scriptlist $cmdlist + set cmdlist [list] + } + } + BSL { + if {[string index $script $i+1] eq "\n"} { + #continuation of whitespace while no token - boring + incr i + } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { + #continuation of whitespace while no token - boring + incr i 2 + } else { + #an uncommon possibility, a command wth surrounding spaces called in an strange way + # e.g \ cmdname\ arg + set in_token 1 + set token "\\[string index $script $i+1]" + incr i + if {!$in_cmdlist} { + set in_cmdlist 1 + } + } + } + # { + if {$in_cmdlist} { + #ordinary data + set in_token 1 + set token # + } else { + if {!$in_comment} { + set in_token 1 + set in_comment 1 + set token # + } else { + #wnen in comment - all will be a single token until comment ends + append token # + } + } + } + default { + #for completeness.. we should exclude other possible whitespace chars + if {![string is space $ch]} { + set in_token 1 + set token $ch + if {!$in_cmdlist} { + set in_cmdlist 1 + } + } + } + } + } else { + #if we're in a token, we must be in a cmdlist or a comment (single token) + #review - not preserving whitespace in list of commands is ok, but for comments it should ideally be preserved + #note that unbalanced curly in *toplevel* comment will still 'info complete' to true + switch -- $chswitch { + LF { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + #ends token and cmdlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \n + } + } else { + #ends a comment + lappend scriptlist $token ;#single token for comment + set token "" + set in_token 0 + set in_comment 0 + set in_cmdlist 0 ;#shouldn't be necessary, but included for clarity + } + } + ";" { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + #ends token and cmdlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \n + } + } else { + #ordinary char for comment + append token ";" + } + } + CR { + if {[string index $script $i+1] eq "\n"} { + if {[tcl::info::complete $token]} { + #ends token and commandlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \r\n + incr i + } + } else { + append token \r + } + } + BSL { + if {[string index $script $i+1] eq "\n"} { + #continuation - lf effectively becomes a space + if {!$in_comment} { + #token may end - but cmdlist goes on + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token " " + } + } else { + append token " " + } + incr i ;#skip LF + } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { + #continuation - cr-lf effectively becomes a space + if {!$in_comment} { + #token may end - but cmdlist goes on + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token " " + } + } else { + append token " " + } + incr i 2 ;#skip CRLF + } else { + append token "\\[string index $script $i+1]" + incr i + } + } + default { + if {![string is space $ch]} { + append token $ch + } else { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token $ch + } + } else { + append token $ch + } + } + } + } + } + } + #eof + if {!$in_comment} { + if {$in_token} { + if {[tcl::info::complete $token]} { + lappend cmdlist $token + lappend scriptlist $cmdlist + } else { + error "Eof reached whilst script incomplete. Unbalanced braces?\ntoken: '$token'" + } + } else { + if {$in_cmdlist} { + lappend scriptlist $cmdlist + } + } + } else { + lappend scriptlist $token + } + return $scriptlist + } + + + proc invoke command { + #*** !doctools + #[call [fun invoke] [arg command]] + #[para]Invoke an external command (using tcl open command) capturing stdout,stderr and the exitcode + #[example { + # set script { + # puts stdout {hello on stdout} + # puts stderr {hello on stderr} + # exit 42 + # } + # invoke [list tclsh <<$script] + #}] + + #see https://wiki.tcl-lang.org/page/open + lassign [chan pipe] chanout chanin + lappend command 2>@$chanin + set fh [open |$command] + set stdout [read $fh] + close $chanin + set stderr [read $chanout] + close $chanout + if {[catch {close $fh} cres e]} { + dict with e {} + lassign [set -errorcode] sysmsg pid exit + if {$sysmsg eq {NONE}} { + #output to stderr caused [close] to fail. Do nothing + } elseif {$sysmsg eq {CHILDSTATUS}} { + return [list $stdout $stderr $exit] + } else { + return -options $e $stderr + } + } + return [list $stdout $stderr 0] + } + + proc pdict {args} { + package require punk::args + variable has_punk_ansi + if {!$has_punk_ansi} { + set sep " = " + } else { + #set sep " [a+ Web-seagreen]=[a] " + set sep " [punk::ansi::a+ Green]=[punk::ansi::a] " + } + set argspec [string map [list %sep% $sep] { + @id -id ::punk::lib::pdict + @cmd -name pdict -help\ + "Print dict keys,values to channel + The pdict function operates on variable names - passing the value to the showdict function which operates on values + (see also showdict)" + + @opts -any 1 + + #default separator to provide similarity to tcl's parray function + -separator -default "%sep%" + -roottype -default "dict" + -substructure -default {} + -channel -default stdout -help\ + "existing channel - or 'none' to return as string" + + @values -min 1 -max -1 + + dictvar -type string -help "name of variable. Can be a dict, list or array" + + patterns -type string -default "*" -multiple 1 -help {Multiple patterns can be specified as separate arguments. + Each pattern consists of 1 or more segments separated by the hierarchy separator (forward slash) + The system uses similar patterns to the punk pipeline pattern-matching system. + The default assumed type is dict - but an array will automatically be extracted into key value pairs so will also work. + Segments are classified into list,dict and string operations. + Leading % indicates a string operation - e.g %# gives string length + A segment with a single @ is a list operation e.g @0 gives first list element, @1-3 gives the lrange from 1 to 3 + (todo - change to indexset syntax @1..3 @1..end-1 etc) + A segment containing 2 @ symbols is a dict operation. e.g @@k1 retrieves the value for dict key 'k1' + The operation type indicator is not always necessary if lower segments in the hierarchy are of the same type as the previous one. + e.g1 pdict env */%# + the pattern starts with default type dict, so * retrieves all keys & values, + the next hierarchy switches to a string operation to get the length of each value. + e.g2 pdict env W* S* + Here we supply 2 patterns, each in default dict mode - to display keys and values where the keys match the glob patterns + e.g3 pdict punk_testd */* + This displays 2 levels of the dict hierarchy. + Note that if the sublevel can't actually be interpreted as a dictionary (odd number of elements or not a list at all) + - then the normal = separator will be replaced with a coloured (or underlined if colour off) 'mismatch' indicator. + e.g4 set list {{k1 v1 k2 v2} {k1 vv1 k2 vv2}}; pdict list @0-end/@@k2 @*/@@k1 + Here we supply 2 separate pattern hierarchies, where @0-end and @* are list operations and are equivalent + The second level segment in each pattern switches to a dict operation to retrieve the value by key. + When a list operation such as @* is used - integer list indexes are displayed on the left side of the = for that hierarchy level. + } + }] + #puts stderr "$argspec" + set argd [punk::args::parse $args withdef $argspec] + + set opts [dict get $argd opts] + set dvar [dict get $argd values dictvar] + set patterns [dict get $argd values patterns] + set isarray [uplevel 1 [list ::tcl::array::exists $dvar]] + if {$isarray} { + set dvalue [uplevel 1 [list ::tcl::array::get $dvar]] + if {![dict exists $opts -keytemplates]} { + set arrdisplay [string map [list %dvar% $dvar] {${[if {[lindex $key 1] eq "query"} {val "%dvar% [lindex $key 0]"} {val "%dvar%($key)"}]}}] + dict set opts -keytemplates [list $arrdisplay] + } + dict set opts -keysorttype dictionary + } else { + set dvalue [uplevel 1 [list set $dvar]] + } + showdict {*}$opts $dvalue {*}$patterns + } + + #TODO - much. + #showdict needs to be able to show different branches which share a root path + #e.g show key a1/b* in its entirety along with a1/c* - (or even exact duplicates) + # - specify ansi colour per pattern so different branches can be highlighted? + # - ideally we want to be able to use all the dict & list patterns from the punk pipeline system eg @head @tail # (count) etc + # - The current version is incomplete but passably usable. + # - Copy proc and attempt rework so we can get back to this as a baseline for functionality + proc showdict {args} { ;# analogous to parray (except that it takes the dict as a value) + #set sep " [a+ Web-seagreen]=[a] " + variable has_punk_ansi + if {!$has_punk_ansi} { + set RST "" + set sep " = " + #set sep_mismatch " mismatch " + set sep \u2260 ;# equivalent [punk::ansi::convert_g0 [punk::ansi::g0 |]] (not equal symbol) + } else { + set RST [punk::ansi::a] + set sep " [punk::ansi::a+ Green]=$RST " ;#stick to basic default colours for wider terminal support + #set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]mismatch$RST " + set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]\u2260$RST " + } + package require punk::pipe + #package require punk ;#we need pipeline pattern matching features + package require textblock + + set argd [punk::args::parse $args withdef [string map [list %sep% $sep %sep_mismatch% $sep_mismatch] { + @id -id ::punk::lib::showdict + @cmd -name punk::lib::showdict -help "display dictionary keys and values" + #todo - table tableobject + -return -default "tailtohead" -choices {tailtohead sidebyside} + -channel -default none + -trimright -default 1 -type boolean -help\ + "Trim whitespace off rhs of each line. + This can help prevent a single long line that wraps in terminal from making + every line wrap due to long rhs padding." + -separator -default {%sep%} -help\ + "Separator column between keys and values" + -separator_mismatch -default {%sep_mismatch%} -help\ + "Separator to use when patterns mismatch" + -roottype -default "dict" -help\ + "list,dict,string" + -ansibase_keys -default "" -help\ + "ansi list for each level in -substructure. e.g \[list \[a+ red\] \[a+ web-green\]\]" + -substructure -default {} + -ansibase_values -default "" + -keytemplates -default {\$\{$key\}} -type list -help\ + "list of templates for keys at each level" + -keysorttype -default "none" -choices {none dictionary ascii integer real} + -keysortdirection -default increasing -choices {increasing decreasing} + -debug -default 0 -type boolean -help\ + "When enabled, produces some rudimentary debug output on stderr" + -- -type none -optional 1 + @values -min 1 -max -1 + dictvalue -type list -help\ + "dict or list value" + patterns -default "*" -type string -multiple 1 -help\ + "key or key glob pattern" + }]] + + #for punk::lib - we want to reduce pkg dependencies. + # - so we won't even use the tcllib debug pkg here + set opt_debug [dict get $argd opts -debug] + if {$opt_debug} { + if {[info body debug::showdict] eq ""} { + proc ::punk::lib::debug::showdict {args} { + catch {puts stderr "punk::lib::showdict-> [string cat {*}$args]"} + } + } + } else { + if {[info body debug::showdict] ne ""} { + proc ::punk::lib::debug::showdict {args} {} + } + } + + set opt_sep [dict get $argd opts -separator] + set opt_mismatch_sep [dict get $argd opts -separator_mismatch] + set opt_keysorttype [dict get $argd opts -keysorttype] + set opt_keysortdirection [dict get $argd opts -keysortdirection] + set opt_trimright [dict get $argd opts -trimright] + set opt_keytemplates [dict get $argd opts -keytemplates] + debug::showdict "keytemplates ---> $opt_keytemplates <---" + set opt_ansibase_keys [dict get $argd opts -ansibase_keys] + set opt_ansibase_values [dict get $argd opts -ansibase_values] + set opt_return [dict get $argd opts -return] + set opt_roottype [dict get $argd opts -roottype] + set opt_structure [dict get $argd opts -substructure] + + set dval [dict get $argd values dictvalue] + set patterns [dict get $argd values patterns] + + set result "" + + #pattern hierarchy + # */@1/@0,%#,%str @0/@1 - patterns each one is a pattern or pattern_nest + # * @1 @0,%#,%str - segments + # a b 1 0 %# %str - keys + + set pattern_key_index [list] ;#list of pattern_nests, same length as number of keys generated + set pattern_next_substructure [dict create] + set pattern_this_structure [dict create] + + # -- --- --- --- + #REVIEW + #as much as possible we should pass the indices along as a query to the pipeline pattern matching system so we're not duplicating the work and introducing inconsistencies. + #The main difference here is that sometimes we are treating the result as key-val pairs with the key being the query, other times the key is part of the query, or from the result itself (list/dict indices/keys). + #todo - determine if there is a more consistent rule-based way to do this rather than adhoc + #e.g pdict something * + #we want the keys from the result as individual lines on lhs + #e.g pdict something @@ + #we want on lhs result on rhs + # = v0 + #e.g pdict something @0-2,@4 + #we currently return: + #0 = v0 + #1 = v1 + #2 = v2 + #4 = v4 + #This means we've effectively auto-expanded the first list - elements 0-2. (or equivalently stated: we've flattened the 3 element and 1 element lists into one list of 4 elements) + #ie pdict is doing 'magic' compared to the normal pattern matching syntax, to make useage more convenient. + #this is a tradeoff that could create surprises and make things messy and/or inconsistent. + #todo - see if we can find a balance that gives consistency and logicality to the results whilst allowing still simplified matching syntax that is somewhat intuitive. + #It may be a matter of documenting what type of indexes are used directly as keys, and which return sets of further keys + #The solution for more consistency/predictability may involve being able to bracket some parts of the segment so for example we can apply an @join or %join within a segment + #that involves more complex pattern syntax & parsing (to be added to the main pipeline pattern syntax) + # -- --- --- --- + + set filtered_keys [list] + if {$opt_roottype in {dict list string}} { + #puts "getting keys for roottype:$opt_roottype" + if {[llength $dval]} { + + #TODO - change to indexset notation 0..1,3..end-1 etc + + set re_numdashnum {^([-+]{0,1}\d+)-([-+]{0,1}\d+)$} + set re_idxdashidx {^([-+]{0,1}\d+|end[-+]{1}\d+|end)-([-+]{0,1}\d+|end[-+]{1}\d+|end)$} + foreach pattern_nest $patterns { + set keyset [list] + set keyset_structure [list] + + set segments [split $pattern_nest /] + set levelpatterns [lindex $segments 0] ;#possibly comma separated patterns + #we need to use _split_patterns to separate (e.g to protect commas that appear within quotes) + set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] + #puts stderr "showdict-->_split_patterns: $patterninfo" + foreach v_idx $patterninfo { + lassign $v_idx v idx + #we don't support vars on lhs of index in this context - (because we support simplified glob patterns such as x* and literal dict keys such as kv which would otherwise be interpreted as vars with no index) + set p $v$idx ;#_split_patterns has split too far in this context - the entire pattern is the index pattern + if {[string index $p 0] eq "!"} { + set get_not 1 + set p [string range $p 1 end] + } else { + set get_not 0 + } + switch -exact -- $p { + * - "" { + if {$opt_roottype eq "list"} { + set keys [punk::lib::range 0 [llength $dval]-1] ;#compat wrapper around subset of lseq functionality + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + dict set pattern_this_structure $p list + } elseif {$opt_roottype eq "dict"} { + set keys [dict keys $dval] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } else { + lappend keyset %string + lappend keyset_structure string + dict set pattern_this_structure $p string + } + } + %# { + dict set pattern_this_structure $p string + lappend keyset %# + lappend keyset_structure string + } + # { + #todo get_not !# is test for listiness (see punk) + dict set pattern_this_structure $p list + lappend keyset # + lappend keyset_structure list + } + ## { + dict set pattern_this_structure $p dict + lappend keyset [list ## query] + lappend keyset_structure dict + } + @* { + #puts "showdict ---->@*<----" + dict set pattern_this_structure $p list + set keys [punk::lib::range 0 [llength $dval]-1] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } + @@ { + #get first k v from dict + dict set pattern_this_structure $p dict + lappend keyset [list @@ query] + lappend keyset_structure dict + } + @*k@* - @*K@* { + #returns keys only + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @*.@* { + set keys [dict keys $dval] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } + default { + #puts stderr "===p:$p" + #the basic scheme also doesn't allow commas in dict keys access via the convenience @@key - which isn't great, especially for arrays where it is common practice! + #we've already sacrificed whitespace in keys - so extra limitations should be reduced if it's to be passably useful + #@@"key,etc" should allow any non-whitespace key + switch -glob -- $p { + {@k\*@*} - {@K\*@*} { + #value glob return keys + #set search [string range $p 4 end] + #dict for {k v} $dval { + # if {[string match $search $v]} { + # lappend keyset $k + # } + #} + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @@* { + #exact match key - review - should raise error to match punk pipe behaviour? + set k [string range $p 2 end] + if {$get_not} { + if {[dict exists $dval $k]} { + set keys [dict keys [dict remove $dval $k]] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + } else { + lappend keyset {*}[dict keys $dval] + lappend keyset_structure {*}[lrepeat [dict size $dval] dict] + } + } else { + if {[dict exists $dval $k]} { + lappend keyset $k + lappend keyset_structure dict + } + } + dict set pattern_this_structure $p dict + } + @k@* - @K@* { + #TODO get_not + set k [string range $p 3 end] + if {[dict exists $dval $k]} { + lappend keyset $k + lappend keyset_structure dict + } + dict set pattern_this_structure $p dict + } + {@\*@*} { + #return list of values + #set k [string range $p 3 end] + #lappend keyset {*}[dict keys $dval $k] + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*.@*} { + #TODO get_not + set k [string range $p 4 end] + set keys [dict keys $dval $k] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } + {@v\*@*} - {@V\*@*} { + #value-glob return value + #error "dict value-glob value-return only not supported here - bad pattern '$p' in '$pattern_nest'" + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*v@*} - {@\*V@*} { + #key-glob return value + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*@*} - {@\*v@*} - {@\*V@} { + #key glob return val + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @??@* { + #exact key match - no error + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + default { + set this_type $opt_roottype + if {[string match @* $p]} { + #list mode - trim optional list specifier @ + set p [string range $p 1 end] + dict set pattern_this_structure $p list + set this_type list + } elseif {[string match %* $p]} { + dict set pattern_this_structure $p string + lappend keyset $p + lappend keyset_structure string + set this_type string + } + if {$this_type eq "list"} { + dict set pattern_this_structure $p list + if {[string is integer -strict $p]} { + if {$get_not} { + set keys [punk::lib::range 0 [llength $dval]-1] + set keys [lremove $keys $p] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } else { + lappend keyset $p + lappend keyset_structure list + } + } elseif {[punk::lib::is_indexset $p]} { + set keys [punk::lib::indexset_resolve [llength $dval] $p] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } elseif {[string match "?*-?*" $p]} { + #could be either - don't change type + #list indices with tcl8.7 underscores? be careful. Before 8.7 we could have used regexp \d on integers + #now we should map _ to "" first + set p [string map {_ {}} $p] + #lassign [textutil::split::splitx $p {\.\.}] a b + if {![regexp $re_idxdashidx $p _match a b]} { + error "unrecognised pattern $p" + } + #TODO - fix terminology. 'lower_resolve' is confusing here as range can be in descending order + #change to start/end terminology? + + set lower_resolve [punk::lib::lindex_resolve [llength $dval] $a] ;#-Inf for too low, Inf for too high + #keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds + if {${lower_resolve} == Inf} { + ##x + #lower bound is above upper list range + #match with decreasing indices is still possible + set lower [expr {[llength $dval]-1}] ;#set to max + } elseif {$lower_resolve == -Inf} { + ##x + set lower 0 + } else { + set lower $lower_resolve + } + set upper [punk::lib::lindex_resolve [llength $dval] $b] + if {$upper == -Inf} { + ##x + #upper bound is below list range - + if {$lower_resolve > -Inf} { + ##x + set upper 0 + } else { + continue + } + } elseif {$upper == Inf} { + #use max + set upper [expr {[llength $dval]-1}] + #assert - upper >=0 because we have ruled out empty lists + } + #note lower can legitimately be higher than upper - lib::range, like lseq can produce sequence in reverse order + set keys [punk::lib::range $lower $upper] + if {$get_not} { + set fullrange [punk::lib::range 0 [llength $dval]-1] + set keys [lremove $fullrange {*}$keys] + if {$lower > $upper} { + set keys [lreverse $keys] + } + } + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } else { + if {$get_not} { + lappend keyset [list !@$p query] + } else { + lappend keyset [list @$p query] + } + lappend keyset_structure list + } + } elseif {$this_type eq "string"} { + dict set pattern_this_structure $p string + } elseif {$this_type eq "dict"} { + #default equivalent to @\*@* + dict set pattern_this_structure $p dict + #puts "dict: appending keys from index '$p' keys: [dict keys $dval $p]" + set keys [dict keys $dval $p] + if {$get_not} { + set keys [dict keys [dict remove $dval {*}$keys]] + } + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + } else { + puts stderr "list: unrecognised pattern $p" + } + } + } + } + } + } + + # -- --- --- --- + #check next pattern-segment for substructure type to use + # -- --- --- --- + set substructure "" + set pnext [lindex $segments 1] + set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] + if {[llength $patterninfo] == 0} { + # // ? -review - what does this mean? for xpath this would mean at any level + set substructure [lindex $pattern_this_structure end] + } elseif {[llength $patterninfo] == 1} { + #ignore the NOT operator for purposes of query-type detection + if {[string index $pnext 0] eq "!"} { + set pnext [string range $pnext 1 end] + } + # single type in segment e.g /@@something/ + switch -exact -- $pnext { + "" { + set substructure string + } + @*k@* - @*K@* - @*.@* - ## { + set substructure dict + } + # { + set substructure list + } + ## { + set substructure dict + } + %# { + set substructure string + } + * { + #set substructure $opt_roottype + #set substructure [dict get $pattern_this_structure $pattern_nest] + set substructure [lindex $pattern_this_structure end] + } + default { + switch -glob -- $pnext { + @??@* - @?@* - @@* { + #all 4 or 3 len prefixes bounded by @ are dict + set substructure dict + } + default { + if {[string match @* $pnext]} { + set substructure list + } elseif {[string match %* $pnext]} { + set substructure string + } else { + #set substructure $opt_roottype + #set substructure [dict get $pattern_this_structure $pattern_nest] + set substructure [lindex $pattern_this_structure end] + } + } + } + } + } + } else { + #e.g /@0,%str,.../ + #doesn't matter what the individual types are - we have a list result + set substructure list + } + #puts "--pattern_nest: $pattern_nest substructure: $substructure" + dict set pattern_next_substructure $pattern_nest $substructure + # -- --- --- --- + + if {$opt_keysorttype ne "none"} { + set int_keyset 1 + foreach k $keyset { + if {![string is integer -strict $k]} { + set int_keyset 0 + break + } + } + if {$int_keyset} { + set sortindices [lsort -indices -integer $keyset] + #set keyset [lsort -integer $keyset] + } else { + #set keyset [lsort -$opt_keysorttype $keyset] + set sortindices [lsort -indices -$opt_keysorttype $keyset] + } + set keyset [lmap i $sortindices {lindex $keyset $i}] + set keyset_structure [lmap i $sortindices {lindex $keyset_structure $i}] + } + + foreach k $keyset { + lappend pattern_key_index $pattern_nest + } + + lappend filtered_keys {*}$keyset + lappend all_keyset_structure {*}$keyset_structure + + #puts stderr "--->pattern_nest:$pattern_nest keyset:$keyset" + } + } + #puts stderr "[dict get $pattern_this_structure $pattern_nest] keys: $filtered_keys" + } else { + puts stdout "unrecognised roottype: $opt_roottype" + return $dval + } + + if {[llength $filtered_keys]} { + #both keys and values could have newline characters. + #simple use of 'format' won't cut it for more complex dict keys/values + #use block::width or our columns won't align in some cases + switch -- $opt_return { + "tailtohead" { + #last line of key is side by side (possibly with separator) with first line of value + #This is more intelligible when terminal wrapping occurs - and is closer to what happens with parray multiline keys and values + #we still pad the key to max width so that the separator appears in the same column - which in the case of wide keys could cause that to wrap for all entries + + set kt [lindex $opt_keytemplates 0] + if {$kt eq ""} { + set kt {${$key}} + } + #set display_keys [lmap k $filtered_keys {tcl::string::map [list %k% $k] $kt}] + set display_keys [lmap key $filtered_keys {tstr -ret string -allowcommands $kt}] + set maxl [::tcl::mathfunc::max {*}[lmap v $display_keys {textblock::width $v}]] + + set kidx 0 + set last_hidekey 0 + foreach keydisplay $display_keys key $filtered_keys { + set thisval "?" + set hidekey 0 + set pattern_nest [lindex $pattern_key_index $kidx] + set pattern_nest_list [split $pattern_nest /] + #set this_type [dict get $pattern_this_structure $pattern_nest] + #set this_type [dict get $pattern_this_structure $key] + set this_type [lindex $all_keyset_structure $kidx] + #puts stderr "---> kidx:$kidx key:$key - pattern_nest:$pattern_nest this_type:$this_type" + + set is_match 1 ;#whether to display the normal separator or bad-match separator + switch -- $this_type { + dict { + #todo? - slower lsearch if -dupes 1 flag set so we can display duplicate 'keys' if var not a proper dict but rather a dict-shaped list that we want to display as a dict + # - default highlight dupes (ansi underline?) + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + % thisval.= $qry= $dval + } else { + set thisval [tcl::dict::get $dval $key] + } + + #set substructure [lrange $opt_structure 1 end] + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + set nextopts [dict get $argd opts] + + + set subansibasekeys [lrange $opt_ansibase_keys 1 end] + set nextkeytemplates [lrange $opt_keytemplates 1 end] + #dict set nextopts -substructure $nextsub + dict set nextopts -keytemplates $nextkeytemplates + dict set nextopts -ansibase_keys $subansibasekeys + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + #puts stderr "showdict {*}$nextopts $thisval [lindex $args end]" + + if {[llength $nextpatterns]} { + if {[catch { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } errMsg]} { + #puts stderr ">>> nextpatterns:'$nextpatterns' nextopts:'$nextopts'" + set is_match 0 + } + } + } + list { + if {[string is integer -strict $key]} { + set thisval [lindex $dval $key] + } else { + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + } else { + set qry $key + } + % thisval.= $qry= $dval + } + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + set nextopts [dict get $argd opts] + + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + + #if {![llength $nextpatterns]} { + # set nextpatterns * + #} + if {[llength $nextpatterns]} { + if {[catch { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } errMsg]} { + set is_match 0 + } + } + } + string { + set hidekey 1 + switch -- $key { + "%string" { + set hidekey 1 + set thisval $dval + } + "%ansiview" { + set thisval [ansistring VIEW -lf 1 $dval] + } + "%ansiviewstyle" { + set thisval [ansistring VIEWSTYLE -lf 1 $dval] + } + default { + switch -glob -- $key { + *lpad-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which left -width $width] + } + %lpadstr-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which left -width $width -padchar $extra] + } + %rpad-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which right -width $width] + } + %rpadstr-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which right -width $width -padchar $extra] + } + %split-* { + #split on one or more chars - review + set hidekey 1 + lassign [split $key -] _ splitchars + set thisval [split $dval $splitchars] + } + default { + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + } else { + set qry $key + } + set thisval $dval + if {[string index $key 0] ne "%"} { + set key %$key + } + #pipeline + % thisval.= $key= $thisval + } + } + + } + } + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + #set nextopts [dict get $argd opts] + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + + if {[llength $nextpatterns]} { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } + + } + } + if {$this_type eq "string" && $hidekey} { + lassign [textblock::size $thisval] _vw vwidth _vh vheight + #set blanks_above [string repeat \n [expr {$kheight -1}]] + set vblock $opt_ansibase_values$thisval$RST + #append result [textblock::join_basic -- $vblock] + #review - we wouldn't need this space if we had a literal %sp %sp-x ?? + append result " $vblock" + } else { + set ansibase_key [lindex $opt_ansibase_keys 0] + + lassign [textblock::size $keydisplay] _kw kwidth _kh kheight + lassign [textblock::size $thisval] _vw vwidth _vh vheight + + set totalheight [expr {$kheight + $vheight -1}] + set blanks_above [string repeat \n [expr {$kheight -1}]] + set blanks_below [string repeat \n [expr {$vheight -1}]] + + if {$is_match} { + set use_sep $opt_sep + } else { + set use_sep $opt_mismatch_sep + } + + + set sepwidth [textblock::width $use_sep] + set kblock [textblock::pad $ansibase_key$keydisplay$RST$blanks_below -width $maxl] + set sblock [textblock::pad $blanks_above$use_sep$blanks_below -width $sepwidth] + set vblock $blanks_above$opt_ansibase_values$thisval$RST + #only vblock is ragged - we can do a basic join because we don't care about rhs whitespace + if {$last_hidekey} { + append result \n + } + #append result [textblock::join_basic -- $kblock $sblock $vblock] \n + append result [textblock::join_basic_raw $kblock $sblock $vblock] \n + } + set last_hidekey $hidekey + incr kidx + } + } + "sidebyside" { + # TODO - fix + #This is nice for multiline keys and values of reasonable length, will produce unintuitive results when line-wrapping occurs. + #use ansibase_key etc to make the output more comprehensible in that situation. + #This is why it is not the default. (review - terminal width detection and wrapping?) + set maxl [::tcl::mathfunc::max {*}[lmap v $filtered_keys {textblock::width $v}]] + foreach key $filtered_keys { + set kt [lindex $opt_keytemplates 0] + if {$kt eq ""} { + set kt "%k%" + } + set keydisplay $opt_ansibase_keys[string map [list %k% $key] $kt]$RST + #append result [format "%-*s = %s" $maxl $key [dict get $dval $key]] \n + #differing height blocks (ie ragged) so we need a full textblock::join rather than join_basic + append result [textblock::join -- [textblock::pad $keydisplay -width $maxl] $opt_sep "$opt_ansibase_values[dict get $dval $key]$RST"] \n + } + } + } + } + if {$opt_trimright} { + set result [::join [lines_as_list -line trimright $result] \n] + } + if {[string last \n $result] == [string length $result]-1} { + set result [string range $result 0 end-1] + } + #stdout/stderr can exist but not be in 'chan names' (e.g when transforms in place) + set chan [dict get $argd opts -channel] + switch -- $chan { + stderr - stdout { + puts $chan $result + } + none { + return $result + } + default { + #review - check member of chan names? + #just try outputting to the supplied channel for now + puts $chan $result + } + } + } + + proc is_list_all_in_list {small large} { + if {[llength $small] > [llength $large]} {return 0} + foreach x $large { + ::set ($x) {} + } + foreach x $small { + if {![info exists ($x)]} { + return 0 + } + } + return 1 + } + #v2 generally seems slower + proc is_list_all_in_list2 {small large} { + set small_in_large [lsort [struct::set intersect [lsort -unique $small] $large ]] + return [struct::list equal [lsort $small] $small_in_large] + } + if {!$has_struct_list || !$has_struct_set} { + set body { + package require struct::list + package require struct::set + } + append body [info body is_list_all_in_list2] + proc is_list_all_in_list2 {small large} $body + } + + proc is_list_all_ni_list {A B} { + foreach x $B { + ::set ($x) {} + } + foreach x $A { + if {[info exists ($x)]} { + return 0 + } + } + return 1 + } + proc is_list_all_ni_list2 {a b} { + set i [struct::set intersect $a $b] + return [expr {[llength $i] == 0}] + } + if {!$has_struct_set} { + set body { + package require struct::list + } + append body [info body is_list_all_ni_list2] + proc is_list_all_ni_list2 {a b} $body + } + proc is_cachedlist_all_ni_list {a b} { + upvar 0 ::punk::lib::caches::funcs_ni_list funcs + if {[info exists funcs($a)]} { + return [[set funcs($a)] $b] + } + set keybytes [encoding convertto utf-8 $a] + set key [binary encode base64 $keybytes] ;#one single-line base64 string + + set expression "" + foreach t $a { + #append expression "({$t} ni \$b) && " + append expression "{$t} ni \$b && " + } + set expression [string trimright $expression " &"] ;#trim trailing spaces and ampersands + proc ::punk::lib::caches::ni_list_$key {b} [string map [list @expression@ $expression] { + return [expr {@expression@}] + }] + + set funcs($a) ::punk::lib::caches::ni_list_$key + return [punk::lib::caches::ni_list_$key $b] + } + proc is_cachedlist_all_ni_list2 {a b} { + upvar 0 ::punk::lib::caches::funcs_ni_list funcs + if {[info exists funcs($a)]} { + return [[set funcs($a)] $b] + } + set keybytes [encoding convertto utf-8 $a] + set key [binary encode base64 $keybytes] ;#one single-line base64 string + + set d [dict create] + foreach x $a { + dict set d $x "" + } + #constructing a switch statement could be an option + # - but would need to avoid using escapes in order to get a jump-table + # - this would need runtime mapping of values - unlikely to be a win + proc ::punk::lib::caches::ni_list_$key {b} [string map [list @d@ $d] { + foreach x $b { + if {[::tcl::dict::exists {@d@} $x]} { + return 0 + } + } + return 1 + }] + + set funcs($a) ::punk::lib::caches::ni_list_$key + return [punk::lib::caches::ni_list_$key $b] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::ldiff + @cmd -name punk::lib::ldiff\ + -summary\ + "Difference consisting of items with removeitems removed."\ + -help\ + "Somewhat like struct::set difference, but order preserving, and doesn't + treat as a 'set' so preserves any duplicates in items. + + struct::set difference may happen to preserve ordering when items are + integers, but order can't be relied on, especially as struct::set has + 2 differening implementations (tcl vs critcl) which return results with + different ordering to each other and different deduping behaviour in + some cases (e.g when 2nd arg is empty)" + @values -min 2 -max 2 + items -type list + removeitems -type list + }] + } + proc ldiff {items removeitems} { + if {[llength $removeitems] == 0} {return $items} + set result {} + foreach item $items { + if {$item ni $removeitems} { + lappend result $item + } + } + return $result + } + #with ledit (also avail in 8.6 using punk::lib::compat::ledit + proc ldiff2 {fromlist removeitems} { + if {[llength $removeitems] == 0} {return $fromlist} + foreach item $removeitems { + set posns [lsearch -all -exact $fromlist $item] + foreach p $posns {ledit fromlist $p $p} + } + return $fromlist + } + proc ldiff3 {fromlist removeitems} { + set doomed [list] + foreach item $removeitems { + lappend doomed {*}[lsearch -all -exact $fromlist $item] + } + lremove $fromlist {*}$doomed + } + + #fix for tcl impl of struct::set::diff which doesn't dedupe + proc struct_set_diff_unique {A B} { + package require struct::set ;#require even if tcl impl - so the dependency isn't missed accidentally due to Loaded state of programmers machine. + if {[struct::set::Loaded] eq "tcl"} { + return [punk::lib::setdiff $A $B] + } else { + #use (presumably critcl) implementation for speed + return [struct::set difference $A $B] + } + } + + + #non-dupe preserving - for consistency in dedupe behaviour we don't shortcircuit empty B + #consistent dedupe and order-maintenance of remaining items in A differentiate this from struct::set difference (dedupes with critcl, doesn't with tcl implementation 2024) + #also struct::set difference with critcl is faster + proc setdiff {A B} { + if {[llength $A] == 0} {return {}} + set d [dict create] + foreach x $A {dict set d $x {}} + foreach x $B {dict unset d $x} + return [dict keys $d] + } + #bulk dict remove is slower than a foreach with dict unset + #proc setdiff2 {fromlist removeitems} { + # #if {[llength $fromlist] == 0} {return {}} + # set d [dict create] + # foreach x $fromlist { + # dict set d $x {} + # } + # return [dict keys [dict remove $d {*}$removeitems]] + #} + #array is about 15% faster than dict - but unordered. (which is ok for sets - but we have struct::set for that) + proc setdiff_unordered {A B} { + if {[llength $A] == 0} {return {}} + array set tmp {} + foreach x $A {::set tmp($x) .} + foreach x $B {catch {unset tmp($x)}} + return [array names tmp] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lunique_unordered + @cmd -name punk::lib::lunique_unordered\ + -summary\ + "unique values in list"\ + -help\ + "Return unique values in provided list. + This removes duplicates but *may* rearrange the + order of the returned elements compared to the + original list. + + When struct::set is available this will be used + for the implementation, as it can be *slightly* + faster if acceleration is present. When struct::set + is not available it will fallback to lunique and + provide the same functionality with order preserved." + @values -min 1 -max 1 + list -type list + }] + } + #default/fallback implementation + proc lunique_unordered {list} { + lunique $list + } + if {$has_struct_set} { + if {[struct::set equal [struct::set union {a a} {}] {a}]} { + proc lunique_unordered {list} { + struct::set union $list {} + } + } else { + #struct::set union operates on a 'set' - so this probably won't change, and hopefully is + #consistent across unacelerated versions and those implemented in accelerators, + #but if it ever does change - be a little noisy about it. + puts stderr "punk::lib WARNING: struct::set union no longer dedupes!" + #we could also test a sequence of: struct::set add + } + } + + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lunique + @cmd -name punk::lib::lunique\ + -summary\ + "Order-preserving unique values in list"\ + -help\ + "Return unique values in provided list. + This removes duplicates whilst preserving the + original order of the provided list. + + When struct::set is available with acceleration, + lunique_unordered may be slightly faster." + @values -min 1 -max 1 + list -type list + }] + } + proc lunique {list} { + set new {} + foreach item $list { + if {$item ni $new} { + lappend new $item + } + } + return $new + } + proc lunique2 {list} { + set doomed [list] + #expr 'in' probably faster than using a dict - for lists approx < 20,000 items. (wiki wisdom - url?) + for {set i 0} {$i < [llength $list]} {} { + set item [lindex $list $i] + lappend doomed {*}[lrange [lsearch -all -exact -start $i $list $item] 1 end] + while {[incr i] in $doomed} {} + } + lremove $list {*}$doomed + } + #The closure-like behaviour is *very* slow especially when called from a context such as the global namespace with lots of vars and large arrays such as ::env + proc lmapflat_closure {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + # -- --- --- + #capture - use uplevel 1 or namespace eval depending on context + set capture [uplevel 1 { + apply { varnames { + set capturevars [tcl::dict::create] + set capturearrs [tcl::dict::create] + foreach fullv $varnames { + set v [tcl::namespace::tail $fullv] + upvar 1 $v var + if {[info exists var]} { + if {(![array exists var])} { + tcl::dict::set capturevars $v $var + } else { + tcl::dict::set capturearrs capturedarray_$v [array get var] + } + } else { + #A variable can show in the results for 'info vars' but still not 'exist'. e.g a 'variable x' declaration in the namespace where the variable has never been set + } + } + return [tcl::dict::create vars $capturevars arrs $capturearrs] + } } [info vars] + } ] + # -- --- --- + set cvars [tcl::dict::get $capture vars] + set carrs [tcl::dict::get $capture arrs] + set apply_script "" + foreach arrayalias [tcl::dict::keys $carrs] { + set realname [string range $arrayalias [string first _ $arrayalias]+1 end] + append apply_script [string map [list %realname% $realname %arrayalias% $arrayalias] { + array set %realname% [set %arrayalias%][unset %arrayalias%] + }] + } + + append apply_script [string map [list %script% $script] { + #foreach arrayalias [info vars capturedarray_*] { + # set realname [string range $arrayalias [string first _ $arrayalias]+1 end] + # array set $realname [set $arrayalias][unset arrayalias] + #} + #return [eval %script%] + %script% + }] + #puts "--> $apply_script" + foreach $varnames $list { + lappend result {*}[apply\ + [list\ + [concat $varnames [tcl::dict::keys $cvars] [tcl::dict::keys $carrs] ]\ + $apply_script\ + ] {*}[subst $values] {*}[tcl::dict::values $cvars] {*}[tcl::dict::values $carrs] ] + } + return $result + } + #link version - can write to vars in calling context - but keeps varnames themselves isolated + #performance much better than capture version - but still a big price to pay for the isolation + proc lmapflat_link {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + set linkvars [uplevel 1 [list info vars]] + set nscaller [uplevel 1 [list namespace current]] + + set apply_script "" + foreach vname $linkvars { + append apply_script [string map [list %vname% $vname]\ + {upvar 2 %vname% %vname%}\ + ] \n + } + append apply_script $script \n + + #puts "--> $apply_script" + foreach $varnames $list { + lappend result {*}[apply\ + [list\ + $varnames\ + $apply_script\ + $nscaller\ + ] {*}[subst $values]\ + ] + } + return $result + } + + #proc lmapflat {varnames list script} { + # concat {*}[uplevel 1 [list lmap $varnames $list $script]] + #} + #lmap can accept multiple var list pairs + proc lmapflat {args} { + concat {*}[uplevel 1 [list lmap {*}$args]] + } + proc lmapflat2 {args} { + concat {*}[uplevel 1 lmap {*}$args] + } + + #proc dict_getdef {dictValue args} { + # if {[llength $args] < 1} { + # error {wrong # args: should be "dict_getdef dictValue ?key ...? key default"} + # } + # set keys [lrange $args -1 end-1] + # if {[tcl::dict::exists $dictValue {*}$keys]} { + # return [tcl::dict::get $dictValue {*}$keys] + # } else { + # return [lindex $args end] + # } + #} + if {[info commands ::tcl::dict::getdef] eq ""} { + proc dict_getdef {dictValue args} { + set keys [lrange $args 0 end-1] + if {[tcl::dict::exists $dictValue {*}$keys]} { + return [tcl::dict::get $dictValue {*}$keys] + } else { + return [lindex $args end] + } + } + } else { + #we pay a minor perf penalty for the wrap + interp alias "" ::punk::lib::dict_getdef "" ::tcl::dict::getdef + } + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + #supports *safe* ultra basic offset expressions as used by lindex etc, but without the 'end' features + #safe in that we don't evaluate the expression as a string. + proc offset_expr {expression} { + set expression [tcl::string::map {_ {}} $expression] ;#review - this is for 8.6 to understand underscored ints + if {[tcl::string::is integer -strict $expression]} { + return [expr {$expression}] + } + if {[regexp {([^+-]*)([+-])(.*)} $expression _match a op b] && [tcl::string::is integer -strict $a] && [tcl::string::is integer -strict $b]} { + if {$op eq "-"} { + return [expr {$a - $b}] + } else { + return [expr {$a + $b}] + } + } else { + error "bad expression '$expression': must be integer?\[+-\]integer?" + } + } + + punk::args::define { + @id -id ::punk::lib::is_indexset + @cmd -name punk::lib::is_indexset\ + -summary\ + "Validate string is a comma-delimited 'indexset'."\ + -help\ + "Validate that a string is an 'indexset' + + An indexset consists of a comma delimited list of indexes or index-ranges. + No particular base is assumed for the purposes of validating an indexset here. + While in Tcl, lists are zero-based - an indexset can be applied to lists of any base. + e.g -10..-1 is an indexset that just won't resolve any results for a list with a base >= 0. + To validate if an indexset is strictly within range, both the length of the data and the base would + need to be considered. + + The normal 'range' specifier is .. but can be of the form .x. where x is the step value. + The range specifier can appear at the beginning, middle or end, or even alone to indicate the entire + range of valid values. + e.g the following are all valid ranges + 1.. + (index 1 to 'max') + ..10 + (index 'base' to 10) + 2..11 + (index 2 to 11) + .. + (all indices) + .3. + (1st index and every 3rd index thereafter) + + Common whitespace elements space,tab,newlines are ignored. + Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, + e.g end-2 or 2+2. + + see indexset_resolve" + @values -min 1 -max 1 + indexset -type string + } + proc is_indexset {indexset} { + #collapse internal whitespace (for basic whitespace set we allow) + set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] + if {![regexp {^[\-\+_end,\.0-9]*$} $indexset]} { + return 0 + } + set ranges [split $indexset ,] + foreach r $ranges { + set validateindices [list] + set rposn [string first .. $r] + if {$rposn >= 0} { + set sepsize 2 + set step 1 + #review - whitespace between ints? + lappend validateindices {*}[string range $r 0 $rposn-1] {*}[string range $r $rposn+2 end] + } elseif {[string first . $r] >= 0} { + set stripped [string map {. ""} $r] + if {[tcl::string::length $stripped] != [tcl::string::length $r]-2} { + #if one dot exists - must be exactly 2 dots in total - possibly separated by positive/negative int (not zero) + return 0 + } + #assert - we have exactly 2 dots separated by something. + #check for .n. 'stepped' range + set fdot [string first . $r] + set ldot [string last . $r] + set step [string range $r $fdot+1 $ldot-1] + #todo - allow basic mathops for step: 2+1 2+-1 etc same as tcl lindex, lseq + #1.0.10 should be valid but behave similarly to lseq 1 0 by 0 ie returns nothing + + #1.end.10 or similar shouldn't be valid - but we need to allow other basic index expressions. + if {[string match *end* $step] || [catch {lindex {} $step}]} { + return 0 + } + #if {![string is integer -strict $step] || $step == 0} { + # return 0 + #} + lappend validateindices {*}[string range $r 0 $fdot-1] {*}[string range $r $ldot+1 end] + } else { + #'range' is just an index + set validateindices [list $r] + } + + foreach v $validateindices { + if {$v eq "" || $v eq "end"} {continue} + if {[string is integer -strict $v]} {continue} + if {[catch {lindex {} $v}]} { + return 0 + } + } + } + return 1 + } + #review - compare to IMAP4 methods of specifying ranges? + #TODO add tests to test::punk::lib indexset_resolve is a little tricky + punk::args::define { + @id -id ::punk::lib::indexset_resolve + @cmd -name punk::lib::indexset_resolve\ + -summary\ + "Resolve an indexset to a list of integers based on supplied list or string length."\ + -help\ + "Resolve an 'indexset' to a list of actual indices within the range of the provided numitems value. + e.g in a basic case: for a list of 10 items, 'indexset_resolve 10 end' will return the index 9 + + An indexset consists of a comma delimited list of indexes or index-ranges. + Ranges must be specified with a range-indicator such as .. as the separator, with an empty value at + either side of the separator representing beginning and end of the index range respectively. + The range-separator can be of the form .x. where x is an integer or basic expression + (single +/- operation) that indicates the step value to use. This is equivalent to the 'by' value + in the tcl9 lseq command. + + When the start index is lower than the end, the step value defaults to 1. + ie indexset_resolve 0..7 is equivalent to indexset_resolve 0.1.7 + When the start index is higher than the end, the step value defaults to -1. + ie indexset_resolve 7..0 is equivalent to indexset_resolve 0.-1.7 + + If start and end are ommitted, increasing order is assumed if the step isn't specified. + eg + .. represents the range from the base to the end + .-1. would represent end to base with step -1 + + If start is omitted and only the end is supplied: + The default step is 1 indicating ascension and the missing start is equivalent to the base. + indexset_resolve 5 ..2 + -> 0 1 2 + The default start is 'end' if the step is negative + indexset_resolve 5 .-1.2 + -> 4 3 2 + If end is omitted and only the start is supplied: + The default step is 1 indicating ascension and the missing end is equivalent to 'end' + indexset_resolve 5 2.. + -> 2 3 4 + The default end is the base if the step is negative + indexset_resolve 5 2.-1. + -> 2 1 0 + + Like the tcl9 lseq command - a step (by) value of zero produces no results. + + The indexes are 0-based by default, but the base can be specified. + indexset_resolve 7 .. + -> 0 1 2 3 4 5 6 + indexset_resolve -base -3 7 .. + -> -3 -2 -1 0 1 2 3 + + Whitespace is ignored. + Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, + e.g end-2 or 2+2. + + end means the last item. + end-1 means the second last item. + 0.. is the same as 0..end + + indexset examples: + + These assume the default 0-based indices (-base 0) + + 1,3.. + output the index 1 (2nd item) followed by all from index 3 to the end. + indexset_resolve 4 1,3.. + -> 1 3 + indexset_resolve 10 1,3.. + -> 1 3 4 5 6 7 8 9 + 0..2,end + output the first 3 indices, and the last index. + end-1..0 + output the indexes in reverse order from 2nd last item to first item." + @leaders -min 0 -max 0 + @opts + -base -type integer -prefix 1 -default 0 -help\ + "This is the starting index. It can be positive, negative or zero. + This affects the start and end calculations, limiting what indices will be + returned. + e.g with base 1 'end' will give a different value from base 0 + + for 10 items 'end' is 10 when 1-based + for 10 items 'end' is 9 when 0-based + + For base 1, index 0 is considered to be below the range. + ie + indexset_resolve -base 1 10 0..3 + -> 1 2 3 + indexset_resolve -base 0 10 0..3 + -> 0 1 2 3 + + It does not *convert* indexes within the range. + + indexset_resolve -base 1 10 5 + -> 5 + indexset_resolve -base 0 10 5 + -> 5 + + ie if you ask for a 1-based resolution of an indexset the integers that are within + the range will come out the same, so the result needs to be treated as a 1-based + set of indices when performing further operations. + " + @values -min 2 -max 3 + numitems -type integer + indexset -type indexset -help "comma delimited specification for indices to return" + } + + #limit punk::args parsing to unhappy paths where possible + proc indexset_resolve {args} { + # -------------------------------------------------- + # Manual parsing of happy path args instead of using punk::args::parse $args withid ::punk::lib::indexset_resolve + # This is because indexset_resolve is *somewhat* low level, has only a few args, and we don't want any overhead. + # for the unhappy path - the punk::args::parse is fine to generate the usage/error information. + # -------------------------------------------------- + if {[llength $args] < 2} { + punk::args::resolve $args withid ::punk::lib::indexset_resolve + } + set indexset [lindex $args end] + set numitems [lindex $args end-1] + if {![string is integer -strict $numitems] || ![is_indexset $indexset]} { + #use parser on unhappy path only + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + #assert we have 2 or more args + set base 0 ;#default + if {[llength $args] > 2} { + #if more than just numitems and indexset - we expect only -base ie 4 args in total + if {[llength $args] != 4} { + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + set optname [lindex $args 0] + set optval [lindex $args 1] + set fulloptname [tcl::prefix::match -error "" -base $optname] + if {$fulloptname ne "-base" || ![string is integer -strict $optval]} { + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + set base $optval + } + # -------------------------------------------------- + + + set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] ;#collapse basic whitespace + set index_list [list] ;#list of actual indexes within the range + set iparts [split $indexset ,] + set based_max [expr {$numitems -1 + $base}] + + #we already did is_indexset check above, so we can make assumptions about well-formedness of each part + foreach ipart $iparts { + set ipart [string trim $ipart] + #we need to cater for n..m as well as n.s.m where s is 'step' + set rposn [string first . $ipart] + if {$rposn>=0} { + #if we found one dot - there must be exactly 2 dots in the ipart, separated by nothing, or a basic integer-expression + set rposn2 [string last . $ipart] + if {$rposn2 == $rposn+1} { + #.. + set step "default" ;#could be 1 or -1 + } else { + set step [tcl::string::range $ipart $rposn+1 $rposn2-1] + } + lassign [punk::lib::string_splitbefore_indices $ipart $rposn $rposn2+1] rawa _ rawb + + set rawa [string trim $rawa] + set rawb [string trim $rawb] + if {$rawa eq "" && $rawb eq ""} { + if {$step eq "default"} { + set step 1 ;#default ascending when no start and no end + } + if {$step < 0} { + set rawa end + set rawb $base + } else { + set rawa $base + set rawb end + } + #if neither start nor end specified - we won't get out of range results from lindex_resolve + set a [punk::lib::lindex_resolve $numitems $rawa $base] + set b [punk::lib::lindex_resolve $numitems $rawb $base] + } else { + if {$rawa eq ""} { + if {$step eq "default"} { + #when start not specified, but end is - default direction always ascending + #(even if end is base or below range) + set step 1 + } + if {$step < 0} { + set rawa end + } else { + set rawa $base + } + } + set a [punk::lib::lindex_resolve $numitems $rawa $base] + if {$a == -Inf} { + #undershot - leave negative + } elseif {$a == Inf} { + #overshot + set a [expr {$based_max + 1}] ;#put it outside the range on the upper side + } + #review - a may be -Inf + + if {$rawb eq ""} { + if {$step eq "default"} { + set step 1 + } + if {$step < 0} { + if {$a < $base} { + #make sure both + #mathfunc::isinf is tcl9+ + if {[catch { + if {[::tcl::mathfunc::isinf $a]} { + set a [expr {$base -1}] + } + }]} { + if {[string match -nocase *inf* $a]} { + set a [expr {$base -1}] + } + } + set rawb $a + } else { + set rawb $base + } + } else { + if {$a > $based_max} { + set rawb $a ;#make sure .. doesn't return last item - should return nothing + } else { + set rawb end + } + } + } + set b [punk::lib::lindex_resolve $numitems $rawb $base] + if {$b == -Inf} { + #undershot - leave negative + } elseif {$b == Inf} { + #set b [expr {$numitems}] ;#overshot - put it outside the range on the upper side + set b [expr {$based_max + 1}] ;#overshot - put it outside the range on the upper side + } + } + + #JJJ + + #e.g make sure .. doesn't return last item - should return nothing as both are above the range. + if {$a >= $base && $a <= $based_max && $b >=$base && $b <= $based_max} { + #assert a & b are integers within the range + if {$step eq "default"} { + #unspecified step - base direction on order of a & b + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } else { + if {$a >= $base && $a <= $based_max} { + #only a is in the range + if {$b < $base} { + set b $base + } else { + set b $based_max + } + if {$step eq "default"} { + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } elseif {$b >=$base && $b <= $based_max} { + #only b is in the range + if {$step eq "default"} { + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + if {$step < 0} { + if {$a < $base} { + #negative step from below - doesn't matter if b is in range - recast both to an int below $base + #(a may be -Inf) + set a [expr {$base -1}] + set b $a + set step 0 ;#we should return nothing + } + } else { + if {$a < $base} { + set a $base + } else { + set a $based_max + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } else { + #both outside the range + if {$a < $base && $b > $base} { + #spans the range in forward order + set a $base + set b $based_max + if {$step eq "default"} { + set step 1 + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } elseif {$a > $base && $b < $base} { + #spans the range in reverse order + set a $based_max + set b $base + if {$step eq "default"} { + set step -1 + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } + #both outside of range on same side + } + } + } else { + set idx [punk::lib::lindex_resolve_basic $numitems $ipart $base] + #returns only -Inf for out of range at either end + if {$idx >= $base} { + #index within the range + lappend index_list $idx + } + } + } + return $index_list + } + # showdict uses lindex_resolve results -Inf & Inf to determine whether index is out of bounds on lower vs upper side + #This doesn't need the list itself - just the length suffices. + punk::args::define { + @id -id ::punk::lib::lindex_resolve + @cmd -name punk::lib::lindex_resolve\ + -summary\ + "Resolve an indexexpression to an integer based on supplied list or string length."\ + -help\ + "Resolve an index which may be of the forms accepted by Tcl list or string commands such as end-2 or 2+2 + to the actual integer index for the supplied list/string length, or to a negative value below -1 indicating + whether the index was below or above the range of possible indices for the length supplied. + + Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. + This means the proc may be called with something like $x+2 end-$y etc + Sometimes the actual integer index is desired. + + We want to resolve the index used, without passing arbitrary expressions into the 'expr' function + - which could have security risks. + lindex_resolve will parse the index expression and return: + a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) + b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) + The similar function lindex_resolve_basic uses -Inf to denote + out of range at either end of the list/string. + Otherwise it will return an integer corresponding to the position in the data. + This is in stark contrast to Tcl list/string function indices which will return empty strings for out of + bounds indices, or in the case of lrange, return results anyway. + Like Tcl list commands - it will produce an error if the form of the index is not acceptable. + For empty lists/string (datalength 0), end and end+x indices are considered to be out of bounds on the upper side + - thus returning -2 + + Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. + We will get something like 10+1 - which can be resolved safely with expr + " + @values -min 2 -max 2 + datalength -type integer + index -type indexexpression + } + proc lindex_resolve {len index {base 0}} { + #*** !doctools + #[call [fun lindex_resolve] [arg len] [arg index]] + #[para]Resolve an index which may be of the forms accepted by Tcl list commands such as end-2 or 2+2 to the actual integer index for the supplied list/string length + #[para]Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. + #[para]This means the proc may be called with something like $x+2 end-$y etc + #[para]Sometimes the actual integer index is desired. + #[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. + #[para]lindex_resolve will parse the index expression and return: + #[para] a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) + #[para] b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] We don't return -1 - as the similar function lindex_resolve_basic uses this to denote out of range at either end of the list/string + #[para]Otherwise it will return an integer corresponding to the position in the list. + #[para]This is in stark contrast to Tcl list function indices which will return empty strings for out of bounds indices, or in the case of lrange, return results anyway. + #[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable + #[para]For empty lists, end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 + + #Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which can be resolved safely with expr + + + #REVIEW - we need compat for 1_000 etc to handle things like toml even in 8.6? + #A basic string map means we aren't properly validating + #todo - be stricter about malformations such as 1000_ + if {![string is integer -strict 1_0]} { + #basic forward compatibility with integers such as 1_000 for 8.6.x + set index [tcl::string::map {_ {}} $index] + set len [tcl::string::map {_ {}} $len] + } + + if {![string is integer -strict $len] || $len < 0} { + error "lindex_resolve len must be a positive integer." + } + set based_max [expr {$len -1 + $base}] + + if {[string is integer -strict $index]} { + #review - base? + #can match +i -i + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } else { + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } else { + if {[string match end* $index]} { + if {$index ne "end"} { + set op [string index $index 3] + set offset [string range $index 4 end] + #note - offset could have leading + or - + # 'string is integer -strict +1' ==> true + #e.g end+-1 is valid (end++-1 is not) + if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} + if {$offset == 0} { + #(offset +0, -0 or 0 or 000 0_0 etc) + #op either + or - is irrelevant + #set index [expr {$len-1}] ;#+ base ? + set index $based_max + if {$index < $base} { + #return -2 ;#special case - equivalent to 'end', with empty list - treat like a positive number out of bounds + return Inf + } else { + return $index + } + } + + #set index [if {$op eq "+"} {expr {($len-1) + $offset}} else {expr {($len-1) - $offset}}] + set index [if {$op eq "+"} {expr {$based_max + $offset}} else {expr {$based_max - $offset}}] + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } else { + return $index + } + } else { + #index is 'end' + if {$len == 0} { + #special case - 'end' with empty list - treat end like a positive number out of bounds + return Inf + } + #return [expr {$len - 1 + $base}] + return $based_max + } + } else { + #plain +- already handled above. + #we are trying to avoid evaluating unbraced expr of potentially insecure origin + #regexp must split a++b to a + +b (not a+ + b) ie first +/- is the op + if {[regexp {([^+-]*)([+-])(.*)} $index _match a op b]} { + if {[string is integer -strict $a] && [string is integer -strict $b]} { + if {$op eq "-"} { + set index [expr {$a - $b}] + } else { + set index [expr {$a + $b}] + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } + return $index + } + } + } + proc lindex_resolve_basic {len index {base 0}} { + #*** !doctools + #[call [fun lindex_resolve_basic] [arg len] [arg index]] + #[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) + #[para] returns -Inf for out of range at either end, or a valid integer index + #[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound + #[para] This is only likely to be faster than average over lindex_resolve for small lists and for Tcl which has the builtin lseq command + #[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 + #[para] For pure integer indices the performance should be equivalent + + if {![string is integer -strict $len] || $len < 0} { + error "lindex_resolve_basic len must be an integer greater than or equal to zero" + } + if {![string is integer -strict $base]} { + #base can be negative + error "lindex_resolve_basic base must be an integer" + } + set based_max [expr {$len -1 + $base}] + + set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 + if {[string is integer -strict $index]} { + #can match +i -i + #avoid even the lseq overhead when the index is simple + if {$index < $base || ($index > $based_max)} { + #even though in this case we could return -Inf or Inf like lindex_resolve; + #for consistency we don't return Inf for upper-boudn violation, + #as which bound is violated is not always directly determinable for compound index expressions (such as end-x) using the lseq+lindex mechanism. + return -Inf + } else { + #!NOTE! index within range is unchanged - no matter the base + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } + if {$len > 0} { + #For large len - this is a wasteful allocation if no true lseq available in Tcl version. + #lseq produces an 'arithseries' object which we can index into without allocating an entire list (REVIEW) + set testlist [punk::lib::range $base $based_max] ;# uses lseq if available, has fallback of creating a potentially large list of numbers. + } else { + set testlist [list] + #we want to call 'lindex' even in this case - to get the appropriate error message + } + set idx [lindex $testlist $index] + if {$idx eq ""} { + #we have no way to determine if out of bounds is at lower vs upper end + return -Inf + } else { + return $idx + } + } + proc lindex_get {list index} { + set resultlist [lrange $list $index $index] + if {![llength $resultlist]} { + return -1 + } else { + #we still don't know the actual integer index for an index such as end-x or int-int without parsing and evaluating ourself. + #we can return the value - but only in a way that won't collide with our -1 out-of-range indicator + return [tcl::dict::create value [lindex $resultlist 0]] + } + } + + proc string_splitbefore {str index} { + if {![string is integer -strict $index]} { + set index [punk::lib::lindex_resolve [string length $str] $index] + switch -- $index { + -Inf { + return [list "" $str] + } + Inf { + return [list $str ""] + } + } + } + return [list [string range $str 0 $index-1] [string range $str $index end]] + #scan %s stops at whitespace - not useful here. + #scan $s %${p}s%s + } + proc string_splitbefore_indices {str args} { + set parts [list $str] + set sizes [list [string length $str]] + set s 0 + foreach index $args { + if {![string is integer -strict $index]} { + set index [punk::lib::lindex_resolve [string length $str] $index] + switch -- $index { + -Inf { + if {[lindex $sizes 0] != 0} { + ledit parts 0 0 {} [lindex $parts 0] + ledit sizes 0 0 0 [lindex $sizes 0] + } + continue + } + Inf { + if {[lindex $sizes end] != 0} { + ledit parts end end [lindex $parts end] {} + ledit sizes end end [lindex $sizes end] 0 + } + continue + } + } + } + if {$index <= 0} { + if {[lindex $sizes 0] != 0} { + ledit parts 0 0 {} [lindex $parts 0] + ledit sizes 0 0 0 [lindex $sizes 0] + } + continue + } + if {$index >= [string length $str]} { + if {[lindex $sizes end] != 0} { + ledit parts end end [lindex $parts end] {} + ledit sizes end end [lindex $sizes end] 0 + } + continue + } + set i -1 + set a 0 + foreach sz $sizes { + incr i + if {$a + $sz > $index} { + set p [lindex $parts $i] + #puts "a:$a index:$index" + if {$a == $index} { + break + } + ledit parts $i $i [string range $p 0 [expr {$index -$a -1}]] [string range $p $index-$a end] + ledit sizes $i $i [expr {$index - $a}] [expr {($a + $sz)-$index}] + break + } + incr a $sz + } + #puts "->parts:$parts" + #puts "->sizes:$sizes" + } + return $parts + } + + proc K {x y} {return $x} + #*** !doctools + #[call [fun K] [arg x] [arg y]] + #[para]The K-combinator function - returns the first argument, x and discards y + #[para]see [uri https://wiki.tcl-lang.org/page/K] + #[para]It is used in cases where command-substitution at the calling-point performs some desired effect. + + + proc is_utf8_multibyteprefix {bytes} { + #*** !doctools + #[call [fun is_utf8_multibyteprefix] [arg str]] + #[para] Returns a boolean if str is potentially a prefix for a multibyte utf-8 character + #[para] ie - tests if it is possible that appending more data will result in a utf-8 codepoint + #[para] Will return false for an already complete utf-8 codepoint + #[para] It is assumed the incomplete sequence is at the beginning of the bytes argument + #[para] Suitable input for this might be from the unreturned tail portion of get_utf8_leading $testbytes + #[para] e.g using: set head [lb]get_utf8_leading $testbytes[rb] ; set tail [lb]string range $testbytes [lb]string length $head[rb] end[rb] + regexp {(?x) + ^ + (?: + [\xC0-\xDF] | #possible prefix for two-byte codepoint + [\xE0-\xEF] [\x80-\xBF]{0,1} | #possible prefix for three-byte codepoint + [\xF0-\xF4] [\x80-\xBF]{0,2} #possible prefix for + ) + $ + } $bytes + } + + proc is_utf8_first {str} { + regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + ^ + (?: + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + } $str + } + proc is_utf8_single {1234bytes} { + #*** !doctools + #[call [fun is_utf8_single] [arg 1234bytes]] + #[para] Tests input of 1,2,3 or 4 bytes and responds with a boolean indicating if it is a valid utf-8 character (codepoint) + regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + ^ + (?: + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + $ + } $1234bytes + } + proc get_utf8_leading {rawbytes} { + #*** !doctools + #[call [fun get_utf8_leading] [arg rawbytes]] + #[para] return the leading portion of rawbytes that is a valid utf8 sequence. + #[para] This will stop at the point at which the bytes can't be interpreted as a complete utf-8 codepoint + #[para] e.g It will not return the first byte or 2 of a 3-byte utf-8 character if the last byte is missing, and will return only the valid utf-8 string from before the first byte of the incomplete character. + #[para] It will also only return the prefix before any bytes that cannot be part of a utf-8 sequence at all. + #[para] Note that while this will return valid utf8 - it has no knowledge of grapheme clusters or diacritics + #[para] This means if it is being used to process bytes split at some arbitrary point - the trailing data that isn't returned could be part of a grapheme cluster that belongs with the last character of the leading string already returned + #[para] The utf-8 BOM \xEF\xBB\xBF is a valid UTF8 3-byte sequence and so can also be returned as part of the leading utf8 bytes + if {[regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + \A ( + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + + } $rawbytes completeChars]} { + return $completeChars + } + return "" + } + proc hex2dec {args} { + #*** !doctools + #[call [fun hex2dec] [opt {option value...}] [arg list_largeHex]] + #[para]Convert a list of (possibly large) unprefixed hex strings to their decimal values + #[para]hex2dec accepts and ignores internal underscores in the same manner as Tcl 8.7+ numbers e.g hex2dec FF_FF returns 65535 + #[para]Leading and trailing underscores are ignored as a matter of implementation convenience - but this shouldn't be relied upon. + #[para]Leading or trailing whitespace in each list member is allowed e.g hex2dec " F" returns 15 + #[para]Internal whitespace e.g "F F" is not permitted - but a completely empty element "" is allowed and will return 0 + + set list_largeHex [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::hex2dec arguments prior to list_largeHex must be option/value pairs - received '$argopts'" + } + set opts [tcl::dict::create\ + -validate 1\ + -empty_as_hex "INVALID set -empty_as_hex to a hex string e.g FF if empty values should be replaced"\ + ] + set known_opts [tcl::dict::keys $opts] + foreach {k v} $argopts { + tcl::dict::set opts [tcl::prefix match -message "options for hex2dec. Unexpected option" $known_opts $k] $v + } + # -- --- --- --- + set opt_validate [tcl::dict::get $opts -validate] + set opt_empty [tcl::dict::get $opts -empty_as_hex] + # -- --- --- --- + + set list_largeHex [lmap h $list_largeHex[unset list_largeHex] {string map {_ ""} [string trim $h]}] + if {$opt_validate} { + #Note appended F so that we accept list of empty strings as per the documentation + if {![string is xdigit -strict [join $list_largeHex ""]F ]} { + error "[namespace current]::hex2dec error: non-hex digits encountered after stripping underscores and leading/trailing whitespace for each element\n $list_largeHex" + } + } + if {![string is xdigit -strict [string map {_ ""} $opt_empty]]} { + #mapping empty string to a value destroys any advantage of -scanonly + #todo - document that -scanonly has 2 restrictions - each element must be valid hex and less than 7 chars long + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {0} : {[set v]}}] + if {[lsearch $list_largeHex ""] >=0} { + error "[namespace current]::hex2dec error: empty values in list cannot be mapped to non-hex $opt_empty" + } + } else { + set opt_empty [string trim [string map {_ ""} $opt_empty]] + if {[set first_empty [lsearch $list_largeHex ""]] >= 0} { + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {$opt_empty} : {$v}}] + set nonempty_head [lrange $list_largeHex 0 $first_empty-1] + set list_largeHex [concat $nonempty_head [lmap v [lrange $list_largeHex $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [scan $list_largeHex [string repeat %llx [llength $list_largeHex]]] + } + + proc dec2hex {args} { + #*** !doctools + #[call [fun dex2hex] [opt {option value...}] [arg list_decimals]] + #[para]Convert a list of decimal integers to a list of hex values + #[para] -width can be used to make each hex value at least int characters wide, with leading zeroes. + #[para] -case upper|lower determines the case of the hex letters in the output + set list_decimals [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::dec2hex arguments prior to list_decimals must be option/value pairs - received '$argopts'" + } + set defaults [tcl::dict::create\ + -width 1\ + -case upper\ + -empty_as_decimal "INVALID set -empty_as_decimal to a number if empty values should be replaced"\ + ] + set known_opts [tcl::dict::keys $defaults] + set fullopts [tcl::dict::create] + foreach {k v} $argopts { + tcl::dict::set fullopts [tcl::prefix match -message "options for [tcl::namespace::current]::dec2hex. Unexpected option" $known_opts $k] $v + } + set opts [tcl::dict::merge $defaults $fullopts] + # -- --- --- --- + set opt_width [tcl::dict::get $opts -width] + set opt_case [tcl::dict::get $opts -case] + set opt_empty [tcl::dict::get $opts -empty_as_decimal] + # -- --- --- --- + + + set resultlist [list] + switch -- [string tolower $opt_case] { + upper { + set spec X + } + lower { + set spec x + } + default { + error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower" + } + } + set fmt "%${opt_width}.${opt_width}ll${spec}" + + set list_decimals [lmap d $list_decimals[unset list_decimals] {string map {_ ""} [string trim $d]}] + if {![string is digit -strict [string map {_ ""} $opt_empty]]} { + if {[lsearch $list_decimals ""] >=0} { + error "[namespace current]::dec2hex error: empty values in list cannot be mapped to non-decimal $opt_empty" + } + } else { + set opt_empty [string map {_ ""} $opt_empty] + if {[set first_empty [lsearch $list_decimals ""]] >= 0} { + set nonempty_head [lrange $list_decimals 0 $first_empty-1] + set list_decimals [concat $nonempty_head [lmap v [lrange $list_decimals $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [format [lrepeat [llength $list_decimals] $fmt] {*}$list_decimals] + } + + proc log2 x "expr {log(\$x)/[expr log(2)]}" + #*** !doctools + #[call [fun log2] [arg x]] + #[para]log base2 of x + #[para]This uses a 'live' proc body - the divisor for the change of base is computed once at definition time + #[para](courtesy of RS [uri https://wiki.tcl-lang.org/page/Additional+math+functions]) + + proc logbase {b x} { + #*** !doctools + #[call [fun logbase] [arg b] [arg x]] + #[para]log base b of x + #[para]This function uses expr's natural log and the change of base division. + #[para]This means for example that we can get results like: logbase 10 1000 = 2.9999999999999996 + #[para]Use expr's log10() function or tcl::mathfunc::log10 for base 10 + expr {log($x)/log($b)} + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::factors + @cmd -name punk::lib::factors\ + -summary\ + "Sorted factors of positive integer x"\ + -help\ + "Return a sorted list of the positive factors of x where x > 0 + For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. + (including zero itself in this context)* + + This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors + Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions + See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers + Comparisons were done with some numbers below 17 digits long + For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. + The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers + but has the disadvantage of being slower for 'small' numbers and using more memory. + If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get + there than computing the whole list, even for small values of x + * Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py + In other mathematical contexts zero may be considered not to divide anything." + @values -min 1 -max 1 + x -type integer + }] + } + proc factors {x} { + #*** !doctools + #[call [fun factors] [arg x]] + #[para]Return a sorted list of the positive factors of x where x > 0 + #[para]For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. (including zero itself in this context)* + #[para]This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors + #[para]Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions + #[para]See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers + #[para]Comparisons were done with some numbers below 17 digits long + #[para]For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. + #[para]The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers + #but has the disadvantage of being slower for 'small' numbers and using more memory. + #[para]If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get there than computing the whole list, even for small values of x + #[para]* Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py + #[para] In other mathematical contexts zero may be considered not to divide anything. + set factors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {($x % $j) == 0} { + lappend factors $j [expr {$x / $j}] + } + incr j + } + lappend factors $x + return [lsort -unique -integer $factors] + } + proc oddFactors {x} { + #*** !doctools + #[call [fun oddFactors] [arg x]] + #[para]Return a list of odd integer factors of x, sorted in ascending order + set j 2 + set max [expr {sqrt($x)}] + set factors [list 1] + while {$j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2} { + if {$other ni $factors} { + lappend factors $other + } + } + if {$j % 2} { + if {$j ni $factors} { + lappend factors $j + } + } + } + incr j + } + return [lsort -integer -increasing $factors] + } + proc greatestFactorBelow {x} { + #*** !doctools + #[call [fun greatestFactorBelow] [arg x]] + #[para]Return the largest factor of x excluding itself + #[para]factor functions can be useful for console layout calculations + #[para]See Tcllib math::numtheory for more extensive implementations + if {$x % 2 == 0 || $x == 0} { + return [expr {$x / 2}] + } + set j 3 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {$x % $j == 0} { + return [expr {$x / $j}] + } + incr j 2 + } + return 1 + } + proc greatestOddFactorBelow {x} { + #*** !doctools + #[call [fun greatestOddFactorBelow] [arg x]] + #[para]Return the largest odd integer factor of x excluding x itself + if {$x %2 == 0} { + return [greatestOddFactor $x] + } + set j 3 + #dumb brute force - time taken to compute is wildly variable on big numbers + #todo - use a (memoized?) generator of primes to reduce the search space + #tcllib math::numtheory has suitable functions - but do we want that dependency here? Testing shows brute-force often faster for small numbers. + set god 1 + set max [expr {sqrt($x)}] + while { $j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2 == 0} { + set god $j + } else { + set god [expr {$x / $j}] + #lowest j - so other side must be highest + break + } + } + incr j 2 + } + return $god + } + proc greatestOddFactor {x} { + #*** !doctools + #[call [fun greatestOddFactor] [arg x]] + #[para]Return the largest odd integer factor of x + #[para]For an odd value of x - this will always return x + if {$x % 2 != 0 || $x == 0} { + return $x + } + set r [expr {$x / 2}] + while {$r % 2 == 0} { + set r [expr {$r / 2}] + } + return $r + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::gcd + @cmd -name punk::lib::gcd\ + -summary\ + "Greatest common divisor of m and n."\ + -help\ + "Return the greatest common divisor of m and n. + Straight from Lars Hellström's math::numtheory library in Tcllib + + Graphical use: + An a by b rectangle can be covered with square tiles of side-length c, + only if c is a common divisor of a and b" + @values -min 2 -max 2 + m -type integer + n -type integer + }] + } + proc gcd {n m} { + #*** !doctools + #[call [fun gcd] [arg n] [arg m]] + #[para]Return the greatest common divisor of m and n + #[para]Straight from Lars Hellström's math::numtheory library in Tcllib + #[para]Graphical use: + #[para]An a by b rectangle can be covered with square tiles of side-length c, + #[para]only if c is a common divisor of a and b + + # + # Apply Euclid's good old algorithm + # + if { $n > $m } { + set t $n + set n $m + set m $t + } + + while { $n > 0 } { + set r [expr {$m % $n}] + set m $n + set n $r + } + + return $m + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lcm + @cmd -name punk::lib::lcm\ + -summary\ + "Lowest common multiple of m and n."\ + -help\ + "Return the lowest common multiple of m and n. + Straight from Lars Hellström's math::numtheory library in Tcllib" + @values -min 2 -max 2 + m -type integer + n -type integer + }] + } + proc lcm {n m} { + set gcd [gcd $n $m] + return [expr {$n*$m/$gcd}] + } + proc commonDivisors {x y} { + #*** !doctools + #[call [fun commonDivisors] [arg x] [arg y]] + #[para]Return a list of all the common factors of x and y + #[para](equivalent to factors of their gcd) + return [factors [gcd $x $y]] + } + + #experimental only - there are better/faster ways + proc sieve n { + set primes [list] + if {$n < 2} {return $primes} + set nums [tcl::dict::create] + for {set i 2} {$i <= $n} {incr i} { + tcl::dict::set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} + lappend primes $next + tcl::dict::for {next -} $nums break + } + return [concat $primes [tcl::dict::keys $nums]] + } + proc sieve2 n { + set primes [list] + if {$n < 2} {return $primes} + set nums [tcl::dict::create] + for {set i 2} {$i <= $n} {incr i} { + tcl::dict::set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} + lappend primes $next + #dict for {next -} $nums break + set next [lindex $nums 0] + } + return [concat $primes [tcl::dict::keys $nums]] + } + + proc hasglobs {str} { + #*** !doctools + #[call [fun hasglobs] [arg str]] + #[para]Return a boolean indicating whether str contains any of the glob characters: * ? [lb] [rb] + #[para]hasglobs uses append to preserve Tcls internal representation for str - so it should help avoid shimmering in the few cases where this may matter. + regexp {[*?\[\]]} [append obj2 $str {}] ;# int-rep preserving + } + + proc trimzero {number} { + #*** !doctools + #[call [fun trimzero] [arg number]] + #[para]Return number with left-hand-side zeros trimmed off - unless all zero + #[para]If number is all zero - a single 0 is returned + set trimmed [string trimleft $number 0] + if {[string length $trimmed] == 0} { + set trimmed 0 + } + return $trimmed + } + proc substring_count {str substring} { + #*** !doctools + #[call [fun substring_count] [arg str] [arg substring]] + #[para]Search str and return number of occurrences of substring + + #faster than lsearch on split for str of a few K + if {$substring eq ""} {return 0} + set occurrences [expr {[string length $str]-[string length [string map [list $substring {}] $str]]}] + return [expr {$occurrences / [string length $substring]}] + } + + proc dict_merge_ordered {defaults main} { + #*** !doctools + #[call [fun dict_merge_ordered] [arg defaults] [arg main]] + #[para]The standard dict merge accepts multiple dicts with values from dicts to the right (2nd argument) taking precedence. + #[para]When merging with a dict of default values - this means that any default key/vals that weren't in the main dict appear in the output before the main data. + #[para]This function merges the two dicts whilst maintaining the key order of main followed by defaults. + + #1st merge (inner merge) with wrong values taking precedence - but right key-order - then (outer merge) restore values + return [tcl::dict::merge [tcl::dict::merge $main $defaults] $main] + } + + proc askuser {question} { + #*** !doctools + #[call [fun askuser] [arg question]] + #[para]A basic utility to read an answer from stdin + #[para]The prompt is written to the terminal and then it waits for a user to type something + #[para]stdin is temporarily configured to blocking and then put back in its original state in case it wasn't already so. + #[para]If the terminal is using punk::console and is in raw mode - the terminal will temporarily be put in line mode. + #[para](Generic terminal raw vs linemode detection not yet present) + #[para]The user must hit enter to submit the response + #[para]The return value is the string if any that was typed prior to hitting enter. + #[para]The question argument can be manually colourised using the various punk::ansi funcitons + #[example_begin] + # set answer [lb]punk::lib::askuser "[lb]a+ green bold[rb]Do you want to proceed? (Y|N)[lb]a[rb]"[rb] + # if {[lb]string match y* [lb]string tolower $answer[rb][rb]} { + # puts "Proceeding" + # } else { + # puts "Cancelled by user" + # } + #[example_end] + puts stdout $question + flush stdout + set stdin_state [chan configure stdin] + if {[catch { + package require punk::console + set console_raw [tsv::get console is_raw] + } err_console]} { + #assume normal line mode + set console_raw 0 + } + try { + chan configure stdin -blocking 1 + if {$console_raw} { + punk::console::disableRaw + set answer [gets stdin] + punk::console::enableRaw + } else { + set answer [gets stdin] + } + } finally { + chan configure stdin -blocking [tcl::dict::get $stdin_state -blocking] + } + return $answer + } + + #like textutil::adjust::indent - but doesn't strip trailing lines, and doesn't implement skip parameter. + proc indent {text {prefix " "}} { + set result [list] + foreach line [split $text \n] { + if {[string trim $line] eq ""} { + lappend result "" + } else { + lappend result $prefix[string trimright $line] + } + } + return [join $result \n] + } + #dedent? + proc undent {text {max -1}} { + if {$text eq ""} { + return "" + } + set lines [split $text \n] + set nonblank [list] + foreach ln $lines { + if {[string trim $ln] eq ""} { + continue + } + lappend nonblank $ln + } + set lcp [longestCommonPrefix $nonblank] + if {$lcp eq ""} { + return $text + } + regexp {^([\t ]*)} $lcp _m lcp + if {$lcp eq ""} { + return $text + } + set len [string length $lcp] + if {$max != -1} { + set len [expr {min($len,$max)}] + } + set result [list] + foreach ln $lines { + if {[string trim $ln] eq ""} { + lappend result "" + } else { + lappend result [string range $ln $len end] + } + } + return [join $result \n] + } + #A version of textutil::string::longestCommonPrefixList + proc longestCommonPrefix {items} { + if {[llength $items] <= 1} { + return [lindex $items 0] + } + set items [lsort $items[unset items]] + set min [lindex $items 0] + set max [lindex $items end] + #if first and last of sorted list share a prefix - then all do (first and last of sorted list are the most different in the list) + #(sort order nothing to do with length - e.g min may be longer than max) + if {[string length $min] > [string length $max]} { + set temp $min + set min $max + set max $temp + } + set n [string length $min] + set prefix "" + set i -1 + while {[incr i] < $n && ([set c [string index $min $i]] eq [string index $max $i])} { + append prefix $c + } + return $prefix + } + + #e.g linesort -decreasing $data + proc linesort {args} { + #*** !doctools + #[call [fun linesort] [opt {sortoption ?val?...}] [arg textblock]] + #[para]Sort lines in textblock + #[para]Returns another textblock with lines sorted + #[para]options are flags as accepted by lsort ie -ascii -command -decreasing -dictionary -index -indices -integer -nocase -real -stride -unique + if {[llength $args] < 1} { + error "linesort missing lines argument" + } + set lines [lindex $args end] + set opts [lrange $args 0 end-1] + #.= list $lines |@0,sortopts/1> linelist |> .=data>1,sortopts>1* lsort |> list_as_lines <| {*}$opts + list_as_lines [lsort {*}$opts [linelist $lines]] + } + + proc list_as_lines {args} { + #*** !doctools + #[call [fun list_as_lines] [opt {-joinchar char}] [arg linelist]] + #[para]This simply joins the elements of the list with -joinchar + #[para]It is mainly intended for use in pipelines where the primary argument comes at the end - but it can also be used as a general replacement for join $lines + #[para]The sister function lines_as_list takes a block of text and splits it into lines - but with more options related to trimming the block and/or each line. + if {[set eop [lsearch $args --]] == [llength $args]-2} { + #end-of-opts not really necessary - except for consistency with lines_as_list + set args [concat [lrange $args 0 $eop-1] [lrange $args $eop+1 end]] + } + if {[llength $args] == 3 && [lindex $args 0] eq "-joinchar"} { + set joinchar [lindex $args 1] + set lines [lindex $args 2] + } elseif {[llength $args] == 1} { + set joinchar "\n" + set lines [lindex $args 0] + } else { + error "list_as_lines usage: list_as_lines ?-joinchar ? " + } + return [join $lines $joinchar] + } + proc list_as_lines2 {args} { + #eat or own dogfood version - shows the implementation is simpler - but unfortunately not suitable for a simple function like this which should be as fast as possible? + lassign [tcl::dict::values [punk::args::parse $args withdef { + -joinchar -default \n + @values -min 1 -max 1 + }]] leaders opts values + + return [join [tcl::dict::get $values 0] [tcl::dict::get $opts -joinchar]] + } + + proc lines_as_list {args} { + #*** !doctools + #[call [fun lines_as_list] [opt {option value ...}] [arg text]] + #[para]Returns a list of possibly trimmed lines depeding on options + #[para]The concept of lines is raw lines from splitting on newline after crlf is mapped to lf + #[para]- not console lines which may be entirely different due to control characters such as vertical tabs or ANSI movements + + #The underlying function linelist has the validation code which gives nicer usage errors. + #we can't use a dict merge here without either duplicating the underlying validation somewhat, or risking a default message from dict merge error + #..because we don't know what to say if there are odd numbers of args + #we can guess that it's ok to insert our default if no -block found in $args - but as a general principle this mightn't always work + #e.g if -block is also a valid value for the textblock itself. Which in this case it is - although unlikely, and our -block {} default is irrelevant in that case anyway + + if {[lsearch $args "--"] == [llength $args]-2} { + set opts [lrange $args 0 end-2] + } else { + set opts [lrange $args 0 end-1] + } + #set opts [tcl::dict::merge {-block {}} $opts] + set bposn [lsearch $opts -block] + if {$bposn < 0} { + lappend opts -block {} + } + set text [lindex $args end] + #tailcall linelist {*}$opts $text + return [linelist {*}$opts $text] + } + #this demonstrates the ease of using an args processor - but as lines_as_list is heavily used in terminal output - we can't afford the extra microseconds + proc lines_as_list2 {args} { + #pass -anyopts 1 so we can let the next function decide what arguments are valid - but still pass our defaults + #-anyopts 1 avoids having to know what to say if odd numbers of options passed etc + #we don't have to decide what is an opt vs a value + #even if the caller provides the argument -block without a value the next function's validation will report a reasonable error because there is now nothing in $values (consumed by -block) + lassign [tcl::dict::values [punk::args::parse $args withdef { + @opts -any 1 + -block -default {} + }]] leaderdict opts valuedict + tailcall linelist {*}$opts {*}[tcl::dict::values $valuedict] + } + + # important for pipeline & match_assign + # -line trimline|trimleft|trimright -block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty -commandprefix {string length} ? + # -block trimming only trims completely empty lines. use -line trimming to remove whitespace e.g -line trimright will clear empty lines without affecting leading whitespace on other lines that aren't pure whitespace + set linelist_body { + set usage "linelist ?-ansiresets auto|? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" + if {[llength $args] == 0} { + error "linelist missing textchunk argument usage:$usage" + } + set text [lindex $args end] + set text [string map {\r\n \n} $text] ;#review - option? + + set arglist [lrange $args 0 end-1] + set opts [tcl::dict::create\ + -block {trimhead1 trimtail1}\ + -line {}\ + -commandprefix ""\ + -ansiresets auto\ + -ansireplays 0\ + ] + foreach {o v} $arglist { + switch -- $o { + -block - -line - -commandprefix - -ansiresets - -ansireplays { + tcl::dict::set opts $o $v + } + default { + error "linelist: Unrecognized option '$o' usage:$usage" + } + } + } + # -- --- --- --- --- --- + set opt_block [tcl::dict::get $opts -block] + if {[llength $opt_block]} { + foreach bo $opt_block { + switch -- $bo { + trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} + default { + set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] + error "linelist: unknown -block option value: $bo known values: $known_blockopts" + } + } + } + #normalize certain combos + if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimall" in $opt_block} { + #no other block options make sense in combination with this + set opt_block [list "trimall"] + } + + #TODO + if {"triminner" in $opt_block } { + error "linelist -block triminner not implemented - sorry" + } + + } + + + # -- --- --- --- --- --- + set opt_line [tcl::dict::get $opts -line] + set tl_left 0 + set tl_right 0 + set tl_both 0 + foreach lo $opt_line { + switch -- $lo { + trimline { + set tl_both 1 + } + trimleft { + set tl_left 1 + } + trimright { + set tl_right 1 + } + default { + set known_lineopts [list trimline trimleft trimright] + error "linelist: unknown -line option value: $lo known values: $known_lineopts" + } + } + } + #normalize trimleft trimright combo + if {$tl_left && $tl_right} { + set opt_line [list "trimline"] + set tl_both 1 + } + # -- --- --- --- --- --- + set opt_commandprefix [tcl::dict::get $opts -commandprefix] + # -- --- --- --- --- --- + set opt_ansiresets [tcl::dict::get $opts -ansiresets] + # -- --- --- --- --- --- + set opt_ansireplays [tcl::dict::get $opts -ansireplays] + if {$opt_ansireplays} { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 1 + } + } else { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 0 + } + } + # -- --- --- --- --- --- + set linelist [list] + set nlsplit [split $text \n] + if {![llength $opt_line]} { + set linelist $nlsplit + #lappend linelist {*}$nlsplit + } else { + #already normalized trimleft+trimright to trimline + if {$tl_both} { + foreach ln $nlsplit { + lappend linelist [string trim $ln] + } + } elseif {$tl_left} { + foreach ln $nlsplit { + lappend linelist [string trimleft $ln] + } + } elseif {$tl_right} { + foreach ln $nlsplit { + lappend linelist [string trimright $ln] + } + } + } + + if {"collateempty" in $opt_block} { + set inputlist $linelist[set linelist [list]] + set last "-" + foreach input $inputlist { + if {$input ne ""} { + lappend linelist $input + set last "-" + } else { + if {$last ne ""} { + lappend linelist "" + } + set last "" + } + } + } + + if {"trimall" in $opt_block} { + set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] + } else { + set start 0 + if {"trimhead" in $opt_block} { + set idx 0 + set lastempty -1 + foreach ln $linelist { + if {[lindex $linelist $idx] ne ""} { + break + } else { + set lastempty $idx + } + incr idx + } + if {$lastempty >=0} { + set start [expr {$lastempty +1}] + } + } + set linelist [lrange $linelist $start end] + + if {"trimtail" in $opt_block} { + set revlinelist [lreverse $linelist][set linelist {}] + set i 0 + foreach ln $revlinelist { + if {$ln ne ""} { + set linelist [lreverse [lrange $revlinelist $i end]] + break + } + incr i + } + } + + # --- --- + set start 0 + set end "end" + if {"trimhead1" in $opt_block} { + if {[lindex $linelist 0] eq ""} { + set start 1 + } + } + if {"trimtail1" in $opt_block} { + if {[lindex $linelist end] eq ""} { + set end "end-1" + } + } + set linelist [lrange $linelist $start $end] + } + + #review - we need to make sure ansiresets don't accumulate/grow on any line + #Each resulting line should have a reset of some type at start and a pure-reset at end to stop + #see if we can find an ST sequence that most terminals will not display for marking sections? + if {$opt_ansireplays} { + ;#package require punk::ansi + if {$opt_ansiresets} { + set RST "\x1b\[0m" + } else { + set RST "" + } + set replaycodes $RST ;#todo - default? + set transformed [list] + #shortcircuit common case of no ansi + #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. + #This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) + #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable + #detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures) + + #we use detectcode_in_list instead of detect_in_list + #detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message) + # - but the main reason is it is slightly faster. + if {![punk::ansi::ta::detectcode_in_list $linelist]} { + if {$opt_ansiresets} { + foreach ln $linelist { + lappend transformed $RST$ln$RST + } + set linelist $transformed + } + } else { + + #INLINE punk::ansi::codetype::is_sgr_reset + #regexp {\x1b\[0*m$} $code + set re_is_sgr_reset {\x1b\[0*m$} + #INLINE punk::ansi::codetype::is_sgr + #regexp {\033\[[0-9;:]*m$} $code + set re_is_sgr {\x1b\[[0-9;:]*m$} + + foreach ln $linelist { + #set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable + + #set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + #get_codes_single lists only the codes. no plaintext or empty elements + set ansisplits [punk::ansi::ta::get_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + if {[llength $ansisplits] == 0} { + #plaintext only - no ansi codes in line + lappend transformed [string cat $replaycodes $ln $RST] + #leave replaycodes as is for next line + set nextreplay $replaycodes + } else { + set tail $RST + set lastcode [lindex $ansisplits end] ;#may or may not be SGR + set lastcodeoffset [expr {[string length $lastcode]-1}] + if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { + if {[string range $ln end-$lastcodeoffset end] eq $lastcode} { + #last plaintext is empty. So the line is already suffixed with a reset + set tail "" + set nextreplay $RST + } else { + #trailing text has been reset within line - but no tail reset present + #we normalize by putting a tail reset on anyway + set tail $RST + set nextreplay $RST + } + } elseif {[string range $ln end-$lastcodeoffset end] eq $lastcode && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { + #code is at tail (no trailing plaintext) + #No tail reset - and no need to examine whole line to determine stack that is in effect + set tail $RST + set nextreplay $lastcode + } else { + #last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect + #last codeset doesn't end in a pure-reset + #whether code was at very end or not - add a reset tail + set tail $RST + #determine effective replay for line + set codestack [list start] + foreach code $ansisplits { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list] ;#different from 'start' marked - this means we've had a reset + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + if {[punk::ansi::codetype::is_sgr $code]} { + #todo - proper test of each code - so we only take latest background/foreground etc. + #requires handling codes with varying numbers of parameters. + #basic simplification - remove straight dupes. + set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + } ;#else gx0 or other code - we don't want to stack it with SGR codes + } + } + if {$codestack eq [list start]} { + #No SGRs - may have been other codes + set line_has_sgr 0 + } else { + #list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes + set line_has_sgr 1 + if {[lindex $codestack 0] eq "start"} { + set codestack [lrange $codestack 1 end] + } + } + + #set newreplay [join $codestack ""] + set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] + + if {$line_has_sgr && $newreplay ne $replaycodes} { + #adjust if it doesn't already does a reset at start + if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { + set nextreplay $newreplay + } else { + set nextreplay $RST$newreplay + } + } else { + set nextreplay $replaycodes + } + } + if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { + #no point attaching any replay + lappend transformed [string cat $ln $tail] + } else { + lappend transformed [string cat $replaycodes $ln $tail] + } + } + set replaycodes $nextreplay + } + set linelist $transformed + } + } + + if {[llength $opt_commandprefix]} { + set transformed [list] + foreach ln $linelist { + lappend transformed [{*}$opt_commandprefix $ln] + } + set linelist $transformed + } + + return $linelist + } + if {$has_punk_ansi} { + #optimise linelist as much as possible + set linelist_body [string map { ""} $linelist_body] + } else { + #punk ansi not avail at time of package load. + #by putting in calls to punk::ansi the user will get appropriate error messages + set linelist_body [string map { "package require punk::ansi"} $linelist_body] + } + + set linelist_body_original { + set usage "linelist ?-ansiresets auto|? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" + if {[llength $args] == 0} { + error "linelist missing textchunk argument usage:$usage" + } + set text [lindex $args end] + set text [string map {\r\n \n} $text] ;#review - option? + + set arglist [lrange $args 0 end-1] + set opts [tcl::dict::create\ + -block {trimhead1 trimtail1}\ + -line {}\ + -commandprefix ""\ + -ansiresets auto\ + -ansireplays 0\ + ] + foreach {o v} $arglist { + switch -- $o { + -block - -line - -commandprefix - -ansiresets - -ansireplays { + tcl::dict::set opts $o $v + } + default { + error "linelist: Unrecognized option '$o' usage:$usage" + } + } + } + # -- --- --- --- --- --- + set opt_block [tcl::dict::get $opts -block] + if {[llength $opt_block]} { + foreach bo $opt_block { + switch -- $bo { + trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} + default { + set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] + error "linelist: unknown -block option value: $bo known values: $known_blockopts" + } + } + } + #normalize certain combos + if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimall" in $opt_block} { + #no other block options make sense in combination with this + set opt_block [list "trimall"] + } + + #TODO + if {"triminner" in $opt_block } { + error "linelist -block triminner not implemented - sorry" + } + + } + + + # -- --- --- --- --- --- + set opt_line [tcl::dict::get $opts -line] + set tl_left 0 + set tl_right 0 + set tl_both 0 + foreach lo $opt_line { + switch -- $lo { + trimline { + set tl_both 1 + } + trimleft { + set tl_left 1 + } + trimright { + set tl_right 1 + } + default { + set known_lineopts [list trimline trimleft trimright] + error "linelist: unknown -line option value: $lo known values: $known_lineopts" + } + } + } + #normalize trimleft trimright combo + if {$tl_left && $tl_right} { + set opt_line [list "trimline"] + set tl_both 1 + } + # -- --- --- --- --- --- + set opt_commandprefix [tcl::dict::get $opts -commandprefix] + # -- --- --- --- --- --- + set opt_ansiresets [tcl::dict::get $opts -ansiresets] + # -- --- --- --- --- --- + set opt_ansireplays [tcl::dict::get $opts -ansireplays] + if {$opt_ansireplays} { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 1 + } + } else { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 0 + } + } + # -- --- --- --- --- --- + set linelist [list] + set nlsplit [split $text \n] + if {![llength $opt_line]} { + set linelist $nlsplit + #lappend linelist {*}$nlsplit + } else { + #already normalized trimleft+trimright to trimline + if {$tl_both} { + foreach ln $nlsplit { + lappend linelist [string trim $ln] + } + } elseif {$tl_left} { + foreach ln $nlsplit { + lappend linelist [string trimleft $ln] + } + } elseif {$tl_right} { + foreach ln $nlsplit { + lappend linelist [string trimright $ln] + } + } + } + + if {"collateempty" in $opt_block} { + set inputlist $linelist[set linelist [list]] + set last "-" + foreach input $inputlist { + if {$input ne ""} { + lappend linelist $input + set last "-" + } else { + if {$last ne ""} { + lappend linelist "" + } + set last "" + } + } + } + + if {"trimall" in $opt_block} { + set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] + } else { + set start 0 + if {"trimhead" in $opt_block} { + set idx 0 + set lastempty -1 + foreach ln $linelist { + if {[lindex $linelist $idx] ne ""} { + break + } else { + set lastempty $idx + } + incr idx + } + if {$lastempty >=0} { + set start [expr {$lastempty +1}] + } + } + set linelist [lrange $linelist $start end] + + if {"trimtail" in $opt_block} { + set revlinelist [lreverse $linelist][set linelist {}] + set i 0 + foreach ln $revlinelist { + if {$ln ne ""} { + set linelist [lreverse [lrange $revlinelist $i end]] + break + } + incr i + } + } + + # --- --- + set start 0 + set end "end" + if {"trimhead1" in $opt_block} { + if {[lindex $linelist 0] eq ""} { + set start 1 + } + } + if {"trimtail1" in $opt_block} { + if {[lindex $linelist end] eq ""} { + set end "end-1" + } + } + set linelist [lrange $linelist $start $end] + } + + #review - we need to make sure ansiresets don't accumulate/grow on any line + #Each resulting line should have a reset of some type at start and a pure-reset at end to stop + #see if we can find an ST sequence that most terminals will not display for marking sections? + if {$opt_ansireplays} { + #package require punk::ansi + + if {$opt_ansiresets} { + set RST "\x1b\[0m" + } else { + set RST "" + } + set replaycodes $RST ;#todo - default? + set transformed [list] + #shortcircuit common case of no ansi + #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. + #This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) + #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable + #detect_in_list will check at first level. (not intended for detecting ansi in deeper structures) + if {![punk::ansi::ta::detect_in_list $linelist]} { + if {$opt_ansiresets} { + foreach ln $linelist { + lappend transformed $RST$ln$RST + } + set linelist $transformed + } + } else { + + #INLINE punk::ansi::codetype::is_sgr_reset + #regexp {\x1b\[0*m$} $code + set re_is_sgr_reset {\x1b\[0*m$} + #INLINE punk::ansi::codetype::is_sgr + #regexp {\033\[[0-9;:]*m$} $code + set re_is_sgr {\x1b\[[0-9;:]*m$} + + foreach ln $linelist { + #set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable + + set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + if {[llength $ansisplits]<= 1} { + #plaintext only - no ansi codes in line + lappend transformed [string cat $replaycodes $ln $RST] + #leave replaycodes as is for next line + set nextreplay $replaycodes + } else { + set tail $RST + set lastcode [lindex $ansisplits end-1] ;#may or may not be SGR + if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { + if {[lindex $ansisplits end] eq ""} { + #last plaintext is empty. So the line is already suffixed with a reset + set tail "" + set nextreplay $RST + } else { + #trailing text has been reset within line - but no tail reset present + #we normalize by putting a tail reset on anyway + set tail $RST + set nextreplay $RST + } + } elseif {[lindex $ansisplits end] ne "" && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { + #No tail reset - and no need to examine whole line to determine stack that is in effect + set tail $RST + set nextreplay $lastcode + } else { + #last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect + #last codeset doesn't end in a pure-reset + #whether code was at very end or not - add a reset tail + set tail $RST + #determine effective replay for line + set codestack [list start] + foreach {pt code} $ansisplits { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list] ;#different from 'start' marked - this means we've had a reset + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + if {[punk::ansi::codetype::is_sgr $code]} { + #todo - proper test of each code - so we only take latest background/foreground etc. + #requires handling codes with varying numbers of parameters. + #basic simplification - remove straight dupes. + set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + } ;#else gx0 or other code - we don't want to stack it with SGR codes + } + } + if {$codestack eq [list start]} { + #No SGRs - may have been other codes + set line_has_sgr 0 + } else { + #list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes + set line_has_sgr 1 + if {[lindex $codestack 0] eq "start"} { + set codestack [lrange $codestack 1 end] + } + } + + #set newreplay [join $codestack ""] + set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] + + if {$line_has_sgr && $newreplay ne $replaycodes} { + #adjust if it doesn't already does a reset at start + if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { + set nextreplay $newreplay + } else { + set nextreplay $RST$newreplay + } + } else { + set nextreplay $replaycodes + } + } + if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { + #no point attaching any replay + lappend transformed [string cat $ln $tail] + } else { + lappend transformed [string cat $replaycodes $ln $tail] + } + } + set replaycodes $nextreplay + } + set linelist $transformed + } + } + + if {[llength $opt_commandprefix]} { + set transformed [list] + foreach ln $linelist { + lappend transformed [{*}$opt_commandprefix $ln] + } + set linelist $transformed + } + + return $linelist + } + if {$has_punk_ansi} { + #optimise linelist as much as possible + set linelist_body [string map { ""} $linelist_body] + } else { + #punk ansi not avail at time of package load. + #by putting in calls to punk::ansi the user will get appropriate error messages + set linelist_body [string map { "package require punk::ansi"} $linelist_body] + } + proc linelist {args} $linelist_body + + + interp alias {} errortime {} punk::lib::errortime + proc errortime {script groupsize {iters 2}} { + #by use MAK from https://wiki.tcl-lang.org/page/How+to+Measure+Performance + set i 0 + set times {} + if {$iters < 2} {set iters 2} + + for {set i 0} {$i < $iters} {incr i} { + set result [uplevel [list time $script $groupsize]] + lappend times [lindex $result 0] + } + + set average 0.0 + set s2 0.0 + + foreach time $times { + set average [expr {$average + double($time)/$iters}] + } + + foreach time $times { + set s2 [expr {$s2 + (($time-$average)*($time-$average) / ($iters-1))}] + } + + set sigma [expr {int(sqrt($s2))}] + set average [expr {int($average)}] + + return "$average +/- $sigma microseconds per iteration" + } + + #test function to use with show_jump_tables + #todo - check if switch compilation to jump tables differs by Tcl version + proc switch_char_test {c} { + set dec [scan $c %c] + foreach t [list 1 2 3] { + switch -- $c { + x { + return [list $dec x $t] + } + y { + return [list $dec y $t] + } + z { + return [list $dec z $t] + } + } + } + + #tcl 8.6/8.7 (at least) + #curlies must be unescaped and unbraced to work as literals in switch and enable it to compile to jumpTable + switch -- $c { + a { + return [list $dec a] + } + {"} { + return [list $dec dquote] + } + {[} {return [list $dec lb]} + {]} {return [list $dec rb]} + "{" { + return [list $dec lbrace] + } + "}" { + return [list $dec rbrace] + } + default { + return [list $dec $c] + } + } + + + + } + + #we are interested in seeing jumpTable line and following lines up until next line starting with "Command" or bracketed number e.g (164) + proc show_jump_tables {args} { + #avoiding use of 'info cmdtype' as unavaliable in safe interps as at 2024-06. + if {[llength $args] == 1} { + set data [tcl::unsupported::disassemble proc [lindex $args 0]] + } elseif {[llength $args] == 2} { + #review - this looks for direct methods on the supplied object/class, and then tries to disassemble method on the supplied class or class of supplied object if it isn't a class itself. + #not sure if this handles more complex hierarchies or mixins etc. + lassign $args obj method + if {![info object isa object $obj]} { + error "show_jump_tables unable to examine '$args'. $obj is not an oo object" + } + #classes are objects too and can have direct methods + if {$method in [info object methods $obj]} { + set data [tcl::unsupported::disassemble objmethod $obj $method] + } else { + if {![info object isa class $obj]} { + set obj [info object class $obj] + } + set data [tcl::unsupported::disassemble method $obj $method] + } + } else { + error "show_jump_tables expected a procname or a class/object and method" + } + set result "" + set in_jt 0 + foreach ln [split $data \n] { + set tln [::tcl::string::trim $ln] + if {!$in_jt} { + if {[::tcl::string::match *jumpTable* $ln]} { + punk::ns::call_frame + append result $ln \n + set in_jt 1 + } + } else { + if {[::tcl::string::match Command* $tln] || [::tcl::string::match "(*) *" $tln]} { + set in_jt 0 + } else { + append result $ln \n + } + } + } + return $result + } + + #a test + # punk::ns::cmdtracereturn punk::lib::disassemble ::punk::ns::test_switch4 + # Note the different disassemble result when trace is running. + proc disassemble {procname} { + tcl::unsupported::disassemble proc $procname + } + + proc temperature_f_to_c {deg_fahrenheit} { + return [expr {($deg_fahrenheit -32) * (5/9.0)}] + } + proc temperature_c_to_f {deg_celsius} { + return [expr {($deg_celsius * (9/5.0)) + 32}] + } + + proc interp_sync_package_paths {interp} { + if {![interp exists $interp]} { + error "interp_sync_package_paths error. interp '$interp' not found. Create it first with \[interp create $interp\]" + } + interp eval $interp [list set ::auto_path $::auto_path] + interp eval $interp {tcl::tm::remove {*}[tcl::tm::list]} + interp eval $interp [list tcl::tm::add {*}[lreverse [tcl::tm::list]]] + } + + proc valcopy {obj} { + append obj2 $obj {} + } + proc set_valcopy {varname obj} { + #used by repl's codeinterp. Maintains internal rep, easier to call e.g interp eval code [list punk::set_valcopy varnmame $val] + append obj2 $obj {} + uplevel 1 [list set $varname $obj2] + } + + + + proc format_number {numbers_or_commaformattednumbers {delim ""} {groupsize ""}} { + variable has_twapi + if {$has_twapi} { + if {$delim eq "" && $groupsize eq ""} { + set localeid [twapi::get_system_default_lcid] + } + } + #when using twapi we currently only get the localeid - not the specific defaults + #when not using twapi, or on non-windows platforms - we don't currently have a mechanism to look up user preferences for this + set default_delim "," + set default_groupsize 3 + + set results [list] + set nums [valcopy $numbers_or_commaformattednumbers] ;#stops single num from getting internal rep of list + foreach inputnum $nums { + set number [valcopy $inputnum] + #also handle tcl 8.7+ underscores in numbers + set number [string map [list _ "" , ""] $number] + #normalize e.g 2e4 -> 20000.0 + set number [expr {$number}] + + if {$has_twapi} { + if {$delim eq "" && $groupsize eq ""} { + lappend results [twapi::format_number $number $localeid -idigits -1] + continue + } else { + #setting just one of delim or groupsize means we don't get the user's localeid based default for the non-set one + #todo - document it? Find a way to lookup localeid based defaults whenever either is unspecified? + if {$delim eq ""} {set delim $default_delim} + if {$groupsize eq ""} {set groupsize $default_groupsize} + lappend results [twapi::format_number $number 0 -idigits -1 -sthousand $delim -sgrouping $groupsize] + continue + } + } + #todo - get configured user defaults + if {$delim eq ""} { + set delim $default_delim + } + if {$groupsize eq ""} { + set groupsize $default_groupsize + } + + lappend results [delimit_number $number $delim $groupsize] + } + + if {[llength $results] == 1} { + #keep intrep as string rather than list + return [lindex $results 0] + } + return $results + } + + + #from wiki https://wiki.tcl-lang.org/page/Delimiting+Numberse + # Given a number represented as a string, insert delimiters to break it up for + # readability. Normally, the delimiter will be a comma which will be inserted every + # three digits. However, the delimiter and groupsize are optional arguments, + # permitting use in other locales. + # + # The string is assumed to consist of digits, possibly preceded by spaces, + # and possibly containing a decimal point, i.e.: [:space:]*[:digit:]*\.[:digit:]* + + proc delimit_number {unformattednumber {delim ","} {GroupSize 3}} { + set number [valcopy $unformattednumber] + set number [string map {_ ""} $number] + #normalize using expr - e.g 2e4 -> 20000.0 + set number [expr {$number}] + # First, extract right hand part of number, up to and including decimal point + set point [string last "." $number]; + if {$point >= 0} { + set PostDecimal [string range $number $point+1 end]; + set PostDecimalP 1; + } else { + set point [expr {[string length $number] + 1}] + set PostDecimal ""; + set PostDecimalP 0; + } + + # Now extract any leading spaces. review - regex for whitespace instead of just ascii space? + set ind 0; + while {[string equal [string index $number $ind] \u0020]} { + incr ind; + } + set FirstNonSpace $ind; + set LastSpace [expr {$FirstNonSpace - 1}]; + set LeadingSpaces [string range $number 0 $LastSpace]; + + # Now extract the non-fractional part of the number, omitting leading spaces. + set MainNumber [string range $number $FirstNonSpace $point-1]; + + # Insert commas into the non-fractional part. + set Length [string length $MainNumber]; + set Phase [expr {$Length % $GroupSize}] + set PhaseMinusOne [expr {$Phase -1}]; + set DelimitedMain ""; + + #First we deal with the extra stuff. + if {$Phase > 0} { + append DelimitedMain [string range $MainNumber 0 $PhaseMinusOne]; + } + set FirstInGroup $Phase; + set LastInGroup [expr {$FirstInGroup + $GroupSize -1}]; + while {$LastInGroup < $Length} { + if {$FirstInGroup > 0} { + append DelimitedMain $delim; + } + append DelimitedMain [string range $MainNumber $FirstInGroup $LastInGroup]; + incr FirstInGroup $GroupSize + incr LastInGroup $GroupSize + } + + # Reassemble the number. + if {$PostDecimalP} { + return [format "%s%s.%s" $LeadingSpaces $DelimitedMain $PostDecimal]; + } else { + return [format "%s%s" $LeadingSpaces $DelimitedMain]; + } + } + + #review - there are various type of uuid - we should use something consistent across platforms + #twapi is used on windows because it's about 5 times faster - but is this more important than consistency? + #twapi is much slower to load in the first place (e.g 75ms vs 6ms if package names already loaded) - so for oneshots tcllib uuid is better anyway + #(counterpoint: in the case of punk - we currently need twapi anyway on windows) + #does tcllib's uuid use the same mechanisms on different platforms anyway? + if {$has_twapi} { + interp alias "" ::punk::lib::uuid "" twapi::new_uuid + } else { + catch {package require uuid} + interp alias "" ::punk::lib::uuid "" uuid::uuid generate + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +tcl::namespace::eval punk::lib::flatgrid { + namespace export filler_count rows cols col row block + + #WARNING - requires lseq and 'lsearch -stride' + #WARNING - lsearch -stride oddity with empty strings https://core.tcl-lang.org/tcl/tktview/edebb6a4 + #todo - 8.6 fallback? + + proc filler_count {listlen numcolumns} { + #if {$numcolumns <= 0} {error "filler_count requires 1 or more numcolumns"} ;#or allow divide by zero error + #if {$listlen == 0} {return $numcolumns} ;#an option - but returning zero might make more sense + expr {($numcolumns - ($listlen % $numcolumns)) % $numcolumns} + } + proc rows {list numcolumns {blank NULL}} { + set numblanks [filler_count [llength $list] $numcolumns] + set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] + set splits [lseq 0 to [llength $padded_list] by $numcolumns] + set rows [list] + set i 1 + foreach s [lrange $splits 0 end-1] { + lappend rows [lrange $padded_list $s [lindex $splits $i]-1] + incr i + } + return $rows + } + proc cols {list numcolumns {blank NULL}} { + set cols [list] + foreach colindex [lseq 0 $numcolumns-1] { + lappend cols [lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] *] + } + return $cols + } + proc cols2 {list numcolumns {blank NULL}} { + set cols [list] + foreach colindex [lseq 0 $numcolumns-1] { + lappend cols [col2 $list $numcolumns $colindex $blank] + } + return $cols + } + proc col {list numcolumns colindex {blank NULL}} { + lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] * + } + proc col2 {list numcolumns colindex {blank NULL}} { + set numblanks [filler_count [llength $list] $numcolumns] + set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] + set splits [lseq 0 to [llength $padded_list] by $numcolumns] + set col [list] + foreach s [lrange $splits 0 end-1] { + lappend col [lindex $padded_list $s+$colindex] + } + return $col + } + proc col3 {list numcolumns colindex {blank NULL}} { + set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] + lmap s [lrange [lseq 0 to [llength $padded_list] by $numcolumns] 0 end-1] {lindex $padded_list $s+$colindex} + } + proc col4 {list numcolumns colindex {blank NULL}} { + #slow + set vars [lrepeat $numcolumns _] + lset vars $colindex v + if {$blank eq ""} { + return [lmap $vars $list {set v}] + } + set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] + lmap $vars [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] {set v} + } + + proc block {list numcolumns {blank NULL}} { + set colblocks [list] + foreach c [cols $list $numcolumns $blank] { + lappend colblocks [join $c \n] " " + } + textblock::join -- {*}$colblocks + } + proc block2 {list numcolumns {blank NULL}} { + set colblocks [list] + foreach c [cols2 $list $numcolumns $blank] { + lappend colblocks [join $c \n] " " + } + textblock::join -- {*}$colblocks + } +} + +tcl::namespace::eval punk::lib::test { + + + +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#todo - way to generate 'internal' docs separately? +#*** !doctools +#[section Internal] +tcl::namespace::eval punk::lib::system { + #*** !doctools + #[subsection {Namespace punk::lib::system}] + #[para] Internal functions that are not part of the API + #[list_begin definitions] + + + proc mostFactorsBelow {n} { + ##*** !doctools + #[call [fun mostFactorsBelow] [arg n]] + #[para]Find the number below $n which has the greatest number of factors + #[para]This will get slow quickly as n increases (100K = 1s+ 2024) + set most 0 + set mostcount 0 + for {set i 1} {$i < $n} {incr i} { + set fc [llength [punk::lib::factors $i]] + if {$fc > $mostcount} { + set most $i + set mostcount $fc + } + } + return [list number $most numfactors $mostcount] + } + proc factorCountBelow_punk {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [punk::lib::factors $i]] + } + return $tally + } + proc factorCountBelow_numtheory {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! (significantly slower than factorCountBelow_punk) + package require math::numtheory + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [math::numtheory::factors $i]] + } + return $tally + } + + proc factors2 {x} { + ##*** !doctools + #[call [fun factors2] [arg x]] + #[para]Return a sorted list of factors of x + #[para]A similar brute-force mechanism to factors - but keeps result ordering as we go. + set smallfactors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j < $max} { + if {($x % $j) == 0} { + lappend smallfactors $j + lappend largefactors [expr {$x / $j}] + } + incr j + } + #handle sqrt outside loop so we don't have to sort/dedup or check list membership in main loop + if {($x % $j) == 0} { + if {$j == ($x / $j)} { + lappend smallfactors $j + } + } + return [concat $smallfactors [lreverse $largefactors] $x] + } + + + + # incomplete - report which is the innermost bracket/quote etc awaiting completion for a Tcl command + #important - used by punk::repl + proc incomplete {partial} { + #we can apparently get away without concatenating current innerpartial to previous in list - REVIEW. + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + #puts stderr "-->$clist<--" + set waiting [list ""] + set innerpartials [list ""] + set escaped 0 + set i 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + incr i + continue + } ;# set escaped 0 at end + set p [lindex $innerpartials end] + if {$escaped == 0} { + #NOTE - curly braces as switch arm keys must be unescaped and balanced. (escapes stop byte-compilation to jumpTable for switch statements for tcl8.6/8.7 at least) + switch -- $c { + {"} { + if {![info complete ${p}]} { + lappend waiting {"} + lappend innerpartials "" + } else { + if {[lindex $waiting end] eq {"}} { + #this quote is endquote + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + if {![info complete ${p}$c]} { + lappend waiting {"} + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } + } + {[} { + if {![info complete ${p}$c]} { + lappend waiting "\]" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + "{" { + if {![info complete ${p}$c]} { + lappend waiting "\}" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + "}" - + default { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } + } else { + set p ${p}${c} + lset innerpartials end $p + } + set escaped 0 + incr i + } + set incomplete [list] + foreach w $waiting { + #to be treated as literals - curly braces must be unescaped here - and balanced - hence the left-curly empty arm. + switch -- $w { + {"} { + lappend incomplete $w + } + {]} { + lappend incomplete "\[" + } + "{" {} + "}" { + lappend incomplete "\{" + } + } + } + set debug 0 + if {$debug} { + foreach w $waiting p $innerpartials { + puts stderr "->awaiting:'$w' partial: $p" + } + } + return $incomplete + } + #This only works for very simple cases will get confused with for example: + # {set x "a["""} + proc incomplete_naive {partial} { + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + set waiting [list] + set escaped 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + continue + } + if {!$escaped} { + if {$c eq {"}} { + if {[lindex $waiting end] eq {"}} { + set waiting [lrange $waiting 0 end-1] + } else { + lappend waiting {"} + } + } elseif {$c eq "\["} { + lappend waiting "\]" + } elseif {$c eq "\{"} { + lappend waiting "\}" + } else { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + } + } + } + } + set incomplete [list] + foreach w $waiting { + if {$w eq {"}} { + lappend incomplete $w + } elseif {$w eq "\]"} { + lappend incomplete "\[" + } elseif {$w eq "\}"} { + lappend incomplete "\{" + } + } + return $incomplete + } + + #get info about punk nestindex key ie type: list,dict,undetermined + # pdict devel + proc nestindex_info {args} { + set argd [punk::args::parse $args withdef { + -parent -default "" + nestindex + }] + set opt_parent [dict get $argd opts -parent] + if {$opt_parent eq ""} { + set parent_type undetermined + } else { + set parent_type [nestindex_info -parent "" $opt_parent] ;#make sure we explicitly set parent of parent to empty so we don't just recurse forever doing nothing + } + + #??? + + } + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib::system ---}] +} + +tcl::namespace::eval punk::lib::caches { + +} + +tcl::namespace::eval punk::lib::debug { + proc showdict {args} {} +} + +namespace eval ::punk::args::register { + #use fully qualified so 8.6 doesn't find existing var in global namespace + lappend ::punk::args::register::NAMESPACES ::punk::lib +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::lib [tcl::namespace::eval punk::lib { + variable pkg punk::lib + variable version + set version 0.1.6 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm index 06c7ddf3..741d9fc0 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm @@ -229,12 +229,16 @@ tcl::namespace::eval punk::nav::fs { } else { set stripbase 1 } - if {$v eq "/"} { - #hack - dict set matchinfo files {} - dict set matchinfo filesizes {} - } - set out [dirfiles_dict_as_lines -stripbase $stripbase $matchinfo] + + #we need to pass matchinfo that includes files even when only doing a directory listing (d/ /) + #This is because we want to display links/shortcuts that point to directories as directories. + #( ./ listing needs to show navigable items) + #if {$v eq "/"} { + # #dodgy hack that doesn't give proper display of all links/shortcuts that are pointing to directories. + # dict set matchinfo files {} + # dict set matchinfo filesizes {} + #} + set out [dirfiles_dict_as_lines -listing $v -stripbase $stripbase $matchinfo] #set chunklist [list] #lappend chunklist [list stdout "[punk::ansi::a+ brightwhite]$out[punk::ansi::a]\n"] set result "[punk::ansi::a+ brightwhite]$out[punk::ansi::a]\n" @@ -258,10 +262,10 @@ tcl::namespace::eval punk::nav::fs { #puts stdout "-->[ansistring VIEW $result]" return $result } else { - set atail [lassign $args a1] + set atail [lassign $args cdtarget] if {[llength $args] == 1} { - set a1 [lindex $args 0] - switch -exact -- $a1 { + set cdtarget [lindex $args 0] + switch -exact -- $cdtarget { . - ./ { tailcall punk::nav::fs::d/ } @@ -286,43 +290,88 @@ tcl::namespace::eval punk::nav::fs { } } else { cd $up1 - #set VIRTUAL_CWD [file normalize $a1] + #set VIRTUAL_CWD [file normalize $cdtarget] } tailcall punk::nav::fs::d/ $v } } - if {![regexp {[*?]} $a1] && [file pathtype $a1] ne "relative"} { + set cdtarget_copy [punk::nav::fs::system::valcopy $cdtarget] + set cdtarget_copy [string map {\\ /} $cdtarget_copy] + if {[string range $cdtarget_copy 0 3] eq "//?/"} { + #handle dos device paths - convert to normal path for glob testing + set glob_test [string range $cdtarget_copy 3 end] + set cdtarget_is_glob [regexp {[*?]} $glob_test] + } else { + set cdtarget_is_glob [regexp {[*?]} $cdtarget] + } + if {!$cdtarget_is_glob} { + set cdtarget_file_type [file type $cdtarget] + #e.g may be a link - whilst the type returned in the 'file stat' info reflects the type of the link target + } else { + set cdtarget_file_type "glob" + } + + if {!$cdtarget_is_glob && [file pathtype $cdtarget] ne "relative"} { #non-relative non-glob - if { ![string match //zipfs:/* $a1]} { - if {[file type $a1] eq "directory"} { - cd $a1 - #set VIRTUAL_CWD $a1 - tailcall punk::nav::fs::d/ $v + if {![string match //zipfs:/* $cdtarget]} { + switch -- $cdtarget_file_type { + link { + file stat $cdtarget cdtargetinfo + set linktarget_file_type $cdtargetinfo(type) + if {$linktarget_file_type eq "directory"} { + set linktarget [file readlink $cdtarget] + cd $linktarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } + } + directory { + cd $cdtarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } } } } - if {![regexp {[*?]} $a1] && ![string match //zipfs:/* $a1] && ![string match "//zipfs:/*" $VIRTUAL_CWD]} { - if {[file type $a1] eq "directory"} { - cd $a1 - #set VIRTUAL_CWD [file normalize $a1] - tailcall punk::nav::fs::d/ $v + if {!$cdtarget_is_glob && ![string match //zipfs:/* $cdtarget] && ![string match "//zipfs:/*" $VIRTUAL_CWD]} { + switch -- $cdtarget_file_type { + link { + file stat $cdtarget cdtargetinfo + set linktarget_file_type $cdtargetinfo(type) + set linktarget [file readlink $cdtarget] + if {$linktarget_file_type eq "directory"} { + cd $linktarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } + } + directory { + cd $cdtarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } } + #if {[file type $cdtarget] eq "directory"} { + # cd $cdtarget + # #set VIRTUAL_CWD [file normalize $cdtarget] + # tailcall punk::nav::fs::d/ $v + #} } - if {![regexp {[*?]} $a1]} { + if {!$cdtarget_is_glob} { #NON-Glob target #review - if {[string match //zipfs:/* $a1]} { - if {[Zipfs_path_within_zipfs_mounts $a1]} { - commandstack::basecall cd $a1 + if {[string match //zipfs:/* $cdtarget]} { + if {[Zipfs_path_within_zipfs_mounts $cdtarget]} { + commandstack::basecall cd $cdtarget } - set VIRTUAL_CWD $a1 - set curdir $a1 + set VIRTUAL_CWD $cdtarget + set curdir $cdtarget } else { - set target [punk::path::normjoin $VIRTUAL_CWD $a1] + set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] if {[string match //zipfs:/* $VIRTUAL_CWD]} { if {[Zipfs_path_within_zipfs_mounts $target]} { commandstack::basecall cd $target @@ -521,20 +570,93 @@ tcl::namespace::eval punk::nav::fs { return $result } + punk::args::define { + @id -id ::punk::nav::fs::d/new + -nonportable -type none -help\ + "Allow creation of directories which may not be portable across platforms. + Use with caution and only when you know what you are doing. + This allows creation of directories with names that may be invalid on some + platforms, or that may have special meanings on some platforms + (e.g reserved device names on windows). + If -nonportable is not supplied, then an error will be raised if any supplied + path is non-portable as defined by punk::winpath::illegalname_test. + + Regardless of whether -nonportable is supplied or not, some characters are not + suitable for windows or most other platforms and will be rejected with an error. + An example of this is the null character (\0)." + @values -min 1 -max -1 -type string + path -type string -multiple 1 -help\ + "Path(s) to create. Can be absolute or relative. + + If any path is rejected due to -nonportable or other invalid characters, + or because a parent directory is not writable, then no directories will be created. + + If a path already exists, then it will be left as-is and no error will be raised. + + If despite passing the name tests or writability tests, a directory cannot be + created for some reason (e.g other filesystem error) then an error will be raised + and processing of any remaining paths will be aborted." + } + #todo - synchronize overall behaviour of d/new with that of n/new (for namespaces) proc d/new {args} { - if {![llength $args]} { - error "usage: d/new \[ ...\]" - } - set a1 [lindex $args 0] + set argd [punk::args::parse $args withid ::punk::nav::fs::d/new] + lassign [dict values $argd] leaders opts values received + set paths [dict get $values path] + set allow_nonportable [dict exists $received -nonportable] + set curdir [pwd] - set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] - set fullpath [file join $path1 {*}[lrange $args 1 end]] + set fullpath_list [list] + set error_paths [list] + foreach p $paths { + if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { + #error "punk::nav::fs::d/new Path '$p' is not portable and may not be created without -nonportable option" + lappend error_paths [list $p "Path '$p' is not portable and may not be created without -nonportable option"] + continue + } + if {[string first \0 $p] != -1} { + #error "punk::nav::fs::d/new Path '$p' contains null character which is not allowed" + lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] + continue + } + set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] + #e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. + set fullpath [file join $path1 {*}[lrange $args 1 end]] + #Some subpaths of the supplied paths to create may already exist. + #we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. + set parent [file dirname $fullpath] + while {![file exists $parent]} { + set parent [file dirname $parent] + } + if {![file writable $parent]} { + #error "punk::nav::fs::d/new Cannot create directory '$fullpath' as parent '$parent' is not writable" + lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] + continue + } + lappend fullpath_list $fullpath + } + if {[llength $fullpath_list] != [llength $paths]} { + set path_error_display "" + foreach e $error_paths { + set p [lindex $e 0] + set m [lindex $e 1] + append path_error_display " Path: '$p' Error: $m\n" + } + error "punk::nav::fs::d/new One or more supplied paths were invalid or not writable:\n$path_error_display" + } - if {[file exists $fullpath]} { - error "Folder $fullpath already exists" + set num_created 0 + set error_string "" + foreach fullpath $fullpath_list { + if {[catch {file mkdir $fullpath}]} { + set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." + break + } + incr num_created } - file mkdir $fullpath - d/ $fullpath + if {$error_string ne ""} { + error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." + } + d/ $curdir } #todo use unknown to allow d/~c:/etc ?? @@ -849,11 +971,11 @@ tcl::namespace::eval punk::nav::fs { #file attr //cookit:/ returns {-vfs 1 -handle {}} #we will treat it differently for now - use generic handler REVIEW - set in_vfs 0 ;#we use this only for a vfs which is reported to have a mountpoint by vfs::filesystem info - not all that have -vfs 1 attr like cookit. + set is_in_vfs 0 ;#we use this only for a vfs which is reported to have a mountpoint by vfs::filesystem info - not all that have -vfs 1 attr like cookit. if {[llength [package provide vfs]]} { foreach mount [vfs::filesystem info] { if {[punk::mix::base::lib::path_a_atorbelow_b $location $mount]} { - set in_vfs 1 + set is_in_vfs 1 break } } @@ -871,27 +993,27 @@ tcl::namespace::eval punk::nav::fs { } else { set next_opt_with_times [list -with_times $opt_with_times] } - if {$in_vfs} { + if {$is_in_vfs} { set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] } else { - set in_zipfs 0 - set in_cookit 1 - set in_other_pseudovol 1 + set invfs "" switch -glob -- $location { //zipfs:/* { if {[info commands ::tcl::zipfs::mount] ne ""} { - set in_zipfs 1 + set invfs zipfs } } //cookit:/* { - set in_cookit 1 + set invfs cookit } default { #handle 'other/unknown' that mounts at a volume-like path //pseudovol:/ + #(intentionally will not match a dos device path such as //?/c:/) if {[regexp {//((?:(?!:|/).)+):/.*} $location _match pseudovol]} { #pseudovol probably more than one char long #we don't really expect something like //c:/ , but anyway, it's not the same as c:/ and for all we know someone could use that as a volume name? - set in_other_pseudovol 1 ;#flag so we don't use twapi - hope generic can handle it (uses tcl glob) + #flag so we don't use twapi - hope generic can handle it (uses tcl glob) + set invfs pseudovol } else { #we could use 'file attr' here to test if {-vfs 1} #but it's an extra filesystem hit on all normal paths too (which can be expensive on some systems) @@ -900,20 +1022,24 @@ tcl::namespace::eval punk::nav::fs { } } - - if {$in_zipfs} { - #relative vs absolute? review - cwd valid for //zipfs:/ ?? - set listing [punk::du::lib::du_dirlisting_zipfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } elseif {$in_cookit} { - #seems to be a vfs - except it mounts on a pseudo-volume path //cookit:/ - #don't use twapi - #could possibly use du_dirlisting_tclvfs REVIEW - #files and folders are all returned with the -types hidden option for glob on windows - set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } elseif {$in_other} { - set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } else { - set listing [punk::du::dirlisting $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + switch -- $invfs { + zipfs { + #relative vs absolute? review - cwd valid for //zipfs:/ ?? + set listing [punk::du::lib::du_dirlisting_zipfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + cookit { + #seems to be a vfs - except it mounts on a pseudo-volume path //cookit:/ + #don't use twapi + #could possibly use du_dirlisting_tclvfs REVIEW + #files and folders are all returned with the -types hidden option for glob on windows + set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + pseudovol { + set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + default { + set listing [punk::du::dirlisting $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } } } @@ -1018,11 +1144,13 @@ tcl::namespace::eval punk::nav::fs { @id -id ::punk::nav::fs::dirfiles_dict_as_lines -stripbase -default 0 -type boolean -formatsizes -default 1 -type boolean + -listing -default "/" -choices {/ // //} @values -min 1 -max -1 -type dict -unnamed true } #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? proc dirfiles_dict_as_lines {args} { + set ts1 [clock milliseconds] package require overtype set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] lassign [dict values $argd] leaders opts vals @@ -1031,9 +1159,12 @@ tcl::namespace::eval punk::nav::fs { # -- --- --- --- --- --- --- --- --- --- --- --- - set opt_stripbase [dict get $opts -stripbase] + set opt_stripbase [dict get $opts -stripbase] set opt_formatsizes [dict get $opts -formatsizes] + set opt_listing [dict get $opts -listing] # -- --- --- --- --- --- --- --- --- --- --- --- + #we still need to examine files for -listing / which means show only directories, + # because we want to display links/shortcuts that point to directories as directories #if multiple dicts and -stripbase = 1 - we can only strip the longest common part of the searchbases supplied set common_base "" @@ -1074,7 +1205,6 @@ tcl::namespace::eval punk::nav::fs { foreach fileset [list dirs files links underlayfiles flaggedhidden flaggedreadonly flaggedsystem nonportable vfsmounts] { set $fileset [list] } - #set contents [lindex $list_of_dicts 0] foreach contents $list_of_dicts { lappend dirs {*}[dict get $contents dirs] @@ -1090,6 +1220,7 @@ tcl::namespace::eval punk::nav::fs { lappend vfsmounts {*}[dict get $contents vfsmounts] } + set fkeys [dict create] ;#avoid some file normalize calls.. if {$opt_stripbase && $common_base ne ""} { set filetails [list] @@ -1224,27 +1355,41 @@ tcl::namespace::eval punk::nav::fs { #review - symlink to shortcut? hopefully will just work #classify as file or directory - fallback to file if unknown/undeterminable set finfo_plus [list] + set ts2 [clock milliseconds] foreach fdict $finfo { set fname [dict get $fdict file] if {[file extension $fname] eq ".lnk"} { if {![catch {package require punk::winlnk}]} { - set shortcutinfo [punk::winlnk::resolve $fname] set target_type "file" ;#default/fallback + + set shortcutinfo [punk::winlnk::resolve $fname] if {[dict exists $shortcutinfo link_target]} { set is_valid_lnk 1 set tgt [dict get $shortcutinfo link_target] - if {[file exists $tgt]} { - #file type could return 'link' - we will use isfile/isdirectory - if {[file isfile $tgt]} { - set target_type file - } elseif {[file isdirectory $tgt]} { - set target_type directory - } else { - set target_type file ;## ? + set link_target_type [dict get $shortcutinfo target_type] + switch -- $link_target_type { + file { + set target_type "file" + } + directory - "local disk" { + set target_type "directory" + } + unknown { + #fall back to checking attributes and filesystem if we have a link_target but no target_type + if {[file exists $tgt]} { + #file type could return 'link' - we will use isfile/isdirectory + if {[file isfile $tgt]} { + set target_type file + } elseif {[file isdirectory $tgt]} { + set target_type directory + } else { + set target_type file ;## ? + } + } else { + #todo - see if punk::winlnk has info about the type at the time of linking + #for now - treat as file + } } - } else { - #todo - see if punk::winlnk has info about the type at the time of linking - #for now - treat as file } } else { #no link_target - probably an ordinary file - but there could have been some other error in reading the binary windows lnk format. @@ -1295,6 +1440,8 @@ tcl::namespace::eval punk::nav::fs { } unset finfo + puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" + puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] @@ -1304,58 +1451,82 @@ tcl::namespace::eval punk::nav::fs { set displaylist [list] set col1 [string repeat " " [expr {$widest1 + 2}]] set RST [punk::ansi::a] + if {$opt_listing eq "/"} { + #disply directories only (including items that were actually files that were links/shortcuts to directories) + set finfo_plus [list] + } foreach d $dirs filerec $finfo_plus { - set d1 [punk::ansi::a+ cyan bold] - set d2 [punk::ansi::a+ defaultfg defaultbg normal] - #set f1 [punk::ansi::a+ white bold] - set f1 [punk::ansi::a+ white] - set f2 [punk::ansi::a+ defaultfg defaultbg normal] + set d1 [punk::ansi::a+ cyan normal] + set d1_overrides [list] + #set d2 [punk::ansi::a+ defaultfg defaultbg normal] + set f1 [punk::ansi::a+ white normal] + set f1_overrides [list] + #set f2 [punk::ansi::a+ defaultfg defaultbg normal] set fdisp "" if {[string length $d]} { if {$d in $flaggedhidden} { - set d1 [punk::ansi::a+ cyan normal] + #set d1 [punk::ansi::a+ Term-grey50 normal] + lappend d1_overrides term-grey50 } if {$d in $vfsmounts} { - if {$d in $flaggedhidden} { - #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indicatio of 'hidden' status - REVIEW - #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) - #mark it differently for now.. (todo bug report?) - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red Yellow bold] - } else { - set d1 [punk::ansi::a+ green Purple bold] - } - } else { - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red White bold] - } else { - set d1 [punk::ansi::a+ green bold] - } - } - } else { - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red bold] - } + lappend d1_overrides Green + } + if {$d in $nonportable} { + #lappend d1_overrides underline undt-red ;#we use underlins to indicate symlinks and shortcuts, so we shouldn't use underlines here if possible. + lappend d1_overrides italic bold } + #if {$d in $vfsmounts} { + # if {$d in $flaggedhidden} { + # #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indication of 'hidden' status - REVIEW + # #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) + # #mark it differently for now.. (todo bug report?) + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red Yellow bold] + # } else { + # set d1 [punk::ansi::a+ green Purple bold] + # } + # } else { + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red White bold] + # } else { + # set d1 [punk::ansi::a+ green bold] + # } + # } + #} else { + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red bold] + # } + #} #dlink-style & dshortcut_style are for underlines - can be added with colours already set + + if {[llength $d1_overrides]} { + set d1 [punk::ansi::a+ {*}$d1_overrides] + } if {$d in $dir_symlinks} { append d1 $dlink_style } elseif {$d in $dir_shortcuts} { append d1 $dshortcut_style } } + if {[llength $filerec]} { set fname [dict get $filerec file] set fdisp [dict get $filerec display] if {$fname in $flaggedhidden} { - set f1 [punk::ansi::a+ Purple] - } else { - if {$fname in $nonportable} { - set f1 [punk::ansi::a+ red bold] - } + #set f1 [punk::ansi::a+ Term-grey50] + lappend f1_overrides term-grey50 + } + if {$fname in $nonportable} { + lappend f1_overrides italic bold } + if {[llength $f1_overrides]} { + set f1 [punk::ansi::a+ {*}$f1_overrides] + } + lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST + } else { + #either there are no files or opt_listing is / = show dirs only (some of which may have actually been files that were links/shortcuts to directories) + lappend displaylist [overtype::left $col1 $d1$d$RST] } - lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST } return [punk::lib::list_as_lines $displaylist] @@ -1469,6 +1640,12 @@ tcl::namespace::eval punk::nav::fs::system { #[subsection {Namespace punk::nav::fs::system}] #[para] Internal functions that are not part of the API + #utility function to copy values from one variable to another without sharing the reference. + #Useful for example to avoid some issues with possible shimmering of the underlying type of file paths. + proc valcopy {obj} { + append obj2 $obj {} + } + #ordinary emission of chunklist when no repl proc emit_chunklist {chunklist} { set result "" diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/pipe-1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/pipe-1.0.tm index eac7df81..034fae01 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/pipe-1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/pipe-1.0.tm @@ -326,12 +326,12 @@ tcl::namespace::eval punk::pipe::lib { set in_atom 1 } ( { - incr in_brackets + incr in_brackets } default { if {$end_var_posn == -1 && (($c in $var_terminals) && ($prevc ni $protect_terminals))} { - set end_var_posn $token_index - } + set end_var_posn $token_index + } } } } diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winlnk-0.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winlnk-0.1.1.tm new file mode 100644 index 00000000..f283348f --- /dev/null +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winlnk-0.1.1.tm @@ -0,0 +1,1014 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'dev make' or bin/punkmake to update from -buildversion.txt +# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::winlnk 0.1.1 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::winlnk 0 0.1.1] +#[copyright "2024"] +#[titledesc {windows shortcut .lnk library}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk::winlnk}] [comment {-- Description at end of page heading --}] +#[require punk::winlnk] +#[keywords module shortcut lnk parse windows crossplatform] +#[description] +#[para] Tools for reading windows shortcuts (.lnk files) on any platform + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::winlnk +#[subsection Concepts] +#[para] Windows shortcuts are a binary format file with a .lnk extension +#[para] Shell Link (.LNK) Binary File Format is documented in [lb]MS_SHLLINK[rb].pdf published by Microsoft. +#[para] Revision 8.0 published 2024-04-23 + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::winlnk +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6}] + +#TODO - logger + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + #variable xyz + + #*** !doctools + #[subsection {Namespace punk::winlnk}] + #[para] Core API functions for punk::winlnk + #[list_begin definitions] + + + variable magic_HeaderSize "0000004C" ;#HeaderSize MUST equal this + variable magic_LinkCLSID "00021401-0000-0000-C000-000000000046" ;#LinkCLSID MUST equal this + + proc Get_contents {path {bytes all}} { + if {![file exists $path] || [file type $path] ne "file"} { + error "punk::winlnk::get_contents cannot find a filesystem object of type 'file' at location: $path" + } + set fd [open $path r] + chan configure $fd -translation binary -encoding iso8859-1 + if {$bytes eq "all"} { + set data [read $fd] + } else { + set data [read $fd $bytes] + } + close $fd + return $data + } + proc Contents_check_header {contents} { + variable magic_HeaderSize + variable magic_LinkCLSID + expr {[Header_Get_HeaderSize $contents] eq $magic_HeaderSize && [Header_Get_LinkCLSID $contents] eq $magic_LinkCLSID} + } + + #LinkFlags - 4 bytes - specifies information about the shell link and the presence of optional portions of the structure. + proc Show_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + puts "val: $val" + set declist [scan [string reverse $4bytes] %c%c%c%c] + set fmt [string repeat %08b 4] + puts "LinkFlags:[format $fmt {*}$declist]" + + set r [binary scan $4bytes b32 val] + puts "bscan-le: $val" + set r [binary scan [string reverse $4bytes] b32 val] + puts "bscan-2 : $val" + } + variable LinkFlags + set LinkFlags [dict create\ + HasLinkTargetIDList 1\ + HasLinkInfo 2\ + HasName 4\ + HasRelativePath 8\ + HasWorkingDir 16\ + HasArguments 32\ + HasIconLocation 64\ + IsUnicode 128\ + ForceNoLinkInfo 256\ + HasExpString 512\ + RunInSeparateProcess 1024\ + Unused1 2048\ + HasDarwinID 4096\ + RunAsUser 8192\ + HasExpIcon 16394\ + NoPidlAlias 32768\ + Unused2 65536\ + RunWithShimLayer 131072\ + ForceNoLinkTrack 262144\ + EnableTargetMetadata 524288\ + DisableLinkPathTracking 1048576\ + DisableKnownFolderTracking 2097152\ + DisableKnownFolderAlias 4194304\ + AllowLinkToLink 8388608\ + UnaliasOnSave 16777216\ + PreferEnvironmentPath 33554432\ + KeepLocalIDListForUNCTarget 67108864\ + ] + variable LinkFlagLetters [list A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA] + proc Header_Has_LinkFlag {contents flagname} { + variable LinkFlags + variable LinkFlagLetters + if {[string length $flagname] <= 2} { + set idx [lsearch $LinkFlagLetters $flagname] + if {$idx < 0} { + error "punk::winlnk::Header_Has_LinkFlag error - flagname $flagname not known" + } + set binflag [expr {2**$idx}] + set allflags [Header_Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } + if {[dict exists $LinkFlags $flagname]} { + set binflag [dict get $LinkFlags $flagname] + set allflags [Header_Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } else { + error "punk::winlnk::Header_Has_LinkFlag error - flagname $flagname not known" + } + } + + #MS-SHLLINK.pdf documents the .lnk file format in detail, but here is a brief overview of the structure of a .lnk file: + #protocol revision 10.0 (November 2025) https://winprotocoldocs-bhdugrdyduf5h2e4.b02.azurefd.net/MS-SHLLINK/%5bMS-SHLLINK%5d.pdf + + + #SHELL_LINK_HEADER structure is 76 bytes long and starts at the beginning of the file + #offset hex:0x00 dec:0 4 bytes + #Header size (HeaderSize) (must be 0x0000004C for .lnk files) + proc Header_Get_HeaderSize {contents} { + set 4bytes [split [string range $contents 0 3] ""] + set hex4 "" + foreach b [lreverse $4bytes] { + set dec [scan $b %c] ;# 0-255 decimal + set HH [format %2.2llX $dec] + append hex4 $HH + } + return $hex4 + } + + + #offset hex:0x04 dec:4 16 bytes + #LinkCLSID (must be 00021401-0000-0000-C000-000000000046 for .lnk files) + proc Header_Get_LinkCLSID {contents} { + set 16bytes [string range $contents 4 19] + #CLSID hex textual representation is split as 4-2-2-2-6 bytes(hex pairs) + #e.g We expect 00021401-0000-0000-C000-000000000046 for .lnk files + #for endianness - it is little endian all the way but the split is 4-2-2-1-1-1-1-1-1-1-1 REVIEW + #(so it can appear as mixed endianness if you don't know the splits) + #https://devblogs.microsoft.com/oldnewthing/20220928-00/?p=107221 + #This is based on COM textual representation of GUIDS + #Apparently a CLSID is a GUID that identifies a COM object + set clsid "" + set s1 [tcl::string::range $16bytes 0 3] + set declist [scan [string reverse $s1] %c%c%c%c] + set fmt "%02X%02X%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s2 [tcl::string::range $16bytes 4 5] + set declist [scan [string reverse $s2] %c%c] + set fmt "%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s3 [tcl::string::range $16bytes 6 7] + set declist [scan [string reverse $s3] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + #now treat bytes individually - so no endianness conversion + set declist [scan [tcl::string::range $16bytes 8 9] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + set scan [string repeat %c 6] + set fmt [string repeat %02X 6] + set declist [scan [tcl::string::range $16bytes 10 15] $scan] + append clsid [format $fmt {*}$declist] + + return $clsid + } + + + #offset hex:0x14 dec:20 4 bytes + #Link flags (LinkFlags) - bit field specifying information about the shell link and the presence of optional portions of the structure. + #HasLinkTargetIDList bit 0 (0x00000001) - if set, a LinkTargetIDList structure is present immediately following the header + #HasLinkInfo bit 1 (0x00000002) - if set, a LinkInfo structure is present immediately following the header (or the LinkTargetIDList if that is present) + #HasName bit 2 (0x00000004) - if set, a null-terminated string containing the name of the link is present immediately following the header (or the LinkTargetIDList and LinkInfo if they are present) + #HasRelativePath bit 3 (0x00000008) - if set, a null-terminated string containing the relative path of the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo and Name if they are present) + #HasWorkingDir bit 4 (0x00000010) - if set, a null-terminated string containing the working directory of the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name and Relative Path if they are present) + #HasArguments bit 5 (0x00000020) - if set, a null-terminated string containing the command line arguments for the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path and Working Dir if they are present) + #HasIconLocation bit 6 (0x00000040) - if set, a null-terminated string containing the location of the icon for the link is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir and Arguments if they are present) + #IsUnicode bit 7 (0x00000080) - if set, the strings in the link are stored in Unicode (UTF-16LE) format; if not set, the strings are stored in ANSI format (usually the system's default code page) + #ForceNoLinkInfo bit 8 (0x00000100) - if set, the LinkInfo structure is not stored in the file even if the HasLinkInfo bit is set; this can be used to force the link to be resolved using only the information in the header and the optional strings, without using the LinkInfo structure + #HasExpString bit 9 (0x00000200) - if set, a null-terminated string containing an "environment variable" style string is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments and Icon Location if they are present); this string can contain environment variable references (e.g. %USERPROFILE%) that can be expanded to obtain the actual path of the link target + #RunInSeparateProcess bit 10 (0x00000400) - if set, the link target should be run in a separate process; if not set, the link target may be run in the same process as the caller + #Unused1 bit 11 (0x00000800) - reserved for future use; should be set to 0 + #HasDarwinID bit 12 (0x00001000) - if set, a null-terminated string containing a "Darwin ID" is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments, Icon Location and ExpString if they are present); this string can be used to identify the link target in a way that is independent of the file system (e.g. for links to Control Panel items or special folders) + #RunAsUser bit 13 (0x00002000) - if set, the link target should be run with the permissions of the user specified in the HasDarwinID string; if not set, the link target should be run with the permissions of the caller + #HasExpIcon bit 14 (0x00004000) - if set, a null-terminated string containing an "environment variable" style string for the icon location is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments, Icon Location, ExpString and DarwinID if they are present); this string can contain environment variable references that can be expanded to obtain the actual path of the icon for the link + #NoPidlAlias bit 15 (0x00008000) - if set, the link target should not be resolved using the PIDL alias mechanism; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed + #Unused2 bit 16 (0x00010000) - reserved for future use; should be set to 0 + #RunWithShimLayer bit 17 (0x00020000) - if set, the link target should be run with the application compatibility shim layer; if not set, the link target should be run without the shim layer + #ForceNoLinkTrack bit 18 (0x00040000) - if set, the link target should not be tracked by the shell's link tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed + #EnableTargetMetadata bit 19 (0x00080000) - if set, the link target should have metadata enabled; this can be used to allow the link to store additional information about the target (e.g. for links to files, the link can store the file's attributes, creation time, access time and modification time) + #DisableLinkPathTracking bit 20 (0x00100000) - if set, the link target should not be tracked by the shell's link path tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed based on its path + #DisableKnownFolderTracking bit 21 (0x00200000) - if set, the link target should not be tracked by the shell's known folder tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed based on its known folder ID + #DisableKnownFolderAlias bit 22 (0x00400000) - if set, the link target should not be aliased to a known folder; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on its known folder ID + #AllowLinkToLink bit 23 (0x00800000) - if set, the link target can be another link; if not set, the link target should not be another link (i.e. it should be a file or directory); this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on the fact that it is a link + #UnaliasOnSave bit 24 (0x01000000) - if set, the link should be unaliased when it is saved; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on the fact that it is a link + #PreferEnvironmentPath bit 25 (0x02000000) - if set, the link should prefer to resolve the target using environment variable references; this can be used to allow the link to be resolved correctly even if the target is moved or renamed, as long as the environment variable references still point to the correct location + #KeepLocalIDListForUNCTarget bit 26 (0x04000000) - if set, the link should keep the local ID list for UNC targets; this can be used to allow the link to be resolved correctly even if the target is moved or renamed, as long as the local ID list still points to the correct location + # - the presence of these flags indicates the presence of optional structures in the .lnk file and also provides information about how to interpret the data in the file + proc Header_Get_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #offset hex:0x18 dec:24 4 bytes + #File attributes (FileAttributes) - bit field specifying the file attributes of the link target (if the EnableTargetMetadata flag is set in the LinkFlags field); this field is a bitwise combination of the following values: + proc Header_Get_FileAttributes {contents} { + if {![Header_Has_LinkFlag $contents "EnableTargetMetadata"]} { + return {} + } + set 4bytes [string range $contents 24 27] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + set attrlist {} + if {$val & 0x00000001} {lappend attrlist "READONLY"} + if {$val & 0x00000002} {lappend attrlist "HIDDEN"} + if {$val & 0x00000004} {lappend attrlist "SYSTEM"} + if {$val & 0x00000010} {lappend attrlist "DIRECTORY"} + if {$val & 0x00000020} {lappend attrlist "ARCHIVE"} + if {$val & 0x00000040} {lappend attrlist "DEVICE"} + if {$val & 0x00000080} {lappend attrlist "NORMAL"} + if {$val & 0x00000100} {lappend attrlist "TEMPORARY"} + if {$val & 0x00000200} {lappend attrlist "SPARSE_FILE"} + if {$val & 0x00000400} {lappend attrlist "REPARSE_POINT"} + if {$val & 0x00000800} {lappend attrlist "COMPRESSED"} + if {$val & 0x00001000} {lappend attrlist "OFFLINE"} + if {$val & 0x00002000} {lappend attrlist "NOT_CONTENT_INDEXED"} + if {$val & 0x00004000} {lappend attrlist "ENCRYPTED"} + return $attrlist + } + proc Header_Get_FileAttributes_Raw {contents} { + if {![Header_Has_LinkFlag $contents "EnableTargetMetadata"]} { + return 0 + } + set 4bytes [string range $contents 24 27] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + + + + #offset hex:0x1C dec:28 8 bytes + #creation date and time (CreationTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_CreationTime {contents} { + set 8bytes [string range $contents 28 35] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_CreationTime_Raw {contents} { + set 8bytes [string range $contents 28 35] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset 36 8 bytes + #last access date and time (AccessTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_AccessTime {contents} { + set 8bytes [string range $contents 36 43] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_AccessTime_Raw {contents} { + set 8bytes [string range $contents 36 43] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset hex:0x2C dec:44 8 bytes + #last modification date and time (WriteTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_WriteTime {contents} { + set 8bytes [string range $contents 44 51] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_WriteTime_Raw {contents} { + set 8bytes [string range $contents 44 51] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset hex:0x34 dec:52 Bytes:4 - unsigned int + #file size in bytes (of target - low 32 bits if >4GB) + proc Header_Get_FileSize {contents} { + set 4bytes [string range $contents 52 55] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x38 dec:56 Bytes:4 - signed integer + #icon index value + proc Header_Get_IconIndex {contents} { + set 4bytes [string range $contents 56 59] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x3C dec:60 Bytes:4 - unsigned integer + #SW_SHOWNORMAL 0x00000001 + #SW_SHOWMAXIMIZED 0x00000001 + #SW_SHOWMINNOACTIVE 0x00000007 + # - all other values MUST be treated as SW_SHOWNORMAL + proc Header_Get_ShowCommand {contents} { + set 4bytes [string range $contents 60 63] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x40 dec:64 Bytes:2 + #Hot key + proc Header_Get_HotKey {contents} { + # Existing code that extracts the raw 16‑bit hotkey value: + set raw [Header_Get_HotKey_Raw $contents] + # The low byte holds the virtual‑key, high byte holds modifier flags + set vk [expr {$raw & 0xFF}] + set mods [expr {($raw >> 8) & 0xFF}] + set name [_vk_to_name $vk] + set modStr [_modifiers_to_string $mods] + if {$modStr eq ""} { + return $name + } else { + return "${modStr}+${name}" + } + } + proc Header_Get_HotKey_Raw {contents} { + set 2bytes [string range $contents 64 65] + set r [binary scan $2bytes s val] ;#short + return $val + } + proc _modifiers_to_string {mods} { + set parts {} + if {$mods & 0x01} {lappend parts "Shift"} + if {$mods & 0x02} {lappend parts "Ctrl"} + if {$mods & 0x04} {lappend parts "Alt"} + if {$mods & 0x08} {lappend parts "Win"} ;# optional + return [join $parts "+"] + } + proc _vk_to_name {vk} { + # Minimal map – extend as needed + array set vkMap { + 0x00 "No key assigned" + 0x08 Backspace 0x09 Tab 0x0D Return + 0x10 Shift 0x11 Control 0x12 Alt + 0x20 Space 0x21 PageUp 0x22 PageDown + 0x23 End 0x24 Home 0x25 Left + 0x26 Up 0x27 Right 0x28 Down + 0x2D Insert 0x2E Delete + 0x70 F1 0x71 F2 0x72 F3 + 0x73 F4 0x74 F5 0x75 F6 + 0x76 F7 0x77 F8 0x78 F9 + 0x79 F10 0x7A F11 0x7B F12 + 0x7c F13 0x7d F14 0x7e F15 + 0x7f F16 0x80 F17 0x81 F18 + 0x82 F19 0x83 F20 0x84 F21 + 0x85 F22 0x86 F23 0x87 F24 + 0x90 "NUM LOCK" 0x91 "SCROLL LOCK" + } + if {[info exists vkMap($vk)]} { + return $vkMap($vk) + } else { + if {$vk >= 0x30 && $vk <= 0x39} { + return [format "%c" $vk] ;# 0-9 + } elseif {$vk >= 0x41 && $vk <= 0x5A} { + return [format "%c" $vk] ;# A-Z + } + # fallback: hex representation + return [format "0x%02X" $vk] + } + } + + #offset hex:0x42 dec:66 Bytes:2 - reserved1 + proc Header_Get_Reserved1 {contents} { + set 2bytes [string range $contents 66 67] + set r [binary scan $2bytes s val] ;#short + return $val + } + + #offset hex:0x44 dec:68 Bytes:4 - reserved2 + proc Header_Get_Reserved2 {contents} { + set 4bytes [string range $contents 68 71] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #offset hex:0x48 dec:72 Bytes:4 - reserved3 + proc Header_Get_Reserved3 {contents} { + set 4bytes [string range $contents 72 75] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #end of 76 byte header + + proc Get_LinkTargetIDList_size {contents} { + if {[Header_Has_LinkFlag $contents "A"]} { + set 2bytes [string range $contents 76 77] + set r [binary scan $2bytes s val] ;#short + #logger + #puts stderr "LinkTargetIDList_size: $val" + return $val + } else { + return 0 + } + } + proc Get_LinkTargetIDList_content {contents} { + set idlist_size [Get_LinkTargetIDList_size $contents] + if {$idlist_size == 0} { + return "" + } else { + set idlist_content [string range $contents 78 [expr {78 + $idlist_size -1}]] + return $idlist_content + } + } + + #some clues on the structure of the IDList content and how to parse it can be found in the analysis of CVE-2020-0729, + #which is a remote code execution vulnerability in Windows that can be exploited through specially crafted .lnk files that contain malicious IDList content. + #The analysis of this vulnerability provides insights into how the IDList content is structured and how it can be parsed to extract information about the link target and potentially execute code. + #https://www.zerodayinitiative.com/blog/2020/3/25/cve-2020-0729-remote-code-execution-through-lnk-files + + proc Get_LinkTargetIDList_iteminfo {contents} { + set idlist_content [Get_LinkTargetIDList_content $contents] + set result {} + set offset 0 + while {$offset < [string length $idlist_content]} { + if {[string length $idlist_content] - $offset < 2} break + set size_bytes [string range $idlist_content $offset [expr {$offset + 1}]] ;#size including these 2 bytes + binary scan $size_bytes su size + if {$size == 0} break + if {$size < 2} { + # Invalid size, abort + error "punk::winlnk::Get_LinkTargetIDList_iteminfo: Invalid ItemID size: $size at offset $offset" + } + if {$offset + $size > [string length $idlist_content]} { + # ItemID extends beyond content, stop parsing + puts stderr "punk::winlnk::Get_LinkTargetIDList_iteminfo: ItemID at offset $offset with size $size extends beyond content length, stopping parse" + break + } + set itemid [string range $idlist_content $offset [expr {$offset + $size - 1}]] + set itemid_bytes [string range $itemid 0 1] + binary scan $itemid_bytes su itemid_size + #in *general* byte 3 of the ItemID structure can be used to determine the type of the item + #(e.g. file, folder, network location, etc.) but this is not always reliable and can vary + #based on the specific structure of the ItemID and the context in which it is used + set itemid_type_byte [string index $itemid 2] + #puts stderr "ItemID size: $itemid_size, type byte: [format %02X [scan $itemid_type_byte %c]]" + set maybe_type [format %02X [scan $itemid_type_byte %c]] + lappend result [dict create size $itemid_size type $maybe_type rawcontent $itemid] + + incr offset $size + } + return $result + } + proc Get_LinkInfo_content {contents} { + set idlist_size [Get_LinkTargetIDList_size $contents] + if {$idlist_size == 0} { + set offset 0 + } else { + set offset [expr {2 + $idlist_size}] ;#LinkTargetIdList IDListSize field + value + } + set linkinfo_start [expr {76 + $offset}] + if {[Header_Has_LinkFlag $contents "B"]} { + #puts stderr "linkinfo_start: $linkinfo_start" + set 4bytes [string range $contents $linkinfo_start $linkinfo_start+3] + binary scan $4bytes i val ;#size *including* these 4 bytes + set linkinfo_content [string range $contents $linkinfo_start [expr {$linkinfo_start + $val -1}]] + return [dict create linkinfo_start $linkinfo_start size $val next_start [expr {$linkinfo_start + $val}] content $linkinfo_content] + } else { + return [dict create linkinfo_start $linkinfo_start size 0 next_start $linkinfo_start content ""] + } + } + + proc LinkInfo_get_fields {linkinfocontent} { + #TODO - finish parsing of LinkInfo - add support + #Link location information + #present if data flag HasLinkInfo exists. + + set 4bytes [string range $linkinfocontent 0 3] + binary scan $4bytes i val ;#size *including* these 4 bytes + + set bytes_linkinfoheadersize [string range $linkinfocontent 4 7] + binary scan $bytes_linkinfoheadersize i headersize + + set bytes_linkinfoflags [string range $linkinfocontent 8 11] + set r [binary scan $bytes_linkinfoflags i flags] ;# i for little endian 32-bit signed int + #puts "linkinfoflags: $flags" + + set localbasepath "" + set commonpathsuffix "" + + #REVIEW - flags problem? + if {$flags & 1} { + #VolumeIDAndLocalBasePath + #logger + #puts stderr "VolumeIDAndLocalBasePath" + } + if {$flags & 2} { + #logger + #puts stderr "CommonNetworkRelativeLinkAndPathSuffix" + } + set bytes_volumeid_offset [string range $linkinfocontent 12 15] + set bytes_localbasepath_offset [string range $linkinfocontent 16 19] + set bytes_commonnetworkrelativelinkoffset [string range $linkinfocontent 20 23] + set bytes_commonpathsuffix_offset [string range $linkinfocontent 24 27] + + binary scan $bytes_localbasepath_offset i bp_offset + if {$bp_offset > 0} { + set tail [string range $linkinfocontent $bp_offset end] + set stringterminator 0 + set i 0 + set localbasepath "" + #TODO + while {!$stringterminator & $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append localbasepath $c + } + incr i + } + } + binary scan $bytes_commonpathsuffix_offset i cps_offset + if {$cps_offset > 0} { + set tail [string range $linkinfocontent $cps_offset end] + set stringterminator 0 + set i 0 + set commonpathsuffix "" + #TODO + while {!$stringterminator && $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append commonpathsuffix $c + } + incr i + } + } + + + return [dict create localbasepath $localbasepath commonpathsuffix $commonpathsuffix note ] + } + + proc Contents_Get_Info {contents} { + + + #todo - return something like the perl lnk-parse-1.0.pl script? + + #Link File: C:/repo/jn/tclmodules/tomlish/src/modules/test/#modpod-tomlish-0.1.1/suites/all/arrays_1.toml#roundtrip+roundtrip_files+arrays_1.toml.fauxlink.lnk + #Link Flags: HAS SHELLIDLIST | POINTS TO FILE/DIR | NO DESCRIPTION | HAS RELATIVE PATH STRING | HAS WORKING DIRECTORY | NO CMD LINE ARGS | NO CUSTOM ICON | + #File Attributes: ARCHIVE + #Create Time: Sun Jul 14 2024 10:41:34 + #Last Accessed time: Sat Sept 21 2024 02:46:10 + #Last Modified Time: Tue Sept 10 2024 17:16:07 + #Target Length: 479 + #Icon Index: 0 + #ShowWnd: 1 SW_NORMAL + #HotKey: 0 + #(App Path:) Remaining Path: repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-0.1.1\suites\roundtrip\roundtrip_files\arrays_1.toml + #Relative Path: ..\roundtrip\roundtrip_files\arrays_1.toml + #Working Dir: C:\repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-0.1.1\suites\roundtrip\roundtrip_files + + variable LinkFlags + set flags_enabled [list] + dict for {k v} $LinkFlags { + if {[Header_Has_LinkFlag $contents $k] > 0} { + lappend flags_enabled $k + } + } + + set showcommand_val [Header_Get_ShowCommand $contents] + switch -- $showcommand_val { + 1 { + set showwnd [list 1 SW_SHOWNORMAL] + } + 3 { + set showwnd [list 3 SW_SHOWMAXIMIZED] + } + 7 { + set showwnd [list 7 SW_SHOWMINNOACTIVE] + } + default { + set showwnd [list $showcommand_val SW_SHOWNORMAL-effective] + } + } + + set linkinfo_content_dict [Get_LinkInfo_content $contents] + set localbase_path "" + set suffix_path "" + set linkinfocontent [dict get $linkinfo_content_dict content] + set next_start [dict get $linkinfo_content_dict next_start] ;#location of section following LinkInfo (Location information) - this will be the Data Strings. + set link_target "" + set linkfields [dict create] + if {$linkinfocontent ne ""} { + set linkfields [LinkInfo_get_fields $linkinfocontent] + set localbase_path [dict get $linkfields localbasepath] + set suffix_path [dict get $linkfields commonpathsuffix] + if {"windows" eq $::tcl_platform(platform)} { + set link_target [file join $localbase_path $suffix_path] + } else { + set suffix_path [string trimleft [string map {\\ /} $suffix_path] /] + if {[regexp {([a-zA-Z]):\\(.*)} $localbase_path _match drive_letter tail]} { + set localbase_path [string map {\\ /} $localbase_path] + set tail [string trimleft [string map {\\ /} $tail] /] + set link_target "" + #shortcut basepath is a windows path with drive letter - try to resolve it on unix by looking for a corresponding mount from fstab or a point under /mnt + set mountinfo [exec mount] + foreach line [split $mountinfo "\n"] { + #review - a more specific mount target might exist that includes the drive letter as part of the mount point name and is a longer prefix of the localbase_path + #- we should probably look for the longest prefix match rather than just the drive letter + if {[regexp -nocase -- [string cat ^$drive_letter {:\\\s+on\s+(\S+)}] $line _match mount_point]} { + set link_target [file join $mount_point $tail $suffix_path] + break + } + } + if {$link_target eq ""} { + #review - under what circumstances could this happen? If the drive letter doesn't match any mount points, then /mnt/drive_letter should generally already have been found above above + # - However, it may be possible for /mnt/drive_Letter to still exist even if it's not reflected in the output of mount or the output of mount is in an unexpected format. + + #nothing in mount result matches the drive letter - try looking for a mount point under /mnt with the drive letter as the name + if {[file exists /mnt/$drive_letter]} { + set link_target [file join /mnt/$drive_letter $tail $suffix_path] + } else { + if {$drive_letter eq [string tolower $drive_letter]]} { + set op_drive_letter [string toupper $drive_letter] + } else { + set op_drive_letter [string tolower $drive_letter] + } + if {[file exists /mnt/$op_drive_letter]} { + set link_target [file join /mnt/$op_drive_letter $tail $suffix_path] + } else { + #leave as is except for backslashes converted to forward + #- probably won't resolve correctly unless the unix system has a folder named drive_letter: in the current folder with a copy of the original filestructure. + set link_target [file join $localbase_path $suffix_path] + } + } + } else { + #shortcut basepath is a windows path with drive letter and we found a matching mount point - link_target is set to the resolved path + } + } else { + #shortcut basepath doesn't match expected windows path format - just join it with the suffix and hope for the best + #could be something like a network path or it could be something else entirely + set link_target [file join $localbase_path $suffix_path] + } + } + } + + # ---------------------------------------------------------------------- + #todo - get Data strings by parsing contents starting at $next_start + #stored in following order: + # description + # relative path + # working directory + # command line arguments + # icon location + + #Data strings format: + # 2 bytes: number of characters in the string + # following: The string. ASCII or UTF-16 little-endian string + + set datastring_dict [Contents_Get_DataStrings $contents $next_start] + + # ---------------------------------------------------------------------- + + set file_attributes [Header_Get_FileAttributes $contents] + set linktargetidlist [Get_LinkTargetIDList_iteminfo $contents] + + set target_type_info [Get_target_type $contents $file_attributes] + set target_type [dict get $target_type_info type] + set target_type_mech [dict get $target_type_info mechanism] + if {$target_type eq "unknown"} { + if {[file exists $link_target]} { + set target_type [file type $link_target] + set target_type_mech "filesystem" + } + } + + set result [dict create\ + link_target $link_target\ + link_flags $flags_enabled\ + file_attributes $file_attributes\ + creation_time [Header_Get_CreationTime $contents]\ + access_time [Header_Get_AccessTime $contents]\ + write_time [Header_Get_WriteTime $contents]\ + target_length [Header_Get_FileSize $contents]\ + icon_index ""\ + showwnd "$showwnd"\ + hotkey [Header_Get_HotKey $contents]\ + target_type $target_type\ + target_type_mech $target_type_mech\ + idlist $linktargetidlist\ + linkinfo $linkfields\ + ] + #relative_path "?" + } + + proc file_check_header {path} { + #*** !doctools + #[call [fun file_check_header] [arg path] ] + #[para]Return 0|1 + #[para]Determines if the .lnk file specified in path has a valid header for a windows shortcut + set c [Get_contents $path 20] + return [Contents_check_header $c] + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::resolve + @cmd -name punk::winlnk::resolve\ + -summary\ + "Return information about a .lnk file (windows shortcut)"\ + -help\ + "Return a dict of info obtained by parsing the binary data in a windows .lnk file. + If the .lnk header check fails, then the .lnk file probably isn't really a shortcut + file and the dictionary will contain an 'error' key." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc resolve {path} { + #*** !doctools + #[call [fun resolve] [arg path] ] + #[para] Return a dict of info obtained by parsing the binary data in a windows .lnk file + #[para] If the .lnk header check fails, then the .lnk file probably isn't really a shortcut file and the dictionary will contain an 'error' key + set c [Get_contents $path] + if {[Contents_check_header $c]} { + return [Contents_Get_Info $c] + } else { + return [dict create error "lnk_header_check_failed"] + } + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::file_show_info + @cmd -name punk::winlnk::file_show_info\ + -summary\ + "Show information about a .lnk file (windows shortcut)"\ + -help\ + "Print to stdout the information obtained by parsing the binary data in a windows .lnk file, in a human readable format. + If the .lnk header check fails, then the .lnk file probably isn't really a shortcut file and an error message will be printed." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc file_show_info {path} { + package require punk::lib + #punk::lib::showdict [resolve $path] */@* + set field_queries [dict create\ + link_target link_target\ + link_flags link_flags/@*\ + file_attributes file_attributes\ + creation_time creation_time\ + access_time access_time\ + write_time write_time\ + target_length target_length\ + icon_index icon_index\ + showwnd showwnd\ + hotkey hotkey\ + target_type target_type\ + idlist idlist/@*/@*.@*\ + linkinfo linkinfo/@*.@*\ + ] + set info [resolve $path] + if {[dict exists $info error]} { + return "Error: [dict get $info error]" + } else { + set querystring "" + foreach field [dict keys $info] { + if {[dict exists $field_queries $field]} { + append querystring "[dict get $field_queries $field] " + } else { + append querystring "$field " + } + } + puts "querystring: $querystring" + return [punk::lib::showdict $info {*}$querystring] + } + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::target + @cmd -name punk::winlnk::target\ + -summary\ + "Return the target path of a .lnk file (windows shortcut)"\ + -help\ + "Return the target path of the .lnk file specified in path. + This is a convenience function that extracts the target path from the .lnk file and returns it directly, + without all the additional information that resolve provides. If the .lnk header check fails, then + the .lnk file probably isn't really a shortcut file and an error message will be returned." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc target {path} { + #*** !doctools + #[call [fun target] [arg path] ] + #[para]Return the target path of the .lnk file specified in path + set info [resolve $path] + if {[dict exists $info error]} { + error [dict get $info error] + } else { + return [dict get $info link_target] + } + } + + proc target_type {path} { + set content [Get_contents $path] + if {![Contents_check_header $content]} { + error "lnk_header_check_failed" + } + set info [Contents_Get_Info $content] + return [dict get $info target_type] + } + + proc Get_target_type {content file_attributes} { + #determine type based on info in the .lnk file, such as file attributes and link flags + + if {"DIRECTORY" in $file_attributes} { + return [dict create type directory mechanism file_attributes]" + } elseif {"ARCHIVE" in $file_attributes} { + return [dict create type file mechanism file_attributes] + } else { + set iteminfo [Get_LinkTargetIDList_iteminfo $content] + if {[llength $iteminfo] > 0} { + set first_item [lindex $iteminfo 0] + set first_item_type [dict get $first_item type] + set saw_2f 0 + switch -- $first_item_type { + "1F" { + #plain files and folders always seem to have a first item type of 1F + #so does "local disk" + set type_so_far "unknown" + #For a file, we may first see multiple items of type 32 (directory) as we go through the folder structure, + #and then finally an item of type 31 (file) at the end. + #For a network location, we may see an item of type 2F. + #So we need to loop through all the items and keep track of what we've seen so far. + foreach item $iteminfo { + set item_type [dict get $item type] + if {$item_type eq "31"} { + set type_so_far "directory" + } elseif {$item_type eq "32"} { + return [dict create type file mechanism idlist] + } elseif {$item_type eq "2F"} { + set saw_2f 1 + } + } + if {$type_so_far eq "unknown" && $saw_2f} { + return [dict create type "local disk" mechanism idlist] + } + return [dict create type $type_so_far mechanism idlist] + } + } + return [dict create type "unknown" mechanism idlist] + } else { + return [dict create type "unknown" mechanism idlist] + } + } + } + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk::lib { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::winlnk::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +#tcl::namespace::eval punk::winlnk::system { + #*** !doctools + #[subsection {Namespace punk::winlnk::system}] + #[para] Internal functions that are not part of the API + + + +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +namespace eval ::punk::args::register { + #use fully qualified so 8.6 doesn't find existing var in global namespace + lappend ::punk::args::register::NAMESPACES ::punk::winlnk +} +## Ready +package provide punk::winlnk [tcl::namespace::eval punk::winlnk { + variable pkg punk::winlnk + variable version + set version 0.1.1 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm index a876d781..9079dbbc 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm @@ -196,7 +196,8 @@ namespace eval punk::winpath { #https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file #according to the above: Use any character in the current code page for a name, including Unicode characters and characters in the extended character set (128–255), except for the following: set reserved [list < > : \" / \\ | ? *] - + #embedded nulls (\0) are also disallowed - but these are also disallowed on unix-like platforms. + set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] #we need to exclude things like path/.. path/. foreach seg [file split $path] { @@ -208,6 +209,14 @@ namespace eval punk::winpath { #/./ /../ segments don't require protection - keep checking. continue } + if {[string toupper [file rootname $seg]] in $windows_reserved_names} { + #windows reserved names + #there are reports that these names aren't usable even with file extension - e.g that CON.txt is reserved and can't be created by some standard tools. + #In practice on windows 11 in 2026, cmd.exe,notepad,explorer and powershell seem to handle creation and access of CON.txt and PRN.txt etc without issue. + # the windows documentation reference above however still states that these names with an extension should be avoided. + #For this reason - we will still treat these as reserved and require protection with dos device syntax - even though in practice they seem to be usable without it. + return 1 + } #only check for actual space as other whitespace seems to work without being stripped #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph diff --git a/src/vfs/_vfscommon.vfs/modules/punk-0.1.tm b/src/vfs/_vfscommon.vfs/modules/punk-0.1.tm index 53cb4067..ea72ad1c 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk-0.1.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk-0.1.tm @@ -1422,7 +1422,7 @@ namespace eval punk { } if {[string is digit -strict [join $subindices ""]]} { - #review tip 551 (tcl9+?) + #review tip 551 (underscores in numerical literals) (tcl9+) #puts stderr ">>>>>>>>>>>>>>>> data: $leveldata selector: $selector subindices: $subindices" #pure numeric keylist - put straight to lindex # @@ -2650,6 +2650,76 @@ namespace eval punk { } }] } + } elseif {[punk::lib::is_indexset $index]} { + #review - a basic math statement such as 5-1 is also a valid member of an indexset + #see punk::lib::is_indexset and punk::lib::indexset_resolve + #single element of an indexset - e.g @..3 or @1..5 or @..end or @.. or @end..0 or @end-5..8 etc + set is_range [expr {[string first ".." $index] >= 0}] + if {$get_not} { + if {$is_range} { + lappend INDEX_OPERATIONS list-range-not + } else { + lappend INDEX_OPERATIONS listindex-not + } + set assign_script { + set assigned [lremove $assigned {*}[punk::lib::indexset_resolve [llength $leveldata] ]] + } + } else { + if {$is_range} { + lappend INDEX_OPERATIONS list-range + } else { + lappend INDEX_OPERATIONS listindex + } + set assign_script { + set assigned [lmap i [punk::lib::indexset_resolve [llength $leveldata] ] {lindex $leveldata $i}] + } + } + + if {$do_bounds_check} { + #bounds check each element of the resolved indexset - if any are out of bounds, return mismatch-list-index-out-of-range + if {$is_range} { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + lassign [split ..] idx1 _ idx2 + set v2 [punk::lib::lindex_resolve_basic $len $idx2] + if {isinf($v2)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + set v1 [punk::lib::lindex_resolve_basic $len $idx1] + if {isinf($v1)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + ${$assign_script} + } + }] + } else { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + set v1 [punk::lib::lindex_resolve_basic $len ] + if {isinf($v1)} { + ${[tstr -ret string $tpl_return_mismatch_list_index_out_of_range]} + } + ${$assign_script} + } + }] + } + } else { + append script \n [tstr -return string -allowcommands { + if {[catch {llength $leveldata} len]} { + #set action ?mismatch-not-a-list + ${[tstr -ret string $tpl_return_mismatch_not_a_list]} + } else { + ${$assign_script} + } + }] + } + set script [string map [list $index] $script] } elseif {[string first "end" $index] >=0} { if {[regexp {^end([-+]{1,2}[0-9]+)$} $index _match endspec]} { diff --git a/src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm b/src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm new file mode 100644 index 00000000..6a7b79d6 --- /dev/null +++ b/src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm @@ -0,0 +1,5488 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::lib 0.1.6 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::lib 0 0.1.6] +#[copyright "2024"] +#[titledesc {punk general utility functions}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk library}] [comment {-- Description at end of page heading --}] +#[require punk::lib] +#[keywords module utility lib] +#[description] +#[para]This is a set of utility functions that are commonly used across punk modules or are just considered to be general-purpose functions. +#[para]The base set includes string and math functions but has no specific theme + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::lib +#[subsection Concepts] +#[para]The punk::lib modules should have no strong dependencies other than Tcl +#[para]Dependendencies that only affect display or additional functionality may be included - but should fail gracefully if not present, and only when a function is called that uses one of these soft dependencies. +#[para]This requirement for no strong dependencies, means that many utility functions that might otherwise seem worthy of inclusion here are not present. + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::lib +#[list_begin itemized] + +package require Tcl 8.6- +package require punk::args +#*** !doctools +#[item] [package {Tcl 8.6-}] +#[item] [package {punk::args}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + + +tcl::namespace::eval punk::lib::ensemble { + #wiki.tcl-lang.org/page/ensemble+extend + # extend an ensemble-like routine with the routines in some namespace + + #NOTE - the extension ns becomes the '-namespace ' for the original routine name, + #with -unknown handling the original subcommands. + #This makes the original ensemble harder to introspect! + #e.g (the original -map or -namespace not visible) + #In this specific case (which, being published on the wiki might be common in the wild) + #we could call {*}[namespace ensemble configure $routine -unknown] $routine + #and then detect that the first resulting word is an ensemble + #For arbitrary '-unknown scripts' - sensible introspection is likely not possible + + proc extend {routine extension} { + if {![string match ::* $routine]} { + set resolved [uplevel 1 [list ::tcl::namespace::which $routine]] + if {$resolved eq {}} { + error [list {no such routine} $routine] + } + set routine $resolved + } + set routinens [tcl::namespace::qualifiers $routine] + if {$routinens eq {::}} { + set routinens {} + } + set routinetail [tcl::namespace::tail $routine] + + if {![string match ::* $extension]} { + set extension [uplevel 1 [ + list [tcl::namespace::which namespace] current]]::$extension + } + + if {![tcl::namespace::exists $extension]} { + error [list {no such namespace} $extension] + } + + set extension [tcl::namespace::eval $extension [ + list [tcl::namespace::which namespace] current]] + + tcl::namespace::eval $extension [ + list [tcl::namespace::which namespace] export *] + + while 1 { + set renamed ${routinens}::${routinetail}_[clock clicks] ;#clock clicks unlikely to collide when not directly consecutive such as: list [clock clicks] [clock clicks] + if {[tcl::namespace::which $renamed] eq {}} break + } + + rename $routine $renamed + + tcl::namespace::eval $extension [ + list namespace ensemble create -command $routine -unknown [ + list apply {{renamed ensemble routine args} { + list $renamed $routine + }} $renamed + ] + ] + + return $routine + } +} + +# some (?) tcl bug check procs needed to exist before main punk::lib namespaces are evaluated +tcl::namespace::eval punk::lib::check { + #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) + #Not any sort of comprehensive check of known tcl bugs. + #These are reported in warning output of 'help tcl' - or used for workarounds in some cases. + proc has_tclbug_regexp_emptystring {} { + #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces + #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, + #but as an apparent violation of Tcl's normal parsing rules - was evidently seen as a bug and fixed in: + #https://core.tcl-lang.org/tcl/info/cb03e57a (tcl 9.0.3+ ?) + set bug [expr {![catch {regexp {} [error should_error]}]}] + return [dict create bug $bug bugref cb03e57a description {regexp emptystring first argument over-optimised - difference in compiled vs traced behaviour.} level minor] + } + proc has_tclbug_lsearch_sorted_inline_subindices {} { + if {[catch {lsearch -sorted -subindices -inline -index 0 {{a 1} {a 2} {b 3} {c 4} {c 5}} b} result]} { + #probably tcl version doesn't support all options + set bug 0 + } else { + set bug [expr {$result ne "b"}] + } + set description "lsearch -sorted with -subindices -inline - incorrect result." + return [dict create bug $bug bugref bc4ac0 description $description level minor] + } + proc has_tclbug_script_var {} { + + set script {set j [list spud] ; list} + append script \n + uplevel #0 $script + set rep1 [tcl::unsupported::representation $::j] + set script "" + set rep2 [tcl::unsupported::representation $::j] + + set nostring1 [string match "*no string" $rep1] + set nostring2 [string match "*no string" $rep2] + + #we assume it should have no string rep in either case + #Review: check Tcl versions for behaviour/consistency + if {!$nostring2} { + set bug true + } else { + set bug false + } + set description "string rep for list variable in script generated when script changed\n(not an acknowledged/reported bug)" + return [dict create bug $bug bugref "" description $description level minor] + } + proc has_tclbug_lsearch_strideallinline {} { + #bug only occurs with single -index value combined with -stride -all -inline -subindices + #https://core.tcl-lang.org/tcl/tktview/5a1aaa201d + if {[catch {lsearch -stride 3 -all -inline -index 1 -subindices {a1 a2 a3} *} result]} { + #we aren't looking for an error result - error most likely indicates tcl too old to support -stride + set bug 0 + } else { + set bug [expr {$result ne "a2"}] + } + set description "lsearch -stride with -subindices -inline -all and single index - incorrect results." + return [dict create bug $bug bugref 5a1aaa201d description $description level major] + } + proc has_tclbug_lseq_sign {} { + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + if {[catch {lseq 1 10}]} { + set bug 0 + } else { + set r1 [lseq 1 10 -9] + set r2 [lseq 1 10 -10] + set bug [expr {$r1 ne $r2}] + } + set description "lseq step sign not matching sequence direction - inconsistent results." + return [dict create bug $bug bugref 999b6966b2 description $description level minor] + } + + proc has_tclbug_list_quoting_emptyjoin {} { + #https://core.tcl-lang.org/tcl/tktview/e38dce74e2 + set v1 [list {*}[lindex #foo] {*}[]] ;#can return "#foo" instead of "{#foo}" under some beta 9 releases + set v2 [list #foo] ;#normal tcl list quoting for 1st element that looks like a comment -> "{#foo}" + set bug [expr {![string equal $v1 $v2]}] ;#if they're not equal - we have the bug. + set description "lists elements not properly quoted in some cases. e.g 'list {*}[lindex {etc #foo} 1] {*}[list]' (#foo not braced)" + return [dict create bug $bug bugref e38dc74e2 description $description level medium] + } + + proc has_tclbug_safeinterp_compile {{show 0}} { + #ensemble calls within safe interp not compiled + #https://core.tcl-lang.org/tcl/tktview/1095bf7f756f9aed6bde + namespace eval [namespace current]::testcompile { + proc ensembletest {} {string index a 0} + } + + set has_bug 0 + + set bytecode_outer [tcl::unsupported::disassemble proc [namespace current]::testcompile::ensembletest] + if {$show} { + puts outer: + puts $bytecode_outer + } + if {![interp issafe]} { + #test of safe subinterp only needed if we aren't already in a safe interp + if {![catch { + interp create x -safe + } errMsg]} { + x eval {proc ensembletest {} {string index a 0}} + set bytecode_safe [x eval {tcl::unsupported::disassemble proc ::ensembletest}] + if {$show} { + puts safe: + puts $bytecode_safe + } + interp delete x + #mainly we expect the safe interp might contain invokeStk - indicating not byte compiled (or we would see strindex instead) + #It's possible the interp we're running in is also not compiling ensembles. + #we could then get a result of 2 - which still indicates a problem + if {[string last "invokeStk" $bytecode_safe] >= 1} { + incr has_bug + } + } else { + #our failure to create a safe interp here doesn't necessarily mean the Tcl version doesn't have the problem - but we could end up returning zero if somehow safe interp can't be created from unsafe interp? + #unlikely - but we should warn + puts stderr "Unable to create a safe sub-interp to test - result only indicates status of current interpreter" + } + } + + namespace delete [namespace current]::testcompile + + if {[string last "invokeStk" $bytecode_outer] >= 1} { + incr has_bug + } + set description "ensemble commands not compiled in safe interps - heavy performance impact in safe interps" + return [dict create bug $has_bug bugref 1095bf7f756f9aed6bde description $description level major] + } +} + +tcl::namespace::eval punk::lib::compat { + #*** !doctools + #[subsection {Namespace punk::lib::compat}] + #[para] compatibility functions for features that may not be available in earlier Tcl versions + #[para] These are generally 'forward compatibility' functions ie allowing earlier versions to use later features/idioms by using a Tcl-only version of a missing builtin. + #[para] Such Tcl-only versions will inevitably be less performant - perhaps significantly so. + + #*** !doctools + #[list_begin definitions] + + + + + if {"::lremove" ne [info commands ::lremove]} { + #puts stderr "Warning - no built-in lremove" + interp alias {} lremove {} ::punk::lib::compat::lremove + } + proc lremove {list args} { + #*** !doctools + #[call [fun lremove] [arg list] [opt {index ...}]] + #[para] Forwards compatible lremove for versions 8.6 or less to support equivalent 8.7 lremove + + set data [lmap v $list {list data $v}] + foreach doomed_index $args { + if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} + lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value + } + set keep [lsearch -all -inline -not -exact $data x] + return [lsearch -all -inline -index 1 -subindices $keep *] + } + #not significantly different in performance over test of 100 elements - getting somewhere near 10% for 1k integers + proc lremove2 {list args} { + set data [lmap v $list {list data $v}] + foreach doomed_index $args { + if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} + lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value + } + set keep [lsearch -all -inline -not -exact $data x] + return [lmap v $keep {lindex $v 1}] + } + #outside of lmap - don't know of any particularly nice ways to flatten to subindex 1 of each element.. + #flattening then lsearch with -stride and * would be nice - but it's not avail in 8.6 + + if {![info exists ::auto_index(readFile)]} { + if {[info commands ::readFile] eq ""} { + proc ::readFile {filename {mode text}} { + #readFile not seen in auto_index or as command: installed by punk::lib + # Parse the arguments + set MODES {binary text} + set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] + set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] + + # Read the file + set f [open $filename [dict get {text r binary rb} $mode]] + try { + return [read $f] + } finally { + close $f + } + } + } + } + if {![info exists ::auto_index(writeFile)]} { + if {[info commands ::writeFile] eq ""} { + proc ::writeFile {args} { + #writeFile not seen in auto_index or as command: installed by punk::lib + # Parse the arguments + switch [llength $args] { + 2 { + lassign $args filename data + set mode text + } + 3 { + lassign $args filename mode data + set MODES {binary text} + set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] + set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] + } + default { + set COMMAND [lindex [info level 0] 0] + return -code error -errorcode {TCL WRONGARGS} "wrong # args: should be \"$COMMAND filename ?mode? data\"" + } + } + + # Write the File + set f [open $filename [dict get {text w binary wb} $mode]] + try { + puts -nonewline $f $data + } finally { + close $f + } + } + } + } + + if {"::lpop" ne [info commands ::lpop]} { + #puts stderr "Warning - no built-in lpop" + interp alias {} lpop {} ::punk::lib::compat::lpop + punk::args::set_idalias ::punk::lib::compat::lpop ::lpop ;#point to the definition of ::lpop defined in punk::args::tclcore + } + proc lpop {lvar args} { + #*** !doctools + #[call [fun lpop] [arg listvar] [opt {index}]] + #[para] Forwards compatible lpop for versions 8.6 or less to support equivalent 8.7 lpop + upvar $lvar l + if {![llength $args]} { + set args [list end] + } + set v [lindex $l {*}$args] + set newlist $l + + set path [list] + set subl $l + for {set i 0} {$i < [llength $args]} {incr i} { + set idx [lindex $args $i] + if {![llength [lrange $subl $idx $idx]]} { + error "tcl_lpop index \"$idx\" out of range" + } + lappend path [lindex $args $i] + set subl [lindex $l {*}$path] + } + + set sublist_path [lrange $args 0 end-1] + set tailidx [lindex $args end] + if {![llength $sublist_path]} { + #set newlist [lremove $newlist $tailidx] + set newlist [lreplace $newlist $tailidx $tailidx] + } else { + set sublist [lindex $newlist {*}$sublist_path] + #set sublist [lremove $sublist $tailidx] + set sublist [lreplace $sublist $tailidx $tailidx] + lset newlist {*}$sublist_path $sublist + } + #puts "[set l] -> $newlist" + set l $newlist + return $v + } + if {"::ledit" ni [info commands ::ledit]} { + interp alias {} ledit {} ::punk::lib::compat::ledit + punk::args::set_idalias ::punk::lib::compat::ledit ::ledit + } + proc ledit {lvar first last args} { + upvar $lvar l + #use lindex_resolve to support for example: ledit lst end+1 end+1 h i + set fidx [punk::lib::lindex_resolve [llength $l] $first] + switch -exact -- $fidx { + -Inf { + #index below lower bound + set pre [list] + set fidx -1 + } + Inf { + #first index position is greater than index of last element in the list + set pre [lrange $l 0 end] + set fidx [llength $l] + } + default { + #set pre [lrange $l 0 $first-1] + set pre [lrange $l 0 $fidx-1] + } + } + set lidx [punk::lib::lindex_resolve [llength $l] $last] + switch -exact -- $lidx { + -Inf { + #index below lower bound + set post [lrange $l 0 end] + } + Inf { + #index above upper bound + set post [list] + } + default { + if {$lidx < $fidx} { + #from ledit man page: + #If last is less than first, then any specified elements will be inserted into the list before the element specified by first with no elements being deleted. + set post [lrange $l $fidx end] + } else { + #set post [lrange $l $last+1 end] + set post [lrange $l $lidx+1 end] + } + } + } + set l [list {*}$pre {*}$args {*}$post] + } + + + #slight isolation - varnames don't leak - but calling context vars can be affected + proc lmaptcl2 {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + set linkvars [uplevel 1 [list ::tcl::info::vars]] + set nscaller [uplevel 1 [list ::tcl::namespace::current]] + + set apply_script "" + foreach vname $linkvars { + append apply_script [string map [list %vname% $vname]\ + {upvar 2 %vname% %vname%}\ + ] \n + } + append apply_script $script \n + + #puts "--> $apply_script" + foreach $varnames $list { + lappend result [apply\ + [list\ + $varnames\ + $apply_script\ + $nscaller\ + ] {*}[subst $values]\ + ] + } + return $result + } + + if {"::lmap" ne [info commands ::lmap]} { + #puts stderr "Warning - no built-in lpop" + interp alias {} lmap {} ::punk::lib::compat::lmaptcl + } + #lmap came in Tcl 8.6 - so probably not much need for a tcl forward compatibility version - but here it is anyway + proc lmaptcl {varnames list script} { + set result [list] + set varlist [list] + foreach varname $varnames { + upvar 1 $varname var_$varname ;#ensure no collisions with vars in this proc + lappend varlist var_$varname + } + foreach $varlist $list { + lappend result [uplevel 1 $script] + } + return $result + } + + #tcl8.7/9 compatibility for 8.6 + if {[info commands ::tcl::string::insert] eq ""} { + #https://wiki.tcl-lang.org/page/string+insert + # Pure Tcl implementation of [string insert] command. + proc ::tcl::string::insert {string index insertString} { + # Convert end-relative and TIP 176 indexes to simple integers. + if {[regexp -expanded { + ^(end(?![\t\n\v\f\r ]) # "end" is never followed by whitespace + |[\t\n\v\f\r ]*[+-]?\d+) # m, with optional leading whitespace + (?:([+-]) # op, omitted when index is "end" + ([+-]?\d+))? # n, omitted when index is "end" + [\t\n\v\f\r ]*$ # optional whitespace (unless "end") + } $index _ m op n]} { + # Convert first index to an integer. + switch $m { + end {set index [string length $string]} + default {scan $m %d index} + } + + # Add or subtract second index, if provided. + switch $op { + + {set index [expr {$index + $n}]} + - {set index [expr {$index - $n}]} + } + } elseif {![string is integer -strict $index]} { + # Reject invalid indexes. + return -code error "bad index \"$index\": must be\ + integer?\[+-\]integer? or end?\[+-\]integer?" + } + + # Concatenate the pre-insert, insertion, and post-insert strings. + string cat [string range $string 0 [expr {$index - 1}]] $insertString\ + [string range $string $index end] + } + + # Bind [string insert] to [::tcl::string::insert]. + tcl::namespace::ensemble configure string -map [tcl::dict::replace\ + [tcl::namespace::ensemble configure string -map]\ + insert ::tcl::string::insert] + } + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib::compat ---}] +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::lib { + variable PUNKARGS + tcl::namespace::export * + variable has_struct_list + set has_struct_list [expr {![catch {package require struct::list}]}] + variable has_struct_set + set has_struct_set [expr {![catch {package require struct::set}]}] + variable has_punk_ansi + set has_punk_ansi [expr {![catch {package require punk::ansi}]}] + set has_twapi 0 + if {"windows" eq $::tcl_platform(platform)} { + set has_twapi [expr {![catch {package require twapi}]}] + } + + namespace eval argdoc { + #non-colour SGR codes + set I "\x1b\[3m" ;# [a+ italic] + set NI "\x1b\[23m" ;# [a+ noitalic] + set B "\x1b\[1m" ;# [a+ bold] + set N "\x1b\[22m" ;# [a+ normal] + set T "\x1b\[1\;4m" ;# [a+ bold underline] + set NT "\x1b\[22\;24m\x1b\[4:0m" ;# [a+ normal nounderline] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::is_main_script + @cmd -name punk::lib::is_main_script\ + -summary\ + "Test if current script was launched directly."\ + -help\ + "The ${$B}main script${$N} is the primary script that is executed + by the interpreter, e.g. tclsh or wish. + (as opposed to being loaded by the 'source' command) + + see https://wiki.tcl-lang.org/page/main+script" + @values -min 0 -max 0 + }] + } + proc is_main_script {} { + #see https://wiki.tcl-lang.org/page/main+script + if {[info script] ne "" && [info exists ::argv0] + && + [file dirname [file normalize [file join [info script] ...]]] + eq + [file dirname [file normalize [file join $::argv0 ...]]] + } { + return true + } else { + return false + } + } + + + + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + # Maintenance - This is the primary source for tm_version... functions + # - certain packages script require these but without package dependency + # - 1 punk boot script + # - 2 packagetrace module + # - These should be updated to sync with this + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + proc tm_version_isvalid {versionpart} { + #Needs to be suitable for use with Tcl's 'package vcompare' + if {![catch [list package vcompare $versionpart $versionpart]]} { + return 1 + } else { + return 0 + } + } + proc tm_version_major {version} { + if {![tm_version_isvalid $version]} { + error "Invalid version '$version' is not a proper Tcl module version number" + } + set firstpart [lindex [split $version .] 0] + #check for a/b in first segment + if {[string is integer -strict $firstpart]} { + return $firstpart + } + if {[string first a $firstpart] > 0} { + return [lindex [split $firstpart a] 0] + } + if {[string first b $firstpart] > 0} { + return [lindex [split $firstpart b] 0] + } + error "tm_version_major unable to determine major version from version number '$version'" + } + proc tm_version_canonical {ver} { + #accepts a single valid version only - not a bounded or unbounded spec + if {![tm_version_isvalid $ver]} { + error "tm_version_canonical version '$ver' is not valid for a package version" + } + set parts [split $ver .] + set newparts [list] + foreach o $parts { + set trimmed [string trimleft $o 0] + set firstnonzero [string index $trimmed 0] + switch -exact -- $firstnonzero { + "" { + lappend newparts 0 + } + a - b { + #e.g 000bnnnn -> bnnnnn + set tailtrimmed [string trimleft [string range $trimmed 1 end] 0] + if {$tailtrimmed eq ""} { + set tailtrimmed 0 + } + lappend newparts 0$firstnonzero$tailtrimmed + } + default { + #digit + if {[string is integer -strict $trimmed]} { + #e.g 0100 -> 100 + lappend newparts $trimmed + } else { + #e.g 0100b003 -> 100b003 (still need to process tail) + if {[set apos [string first a $trimmed]] > 0} { + set lhs [string range $trimmed 0 $apos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch + set rhs [string range $trimmed $apos+1 end] ;#assert rhs non-empty and only digits + set rhs [string trimleft $rhs 0] + if {$rhs eq ""} { + set rhs 0 + } + lappend newparts ${lhs}a${rhs} + } elseif {[set bpos [string first b $trimmed]] > 0} { + set lhs [string range $trimmed 0 $bpos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch + set rhs [string range $trimmed $bpos+1 end] ;#assert rhs non-empty and only digits + set rhs [string trimleft $rhs 0] + if {$rhs eq ""} { + set rhs 0 + } + lappend newparts ${lhs}b${rhs} + } else { + #assert - shouldn't get here trimmed val should have been empty, an int or contained an a or b + error "tm_version_canonical error - trimfail - unexpected" + } + } + } + } + } + return [join $newparts .] + } + proc tm_version_required_canonical {versionspec} { + #also trim leading zero from any dottedpart? + #Tcl *allows* leading zeros in any of the dotted parts - but they are not significant. + #e.g 1.01 is equivalent to 1.1 and 01.001 + #also 1b3 == 1b0003 + + if {[string trim $versionspec] eq ""} {return ""} ;#unspecified = any version + set errmsg "tm_version_required_canonical - invalid version specification" + if {[string first - $versionspec] < 0} { + #no dash + #looks like a minbounded version (ie a single version with no dash) convert to min-max form + set from $versionspec + if {![tm_version_isvalid $from]} { + error "$errmsg '$versionpec'" + } + if {![catch {tm_version_major $from} majorv]} { + set from [tm_version_canonical $from] + return "${from}-[expr {$majorv +1}]" + } else { + error "$errmsg '$versionspec'" + } + } else { + # min- or min-max + #validation and canonicalisation (strip leading zeroes from each segment, including either side of a or b) + set parts [split $versionspec -] ;#we expect only 2 parts + lassign $parts from to + if {![tm_version_isvalid $from]} { + error "$errmsg '$versionspec'" + } + set from [tm_version_canonical $from] + if {[llength $parts] == 2} { + if {$to ne ""} { + if {![tm_version_isvalid $to]} { + error "$errmsg '$versionspec'" + } + set to [tm_version_canonical $to] + return $from-$to + } else { + return $from- + } + } else { + error "$errmsg '$versionspec'" + } + error "tm_version_required_canonical should have already returned a canonicalised versionspec - or produced an error with reason before this point" + } + } + # end tm_version... functions + # == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == + + + + # -- --- + #https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists + #DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 + #8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows + # Review and retest as new versions come out. + # -- --- + proc list_multi_append1 {lvar1 lvar2} { + #clear winner in 2024 + upvar $lvar1 l1 $lvar2 l2 + lappend l1 {*}$l2 + return $l1 + } + proc list_multi_append2 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [list {*}$l1 {*}$l2] + } + proc list_multi_append3 {lvar1 lvar2} { + upvar $lvar1 l1 $lvar2 l2 + set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] + } + #testing e.g + #set l1_reset {a b c} + #set l2 {a b c d e f g} + #set l1 $l1_reset + #time {list_multi_append1 l1 l2} 1000 + #set l1 $l1_reset + #time {list_multi_append2 l1 l2} 1000 + # -- --- + + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lswap + @cmd -name punk::lib::lswap\ + -summary\ + "Swap list values in-place"\ + -help\ + "Similar to struct::list swap, except it fully supports basic + list index expressions such as 7-2 end-1 etc. + + struct::list swap doesn't support 'end' offsets, and only + sometimes appears to support basic expressions, depending on the + expression compared to the list length." + @values -min 3 -max 3 + lvar -type string -help\ + "name of list variable" + a -type indexexpression + z -type indexexpression + }] + } + proc lswap {lvar a z} { + upvar $lvar l + set len [llength $l] + if {[lindex_resolve_basic $len $a] < 0 || [lindex_resolve_basic $len $z] < 0} { + #lindex_resolve_basic returns only -Inf if out of range at either bound + #if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred + #(e.g using: lswap mylist end-2 end on a two element list) + + #on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report + #use full 'lindex_resolve' which can report which side via -Inf and Inf special results being lower and upper bound breaches respectively + set a_index [lindex_resolve $len $a] + set a_msg "" + switch -- $a_index { + -Inf { + set a_msg "1st supplied index $a is below the lower bound for the list (0)" + } + Inf { + set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" + } + } + set z_index [lindex_resolve $len $z] + set z_msg "" + switch -- $z_index { + -Inf { + set z_msg "2nd supplied index $z is below the lower bound for the list (0)" + } + Inf { + set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" + } + } + set errmsg "lswap cannot swap indices $a and $z" + if {$a_msg ne ""} { + append errmsg \n $a_msg + } + if {$z_msg ne ""} { + append errmsg \n $z_msg + } + error $errmsg + } + set item2 [lindex $l $z] + lset l $z [lindex $l $a] + lset l $a $item2 + return $l + } + #proc lswap2 {lvar a z} { + # upvar $lvar l + # #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + # set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] + #} + + proc lswap2 {lvar a z} { + upvar $lvar l + #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower + set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] + } + + #an experimental test of swapping vars without intermediate variables + #It's an interesting idea - but probably of little to no practical use + # - the swap_intvars3 version using intermediate var is faster in Tcl + # - This is probably unsurprising - as it's simpler code. + # Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. + #proc swap_intvars {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] + #} + #proc swap_intvars2 {swapv1 swapv2} { + # upvar $swapv1 _x $swapv2 _y + # set _x [expr {$_x ^ $_y}] + # set _y [expr {$_x ^ $_y}] + # set _x [expr {$_x ^ $_y}] + #} + #proc swap_intvars3 {swapv1 swapv2} { + # #using intermediate variable + # upvar $swapv1 _x $swapv2 _y + # set z $_x + # set _x $_y + # set _y $z + #} + + #*** !doctools + #[subsection {Namespace punk::lib}] + #[para] Core API functions for punk::lib + #[list_begin definitions] + + if {[info commands lseq] ne ""} { + #tcl 8.7+ lseq significantly faster, especially for larger ranges + #The internal rep can be an 'arithseries' with no string representation + #support minimal set from to + proc range {from to {by 1}} { + #note inconsistency with lseq 1 10 by -9 vs lseq 1 10 by -10 + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + lseq $from $to by $by + } + } else { + #lseq accepts basic expressions e.g 4-2 for both arguments + #e.g we can do lseq 0 [llength $list]-1 + #if range is to be consistent with the lseq version above - it should support that, even though we don't support most lseq functionality in either wrapper. + #our range function doesn't support double like lseq does. (deliberate) review + proc range {from to {by ""}} { + if {$by eq "0"} { + #as per lseq, step (by) zero always gives no result + return [list] + } + set to [offset_expr $to] + set from [offset_expr $from] + if {$by ne ""} { + set by [offset_expr $by] + } + #assert $by is now empty string or an integer + if {$to > $from} { + switch -- $by { + "" - 1 { + set count [expr {($to -$from) + 1}] + if {$from == 0} { + return [lsearch -all [lrepeat $count 0] *] + } else { + incr from -1 + return [lmap v [lrepeat $count 0] {incr from}] + } + } + default { + set count [expr {($to - $from + $by) / $by}] + if {$count <= 0} { + #return [list] + #https://core.tcl-lang.org/tcl/tktview/999b6966b2 + return [list $from] ;#review + } + set result [list] + for {set i $from} {$i <= $to} {incr i $by} { + lappend result $i + } + return $result + + #if we don't have lseq, we probably don't have lsearch -stride, which would make things simpler. + #set count [expr {($to -$from) + 1}] + #if {$from == 0} { + # set fullrange [lsearch -all [lrepeat $count 0] *] + #} else { + # incr from -1 + # set fullrange [lmap v [lrepeat $count 0] {incr from}] + #} + #set result [list] + #for {set i 0} {$i < $count} {incr i} { + # if {$i % $by == 0} { + # lappend result [lindex $fullrange $i] + # } + #} + #return $result + } + } + #slower methods. + #2) + #set i -1 + #set L [lrepeat $count 0] + #lmap v $L {lset L [incr i] [incr from];lindex {}} + #return $L + #3) + #set L {} + #for {set i 0} {$i < $count} {incr i} { + # lappend L [incr from] + #} + #return $L + } elseif {$from > $to} { + switch -- $by { + "" - -1 { + set count [expr {$from - $to} + 1] + if {$to == 0} { + return [lreverse [lsearch -all [lrepeat $count 0] *]] + } else { + incr from + return [lmap v [lrepeat $count 0] {incr from -1}] + } + } + default { + set count [expr {($to - $from + $by) / $by}] + if {$count <= 0} { + #return [list] + return [list $from] ;#review + } + set result [list] + for {set i $from} {$i >= $to} {incr i $by} { + lappend result $i + } + return $result + } + } + + #2) + #set i -1 + #set L [lrepeat $count 0] + #lmap v $L {lset L [incr i] [incr from -1];lindex {}} + #return $L + #3) + #set L {} + #for {set i 0} {$i < $count} {incr i} { + # lappend L [incr from -1] + #} + #return $L + } else { + return [list $from] + } + } + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lzip + @cmd -name punk::lib::lzip\ + -summary\ + "zip any number of lists together."\ + -help\ + "Conceptually equivalent to converting a list of rows + to a list of columns. + + The number of returned lists (columns) will be equal to + the length of the longest supplied list (row). + If lengths of supplied lists don't match, empty strings + will be inserted in the resulting lists. + + e.g lzip {a b c d e} {1 2 3 4} {x y z} + -> {a 1 x} {b 2 y} {c 3 z} {d 4 {}} {3 {} {}} + " + @values -min 0 -max -1 + list -type list -multiple 1 -optional 1 + }] + } + proc lzip {args} { + switch -- [llength $args] { + 0 {return {}} + 1 {return [lindex $args 0]} + 2 {return [::punk::lib::system::lzip2lists {*}$args]} + 3 {return [::punk::lib::system::lzip3lists {*}$args]} + 4 {return [::punk::lib::system::lzip4lists {*}$args]} + 5 {return [::punk::lib::system::lzip5lists {*}$args]} + 6 {return [::punk::lib::system::lzip6lists {*}$args]} + 7 {return [::punk::lib::system::lzip7lists {*}$args]} + 8 {return [::punk::lib::system::lzip8lists {*}$args]} + 9 {return [::punk::lib::system::lzip9lists {*}$args]} + 10 {return [::punk::lib::system::lzip10lists {*}$args]} + 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 { + set n [llength $args] + if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { + #puts "calling ::punk::lib::system::Build_lzipn $n" + ::punk::lib::system::Build_lzipn $n + } + return [::punk::lib::system::lzip${n}lists {*}$args] + } + default { + if {[llength $args] < 4000} { + set n [llength $args] + if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { + #puts "calling ::punk::lib::system::Build_lzipn $n" + ::punk::lib::system::Build_lzipn $n + } + return [::punk::lib::system::lzip${n}lists {*}$args] + } else { + return [::punk::lib::lzipn {*}$args] + } + } + } + } + + namespace eval system { + proc Build_lzipn {n} { + set arglist [list] + #use punk::lib::range which defers to lseq if available + set vars [lmap i [punk::lib::range 0 $n] {string cat v$i}] ;#v0 v1 v2.. (v0 ignored) + set body "\nlmap " + for {set i 1} {$i <= $n} {incr i} { + lappend arglist l$i + append body "[lindex $vars $i] \$l$i " + } + append body "\{list " + for {set i 1} {$i <= $n} {incr i} { + append body "\$[lindex $vars $i] " + } + append body "\}" \n + #puts "proc punk::lib::system::lzip${n}lists {$arglist} \{" + #puts "$body" + #puts "\}" + proc ::punk::lib::system::lzip${n}lists $arglist $body + } + + #fastest is to know the number of lists to be zipped + proc lzip2lists {l1 l2} { + lmap a $l1 b $l2 {list $a $b} + } + proc lzip3lists {l1 l2 l3} { + lmap a $l1 b $l2 c $l3 {list $a $b $c} + } + proc lzip4lists {l1 l2 l3 l4} { + lmap a $l1 b $l2 c $l3 d $l4 {list $a $b $c $d} + } + proc lzip5lists {l1 l2 l3 l4 l5} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 {list $a $b $c $d $e} + } + proc lzip6lists {l1 l2 l3 l4 l5 l6} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 {list $a $b $c $d $e $f} + } + proc lzip7lists {l1 l2 l3 l4 l5 l6 l7} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 {list $a $b $c $d $e $f $g} + } + proc lzip8lists {l1 l2 l3 l4 l5 l6 l7 l8} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 {list $a $b $c $d $e $f $g $h} + } + proc lzip9lists {l1 l2 l3 l4 l5 l6 l7 l8 l9} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 {list $a $b $c $d $e $f $g $h $i} + } + proc lzip10lists {l1 l2 l3 l4 l5 l6 l7 l8 l9 l10} { + lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 j $l10 {list $a $b $c $d $e $f $g $h $i $j} + } + + #neat algorithm - but while lmap seems better than foreach - it seems the script is evaluated a little slowly + # review - + proc lzipn_alt args { + #stackoverflow - courtesy glenn jackman (modified) + foreach l $args { + lappend vars [incr n] + lappend lmap_args $n $l + } + lmap {*}$lmap_args {lmap v $vars {set $v}} + } + + #2024 - outperforms lmap version - presumably because list sizes reduced as it goes(?) + proc lzipn_tcl8 {args} { + #For tcl pre 9 (without lsearch -stride) + #wiki - courtesy JAL + set list_l $args + set zip_l [] + while {1} { + set cur [lmap a_l $list_l { lindex $a_l 0 }] + set list_l [lmap a_l $list_l { lrange $a_l 1 end }] + + if {[join $cur {}] eq {}} { + break + } + lappend zip_l $cur + } + return $zip_l + } + proc lzipn_tcl9a {args} { + #For Tcl 9+ (with lsearch -stride) + #compared to wiki version + #comparable for lists len <3 or number of args < 3 + #approx 2x faster for large lists or more lists + #needs -stride single index bug fix to use empty string instead of NULL + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [lrepeat [expr {$numcolumns * [llength $args]}] {}] + set outlist [lrepeat $numcolumns {}] + set s 0 + foreach len $lens list $args { + #ledit flatlist $s $e {*}$l {*}[lrepeat [expr {($numcolumns -([llength $l] % $numcolumns)) % $numcolumns}] NULL] + ledit flatlist $s [expr {$s + $len - 1}] {*}$list + incr s $numcolumns + } + #needs single index lstride bugfix + for {set c 0} {$c < $numcolumns} {incr c} { + ledit outlist $c $c [lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *] + } + return $outlist + } + proc lzipn_tcl9b {args} { + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [list] + foreach len $lens list $args { + lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] + } + lmap c [lseq 0 $numcolumns-1] {lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *} + } + proc lzipn_tcl9c {args} { + #SLOW + if {![llength $args]} {return {}} + set lens [lmap l $args {llength $l}] + set numcolumns [::tcl::mathfunc::max {*}$lens] + set flatlist [list] + foreach len $lens list $args { + lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] + } + set zip_l {} + set cols_remaining $numcolumns + for {set c 0} {$c < $numcolumns} {incr c} { + if {$cols_remaining == 1} { + return [list {*}$zip_l $flatlist] + } + lappend zip_l [lsearch -stride $cols_remaining -index 0 -inline -all -subindices $flatlist *] + set flen [llength $flatlist] + set flatlist [lremove $flatlist {*}[lseq 0 to $flen-1 by $cols_remaining]] + incr cols_remaining -1 + } + return $zip_l + } + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lzipn + @cmd -name punk::lib::lzipn\ + -summary\ + "zip any number of lists together (unoptimised)."\ + -help\ + "Conceptually equivalent to converting a list of rows + to a list of columns. + + See lzip which provides the same functionality but with + optimisations depending on the number of supplied lists. + " + @values -min 1 -max 1 + lvar -type string -help\ + "name of list variable" + a -type indexexpression + z -type indexexpression + }] + } + #keep both lzipn_tclX functions available for side-by-side testing in Tcl versions where it's possible + if {![package vsatisfies [package present Tcl] 9.0-] || [dict get [punk::lib::check::has_tclbug_lsearch_strideallinline] bug]} { + #-stride either not available - or has bug preventing use of main algorithm below + proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl8] + } else { + proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl9a] + } + + + + namespace import ::punk::args::lib::tstr + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::tclscript_to_scriptlist + @cmd -name punk::lib::tclscript_to_scriptlist\ + -summary\ + "Parse tcl script to toplevel list of lists."\ + -help\ + "Get topmost list of tcl language elements in script. + produces a list of lists where each sublist is a commandlist or + a comment string." + @values -min 1 -max 1 + script -type string + }] + } + proc tclscript_to_scriptlist {script} { + set scriptlist [list] + set cmdlist [list] + set scrlen [string length $script] + set token "" + set in_token 0 + set in_cmdlist 0 + set in_comment 0 + set charmap [list \t TB \n LF \r CR \\ BSL] ;#for switch 'jump' preservation - review - may be slower than escapes in switch statement? + for {set i 0} {$i < $scrlen} {incr i} { + set ch [string index $script $i] + set chswitch [string map $charmap $ch] + if {!$in_token} { + switch -- $chswitch { + { } - TB { + #ignore - continue being a non token + } + CR { + if {[string index $script $i+1] eq "\n"} { + if {$in_cmdlist} { + #no active token - newline ends cmdlist + set in_cmdlist 0 + lappend scriptlist $cmdlist + set cmdlist [list] + } + incr i + } + } + LF - ";" { + #no active token - newline or semicolon ends cmdlist + if {$in_cmdlist} { + set in_cmdlist 0 + lappend scriptlist $cmdlist + set cmdlist [list] + } + } + BSL { + if {[string index $script $i+1] eq "\n"} { + #continuation of whitespace while no token - boring + incr i + } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { + #continuation of whitespace while no token - boring + incr i 2 + } else { + #an uncommon possibility, a command wth surrounding spaces called in an strange way + # e.g \ cmdname\ arg + set in_token 1 + set token "\\[string index $script $i+1]" + incr i + if {!$in_cmdlist} { + set in_cmdlist 1 + } + } + } + # { + if {$in_cmdlist} { + #ordinary data + set in_token 1 + set token # + } else { + if {!$in_comment} { + set in_token 1 + set in_comment 1 + set token # + } else { + #wnen in comment - all will be a single token until comment ends + append token # + } + } + } + default { + #for completeness.. we should exclude other possible whitespace chars + if {![string is space $ch]} { + set in_token 1 + set token $ch + if {!$in_cmdlist} { + set in_cmdlist 1 + } + } + } + } + } else { + #if we're in a token, we must be in a cmdlist or a comment (single token) + #review - not preserving whitespace in list of commands is ok, but for comments it should ideally be preserved + #note that unbalanced curly in *toplevel* comment will still 'info complete' to true + switch -- $chswitch { + LF { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + #ends token and cmdlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \n + } + } else { + #ends a comment + lappend scriptlist $token ;#single token for comment + set token "" + set in_token 0 + set in_comment 0 + set in_cmdlist 0 ;#shouldn't be necessary, but included for clarity + } + } + ";" { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + #ends token and cmdlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \n + } + } else { + #ordinary char for comment + append token ";" + } + } + CR { + if {[string index $script $i+1] eq "\n"} { + if {[tcl::info::complete $token]} { + #ends token and commandlist + lappend cmdlist $token + lappend scriptlist $cmdlist + set cmdlist "" + set in_cmdlist 0 + set token "" + set in_token 0 + } else { + append token \r\n + incr i + } + } else { + append token \r + } + } + BSL { + if {[string index $script $i+1] eq "\n"} { + #continuation - lf effectively becomes a space + if {!$in_comment} { + #token may end - but cmdlist goes on + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token " " + } + } else { + append token " " + } + incr i ;#skip LF + } elseif {[string range $script $i+1 $i+2] eq "\r\n"} { + #continuation - cr-lf effectively becomes a space + if {!$in_comment} { + #token may end - but cmdlist goes on + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token " " + } + } else { + append token " " + } + incr i 2 ;#skip CRLF + } else { + append token "\\[string index $script $i+1]" + incr i + } + } + default { + if {![string is space $ch]} { + append token $ch + } else { + if {!$in_comment} { + if {[tcl::info::complete $token]} { + lappend cmdlist $token + set token "" + set in_token 0 + } else { + append token $ch + } + } else { + append token $ch + } + } + } + } + } + } + #eof + if {!$in_comment} { + if {$in_token} { + if {[tcl::info::complete $token]} { + lappend cmdlist $token + lappend scriptlist $cmdlist + } else { + error "Eof reached whilst script incomplete. Unbalanced braces?\ntoken: '$token'" + } + } else { + if {$in_cmdlist} { + lappend scriptlist $cmdlist + } + } + } else { + lappend scriptlist $token + } + return $scriptlist + } + + + proc invoke command { + #*** !doctools + #[call [fun invoke] [arg command]] + #[para]Invoke an external command (using tcl open command) capturing stdout,stderr and the exitcode + #[example { + # set script { + # puts stdout {hello on stdout} + # puts stderr {hello on stderr} + # exit 42 + # } + # invoke [list tclsh <<$script] + #}] + + #see https://wiki.tcl-lang.org/page/open + lassign [chan pipe] chanout chanin + lappend command 2>@$chanin + set fh [open |$command] + set stdout [read $fh] + close $chanin + set stderr [read $chanout] + close $chanout + if {[catch {close $fh} cres e]} { + dict with e {} + lassign [set -errorcode] sysmsg pid exit + if {$sysmsg eq {NONE}} { + #output to stderr caused [close] to fail. Do nothing + } elseif {$sysmsg eq {CHILDSTATUS}} { + return [list $stdout $stderr $exit] + } else { + return -options $e $stderr + } + } + return [list $stdout $stderr 0] + } + + proc pdict {args} { + package require punk::args + variable has_punk_ansi + if {!$has_punk_ansi} { + set sep " = " + } else { + #set sep " [a+ Web-seagreen]=[a] " + set sep " [punk::ansi::a+ Green]=[punk::ansi::a] " + } + set argspec [string map [list %sep% $sep] { + @id -id ::punk::lib::pdict + @cmd -name pdict -help\ + "Print dict keys,values to channel + The pdict function operates on variable names - passing the value to the showdict function which operates on values + (see also showdict)" + + @opts -any 1 + + #default separator to provide similarity to tcl's parray function + -separator -default "%sep%" + -roottype -default "dict" + -substructure -default {} + -channel -default stdout -help\ + "existing channel - or 'none' to return as string" + + @values -min 1 -max -1 + + dictvar -type string -help "name of variable. Can be a dict, list or array" + + patterns -type string -default "*" -multiple 1 -help {Multiple patterns can be specified as separate arguments. + Each pattern consists of 1 or more segments separated by the hierarchy separator (forward slash) + The system uses similar patterns to the punk pipeline pattern-matching system. + The default assumed type is dict - but an array will automatically be extracted into key value pairs so will also work. + Segments are classified into list,dict and string operations. + Leading % indicates a string operation - e.g %# gives string length + A segment with a single @ is a list operation e.g @0 gives first list element, @1-3 gives the lrange from 1 to 3 + (todo - change to indexset syntax @1..3 @1..end-1 etc) + A segment containing 2 @ symbols is a dict operation. e.g @@k1 retrieves the value for dict key 'k1' + The operation type indicator is not always necessary if lower segments in the hierarchy are of the same type as the previous one. + e.g1 pdict env */%# + the pattern starts with default type dict, so * retrieves all keys & values, + the next hierarchy switches to a string operation to get the length of each value. + e.g2 pdict env W* S* + Here we supply 2 patterns, each in default dict mode - to display keys and values where the keys match the glob patterns + e.g3 pdict punk_testd */* + This displays 2 levels of the dict hierarchy. + Note that if the sublevel can't actually be interpreted as a dictionary (odd number of elements or not a list at all) + - then the normal = separator will be replaced with a coloured (or underlined if colour off) 'mismatch' indicator. + e.g4 set list {{k1 v1 k2 v2} {k1 vv1 k2 vv2}}; pdict list @0-end/@@k2 @*/@@k1 + Here we supply 2 separate pattern hierarchies, where @0-end and @* are list operations and are equivalent + The second level segment in each pattern switches to a dict operation to retrieve the value by key. + When a list operation such as @* is used - integer list indexes are displayed on the left side of the = for that hierarchy level. + } + }] + #puts stderr "$argspec" + set argd [punk::args::parse $args withdef $argspec] + + set opts [dict get $argd opts] + set dvar [dict get $argd values dictvar] + set patterns [dict get $argd values patterns] + set isarray [uplevel 1 [list ::tcl::array::exists $dvar]] + if {$isarray} { + set dvalue [uplevel 1 [list ::tcl::array::get $dvar]] + if {![dict exists $opts -keytemplates]} { + set arrdisplay [string map [list %dvar% $dvar] {${[if {[lindex $key 1] eq "query"} {val "%dvar% [lindex $key 0]"} {val "%dvar%($key)"}]}}] + dict set opts -keytemplates [list $arrdisplay] + } + dict set opts -keysorttype dictionary + } else { + set dvalue [uplevel 1 [list set $dvar]] + } + showdict {*}$opts $dvalue {*}$patterns + } + + #TODO - much. + #showdict needs to be able to show different branches which share a root path + #e.g show key a1/b* in its entirety along with a1/c* - (or even exact duplicates) + # - specify ansi colour per pattern so different branches can be highlighted? + # - ideally we want to be able to use all the dict & list patterns from the punk pipeline system eg @head @tail # (count) etc + # - The current version is incomplete but passably usable. + # - Copy proc and attempt rework so we can get back to this as a baseline for functionality + proc showdict {args} { ;# analogous to parray (except that it takes the dict as a value) + #set sep " [a+ Web-seagreen]=[a] " + variable has_punk_ansi + if {!$has_punk_ansi} { + set RST "" + set sep " = " + #set sep_mismatch " mismatch " + set sep \u2260 ;# equivalent [punk::ansi::convert_g0 [punk::ansi::g0 |]] (not equal symbol) + } else { + set RST [punk::ansi::a] + set sep " [punk::ansi::a+ Green]=$RST " ;#stick to basic default colours for wider terminal support + #set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]mismatch$RST " + set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]\u2260$RST " + } + package require punk::pipe + #package require punk ;#we need pipeline pattern matching features + package require textblock + + set argd [punk::args::parse $args withdef [string map [list %sep% $sep %sep_mismatch% $sep_mismatch] { + @id -id ::punk::lib::showdict + @cmd -name punk::lib::showdict -help "display dictionary keys and values" + #todo - table tableobject + -return -default "tailtohead" -choices {tailtohead sidebyside} + -channel -default none + -trimright -default 1 -type boolean -help\ + "Trim whitespace off rhs of each line. + This can help prevent a single long line that wraps in terminal from making + every line wrap due to long rhs padding." + -separator -default {%sep%} -help\ + "Separator column between keys and values" + -separator_mismatch -default {%sep_mismatch%} -help\ + "Separator to use when patterns mismatch" + -roottype -default "dict" -help\ + "list,dict,string" + -ansibase_keys -default "" -help\ + "ansi list for each level in -substructure. e.g \[list \[a+ red\] \[a+ web-green\]\]" + -substructure -default {} + -ansibase_values -default "" + -keytemplates -default {\$\{$key\}} -type list -help\ + "list of templates for keys at each level" + -keysorttype -default "none" -choices {none dictionary ascii integer real} + -keysortdirection -default increasing -choices {increasing decreasing} + -debug -default 0 -type boolean -help\ + "When enabled, produces some rudimentary debug output on stderr" + -- -type none -optional 1 + @values -min 1 -max -1 + dictvalue -type list -help\ + "dict or list value" + patterns -default "*" -type string -multiple 1 -help\ + "key or key glob pattern" + }]] + + #for punk::lib - we want to reduce pkg dependencies. + # - so we won't even use the tcllib debug pkg here + set opt_debug [dict get $argd opts -debug] + if {$opt_debug} { + if {[info body debug::showdict] eq ""} { + proc ::punk::lib::debug::showdict {args} { + catch {puts stderr "punk::lib::showdict-> [string cat {*}$args]"} + } + } + } else { + if {[info body debug::showdict] ne ""} { + proc ::punk::lib::debug::showdict {args} {} + } + } + + set opt_sep [dict get $argd opts -separator] + set opt_mismatch_sep [dict get $argd opts -separator_mismatch] + set opt_keysorttype [dict get $argd opts -keysorttype] + set opt_keysortdirection [dict get $argd opts -keysortdirection] + set opt_trimright [dict get $argd opts -trimright] + set opt_keytemplates [dict get $argd opts -keytemplates] + debug::showdict "keytemplates ---> $opt_keytemplates <---" + set opt_ansibase_keys [dict get $argd opts -ansibase_keys] + set opt_ansibase_values [dict get $argd opts -ansibase_values] + set opt_return [dict get $argd opts -return] + set opt_roottype [dict get $argd opts -roottype] + set opt_structure [dict get $argd opts -substructure] + + set dval [dict get $argd values dictvalue] + set patterns [dict get $argd values patterns] + + set result "" + + #pattern hierarchy + # */@1/@0,%#,%str @0/@1 - patterns each one is a pattern or pattern_nest + # * @1 @0,%#,%str - segments + # a b 1 0 %# %str - keys + + set pattern_key_index [list] ;#list of pattern_nests, same length as number of keys generated + set pattern_next_substructure [dict create] + set pattern_this_structure [dict create] + + # -- --- --- --- + #REVIEW + #as much as possible we should pass the indices along as a query to the pipeline pattern matching system so we're not duplicating the work and introducing inconsistencies. + #The main difference here is that sometimes we are treating the result as key-val pairs with the key being the query, other times the key is part of the query, or from the result itself (list/dict indices/keys). + #todo - determine if there is a more consistent rule-based way to do this rather than adhoc + #e.g pdict something * + #we want the keys from the result as individual lines on lhs + #e.g pdict something @@ + #we want on lhs result on rhs + # = v0 + #e.g pdict something @0-2,@4 + #we currently return: + #0 = v0 + #1 = v1 + #2 = v2 + #4 = v4 + #This means we've effectively auto-expanded the first list - elements 0-2. (or equivalently stated: we've flattened the 3 element and 1 element lists into one list of 4 elements) + #ie pdict is doing 'magic' compared to the normal pattern matching syntax, to make useage more convenient. + #this is a tradeoff that could create surprises and make things messy and/or inconsistent. + #todo - see if we can find a balance that gives consistency and logicality to the results whilst allowing still simplified matching syntax that is somewhat intuitive. + #It may be a matter of documenting what type of indexes are used directly as keys, and which return sets of further keys + #The solution for more consistency/predictability may involve being able to bracket some parts of the segment so for example we can apply an @join or %join within a segment + #that involves more complex pattern syntax & parsing (to be added to the main pipeline pattern syntax) + # -- --- --- --- + + set filtered_keys [list] + if {$opt_roottype in {dict list string}} { + #puts "getting keys for roottype:$opt_roottype" + if {[llength $dval]} { + + #TODO - change to indexset notation 0..1,3..end-1 etc + + set re_numdashnum {^([-+]{0,1}\d+)-([-+]{0,1}\d+)$} + set re_idxdashidx {^([-+]{0,1}\d+|end[-+]{1}\d+|end)-([-+]{0,1}\d+|end[-+]{1}\d+|end)$} + foreach pattern_nest $patterns { + set keyset [list] + set keyset_structure [list] + + set segments [split $pattern_nest /] + set levelpatterns [lindex $segments 0] ;#possibly comma separated patterns + #we need to use _split_patterns to separate (e.g to protect commas that appear within quotes) + set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] + #puts stderr "showdict-->_split_patterns: $patterninfo" + foreach v_idx $patterninfo { + lassign $v_idx v idx + #we don't support vars on lhs of index in this context - (because we support simplified glob patterns such as x* and literal dict keys such as kv which would otherwise be interpreted as vars with no index) + set p $v$idx ;#_split_patterns has split too far in this context - the entire pattern is the index pattern + if {[string index $p 0] eq "!"} { + set get_not 1 + set p [string range $p 1 end] + } else { + set get_not 0 + } + switch -exact -- $p { + * - "" { + if {$opt_roottype eq "list"} { + set keys [punk::lib::range 0 [llength $dval]-1] ;#compat wrapper around subset of lseq functionality + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + dict set pattern_this_structure $p list + } elseif {$opt_roottype eq "dict"} { + set keys [dict keys $dval] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } else { + lappend keyset %string + lappend keyset_structure string + dict set pattern_this_structure $p string + } + } + %# { + dict set pattern_this_structure $p string + lappend keyset %# + lappend keyset_structure string + } + # { + #todo get_not !# is test for listiness (see punk) + dict set pattern_this_structure $p list + lappend keyset # + lappend keyset_structure list + } + ## { + dict set pattern_this_structure $p dict + lappend keyset [list ## query] + lappend keyset_structure dict + } + @* { + #puts "showdict ---->@*<----" + dict set pattern_this_structure $p list + set keys [punk::lib::range 0 [llength $dval]-1] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } + @@ { + #get first k v from dict + dict set pattern_this_structure $p dict + lappend keyset [list @@ query] + lappend keyset_structure dict + } + @*k@* - @*K@* { + #returns keys only + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @*.@* { + set keys [dict keys $dval] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } + default { + #puts stderr "===p:$p" + #the basic scheme also doesn't allow commas in dict keys access via the convenience @@key - which isn't great, especially for arrays where it is common practice! + #we've already sacrificed whitespace in keys - so extra limitations should be reduced if it's to be passably useful + #@@"key,etc" should allow any non-whitespace key + switch -glob -- $p { + {@k\*@*} - {@K\*@*} { + #value glob return keys + #set search [string range $p 4 end] + #dict for {k v} $dval { + # if {[string match $search $v]} { + # lappend keyset $k + # } + #} + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @@* { + #exact match key - review - should raise error to match punk pipe behaviour? + set k [string range $p 2 end] + if {$get_not} { + if {[dict exists $dval $k]} { + set keys [dict keys [dict remove $dval $k]] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + } else { + lappend keyset {*}[dict keys $dval] + lappend keyset_structure {*}[lrepeat [dict size $dval] dict] + } + } else { + if {[dict exists $dval $k]} { + lappend keyset $k + lappend keyset_structure dict + } + } + dict set pattern_this_structure $p dict + } + @k@* - @K@* { + #TODO get_not + set k [string range $p 3 end] + if {[dict exists $dval $k]} { + lappend keyset $k + lappend keyset_structure dict + } + dict set pattern_this_structure $p dict + } + {@\*@*} { + #return list of values + #set k [string range $p 3 end] + #lappend keyset {*}[dict keys $dval $k] + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*.@*} { + #TODO get_not + set k [string range $p 4 end] + set keys [dict keys $dval $k] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + dict set pattern_this_structure $p dict + } + {@v\*@*} - {@V\*@*} { + #value-glob return value + #error "dict value-glob value-return only not supported here - bad pattern '$p' in '$pattern_nest'" + if {$get_not} { + lappend keyset [list !$p query] + } else { + lappend keyset [list $p query] + } + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*v@*} - {@\*V@*} { + #key-glob return value + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + {@\*@*} - {@\*v@*} - {@\*V@} { + #key glob return val + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + @??@* { + #exact key match - no error + lappend keyset [list $p query] + lappend keyset_structure dict + dict set pattern_this_structure $p dict + } + default { + set this_type $opt_roottype + if {[string match @* $p]} { + #list mode - trim optional list specifier @ + set p [string range $p 1 end] + dict set pattern_this_structure $p list + set this_type list + } elseif {[string match %* $p]} { + dict set pattern_this_structure $p string + lappend keyset $p + lappend keyset_structure string + set this_type string + } + if {$this_type eq "list"} { + dict set pattern_this_structure $p list + if {[string is integer -strict $p]} { + if {$get_not} { + set keys [punk::lib::range 0 [llength $dval]-1] + set keys [lremove $keys $p] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } else { + lappend keyset $p + lappend keyset_structure list + } + } elseif {[punk::lib::is_indexset $p]} { + set keys [punk::lib::indexset_resolve [llength $dval] $p] + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } elseif {[string match "?*-?*" $p]} { + #could be either - don't change type + #list indices with tcl8.7 underscores? be careful. Before 8.7 we could have used regexp \d on integers + #now we should map _ to "" first + set p [string map {_ {}} $p] + #lassign [textutil::split::splitx $p {\.\.}] a b + if {![regexp $re_idxdashidx $p _match a b]} { + error "unrecognised pattern $p" + } + #TODO - fix terminology. 'lower_resolve' is confusing here as range can be in descending order + #change to start/end terminology? + + set lower_resolve [punk::lib::lindex_resolve [llength $dval] $a] ;#-Inf for too low, Inf for too high + #keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds + if {${lower_resolve} == Inf} { + ##x + #lower bound is above upper list range + #match with decreasing indices is still possible + set lower [expr {[llength $dval]-1}] ;#set to max + } elseif {$lower_resolve == -Inf} { + ##x + set lower 0 + } else { + set lower $lower_resolve + } + set upper [punk::lib::lindex_resolve [llength $dval] $b] + if {$upper == -Inf} { + ##x + #upper bound is below list range - + if {$lower_resolve > -Inf} { + ##x + set upper 0 + } else { + continue + } + } elseif {$upper == Inf} { + #use max + set upper [expr {[llength $dval]-1}] + #assert - upper >=0 because we have ruled out empty lists + } + #note lower can legitimately be higher than upper - lib::range, like lseq can produce sequence in reverse order + set keys [punk::lib::range $lower $upper] + if {$get_not} { + set fullrange [punk::lib::range 0 [llength $dval]-1] + set keys [lremove $fullrange {*}$keys] + if {$lower > $upper} { + set keys [lreverse $keys] + } + } + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] list] + } else { + if {$get_not} { + lappend keyset [list !@$p query] + } else { + lappend keyset [list @$p query] + } + lappend keyset_structure list + } + } elseif {$this_type eq "string"} { + dict set pattern_this_structure $p string + } elseif {$this_type eq "dict"} { + #default equivalent to @\*@* + dict set pattern_this_structure $p dict + #puts "dict: appending keys from index '$p' keys: [dict keys $dval $p]" + set keys [dict keys $dval $p] + if {$get_not} { + set keys [dict keys [dict remove $dval {*}$keys]] + } + lappend keyset {*}$keys + lappend keyset_structure {*}[lrepeat [llength $keys] dict] + } else { + puts stderr "list: unrecognised pattern $p" + } + } + } + } + } + } + + # -- --- --- --- + #check next pattern-segment for substructure type to use + # -- --- --- --- + set substructure "" + set pnext [lindex $segments 1] + set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] + if {[llength $patterninfo] == 0} { + # // ? -review - what does this mean? for xpath this would mean at any level + set substructure [lindex $pattern_this_structure end] + } elseif {[llength $patterninfo] == 1} { + #ignore the NOT operator for purposes of query-type detection + if {[string index $pnext 0] eq "!"} { + set pnext [string range $pnext 1 end] + } + # single type in segment e.g /@@something/ + switch -exact -- $pnext { + "" { + set substructure string + } + @*k@* - @*K@* - @*.@* - ## { + set substructure dict + } + # { + set substructure list + } + ## { + set substructure dict + } + %# { + set substructure string + } + * { + #set substructure $opt_roottype + #set substructure [dict get $pattern_this_structure $pattern_nest] + set substructure [lindex $pattern_this_structure end] + } + default { + switch -glob -- $pnext { + @??@* - @?@* - @@* { + #all 4 or 3 len prefixes bounded by @ are dict + set substructure dict + } + default { + if {[string match @* $pnext]} { + set substructure list + } elseif {[string match %* $pnext]} { + set substructure string + } else { + #set substructure $opt_roottype + #set substructure [dict get $pattern_this_structure $pattern_nest] + set substructure [lindex $pattern_this_structure end] + } + } + } + } + } + } else { + #e.g /@0,%str,.../ + #doesn't matter what the individual types are - we have a list result + set substructure list + } + #puts "--pattern_nest: $pattern_nest substructure: $substructure" + dict set pattern_next_substructure $pattern_nest $substructure + # -- --- --- --- + + if {$opt_keysorttype ne "none"} { + set int_keyset 1 + foreach k $keyset { + if {![string is integer -strict $k]} { + set int_keyset 0 + break + } + } + if {$int_keyset} { + set sortindices [lsort -indices -integer $keyset] + #set keyset [lsort -integer $keyset] + } else { + #set keyset [lsort -$opt_keysorttype $keyset] + set sortindices [lsort -indices -$opt_keysorttype $keyset] + } + set keyset [lmap i $sortindices {lindex $keyset $i}] + set keyset_structure [lmap i $sortindices {lindex $keyset_structure $i}] + } + + foreach k $keyset { + lappend pattern_key_index $pattern_nest + } + + lappend filtered_keys {*}$keyset + lappend all_keyset_structure {*}$keyset_structure + + #puts stderr "--->pattern_nest:$pattern_nest keyset:$keyset" + } + } + #puts stderr "[dict get $pattern_this_structure $pattern_nest] keys: $filtered_keys" + } else { + puts stdout "unrecognised roottype: $opt_roottype" + return $dval + } + + if {[llength $filtered_keys]} { + #both keys and values could have newline characters. + #simple use of 'format' won't cut it for more complex dict keys/values + #use block::width or our columns won't align in some cases + switch -- $opt_return { + "tailtohead" { + #last line of key is side by side (possibly with separator) with first line of value + #This is more intelligible when terminal wrapping occurs - and is closer to what happens with parray multiline keys and values + #we still pad the key to max width so that the separator appears in the same column - which in the case of wide keys could cause that to wrap for all entries + + set kt [lindex $opt_keytemplates 0] + if {$kt eq ""} { + set kt {${$key}} + } + #set display_keys [lmap k $filtered_keys {tcl::string::map [list %k% $k] $kt}] + set display_keys [lmap key $filtered_keys {tstr -ret string -allowcommands $kt}] + set maxl [::tcl::mathfunc::max {*}[lmap v $display_keys {textblock::width $v}]] + + set kidx 0 + set last_hidekey 0 + foreach keydisplay $display_keys key $filtered_keys { + set thisval "?" + set hidekey 0 + set pattern_nest [lindex $pattern_key_index $kidx] + set pattern_nest_list [split $pattern_nest /] + #set this_type [dict get $pattern_this_structure $pattern_nest] + #set this_type [dict get $pattern_this_structure $key] + set this_type [lindex $all_keyset_structure $kidx] + #puts stderr "---> kidx:$kidx key:$key - pattern_nest:$pattern_nest this_type:$this_type" + + set is_match 1 ;#whether to display the normal separator or bad-match separator + switch -- $this_type { + dict { + #todo? - slower lsearch if -dupes 1 flag set so we can display duplicate 'keys' if var not a proper dict but rather a dict-shaped list that we want to display as a dict + # - default highlight dupes (ansi underline?) + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + % thisval.= $qry= $dval + } else { + set thisval [tcl::dict::get $dval $key] + } + + #set substructure [lrange $opt_structure 1 end] + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + set nextopts [dict get $argd opts] + + + set subansibasekeys [lrange $opt_ansibase_keys 1 end] + set nextkeytemplates [lrange $opt_keytemplates 1 end] + #dict set nextopts -substructure $nextsub + dict set nextopts -keytemplates $nextkeytemplates + dict set nextopts -ansibase_keys $subansibasekeys + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + #puts stderr "showdict {*}$nextopts $thisval [lindex $args end]" + + if {[llength $nextpatterns]} { + if {[catch { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } errMsg]} { + #puts stderr ">>> nextpatterns:'$nextpatterns' nextopts:'$nextopts'" + set is_match 0 + } + } + } + list { + if {[string is integer -strict $key]} { + set thisval [lindex $dval $key] + } else { + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + } else { + set qry $key + } + % thisval.= $qry= $dval + } + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + set nextopts [dict get $argd opts] + + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + + #if {![llength $nextpatterns]} { + # set nextpatterns * + #} + if {[llength $nextpatterns]} { + if {[catch { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } errMsg]} { + set is_match 0 + } + } + } + string { + set hidekey 1 + switch -- $key { + "%string" { + set hidekey 1 + set thisval $dval + } + "%ansiview" { + set thisval [ansistring VIEW -lf 1 $dval] + } + "%ansiviewstyle" { + set thisval [ansistring VIEWSTYLE -lf 1 $dval] + } + default { + switch -glob -- $key { + *lpad-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which left -width $width] + } + %lpadstr-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which left -width $width -padchar $extra] + } + %rpad-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + $extra}] + set thisval [textblock::pad $dval -which right -width $width] + } + %rpadstr-* { + set hidekey 1 + lassign [split $key -] _ extra + set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] + set thisval [textblock::pad $dval -which right -width $width -padchar $extra] + } + %split-* { + #split on one or more chars - review + set hidekey 1 + lassign [split $key -] _ splitchars + set thisval [split $dval $splitchars] + } + default { + if {[lindex $key 1] eq "query"} { + set qry [lindex $key 0] + } else { + set qry $key + } + set thisval $dval + if {[string index $key 0] ne "%"} { + set key %$key + } + #pipeline + % thisval.= $key= $thisval + } + } + + } + } + + set nextpatterns [list] + #which pattern nest applies to this branch + set nextsub [dict get $pattern_next_substructure $pattern_nest] + if {[llength $pattern_nest_list]} { + set nest [lrange $pattern_nest_list 1 end] + lappend nextpatterns {*}[join $nest /] + } + #set nextopts [dict get $argd opts] + dict set nextopts -roottype $nextsub + dict set nextopts -channel none + + if {[llength $nextpatterns]} { + set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] + } + + } + } + if {$this_type eq "string" && $hidekey} { + lassign [textblock::size $thisval] _vw vwidth _vh vheight + #set blanks_above [string repeat \n [expr {$kheight -1}]] + set vblock $opt_ansibase_values$thisval$RST + #append result [textblock::join_basic -- $vblock] + #review - we wouldn't need this space if we had a literal %sp %sp-x ?? + append result " $vblock" + } else { + set ansibase_key [lindex $opt_ansibase_keys 0] + + lassign [textblock::size $keydisplay] _kw kwidth _kh kheight + lassign [textblock::size $thisval] _vw vwidth _vh vheight + + set totalheight [expr {$kheight + $vheight -1}] + set blanks_above [string repeat \n [expr {$kheight -1}]] + set blanks_below [string repeat \n [expr {$vheight -1}]] + + if {$is_match} { + set use_sep $opt_sep + } else { + set use_sep $opt_mismatch_sep + } + + + set sepwidth [textblock::width $use_sep] + set kblock [textblock::pad $ansibase_key$keydisplay$RST$blanks_below -width $maxl] + set sblock [textblock::pad $blanks_above$use_sep$blanks_below -width $sepwidth] + set vblock $blanks_above$opt_ansibase_values$thisval$RST + #only vblock is ragged - we can do a basic join because we don't care about rhs whitespace + if {$last_hidekey} { + append result \n + } + #append result [textblock::join_basic -- $kblock $sblock $vblock] \n + append result [textblock::join_basic_raw $kblock $sblock $vblock] \n + } + set last_hidekey $hidekey + incr kidx + } + } + "sidebyside" { + # TODO - fix + #This is nice for multiline keys and values of reasonable length, will produce unintuitive results when line-wrapping occurs. + #use ansibase_key etc to make the output more comprehensible in that situation. + #This is why it is not the default. (review - terminal width detection and wrapping?) + set maxl [::tcl::mathfunc::max {*}[lmap v $filtered_keys {textblock::width $v}]] + foreach key $filtered_keys { + set kt [lindex $opt_keytemplates 0] + if {$kt eq ""} { + set kt "%k%" + } + set keydisplay $opt_ansibase_keys[string map [list %k% $key] $kt]$RST + #append result [format "%-*s = %s" $maxl $key [dict get $dval $key]] \n + #differing height blocks (ie ragged) so we need a full textblock::join rather than join_basic + append result [textblock::join -- [textblock::pad $keydisplay -width $maxl] $opt_sep "$opt_ansibase_values[dict get $dval $key]$RST"] \n + } + } + } + } + if {$opt_trimright} { + set result [::join [lines_as_list -line trimright $result] \n] + } + if {[string last \n $result] == [string length $result]-1} { + set result [string range $result 0 end-1] + } + #stdout/stderr can exist but not be in 'chan names' (e.g when transforms in place) + set chan [dict get $argd opts -channel] + switch -- $chan { + stderr - stdout { + puts $chan $result + } + none { + return $result + } + default { + #review - check member of chan names? + #just try outputting to the supplied channel for now + puts $chan $result + } + } + } + + proc is_list_all_in_list {small large} { + if {[llength $small] > [llength $large]} {return 0} + foreach x $large { + ::set ($x) {} + } + foreach x $small { + if {![info exists ($x)]} { + return 0 + } + } + return 1 + } + #v2 generally seems slower + proc is_list_all_in_list2 {small large} { + set small_in_large [lsort [struct::set intersect [lsort -unique $small] $large ]] + return [struct::list equal [lsort $small] $small_in_large] + } + if {!$has_struct_list || !$has_struct_set} { + set body { + package require struct::list + package require struct::set + } + append body [info body is_list_all_in_list2] + proc is_list_all_in_list2 {small large} $body + } + + proc is_list_all_ni_list {A B} { + foreach x $B { + ::set ($x) {} + } + foreach x $A { + if {[info exists ($x)]} { + return 0 + } + } + return 1 + } + proc is_list_all_ni_list2 {a b} { + set i [struct::set intersect $a $b] + return [expr {[llength $i] == 0}] + } + if {!$has_struct_set} { + set body { + package require struct::list + } + append body [info body is_list_all_ni_list2] + proc is_list_all_ni_list2 {a b} $body + } + proc is_cachedlist_all_ni_list {a b} { + upvar 0 ::punk::lib::caches::funcs_ni_list funcs + if {[info exists funcs($a)]} { + return [[set funcs($a)] $b] + } + set keybytes [encoding convertto utf-8 $a] + set key [binary encode base64 $keybytes] ;#one single-line base64 string + + set expression "" + foreach t $a { + #append expression "({$t} ni \$b) && " + append expression "{$t} ni \$b && " + } + set expression [string trimright $expression " &"] ;#trim trailing spaces and ampersands + proc ::punk::lib::caches::ni_list_$key {b} [string map [list @expression@ $expression] { + return [expr {@expression@}] + }] + + set funcs($a) ::punk::lib::caches::ni_list_$key + return [punk::lib::caches::ni_list_$key $b] + } + proc is_cachedlist_all_ni_list2 {a b} { + upvar 0 ::punk::lib::caches::funcs_ni_list funcs + if {[info exists funcs($a)]} { + return [[set funcs($a)] $b] + } + set keybytes [encoding convertto utf-8 $a] + set key [binary encode base64 $keybytes] ;#one single-line base64 string + + set d [dict create] + foreach x $a { + dict set d $x "" + } + #constructing a switch statement could be an option + # - but would need to avoid using escapes in order to get a jump-table + # - this would need runtime mapping of values - unlikely to be a win + proc ::punk::lib::caches::ni_list_$key {b} [string map [list @d@ $d] { + foreach x $b { + if {[::tcl::dict::exists {@d@} $x]} { + return 0 + } + } + return 1 + }] + + set funcs($a) ::punk::lib::caches::ni_list_$key + return [punk::lib::caches::ni_list_$key $b] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::ldiff + @cmd -name punk::lib::ldiff\ + -summary\ + "Difference consisting of items with removeitems removed."\ + -help\ + "Somewhat like struct::set difference, but order preserving, and doesn't + treat as a 'set' so preserves any duplicates in items. + + struct::set difference may happen to preserve ordering when items are + integers, but order can't be relied on, especially as struct::set has + 2 differening implementations (tcl vs critcl) which return results with + different ordering to each other and different deduping behaviour in + some cases (e.g when 2nd arg is empty)" + @values -min 2 -max 2 + items -type list + removeitems -type list + }] + } + proc ldiff {items removeitems} { + if {[llength $removeitems] == 0} {return $items} + set result {} + foreach item $items { + if {$item ni $removeitems} { + lappend result $item + } + } + return $result + } + #with ledit (also avail in 8.6 using punk::lib::compat::ledit + proc ldiff2 {fromlist removeitems} { + if {[llength $removeitems] == 0} {return $fromlist} + foreach item $removeitems { + set posns [lsearch -all -exact $fromlist $item] + foreach p $posns {ledit fromlist $p $p} + } + return $fromlist + } + proc ldiff3 {fromlist removeitems} { + set doomed [list] + foreach item $removeitems { + lappend doomed {*}[lsearch -all -exact $fromlist $item] + } + lremove $fromlist {*}$doomed + } + + #fix for tcl impl of struct::set::diff which doesn't dedupe + proc struct_set_diff_unique {A B} { + package require struct::set ;#require even if tcl impl - so the dependency isn't missed accidentally due to Loaded state of programmers machine. + if {[struct::set::Loaded] eq "tcl"} { + return [punk::lib::setdiff $A $B] + } else { + #use (presumably critcl) implementation for speed + return [struct::set difference $A $B] + } + } + + + #non-dupe preserving - for consistency in dedupe behaviour we don't shortcircuit empty B + #consistent dedupe and order-maintenance of remaining items in A differentiate this from struct::set difference (dedupes with critcl, doesn't with tcl implementation 2024) + #also struct::set difference with critcl is faster + proc setdiff {A B} { + if {[llength $A] == 0} {return {}} + set d [dict create] + foreach x $A {dict set d $x {}} + foreach x $B {dict unset d $x} + return [dict keys $d] + } + #bulk dict remove is slower than a foreach with dict unset + #proc setdiff2 {fromlist removeitems} { + # #if {[llength $fromlist] == 0} {return {}} + # set d [dict create] + # foreach x $fromlist { + # dict set d $x {} + # } + # return [dict keys [dict remove $d {*}$removeitems]] + #} + #array is about 15% faster than dict - but unordered. (which is ok for sets - but we have struct::set for that) + proc setdiff_unordered {A B} { + if {[llength $A] == 0} {return {}} + array set tmp {} + foreach x $A {::set tmp($x) .} + foreach x $B {catch {unset tmp($x)}} + return [array names tmp] + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lunique_unordered + @cmd -name punk::lib::lunique_unordered\ + -summary\ + "unique values in list"\ + -help\ + "Return unique values in provided list. + This removes duplicates but *may* rearrange the + order of the returned elements compared to the + original list. + + When struct::set is available this will be used + for the implementation, as it can be *slightly* + faster if acceleration is present. When struct::set + is not available it will fallback to lunique and + provide the same functionality with order preserved." + @values -min 1 -max 1 + list -type list + }] + } + #default/fallback implementation + proc lunique_unordered {list} { + lunique $list + } + if {$has_struct_set} { + if {[struct::set equal [struct::set union {a a} {}] {a}]} { + proc lunique_unordered {list} { + struct::set union $list {} + } + } else { + #struct::set union operates on a 'set' - so this probably won't change, and hopefully is + #consistent across unacelerated versions and those implemented in accelerators, + #but if it ever does change - be a little noisy about it. + puts stderr "punk::lib WARNING: struct::set union no longer dedupes!" + #we could also test a sequence of: struct::set add + } + } + + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lunique + @cmd -name punk::lib::lunique\ + -summary\ + "Order-preserving unique values in list"\ + -help\ + "Return unique values in provided list. + This removes duplicates whilst preserving the + original order of the provided list. + + When struct::set is available with acceleration, + lunique_unordered may be slightly faster." + @values -min 1 -max 1 + list -type list + }] + } + proc lunique {list} { + set new {} + foreach item $list { + if {$item ni $new} { + lappend new $item + } + } + return $new + } + proc lunique2 {list} { + set doomed [list] + #expr 'in' probably faster than using a dict - for lists approx < 20,000 items. (wiki wisdom - url?) + for {set i 0} {$i < [llength $list]} {} { + set item [lindex $list $i] + lappend doomed {*}[lrange [lsearch -all -exact -start $i $list $item] 1 end] + while {[incr i] in $doomed} {} + } + lremove $list {*}$doomed + } + #The closure-like behaviour is *very* slow especially when called from a context such as the global namespace with lots of vars and large arrays such as ::env + proc lmapflat_closure {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + # -- --- --- + #capture - use uplevel 1 or namespace eval depending on context + set capture [uplevel 1 { + apply { varnames { + set capturevars [tcl::dict::create] + set capturearrs [tcl::dict::create] + foreach fullv $varnames { + set v [tcl::namespace::tail $fullv] + upvar 1 $v var + if {[info exists var]} { + if {(![array exists var])} { + tcl::dict::set capturevars $v $var + } else { + tcl::dict::set capturearrs capturedarray_$v [array get var] + } + } else { + #A variable can show in the results for 'info vars' but still not 'exist'. e.g a 'variable x' declaration in the namespace where the variable has never been set + } + } + return [tcl::dict::create vars $capturevars arrs $capturearrs] + } } [info vars] + } ] + # -- --- --- + set cvars [tcl::dict::get $capture vars] + set carrs [tcl::dict::get $capture arrs] + set apply_script "" + foreach arrayalias [tcl::dict::keys $carrs] { + set realname [string range $arrayalias [string first _ $arrayalias]+1 end] + append apply_script [string map [list %realname% $realname %arrayalias% $arrayalias] { + array set %realname% [set %arrayalias%][unset %arrayalias%] + }] + } + + append apply_script [string map [list %script% $script] { + #foreach arrayalias [info vars capturedarray_*] { + # set realname [string range $arrayalias [string first _ $arrayalias]+1 end] + # array set $realname [set $arrayalias][unset arrayalias] + #} + #return [eval %script%] + %script% + }] + #puts "--> $apply_script" + foreach $varnames $list { + lappend result {*}[apply\ + [list\ + [concat $varnames [tcl::dict::keys $cvars] [tcl::dict::keys $carrs] ]\ + $apply_script\ + ] {*}[subst $values] {*}[tcl::dict::values $cvars] {*}[tcl::dict::values $carrs] ] + } + return $result + } + #link version - can write to vars in calling context - but keeps varnames themselves isolated + #performance much better than capture version - but still a big price to pay for the isolation + proc lmapflat_link {varnames list script} { + set result [list] + set values [list] + foreach v $varnames { + lappend values "\$$v" + } + set linkvars [uplevel 1 [list info vars]] + set nscaller [uplevel 1 [list namespace current]] + + set apply_script "" + foreach vname $linkvars { + append apply_script [string map [list %vname% $vname]\ + {upvar 2 %vname% %vname%}\ + ] \n + } + append apply_script $script \n + + #puts "--> $apply_script" + foreach $varnames $list { + lappend result {*}[apply\ + [list\ + $varnames\ + $apply_script\ + $nscaller\ + ] {*}[subst $values]\ + ] + } + return $result + } + + #proc lmapflat {varnames list script} { + # concat {*}[uplevel 1 [list lmap $varnames $list $script]] + #} + #lmap can accept multiple var list pairs + proc lmapflat {args} { + concat {*}[uplevel 1 [list lmap {*}$args]] + } + proc lmapflat2 {args} { + concat {*}[uplevel 1 lmap {*}$args] + } + + #proc dict_getdef {dictValue args} { + # if {[llength $args] < 1} { + # error {wrong # args: should be "dict_getdef dictValue ?key ...? key default"} + # } + # set keys [lrange $args -1 end-1] + # if {[tcl::dict::exists $dictValue {*}$keys]} { + # return [tcl::dict::get $dictValue {*}$keys] + # } else { + # return [lindex $args end] + # } + #} + if {[info commands ::tcl::dict::getdef] eq ""} { + proc dict_getdef {dictValue args} { + set keys [lrange $args 0 end-1] + if {[tcl::dict::exists $dictValue {*}$keys]} { + return [tcl::dict::get $dictValue {*}$keys] + } else { + return [lindex $args end] + } + } + } else { + #we pay a minor perf penalty for the wrap + interp alias "" ::punk::lib::dict_getdef "" ::tcl::dict::getdef + } + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + #supports *safe* ultra basic offset expressions as used by lindex etc, but without the 'end' features + #safe in that we don't evaluate the expression as a string. + proc offset_expr {expression} { + set expression [tcl::string::map {_ {}} $expression] ;#review - this is for 8.6 to understand underscored ints + if {[tcl::string::is integer -strict $expression]} { + return [expr {$expression}] + } + if {[regexp {([^+-]*)([+-])(.*)} $expression _match a op b] && [tcl::string::is integer -strict $a] && [tcl::string::is integer -strict $b]} { + if {$op eq "-"} { + return [expr {$a - $b}] + } else { + return [expr {$a + $b}] + } + } else { + error "bad expression '$expression': must be integer?\[+-\]integer?" + } + } + + punk::args::define { + @id -id ::punk::lib::is_indexset + @cmd -name punk::lib::is_indexset\ + -summary\ + "Validate string is a comma-delimited 'indexset'."\ + -help\ + "Validate that a string is an 'indexset' + + An indexset consists of a comma delimited list of indexes or index-ranges. + No particular base is assumed for the purposes of validating an indexset here. + While in Tcl, lists are zero-based - an indexset can be applied to lists of any base. + e.g -10..-1 is an indexset that just won't resolve any results for a list with a base >= 0. + To validate if an indexset is strictly within range, both the length of the data and the base would + need to be considered. + + The normal 'range' specifier is .. but can be of the form .x. where x is the step value. + The range specifier can appear at the beginning, middle or end, or even alone to indicate the entire + range of valid values. + e.g the following are all valid ranges + 1.. + (index 1 to 'max') + ..10 + (index 'base' to 10) + 2..11 + (index 2 to 11) + .. + (all indices) + .3. + (1st index and every 3rd index thereafter) + + Common whitespace elements space,tab,newlines are ignored. + Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, + e.g end-2 or 2+2. + + see indexset_resolve" + @values -min 1 -max 1 + indexset -type string + } + proc is_indexset {indexset} { + #collapse internal whitespace (for basic whitespace set we allow) + set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] + if {![regexp {^[\-\+_end,\.0-9]*$} $indexset]} { + return 0 + } + set ranges [split $indexset ,] + foreach r $ranges { + set validateindices [list] + set rposn [string first .. $r] + if {$rposn >= 0} { + set sepsize 2 + set step 1 + #review - whitespace between ints? + lappend validateindices {*}[string range $r 0 $rposn-1] {*}[string range $r $rposn+2 end] + } elseif {[string first . $r] >= 0} { + set stripped [string map {. ""} $r] + if {[tcl::string::length $stripped] != [tcl::string::length $r]-2} { + #if one dot exists - must be exactly 2 dots in total - possibly separated by positive/negative int (not zero) + return 0 + } + #assert - we have exactly 2 dots separated by something. + #check for .n. 'stepped' range + set fdot [string first . $r] + set ldot [string last . $r] + set step [string range $r $fdot+1 $ldot-1] + #todo - allow basic mathops for step: 2+1 2+-1 etc same as tcl lindex, lseq + #1.0.10 should be valid but behave similarly to lseq 1 0 by 0 ie returns nothing + + #1.end.10 or similar shouldn't be valid - but we need to allow other basic index expressions. + if {[string match *end* $step] || [catch {lindex {} $step}]} { + return 0 + } + #if {![string is integer -strict $step] || $step == 0} { + # return 0 + #} + lappend validateindices {*}[string range $r 0 $fdot-1] {*}[string range $r $ldot+1 end] + } else { + #'range' is just an index + set validateindices [list $r] + } + + foreach v $validateindices { + if {$v eq "" || $v eq "end"} {continue} + if {[string is integer -strict $v]} {continue} + if {[catch {lindex {} $v}]} { + return 0 + } + } + } + return 1 + } + #review - compare to IMAP4 methods of specifying ranges? + #TODO add tests to test::punk::lib indexset_resolve is a little tricky + punk::args::define { + @id -id ::punk::lib::indexset_resolve + @cmd -name punk::lib::indexset_resolve\ + -summary\ + "Resolve an indexset to a list of integers based on supplied list or string length."\ + -help\ + "Resolve an 'indexset' to a list of actual indices within the range of the provided numitems value. + e.g in a basic case: for a list of 10 items, 'indexset_resolve 10 end' will return the index 9 + + An indexset consists of a comma delimited list of indexes or index-ranges. + Ranges must be specified with a range-indicator such as .. as the separator, with an empty value at + either side of the separator representing beginning and end of the index range respectively. + The range-separator can be of the form .x. where x is an integer or basic expression + (single +/- operation) that indicates the step value to use. This is equivalent to the 'by' value + in the tcl9 lseq command. + + When the start index is lower than the end, the step value defaults to 1. + ie indexset_resolve 0..7 is equivalent to indexset_resolve 0.1.7 + When the start index is higher than the end, the step value defaults to -1. + ie indexset_resolve 7..0 is equivalent to indexset_resolve 0.-1.7 + + If start and end are ommitted, increasing order is assumed if the step isn't specified. + eg + .. represents the range from the base to the end + .-1. would represent end to base with step -1 + + If start is omitted and only the end is supplied: + The default step is 1 indicating ascension and the missing start is equivalent to the base. + indexset_resolve 5 ..2 + -> 0 1 2 + The default start is 'end' if the step is negative + indexset_resolve 5 .-1.2 + -> 4 3 2 + If end is omitted and only the start is supplied: + The default step is 1 indicating ascension and the missing end is equivalent to 'end' + indexset_resolve 5 2.. + -> 2 3 4 + The default end is the base if the step is negative + indexset_resolve 5 2.-1. + -> 2 1 0 + + Like the tcl9 lseq command - a step (by) value of zero produces no results. + + The indexes are 0-based by default, but the base can be specified. + indexset_resolve 7 .. + -> 0 1 2 3 4 5 6 + indexset_resolve -base -3 7 .. + -> -3 -2 -1 0 1 2 3 + + Whitespace is ignored. + Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, + e.g end-2 or 2+2. + + end means the last item. + end-1 means the second last item. + 0.. is the same as 0..end + + indexset examples: + + These assume the default 0-based indices (-base 0) + + 1,3.. + output the index 1 (2nd item) followed by all from index 3 to the end. + indexset_resolve 4 1,3.. + -> 1 3 + indexset_resolve 10 1,3.. + -> 1 3 4 5 6 7 8 9 + 0..2,end + output the first 3 indices, and the last index. + end-1..0 + output the indexes in reverse order from 2nd last item to first item." + @leaders -min 0 -max 0 + @opts + -base -type integer -prefix 1 -default 0 -help\ + "This is the starting index. It can be positive, negative or zero. + This affects the start and end calculations, limiting what indices will be + returned. + e.g with base 1 'end' will give a different value from base 0 + + for 10 items 'end' is 10 when 1-based + for 10 items 'end' is 9 when 0-based + + For base 1, index 0 is considered to be below the range. + ie + indexset_resolve -base 1 10 0..3 + -> 1 2 3 + indexset_resolve -base 0 10 0..3 + -> 0 1 2 3 + + It does not *convert* indexes within the range. + + indexset_resolve -base 1 10 5 + -> 5 + indexset_resolve -base 0 10 5 + -> 5 + + ie if you ask for a 1-based resolution of an indexset the integers that are within + the range will come out the same, so the result needs to be treated as a 1-based + set of indices when performing further operations. + " + @values -min 2 -max 3 + numitems -type integer + indexset -type indexset -help "comma delimited specification for indices to return" + } + + #limit punk::args parsing to unhappy paths where possible + proc indexset_resolve {args} { + # -------------------------------------------------- + # Manual parsing of happy path args instead of using punk::args::parse $args withid ::punk::lib::indexset_resolve + # This is because indexset_resolve is *somewhat* low level, has only a few args, and we don't want any overhead. + # for the unhappy path - the punk::args::parse is fine to generate the usage/error information. + # -------------------------------------------------- + if {[llength $args] < 2} { + punk::args::resolve $args withid ::punk::lib::indexset_resolve + } + set indexset [lindex $args end] + set numitems [lindex $args end-1] + if {![string is integer -strict $numitems] || ![is_indexset $indexset]} { + #use parser on unhappy path only + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + #assert we have 2 or more args + set base 0 ;#default + if {[llength $args] > 2} { + #if more than just numitems and indexset - we expect only -base ie 4 args in total + if {[llength $args] != 4} { + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + set optname [lindex $args 0] + set optval [lindex $args 1] + set fulloptname [tcl::prefix::match -error "" -base $optname] + if {$fulloptname ne "-base" || ![string is integer -strict $optval]} { + set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] + uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] + } + set base $optval + } + # -------------------------------------------------- + + + set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] ;#collapse basic whitespace + set index_list [list] ;#list of actual indexes within the range + set iparts [split $indexset ,] + set based_max [expr {$numitems -1 + $base}] + + #we already did is_indexset check above, so we can make assumptions about well-formedness of each part + foreach ipart $iparts { + set ipart [string trim $ipart] + #we need to cater for n..m as well as n.s.m where s is 'step' + set rposn [string first . $ipart] + if {$rposn>=0} { + #if we found one dot - there must be exactly 2 dots in the ipart, separated by nothing, or a basic integer-expression + set rposn2 [string last . $ipart] + if {$rposn2 == $rposn+1} { + #.. + set step "default" ;#could be 1 or -1 + } else { + set step [tcl::string::range $ipart $rposn+1 $rposn2-1] + } + lassign [punk::lib::string_splitbefore_indices $ipart $rposn $rposn2+1] rawa _ rawb + + set rawa [string trim $rawa] + set rawb [string trim $rawb] + if {$rawa eq "" && $rawb eq ""} { + if {$step eq "default"} { + set step 1 ;#default ascending when no start and no end + } + if {$step < 0} { + set rawa end + set rawb $base + } else { + set rawa $base + set rawb end + } + #if neither start nor end specified - we won't get out of range results from lindex_resolve + set a [punk::lib::lindex_resolve $numitems $rawa $base] + set b [punk::lib::lindex_resolve $numitems $rawb $base] + } else { + if {$rawa eq ""} { + if {$step eq "default"} { + #when start not specified, but end is - default direction always ascending + #(even if end is base or below range) + set step 1 + } + if {$step < 0} { + set rawa end + } else { + set rawa $base + } + } + set a [punk::lib::lindex_resolve $numitems $rawa $base] + if {$a == -Inf} { + #undershot - leave negative + } elseif {$a == Inf} { + #overshot + set a [expr {$based_max + 1}] ;#put it outside the range on the upper side + } + #review - a may be -Inf + + if {$rawb eq ""} { + if {$step eq "default"} { + set step 1 + } + if {$step < 0} { + if {$a < $base} { + #make sure both + #mathfunc::isinf is tcl9+ + if {[catch { + if {[::tcl::mathfunc::isinf $a]} { + set a [expr {$base -1}] + } + }]} { + if {[string match -nocase *inf* $a]} { + set a [expr {$base -1}] + } + } + set rawb $a + } else { + set rawb $base + } + } else { + if {$a > $based_max} { + set rawb $a ;#make sure .. doesn't return last item - should return nothing + } else { + set rawb end + } + } + } + set b [punk::lib::lindex_resolve $numitems $rawb $base] + if {$b == -Inf} { + #undershot - leave negative + } elseif {$b == Inf} { + #set b [expr {$numitems}] ;#overshot - put it outside the range on the upper side + set b [expr {$based_max + 1}] ;#overshot - put it outside the range on the upper side + } + } + + #JJJ + + #e.g make sure .. doesn't return last item - should return nothing as both are above the range. + if {$a >= $base && $a <= $based_max && $b >=$base && $b <= $based_max} { + #assert a & b are integers within the range + if {$step eq "default"} { + #unspecified step - base direction on order of a & b + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } else { + if {$a >= $base && $a <= $based_max} { + #only a is in the range + if {$b < $base} { + set b $base + } else { + set b $based_max + } + if {$step eq "default"} { + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } elseif {$b >=$base && $b <= $based_max} { + #only b is in the range + if {$step eq "default"} { + if {$a <= $b} { + set step 1 + } else { + set step -1 + } + } + if {$step < 0} { + if {$a < $base} { + #negative step from below - doesn't matter if b is in range - recast both to an int below $base + #(a may be -Inf) + set a [expr {$base -1}] + set b $a + set step 0 ;#we should return nothing + } + } else { + if {$a < $base} { + set a $base + } else { + set a $based_max + } + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } else { + #both outside the range + if {$a < $base && $b > $base} { + #spans the range in forward order + set a $base + set b $based_max + if {$step eq "default"} { + set step 1 + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } elseif {$a > $base && $b < $base} { + #spans the range in reverse order + set a $based_max + set b $base + if {$step eq "default"} { + set step -1 + } + lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. + } + #both outside of range on same side + } + } + } else { + set idx [punk::lib::lindex_resolve_basic $numitems $ipart $base] + #returns only -Inf for out of range at either end + if {$idx >= $base} { + #index within the range + lappend index_list $idx + } + } + } + return $index_list + } + # showdict uses lindex_resolve results -Inf & Inf to determine whether index is out of bounds on lower vs upper side + #This doesn't need the list itself - just the length suffices. + punk::args::define { + @id -id ::punk::lib::lindex_resolve + @cmd -name punk::lib::lindex_resolve\ + -summary\ + "Resolve an indexexpression to an integer based on supplied list or string length."\ + -help\ + "Resolve an index which may be of the forms accepted by Tcl list or string commands such as end-2 or 2+2 + to the actual integer index for the supplied list/string length, or to a negative value below -1 indicating + whether the index was below or above the range of possible indices for the length supplied. + + Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. + This means the proc may be called with something like $x+2 end-$y etc + Sometimes the actual integer index is desired. + + We want to resolve the index used, without passing arbitrary expressions into the 'expr' function + - which could have security risks. + lindex_resolve will parse the index expression and return: + a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) + b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) + The similar function lindex_resolve_basic uses -Inf to denote + out of range at either end of the list/string. + Otherwise it will return an integer corresponding to the position in the data. + This is in stark contrast to Tcl list/string function indices which will return empty strings for out of + bounds indices, or in the case of lrange, return results anyway. + Like Tcl list commands - it will produce an error if the form of the index is not acceptable. + For empty lists/string (datalength 0), end and end+x indices are considered to be out of bounds on the upper side + - thus returning -2 + + Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. + We will get something like 10+1 - which can be resolved safely with expr + " + @values -min 2 -max 2 + datalength -type integer + index -type indexexpression + } + proc lindex_resolve {len index {base 0}} { + #*** !doctools + #[call [fun lindex_resolve] [arg len] [arg index]] + #[para]Resolve an index which may be of the forms accepted by Tcl list commands such as end-2 or 2+2 to the actual integer index for the supplied list/string length + #[para]Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. + #[para]This means the proc may be called with something like $x+2 end-$y etc + #[para]Sometimes the actual integer index is desired. + #[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. + #[para]lindex_resolve will parse the index expression and return: + #[para] a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) + #[para] b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) + #[para] We don't return -1 - as the similar function lindex_resolve_basic uses this to denote out of range at either end of the list/string + #[para]Otherwise it will return an integer corresponding to the position in the list. + #[para]This is in stark contrast to Tcl list function indices which will return empty strings for out of bounds indices, or in the case of lrange, return results anyway. + #[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable + #[para]For empty lists, end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 + + #Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which can be resolved safely with expr + + + #REVIEW - we need compat for 1_000 etc to handle things like toml even in 8.6? + #A basic string map means we aren't properly validating + #todo - be stricter about malformations such as 1000_ + if {![string is integer -strict 1_0]} { + #basic forward compatibility with integers such as 1_000 for 8.6.x + set index [tcl::string::map {_ {}} $index] + set len [tcl::string::map {_ {}} $len] + } + + if {![string is integer -strict $len] || $len < 0} { + error "lindex_resolve len must be a positive integer." + } + set based_max [expr {$len -1 + $base}] + + if {[string is integer -strict $index]} { + #review - base? + #can match +i -i + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } else { + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } else { + if {[string match end* $index]} { + if {$index ne "end"} { + set op [string index $index 3] + set offset [string range $index 4 end] + #note - offset could have leading + or - + # 'string is integer -strict +1' ==> true + #e.g end+-1 is valid (end++-1 is not) + if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} + if {$offset == 0} { + #(offset +0, -0 or 0 or 000 0_0 etc) + #op either + or - is irrelevant + #set index [expr {$len-1}] ;#+ base ? + set index $based_max + if {$index < $base} { + #return -2 ;#special case - equivalent to 'end', with empty list - treat like a positive number out of bounds + return Inf + } else { + return $index + } + } + + #set index [if {$op eq "+"} {expr {($len-1) + $offset}} else {expr {($len-1) - $offset}}] + set index [if {$op eq "+"} {expr {$based_max + $offset}} else {expr {$based_max - $offset}}] + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } else { + return $index + } + } else { + #index is 'end' + if {$len == 0} { + #special case - 'end' with empty list - treat end like a positive number out of bounds + return Inf + } + #return [expr {$len - 1 + $base}] + return $based_max + } + } else { + #plain +- already handled above. + #we are trying to avoid evaluating unbraced expr of potentially insecure origin + #regexp must split a++b to a + +b (not a+ + b) ie first +/- is the op + if {[regexp {([^+-]*)([+-])(.*)} $index _match a op b]} { + if {[string is integer -strict $a] && [string is integer -strict $b]} { + if {$op eq "-"} { + set index [expr {$a - $b}] + } else { + set index [expr {$a + $b}] + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + } else { + error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + } + if {$index < $base} { + return -Inf + } elseif {$index > $based_max} { + return Inf + } + return $index + } + } + } + proc lindex_resolve_basic {len index {base 0}} { + #*** !doctools + #[call [fun lindex_resolve_basic] [arg len] [arg index]] + #[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) + #[para] returns -Inf for out of range at either end, or a valid integer index + #[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound + #[para] This is only likely to be faster than average over lindex_resolve for small lists and for Tcl which has the builtin lseq command + #[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 + #[para] For pure integer indices the performance should be equivalent + + if {![string is integer -strict $len] || $len < 0} { + error "lindex_resolve_basic len must be an integer greater than or equal to zero" + } + if {![string is integer -strict $base]} { + #base can be negative + error "lindex_resolve_basic base must be an integer" + } + set based_max [expr {$len -1 + $base}] + + set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 + if {[string is integer -strict $index]} { + #can match +i -i + #avoid even the lseq overhead when the index is simple + if {$index < $base || ($index > $based_max)} { + #even though in this case we could return -Inf or Inf like lindex_resolve; + #for consistency we don't return Inf for upper-boudn violation, + #as which bound is violated is not always directly determinable for compound index expressions (such as end-x) using the lseq+lindex mechanism. + return -Inf + } else { + #!NOTE! index within range is unchanged - no matter the base + #integer may still have + sign - normalize with expr + return [expr {$index}] + } + } + if {$len > 0} { + #For large len - this is a wasteful allocation if no true lseq available in Tcl version. + #lseq produces an 'arithseries' object which we can index into without allocating an entire list (REVIEW) + set testlist [punk::lib::range $base $based_max] ;# uses lseq if available, has fallback of creating a potentially large list of numbers. + } else { + set testlist [list] + #we want to call 'lindex' even in this case - to get the appropriate error message + } + set idx [lindex $testlist $index] + if {$idx eq ""} { + #we have no way to determine if out of bounds is at lower vs upper end + return -Inf + } else { + return $idx + } + } + proc lindex_get {list index} { + set resultlist [lrange $list $index $index] + if {![llength $resultlist]} { + return -1 + } else { + #we still don't know the actual integer index for an index such as end-x or int-int without parsing and evaluating ourself. + #we can return the value - but only in a way that won't collide with our -1 out-of-range indicator + return [tcl::dict::create value [lindex $resultlist 0]] + } + } + + proc string_splitbefore {str index} { + if {![string is integer -strict $index]} { + set index [punk::lib::lindex_resolve [string length $str] $index] + switch -- $index { + -Inf { + return [list "" $str] + } + Inf { + return [list $str ""] + } + } + } + return [list [string range $str 0 $index-1] [string range $str $index end]] + #scan %s stops at whitespace - not useful here. + #scan $s %${p}s%s + } + proc string_splitbefore_indices {str args} { + set parts [list $str] + set sizes [list [string length $str]] + set s 0 + foreach index $args { + if {![string is integer -strict $index]} { + set index [punk::lib::lindex_resolve [string length $str] $index] + switch -- $index { + -Inf { + if {[lindex $sizes 0] != 0} { + ledit parts 0 0 {} [lindex $parts 0] + ledit sizes 0 0 0 [lindex $sizes 0] + } + continue + } + Inf { + if {[lindex $sizes end] != 0} { + ledit parts end end [lindex $parts end] {} + ledit sizes end end [lindex $sizes end] 0 + } + continue + } + } + } + if {$index <= 0} { + if {[lindex $sizes 0] != 0} { + ledit parts 0 0 {} [lindex $parts 0] + ledit sizes 0 0 0 [lindex $sizes 0] + } + continue + } + if {$index >= [string length $str]} { + if {[lindex $sizes end] != 0} { + ledit parts end end [lindex $parts end] {} + ledit sizes end end [lindex $sizes end] 0 + } + continue + } + set i -1 + set a 0 + foreach sz $sizes { + incr i + if {$a + $sz > $index} { + set p [lindex $parts $i] + #puts "a:$a index:$index" + if {$a == $index} { + break + } + ledit parts $i $i [string range $p 0 [expr {$index -$a -1}]] [string range $p $index-$a end] + ledit sizes $i $i [expr {$index - $a}] [expr {($a + $sz)-$index}] + break + } + incr a $sz + } + #puts "->parts:$parts" + #puts "->sizes:$sizes" + } + return $parts + } + + proc K {x y} {return $x} + #*** !doctools + #[call [fun K] [arg x] [arg y]] + #[para]The K-combinator function - returns the first argument, x and discards y + #[para]see [uri https://wiki.tcl-lang.org/page/K] + #[para]It is used in cases where command-substitution at the calling-point performs some desired effect. + + + proc is_utf8_multibyteprefix {bytes} { + #*** !doctools + #[call [fun is_utf8_multibyteprefix] [arg str]] + #[para] Returns a boolean if str is potentially a prefix for a multibyte utf-8 character + #[para] ie - tests if it is possible that appending more data will result in a utf-8 codepoint + #[para] Will return false for an already complete utf-8 codepoint + #[para] It is assumed the incomplete sequence is at the beginning of the bytes argument + #[para] Suitable input for this might be from the unreturned tail portion of get_utf8_leading $testbytes + #[para] e.g using: set head [lb]get_utf8_leading $testbytes[rb] ; set tail [lb]string range $testbytes [lb]string length $head[rb] end[rb] + regexp {(?x) + ^ + (?: + [\xC0-\xDF] | #possible prefix for two-byte codepoint + [\xE0-\xEF] [\x80-\xBF]{0,1} | #possible prefix for three-byte codepoint + [\xF0-\xF4] [\x80-\xBF]{0,2} #possible prefix for + ) + $ + } $bytes + } + + proc is_utf8_first {str} { + regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + ^ + (?: + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + } $str + } + proc is_utf8_single {1234bytes} { + #*** !doctools + #[call [fun is_utf8_single] [arg 1234bytes]] + #[para] Tests input of 1,2,3 or 4 bytes and responds with a boolean indicating if it is a valid utf-8 character (codepoint) + regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + ^ + (?: + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + $ + } $1234bytes + } + proc get_utf8_leading {rawbytes} { + #*** !doctools + #[call [fun get_utf8_leading] [arg rawbytes]] + #[para] return the leading portion of rawbytes that is a valid utf8 sequence. + #[para] This will stop at the point at which the bytes can't be interpreted as a complete utf-8 codepoint + #[para] e.g It will not return the first byte or 2 of a 3-byte utf-8 character if the last byte is missing, and will return only the valid utf-8 string from before the first byte of the incomplete character. + #[para] It will also only return the prefix before any bytes that cannot be part of a utf-8 sequence at all. + #[para] Note that while this will return valid utf8 - it has no knowledge of grapheme clusters or diacritics + #[para] This means if it is being used to process bytes split at some arbitrary point - the trailing data that isn't returned could be part of a grapheme cluster that belongs with the last character of the leading string already returned + #[para] The utf-8 BOM \xEF\xBB\xBF is a valid UTF8 3-byte sequence and so can also be returned as part of the leading utf8 bytes + if {[regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) + \A ( + [\x00-\x7F] | # Single-byte chars (ASCII range) + [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) + [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) + [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) + ) + + } $rawbytes completeChars]} { + return $completeChars + } + return "" + } + proc hex2dec {args} { + #*** !doctools + #[call [fun hex2dec] [opt {option value...}] [arg list_largeHex]] + #[para]Convert a list of (possibly large) unprefixed hex strings to their decimal values + #[para]hex2dec accepts and ignores internal underscores in the same manner as Tcl 8.7+ numbers e.g hex2dec FF_FF returns 65535 + #[para]Leading and trailing underscores are ignored as a matter of implementation convenience - but this shouldn't be relied upon. + #[para]Leading or trailing whitespace in each list member is allowed e.g hex2dec " F" returns 15 + #[para]Internal whitespace e.g "F F" is not permitted - but a completely empty element "" is allowed and will return 0 + + set list_largeHex [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::hex2dec arguments prior to list_largeHex must be option/value pairs - received '$argopts'" + } + set opts [tcl::dict::create\ + -validate 1\ + -empty_as_hex "INVALID set -empty_as_hex to a hex string e.g FF if empty values should be replaced"\ + ] + set known_opts [tcl::dict::keys $opts] + foreach {k v} $argopts { + tcl::dict::set opts [tcl::prefix match -message "options for hex2dec. Unexpected option" $known_opts $k] $v + } + # -- --- --- --- + set opt_validate [tcl::dict::get $opts -validate] + set opt_empty [tcl::dict::get $opts -empty_as_hex] + # -- --- --- --- + + set list_largeHex [lmap h $list_largeHex[unset list_largeHex] {string map {_ ""} [string trim $h]}] + if {$opt_validate} { + #Note appended F so that we accept list of empty strings as per the documentation + if {![string is xdigit -strict [join $list_largeHex ""]F ]} { + error "[namespace current]::hex2dec error: non-hex digits encountered after stripping underscores and leading/trailing whitespace for each element\n $list_largeHex" + } + } + if {![string is xdigit -strict [string map {_ ""} $opt_empty]]} { + #mapping empty string to a value destroys any advantage of -scanonly + #todo - document that -scanonly has 2 restrictions - each element must be valid hex and less than 7 chars long + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {0} : {[set v]}}] + if {[lsearch $list_largeHex ""] >=0} { + error "[namespace current]::hex2dec error: empty values in list cannot be mapped to non-hex $opt_empty" + } + } else { + set opt_empty [string trim [string map {_ ""} $opt_empty]] + if {[set first_empty [lsearch $list_largeHex ""]] >= 0} { + #set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {$opt_empty} : {$v}}] + set nonempty_head [lrange $list_largeHex 0 $first_empty-1] + set list_largeHex [concat $nonempty_head [lmap v [lrange $list_largeHex $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [scan $list_largeHex [string repeat %llx [llength $list_largeHex]]] + } + + proc dec2hex {args} { + #*** !doctools + #[call [fun dex2hex] [opt {option value...}] [arg list_decimals]] + #[para]Convert a list of decimal integers to a list of hex values + #[para] -width can be used to make each hex value at least int characters wide, with leading zeroes. + #[para] -case upper|lower determines the case of the hex letters in the output + set list_decimals [lindex $args end] + set argopts [lrange $args 0 end-1] + if {[llength $argopts]%2 !=0} { + error "[namespace current]::dec2hex arguments prior to list_decimals must be option/value pairs - received '$argopts'" + } + set defaults [tcl::dict::create\ + -width 1\ + -case upper\ + -empty_as_decimal "INVALID set -empty_as_decimal to a number if empty values should be replaced"\ + ] + set known_opts [tcl::dict::keys $defaults] + set fullopts [tcl::dict::create] + foreach {k v} $argopts { + tcl::dict::set fullopts [tcl::prefix match -message "options for [tcl::namespace::current]::dec2hex. Unexpected option" $known_opts $k] $v + } + set opts [tcl::dict::merge $defaults $fullopts] + # -- --- --- --- + set opt_width [tcl::dict::get $opts -width] + set opt_case [tcl::dict::get $opts -case] + set opt_empty [tcl::dict::get $opts -empty_as_decimal] + # -- --- --- --- + + + set resultlist [list] + switch -- [string tolower $opt_case] { + upper { + set spec X + } + lower { + set spec x + } + default { + error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower" + } + } + set fmt "%${opt_width}.${opt_width}ll${spec}" + + set list_decimals [lmap d $list_decimals[unset list_decimals] {string map {_ ""} [string trim $d]}] + if {![string is digit -strict [string map {_ ""} $opt_empty]]} { + if {[lsearch $list_decimals ""] >=0} { + error "[namespace current]::dec2hex error: empty values in list cannot be mapped to non-decimal $opt_empty" + } + } else { + set opt_empty [string map {_ ""} $opt_empty] + if {[set first_empty [lsearch $list_decimals ""]] >= 0} { + set nonempty_head [lrange $list_decimals 0 $first_empty-1] + set list_decimals [concat $nonempty_head [lmap v [lrange $list_decimals $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] + } + } + return [format [lrepeat [llength $list_decimals] $fmt] {*}$list_decimals] + } + + proc log2 x "expr {log(\$x)/[expr log(2)]}" + #*** !doctools + #[call [fun log2] [arg x]] + #[para]log base2 of x + #[para]This uses a 'live' proc body - the divisor for the change of base is computed once at definition time + #[para](courtesy of RS [uri https://wiki.tcl-lang.org/page/Additional+math+functions]) + + proc logbase {b x} { + #*** !doctools + #[call [fun logbase] [arg b] [arg x]] + #[para]log base b of x + #[para]This function uses expr's natural log and the change of base division. + #[para]This means for example that we can get results like: logbase 10 1000 = 2.9999999999999996 + #[para]Use expr's log10() function or tcl::mathfunc::log10 for base 10 + expr {log($x)/log($b)} + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::factors + @cmd -name punk::lib::factors\ + -summary\ + "Sorted factors of positive integer x"\ + -help\ + "Return a sorted list of the positive factors of x where x > 0 + For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. + (including zero itself in this context)* + + This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors + Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions + See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers + Comparisons were done with some numbers below 17 digits long + For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. + The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers + but has the disadvantage of being slower for 'small' numbers and using more memory. + If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get + there than computing the whole list, even for small values of x + * Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py + In other mathematical contexts zero may be considered not to divide anything." + @values -min 1 -max 1 + x -type integer + }] + } + proc factors {x} { + #*** !doctools + #[call [fun factors] [arg x]] + #[para]Return a sorted list of the positive factors of x where x > 0 + #[para]For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. (including zero itself in this context)* + #[para]This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors + #[para]Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions + #[para]See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers + #[para]Comparisons were done with some numbers below 17 digits long + #[para]For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. + #[para]The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers + #but has the disadvantage of being slower for 'small' numbers and using more memory. + #[para]If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get there than computing the whole list, even for small values of x + #[para]* Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py + #[para] In other mathematical contexts zero may be considered not to divide anything. + set factors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {($x % $j) == 0} { + lappend factors $j [expr {$x / $j}] + } + incr j + } + lappend factors $x + return [lsort -unique -integer $factors] + } + proc oddFactors {x} { + #*** !doctools + #[call [fun oddFactors] [arg x]] + #[para]Return a list of odd integer factors of x, sorted in ascending order + set j 2 + set max [expr {sqrt($x)}] + set factors [list 1] + while {$j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2} { + if {$other ni $factors} { + lappend factors $other + } + } + if {$j % 2} { + if {$j ni $factors} { + lappend factors $j + } + } + } + incr j + } + return [lsort -integer -increasing $factors] + } + proc greatestFactorBelow {x} { + #*** !doctools + #[call [fun greatestFactorBelow] [arg x]] + #[para]Return the largest factor of x excluding itself + #[para]factor functions can be useful for console layout calculations + #[para]See Tcllib math::numtheory for more extensive implementations + if {$x % 2 == 0 || $x == 0} { + return [expr {$x / 2}] + } + set j 3 + set max [expr {sqrt($x)}] + while {$j <= $max} { + if {$x % $j == 0} { + return [expr {$x / $j}] + } + incr j 2 + } + return 1 + } + proc greatestOddFactorBelow {x} { + #*** !doctools + #[call [fun greatestOddFactorBelow] [arg x]] + #[para]Return the largest odd integer factor of x excluding x itself + if {$x %2 == 0} { + return [greatestOddFactor $x] + } + set j 3 + #dumb brute force - time taken to compute is wildly variable on big numbers + #todo - use a (memoized?) generator of primes to reduce the search space + #tcllib math::numtheory has suitable functions - but do we want that dependency here? Testing shows brute-force often faster for small numbers. + set god 1 + set max [expr {sqrt($x)}] + while { $j <= $max} { + if {$x % $j == 0} { + set other [expr {$x / $j}] + if {$other % 2 == 0} { + set god $j + } else { + set god [expr {$x / $j}] + #lowest j - so other side must be highest + break + } + } + incr j 2 + } + return $god + } + proc greatestOddFactor {x} { + #*** !doctools + #[call [fun greatestOddFactor] [arg x]] + #[para]Return the largest odd integer factor of x + #[para]For an odd value of x - this will always return x + if {$x % 2 != 0 || $x == 0} { + return $x + } + set r [expr {$x / 2}] + while {$r % 2 == 0} { + set r [expr {$r / 2}] + } + return $r + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::gcd + @cmd -name punk::lib::gcd\ + -summary\ + "Greatest common divisor of m and n."\ + -help\ + "Return the greatest common divisor of m and n. + Straight from Lars Hellström's math::numtheory library in Tcllib + + Graphical use: + An a by b rectangle can be covered with square tiles of side-length c, + only if c is a common divisor of a and b" + @values -min 2 -max 2 + m -type integer + n -type integer + }] + } + proc gcd {n m} { + #*** !doctools + #[call [fun gcd] [arg n] [arg m]] + #[para]Return the greatest common divisor of m and n + #[para]Straight from Lars Hellström's math::numtheory library in Tcllib + #[para]Graphical use: + #[para]An a by b rectangle can be covered with square tiles of side-length c, + #[para]only if c is a common divisor of a and b + + # + # Apply Euclid's good old algorithm + # + if { $n > $m } { + set t $n + set n $m + set m $t + } + + while { $n > 0 } { + set r [expr {$m % $n}] + set m $n + set n $r + } + + return $m + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::lib::lcm + @cmd -name punk::lib::lcm\ + -summary\ + "Lowest common multiple of m and n."\ + -help\ + "Return the lowest common multiple of m and n. + Straight from Lars Hellström's math::numtheory library in Tcllib" + @values -min 2 -max 2 + m -type integer + n -type integer + }] + } + proc lcm {n m} { + set gcd [gcd $n $m] + return [expr {$n*$m/$gcd}] + } + proc commonDivisors {x y} { + #*** !doctools + #[call [fun commonDivisors] [arg x] [arg y]] + #[para]Return a list of all the common factors of x and y + #[para](equivalent to factors of their gcd) + return [factors [gcd $x $y]] + } + + #experimental only - there are better/faster ways + proc sieve n { + set primes [list] + if {$n < 2} {return $primes} + set nums [tcl::dict::create] + for {set i 2} {$i <= $n} {incr i} { + tcl::dict::set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} + lappend primes $next + tcl::dict::for {next -} $nums break + } + return [concat $primes [tcl::dict::keys $nums]] + } + proc sieve2 n { + set primes [list] + if {$n < 2} {return $primes} + set nums [tcl::dict::create] + for {set i 2} {$i <= $n} {incr i} { + tcl::dict::set nums $i "" + } + set next 2 + set limit [expr {sqrt($n)}] + while {$next <= $limit} { + for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} + lappend primes $next + #dict for {next -} $nums break + set next [lindex $nums 0] + } + return [concat $primes [tcl::dict::keys $nums]] + } + + proc hasglobs {str} { + #*** !doctools + #[call [fun hasglobs] [arg str]] + #[para]Return a boolean indicating whether str contains any of the glob characters: * ? [lb] [rb] + #[para]hasglobs uses append to preserve Tcls internal representation for str - so it should help avoid shimmering in the few cases where this may matter. + regexp {[*?\[\]]} [append obj2 $str {}] ;# int-rep preserving + } + + proc trimzero {number} { + #*** !doctools + #[call [fun trimzero] [arg number]] + #[para]Return number with left-hand-side zeros trimmed off - unless all zero + #[para]If number is all zero - a single 0 is returned + set trimmed [string trimleft $number 0] + if {[string length $trimmed] == 0} { + set trimmed 0 + } + return $trimmed + } + proc substring_count {str substring} { + #*** !doctools + #[call [fun substring_count] [arg str] [arg substring]] + #[para]Search str and return number of occurrences of substring + + #faster than lsearch on split for str of a few K + if {$substring eq ""} {return 0} + set occurrences [expr {[string length $str]-[string length [string map [list $substring {}] $str]]}] + return [expr {$occurrences / [string length $substring]}] + } + + proc dict_merge_ordered {defaults main} { + #*** !doctools + #[call [fun dict_merge_ordered] [arg defaults] [arg main]] + #[para]The standard dict merge accepts multiple dicts with values from dicts to the right (2nd argument) taking precedence. + #[para]When merging with a dict of default values - this means that any default key/vals that weren't in the main dict appear in the output before the main data. + #[para]This function merges the two dicts whilst maintaining the key order of main followed by defaults. + + #1st merge (inner merge) with wrong values taking precedence - but right key-order - then (outer merge) restore values + return [tcl::dict::merge [tcl::dict::merge $main $defaults] $main] + } + + proc askuser {question} { + #*** !doctools + #[call [fun askuser] [arg question]] + #[para]A basic utility to read an answer from stdin + #[para]The prompt is written to the terminal and then it waits for a user to type something + #[para]stdin is temporarily configured to blocking and then put back in its original state in case it wasn't already so. + #[para]If the terminal is using punk::console and is in raw mode - the terminal will temporarily be put in line mode. + #[para](Generic terminal raw vs linemode detection not yet present) + #[para]The user must hit enter to submit the response + #[para]The return value is the string if any that was typed prior to hitting enter. + #[para]The question argument can be manually colourised using the various punk::ansi funcitons + #[example_begin] + # set answer [lb]punk::lib::askuser "[lb]a+ green bold[rb]Do you want to proceed? (Y|N)[lb]a[rb]"[rb] + # if {[lb]string match y* [lb]string tolower $answer[rb][rb]} { + # puts "Proceeding" + # } else { + # puts "Cancelled by user" + # } + #[example_end] + puts stdout $question + flush stdout + set stdin_state [chan configure stdin] + if {[catch { + package require punk::console + set console_raw [tsv::get console is_raw] + } err_console]} { + #assume normal line mode + set console_raw 0 + } + try { + chan configure stdin -blocking 1 + if {$console_raw} { + punk::console::disableRaw + set answer [gets stdin] + punk::console::enableRaw + } else { + set answer [gets stdin] + } + } finally { + chan configure stdin -blocking [tcl::dict::get $stdin_state -blocking] + } + return $answer + } + + #like textutil::adjust::indent - but doesn't strip trailing lines, and doesn't implement skip parameter. + proc indent {text {prefix " "}} { + set result [list] + foreach line [split $text \n] { + if {[string trim $line] eq ""} { + lappend result "" + } else { + lappend result $prefix[string trimright $line] + } + } + return [join $result \n] + } + #dedent? + proc undent {text {max -1}} { + if {$text eq ""} { + return "" + } + set lines [split $text \n] + set nonblank [list] + foreach ln $lines { + if {[string trim $ln] eq ""} { + continue + } + lappend nonblank $ln + } + set lcp [longestCommonPrefix $nonblank] + if {$lcp eq ""} { + return $text + } + regexp {^([\t ]*)} $lcp _m lcp + if {$lcp eq ""} { + return $text + } + set len [string length $lcp] + if {$max != -1} { + set len [expr {min($len,$max)}] + } + set result [list] + foreach ln $lines { + if {[string trim $ln] eq ""} { + lappend result "" + } else { + lappend result [string range $ln $len end] + } + } + return [join $result \n] + } + #A version of textutil::string::longestCommonPrefixList + proc longestCommonPrefix {items} { + if {[llength $items] <= 1} { + return [lindex $items 0] + } + set items [lsort $items[unset items]] + set min [lindex $items 0] + set max [lindex $items end] + #if first and last of sorted list share a prefix - then all do (first and last of sorted list are the most different in the list) + #(sort order nothing to do with length - e.g min may be longer than max) + if {[string length $min] > [string length $max]} { + set temp $min + set min $max + set max $temp + } + set n [string length $min] + set prefix "" + set i -1 + while {[incr i] < $n && ([set c [string index $min $i]] eq [string index $max $i])} { + append prefix $c + } + return $prefix + } + + #e.g linesort -decreasing $data + proc linesort {args} { + #*** !doctools + #[call [fun linesort] [opt {sortoption ?val?...}] [arg textblock]] + #[para]Sort lines in textblock + #[para]Returns another textblock with lines sorted + #[para]options are flags as accepted by lsort ie -ascii -command -decreasing -dictionary -index -indices -integer -nocase -real -stride -unique + if {[llength $args] < 1} { + error "linesort missing lines argument" + } + set lines [lindex $args end] + set opts [lrange $args 0 end-1] + #.= list $lines |@0,sortopts/1> linelist |> .=data>1,sortopts>1* lsort |> list_as_lines <| {*}$opts + list_as_lines [lsort {*}$opts [linelist $lines]] + } + + proc list_as_lines {args} { + #*** !doctools + #[call [fun list_as_lines] [opt {-joinchar char}] [arg linelist]] + #[para]This simply joins the elements of the list with -joinchar + #[para]It is mainly intended for use in pipelines where the primary argument comes at the end - but it can also be used as a general replacement for join $lines + #[para]The sister function lines_as_list takes a block of text and splits it into lines - but with more options related to trimming the block and/or each line. + if {[set eop [lsearch $args --]] == [llength $args]-2} { + #end-of-opts not really necessary - except for consistency with lines_as_list + set args [concat [lrange $args 0 $eop-1] [lrange $args $eop+1 end]] + } + if {[llength $args] == 3 && [lindex $args 0] eq "-joinchar"} { + set joinchar [lindex $args 1] + set lines [lindex $args 2] + } elseif {[llength $args] == 1} { + set joinchar "\n" + set lines [lindex $args 0] + } else { + error "list_as_lines usage: list_as_lines ?-joinchar ? " + } + return [join $lines $joinchar] + } + proc list_as_lines2 {args} { + #eat or own dogfood version - shows the implementation is simpler - but unfortunately not suitable for a simple function like this which should be as fast as possible? + lassign [tcl::dict::values [punk::args::parse $args withdef { + -joinchar -default \n + @values -min 1 -max 1 + }]] leaders opts values + + return [join [tcl::dict::get $values 0] [tcl::dict::get $opts -joinchar]] + } + + proc lines_as_list {args} { + #*** !doctools + #[call [fun lines_as_list] [opt {option value ...}] [arg text]] + #[para]Returns a list of possibly trimmed lines depeding on options + #[para]The concept of lines is raw lines from splitting on newline after crlf is mapped to lf + #[para]- not console lines which may be entirely different due to control characters such as vertical tabs or ANSI movements + + #The underlying function linelist has the validation code which gives nicer usage errors. + #we can't use a dict merge here without either duplicating the underlying validation somewhat, or risking a default message from dict merge error + #..because we don't know what to say if there are odd numbers of args + #we can guess that it's ok to insert our default if no -block found in $args - but as a general principle this mightn't always work + #e.g if -block is also a valid value for the textblock itself. Which in this case it is - although unlikely, and our -block {} default is irrelevant in that case anyway + + if {[lsearch $args "--"] == [llength $args]-2} { + set opts [lrange $args 0 end-2] + } else { + set opts [lrange $args 0 end-1] + } + #set opts [tcl::dict::merge {-block {}} $opts] + set bposn [lsearch $opts -block] + if {$bposn < 0} { + lappend opts -block {} + } + set text [lindex $args end] + #tailcall linelist {*}$opts $text + return [linelist {*}$opts $text] + } + #this demonstrates the ease of using an args processor - but as lines_as_list is heavily used in terminal output - we can't afford the extra microseconds + proc lines_as_list2 {args} { + #pass -anyopts 1 so we can let the next function decide what arguments are valid - but still pass our defaults + #-anyopts 1 avoids having to know what to say if odd numbers of options passed etc + #we don't have to decide what is an opt vs a value + #even if the caller provides the argument -block without a value the next function's validation will report a reasonable error because there is now nothing in $values (consumed by -block) + lassign [tcl::dict::values [punk::args::parse $args withdef { + @opts -any 1 + -block -default {} + }]] leaderdict opts valuedict + tailcall linelist {*}$opts {*}[tcl::dict::values $valuedict] + } + + # important for pipeline & match_assign + # -line trimline|trimleft|trimright -block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty -commandprefix {string length} ? + # -block trimming only trims completely empty lines. use -line trimming to remove whitespace e.g -line trimright will clear empty lines without affecting leading whitespace on other lines that aren't pure whitespace + set linelist_body { + set usage "linelist ?-ansiresets auto|? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" + if {[llength $args] == 0} { + error "linelist missing textchunk argument usage:$usage" + } + set text [lindex $args end] + set text [string map {\r\n \n} $text] ;#review - option? + + set arglist [lrange $args 0 end-1] + set opts [tcl::dict::create\ + -block {trimhead1 trimtail1}\ + -line {}\ + -commandprefix ""\ + -ansiresets auto\ + -ansireplays 0\ + ] + foreach {o v} $arglist { + switch -- $o { + -block - -line - -commandprefix - -ansiresets - -ansireplays { + tcl::dict::set opts $o $v + } + default { + error "linelist: Unrecognized option '$o' usage:$usage" + } + } + } + # -- --- --- --- --- --- + set opt_block [tcl::dict::get $opts -block] + if {[llength $opt_block]} { + foreach bo $opt_block { + switch -- $bo { + trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} + default { + set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] + error "linelist: unknown -block option value: $bo known values: $known_blockopts" + } + } + } + #normalize certain combos + if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimall" in $opt_block} { + #no other block options make sense in combination with this + set opt_block [list "trimall"] + } + + #TODO + if {"triminner" in $opt_block } { + error "linelist -block triminner not implemented - sorry" + } + + } + + + # -- --- --- --- --- --- + set opt_line [tcl::dict::get $opts -line] + set tl_left 0 + set tl_right 0 + set tl_both 0 + foreach lo $opt_line { + switch -- $lo { + trimline { + set tl_both 1 + } + trimleft { + set tl_left 1 + } + trimright { + set tl_right 1 + } + default { + set known_lineopts [list trimline trimleft trimright] + error "linelist: unknown -line option value: $lo known values: $known_lineopts" + } + } + } + #normalize trimleft trimright combo + if {$tl_left && $tl_right} { + set opt_line [list "trimline"] + set tl_both 1 + } + # -- --- --- --- --- --- + set opt_commandprefix [tcl::dict::get $opts -commandprefix] + # -- --- --- --- --- --- + set opt_ansiresets [tcl::dict::get $opts -ansiresets] + # -- --- --- --- --- --- + set opt_ansireplays [tcl::dict::get $opts -ansireplays] + if {$opt_ansireplays} { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 1 + } + } else { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 0 + } + } + # -- --- --- --- --- --- + set linelist [list] + set nlsplit [split $text \n] + if {![llength $opt_line]} { + set linelist $nlsplit + #lappend linelist {*}$nlsplit + } else { + #already normalized trimleft+trimright to trimline + if {$tl_both} { + foreach ln $nlsplit { + lappend linelist [string trim $ln] + } + } elseif {$tl_left} { + foreach ln $nlsplit { + lappend linelist [string trimleft $ln] + } + } elseif {$tl_right} { + foreach ln $nlsplit { + lappend linelist [string trimright $ln] + } + } + } + + if {"collateempty" in $opt_block} { + set inputlist $linelist[set linelist [list]] + set last "-" + foreach input $inputlist { + if {$input ne ""} { + lappend linelist $input + set last "-" + } else { + if {$last ne ""} { + lappend linelist "" + } + set last "" + } + } + } + + if {"trimall" in $opt_block} { + set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] + } else { + set start 0 + if {"trimhead" in $opt_block} { + set idx 0 + set lastempty -1 + foreach ln $linelist { + if {[lindex $linelist $idx] ne ""} { + break + } else { + set lastempty $idx + } + incr idx + } + if {$lastempty >=0} { + set start [expr {$lastempty +1}] + } + } + set linelist [lrange $linelist $start end] + + if {"trimtail" in $opt_block} { + set revlinelist [lreverse $linelist][set linelist {}] + set i 0 + foreach ln $revlinelist { + if {$ln ne ""} { + set linelist [lreverse [lrange $revlinelist $i end]] + break + } + incr i + } + } + + # --- --- + set start 0 + set end "end" + if {"trimhead1" in $opt_block} { + if {[lindex $linelist 0] eq ""} { + set start 1 + } + } + if {"trimtail1" in $opt_block} { + if {[lindex $linelist end] eq ""} { + set end "end-1" + } + } + set linelist [lrange $linelist $start $end] + } + + #review - we need to make sure ansiresets don't accumulate/grow on any line + #Each resulting line should have a reset of some type at start and a pure-reset at end to stop + #see if we can find an ST sequence that most terminals will not display for marking sections? + if {$opt_ansireplays} { + ;#package require punk::ansi + if {$opt_ansiresets} { + set RST "\x1b\[0m" + } else { + set RST "" + } + set replaycodes $RST ;#todo - default? + set transformed [list] + #shortcircuit common case of no ansi + #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. + #This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) + #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable + #detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures) + + #we use detectcode_in_list instead of detect_in_list + #detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message) + # - but the main reason is it is slightly faster. + if {![punk::ansi::ta::detectcode_in_list $linelist]} { + if {$opt_ansiresets} { + foreach ln $linelist { + lappend transformed $RST$ln$RST + } + set linelist $transformed + } + } else { + + #INLINE punk::ansi::codetype::is_sgr_reset + #regexp {\x1b\[0*m$} $code + set re_is_sgr_reset {\x1b\[0*m$} + #INLINE punk::ansi::codetype::is_sgr + #regexp {\033\[[0-9;:]*m$} $code + set re_is_sgr {\x1b\[[0-9;:]*m$} + + foreach ln $linelist { + #set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable + + #set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + #get_codes_single lists only the codes. no plaintext or empty elements + set ansisplits [punk::ansi::ta::get_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + if {[llength $ansisplits] == 0} { + #plaintext only - no ansi codes in line + lappend transformed [string cat $replaycodes $ln $RST] + #leave replaycodes as is for next line + set nextreplay $replaycodes + } else { + set tail $RST + set lastcode [lindex $ansisplits end] ;#may or may not be SGR + set lastcodeoffset [expr {[string length $lastcode]-1}] + if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { + if {[string range $ln end-$lastcodeoffset end] eq $lastcode} { + #last plaintext is empty. So the line is already suffixed with a reset + set tail "" + set nextreplay $RST + } else { + #trailing text has been reset within line - but no tail reset present + #we normalize by putting a tail reset on anyway + set tail $RST + set nextreplay $RST + } + } elseif {[string range $ln end-$lastcodeoffset end] eq $lastcode && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { + #code is at tail (no trailing plaintext) + #No tail reset - and no need to examine whole line to determine stack that is in effect + set tail $RST + set nextreplay $lastcode + } else { + #last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect + #last codeset doesn't end in a pure-reset + #whether code was at very end or not - add a reset tail + set tail $RST + #determine effective replay for line + set codestack [list start] + foreach code $ansisplits { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list] ;#different from 'start' marked - this means we've had a reset + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + if {[punk::ansi::codetype::is_sgr $code]} { + #todo - proper test of each code - so we only take latest background/foreground etc. + #requires handling codes with varying numbers of parameters. + #basic simplification - remove straight dupes. + set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + } ;#else gx0 or other code - we don't want to stack it with SGR codes + } + } + if {$codestack eq [list start]} { + #No SGRs - may have been other codes + set line_has_sgr 0 + } else { + #list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes + set line_has_sgr 1 + if {[lindex $codestack 0] eq "start"} { + set codestack [lrange $codestack 1 end] + } + } + + #set newreplay [join $codestack ""] + set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] + + if {$line_has_sgr && $newreplay ne $replaycodes} { + #adjust if it doesn't already does a reset at start + if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { + set nextreplay $newreplay + } else { + set nextreplay $RST$newreplay + } + } else { + set nextreplay $replaycodes + } + } + if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { + #no point attaching any replay + lappend transformed [string cat $ln $tail] + } else { + lappend transformed [string cat $replaycodes $ln $tail] + } + } + set replaycodes $nextreplay + } + set linelist $transformed + } + } + + if {[llength $opt_commandprefix]} { + set transformed [list] + foreach ln $linelist { + lappend transformed [{*}$opt_commandprefix $ln] + } + set linelist $transformed + } + + return $linelist + } + if {$has_punk_ansi} { + #optimise linelist as much as possible + set linelist_body [string map { ""} $linelist_body] + } else { + #punk ansi not avail at time of package load. + #by putting in calls to punk::ansi the user will get appropriate error messages + set linelist_body [string map { "package require punk::ansi"} $linelist_body] + } + + set linelist_body_original { + set usage "linelist ?-ansiresets auto|? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix text" + if {[llength $args] == 0} { + error "linelist missing textchunk argument usage:$usage" + } + set text [lindex $args end] + set text [string map {\r\n \n} $text] ;#review - option? + + set arglist [lrange $args 0 end-1] + set opts [tcl::dict::create\ + -block {trimhead1 trimtail1}\ + -line {}\ + -commandprefix ""\ + -ansiresets auto\ + -ansireplays 0\ + ] + foreach {o v} $arglist { + switch -- $o { + -block - -line - -commandprefix - -ansiresets - -ansireplays { + tcl::dict::set opts $o $v + } + default { + error "linelist: Unrecognized option '$o' usage:$usage" + } + } + } + # -- --- --- --- --- --- + set opt_block [tcl::dict::get $opts -block] + if {[llength $opt_block]} { + foreach bo $opt_block { + switch -- $bo { + trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} + default { + set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] + error "linelist: unknown -block option value: $bo known values: $known_blockopts" + } + } + } + #normalize certain combos + if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { + set opt_block [lreplace $opt_block $posn $posn] + } + if {"trimall" in $opt_block} { + #no other block options make sense in combination with this + set opt_block [list "trimall"] + } + + #TODO + if {"triminner" in $opt_block } { + error "linelist -block triminner not implemented - sorry" + } + + } + + + # -- --- --- --- --- --- + set opt_line [tcl::dict::get $opts -line] + set tl_left 0 + set tl_right 0 + set tl_both 0 + foreach lo $opt_line { + switch -- $lo { + trimline { + set tl_both 1 + } + trimleft { + set tl_left 1 + } + trimright { + set tl_right 1 + } + default { + set known_lineopts [list trimline trimleft trimright] + error "linelist: unknown -line option value: $lo known values: $known_lineopts" + } + } + } + #normalize trimleft trimright combo + if {$tl_left && $tl_right} { + set opt_line [list "trimline"] + set tl_both 1 + } + # -- --- --- --- --- --- + set opt_commandprefix [tcl::dict::get $opts -commandprefix] + # -- --- --- --- --- --- + set opt_ansiresets [tcl::dict::get $opts -ansiresets] + # -- --- --- --- --- --- + set opt_ansireplays [tcl::dict::get $opts -ansireplays] + if {$opt_ansireplays} { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 1 + } + } else { + if {$opt_ansiresets eq "auto"} { + set opt_ansiresets 0 + } + } + # -- --- --- --- --- --- + set linelist [list] + set nlsplit [split $text \n] + if {![llength $opt_line]} { + set linelist $nlsplit + #lappend linelist {*}$nlsplit + } else { + #already normalized trimleft+trimright to trimline + if {$tl_both} { + foreach ln $nlsplit { + lappend linelist [string trim $ln] + } + } elseif {$tl_left} { + foreach ln $nlsplit { + lappend linelist [string trimleft $ln] + } + } elseif {$tl_right} { + foreach ln $nlsplit { + lappend linelist [string trimright $ln] + } + } + } + + if {"collateempty" in $opt_block} { + set inputlist $linelist[set linelist [list]] + set last "-" + foreach input $inputlist { + if {$input ne ""} { + lappend linelist $input + set last "-" + } else { + if {$last ne ""} { + lappend linelist "" + } + set last "" + } + } + } + + if {"trimall" in $opt_block} { + set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] + } else { + set start 0 + if {"trimhead" in $opt_block} { + set idx 0 + set lastempty -1 + foreach ln $linelist { + if {[lindex $linelist $idx] ne ""} { + break + } else { + set lastempty $idx + } + incr idx + } + if {$lastempty >=0} { + set start [expr {$lastempty +1}] + } + } + set linelist [lrange $linelist $start end] + + if {"trimtail" in $opt_block} { + set revlinelist [lreverse $linelist][set linelist {}] + set i 0 + foreach ln $revlinelist { + if {$ln ne ""} { + set linelist [lreverse [lrange $revlinelist $i end]] + break + } + incr i + } + } + + # --- --- + set start 0 + set end "end" + if {"trimhead1" in $opt_block} { + if {[lindex $linelist 0] eq ""} { + set start 1 + } + } + if {"trimtail1" in $opt_block} { + if {[lindex $linelist end] eq ""} { + set end "end-1" + } + } + set linelist [lrange $linelist $start $end] + } + + #review - we need to make sure ansiresets don't accumulate/grow on any line + #Each resulting line should have a reset of some type at start and a pure-reset at end to stop + #see if we can find an ST sequence that most terminals will not display for marking sections? + if {$opt_ansireplays} { + #package require punk::ansi + + if {$opt_ansiresets} { + set RST "\x1b\[0m" + } else { + set RST "" + } + set replaycodes $RST ;#todo - default? + set transformed [list] + #shortcircuit common case of no ansi + #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. + #This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) + #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable + #detect_in_list will check at first level. (not intended for detecting ansi in deeper structures) + if {![punk::ansi::ta::detect_in_list $linelist]} { + if {$opt_ansiresets} { + foreach ln $linelist { + lappend transformed $RST$ln$RST + } + set linelist $transformed + } + } else { + + #INLINE punk::ansi::codetype::is_sgr_reset + #regexp {\x1b\[0*m$} $code + set re_is_sgr_reset {\x1b\[0*m$} + #INLINE punk::ansi::codetype::is_sgr + #regexp {\033\[[0-9;:]*m$} $code + set re_is_sgr {\x1b\[[0-9;:]*m$} + + foreach ln $linelist { + #set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable + + set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. + if {[llength $ansisplits]<= 1} { + #plaintext only - no ansi codes in line + lappend transformed [string cat $replaycodes $ln $RST] + #leave replaycodes as is for next line + set nextreplay $replaycodes + } else { + set tail $RST + set lastcode [lindex $ansisplits end-1] ;#may or may not be SGR + if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { + if {[lindex $ansisplits end] eq ""} { + #last plaintext is empty. So the line is already suffixed with a reset + set tail "" + set nextreplay $RST + } else { + #trailing text has been reset within line - but no tail reset present + #we normalize by putting a tail reset on anyway + set tail $RST + set nextreplay $RST + } + } elseif {[lindex $ansisplits end] ne "" && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { + #No tail reset - and no need to examine whole line to determine stack that is in effect + set tail $RST + set nextreplay $lastcode + } else { + #last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect + #last codeset doesn't end in a pure-reset + #whether code was at very end or not - add a reset tail + set tail $RST + #determine effective replay for line + set codestack [list start] + foreach {pt code} $ansisplits { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + set codestack [list] ;#different from 'start' marked - this means we've had a reset + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + if {[punk::ansi::codetype::is_sgr $code]} { + #todo - proper test of each code - so we only take latest background/foreground etc. + #requires handling codes with varying numbers of parameters. + #basic simplification - remove straight dupes. + set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. + set codestack [lremove $codestack {*}$dup_posns] + lappend codestack $code + } ;#else gx0 or other code - we don't want to stack it with SGR codes + } + } + if {$codestack eq [list start]} { + #No SGRs - may have been other codes + set line_has_sgr 0 + } else { + #list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes + set line_has_sgr 1 + if {[lindex $codestack 0] eq "start"} { + set codestack [lrange $codestack 1 end] + } + } + + #set newreplay [join $codestack ""] + set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] + + if {$line_has_sgr && $newreplay ne $replaycodes} { + #adjust if it doesn't already does a reset at start + if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { + set nextreplay $newreplay + } else { + set nextreplay $RST$newreplay + } + } else { + set nextreplay $replaycodes + } + } + if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { + #no point attaching any replay + lappend transformed [string cat $ln $tail] + } else { + lappend transformed [string cat $replaycodes $ln $tail] + } + } + set replaycodes $nextreplay + } + set linelist $transformed + } + } + + if {[llength $opt_commandprefix]} { + set transformed [list] + foreach ln $linelist { + lappend transformed [{*}$opt_commandprefix $ln] + } + set linelist $transformed + } + + return $linelist + } + if {$has_punk_ansi} { + #optimise linelist as much as possible + set linelist_body [string map { ""} $linelist_body] + } else { + #punk ansi not avail at time of package load. + #by putting in calls to punk::ansi the user will get appropriate error messages + set linelist_body [string map { "package require punk::ansi"} $linelist_body] + } + proc linelist {args} $linelist_body + + + interp alias {} errortime {} punk::lib::errortime + proc errortime {script groupsize {iters 2}} { + #by use MAK from https://wiki.tcl-lang.org/page/How+to+Measure+Performance + set i 0 + set times {} + if {$iters < 2} {set iters 2} + + for {set i 0} {$i < $iters} {incr i} { + set result [uplevel [list time $script $groupsize]] + lappend times [lindex $result 0] + } + + set average 0.0 + set s2 0.0 + + foreach time $times { + set average [expr {$average + double($time)/$iters}] + } + + foreach time $times { + set s2 [expr {$s2 + (($time-$average)*($time-$average) / ($iters-1))}] + } + + set sigma [expr {int(sqrt($s2))}] + set average [expr {int($average)}] + + return "$average +/- $sigma microseconds per iteration" + } + + #test function to use with show_jump_tables + #todo - check if switch compilation to jump tables differs by Tcl version + proc switch_char_test {c} { + set dec [scan $c %c] + foreach t [list 1 2 3] { + switch -- $c { + x { + return [list $dec x $t] + } + y { + return [list $dec y $t] + } + z { + return [list $dec z $t] + } + } + } + + #tcl 8.6/8.7 (at least) + #curlies must be unescaped and unbraced to work as literals in switch and enable it to compile to jumpTable + switch -- $c { + a { + return [list $dec a] + } + {"} { + return [list $dec dquote] + } + {[} {return [list $dec lb]} + {]} {return [list $dec rb]} + "{" { + return [list $dec lbrace] + } + "}" { + return [list $dec rbrace] + } + default { + return [list $dec $c] + } + } + + + + } + + #we are interested in seeing jumpTable line and following lines up until next line starting with "Command" or bracketed number e.g (164) + proc show_jump_tables {args} { + #avoiding use of 'info cmdtype' as unavaliable in safe interps as at 2024-06. + if {[llength $args] == 1} { + set data [tcl::unsupported::disassemble proc [lindex $args 0]] + } elseif {[llength $args] == 2} { + #review - this looks for direct methods on the supplied object/class, and then tries to disassemble method on the supplied class or class of supplied object if it isn't a class itself. + #not sure if this handles more complex hierarchies or mixins etc. + lassign $args obj method + if {![info object isa object $obj]} { + error "show_jump_tables unable to examine '$args'. $obj is not an oo object" + } + #classes are objects too and can have direct methods + if {$method in [info object methods $obj]} { + set data [tcl::unsupported::disassemble objmethod $obj $method] + } else { + if {![info object isa class $obj]} { + set obj [info object class $obj] + } + set data [tcl::unsupported::disassemble method $obj $method] + } + } else { + error "show_jump_tables expected a procname or a class/object and method" + } + set result "" + set in_jt 0 + foreach ln [split $data \n] { + set tln [::tcl::string::trim $ln] + if {!$in_jt} { + if {[::tcl::string::match *jumpTable* $ln]} { + punk::ns::call_frame + append result $ln \n + set in_jt 1 + } + } else { + if {[::tcl::string::match Command* $tln] || [::tcl::string::match "(*) *" $tln]} { + set in_jt 0 + } else { + append result $ln \n + } + } + } + return $result + } + + #a test + # punk::ns::cmdtracereturn punk::lib::disassemble ::punk::ns::test_switch4 + # Note the different disassemble result when trace is running. + proc disassemble {procname} { + tcl::unsupported::disassemble proc $procname + } + + proc temperature_f_to_c {deg_fahrenheit} { + return [expr {($deg_fahrenheit -32) * (5/9.0)}] + } + proc temperature_c_to_f {deg_celsius} { + return [expr {($deg_celsius * (9/5.0)) + 32}] + } + + proc interp_sync_package_paths {interp} { + if {![interp exists $interp]} { + error "interp_sync_package_paths error. interp '$interp' not found. Create it first with \[interp create $interp\]" + } + interp eval $interp [list set ::auto_path $::auto_path] + interp eval $interp {tcl::tm::remove {*}[tcl::tm::list]} + interp eval $interp [list tcl::tm::add {*}[lreverse [tcl::tm::list]]] + } + + proc valcopy {obj} { + append obj2 $obj {} + } + proc set_valcopy {varname obj} { + #used by repl's codeinterp. Maintains internal rep, easier to call e.g interp eval code [list punk::set_valcopy varnmame $val] + append obj2 $obj {} + uplevel 1 [list set $varname $obj2] + } + + + + proc format_number {numbers_or_commaformattednumbers {delim ""} {groupsize ""}} { + variable has_twapi + if {$has_twapi} { + if {$delim eq "" && $groupsize eq ""} { + set localeid [twapi::get_system_default_lcid] + } + } + #when using twapi we currently only get the localeid - not the specific defaults + #when not using twapi, or on non-windows platforms - we don't currently have a mechanism to look up user preferences for this + set default_delim "," + set default_groupsize 3 + + set results [list] + set nums [valcopy $numbers_or_commaformattednumbers] ;#stops single num from getting internal rep of list + foreach inputnum $nums { + set number [valcopy $inputnum] + #also handle tcl 8.7+ underscores in numbers + set number [string map [list _ "" , ""] $number] + #normalize e.g 2e4 -> 20000.0 + set number [expr {$number}] + + if {$has_twapi} { + if {$delim eq "" && $groupsize eq ""} { + lappend results [twapi::format_number $number $localeid -idigits -1] + continue + } else { + #setting just one of delim or groupsize means we don't get the user's localeid based default for the non-set one + #todo - document it? Find a way to lookup localeid based defaults whenever either is unspecified? + if {$delim eq ""} {set delim $default_delim} + if {$groupsize eq ""} {set groupsize $default_groupsize} + lappend results [twapi::format_number $number 0 -idigits -1 -sthousand $delim -sgrouping $groupsize] + continue + } + } + #todo - get configured user defaults + if {$delim eq ""} { + set delim $default_delim + } + if {$groupsize eq ""} { + set groupsize $default_groupsize + } + + lappend results [delimit_number $number $delim $groupsize] + } + + if {[llength $results] == 1} { + #keep intrep as string rather than list + return [lindex $results 0] + } + return $results + } + + + #from wiki https://wiki.tcl-lang.org/page/Delimiting+Numberse + # Given a number represented as a string, insert delimiters to break it up for + # readability. Normally, the delimiter will be a comma which will be inserted every + # three digits. However, the delimiter and groupsize are optional arguments, + # permitting use in other locales. + # + # The string is assumed to consist of digits, possibly preceded by spaces, + # and possibly containing a decimal point, i.e.: [:space:]*[:digit:]*\.[:digit:]* + + proc delimit_number {unformattednumber {delim ","} {GroupSize 3}} { + set number [valcopy $unformattednumber] + set number [string map {_ ""} $number] + #normalize using expr - e.g 2e4 -> 20000.0 + set number [expr {$number}] + # First, extract right hand part of number, up to and including decimal point + set point [string last "." $number]; + if {$point >= 0} { + set PostDecimal [string range $number $point+1 end]; + set PostDecimalP 1; + } else { + set point [expr {[string length $number] + 1}] + set PostDecimal ""; + set PostDecimalP 0; + } + + # Now extract any leading spaces. review - regex for whitespace instead of just ascii space? + set ind 0; + while {[string equal [string index $number $ind] \u0020]} { + incr ind; + } + set FirstNonSpace $ind; + set LastSpace [expr {$FirstNonSpace - 1}]; + set LeadingSpaces [string range $number 0 $LastSpace]; + + # Now extract the non-fractional part of the number, omitting leading spaces. + set MainNumber [string range $number $FirstNonSpace $point-1]; + + # Insert commas into the non-fractional part. + set Length [string length $MainNumber]; + set Phase [expr {$Length % $GroupSize}] + set PhaseMinusOne [expr {$Phase -1}]; + set DelimitedMain ""; + + #First we deal with the extra stuff. + if {$Phase > 0} { + append DelimitedMain [string range $MainNumber 0 $PhaseMinusOne]; + } + set FirstInGroup $Phase; + set LastInGroup [expr {$FirstInGroup + $GroupSize -1}]; + while {$LastInGroup < $Length} { + if {$FirstInGroup > 0} { + append DelimitedMain $delim; + } + append DelimitedMain [string range $MainNumber $FirstInGroup $LastInGroup]; + incr FirstInGroup $GroupSize + incr LastInGroup $GroupSize + } + + # Reassemble the number. + if {$PostDecimalP} { + return [format "%s%s.%s" $LeadingSpaces $DelimitedMain $PostDecimal]; + } else { + return [format "%s%s" $LeadingSpaces $DelimitedMain]; + } + } + + #review - there are various type of uuid - we should use something consistent across platforms + #twapi is used on windows because it's about 5 times faster - but is this more important than consistency? + #twapi is much slower to load in the first place (e.g 75ms vs 6ms if package names already loaded) - so for oneshots tcllib uuid is better anyway + #(counterpoint: in the case of punk - we currently need twapi anyway on windows) + #does tcllib's uuid use the same mechanisms on different platforms anyway? + if {$has_twapi} { + interp alias "" ::punk::lib::uuid "" twapi::new_uuid + } else { + catch {package require uuid} + interp alias "" ::punk::lib::uuid "" uuid::uuid generate + } + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +tcl::namespace::eval punk::lib::flatgrid { + namespace export filler_count rows cols col row block + + #WARNING - requires lseq and 'lsearch -stride' + #WARNING - lsearch -stride oddity with empty strings https://core.tcl-lang.org/tcl/tktview/edebb6a4 + #todo - 8.6 fallback? + + proc filler_count {listlen numcolumns} { + #if {$numcolumns <= 0} {error "filler_count requires 1 or more numcolumns"} ;#or allow divide by zero error + #if {$listlen == 0} {return $numcolumns} ;#an option - but returning zero might make more sense + expr {($numcolumns - ($listlen % $numcolumns)) % $numcolumns} + } + proc rows {list numcolumns {blank NULL}} { + set numblanks [filler_count [llength $list] $numcolumns] + set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] + set splits [lseq 0 to [llength $padded_list] by $numcolumns] + set rows [list] + set i 1 + foreach s [lrange $splits 0 end-1] { + lappend rows [lrange $padded_list $s [lindex $splits $i]-1] + incr i + } + return $rows + } + proc cols {list numcolumns {blank NULL}} { + set cols [list] + foreach colindex [lseq 0 $numcolumns-1] { + lappend cols [lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] *] + } + return $cols + } + proc cols2 {list numcolumns {blank NULL}} { + set cols [list] + foreach colindex [lseq 0 $numcolumns-1] { + lappend cols [col2 $list $numcolumns $colindex $blank] + } + return $cols + } + proc col {list numcolumns colindex {blank NULL}} { + lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] * + } + proc col2 {list numcolumns colindex {blank NULL}} { + set numblanks [filler_count [llength $list] $numcolumns] + set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] + set splits [lseq 0 to [llength $padded_list] by $numcolumns] + set col [list] + foreach s [lrange $splits 0 end-1] { + lappend col [lindex $padded_list $s+$colindex] + } + return $col + } + proc col3 {list numcolumns colindex {blank NULL}} { + set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] + lmap s [lrange [lseq 0 to [llength $padded_list] by $numcolumns] 0 end-1] {lindex $padded_list $s+$colindex} + } + proc col4 {list numcolumns colindex {blank NULL}} { + #slow + set vars [lrepeat $numcolumns _] + lset vars $colindex v + if {$blank eq ""} { + return [lmap $vars $list {set v}] + } + set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] + lmap $vars [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] {set v} + } + + proc block {list numcolumns {blank NULL}} { + set colblocks [list] + foreach c [cols $list $numcolumns $blank] { + lappend colblocks [join $c \n] " " + } + textblock::join -- {*}$colblocks + } + proc block2 {list numcolumns {blank NULL}} { + set colblocks [list] + foreach c [cols2 $list $numcolumns $blank] { + lappend colblocks [join $c \n] " " + } + textblock::join -- {*}$colblocks + } +} + +tcl::namespace::eval punk::lib::test { + + + +} + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#todo - way to generate 'internal' docs separately? +#*** !doctools +#[section Internal] +tcl::namespace::eval punk::lib::system { + #*** !doctools + #[subsection {Namespace punk::lib::system}] + #[para] Internal functions that are not part of the API + #[list_begin definitions] + + + proc mostFactorsBelow {n} { + ##*** !doctools + #[call [fun mostFactorsBelow] [arg n]] + #[para]Find the number below $n which has the greatest number of factors + #[para]This will get slow quickly as n increases (100K = 1s+ 2024) + set most 0 + set mostcount 0 + for {set i 1} {$i < $n} {incr i} { + set fc [llength [punk::lib::factors $i]] + if {$fc > $mostcount} { + set most $i + set mostcount $fc + } + } + return [list number $most numfactors $mostcount] + } + proc factorCountBelow_punk {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [punk::lib::factors $i]] + } + return $tally + } + proc factorCountBelow_numtheory {n} { + ##*** !doctools + #[call [fun factorCountBelow] [arg n]] + #[para]For numbers 1 to n - keep a tally of the total count of factors + #[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result + #[para]and as a rudimentary performance comparison + #[para]gets slow quickly! (significantly slower than factorCountBelow_punk) + package require math::numtheory + set tally 0 + for {set i 1} {$i <= $n} {incr i} { + incr tally [llength [math::numtheory::factors $i]] + } + return $tally + } + + proc factors2 {x} { + ##*** !doctools + #[call [fun factors2] [arg x]] + #[para]Return a sorted list of factors of x + #[para]A similar brute-force mechanism to factors - but keeps result ordering as we go. + set smallfactors [list 1] + set j 2 + set max [expr {sqrt($x)}] + while {$j < $max} { + if {($x % $j) == 0} { + lappend smallfactors $j + lappend largefactors [expr {$x / $j}] + } + incr j + } + #handle sqrt outside loop so we don't have to sort/dedup or check list membership in main loop + if {($x % $j) == 0} { + if {$j == ($x / $j)} { + lappend smallfactors $j + } + } + return [concat $smallfactors [lreverse $largefactors] $x] + } + + + + # incomplete - report which is the innermost bracket/quote etc awaiting completion for a Tcl command + #important - used by punk::repl + proc incomplete {partial} { + #we can apparently get away without concatenating current innerpartial to previous in list - REVIEW. + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + #puts stderr "-->$clist<--" + set waiting [list ""] + set innerpartials [list ""] + set escaped 0 + set i 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + incr i + continue + } ;# set escaped 0 at end + set p [lindex $innerpartials end] + if {$escaped == 0} { + #NOTE - curly braces as switch arm keys must be unescaped and balanced. (escapes stop byte-compilation to jumpTable for switch statements for tcl8.6/8.7 at least) + switch -- $c { + {"} { + if {![info complete ${p}]} { + lappend waiting {"} + lappend innerpartials "" + } else { + if {[lindex $waiting end] eq {"}} { + #this quote is endquote + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + if {![info complete ${p}$c]} { + lappend waiting {"} + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } + } + {[} { + if {![info complete ${p}$c]} { + lappend waiting "\]" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + "{" { + if {![info complete ${p}$c]} { + lappend waiting "\}" + lappend innerpartials "" + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + "}" - + default { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + set innerpartials [lrange $innerpartials 0 end-1] + } else { + set p ${p}${c} + lset innerpartials end $p + } + } + } + } else { + set p ${p}${c} + lset innerpartials end $p + } + set escaped 0 + incr i + } + set incomplete [list] + foreach w $waiting { + #to be treated as literals - curly braces must be unescaped here - and balanced - hence the left-curly empty arm. + switch -- $w { + {"} { + lappend incomplete $w + } + {]} { + lappend incomplete "\[" + } + "{" {} + "}" { + lappend incomplete "\{" + } + } + } + set debug 0 + if {$debug} { + foreach w $waiting p $innerpartials { + puts stderr "->awaiting:'$w' partial: $p" + } + } + return $incomplete + } + #This only works for very simple cases will get confused with for example: + # {set x "a["""} + proc incomplete_naive {partial} { + if {[info complete $partial]} { + return [list] + } + set clist [split $partial ""] + set waiting [list] + set escaped 0 + foreach c $clist { + if {$c eq "\\"} { + set escaped [expr {!$escaped}] + continue + } + if {!$escaped} { + if {$c eq {"}} { + if {[lindex $waiting end] eq {"}} { + set waiting [lrange $waiting 0 end-1] + } else { + lappend waiting {"} + } + } elseif {$c eq "\["} { + lappend waiting "\]" + } elseif {$c eq "\{"} { + lappend waiting "\}" + } else { + set waitingfor [lindex $waiting end] + if {$c eq "$waitingfor"} { + set waiting [lrange $waiting 0 end-1] + } + } + } + } + set incomplete [list] + foreach w $waiting { + if {$w eq {"}} { + lappend incomplete $w + } elseif {$w eq "\]"} { + lappend incomplete "\[" + } elseif {$w eq "\}"} { + lappend incomplete "\{" + } + } + return $incomplete + } + + #get info about punk nestindex key ie type: list,dict,undetermined + # pdict devel + proc nestindex_info {args} { + set argd [punk::args::parse $args withdef { + -parent -default "" + nestindex + }] + set opt_parent [dict get $argd opts -parent] + if {$opt_parent eq ""} { + set parent_type undetermined + } else { + set parent_type [nestindex_info -parent "" $opt_parent] ;#make sure we explicitly set parent of parent to empty so we don't just recurse forever doing nothing + } + + #??? + + } + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::lib::system ---}] +} + +tcl::namespace::eval punk::lib::caches { + +} + +tcl::namespace::eval punk::lib::debug { + proc showdict {args} {} +} + +namespace eval ::punk::args::register { + #use fully qualified so 8.6 doesn't find existing var in global namespace + lappend ::punk::args::register::NAMESPACES ::punk::lib +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::lib [tcl::namespace::eval punk::lib { + variable pkg punk::lib + variable version + set version 0.1.6 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm index 06c7ddf3..741d9fc0 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm @@ -229,12 +229,16 @@ tcl::namespace::eval punk::nav::fs { } else { set stripbase 1 } - if {$v eq "/"} { - #hack - dict set matchinfo files {} - dict set matchinfo filesizes {} - } - set out [dirfiles_dict_as_lines -stripbase $stripbase $matchinfo] + + #we need to pass matchinfo that includes files even when only doing a directory listing (d/ /) + #This is because we want to display links/shortcuts that point to directories as directories. + #( ./ listing needs to show navigable items) + #if {$v eq "/"} { + # #dodgy hack that doesn't give proper display of all links/shortcuts that are pointing to directories. + # dict set matchinfo files {} + # dict set matchinfo filesizes {} + #} + set out [dirfiles_dict_as_lines -listing $v -stripbase $stripbase $matchinfo] #set chunklist [list] #lappend chunklist [list stdout "[punk::ansi::a+ brightwhite]$out[punk::ansi::a]\n"] set result "[punk::ansi::a+ brightwhite]$out[punk::ansi::a]\n" @@ -258,10 +262,10 @@ tcl::namespace::eval punk::nav::fs { #puts stdout "-->[ansistring VIEW $result]" return $result } else { - set atail [lassign $args a1] + set atail [lassign $args cdtarget] if {[llength $args] == 1} { - set a1 [lindex $args 0] - switch -exact -- $a1 { + set cdtarget [lindex $args 0] + switch -exact -- $cdtarget { . - ./ { tailcall punk::nav::fs::d/ } @@ -286,43 +290,88 @@ tcl::namespace::eval punk::nav::fs { } } else { cd $up1 - #set VIRTUAL_CWD [file normalize $a1] + #set VIRTUAL_CWD [file normalize $cdtarget] } tailcall punk::nav::fs::d/ $v } } - if {![regexp {[*?]} $a1] && [file pathtype $a1] ne "relative"} { + set cdtarget_copy [punk::nav::fs::system::valcopy $cdtarget] + set cdtarget_copy [string map {\\ /} $cdtarget_copy] + if {[string range $cdtarget_copy 0 3] eq "//?/"} { + #handle dos device paths - convert to normal path for glob testing + set glob_test [string range $cdtarget_copy 3 end] + set cdtarget_is_glob [regexp {[*?]} $glob_test] + } else { + set cdtarget_is_glob [regexp {[*?]} $cdtarget] + } + if {!$cdtarget_is_glob} { + set cdtarget_file_type [file type $cdtarget] + #e.g may be a link - whilst the type returned in the 'file stat' info reflects the type of the link target + } else { + set cdtarget_file_type "glob" + } + + if {!$cdtarget_is_glob && [file pathtype $cdtarget] ne "relative"} { #non-relative non-glob - if { ![string match //zipfs:/* $a1]} { - if {[file type $a1] eq "directory"} { - cd $a1 - #set VIRTUAL_CWD $a1 - tailcall punk::nav::fs::d/ $v + if {![string match //zipfs:/* $cdtarget]} { + switch -- $cdtarget_file_type { + link { + file stat $cdtarget cdtargetinfo + set linktarget_file_type $cdtargetinfo(type) + if {$linktarget_file_type eq "directory"} { + set linktarget [file readlink $cdtarget] + cd $linktarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } + } + directory { + cd $cdtarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } } } } - if {![regexp {[*?]} $a1] && ![string match //zipfs:/* $a1] && ![string match "//zipfs:/*" $VIRTUAL_CWD]} { - if {[file type $a1] eq "directory"} { - cd $a1 - #set VIRTUAL_CWD [file normalize $a1] - tailcall punk::nav::fs::d/ $v + if {!$cdtarget_is_glob && ![string match //zipfs:/* $cdtarget] && ![string match "//zipfs:/*" $VIRTUAL_CWD]} { + switch -- $cdtarget_file_type { + link { + file stat $cdtarget cdtargetinfo + set linktarget_file_type $cdtargetinfo(type) + set linktarget [file readlink $cdtarget] + if {$linktarget_file_type eq "directory"} { + cd $linktarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } + } + directory { + cd $cdtarget + #set VIRTUAL_CWD $cdtarget + tailcall punk::nav::fs::d/ $v + } } + #if {[file type $cdtarget] eq "directory"} { + # cd $cdtarget + # #set VIRTUAL_CWD [file normalize $cdtarget] + # tailcall punk::nav::fs::d/ $v + #} } - if {![regexp {[*?]} $a1]} { + if {!$cdtarget_is_glob} { #NON-Glob target #review - if {[string match //zipfs:/* $a1]} { - if {[Zipfs_path_within_zipfs_mounts $a1]} { - commandstack::basecall cd $a1 + if {[string match //zipfs:/* $cdtarget]} { + if {[Zipfs_path_within_zipfs_mounts $cdtarget]} { + commandstack::basecall cd $cdtarget } - set VIRTUAL_CWD $a1 - set curdir $a1 + set VIRTUAL_CWD $cdtarget + set curdir $cdtarget } else { - set target [punk::path::normjoin $VIRTUAL_CWD $a1] + set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] if {[string match //zipfs:/* $VIRTUAL_CWD]} { if {[Zipfs_path_within_zipfs_mounts $target]} { commandstack::basecall cd $target @@ -521,20 +570,93 @@ tcl::namespace::eval punk::nav::fs { return $result } + punk::args::define { + @id -id ::punk::nav::fs::d/new + -nonportable -type none -help\ + "Allow creation of directories which may not be portable across platforms. + Use with caution and only when you know what you are doing. + This allows creation of directories with names that may be invalid on some + platforms, or that may have special meanings on some platforms + (e.g reserved device names on windows). + If -nonportable is not supplied, then an error will be raised if any supplied + path is non-portable as defined by punk::winpath::illegalname_test. + + Regardless of whether -nonportable is supplied or not, some characters are not + suitable for windows or most other platforms and will be rejected with an error. + An example of this is the null character (\0)." + @values -min 1 -max -1 -type string + path -type string -multiple 1 -help\ + "Path(s) to create. Can be absolute or relative. + + If any path is rejected due to -nonportable or other invalid characters, + or because a parent directory is not writable, then no directories will be created. + + If a path already exists, then it will be left as-is and no error will be raised. + + If despite passing the name tests or writability tests, a directory cannot be + created for some reason (e.g other filesystem error) then an error will be raised + and processing of any remaining paths will be aborted." + } + #todo - synchronize overall behaviour of d/new with that of n/new (for namespaces) proc d/new {args} { - if {![llength $args]} { - error "usage: d/new \[ ...\]" - } - set a1 [lindex $args 0] + set argd [punk::args::parse $args withid ::punk::nav::fs::d/new] + lassign [dict values $argd] leaders opts values received + set paths [dict get $values path] + set allow_nonportable [dict exists $received -nonportable] + set curdir [pwd] - set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] - set fullpath [file join $path1 {*}[lrange $args 1 end]] + set fullpath_list [list] + set error_paths [list] + foreach p $paths { + if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { + #error "punk::nav::fs::d/new Path '$p' is not portable and may not be created without -nonportable option" + lappend error_paths [list $p "Path '$p' is not portable and may not be created without -nonportable option"] + continue + } + if {[string first \0 $p] != -1} { + #error "punk::nav::fs::d/new Path '$p' contains null character which is not allowed" + lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] + continue + } + set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] + #e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. + set fullpath [file join $path1 {*}[lrange $args 1 end]] + #Some subpaths of the supplied paths to create may already exist. + #we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. + set parent [file dirname $fullpath] + while {![file exists $parent]} { + set parent [file dirname $parent] + } + if {![file writable $parent]} { + #error "punk::nav::fs::d/new Cannot create directory '$fullpath' as parent '$parent' is not writable" + lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] + continue + } + lappend fullpath_list $fullpath + } + if {[llength $fullpath_list] != [llength $paths]} { + set path_error_display "" + foreach e $error_paths { + set p [lindex $e 0] + set m [lindex $e 1] + append path_error_display " Path: '$p' Error: $m\n" + } + error "punk::nav::fs::d/new One or more supplied paths were invalid or not writable:\n$path_error_display" + } - if {[file exists $fullpath]} { - error "Folder $fullpath already exists" + set num_created 0 + set error_string "" + foreach fullpath $fullpath_list { + if {[catch {file mkdir $fullpath}]} { + set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." + break + } + incr num_created } - file mkdir $fullpath - d/ $fullpath + if {$error_string ne ""} { + error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." + } + d/ $curdir } #todo use unknown to allow d/~c:/etc ?? @@ -849,11 +971,11 @@ tcl::namespace::eval punk::nav::fs { #file attr //cookit:/ returns {-vfs 1 -handle {}} #we will treat it differently for now - use generic handler REVIEW - set in_vfs 0 ;#we use this only for a vfs which is reported to have a mountpoint by vfs::filesystem info - not all that have -vfs 1 attr like cookit. + set is_in_vfs 0 ;#we use this only for a vfs which is reported to have a mountpoint by vfs::filesystem info - not all that have -vfs 1 attr like cookit. if {[llength [package provide vfs]]} { foreach mount [vfs::filesystem info] { if {[punk::mix::base::lib::path_a_atorbelow_b $location $mount]} { - set in_vfs 1 + set is_in_vfs 1 break } } @@ -871,27 +993,27 @@ tcl::namespace::eval punk::nav::fs { } else { set next_opt_with_times [list -with_times $opt_with_times] } - if {$in_vfs} { + if {$is_in_vfs} { set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] } else { - set in_zipfs 0 - set in_cookit 1 - set in_other_pseudovol 1 + set invfs "" switch -glob -- $location { //zipfs:/* { if {[info commands ::tcl::zipfs::mount] ne ""} { - set in_zipfs 1 + set invfs zipfs } } //cookit:/* { - set in_cookit 1 + set invfs cookit } default { #handle 'other/unknown' that mounts at a volume-like path //pseudovol:/ + #(intentionally will not match a dos device path such as //?/c:/) if {[regexp {//((?:(?!:|/).)+):/.*} $location _match pseudovol]} { #pseudovol probably more than one char long #we don't really expect something like //c:/ , but anyway, it's not the same as c:/ and for all we know someone could use that as a volume name? - set in_other_pseudovol 1 ;#flag so we don't use twapi - hope generic can handle it (uses tcl glob) + #flag so we don't use twapi - hope generic can handle it (uses tcl glob) + set invfs pseudovol } else { #we could use 'file attr' here to test if {-vfs 1} #but it's an extra filesystem hit on all normal paths too (which can be expensive on some systems) @@ -900,20 +1022,24 @@ tcl::namespace::eval punk::nav::fs { } } - - if {$in_zipfs} { - #relative vs absolute? review - cwd valid for //zipfs:/ ?? - set listing [punk::du::lib::du_dirlisting_zipfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } elseif {$in_cookit} { - #seems to be a vfs - except it mounts on a pseudo-volume path //cookit:/ - #don't use twapi - #could possibly use du_dirlisting_tclvfs REVIEW - #files and folders are all returned with the -types hidden option for glob on windows - set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } elseif {$in_other} { - set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] - } else { - set listing [punk::du::dirlisting $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + switch -- $invfs { + zipfs { + #relative vs absolute? review - cwd valid for //zipfs:/ ?? + set listing [punk::du::lib::du_dirlisting_zipfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + cookit { + #seems to be a vfs - except it mounts on a pseudo-volume path //cookit:/ + #don't use twapi + #could possibly use du_dirlisting_tclvfs REVIEW + #files and folders are all returned with the -types hidden option for glob on windows + set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + pseudovol { + set listing [punk::du::lib::du_dirlisting_generic $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } + default { + set listing [punk::du::dirlisting $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] + } } } @@ -1018,11 +1144,13 @@ tcl::namespace::eval punk::nav::fs { @id -id ::punk::nav::fs::dirfiles_dict_as_lines -stripbase -default 0 -type boolean -formatsizes -default 1 -type boolean + -listing -default "/" -choices {/ // //} @values -min 1 -max -1 -type dict -unnamed true } #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? proc dirfiles_dict_as_lines {args} { + set ts1 [clock milliseconds] package require overtype set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] lassign [dict values $argd] leaders opts vals @@ -1031,9 +1159,12 @@ tcl::namespace::eval punk::nav::fs { # -- --- --- --- --- --- --- --- --- --- --- --- - set opt_stripbase [dict get $opts -stripbase] + set opt_stripbase [dict get $opts -stripbase] set opt_formatsizes [dict get $opts -formatsizes] + set opt_listing [dict get $opts -listing] # -- --- --- --- --- --- --- --- --- --- --- --- + #we still need to examine files for -listing / which means show only directories, + # because we want to display links/shortcuts that point to directories as directories #if multiple dicts and -stripbase = 1 - we can only strip the longest common part of the searchbases supplied set common_base "" @@ -1074,7 +1205,6 @@ tcl::namespace::eval punk::nav::fs { foreach fileset [list dirs files links underlayfiles flaggedhidden flaggedreadonly flaggedsystem nonportable vfsmounts] { set $fileset [list] } - #set contents [lindex $list_of_dicts 0] foreach contents $list_of_dicts { lappend dirs {*}[dict get $contents dirs] @@ -1090,6 +1220,7 @@ tcl::namespace::eval punk::nav::fs { lappend vfsmounts {*}[dict get $contents vfsmounts] } + set fkeys [dict create] ;#avoid some file normalize calls.. if {$opt_stripbase && $common_base ne ""} { set filetails [list] @@ -1224,27 +1355,41 @@ tcl::namespace::eval punk::nav::fs { #review - symlink to shortcut? hopefully will just work #classify as file or directory - fallback to file if unknown/undeterminable set finfo_plus [list] + set ts2 [clock milliseconds] foreach fdict $finfo { set fname [dict get $fdict file] if {[file extension $fname] eq ".lnk"} { if {![catch {package require punk::winlnk}]} { - set shortcutinfo [punk::winlnk::resolve $fname] set target_type "file" ;#default/fallback + + set shortcutinfo [punk::winlnk::resolve $fname] if {[dict exists $shortcutinfo link_target]} { set is_valid_lnk 1 set tgt [dict get $shortcutinfo link_target] - if {[file exists $tgt]} { - #file type could return 'link' - we will use isfile/isdirectory - if {[file isfile $tgt]} { - set target_type file - } elseif {[file isdirectory $tgt]} { - set target_type directory - } else { - set target_type file ;## ? + set link_target_type [dict get $shortcutinfo target_type] + switch -- $link_target_type { + file { + set target_type "file" + } + directory - "local disk" { + set target_type "directory" + } + unknown { + #fall back to checking attributes and filesystem if we have a link_target but no target_type + if {[file exists $tgt]} { + #file type could return 'link' - we will use isfile/isdirectory + if {[file isfile $tgt]} { + set target_type file + } elseif {[file isdirectory $tgt]} { + set target_type directory + } else { + set target_type file ;## ? + } + } else { + #todo - see if punk::winlnk has info about the type at the time of linking + #for now - treat as file + } } - } else { - #todo - see if punk::winlnk has info about the type at the time of linking - #for now - treat as file } } else { #no link_target - probably an ordinary file - but there could have been some other error in reading the binary windows lnk format. @@ -1295,6 +1440,8 @@ tcl::namespace::eval punk::nav::fs { } unset finfo + puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" + puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] @@ -1304,58 +1451,82 @@ tcl::namespace::eval punk::nav::fs { set displaylist [list] set col1 [string repeat " " [expr {$widest1 + 2}]] set RST [punk::ansi::a] + if {$opt_listing eq "/"} { + #disply directories only (including items that were actually files that were links/shortcuts to directories) + set finfo_plus [list] + } foreach d $dirs filerec $finfo_plus { - set d1 [punk::ansi::a+ cyan bold] - set d2 [punk::ansi::a+ defaultfg defaultbg normal] - #set f1 [punk::ansi::a+ white bold] - set f1 [punk::ansi::a+ white] - set f2 [punk::ansi::a+ defaultfg defaultbg normal] + set d1 [punk::ansi::a+ cyan normal] + set d1_overrides [list] + #set d2 [punk::ansi::a+ defaultfg defaultbg normal] + set f1 [punk::ansi::a+ white normal] + set f1_overrides [list] + #set f2 [punk::ansi::a+ defaultfg defaultbg normal] set fdisp "" if {[string length $d]} { if {$d in $flaggedhidden} { - set d1 [punk::ansi::a+ cyan normal] + #set d1 [punk::ansi::a+ Term-grey50 normal] + lappend d1_overrides term-grey50 } if {$d in $vfsmounts} { - if {$d in $flaggedhidden} { - #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indicatio of 'hidden' status - REVIEW - #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) - #mark it differently for now.. (todo bug report?) - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red Yellow bold] - } else { - set d1 [punk::ansi::a+ green Purple bold] - } - } else { - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red White bold] - } else { - set d1 [punk::ansi::a+ green bold] - } - } - } else { - if {$d in $nonportable} { - set d1 [punk::ansi::a+ red bold] - } + lappend d1_overrides Green + } + if {$d in $nonportable} { + #lappend d1_overrides underline undt-red ;#we use underlins to indicate symlinks and shortcuts, so we shouldn't use underlines here if possible. + lappend d1_overrides italic bold } + #if {$d in $vfsmounts} { + # if {$d in $flaggedhidden} { + # #we could have a hidden dir which is also a vfs.. colour will be overridden giving no indication of 'hidden' status - REVIEW + # #(This situation encountered on windows - even though file attr showed -hidden 0 - the glob with -types hidden returned it.. possibly a tcl glob bug on windows) + # #mark it differently for now.. (todo bug report?) + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red Yellow bold] + # } else { + # set d1 [punk::ansi::a+ green Purple bold] + # } + # } else { + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red White bold] + # } else { + # set d1 [punk::ansi::a+ green bold] + # } + # } + #} else { + # if {$d in $nonportable} { + # set d1 [punk::ansi::a+ red bold] + # } + #} #dlink-style & dshortcut_style are for underlines - can be added with colours already set + + if {[llength $d1_overrides]} { + set d1 [punk::ansi::a+ {*}$d1_overrides] + } if {$d in $dir_symlinks} { append d1 $dlink_style } elseif {$d in $dir_shortcuts} { append d1 $dshortcut_style } } + if {[llength $filerec]} { set fname [dict get $filerec file] set fdisp [dict get $filerec display] if {$fname in $flaggedhidden} { - set f1 [punk::ansi::a+ Purple] - } else { - if {$fname in $nonportable} { - set f1 [punk::ansi::a+ red bold] - } + #set f1 [punk::ansi::a+ Term-grey50] + lappend f1_overrides term-grey50 + } + if {$fname in $nonportable} { + lappend f1_overrides italic bold } + if {[llength $f1_overrides]} { + set f1 [punk::ansi::a+ {*}$f1_overrides] + } + lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST + } else { + #either there are no files or opt_listing is / = show dirs only (some of which may have actually been files that were links/shortcuts to directories) + lappend displaylist [overtype::left $col1 $d1$d$RST] } - lappend displaylist [overtype::left $col1 $d1$d$RST]$f1$fdisp$RST } return [punk::lib::list_as_lines $displaylist] @@ -1469,6 +1640,12 @@ tcl::namespace::eval punk::nav::fs::system { #[subsection {Namespace punk::nav::fs::system}] #[para] Internal functions that are not part of the API + #utility function to copy values from one variable to another without sharing the reference. + #Useful for example to avoid some issues with possible shimmering of the underlying type of file paths. + proc valcopy {obj} { + append obj2 $obj {} + } + #ordinary emission of chunklist when no repl proc emit_chunklist {chunklist} { set result "" diff --git a/src/vfs/_vfscommon.vfs/modules/punk/pipe-1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/pipe-1.0.tm index eac7df81..034fae01 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/pipe-1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/pipe-1.0.tm @@ -326,12 +326,12 @@ tcl::namespace::eval punk::pipe::lib { set in_atom 1 } ( { - incr in_brackets + incr in_brackets } default { if {$end_var_posn == -1 && (($c in $var_terminals) && ($prevc ni $protect_terminals))} { - set end_var_posn $token_index - } + set end_var_posn $token_index + } } } } diff --git a/src/vfs/_vfscommon.vfs/modules/punk/winlnk-0.1.1.tm b/src/vfs/_vfscommon.vfs/modules/punk/winlnk-0.1.1.tm new file mode 100644 index 00000000..f283348f --- /dev/null +++ b/src/vfs/_vfscommon.vfs/modules/punk/winlnk-0.1.1.tm @@ -0,0 +1,1014 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'dev make' or bin/punkmake to update from -buildversion.txt +# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::winlnk 0.1.1 +# Meta platform tcl +# Meta license MIT +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::winlnk 0 0.1.1] +#[copyright "2024"] +#[titledesc {windows shortcut .lnk library}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk::winlnk}] [comment {-- Description at end of page heading --}] +#[require punk::winlnk] +#[keywords module shortcut lnk parse windows crossplatform] +#[description] +#[para] Tools for reading windows shortcuts (.lnk files) on any platform + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::winlnk +#[subsection Concepts] +#[para] Windows shortcuts are a binary format file with a .lnk extension +#[para] Shell Link (.LNK) Binary File Format is documented in [lb]MS_SHLLINK[rb].pdf published by Microsoft. +#[para] Revision 8.0 published 2024-04-23 + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::winlnk +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6}] + +#TODO - logger + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + #variable xyz + + #*** !doctools + #[subsection {Namespace punk::winlnk}] + #[para] Core API functions for punk::winlnk + #[list_begin definitions] + + + variable magic_HeaderSize "0000004C" ;#HeaderSize MUST equal this + variable magic_LinkCLSID "00021401-0000-0000-C000-000000000046" ;#LinkCLSID MUST equal this + + proc Get_contents {path {bytes all}} { + if {![file exists $path] || [file type $path] ne "file"} { + error "punk::winlnk::get_contents cannot find a filesystem object of type 'file' at location: $path" + } + set fd [open $path r] + chan configure $fd -translation binary -encoding iso8859-1 + if {$bytes eq "all"} { + set data [read $fd] + } else { + set data [read $fd $bytes] + } + close $fd + return $data + } + proc Contents_check_header {contents} { + variable magic_HeaderSize + variable magic_LinkCLSID + expr {[Header_Get_HeaderSize $contents] eq $magic_HeaderSize && [Header_Get_LinkCLSID $contents] eq $magic_LinkCLSID} + } + + #LinkFlags - 4 bytes - specifies information about the shell link and the presence of optional portions of the structure. + proc Show_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + puts "val: $val" + set declist [scan [string reverse $4bytes] %c%c%c%c] + set fmt [string repeat %08b 4] + puts "LinkFlags:[format $fmt {*}$declist]" + + set r [binary scan $4bytes b32 val] + puts "bscan-le: $val" + set r [binary scan [string reverse $4bytes] b32 val] + puts "bscan-2 : $val" + } + variable LinkFlags + set LinkFlags [dict create\ + HasLinkTargetIDList 1\ + HasLinkInfo 2\ + HasName 4\ + HasRelativePath 8\ + HasWorkingDir 16\ + HasArguments 32\ + HasIconLocation 64\ + IsUnicode 128\ + ForceNoLinkInfo 256\ + HasExpString 512\ + RunInSeparateProcess 1024\ + Unused1 2048\ + HasDarwinID 4096\ + RunAsUser 8192\ + HasExpIcon 16394\ + NoPidlAlias 32768\ + Unused2 65536\ + RunWithShimLayer 131072\ + ForceNoLinkTrack 262144\ + EnableTargetMetadata 524288\ + DisableLinkPathTracking 1048576\ + DisableKnownFolderTracking 2097152\ + DisableKnownFolderAlias 4194304\ + AllowLinkToLink 8388608\ + UnaliasOnSave 16777216\ + PreferEnvironmentPath 33554432\ + KeepLocalIDListForUNCTarget 67108864\ + ] + variable LinkFlagLetters [list A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA] + proc Header_Has_LinkFlag {contents flagname} { + variable LinkFlags + variable LinkFlagLetters + if {[string length $flagname] <= 2} { + set idx [lsearch $LinkFlagLetters $flagname] + if {$idx < 0} { + error "punk::winlnk::Header_Has_LinkFlag error - flagname $flagname not known" + } + set binflag [expr {2**$idx}] + set allflags [Header_Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } + if {[dict exists $LinkFlags $flagname]} { + set binflag [dict get $LinkFlags $flagname] + set allflags [Header_Get_LinkFlags $contents] + return [expr {$allflags & $binflag}] + } else { + error "punk::winlnk::Header_Has_LinkFlag error - flagname $flagname not known" + } + } + + #MS-SHLLINK.pdf documents the .lnk file format in detail, but here is a brief overview of the structure of a .lnk file: + #protocol revision 10.0 (November 2025) https://winprotocoldocs-bhdugrdyduf5h2e4.b02.azurefd.net/MS-SHLLINK/%5bMS-SHLLINK%5d.pdf + + + #SHELL_LINK_HEADER structure is 76 bytes long and starts at the beginning of the file + #offset hex:0x00 dec:0 4 bytes + #Header size (HeaderSize) (must be 0x0000004C for .lnk files) + proc Header_Get_HeaderSize {contents} { + set 4bytes [split [string range $contents 0 3] ""] + set hex4 "" + foreach b [lreverse $4bytes] { + set dec [scan $b %c] ;# 0-255 decimal + set HH [format %2.2llX $dec] + append hex4 $HH + } + return $hex4 + } + + + #offset hex:0x04 dec:4 16 bytes + #LinkCLSID (must be 00021401-0000-0000-C000-000000000046 for .lnk files) + proc Header_Get_LinkCLSID {contents} { + set 16bytes [string range $contents 4 19] + #CLSID hex textual representation is split as 4-2-2-2-6 bytes(hex pairs) + #e.g We expect 00021401-0000-0000-C000-000000000046 for .lnk files + #for endianness - it is little endian all the way but the split is 4-2-2-1-1-1-1-1-1-1-1 REVIEW + #(so it can appear as mixed endianness if you don't know the splits) + #https://devblogs.microsoft.com/oldnewthing/20220928-00/?p=107221 + #This is based on COM textual representation of GUIDS + #Apparently a CLSID is a GUID that identifies a COM object + set clsid "" + set s1 [tcl::string::range $16bytes 0 3] + set declist [scan [string reverse $s1] %c%c%c%c] + set fmt "%02X%02X%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s2 [tcl::string::range $16bytes 4 5] + set declist [scan [string reverse $s2] %c%c] + set fmt "%02X%02X" + append clsid [format $fmt {*}$declist] + + append clsid - + set s3 [tcl::string::range $16bytes 6 7] + set declist [scan [string reverse $s3] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + #now treat bytes individually - so no endianness conversion + set declist [scan [tcl::string::range $16bytes 8 9] %c%c] + append clsid [format $fmt {*}$declist] + + append clsid - + set scan [string repeat %c 6] + set fmt [string repeat %02X 6] + set declist [scan [tcl::string::range $16bytes 10 15] $scan] + append clsid [format $fmt {*}$declist] + + return $clsid + } + + + #offset hex:0x14 dec:20 4 bytes + #Link flags (LinkFlags) - bit field specifying information about the shell link and the presence of optional portions of the structure. + #HasLinkTargetIDList bit 0 (0x00000001) - if set, a LinkTargetIDList structure is present immediately following the header + #HasLinkInfo bit 1 (0x00000002) - if set, a LinkInfo structure is present immediately following the header (or the LinkTargetIDList if that is present) + #HasName bit 2 (0x00000004) - if set, a null-terminated string containing the name of the link is present immediately following the header (or the LinkTargetIDList and LinkInfo if they are present) + #HasRelativePath bit 3 (0x00000008) - if set, a null-terminated string containing the relative path of the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo and Name if they are present) + #HasWorkingDir bit 4 (0x00000010) - if set, a null-terminated string containing the working directory of the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name and Relative Path if they are present) + #HasArguments bit 5 (0x00000020) - if set, a null-terminated string containing the command line arguments for the link target is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path and Working Dir if they are present) + #HasIconLocation bit 6 (0x00000040) - if set, a null-terminated string containing the location of the icon for the link is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir and Arguments if they are present) + #IsUnicode bit 7 (0x00000080) - if set, the strings in the link are stored in Unicode (UTF-16LE) format; if not set, the strings are stored in ANSI format (usually the system's default code page) + #ForceNoLinkInfo bit 8 (0x00000100) - if set, the LinkInfo structure is not stored in the file even if the HasLinkInfo bit is set; this can be used to force the link to be resolved using only the information in the header and the optional strings, without using the LinkInfo structure + #HasExpString bit 9 (0x00000200) - if set, a null-terminated string containing an "environment variable" style string is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments and Icon Location if they are present); this string can contain environment variable references (e.g. %USERPROFILE%) that can be expanded to obtain the actual path of the link target + #RunInSeparateProcess bit 10 (0x00000400) - if set, the link target should be run in a separate process; if not set, the link target may be run in the same process as the caller + #Unused1 bit 11 (0x00000800) - reserved for future use; should be set to 0 + #HasDarwinID bit 12 (0x00001000) - if set, a null-terminated string containing a "Darwin ID" is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments, Icon Location and ExpString if they are present); this string can be used to identify the link target in a way that is independent of the file system (e.g. for links to Control Panel items or special folders) + #RunAsUser bit 13 (0x00002000) - if set, the link target should be run with the permissions of the user specified in the HasDarwinID string; if not set, the link target should be run with the permissions of the caller + #HasExpIcon bit 14 (0x00004000) - if set, a null-terminated string containing an "environment variable" style string for the icon location is present immediately following the header (or the LinkTargetIDList, LinkInfo, Name, Relative Path, Working Dir, Arguments, Icon Location, ExpString and DarwinID if they are present); this string can contain environment variable references that can be expanded to obtain the actual path of the icon for the link + #NoPidlAlias bit 15 (0x00008000) - if set, the link target should not be resolved using the PIDL alias mechanism; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed + #Unused2 bit 16 (0x00010000) - reserved for future use; should be set to 0 + #RunWithShimLayer bit 17 (0x00020000) - if set, the link target should be run with the application compatibility shim layer; if not set, the link target should be run without the shim layer + #ForceNoLinkTrack bit 18 (0x00040000) - if set, the link target should not be tracked by the shell's link tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed + #EnableTargetMetadata bit 19 (0x00080000) - if set, the link target should have metadata enabled; this can be used to allow the link to store additional information about the target (e.g. for links to files, the link can store the file's attributes, creation time, access time and modification time) + #DisableLinkPathTracking bit 20 (0x00100000) - if set, the link target should not be tracked by the shell's link path tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed based on its path + #DisableKnownFolderTracking bit 21 (0x00200000) - if set, the link target should not be tracked by the shell's known folder tracking mechanism; this can be used to prevent the link from being automatically updated if the target is moved or renamed based on its known folder ID + #DisableKnownFolderAlias bit 22 (0x00400000) - if set, the link target should not be aliased to a known folder; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on its known folder ID + #AllowLinkToLink bit 23 (0x00800000) - if set, the link target can be another link; if not set, the link target should not be another link (i.e. it should be a file or directory); this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on the fact that it is a link + #UnaliasOnSave bit 24 (0x01000000) - if set, the link should be unaliased when it is saved; this can be used to prevent the link from being resolved to a different target if the original target is moved or renamed based on the fact that it is a link + #PreferEnvironmentPath bit 25 (0x02000000) - if set, the link should prefer to resolve the target using environment variable references; this can be used to allow the link to be resolved correctly even if the target is moved or renamed, as long as the environment variable references still point to the correct location + #KeepLocalIDListForUNCTarget bit 26 (0x04000000) - if set, the link should keep the local ID list for UNC targets; this can be used to allow the link to be resolved correctly even if the target is moved or renamed, as long as the local ID list still points to the correct location + # - the presence of these flags indicates the presence of optional structures in the .lnk file and also provides information about how to interpret the data in the file + proc Header_Get_LinkFlags {contents} { + set 4bytes [string range $contents 20 23] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #offset hex:0x18 dec:24 4 bytes + #File attributes (FileAttributes) - bit field specifying the file attributes of the link target (if the EnableTargetMetadata flag is set in the LinkFlags field); this field is a bitwise combination of the following values: + proc Header_Get_FileAttributes {contents} { + if {![Header_Has_LinkFlag $contents "EnableTargetMetadata"]} { + return {} + } + set 4bytes [string range $contents 24 27] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + set attrlist {} + if {$val & 0x00000001} {lappend attrlist "READONLY"} + if {$val & 0x00000002} {lappend attrlist "HIDDEN"} + if {$val & 0x00000004} {lappend attrlist "SYSTEM"} + if {$val & 0x00000010} {lappend attrlist "DIRECTORY"} + if {$val & 0x00000020} {lappend attrlist "ARCHIVE"} + if {$val & 0x00000040} {lappend attrlist "DEVICE"} + if {$val & 0x00000080} {lappend attrlist "NORMAL"} + if {$val & 0x00000100} {lappend attrlist "TEMPORARY"} + if {$val & 0x00000200} {lappend attrlist "SPARSE_FILE"} + if {$val & 0x00000400} {lappend attrlist "REPARSE_POINT"} + if {$val & 0x00000800} {lappend attrlist "COMPRESSED"} + if {$val & 0x00001000} {lappend attrlist "OFFLINE"} + if {$val & 0x00002000} {lappend attrlist "NOT_CONTENT_INDEXED"} + if {$val & 0x00004000} {lappend attrlist "ENCRYPTED"} + return $attrlist + } + proc Header_Get_FileAttributes_Raw {contents} { + if {![Header_Has_LinkFlag $contents "EnableTargetMetadata"]} { + return 0 + } + set 4bytes [string range $contents 24 27] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + + + + #offset hex:0x1C dec:28 8 bytes + #creation date and time (CreationTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_CreationTime {contents} { + set 8bytes [string range $contents 28 35] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_CreationTime_Raw {contents} { + set 8bytes [string range $contents 28 35] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset 36 8 bytes + #last access date and time (AccessTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_AccessTime {contents} { + set 8bytes [string range $contents 36 43] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_AccessTime_Raw {contents} { + set 8bytes [string range $contents 36 43] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset hex:0x2C dec:44 8 bytes + #last modification date and time (WriteTime) (FILETIME structure - 64-bit value representing the number of 100-nanosecond intervals since January 1, 1601 (UTC)) + proc Header_Get_WriteTime {contents} { + set 8bytes [string range $contents 44 51] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + #convert FILETIME to human readable format - this is a bit complex because FILETIME is in 100-nanosecond intervals since January 1, 1601 (UTC) + #we can convert it to seconds and then to a human readable format + set seconds [expr {$val / 10000000.0}] + set epoch_seconds [expr {round($seconds) - 11644473600}] ;# number of seconds between January 1, 1601 and January 1, 1970 + set human_time [clock format $epoch_seconds -format "%Y-%m-%d %H:%M:%S" -gmt true] + return $human_time + } + proc Header_Get_WriteTime_Raw {contents} { + set 8bytes [string range $contents 44 51] + set r [binary scan $8bytes w val] ;# w for little endian 64-bit signed int + return $val + } + + #offset hex:0x34 dec:52 Bytes:4 - unsigned int + #file size in bytes (of target - low 32 bits if >4GB) + proc Header_Get_FileSize {contents} { + set 4bytes [string range $contents 52 55] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x38 dec:56 Bytes:4 - signed integer + #icon index value + proc Header_Get_IconIndex {contents} { + set 4bytes [string range $contents 56 59] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x3C dec:60 Bytes:4 - unsigned integer + #SW_SHOWNORMAL 0x00000001 + #SW_SHOWMAXIMIZED 0x00000001 + #SW_SHOWMINNOACTIVE 0x00000007 + # - all other values MUST be treated as SW_SHOWNORMAL + proc Header_Get_ShowCommand {contents} { + set 4bytes [string range $contents 60 63] + set r [binary scan $4bytes i val] + return $val + } + + #offset hex:0x40 dec:64 Bytes:2 + #Hot key + proc Header_Get_HotKey {contents} { + # Existing code that extracts the raw 16‑bit hotkey value: + set raw [Header_Get_HotKey_Raw $contents] + # The low byte holds the virtual‑key, high byte holds modifier flags + set vk [expr {$raw & 0xFF}] + set mods [expr {($raw >> 8) & 0xFF}] + set name [_vk_to_name $vk] + set modStr [_modifiers_to_string $mods] + if {$modStr eq ""} { + return $name + } else { + return "${modStr}+${name}" + } + } + proc Header_Get_HotKey_Raw {contents} { + set 2bytes [string range $contents 64 65] + set r [binary scan $2bytes s val] ;#short + return $val + } + proc _modifiers_to_string {mods} { + set parts {} + if {$mods & 0x01} {lappend parts "Shift"} + if {$mods & 0x02} {lappend parts "Ctrl"} + if {$mods & 0x04} {lappend parts "Alt"} + if {$mods & 0x08} {lappend parts "Win"} ;# optional + return [join $parts "+"] + } + proc _vk_to_name {vk} { + # Minimal map – extend as needed + array set vkMap { + 0x00 "No key assigned" + 0x08 Backspace 0x09 Tab 0x0D Return + 0x10 Shift 0x11 Control 0x12 Alt + 0x20 Space 0x21 PageUp 0x22 PageDown + 0x23 End 0x24 Home 0x25 Left + 0x26 Up 0x27 Right 0x28 Down + 0x2D Insert 0x2E Delete + 0x70 F1 0x71 F2 0x72 F3 + 0x73 F4 0x74 F5 0x75 F6 + 0x76 F7 0x77 F8 0x78 F9 + 0x79 F10 0x7A F11 0x7B F12 + 0x7c F13 0x7d F14 0x7e F15 + 0x7f F16 0x80 F17 0x81 F18 + 0x82 F19 0x83 F20 0x84 F21 + 0x85 F22 0x86 F23 0x87 F24 + 0x90 "NUM LOCK" 0x91 "SCROLL LOCK" + } + if {[info exists vkMap($vk)]} { + return $vkMap($vk) + } else { + if {$vk >= 0x30 && $vk <= 0x39} { + return [format "%c" $vk] ;# 0-9 + } elseif {$vk >= 0x41 && $vk <= 0x5A} { + return [format "%c" $vk] ;# A-Z + } + # fallback: hex representation + return [format "0x%02X" $vk] + } + } + + #offset hex:0x42 dec:66 Bytes:2 - reserved1 + proc Header_Get_Reserved1 {contents} { + set 2bytes [string range $contents 66 67] + set r [binary scan $2bytes s val] ;#short + return $val + } + + #offset hex:0x44 dec:68 Bytes:4 - reserved2 + proc Header_Get_Reserved2 {contents} { + set 4bytes [string range $contents 68 71] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #offset hex:0x48 dec:72 Bytes:4 - reserved3 + proc Header_Get_Reserved3 {contents} { + set 4bytes [string range $contents 72 75] + set r [binary scan $4bytes i val] ;# i for little endian 32-bit signed int + return $val + } + + #end of 76 byte header + + proc Get_LinkTargetIDList_size {contents} { + if {[Header_Has_LinkFlag $contents "A"]} { + set 2bytes [string range $contents 76 77] + set r [binary scan $2bytes s val] ;#short + #logger + #puts stderr "LinkTargetIDList_size: $val" + return $val + } else { + return 0 + } + } + proc Get_LinkTargetIDList_content {contents} { + set idlist_size [Get_LinkTargetIDList_size $contents] + if {$idlist_size == 0} { + return "" + } else { + set idlist_content [string range $contents 78 [expr {78 + $idlist_size -1}]] + return $idlist_content + } + } + + #some clues on the structure of the IDList content and how to parse it can be found in the analysis of CVE-2020-0729, + #which is a remote code execution vulnerability in Windows that can be exploited through specially crafted .lnk files that contain malicious IDList content. + #The analysis of this vulnerability provides insights into how the IDList content is structured and how it can be parsed to extract information about the link target and potentially execute code. + #https://www.zerodayinitiative.com/blog/2020/3/25/cve-2020-0729-remote-code-execution-through-lnk-files + + proc Get_LinkTargetIDList_iteminfo {contents} { + set idlist_content [Get_LinkTargetIDList_content $contents] + set result {} + set offset 0 + while {$offset < [string length $idlist_content]} { + if {[string length $idlist_content] - $offset < 2} break + set size_bytes [string range $idlist_content $offset [expr {$offset + 1}]] ;#size including these 2 bytes + binary scan $size_bytes su size + if {$size == 0} break + if {$size < 2} { + # Invalid size, abort + error "punk::winlnk::Get_LinkTargetIDList_iteminfo: Invalid ItemID size: $size at offset $offset" + } + if {$offset + $size > [string length $idlist_content]} { + # ItemID extends beyond content, stop parsing + puts stderr "punk::winlnk::Get_LinkTargetIDList_iteminfo: ItemID at offset $offset with size $size extends beyond content length, stopping parse" + break + } + set itemid [string range $idlist_content $offset [expr {$offset + $size - 1}]] + set itemid_bytes [string range $itemid 0 1] + binary scan $itemid_bytes su itemid_size + #in *general* byte 3 of the ItemID structure can be used to determine the type of the item + #(e.g. file, folder, network location, etc.) but this is not always reliable and can vary + #based on the specific structure of the ItemID and the context in which it is used + set itemid_type_byte [string index $itemid 2] + #puts stderr "ItemID size: $itemid_size, type byte: [format %02X [scan $itemid_type_byte %c]]" + set maybe_type [format %02X [scan $itemid_type_byte %c]] + lappend result [dict create size $itemid_size type $maybe_type rawcontent $itemid] + + incr offset $size + } + return $result + } + proc Get_LinkInfo_content {contents} { + set idlist_size [Get_LinkTargetIDList_size $contents] + if {$idlist_size == 0} { + set offset 0 + } else { + set offset [expr {2 + $idlist_size}] ;#LinkTargetIdList IDListSize field + value + } + set linkinfo_start [expr {76 + $offset}] + if {[Header_Has_LinkFlag $contents "B"]} { + #puts stderr "linkinfo_start: $linkinfo_start" + set 4bytes [string range $contents $linkinfo_start $linkinfo_start+3] + binary scan $4bytes i val ;#size *including* these 4 bytes + set linkinfo_content [string range $contents $linkinfo_start [expr {$linkinfo_start + $val -1}]] + return [dict create linkinfo_start $linkinfo_start size $val next_start [expr {$linkinfo_start + $val}] content $linkinfo_content] + } else { + return [dict create linkinfo_start $linkinfo_start size 0 next_start $linkinfo_start content ""] + } + } + + proc LinkInfo_get_fields {linkinfocontent} { + #TODO - finish parsing of LinkInfo - add support + #Link location information + #present if data flag HasLinkInfo exists. + + set 4bytes [string range $linkinfocontent 0 3] + binary scan $4bytes i val ;#size *including* these 4 bytes + + set bytes_linkinfoheadersize [string range $linkinfocontent 4 7] + binary scan $bytes_linkinfoheadersize i headersize + + set bytes_linkinfoflags [string range $linkinfocontent 8 11] + set r [binary scan $bytes_linkinfoflags i flags] ;# i for little endian 32-bit signed int + #puts "linkinfoflags: $flags" + + set localbasepath "" + set commonpathsuffix "" + + #REVIEW - flags problem? + if {$flags & 1} { + #VolumeIDAndLocalBasePath + #logger + #puts stderr "VolumeIDAndLocalBasePath" + } + if {$flags & 2} { + #logger + #puts stderr "CommonNetworkRelativeLinkAndPathSuffix" + } + set bytes_volumeid_offset [string range $linkinfocontent 12 15] + set bytes_localbasepath_offset [string range $linkinfocontent 16 19] + set bytes_commonnetworkrelativelinkoffset [string range $linkinfocontent 20 23] + set bytes_commonpathsuffix_offset [string range $linkinfocontent 24 27] + + binary scan $bytes_localbasepath_offset i bp_offset + if {$bp_offset > 0} { + set tail [string range $linkinfocontent $bp_offset end] + set stringterminator 0 + set i 0 + set localbasepath "" + #TODO + while {!$stringterminator & $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append localbasepath $c + } + incr i + } + } + binary scan $bytes_commonpathsuffix_offset i cps_offset + if {$cps_offset > 0} { + set tail [string range $linkinfocontent $cps_offset end] + set stringterminator 0 + set i 0 + set commonpathsuffix "" + #TODO + while {!$stringterminator && $i < 100} { + set c [string index $tail $i] + if {$c eq "\x00"} { + set stringterminator 1 + } else { + append commonpathsuffix $c + } + incr i + } + } + + + return [dict create localbasepath $localbasepath commonpathsuffix $commonpathsuffix note ] + } + + proc Contents_Get_Info {contents} { + + + #todo - return something like the perl lnk-parse-1.0.pl script? + + #Link File: C:/repo/jn/tclmodules/tomlish/src/modules/test/#modpod-tomlish-0.1.1/suites/all/arrays_1.toml#roundtrip+roundtrip_files+arrays_1.toml.fauxlink.lnk + #Link Flags: HAS SHELLIDLIST | POINTS TO FILE/DIR | NO DESCRIPTION | HAS RELATIVE PATH STRING | HAS WORKING DIRECTORY | NO CMD LINE ARGS | NO CUSTOM ICON | + #File Attributes: ARCHIVE + #Create Time: Sun Jul 14 2024 10:41:34 + #Last Accessed time: Sat Sept 21 2024 02:46:10 + #Last Modified Time: Tue Sept 10 2024 17:16:07 + #Target Length: 479 + #Icon Index: 0 + #ShowWnd: 1 SW_NORMAL + #HotKey: 0 + #(App Path:) Remaining Path: repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-0.1.1\suites\roundtrip\roundtrip_files\arrays_1.toml + #Relative Path: ..\roundtrip\roundtrip_files\arrays_1.toml + #Working Dir: C:\repo\jn\tclmodules\tomlish\src\modules\test\#modpod-tomlish-0.1.1\suites\roundtrip\roundtrip_files + + variable LinkFlags + set flags_enabled [list] + dict for {k v} $LinkFlags { + if {[Header_Has_LinkFlag $contents $k] > 0} { + lappend flags_enabled $k + } + } + + set showcommand_val [Header_Get_ShowCommand $contents] + switch -- $showcommand_val { + 1 { + set showwnd [list 1 SW_SHOWNORMAL] + } + 3 { + set showwnd [list 3 SW_SHOWMAXIMIZED] + } + 7 { + set showwnd [list 7 SW_SHOWMINNOACTIVE] + } + default { + set showwnd [list $showcommand_val SW_SHOWNORMAL-effective] + } + } + + set linkinfo_content_dict [Get_LinkInfo_content $contents] + set localbase_path "" + set suffix_path "" + set linkinfocontent [dict get $linkinfo_content_dict content] + set next_start [dict get $linkinfo_content_dict next_start] ;#location of section following LinkInfo (Location information) - this will be the Data Strings. + set link_target "" + set linkfields [dict create] + if {$linkinfocontent ne ""} { + set linkfields [LinkInfo_get_fields $linkinfocontent] + set localbase_path [dict get $linkfields localbasepath] + set suffix_path [dict get $linkfields commonpathsuffix] + if {"windows" eq $::tcl_platform(platform)} { + set link_target [file join $localbase_path $suffix_path] + } else { + set suffix_path [string trimleft [string map {\\ /} $suffix_path] /] + if {[regexp {([a-zA-Z]):\\(.*)} $localbase_path _match drive_letter tail]} { + set localbase_path [string map {\\ /} $localbase_path] + set tail [string trimleft [string map {\\ /} $tail] /] + set link_target "" + #shortcut basepath is a windows path with drive letter - try to resolve it on unix by looking for a corresponding mount from fstab or a point under /mnt + set mountinfo [exec mount] + foreach line [split $mountinfo "\n"] { + #review - a more specific mount target might exist that includes the drive letter as part of the mount point name and is a longer prefix of the localbase_path + #- we should probably look for the longest prefix match rather than just the drive letter + if {[regexp -nocase -- [string cat ^$drive_letter {:\\\s+on\s+(\S+)}] $line _match mount_point]} { + set link_target [file join $mount_point $tail $suffix_path] + break + } + } + if {$link_target eq ""} { + #review - under what circumstances could this happen? If the drive letter doesn't match any mount points, then /mnt/drive_letter should generally already have been found above above + # - However, it may be possible for /mnt/drive_Letter to still exist even if it's not reflected in the output of mount or the output of mount is in an unexpected format. + + #nothing in mount result matches the drive letter - try looking for a mount point under /mnt with the drive letter as the name + if {[file exists /mnt/$drive_letter]} { + set link_target [file join /mnt/$drive_letter $tail $suffix_path] + } else { + if {$drive_letter eq [string tolower $drive_letter]]} { + set op_drive_letter [string toupper $drive_letter] + } else { + set op_drive_letter [string tolower $drive_letter] + } + if {[file exists /mnt/$op_drive_letter]} { + set link_target [file join /mnt/$op_drive_letter $tail $suffix_path] + } else { + #leave as is except for backslashes converted to forward + #- probably won't resolve correctly unless the unix system has a folder named drive_letter: in the current folder with a copy of the original filestructure. + set link_target [file join $localbase_path $suffix_path] + } + } + } else { + #shortcut basepath is a windows path with drive letter and we found a matching mount point - link_target is set to the resolved path + } + } else { + #shortcut basepath doesn't match expected windows path format - just join it with the suffix and hope for the best + #could be something like a network path or it could be something else entirely + set link_target [file join $localbase_path $suffix_path] + } + } + } + + # ---------------------------------------------------------------------- + #todo - get Data strings by parsing contents starting at $next_start + #stored in following order: + # description + # relative path + # working directory + # command line arguments + # icon location + + #Data strings format: + # 2 bytes: number of characters in the string + # following: The string. ASCII or UTF-16 little-endian string + + set datastring_dict [Contents_Get_DataStrings $contents $next_start] + + # ---------------------------------------------------------------------- + + set file_attributes [Header_Get_FileAttributes $contents] + set linktargetidlist [Get_LinkTargetIDList_iteminfo $contents] + + set target_type_info [Get_target_type $contents $file_attributes] + set target_type [dict get $target_type_info type] + set target_type_mech [dict get $target_type_info mechanism] + if {$target_type eq "unknown"} { + if {[file exists $link_target]} { + set target_type [file type $link_target] + set target_type_mech "filesystem" + } + } + + set result [dict create\ + link_target $link_target\ + link_flags $flags_enabled\ + file_attributes $file_attributes\ + creation_time [Header_Get_CreationTime $contents]\ + access_time [Header_Get_AccessTime $contents]\ + write_time [Header_Get_WriteTime $contents]\ + target_length [Header_Get_FileSize $contents]\ + icon_index ""\ + showwnd "$showwnd"\ + hotkey [Header_Get_HotKey $contents]\ + target_type $target_type\ + target_type_mech $target_type_mech\ + idlist $linktargetidlist\ + linkinfo $linkfields\ + ] + #relative_path "?" + } + + proc file_check_header {path} { + #*** !doctools + #[call [fun file_check_header] [arg path] ] + #[para]Return 0|1 + #[para]Determines if the .lnk file specified in path has a valid header for a windows shortcut + set c [Get_contents $path 20] + return [Contents_check_header $c] + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::resolve + @cmd -name punk::winlnk::resolve\ + -summary\ + "Return information about a .lnk file (windows shortcut)"\ + -help\ + "Return a dict of info obtained by parsing the binary data in a windows .lnk file. + If the .lnk header check fails, then the .lnk file probably isn't really a shortcut + file and the dictionary will contain an 'error' key." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc resolve {path} { + #*** !doctools + #[call [fun resolve] [arg path] ] + #[para] Return a dict of info obtained by parsing the binary data in a windows .lnk file + #[para] If the .lnk header check fails, then the .lnk file probably isn't really a shortcut file and the dictionary will contain an 'error' key + set c [Get_contents $path] + if {[Contents_check_header $c]} { + return [Contents_Get_Info $c] + } else { + return [dict create error "lnk_header_check_failed"] + } + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::file_show_info + @cmd -name punk::winlnk::file_show_info\ + -summary\ + "Show information about a .lnk file (windows shortcut)"\ + -help\ + "Print to stdout the information obtained by parsing the binary data in a windows .lnk file, in a human readable format. + If the .lnk header check fails, then the .lnk file probably isn't really a shortcut file and an error message will be printed." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc file_show_info {path} { + package require punk::lib + #punk::lib::showdict [resolve $path] */@* + set field_queries [dict create\ + link_target link_target\ + link_flags link_flags/@*\ + file_attributes file_attributes\ + creation_time creation_time\ + access_time access_time\ + write_time write_time\ + target_length target_length\ + icon_index icon_index\ + showwnd showwnd\ + hotkey hotkey\ + target_type target_type\ + idlist idlist/@*/@*.@*\ + linkinfo linkinfo/@*.@*\ + ] + set info [resolve $path] + if {[dict exists $info error]} { + return "Error: [dict get $info error]" + } else { + set querystring "" + foreach field [dict keys $info] { + if {[dict exists $field_queries $field]} { + append querystring "[dict get $field_queries $field] " + } else { + append querystring "$field " + } + } + puts "querystring: $querystring" + return [punk::lib::showdict $info {*}$querystring] + } + } + + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::winlnk::target + @cmd -name punk::winlnk::target\ + -summary\ + "Return the target path of a .lnk file (windows shortcut)"\ + -help\ + "Return the target path of the .lnk file specified in path. + This is a convenience function that extracts the target path from the .lnk file and returns it directly, + without all the additional information that resolve provides. If the .lnk header check fails, then + the .lnk file probably isn't really a shortcut file and an error message will be returned." + @values -min 1 -max 1 + path -type string -help "Path to the .lnk file to resolve" + }] + } + proc target {path} { + #*** !doctools + #[call [fun target] [arg path] ] + #[para]Return the target path of the .lnk file specified in path + set info [resolve $path] + if {[dict exists $info error]} { + error [dict get $info error] + } else { + return [dict get $info link_target] + } + } + + proc target_type {path} { + set content [Get_contents $path] + if {![Contents_check_header $content]} { + error "lnk_header_check_failed" + } + set info [Contents_Get_Info $content] + return [dict get $info target_type] + } + + proc Get_target_type {content file_attributes} { + #determine type based on info in the .lnk file, such as file attributes and link flags + + if {"DIRECTORY" in $file_attributes} { + return [dict create type directory mechanism file_attributes]" + } elseif {"ARCHIVE" in $file_attributes} { + return [dict create type file mechanism file_attributes] + } else { + set iteminfo [Get_LinkTargetIDList_iteminfo $content] + if {[llength $iteminfo] > 0} { + set first_item [lindex $iteminfo 0] + set first_item_type [dict get $first_item type] + set saw_2f 0 + switch -- $first_item_type { + "1F" { + #plain files and folders always seem to have a first item type of 1F + #so does "local disk" + set type_so_far "unknown" + #For a file, we may first see multiple items of type 32 (directory) as we go through the folder structure, + #and then finally an item of type 31 (file) at the end. + #For a network location, we may see an item of type 2F. + #So we need to loop through all the items and keep track of what we've seen so far. + foreach item $iteminfo { + set item_type [dict get $item type] + if {$item_type eq "31"} { + set type_so_far "directory" + } elseif {$item_type eq "32"} { + return [dict create type file mechanism idlist] + } elseif {$item_type eq "2F"} { + set saw_2f 1 + } + } + if {$type_so_far eq "unknown" && $saw_2f} { + return [dict create type "local disk" mechanism idlist] + } + return [dict create type $type_so_far mechanism idlist] + } + } + return [dict create type "unknown" mechanism idlist] + } else { + return [dict create type "unknown" mechanism idlist] + } + } + } + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::winlnk::lib { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::winlnk::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::winlnk::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +#tcl::namespace::eval punk::winlnk::system { + #*** !doctools + #[subsection {Namespace punk::winlnk::system}] + #[para] Internal functions that are not part of the API + + + +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +namespace eval ::punk::args::register { + #use fully qualified so 8.6 doesn't find existing var in global namespace + lappend ::punk::args::register::NAMESPACES ::punk::winlnk +} +## Ready +package provide punk::winlnk [tcl::namespace::eval punk::winlnk { + variable pkg punk::winlnk + variable version + set version 0.1.1 +}] +return + +#*** !doctools +#[manpage_end] + diff --git a/src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm index a876d781..9079dbbc 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm @@ -196,7 +196,8 @@ namespace eval punk::winpath { #https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file #according to the above: Use any character in the current code page for a name, including Unicode characters and characters in the extended character set (128–255), except for the following: set reserved [list < > : \" / \\ | ? *] - + #embedded nulls (\0) are also disallowed - but these are also disallowed on unix-like platforms. + set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] #we need to exclude things like path/.. path/. foreach seg [file split $path] { @@ -208,6 +209,14 @@ namespace eval punk::winpath { #/./ /../ segments don't require protection - keep checking. continue } + if {[string toupper [file rootname $seg]] in $windows_reserved_names} { + #windows reserved names + #there are reports that these names aren't usable even with file extension - e.g that CON.txt is reserved and can't be created by some standard tools. + #In practice on windows 11 in 2026, cmd.exe,notepad,explorer and powershell seem to handle creation and access of CON.txt and PRN.txt etc without issue. + # the windows documentation reference above however still states that these names with an extension should be avoided. + #For this reason - we will still treat these as reserved and require protection with dos device syntax - even though in practice they seem to be usable without it. + return 1 + } #only check for actual space as other whitespace seems to work without being stripped #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph