You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
8592 lines
424 KiB
8592 lines
424 KiB
# -*- tcl -*- |
|
# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from <pkg>-buildversion.txt |
|
# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm |
|
# |
|
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
|
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# (C) 2024 |
|
# |
|
# @@ Meta Begin |
|
# Application punk::lib 999999.0a1.0 |
|
# Meta platform tcl |
|
# Meta license BSD |
|
# @@ Meta End |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# doctools header |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#*** !doctools |
|
#[manpage_begin punkshell_module_punk::lib 0 999999.0a1.0] |
|
#[copyright "2024"] |
|
#[titledesc {punk general utility functions}] [comment {-- Name section and table of contents description --}] |
|
#[moddesc {punk library}] [comment {-- Description at end of page heading --}] |
|
#[require punk::lib] |
|
#[keywords module utility lib] |
|
#[description] |
|
#[para]This is a set of utility functions that are commonly used across punk modules or are just considered to be general-purpose functions. |
|
#[para]The base set includes string and math functions but has no specific theme |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section Overview] |
|
#[para] overview of punk::lib |
|
#[subsection Concepts] |
|
#[para]The punk::lib modules should have no strong dependencies other than Tcl |
|
#[para]Dependendencies that only affect display or additional functionality may be included - but should fail gracefully if not present, and only when a function is called that uses one of these soft dependencies. |
|
#[para]This requirement for no strong dependencies, means that many utility functions that might otherwise seem worthy of inclusion here are not present. |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Requirements |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[subsection dependencies] |
|
#[para] packages used by punk::lib |
|
#[list_begin itemized] |
|
|
|
package require Tcl 8.6- |
|
package require punk::args |
|
package require punk::assertion |
|
#*** !doctools |
|
#[item] [package {Tcl 8.6-}] |
|
#[item] [package {punk::args}] |
|
|
|
# #package require frobz |
|
# #*** !doctools |
|
# #[item] [package {frobz}] |
|
|
|
#*** !doctools |
|
#[list_end] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section API] |
|
|
|
|
|
tcl::namespace::eval punk::lib::ensemble { |
|
#wiki.tcl-lang.org/page/ensemble+extend |
|
# extend an ensemble-like routine with the routines in some namespace |
|
|
|
#NOTE - the extension ns becomes the '-namespace <extension_ns>' for the original routine name, |
|
#with -unknown handling the original subcommands. |
|
#This makes the original ensemble harder to introspect! |
|
#e.g (the original -map or -namespace not visible) |
|
#In this specific case (which, being published on the wiki might be common in the wild) |
|
#we could call {*}[namespace ensemble configure $routine -unknown] $routine <bogussubcommand> |
|
#and then detect that the first resulting word is an ensemble |
|
#For arbitrary '-unknown scripts' - sensible introspection is likely not possible |
|
|
|
proc extend {routine extension} { |
|
if {![string match ::* $routine]} { |
|
set resolved [uplevel 1 [list ::tcl::namespace::which $routine]] |
|
if {$resolved eq {}} { |
|
error [list {no such routine} $routine] |
|
} |
|
set routine $resolved |
|
} |
|
set routinens [tcl::namespace::qualifiers $routine] |
|
if {$routinens eq {::}} { |
|
set routinens {} |
|
} |
|
set routinetail [tcl::namespace::tail $routine] |
|
|
|
if {![string match ::* $extension]} { |
|
set extension [uplevel 1 [list [tcl::namespace::which namespace] current]]::$extension |
|
} |
|
|
|
if {![tcl::namespace::exists $extension]} { |
|
error [list {no such namespace} $extension] |
|
} |
|
|
|
set extension [tcl::namespace::eval $extension [list [tcl::namespace::which namespace] current]] |
|
|
|
tcl::namespace::eval $extension [list [tcl::namespace::which namespace] export *] |
|
|
|
while 1 { |
|
set renamed ${routinens}::${routinetail}_[clock clicks] ;#clock clicks unlikely to collide when not directly consecutive such as: list [clock clicks] [clock clicks] |
|
if {[tcl::namespace::which $renamed] eq {}} break |
|
} |
|
|
|
rename $routine $renamed |
|
|
|
tcl::namespace::eval $extension [ |
|
list namespace ensemble create -command $routine -unknown [ |
|
list apply {{renamed ensemble routine args} { |
|
list $renamed $routine |
|
}} $renamed |
|
] |
|
] |
|
|
|
return $routine |
|
} |
|
} |
|
|
|
# some (?) tcl bug check procs needed to exist before main punk::lib namespaces are evaluated |
|
tcl::namespace::eval punk::lib::check { |
|
#These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) |
|
#Not any sort of comprehensive check of known tcl bugs. |
|
#These are reported in warning output of 'help tcl' - or used for workarounds in some cases. |
|
|
|
proc has_tclbug_caseinsensitiveglob_windows {} { |
|
#https://core.tcl-lang.org/tcl/tktview/108904173c |
|
|
|
set bug 0 ;#default only |
|
if {"windows" ne $::tcl_platform(platform)} { |
|
set bug 0 |
|
} else { |
|
if {![catch {file tempdir} tmpdir]} { |
|
#tcl 9+ has 'file tempdir' |
|
set testfile [file join $tmpdir "bugtest"] |
|
} else { |
|
#fallback for older tcl versions - use env TEMP/TMP or current directory |
|
set tmpdir "" |
|
foreach e {TEMP TMP} { |
|
if {[info exists ::env($e)] && [file isdirectory ::env($e)]} { |
|
set tmpdir ::env($e) |
|
break |
|
} |
|
} |
|
if {$tmpdir eq ""} { |
|
#no env vars - fallback to current directory |
|
set tmpdir [pwd] |
|
} |
|
set testfile [file join $tmpdir "bugtest"] |
|
} |
|
|
|
set fd [open $testfile w] |
|
puts $fd test |
|
close $fd |
|
set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*] |
|
if {[file exists $testfile]} { |
|
file delete $testfile |
|
} |
|
foreach r $globresult { |
|
if {$r ne "bugtest"} { |
|
set bug 1 |
|
break |
|
} |
|
} |
|
} |
|
#possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized |
|
# to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation. |
|
return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results" level medium] |
|
} |
|
|
|
#todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies. |
|
# but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module, |
|
# or at least checking for an existing mounted case-insensitive filesystem. |
|
# A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like. |
|
# It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix. |
|
|
|
|
|
proc has_tclbug_regexp_emptystring {} { |
|
#The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces |
|
#This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, |
|
#but as an apparent violation of Tcl's normal parsing rules - was evidently seen as a bug and fixed in: |
|
#https://core.tcl-lang.org/tcl/info/cb03e57a (tcl 9.0.3+ ?) |
|
set bug [expr {![catch {regexp {} [error should_error]}]}] |
|
return [dict create bug $bug bugref cb03e57a description {regexp emptystring first argument over-optimised - difference in compiled vs traced behaviour.} level minor] |
|
} |
|
proc has_tclbug_lsearch_sorted_inline_subindices {} { |
|
if {[catch {lsearch -sorted -subindices -inline -index 0 {{a 1} {a 2} {b 3} {c 4} {c 5}} b} result]} { |
|
#probably tcl version doesn't support all options |
|
set bug 0 |
|
} else { |
|
set bug [expr {$result ne "b"}] |
|
} |
|
set description "lsearch -sorted with -subindices -inline - incorrect result." |
|
return [dict create bug $bug bugref bc4ac0 description $description level minor] |
|
} |
|
proc has_tclbug_script_var {} { |
|
|
|
set script {set j [list spud] ; list} |
|
append script \n |
|
uplevel #0 $script |
|
set rep1 [tcl::unsupported::representation $::j] |
|
set script "" |
|
set rep2 [tcl::unsupported::representation $::j] |
|
|
|
set nostring1 [string match "*no string" $rep1] |
|
set nostring2 [string match "*no string" $rep2] |
|
|
|
#we assume it should have no string rep in either case |
|
#Review: check Tcl versions for behaviour/consistency |
|
if {!$nostring2} { |
|
set bug true |
|
} else { |
|
set bug false |
|
} |
|
set description "string rep for list variable in script generated when script changed\n(not an acknowledged/reported bug)" |
|
return [dict create bug $bug bugref "" description $description level minor] |
|
} |
|
proc has_tclbug_lsearch_strideallinline {} { |
|
#bug only occurs with single -index value combined with -stride -all -inline -subindices |
|
#https://core.tcl-lang.org/tcl/tktview/5a1aaa201d |
|
if {[catch {lsearch -stride 3 -all -inline -index 1 -subindices {a1 a2 a3} *} result]} { |
|
#we aren't looking for an error result - error most likely indicates tcl too old to support -stride |
|
set bug 0 |
|
} else { |
|
set bug [expr {$result ne "a2"}] |
|
} |
|
set description "lsearch -stride with -subindices -inline -all and single index - incorrect results." |
|
return [dict create bug $bug bugref 5a1aaa201d description $description level major] |
|
} |
|
proc has_tclbug_lseq_sign {} { |
|
#https://core.tcl-lang.org/tcl/tktview/999b6966b2 |
|
if {[catch {lseq 1 10}]} { |
|
set bug 0 |
|
} else { |
|
set r1 [lseq 1 10 -9] |
|
set r2 [lseq 1 10 -10] |
|
set bug [expr {$r1 ne $r2}] |
|
} |
|
set description "lseq step sign not matching sequence direction - inconsistent results." |
|
return [dict create bug $bug bugref 999b6966b2 description $description level minor] |
|
} |
|
|
|
proc has_tclbug_list_quoting_emptyjoin {} { |
|
#https://core.tcl-lang.org/tcl/tktview/e38dce74e2 |
|
set v1 [list {*}[lindex #foo] {*}[]] ;#can return "#foo" instead of "{#foo}" under some beta 9 releases |
|
set v2 [list #foo] ;#normal tcl list quoting for 1st element that looks like a comment -> "{#foo}" |
|
set bug [expr {![string equal $v1 $v2]}] ;#if they're not equal - we have the bug. |
|
set description "lists elements not properly quoted in some cases. e.g 'list {*}[lindex {etc #foo} 1] {*}[list]' (#foo not braced)" |
|
return [dict create bug $bug bugref e38dc74e2 description $description level medium] |
|
} |
|
|
|
proc has_tclbug_safeinterp_compile {{show 0}} { |
|
#ensemble calls within safe interp not compiled |
|
#https://core.tcl-lang.org/tcl/tktview/1095bf7f756f9aed6bde |
|
namespace eval [namespace current]::testcompile { |
|
proc ensembletest {} {string index a 0} |
|
} |
|
|
|
set has_bug 0 |
|
|
|
set bytecode_outer [tcl::unsupported::disassemble proc [namespace current]::testcompile::ensembletest] |
|
if {$show} { |
|
puts outer: |
|
puts $bytecode_outer |
|
} |
|
if {![interp issafe]} { |
|
#test of safe subinterp only needed if we aren't already in a safe interp |
|
if {![catch { |
|
interp create x -safe |
|
} errMsg]} { |
|
x eval {proc ensembletest {} {string index a 0}} |
|
set bytecode_safe [x eval {tcl::unsupported::disassemble proc ::ensembletest}] |
|
if {$show} { |
|
puts safe: |
|
puts $bytecode_safe |
|
} |
|
interp delete x |
|
#mainly we expect the safe interp might contain invokeStk - indicating not byte compiled (or we would see strindex instead) |
|
#It's possible the interp we're running in is also not compiling ensembles. |
|
#we could then get a result of 2 - which still indicates a problem |
|
if {[string last "invokeStk" $bytecode_safe] >= 1} { |
|
incr has_bug |
|
} |
|
} else { |
|
#our failure to create a safe interp here doesn't necessarily mean the Tcl version doesn't have the problem - but we could end up returning zero if somehow safe interp can't be created from unsafe interp? |
|
#unlikely - but we should warn |
|
puts stderr "Unable to create a safe sub-interp to test - result only indicates status of current interpreter" |
|
} |
|
} |
|
|
|
namespace delete [namespace current]::testcompile |
|
|
|
if {[string last "invokeStk" $bytecode_outer] >= 1} { |
|
incr has_bug |
|
} |
|
set description "ensemble commands not compiled in safe interps - heavy performance impact in safe interps" |
|
return [dict create bug $has_bug bugref 1095bf7f756f9aed6bde description $description level major] |
|
} |
|
} |
|
|
|
tcl::namespace::eval punk::lib::compat { |
|
#*** !doctools |
|
#[subsection {Namespace punk::lib::compat}] |
|
#[para] compatibility functions for features that may not be available in earlier Tcl versions |
|
#[para] These are generally 'forward compatibility' functions ie allowing earlier versions to use later features/idioms by using a Tcl-only version of a missing builtin. |
|
#[para] Such Tcl-only versions will inevitably be less performant - perhaps significantly so. |
|
|
|
#*** !doctools |
|
#[list_begin definitions] |
|
|
|
|
|
if {[catch { |
|
::tcl::mathfunc::isinf 1 |
|
}]} { |
|
#review - doesn't seem to cause int-rep of the value to shimmer - but does it reasonably emulate what tcl9's isinf does? |
|
proc ::tcl::mathfunc::isinf {v} { |
|
string match -nocase *inf* $v |
|
} |
|
} |
|
|
|
|
|
if {"::lremove" ne [info commands ::lremove]} { |
|
#puts stderr "Warning - no built-in lremove" |
|
interp alias {} lremove {} ::punk::lib::compat::lremove |
|
} |
|
proc lremove {list args} { |
|
#*** !doctools |
|
#[call [fun lremove] [arg list] [opt {index ...}]] |
|
#[para] Forwards compatible lremove for versions 8.6 or less to support equivalent 8.7 lremove |
|
|
|
set data [lmap v $list {list data $v}] |
|
foreach doomed_index $args { |
|
if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} |
|
lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value |
|
} |
|
set keep [lsearch -all -inline -not -exact $data x] |
|
return [lsearch -all -inline -index 1 -subindices $keep *] |
|
} |
|
#not significantly different in performance over test of 100 elements - getting somewhere near 10% for 1k integers |
|
proc lremove2 {list args} { |
|
set data [lmap v $list {list data $v}] |
|
foreach doomed_index $args { |
|
if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"} |
|
lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value |
|
} |
|
set keep [lsearch -all -inline -not -exact $data x] |
|
return [lmap v $keep {lindex $v 1}] |
|
} |
|
#outside of lmap - don't know of any particularly nice ways to flatten to subindex 1 of each element.. |
|
#flattening then lsearch with -stride and * would be nice - but it's not avail in 8.6 |
|
|
|
if {![info exists ::auto_index(readFile)]} { |
|
if {[info commands ::readFile] eq ""} { |
|
proc ::readFile {filename {mode text}} { |
|
#readFile not seen in auto_index or as command: installed by punk::lib |
|
# Parse the arguments |
|
set MODES {binary text} |
|
set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] |
|
set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] |
|
|
|
# Read the file |
|
set f [open $filename [dict get {text r binary rb} $mode]] |
|
try { |
|
return [read $f] |
|
} finally { |
|
close $f |
|
} |
|
} |
|
} |
|
} |
|
if {![info exists ::auto_index(writeFile)]} { |
|
if {[info commands ::writeFile] eq ""} { |
|
proc ::writeFile {args} { |
|
#writeFile not seen in auto_index or as command: installed by punk::lib |
|
# Parse the arguments |
|
switch [llength $args] { |
|
2 { |
|
lassign $args filename data |
|
set mode text |
|
} |
|
3 { |
|
lassign $args filename mode data |
|
set MODES {binary text} |
|
set ERR [list -level 1 -errorcode [list TCL LOOKUP MODE $mode]] |
|
set mode [tcl::prefix match -message "mode" -error $ERR $MODES $mode] |
|
} |
|
default { |
|
set COMMAND [lindex [info level 0] 0] |
|
return -code error -errorcode {TCL WRONGARGS} "wrong # args: should be \"$COMMAND filename ?mode? data\"" |
|
} |
|
} |
|
|
|
# Write the File |
|
set f [open $filename [dict get {text w binary wb} $mode]] |
|
try { |
|
puts -nonewline $f $data |
|
} finally { |
|
close $f |
|
} |
|
} |
|
} |
|
} |
|
|
|
if {"::lpop" ne [info commands ::lpop]} { |
|
#puts stderr "Warning - no built-in lpop" |
|
interp alias {} lpop {} ::punk::lib::compat::lpop |
|
punk::args::set_idalias ::punk::lib::compat::lpop ::lpop ;#point to the definition of ::lpop defined in punk::args::tclcore |
|
} |
|
proc lpop {lvar args} { |
|
#*** !doctools |
|
#[call [fun lpop] [arg listvar] [opt {index}]] |
|
#[para] Forwards compatible lpop for versions 8.6 or less to support equivalent 8.7 lpop |
|
#upvar $lvar l |
|
upvar 1 $lvar l |
|
if {![llength $args]} { |
|
set args [list end] |
|
} |
|
set v [lindex $l {*}$args] |
|
set newlist $l |
|
|
|
set path [list] |
|
set subl $l |
|
for {set i 0} {$i < [llength $args]} {incr i} { |
|
set idx [lindex $args $i] |
|
if {![llength [lrange $subl $idx $idx]]} { |
|
error "tcl_lpop index \"$idx\" out of range" |
|
} |
|
lappend path [lindex $args $i] |
|
set subl [lindex $l {*}$path] |
|
} |
|
|
|
set sublist_path [lrange $args 0 end-1] |
|
set tailidx [lindex $args end] |
|
if {![llength $sublist_path]} { |
|
#set newlist [lremove $newlist $tailidx] |
|
#set newlist [lreplace $newlist $tailidx $tailidx] |
|
set newlist [lreplace $newlist[set newlist {}] $tailidx $tailidx] |
|
#we avoid use of ledit here because if lpop is running as compat - ledit may also not be available as a builtin. |
|
} else { |
|
set sublist [lindex $newlist {*}$sublist_path] |
|
#set sublist [lremove $sublist $tailidx] |
|
#set sublist [lreplace $sublist $tailidx $tailidx] |
|
set sublist [lreplace $sublist[set sublist {}] $tailidx $tailidx] |
|
lset newlist {*}$sublist_path $sublist |
|
} |
|
#puts "[set l] -> $newlist" |
|
set l $newlist |
|
return $v |
|
} |
|
if {"::ledit" ni [info commands ::ledit]} { |
|
interp alias {} ledit {} ::punk::lib::compat::ledit |
|
punk::args::set_idalias ::punk::lib::compat::ledit ::ledit |
|
} |
|
proc ledit {lvar first last args} { |
|
upvar $lvar l |
|
#use lindex_resolve to support for example: ledit lst end+1 end+1 h i |
|
set fidx [punk::lib::lindex_resolve [llength $l] $first] |
|
switch -exact -- $fidx { |
|
-Inf { |
|
#index below lower bound |
|
set pre [list] |
|
set fidx -1 |
|
} |
|
Inf { |
|
#first index position is greater than index of last element in the list |
|
set pre [lrange $l 0 end] |
|
set fidx [llength $l] |
|
} |
|
default { |
|
#set pre [lrange $l 0 $first-1] |
|
set pre [lrange $l 0 $fidx-1] |
|
} |
|
} |
|
set lidx [punk::lib::lindex_resolve [llength $l] $last] |
|
if {$lidx < $fidx} { |
|
#from ledit man page: |
|
#If last is less than first, then any specified elements will be inserted into the list before the element specified by first with no elements being deleted. |
|
set post [lrange $l $fidx end] |
|
} else { |
|
#set post [lrange $l $last+1 end] |
|
switch -exact -- $lidx { |
|
-Inf { |
|
#index below lower bound |
|
set post [lrange $l 0 end] |
|
} |
|
Inf { |
|
#index above upper bound |
|
set post [list] |
|
} |
|
default { |
|
set post [lrange $l $lidx+1 end] |
|
} |
|
} |
|
} |
|
#switch -exact -- $lidx { |
|
# -Inf { |
|
# #index below lower bound |
|
# set post [lrange $l 0 end] |
|
# } |
|
# Inf { |
|
# #index above upper bound |
|
# set post [list] |
|
# } |
|
# default { |
|
# if {$lidx < $fidx} { |
|
# #from ledit man page: |
|
# #If last is less than first, then any specified elements will be inserted into the list before the element specified by first with no elements being deleted. |
|
# set post [lrange $l $fidx end] |
|
# } else { |
|
# #set post [lrange $l $last+1 end] |
|
# set post [lrange $l $lidx+1 end] |
|
# } |
|
# } |
|
#} |
|
set l [list {*}$pre {*}$args {*}$post] |
|
} |
|
|
|
|
|
#slight isolation - varnames don't leak - but calling context vars can be affected |
|
proc lmaptcl2 {varnames list script} { |
|
set result [list] |
|
set values [list] |
|
foreach v $varnames { |
|
lappend values "\$$v" |
|
} |
|
set linkvars [uplevel 1 [list ::tcl::info::vars]] |
|
set nscaller [uplevel 1 [list ::tcl::namespace::current]] |
|
|
|
set apply_script "" |
|
foreach vname $linkvars { |
|
append apply_script [string map [list %vname% $vname]\ |
|
{upvar 2 %vname% %vname%}\ |
|
] \n |
|
} |
|
append apply_script $script \n |
|
|
|
#puts "--> $apply_script" |
|
foreach $varnames $list { |
|
lappend result [apply {*}{ |
|
} [list {*}{ |
|
} $varnames {*}{ |
|
} $apply_script {*}{ |
|
} $nscaller {*}{ |
|
} |
|
] {*}[subst $values] |
|
] |
|
} |
|
return $result |
|
} |
|
|
|
if {"::lmap" ne [info commands ::lmap]} { |
|
#puts stderr "Warning - no built-in lpop" |
|
interp alias {} lmap {} ::punk::lib::compat::lmaptcl |
|
} |
|
#lmap came in Tcl 8.6 - so probably not much need for a tcl forward compatibility version - but here it is anyway |
|
proc lmaptcl {varnames list script} { |
|
set result [list] |
|
set varlist [list] |
|
foreach varname $varnames { |
|
upvar 1 $varname var_$varname ;#ensure no collisions with vars in this proc |
|
lappend varlist var_$varname |
|
} |
|
foreach $varlist $list { |
|
lappend result [uplevel 1 $script] |
|
} |
|
return $result |
|
} |
|
|
|
#tcl8.7/9 compatibility for 8.6 |
|
if {[info commands ::tcl::string::insert] eq ""} { |
|
#https://wiki.tcl-lang.org/page/string+insert |
|
# Pure Tcl implementation of [string insert] command. |
|
proc ::tcl::string::insert {string index insertString} { |
|
# Convert end-relative and TIP 176 indexes to simple integers. |
|
if {[regexp -expanded { |
|
^(end(?![\t\n\v\f\r ]) # "end" is never followed by whitespace |
|
|[\t\n\v\f\r ]*[+-]?\d+) # m, with optional leading whitespace |
|
(?:([+-]) # op, omitted when index is "end" |
|
([+-]?\d+))? # n, omitted when index is "end" |
|
[\t\n\v\f\r ]*$ # optional whitespace (unless "end") |
|
} $index _ m op n]} { |
|
# Convert first index to an integer. |
|
switch $m { |
|
end {set index [string length $string]} |
|
default {scan $m %d index} |
|
} |
|
|
|
# Add or subtract second index, if provided. |
|
switch $op { |
|
+ {set index [expr {$index + $n}]} |
|
- {set index [expr {$index - $n}]} |
|
} |
|
} elseif {![string is integer -strict $index]} { |
|
# Reject invalid indexes. |
|
return -code error "bad index \"$index\": must be\ |
|
integer?\[+-\]integer? or end?\[+-\]integer?" |
|
} |
|
|
|
# Concatenate the pre-insert, insertion, and post-insert strings. |
|
string cat [string range $string 0 [expr {$index - 1}]] $insertString\ |
|
[string range $string $index end] |
|
} |
|
|
|
# Bind [string insert] to [::tcl::string::insert]. |
|
tcl::namespace::ensemble configure string -map [tcl::dict::replace\ |
|
[tcl::namespace::ensemble configure string -map]\ |
|
insert ::tcl::string::insert] |
|
} |
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace punk::lib::compat ---}] |
|
} |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# Base namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval punk::lib { |
|
variable PUNKARGS |
|
tcl::namespace::export * |
|
variable has_struct_list |
|
set has_struct_list [expr {![catch {package require struct::list}]}] |
|
variable has_struct_set |
|
set has_struct_set [expr {![catch {package require struct::set}]}] |
|
variable has_punk_ansi |
|
set has_punk_ansi [expr {![catch {package require punk::ansi}]}] |
|
set has_twapi 0 |
|
if {"windows" eq $::tcl_platform(platform)} { |
|
set has_twapi [expr {![catch {package require twapi}]}] |
|
} |
|
if {![llength [tcl::info::commands ::punk::lib::assert]]} { |
|
tcl::namespace::import ::punk::assertion::assert |
|
punk::assertion::active 1 |
|
} |
|
|
|
namespace eval argdoc { |
|
#non-colour SGR codes |
|
set I "\x1b\[3m" ;# [a+ italic] |
|
set NI "\x1b\[23m" ;# [a+ noitalic] |
|
set B "\x1b\[1m" ;# [a+ bold] |
|
set N "\x1b\[22m" ;# [a+ normal] |
|
set T "\x1b\[1\;4m" ;# [a+ bold underline] |
|
set NT "\x1b\[22\;24m\x1b\[4:0m" ;# [a+ normal nounderline] |
|
} |
|
|
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::is_main_script |
|
@cmd -name punk::lib::is_main_script\ |
|
-summary\ |
|
"Test if current script was launched directly."\ |
|
-help\ |
|
"The ${$B}main script${$N} is the primary script that is executed |
|
by the interpreter, e.g. tclsh or wish. |
|
(as opposed to being loaded by the 'source' command) |
|
|
|
see https://wiki.tcl-lang.org/page/main+script" |
|
@values -min 0 -max 0 |
|
}] |
|
} |
|
proc is_main_script {} { |
|
#see https://wiki.tcl-lang.org/page/main+script |
|
if {[info script] ne "" && [info exists ::argv0] |
|
&& |
|
[file dirname [file normalize [file join [info script] ...]]] |
|
eq |
|
[file dirname [file normalize [file join $::argv0 ...]]] |
|
} { |
|
return true |
|
} else { |
|
return false |
|
} |
|
} |
|
|
|
|
|
|
|
# == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == |
|
# Maintenance - This is the primary source for tm_version... functions |
|
# - certain packages script require these but without package dependency |
|
# - 1 punk boot script |
|
# - 2 packagetrace module |
|
# - These should be updated to sync with this |
|
# == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == |
|
proc tm_version_isvalid {versionpart} { |
|
#Needs to be suitable for use with Tcl's 'package vcompare' |
|
if {![catch [list package vcompare $versionpart $versionpart]]} { |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
proc tm_version_major {version} { |
|
if {![tm_version_isvalid $version]} { |
|
error "Invalid version '$version' is not a proper Tcl module version number" |
|
} |
|
set firstpart [lindex [split $version .] 0] |
|
#check for a/b in first segment |
|
if {[string is integer -strict $firstpart]} { |
|
return $firstpart |
|
} |
|
if {[string first a $firstpart] > 0} { |
|
return [lindex [split $firstpart a] 0] |
|
} |
|
if {[string first b $firstpart] > 0} { |
|
return [lindex [split $firstpart b] 0] |
|
} |
|
error "tm_version_major unable to determine major version from version number '$version'" |
|
} |
|
proc tm_version_canonical {ver} { |
|
#accepts a single valid version only - not a bounded or unbounded spec |
|
if {![tm_version_isvalid $ver]} { |
|
error "tm_version_canonical version '$ver' is not valid for a package version" |
|
} |
|
set parts [split $ver .] |
|
set newparts [list] |
|
foreach o $parts { |
|
set trimmed [string trimleft $o 0] |
|
set firstnonzero [string index $trimmed 0] |
|
switch -exact -- $firstnonzero { |
|
"" { |
|
lappend newparts 0 |
|
} |
|
a - b { |
|
#e.g 000bnnnn -> bnnnnn |
|
set tailtrimmed [string trimleft [string range $trimmed 1 end] 0] |
|
if {$tailtrimmed eq ""} { |
|
set tailtrimmed 0 |
|
} |
|
lappend newparts 0$firstnonzero$tailtrimmed |
|
} |
|
default { |
|
#digit |
|
if {[string is integer -strict $trimmed]} { |
|
#e.g 0100 -> 100 |
|
lappend newparts $trimmed |
|
} else { |
|
#e.g 0100b003 -> 100b003 (still need to process tail) |
|
if {[set apos [string first a $trimmed]] > 0} { |
|
set lhs [string range $trimmed 0 $apos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch |
|
set rhs [string range $trimmed $apos+1 end] ;#assert rhs non-empty and only digits |
|
set rhs [string trimleft $rhs 0] |
|
if {$rhs eq ""} { |
|
set rhs 0 |
|
} |
|
lappend newparts ${lhs}a${rhs} |
|
} elseif {[set bpos [string first b $trimmed]] > 0} { |
|
set lhs [string range $trimmed 0 $bpos-1] ;#assert lhs non-empty and only digits or wouldn't be in this branch |
|
set rhs [string range $trimmed $bpos+1 end] ;#assert rhs non-empty and only digits |
|
set rhs [string trimleft $rhs 0] |
|
if {$rhs eq ""} { |
|
set rhs 0 |
|
} |
|
lappend newparts ${lhs}b${rhs} |
|
} else { |
|
#assert - shouldn't get here trimmed val should have been empty, an int or contained an a or b |
|
error "tm_version_canonical error - trimfail - unexpected" |
|
} |
|
} |
|
} |
|
} |
|
} |
|
return [join $newparts .] |
|
} |
|
proc tm_version_required_canonical {versionspec} { |
|
#also trim leading zero from any dottedpart? |
|
#Tcl *allows* leading zeros in any of the dotted parts - but they are not significant. |
|
#e.g 1.01 is equivalent to 1.1 and 01.001 |
|
#also 1b3 == 1b0003 |
|
|
|
if {[string trim $versionspec] eq ""} {return ""} ;#unspecified = any version |
|
set errmsg "tm_version_required_canonical - invalid version specification" |
|
if {[string first - $versionspec] < 0} { |
|
#no dash |
|
#looks like a minbounded version (ie a single version with no dash) convert to min-max form |
|
set from $versionspec |
|
if {![tm_version_isvalid $from]} { |
|
error "$errmsg '$versionpec'" |
|
} |
|
if {![catch {tm_version_major $from} majorv]} { |
|
set from [tm_version_canonical $from] |
|
return "${from}-[expr {$majorv +1}]" |
|
} else { |
|
error "$errmsg '$versionspec'" |
|
} |
|
} else { |
|
# min- or min-max |
|
#validation and canonicalisation (strip leading zeroes from each segment, including either side of a or b) |
|
set parts [split $versionspec -] ;#we expect only 2 parts |
|
lassign $parts from to |
|
if {![tm_version_isvalid $from]} { |
|
error "$errmsg '$versionspec'" |
|
} |
|
set from [tm_version_canonical $from] |
|
if {[llength $parts] == 2} { |
|
if {$to ne ""} { |
|
if {![tm_version_isvalid $to]} { |
|
error "$errmsg '$versionspec'" |
|
} |
|
set to [tm_version_canonical $to] |
|
return $from-$to |
|
} else { |
|
return $from- |
|
} |
|
} else { |
|
error "$errmsg '$versionspec'" |
|
} |
|
error "tm_version_required_canonical should have already returned a canonicalised versionspec - or produced an error with reason before this point" |
|
} |
|
} |
|
# end tm_version... functions |
|
# == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == == |
|
|
|
proc jtest {} { |
|
namespace eval jtest { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::lswap |
|
@cmd -name punk::lib::lswap\ |
|
-summary\ |
|
"Swap list values in-place"\ |
|
-help\ |
|
"Similar to struct::list swap, except it fully supports basic |
|
list index expressions such as 7-2 end-1 etc. |
|
|
|
struct::list swap doesn't support 'end' offsets, and only |
|
sometimes appears to support basic expressions, depending on the |
|
expression compared to the list length." |
|
@values -min 3 -max 3 |
|
lvar -type string -help\ |
|
"name of list variable" |
|
a -type indexexpression |
|
z -type indexexpression |
|
}] |
|
} |
|
} |
|
|
|
|
|
# -- --- |
|
#https://stackoverflow.com/questions/17631269/whats-the-best-way-to-join-two-lists |
|
#DKF's 2013 recommendation of using list {*}$first {*}$second seems not to apply in 2024 |
|
#8.6,8.7,9.0 - 'lappend first {*}$second' is many times faster - especially as list grows |
|
# Review and retest as new versions come out. |
|
# -- --- |
|
proc list_multi_append1 {lvar1 lvar2} { |
|
#clear winner in 2024 |
|
upvar $lvar1 l1 $lvar2 l2 |
|
lappend l1 {*}$l2 |
|
return $l1 |
|
} |
|
proc list_multi_append2 {lvar1 lvar2} { |
|
upvar $lvar1 l1 $lvar2 l2 |
|
set l1 [list {*}$l1 {*}$l2] |
|
} |
|
proc list_multi_append3 {lvar1 lvar2} { |
|
upvar $lvar1 l1 $lvar2 l2 |
|
set l1 [lindex [list [list {*}$l1 {*}$l2] [unset l1]] 0] |
|
} |
|
#testing e.g |
|
#set l1_reset {a b c} |
|
#set l2 {a b c d e f g} |
|
#set l1 $l1_reset |
|
#time {list_multi_append1 l1 l2} 1000 |
|
#set l1 $l1_reset |
|
#time {list_multi_append2 l1 l2} 1000 |
|
# -- --- |
|
|
|
|
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::lswap |
|
@cmd -name punk::lib::lswap\ |
|
-summary\ |
|
"Swap list values in-place"\ |
|
-help\ |
|
"Similar to struct::list swap, except it fully supports basic |
|
list index expressions such as 7-2 end-1 etc. |
|
|
|
struct::list swap doesn't support 'end' offsets, and only |
|
sometimes appears to support basic expressions, depending on the |
|
expression compared to the list length." |
|
@values -min 3 -max 3 |
|
lvar -type string -help\ |
|
"name of list variable" |
|
a -type indexexpression |
|
z -type indexexpression |
|
}] |
|
} |
|
proc lswap {lvar a z} { |
|
upvar $lvar l |
|
set len [llength $l] |
|
if {[lindex_resolve_basic $len $a] < 0 || [lindex_resolve_basic $len $z] < 0} { |
|
#lindex_resolve_basic returns only -Inf if out of range at either bound |
|
#if we didn't do this check - we could raise an error on second lset below - leaving list corrupted because only one lset occurred |
|
#(e.g using: lswap mylist end-2 end on a two element list) |
|
|
|
#on the unhapy path we can take time to check the nature of the out-of-boundness to give a nicer report |
|
#use full 'lindex_resolve' which can report which side via -Inf and Inf special results being lower and upper bound breaches respectively |
|
set a_index [lindex_resolve $len $a] |
|
set a_msg "" |
|
switch -- $a_index { |
|
-Inf { |
|
set a_msg "1st supplied index $a is below the lower bound for the list (0)" |
|
} |
|
Inf { |
|
set a_msg "1st supplied index $a is above the upper bound for the list ([llength $l])" |
|
} |
|
} |
|
set z_index [lindex_resolve $len $z] |
|
set z_msg "" |
|
switch -- $z_index { |
|
-Inf { |
|
set z_msg "2nd supplied index $z is below the lower bound for the list (0)" |
|
} |
|
Inf { |
|
set z_msg "2nd supplied index $z is above the upper bound for the list ([llength $l])" |
|
} |
|
} |
|
set errmsg "lswap cannot swap indices $a and $z" |
|
if {$a_msg ne ""} { |
|
append errmsg \n $a_msg |
|
} |
|
if {$z_msg ne ""} { |
|
append errmsg \n $z_msg |
|
} |
|
error $errmsg |
|
} |
|
set item2 [lindex $l $z] |
|
lset l $z [lindex $l $a] |
|
lset l $a $item2 |
|
return $l |
|
} |
|
#proc lswap2 {lvar a z} { |
|
# upvar $lvar l |
|
# #if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower |
|
# set l [concat [lrange $l 0 $a-1] [lindex $l $z] [lrange $l $a+1 $z-1] [lindex $l $a] [lrange $l $z+1 end]] |
|
#} |
|
|
|
proc lswap2 {lvar a z} { |
|
upvar $lvar l |
|
#if index a strictly less <= z we can do in one-liner for fun - but it's replacing whole list - so much slower |
|
set l [list {*}[lrange $l 0 $a-1] [lindex $l $z] {*}[lrange $l $a+1 $z-1] [lindex $l $a] {*}[lrange $l $z+1 end]] |
|
} |
|
|
|
#an experimental test of swapping vars without intermediate variables |
|
#It's an interesting idea - but probably of little to no practical use |
|
# - the swap_intvars3 version using intermediate var is faster in Tcl |
|
# - This is probably unsurprising - as it's simpler code. |
|
# Even if we tried this technique in c - the compiler would probably do a better job with the intermediate variable than with the math tricks. |
|
#proc swap_intvars {swapv1 swapv2} { |
|
# upvar $swapv1 _x $swapv2 _y |
|
# set _x [expr {[expr {$_x + $_y}] - [set _y $_x]}] |
|
#} |
|
#proc swap_intvars2 {swapv1 swapv2} { |
|
# upvar $swapv1 _x $swapv2 _y |
|
# set _x [expr {$_x ^ $_y}] |
|
# set _y [expr {$_x ^ $_y}] |
|
# set _x [expr {$_x ^ $_y}] |
|
#} |
|
#proc swap_intvars3 {swapv1 swapv2} { |
|
# #using intermediate variable |
|
# upvar $swapv1 _x $swapv2 _y |
|
# set z $_x |
|
# set _x $_y |
|
# set _y $z |
|
#} |
|
|
|
#*** !doctools |
|
#[subsection {Namespace punk::lib}] |
|
#[para] Core API functions for punk::lib |
|
#[list_begin definitions] |
|
|
|
if {[info commands lseq] ne ""} { |
|
#tcl 8.7+ lseq significantly faster, especially for larger ranges |
|
#The internal rep can be an 'arithseries' with no string representation |
|
#support minimal set from to |
|
proc range {from to {by 1}} { |
|
#note inconsistency with lseq 1 10 by -9 vs lseq 1 10 by -10 |
|
#https://core.tcl-lang.org/tcl/tktview/999b6966b2 |
|
lseq $from $to by $by |
|
} |
|
} else { |
|
#lseq accepts basic expressions e.g 4-2 for both arguments |
|
#e.g we can do lseq 0 [llength $list]-1 |
|
#if range is to be consistent with the lseq version above - it should support that, even though we don't support most lseq functionality in either wrapper. |
|
#our range function doesn't support double like lseq does. (deliberate) review |
|
proc range {from to {by ""}} { |
|
if {$by eq "0"} { |
|
#as per lseq, step (by) zero always gives no result |
|
return [list] |
|
} |
|
set to [offset_expr $to] |
|
set from [offset_expr $from] |
|
if {$by ne ""} { |
|
set by [offset_expr $by] |
|
} |
|
#assert $by is now empty string or an integer |
|
if {$to > $from} { |
|
switch -- $by { |
|
"" - 1 { |
|
set count [expr {($to -$from) + 1}] |
|
if {$from == 0} { |
|
return [lsearch -all [lrepeat $count 0] *] |
|
} else { |
|
incr from -1 |
|
return [lmap v [lrepeat $count 0] {incr from}] |
|
} |
|
} |
|
default { |
|
set count [expr {($to - $from + $by) / $by}] |
|
if {$count <= 0} { |
|
#return [list] |
|
#https://core.tcl-lang.org/tcl/tktview/999b6966b2 |
|
return [list $from] ;#review |
|
} |
|
set result [list] |
|
for {set i $from} {$i <= $to} {incr i $by} { |
|
lappend result $i |
|
} |
|
return $result |
|
|
|
#if we don't have lseq, we probably don't have lsearch -stride, which would make things simpler. |
|
#set count [expr {($to -$from) + 1}] |
|
#if {$from == 0} { |
|
# set fullrange [lsearch -all [lrepeat $count 0] *] |
|
#} else { |
|
# incr from -1 |
|
# set fullrange [lmap v [lrepeat $count 0] {incr from}] |
|
#} |
|
#set result [list] |
|
#for {set i 0} {$i < $count} {incr i} { |
|
# if {$i % $by == 0} { |
|
# lappend result [lindex $fullrange $i] |
|
# } |
|
#} |
|
#return $result |
|
} |
|
} |
|
#slower methods. |
|
#2) |
|
#set i -1 |
|
#set L [lrepeat $count 0] |
|
#lmap v $L {lset L [incr i] [incr from];lindex {}} |
|
#return $L |
|
#3) |
|
#set L {} |
|
#for {set i 0} {$i < $count} {incr i} { |
|
# lappend L [incr from] |
|
#} |
|
#return $L |
|
} elseif {$from > $to} { |
|
switch -- $by { |
|
"" - -1 { |
|
set count [expr {$from - $to} + 1] |
|
if {$to == 0} { |
|
return [lreverse [lsearch -all [lrepeat $count 0] *]] |
|
} else { |
|
incr from |
|
return [lmap v [lrepeat $count 0] {incr from -1}] |
|
} |
|
} |
|
default { |
|
set count [expr {($to - $from + $by) / $by}] |
|
if {$count <= 0} { |
|
#return [list] |
|
return [list $from] ;#review |
|
} |
|
set result [list] |
|
for {set i $from} {$i >= $to} {incr i $by} { |
|
lappend result $i |
|
} |
|
return $result |
|
} |
|
} |
|
|
|
#2) |
|
#set i -1 |
|
#set L [lrepeat $count 0] |
|
#lmap v $L {lset L [incr i] [incr from -1];lindex {}} |
|
#return $L |
|
#3) |
|
#set L {} |
|
#for {set i 0} {$i < $count} {incr i} { |
|
# lappend L [incr from -1] |
|
#} |
|
#return $L |
|
} else { |
|
return [list $from] |
|
} |
|
} |
|
} |
|
|
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::lzip |
|
@cmd -name punk::lib::lzip\ |
|
-summary\ |
|
"zip any number of lists together."\ |
|
-help\ |
|
"Conceptually equivalent to converting a list of rows |
|
to a list of columns. |
|
|
|
The number of returned lists (columns) will be equal to |
|
the length of the longest supplied list (row). |
|
If lengths of supplied lists don't match, empty strings |
|
will be inserted in the resulting lists. |
|
|
|
e.g lzip {a b c d e} {1 2 3 4} {x y z} |
|
-> {a 1 x} {b 2 y} {c 3 z} {d 4 {}} {3 {} {}} |
|
" |
|
@values -min 0 -max -1 |
|
list -type list -multiple 1 -optional 1 |
|
}] |
|
} |
|
proc lzip {args} { |
|
switch -- [llength $args] { |
|
0 {return {}} |
|
1 {return [lindex $args 0]} |
|
2 {return [::punk::lib::system::lzip2lists {*}$args]} |
|
3 {return [::punk::lib::system::lzip3lists {*}$args]} |
|
4 {return [::punk::lib::system::lzip4lists {*}$args]} |
|
5 {return [::punk::lib::system::lzip5lists {*}$args]} |
|
6 {return [::punk::lib::system::lzip6lists {*}$args]} |
|
7 {return [::punk::lib::system::lzip7lists {*}$args]} |
|
8 {return [::punk::lib::system::lzip8lists {*}$args]} |
|
9 {return [::punk::lib::system::lzip9lists {*}$args]} |
|
10 {return [::punk::lib::system::lzip10lists {*}$args]} |
|
11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 - 20 - 21 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - 29 - 30 - 31 - 32 { |
|
set n [llength $args] |
|
if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { |
|
#puts "calling ::punk::lib::system::Build_lzipn $n" |
|
::punk::lib::system::Build_lzipn $n |
|
} |
|
return [::punk::lib::system::lzip${n}lists {*}$args] |
|
} |
|
default { |
|
if {[llength $args] < 4000} { |
|
set n [llength $args] |
|
if {[info commands ::punk::lib::system::lzip${n}lists] eq ""} { |
|
#puts "calling ::punk::lib::system::Build_lzipn $n" |
|
::punk::lib::system::Build_lzipn $n |
|
} |
|
return [::punk::lib::system::lzip${n}lists {*}$args] |
|
} else { |
|
return [::punk::lib::lzipn {*}$args] |
|
} |
|
} |
|
} |
|
} |
|
|
|
namespace eval system { |
|
proc Build_lzipn {n} { |
|
set arglist [list] |
|
#use punk::lib::range which defers to lseq if available |
|
set vars [lmap i [punk::lib::range 0 $n] {string cat v$i}] ;#v0 v1 v2.. (v0 ignored) |
|
set body "\nlmap " |
|
for {set i 1} {$i <= $n} {incr i} { |
|
lappend arglist l$i |
|
append body "[lindex $vars $i] \$l$i " |
|
} |
|
append body "\{list " |
|
for {set i 1} {$i <= $n} {incr i} { |
|
append body "\$[lindex $vars $i] " |
|
} |
|
append body "\}" \n |
|
#puts "proc punk::lib::system::lzip${n}lists {$arglist} \{" |
|
#puts "$body" |
|
#puts "\}" |
|
proc ::punk::lib::system::lzip${n}lists $arglist $body |
|
} |
|
|
|
#fastest is to know the number of lists to be zipped |
|
proc lzip2lists {l1 l2} { |
|
lmap a $l1 b $l2 {list $a $b} |
|
} |
|
proc lzip3lists {l1 l2 l3} { |
|
lmap a $l1 b $l2 c $l3 {list $a $b $c} |
|
} |
|
proc lzip4lists {l1 l2 l3 l4} { |
|
lmap a $l1 b $l2 c $l3 d $l4 {list $a $b $c $d} |
|
} |
|
proc lzip5lists {l1 l2 l3 l4 l5} { |
|
lmap a $l1 b $l2 c $l3 d $l4 e $l5 {list $a $b $c $d $e} |
|
} |
|
proc lzip6lists {l1 l2 l3 l4 l5 l6} { |
|
lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 {list $a $b $c $d $e $f} |
|
} |
|
proc lzip7lists {l1 l2 l3 l4 l5 l6 l7} { |
|
lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 {list $a $b $c $d $e $f $g} |
|
} |
|
proc lzip8lists {l1 l2 l3 l4 l5 l6 l7 l8} { |
|
lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 {list $a $b $c $d $e $f $g $h} |
|
} |
|
proc lzip9lists {l1 l2 l3 l4 l5 l6 l7 l8 l9} { |
|
lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 {list $a $b $c $d $e $f $g $h $i} |
|
} |
|
proc lzip10lists {l1 l2 l3 l4 l5 l6 l7 l8 l9 l10} { |
|
lmap a $l1 b $l2 c $l3 d $l4 e $l5 f $l6 g $l7 h $l8 i $l9 j $l10 {list $a $b $c $d $e $f $g $h $i $j} |
|
} |
|
|
|
#neat algorithm - but while lmap seems better than foreach - it seems the script is evaluated a little slowly |
|
# review - |
|
proc lzipn_alt args { |
|
#stackoverflow - courtesy glenn jackman (modified) |
|
foreach l $args { |
|
lappend vars [incr n] |
|
lappend lmap_args $n $l |
|
} |
|
lmap {*}$lmap_args {lmap v $vars {set $v}} |
|
} |
|
|
|
#2024 - outperforms lmap version - presumably because list sizes reduced as it goes(?) |
|
proc lzipn_tcl8 {args} { |
|
#For tcl pre 9 (without lsearch -stride) |
|
#wiki - courtesy JAL |
|
set list_l $args |
|
set zip_l [] |
|
while {1} { |
|
set cur [lmap a_l $list_l { lindex $a_l 0 }] |
|
set list_l [lmap a_l $list_l { lrange $a_l 1 end }] |
|
|
|
if {[join $cur {}] eq {}} { |
|
break |
|
} |
|
lappend zip_l $cur |
|
} |
|
return $zip_l |
|
} |
|
proc lzipn_tcl9a {args} { |
|
#For Tcl 9+ (with lsearch -stride) |
|
#compared to wiki version |
|
#comparable for lists len <3 or number of args < 3 |
|
#approx 2x faster for large lists or more lists |
|
#needs -stride single index bug fix to use empty string instead of NULL |
|
if {![llength $args]} {return {}} |
|
set lens [lmap l $args {llength $l}] |
|
set numcolumns [::tcl::mathfunc::max {*}$lens] |
|
set flatlist [lrepeat [expr {$numcolumns * [llength $args]}] {}] |
|
set outlist [lrepeat $numcolumns {}] |
|
set s 0 |
|
foreach len $lens list $args { |
|
#ledit flatlist $s $e {*}$l {*}[lrepeat [expr {($numcolumns -([llength $l] % $numcolumns)) % $numcolumns}] NULL] |
|
ledit flatlist $s [expr {$s + $len - 1}] {*}$list |
|
incr s $numcolumns |
|
} |
|
#needs single index lstride bugfix |
|
for {set c 0} {$c < $numcolumns} {incr c} { |
|
ledit outlist $c $c [lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *] |
|
} |
|
return $outlist |
|
} |
|
proc lzipn_tcl9b {args} { |
|
if {![llength $args]} {return {}} |
|
set lens [lmap l $args {llength $l}] |
|
set numcolumns [::tcl::mathfunc::max {*}$lens] |
|
set flatlist [list] |
|
foreach len $lens list $args { |
|
lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] |
|
} |
|
lmap c [lseq 0 $numcolumns-1] {lsearch -stride $numcolumns -index $c -inline -all -subindices $flatlist *} |
|
} |
|
proc lzipn_tcl9c {args} { |
|
#SLOW |
|
if {![llength $args]} {return {}} |
|
set lens [lmap l $args {llength $l}] |
|
set numcolumns [::tcl::mathfunc::max {*}$lens] |
|
set flatlist [list] |
|
foreach len $lens list $args { |
|
lappend flatlist {*}$list {*}[lrepeat [expr {($numcolumns - ($len % $numcolumns)) % $numcolumns}] ""] |
|
} |
|
set zip_l {} |
|
set cols_remaining $numcolumns |
|
for {set c 0} {$c < $numcolumns} {incr c} { |
|
if {$cols_remaining == 1} { |
|
return [list {*}$zip_l $flatlist] |
|
} |
|
lappend zip_l [lsearch -stride $cols_remaining -index 0 -inline -all -subindices $flatlist *] |
|
set flen [llength $flatlist] |
|
set flatlist [lremove $flatlist {*}[lseq 0 to $flen-1 by $cols_remaining]] |
|
incr cols_remaining -1 |
|
} |
|
return $zip_l |
|
} |
|
} |
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::lzipn |
|
@cmd -name punk::lib::lzipn\ |
|
-summary\ |
|
"zip any number of lists together (unoptimised)."\ |
|
-help\ |
|
"Conceptually equivalent to converting a list of rows |
|
to a list of columns. |
|
|
|
See lzip which provides the same functionality but with |
|
optimisations depending on the number of supplied lists. |
|
" |
|
@values -min 1 -max 1 |
|
lvar -type string -help\ |
|
"name of list variable" |
|
a -type indexexpression |
|
z -type indexexpression |
|
}] |
|
} |
|
#keep both lzipn_tclX functions available for side-by-side testing in Tcl versions where it's possible |
|
if {![package vsatisfies [package present Tcl] 9.0-] || [dict get [punk::lib::check::has_tclbug_lsearch_strideallinline] bug]} { |
|
#-stride either not available - or has bug preventing use of main algorithm below |
|
proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl8] |
|
} else { |
|
proc lzipn {args} [info body ::punk::lib::system::lzipn_tcl9a] |
|
} |
|
|
|
|
|
|
|
namespace import ::punk::args::lib::tstr |
|
proc tclscript_to_commands script { |
|
#https://wiki.tcl-lang.org/page/cmdSplit |
|
namespace upvar [namespace current] commands_info report |
|
set report {} |
|
set commands {} |
|
set command {} |
|
set comment 0 |
|
set lineidx 0 |
|
set offset 0 |
|
foreach line [split $script \n] { |
|
set parts [split $line \;] |
|
set numparts [llength $parts] |
|
set partidx 0 |
|
while 1 { |
|
set parts [lassign $parts[set parts {}] part] |
|
if {[string length $command]} { |
|
if {$partidx} { |
|
append command \;$part |
|
} else { |
|
append command \n$part |
|
} |
|
} else { |
|
set partlength [string length $part] |
|
set command [string trimleft $part[set part {}] "\f\n\r\t\v "] |
|
incr offset [expr {$partlength - [string length $command]}] |
|
if {[string match #* $command]} { |
|
set comment 1 |
|
} |
|
} |
|
|
|
if {$command eq {}} { |
|
incr offset |
|
} elseif {(!$comment || ( |
|
$comment && (!$numparts || ![llength $parts]))) |
|
&& [info complete $command\n]} { |
|
|
|
lappend commands $command |
|
set info [dict create character $offset line $lineidx] |
|
set offset [expr {$offset + [string length $command] + 1}] |
|
lappend report $info |
|
set command {} |
|
set comment 0 |
|
set info {} |
|
} |
|
|
|
incr partidx |
|
if {![llength $parts]} break |
|
} |
|
} |
|
incr lineidx |
|
if {$command ne {}} { |
|
error [list {incomplete command} $command] |
|
} |
|
return $commands |
|
} |
|
|
|
#expects a single command. (ie does not handle multiple commands separated by semicolons or newlines) |
|
proc cmd_words cmd { |
|
#https://wiki.tcl-lang.org/page/cmdSplit |
|
# (words2 PYK) |
|
if {![info complete $cmd]} { |
|
error [list {not a complete command} $cmd] |
|
} |
|
set words {} |
|
set logical {} |
|
set cmd [string trimleft $cmd[set cmd {}] "\f\n\r\t\v " ] |
|
while {[regexp {([^\f\n\r\t\v ]*)([\f\n\r\t\v ]+)(.*)} $cmd full first delim last]} { |
|
append logical $first |
|
if {[info complete $logical\n]} { |
|
lappend words $logical |
|
set logical {} |
|
} else { |
|
append logical $delim |
|
} |
|
set cmd $last[set last {}] |
|
} |
|
if {$cmd ne {}} { |
|
append logical $cmd |
|
} |
|
if {$logical ne {}} { |
|
lappend words $logical |
|
} |
|
return $words |
|
} |
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::tclscript_to_scriptlist |
|
@cmd -name punk::lib::tclscript_to_scriptlist\ |
|
-summary\ |
|
"Parse tcl script to toplevel list of lists."\ |
|
-help\ |
|
"Get topmost list of tcl language elements in script. |
|
produces a list of lists where each sublist is a commandlist or |
|
a comment string." |
|
@values -min 1 -max 1 |
|
script -type string |
|
}] |
|
} |
|
#review - consider returning dict with scriptlist and linerange info for each cmdlist/comment |
|
#- would be useful for error reporting and other use cases. |
|
#- a command can have multi-line arguments and can also be continued across lines with the line-continuation character (backslash). |
|
proc tclscript_to_scriptlist {script} { |
|
set scriptlist [list] |
|
set cmdlist [list] |
|
set scrlen [string length $script] |
|
set token "" |
|
set in_token 0 |
|
set in_cmdlist 0 |
|
set in_comment 0 |
|
set charmap [list \t TB \n LF \r CR \\ BSL] ;#for switch 'jump' preservation - review - may be slower than escapes in switch statement? |
|
for {set i 0} {$i < $scrlen} {incr i} { |
|
set ch [string index $script $i] |
|
set chswitch [string map $charmap $ch] |
|
if {!$in_token} { |
|
switch -- $chswitch { |
|
{ } - TB { |
|
#ignore - continue being a non token |
|
} |
|
CR { |
|
if {[string index $script $i+1] eq "\n"} { |
|
if {$in_cmdlist} { |
|
#no active token - newline ends cmdlist |
|
set in_cmdlist 0 |
|
lappend scriptlist $cmdlist |
|
set cmdlist [list] |
|
} |
|
incr i |
|
} |
|
} |
|
LF - ";" { |
|
#no active token - newline or semicolon ends cmdlist |
|
if {$in_cmdlist} { |
|
set in_cmdlist 0 |
|
lappend scriptlist $cmdlist |
|
set cmdlist [list] |
|
} |
|
} |
|
BSL { |
|
if {[string index $script $i+1] eq "\n"} { |
|
#continuation of whitespace while no token - boring |
|
incr i |
|
} elseif {[string range $script $i+1 $i+2] eq "\r\n"} { |
|
#continuation of whitespace while no token - boring |
|
incr i 2 |
|
} else { |
|
#an uncommon possibility, a command wth surrounding spaces called in an strange way |
|
#REVIEW |
|
# e.g \ cmdname\ arg |
|
set in_token 1 |
|
set token_startline $line |
|
set token "\\[string index $script $i+1]" |
|
incr i |
|
if {!$in_cmdlist} { |
|
set in_cmdlist 1 |
|
} |
|
} |
|
} |
|
# { |
|
if {$in_cmdlist} { |
|
#ordinary data |
|
set in_token 1 |
|
set token # |
|
} else { |
|
if {!$in_comment} { |
|
set in_token 1 |
|
set in_comment 1 |
|
set token # |
|
} else { |
|
#wnen in comment - all will be a single token until comment ends |
|
append token # |
|
} |
|
} |
|
} |
|
default { |
|
#for completeness.. we should exclude other possible whitespace chars |
|
if {![string is space $ch]} { |
|
set in_token 1 |
|
set token $ch |
|
if {!$in_cmdlist} { |
|
set in_cmdlist 1 |
|
#lset cmdlist_linerange 0 $line ;#start line of cmdlist |
|
} |
|
} |
|
} |
|
} |
|
} else { |
|
#if we're in a token, we must be in a cmdlist or a comment (single token) |
|
#review - not preserving whitespace in list of commands is ok, but for comments it should ideally be preserved |
|
#note that unbalanced curly in *toplevel* comment will still 'info complete' to true |
|
switch -- $chswitch { |
|
LF { |
|
if {!$in_comment} { |
|
if {[tcl::info::complete $token]} { |
|
#ends token and cmdlist |
|
lappend cmdlist $token |
|
lappend scriptlist $cmdlist |
|
set cmdlist "" |
|
set in_cmdlist 0 |
|
set token "" |
|
set in_token 0 |
|
} else { |
|
append token \n |
|
} |
|
} else { |
|
#ends a comment |
|
lappend scriptlist $token ;#single token for comment |
|
set token "" |
|
set in_token 0 |
|
set in_comment 0 |
|
set in_cmdlist 0 ;#shouldn't be necessary, but included for clarity |
|
} |
|
} |
|
";" { |
|
if {!$in_comment} { |
|
if {[tcl::info::complete $token]} { |
|
#ends token and cmdlist |
|
lappend cmdlist $token |
|
lappend scriptlist $cmdlist |
|
set cmdlist "" |
|
set in_cmdlist 0 |
|
set token "" |
|
set in_token 0 |
|
} else { |
|
append token ";" |
|
} |
|
} else { |
|
#ordinary char for comment |
|
append token ";" |
|
} |
|
} |
|
CR { |
|
if {[string index $script $i+1] eq "\n"} { |
|
if {[tcl::info::complete $token]} { |
|
#ends token and commandlist |
|
lappend cmdlist $token |
|
lappend scriptlist $cmdlist |
|
set cmdlist "" |
|
set in_cmdlist 0 |
|
set token "" |
|
set in_token 0 |
|
} else { |
|
append token \r\n |
|
incr i |
|
} |
|
} else { |
|
append token \r |
|
} |
|
} |
|
BSL { |
|
if {[string index $script $i+1] eq "\n"} { |
|
#continuation - lf effectively becomes a space |
|
if {!$in_comment} { |
|
#token may end - but cmdlist goes on |
|
if {[tcl::info::complete $token]} { |
|
lappend cmdlist $token |
|
set token "" |
|
set in_token 0 |
|
} else { |
|
append token " " |
|
} |
|
} else { |
|
append token " " |
|
} |
|
incr i ;#skip LF |
|
} elseif {[string range $script $i+1 $i+2] eq "\r\n"} { |
|
#continuation - cr-lf effectively becomes a space |
|
if {!$in_comment} { |
|
#token may end - but cmdlist goes on |
|
if {[tcl::info::complete $token]} { |
|
lappend cmdlist $token |
|
set token "" |
|
set in_token 0 |
|
} else { |
|
append token " " |
|
} |
|
} else { |
|
append token " " |
|
} |
|
incr i 2 ;#skip CRLF |
|
} else { |
|
append token "\\[string index $script $i+1]" |
|
incr i |
|
} |
|
} |
|
default { |
|
if {![string is space $ch]} { |
|
append token $ch |
|
} else { |
|
if {!$in_comment} { |
|
if {[tcl::info::complete $token]} { |
|
lappend cmdlist $token |
|
set token "" |
|
set in_token 0 |
|
} else { |
|
append token $ch |
|
} |
|
} else { |
|
append token $ch |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} |
|
#eof |
|
if {!$in_comment} { |
|
if {$in_token} { |
|
if {[tcl::info::complete $token]} { |
|
lappend cmdlist $token |
|
lappend scriptlist $cmdlist |
|
} else { |
|
error "Eof reached whilst script incomplete. Unbalanced braces?\ntoken: '$token'" |
|
} |
|
} else { |
|
if {$in_cmdlist} { |
|
lappend scriptlist $cmdlist |
|
} |
|
} |
|
} else { |
|
lappend scriptlist $token |
|
} |
|
return $scriptlist |
|
} |
|
|
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::tclscript_to_toplevelinfo |
|
@cmd -name punk::lib::tclscript_to_toplevelinfo\ |
|
-summary\ |
|
"Parse tcl script to toplevel components and lineranges."\ |
|
-help\ |
|
"Get topmost tcl language elements in script. |
|
Produces a dictionary with keys 'scriptlist' and 'lineranges'. |
|
|
|
'scriptlist' is a list of elements that either represent a commandlist (list of words) or a comment (string). |
|
|
|
'lineranges' is a list of lists, where each sublist corresponds to an element in 'scriptlist' and contains |
|
the line range (start and end line numbers) for that element in the original script. |
|
|
|
For a comment line - the entire comment is treated as a single string element in 'scriptlist'. |
|
Only a single entry is included in 'lineranges' for the whole comment, covering all lines of the comment |
|
if it spans multiple lines via line continuation characters. |
|
|
|
For a commandlist - there will always be at least 2 entries in the corresponding sublist in 'lineranges' |
|
- the first entry is the line range for the whole commandlist, and then there is a line range for each word in the commandlist. |
|
|
|
Note that the members of scriptlist can be either a tcl list or a tcl string - but the type is not explicitly indicated in the output. |
|
The type can be inferred by checking the corresponding element in lineranges - if it's a single line range (llength 1), it's a comment string, |
|
if it's a list of line ranges with llength > 1, it's a commandlist. |
|
|
|
" |
|
@values -min 1 -max 1 |
|
script -type string |
|
}] |
|
} |
|
#return a dict with scriptlist and lineranges. |
|
#todo - we need a way to determine the start line for each word in each cmdlist - for error reporting and other use cases. |
|
#We can do this by tracking line numbers as we go, and then returning a list of line ranges for each cmdlist. |
|
#For comments we can just return the line range for the whole comment. |
|
|
|
#review: we may be able to re-implement this using the tclparser 'parse' command, |
|
# but it merges consecutive comments into a single comment range (which we could split back out) and also doesn't directly provide line number info. |
|
# we could still determine line number info by carefully checking the output of the parse command. |
|
#todo - make a test implementation using 'parse' and compare performance. The c implementation of 'parse' is likely to provide a performance benefit. |
|
#for now, we don't have a pure-tcl implementation of 'parse' available as a fallback, so we'll stick with this approach. |
|
proc tclscript_to_toplevelinfo {script} { |
|
set scriptlist [list] |
|
set lineranges [list] |
|
set cmdlist [list] |
|
|
|
#------------------------------------ |
|
#cmdlist_linerange is a list of lists |
|
#- each sublist is a pair of start and end line numbers for the entire current linelist, |
|
#followed by a pair of line numbers for each token in the cmdlist. |
|
set cmdlist_linerange [list [list 1 1]] ;#start and end line numbers for current cmdlist - 1-based. |
|
#------------------------------------ |
|
|
|
set scrlen [string length $script] |
|
|
|
set token "" |
|
set token_startline 1 |
|
set in_token 0 |
|
|
|
set in_cmdlist 0 |
|
set in_comment 0 |
|
set charmap [list \t TB \n LF \r CR \\ BSL] ;#for switch 'jump' preservation - review - may be slower than escapes in switch statement? |
|
set line 1 ;#1-based line numbers |
|
for {set i 0} {$i < $scrlen} {incr i} { |
|
set ch [string index $script $i] |
|
set chswitch [string map $charmap $ch] |
|
if {!$in_token} { |
|
switch -- $chswitch { |
|
{ } - TB { |
|
#ignore - continue being a non token |
|
} |
|
CR { |
|
if {[string index $script $i+1] eq "\n"} { |
|
if {$in_cmdlist} { |
|
#no active token - newline ends cmdlist |
|
set in_cmdlist 0 |
|
lappend scriptlist $cmdlist |
|
lappend lineranges $cmdlist_linerange |
|
incr line |
|
set cmdlist [list] |
|
set cmdlist_linerange [list [list $line $line]] |
|
} else { |
|
incr line |
|
set cmdlist_linerange [list [list $line $line]] |
|
} |
|
incr i |
|
} |
|
} |
|
LF { |
|
#no active token - newline ends cmdlist |
|
if {$in_cmdlist} { |
|
set in_cmdlist 0 |
|
lappend scriptlist $cmdlist |
|
lappend lineranges $cmdlist_linerange |
|
incr line |
|
set cmdlist [list] |
|
set cmdlist_linerange [list [list $line $line]] |
|
} else { |
|
incr line |
|
#lset cmdlist_linerange 1 $line |
|
set cmdlist_linerange [list [list $line $line]] |
|
} |
|
} |
|
";" { |
|
#no active token - semicolon ends cmdlist |
|
if {$in_cmdlist} { |
|
set in_cmdlist 0 |
|
lappend scriptlist $cmdlist |
|
lappend lineranges $cmdlist_linerange |
|
set cmdlist [list] |
|
set cmdlist_linerange [list [list $line $line]] |
|
} |
|
} |
|
BSL { |
|
if {[string index $script $i+1] eq "\n"} { |
|
#continuation of whitespace while no token - but backslash is acting as line continuation char. |
|
puts stderr "no-token backslash continuation with LF at line $line" |
|
incr i |
|
incr line |
|
if {$in_cmdlist} { |
|
lset cmdlist_linerange 0 1 $line ;#extend range of whole cmdlist |
|
} |
|
#review |
|
} elseif {[string range $script $i+1 $i+2] eq "\r\n"} { |
|
#continuation of whitespace while no token - but backslash is acting as line continuation char. |
|
puts stderr "no-token backslash continuation with CRLF at line $line" |
|
incr i 2 |
|
incr line |
|
if {$in_cmdlist} { |
|
lset cmdlist_linerange 0 1 $line ;#extend |
|
} |
|
#review |
|
} else { |
|
#an uncommon possibility, a command wth surrounding spaces called in an strange way |
|
# e.g \ cmdname\ arg |
|
set in_token 1 |
|
set token_startline $line |
|
set token "\\[string index $script $i+1]" |
|
incr i |
|
if {!$in_cmdlist} { |
|
set in_cmdlist 1 |
|
#lset cmdlist_linerange 0 $line ;#start line of cmdlist |
|
set cmdlist_linerange [list [list $line $line]] |
|
} |
|
} |
|
} |
|
# { |
|
if {$in_cmdlist} { |
|
#ordinary data |
|
set in_token 1 |
|
set token_startline $line |
|
set token # |
|
#lappend cmdlist_linerange [list $line $line] ;#start line of token |
|
} else { |
|
if {!$in_comment} { |
|
set in_token 1 |
|
set token_startline $line |
|
set in_comment 1 |
|
set token # |
|
} else { |
|
#wnen in comment - all will be a single token until comment ends |
|
append token # |
|
} |
|
} |
|
} |
|
default { |
|
#for completeness.. we should exclude other possible whitespace chars |
|
if {![string is space $ch]} { |
|
set in_token 1 |
|
set token_startline $line |
|
set token $ch |
|
if {!$in_cmdlist} { |
|
set in_cmdlist 1 |
|
#lset cmdlist_linerange 0 $line ;#start line of cmdlist |
|
set cmdlist_linerange [list [list $line $line]] |
|
} |
|
} |
|
} |
|
} |
|
} else { |
|
#if we're in a token, we must be in a cmdlist or a comment (single token) |
|
#review - not preserving whitespace in list of commands is ok, but for comments it should ideally be preserved |
|
#note that unbalanced curly in *toplevel* comment will still 'info complete' to true |
|
switch -- $chswitch { |
|
LF { |
|
if {!$in_comment} { |
|
if {[tcl::info::complete $token]} { |
|
#ends token and cmdlist |
|
lappend cmdlist $token |
|
lappend scriptlist $cmdlist |
|
lappend cmdlist_linerange [list $token_startline $line] ;#end line of token |
|
lappend lineranges $cmdlist_linerange |
|
set in_cmdlist 0 |
|
set token "" |
|
set in_token 0 |
|
incr line |
|
set cmdlist "" |
|
set cmdlist_linerange [list [list $line $line]] |
|
} else { |
|
append token \n |
|
incr line |
|
lset cmdlist_linerange 0 1 $line |
|
} |
|
} else { |
|
#ends a comment |
|
lappend scriptlist $token ;#single token for comment |
|
#we don't need to track individual token line ranges for comments, as the whole comment is a single token - so we just append the line range for the whole comment |
|
lappend lineranges $cmdlist_linerange |
|
set token "" |
|
set in_token 0 |
|
set in_comment 0 |
|
set in_cmdlist 0 ;#shouldn't be necessary, but included for clarity |
|
incr line |
|
set cmdlist "" |
|
set cmdlist_linerange [list [list $line $line]] |
|
} |
|
} |
|
";" { |
|
if {!$in_comment} { |
|
if {[tcl::info::complete $token]} { |
|
#ends token and cmdlist |
|
lappend cmdlist $token |
|
lappend scriptlist $cmdlist |
|
lappend cmdlist_linerange [list $token_startline $line] ;#end line of token |
|
lappend lineranges $cmdlist_linerange |
|
set in_cmdlist 0 |
|
set token "" |
|
set cmdlist "" |
|
set cmdlist_linerange [list [list $line $line]] |
|
set in_token 0 |
|
} else { |
|
append token ";" |
|
} |
|
} else { |
|
#ordinary char for comment |
|
append token ";" |
|
} |
|
} |
|
CR { |
|
if {[string index $script $i+1] eq "\n"} { |
|
if {[tcl::info::complete $token]} { |
|
#ends token and commandlist |
|
lappend cmdlist $token |
|
lappend scriptlist $cmdlist |
|
lappend cmdlist_linerange [list $token_startline $line] ;#end line of token |
|
lappend lineranges $cmdlist_linerange |
|
set in_cmdlist 0 |
|
set token "" |
|
incr line |
|
set cmdlist "" |
|
set cmdlist_linerange [list $line $line] |
|
set in_token 0 |
|
} else { |
|
append token \r\n |
|
incr i |
|
} |
|
} else { |
|
append token \r |
|
} |
|
} |
|
BSL { |
|
if {[string index $script $i+1] eq "\n"} { |
|
#continuation - lf effectively becomes a space |
|
incr line |
|
if {!$in_comment} { |
|
#token may end - but cmdlist goes on |
|
if {[tcl::info::complete $token]} { |
|
lappend cmdlist $token |
|
set token "" |
|
set in_token 0 |
|
lappend cmdlist_linerange [list $token_startline $line] ;#end line of token |
|
} else { |
|
append token " " |
|
} |
|
} else { |
|
append token " " |
|
} |
|
lset cmdlist_linerange 0 1 $line ;#extend |
|
incr i ;#skip LF |
|
} elseif {[string range $script $i+1 $i+2] eq "\r\n"} { |
|
#continuation - cr-lf effectively becomes a space |
|
incr line |
|
if {!$in_comment} { |
|
#token may end - but cmdlist goes on |
|
if {[tcl::info::complete $token]} { |
|
lappend cmdlist $token |
|
set token "" |
|
set in_token 0 |
|
lappend cmdlist_linerange [list $token_startline $line] ;#end line of token |
|
} else { |
|
append token " " |
|
} |
|
} else { |
|
append token " " |
|
} |
|
lset cmdlist_linerange 0 1 $line ;#extend |
|
incr i 2 ;#skip CRLF |
|
} else { |
|
append token "\\[string index $script $i+1]" |
|
incr i |
|
} |
|
} |
|
default { |
|
if {![string is space $ch]} { |
|
append token $ch |
|
} else { |
|
if {!$in_comment} { |
|
if {[tcl::info::complete $token]} { |
|
lappend cmdlist $token |
|
set token "" |
|
set in_token 0 |
|
lappend cmdlist_linerange [list $token_startline $line] ;#end line of token |
|
} else { |
|
append token $ch |
|
} |
|
} else { |
|
append token $ch |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} |
|
#eof |
|
if {!$in_comment} { |
|
if {$in_token} { |
|
if {[tcl::info::complete $token]} { |
|
lappend cmdlist $token |
|
lappend scriptlist $cmdlist |
|
#eof ends token and cmdlist |
|
lappend cmdlist_linerange [list $token_startline $line] ;#end line of token |
|
lappend lineranges $cmdlist_linerange |
|
} else { |
|
error "Eof reached whilst script incomplete. Unbalanced braces? linerange:$cmdlist_linerange\ntoken: '$token'" |
|
} |
|
} else { |
|
if {$in_cmdlist} { |
|
lappend scriptlist $cmdlist |
|
lappend lineranges $cmdlist_linerange |
|
} |
|
} |
|
} else { |
|
lappend scriptlist $token |
|
lappend lineranges $cmdlist_linerange |
|
} |
|
#assert {[llength $scriptlist] == [llength $lineranges]} ;#sanity check - each cmdlist/comment should have a corresponding linerange |
|
return [dict create scriptlist $scriptlist lineranges $lineranges] |
|
} |
|
|
|
|
|
#tclscript_info should be evaluated in the namespace where the script is to be analysed, or with an explicit nscontext argument, |
|
#so that cmdinfo gives correct results for commands in the script's context. |
|
# - this is necessary for correct handling of ensembles and subcommands and in determining cmd types, and whether arguments are scripts or opaque strings. |
|
#This function is concerned with the complexity and what we can determine about the script without running it, |
|
#ie it is more akin to static analysis than runtime/dynamic analysis, despite the fact that we will be using cmdinfo to get contextual information about commands. |
|
|
|
#--------------------------------- |
|
#e.g "a string {[puts e1]xxx}" |
|
#e.g "a string [list {[puts n]yyy}]" |
|
#e.g "a string [list [puts e2[puts e1]]xxx {[puts n]yyy} "[puts e3]zzz"]" |
|
#for each of the above strings we should get a command recognised for the 'puts e*' items as well as the 'list' item, but not for the 'puts n' items since they are within curly braces and not subject to command substitution. |
|
#--------------------------------- |
|
proc tclscript_info {script {nscontext ""}} { |
|
package require parser |
|
#if the script is ANSI highlighted - the square brackets within the ANSI will disrupt our parsing. |
|
if {[punk::ansi::ta::detect $script]} { |
|
#we will strip it - but be noisy on stderr since a) it's a bi inefficient to pass in ansi highlighted scripts. |
|
#b) perhaps there could be edge cases where there is ANSI in the source, but within protected strings or comments. |
|
#error "tclscript_info does not currently support ANSI highlighted scripts. Please remove ANSI highlighting before analysis." |
|
puts stderr "[a+ red]tclscript_info: unescaped ANSI codes detected in script. Stripping ANSI codes for analysis - this may cause inaccuracies in some edge cases, and is inefficient - consider removing ANSI highlighting before analysis.[a]" |
|
set script [punk::ansi::ta::strip $script] |
|
} |
|
|
|
if {$nscontext eq ""} { |
|
set nscontext [uplevel 1 {namespace current}] |
|
} |
|
|
|
|
|
#use tclscript_to_toplevelinfo to analyse the script structure and give a rough measure of complexity based on number of command blocks, commands and nesting levels. |
|
set resultd [dict create commands_parsefail {} commands_parseskip {} comment_count 0 maxnesting 0 commands_count 0 commands {} commands_proc {} commands_native {} commands_notfound {} commands_unknown {} commands_recursive {} blocklist {}] |
|
set scriptinfo [tclscript_to_toplevelinfo $script] |
|
set scriptlist [dict get $scriptinfo scriptlist] |
|
set lineranges [dict get $scriptinfo lineranges] |
|
set cinfo_cache [dict create] |
|
foreach cmdlist $scriptlist cmdlineranges $lineranges { |
|
#If the cmdlist starts with a # it's a comment, and cmdlist is a string not a list. |
|
# but rather than shimmer all the cmdlist elements to strings to determine this, |
|
# we can test the length of the corresponding linerange - if it's a single line range, it's a comment string, if it's a list of line ranges with llength > 1, it's a commandlist. |
|
if {[llength $cmdlineranges] == 1} { |
|
dict incr resultd comment_count |
|
continue |
|
} |
|
#if {[string match #* $cmdlist]} { |
|
# dict incr resultd comment_count |
|
# continue |
|
#} |
|
#assert: cmdlist is a proper Tcl list of words representing a command and its arguments, and cmdlineranges is a list of line ranges corresponding to the cmdlist elements. |
|
|
|
|
|
#Any command or argument could be constructed in a dynamic way, possibly with multiple command calls for the line to be resolved. |
|
#e.g |
|
#[getcmd 1] [getsub 1] [getsub 2] [getoptions [mycommand options]] {*}[getargs remaining] |
|
#for our initial run we will assume the initial command and subcommands are all literal and not dynamically generated - review this assumption. |
|
#todo - apply the same checks we use for arguments below (regarding quotedness and square brackets) to the command and subcommand words |
|
#- if they are not simple words, then we should probably analyse them for command substitution and count any commands found as non-nested commands at the same level. |
|
|
|
#consider also that some dynamic arguments can stop us from determining the nesting behaviour since we won't be able to run the command to see what it does. |
|
#e.g |
|
#if {*}[get_if_args] |
|
#normally we could apply the arguments of the if command to see how they parse with regards to literal 'then' and 'elseif' and 'else' words |
|
#to determine which arguments are script blocks. |
|
#we should probably record instances of un-analysable lines for commands that are known to have complex argument parsing behaviour - e.g if, switch, apply, eval etc. |
|
#- so that a complexity formula that is looking at our returned dict can count those as increasing complexity by some amount. |
|
|
|
|
|
|
|
#review - ensemble commands can dispatch based on different argument positions. |
|
set dispatchwords [list] |
|
#puts stderr "cmdlist: $cmdlist" |
|
|
|
set args_remaining $cmdlist |
|
set args_remaining_lineranges [lrange $cmdlineranges 1 end] ;#the first element of cmdlineranges is the line range for the whole commandlist, so we take the remaining elements as the line ranges for dispatch(es) + args |
|
lappend dispatchwords [lpop args_remaining 0] |
|
lappend dispatch_lineranges [lpop args_remaining_lineranges 0] |
|
|
|
|
|
#we don't just call cmdinfo with {*}$dispatchwords since there can be documented subcommands that are not valid commands in their own right. |
|
# e.g 'string is xdigit' resolves to cmdtype 'notfound' (but with documentation that shows it is a valid deeper subcommand of 'string is') |
|
#this is because xdigit etc happen to be implemented within the tcl::string::is command but not as an ensemble. |
|
|
|
#we want to stop at the point where the command is not resolvable to a deeper level documented subcommand. |
|
#we will need to manually track the remaining unconsumed arguments. (lpop them as we go) |
|
|
|
#for now we'll just do toplevel commands in the script and their argument command-substitutions. |
|
#todo - use our punk::args::parse facility to determine arguments that are scripts (or expressions?) and analyse those recursively. |
|
|
|
if {![dict exists $cinfo_cache $dispatchwords]} { |
|
set cinfo [namespace eval $nscontext [list cmdinfo {*}$dispatchwords]] |
|
dict set cinfo_cache $dispatchwords $cinfo |
|
} else { |
|
set cinfo [dict get $cinfo_cache $dispatchwords] |
|
} |
|
set ctype [dict get $cinfo cmdtype] |
|
set origin [dict get $cinfo origin] |
|
if {$ctype eq "ensemble"} { |
|
for {set i 0} {$i < [llength $cmdlist]} {incr i} { |
|
lappend dispatchwords [lpop args_remaining 0] |
|
lappend dispatch_lineranges [lpop args_remaining_lineranges 0] |
|
#also check and update the cache for subcommands. |
|
if {![dict exists $cinfo_cache $dispatchwords]} { |
|
set test_cinfo [namespace eval $nscontext [list cmdinfo {*}$dispatchwords]] |
|
dict set cinfo_cache $dispatchwords $test_cinfo |
|
} else { |
|
set test_cinfo [dict get $cinfo_cache $dispatchwords] |
|
} |
|
|
|
|
|
if {[dict get $test_cinfo origin] eq $origin} { |
|
#no change in origin - so we are into the arguments of the command, or have an invalid subcommand - stop looking for subcommands |
|
#todo - detect invalid subcommand and count as unknown command? |
|
break |
|
} elseif {[dict get $test_cinfo cmdtype] in {"proc" "native" "notfound"}} { |
|
#we have a subcommand that won't be introspectable at a deeper level. |
|
set cinfo $test_cinfo |
|
set ctype [dict get $cinfo cmdtype] |
|
break |
|
} else { |
|
#review - we should perhaps only continue for cmdtypes that we explicilty determine are more deeply introspectable. |
|
#e.g ensembles and ooobjects (but this is not yet implemented) -- revew. |
|
set cinfo $test_cinfo |
|
set ctype [dict get $cinfo cmdtype] |
|
} |
|
} |
|
} |
|
|
|
#examine the arguments (to determine command substitutions in arguments that will occur before the full commandline itself can be run) |
|
#for each command before we add the command itself to the list of commands found. |
|
|
|
#This matches the behaviour of the tcl interpreter where arguments are fully processed (with regards to substitutions) before the command is executed, |
|
# so that the order of our lists matches the order of execution in the tcl interpreter. |
|
puts "cmdlist: $cmdlist cmdlineranges: $cmdlineranges dispatchwords:[a+ green]'$dispatchwords'[a] args_remaining:[a+ cyan]'$args_remaining'[a] ctype: $ctype origin: $origin" |
|
set cmdlist_parseinfo [parse command [join $cmdlist] {}] |
|
#puts stderr "[punk::lib::showdict -roottype list $cmdlist_parseinfo 0 1 2 3/*]" |
|
|
|
assert {[llength $args_remaining] == [llength $args_remaining_lineranges]} ;#sanity check - each remaining arg should have a corresponding linerange |
|
foreach arg $args_remaining arglr $args_remaining_lineranges { |
|
#arg could be expanded with leading {*} |
|
#we don't care about the expansion itself - but we need to check the trailing part for quoting and command substitution. |
|
#strip leading {*} if present for the purposes of analysis |
|
#- but we don't need to do anything with it other than perhaps record that it is present for the purposes of complexity analysis - review this assumption |
|
# (expansion of arguments with {*} is pretty common and reasonable, but expansion of commands or subcommands could reasonably be considered a complexity increasing feature) |
|
#similarly substitution of arguments with command substitution is pretty common and reasonable, but substitution of commands or subcommands could reasonably be considered a complexity increasing feature. |
|
|
|
#even substitution of commands or subcommands with variable substitution could be considered a complexity increasing feature - but we won't attempt to analyse that for now. |
|
#puts stderr "checking arg '$arg' for command substitution" |
|
|
|
#-------------------------------------- |
|
set argtest "bogus1 " |
|
append argtest $arg |
|
#we need a leading command word so that arg such as #etc is not parsed as a comment. |
|
#we can't use the main cmdlist_parseinfo structure since it auto-expands simple {*} constructs. sometimes producing no elements e.g {*}{ } and sometimes producing multiple. |
|
#(for other constructs with more complex content after the {*} it produces a single element with the first element of the parse tree being 'expand') |
|
|
|
set argparseinfo [parse command $argtest {}] |
|
lassign $argparseinfo commentRange commandRange restRange parseTree |
|
#sanity check the restRange - we expect the whole arg to be parsed as a single element, with no remaining text after parsing. |
|
if {[lindex $restRange 1] != 0} { |
|
puts stderr "Warning: 'parse command <arg> {}' did not consume the whole argument. This may indicate a parsing error or an edge case that is not handled correctly. arg: '$arg' restRange: $restRange" |
|
error "tclscript_info: unexpected parse result for argument. arg: '$arg' restRange: $restRange" |
|
} |
|
#the parse command will expand *simple* {*} constructs to return a parse tree with length > 1 (multiple 3-element lists) |
|
#e.g {*}"a b c" or {*}{a b c}. |
|
#whilst in a single line this simple construct is unlikely to appear in source code, |
|
#it is commonly used in source code when the argument spans multiple lines - e.g for creating dicts without using line-continuation characters. |
|
#e.g |
|
#dict create {*}{ |
|
# key1 value1 |
|
# key2 value2 |
|
#} |
|
|
|
#the parseTree list will be a list of 3 3-element lists something like: |
|
# simple {4 1} {{text {4 1} {}}} |
|
# simple {6 8} {{text {6 8} {}}} |
|
# simple {15 1} {{text {15 1} {}}} |
|
|
|
#if {*} is followed by more complex constructs, the parse tree will be a list of 1 3-element list, with the first element being the keyword 'expand'. |
|
#more complex constructs include those with command or variable or backslash substitution within them, |
|
#e.g {*}[getargs] or {*}"a [subcmd] c" or {*}"a $var c" or {*}"a \t c" |
|
#similar to variables or commands, any backslash (in a subtitution context such as bareword or doublequoted part) is parsed |
|
# as it's own token 'backslash' that covers the backslash and the following character, rather than being treated as a 'text' token. |
|
#- these constructs will have the first element being 'expand' since the presence of command substitution prevents the tcl parser from doing the expansion at parse time. |
|
#e.g the parseTree list for {{*}[somecmd]} will be a list containing a single 3-element list like: |
|
# expand {0 12} {{command {3 9} {}}} |
|
|
|
#The redundant case of argument {*}simpleword will be parsed the same as simpleword |
|
#-------------------------------------- |
|
set args_parsed [lrange [lindex $argparseinfo 3] 1 end] ;#first element is our 'bogus1' word we added for parsing purposes. |
|
if {[llength $args_parsed] == 0} { |
|
#there must have been a literal expansion with {*} that produced no text, e.g {*}{ } or {*}" " (or more likely a multiline version of that) etc - we can ignore these for the purposes of command substitution. |
|
continue |
|
} |
|
if {[llength $args_parsed] > 1} { |
|
#there must have been a simple expansion with {*} that produced multiple words, e.g {*}"a b c" or {*}{a b c} - we can ignore these for the purposes of command substitution since they are simple expansions that don't involve command substitution. |
|
continue |
|
} |
|
set type [lindex $args_parsed 0 0] |
|
if {$type eq "simple"} { |
|
#simple text - no command substitution possible - we can ignore these for the purposes of command substitution analysis. |
|
continue |
|
} |
|
#assert type is now word or expand. |
|
|
|
if {[string range $arg 0 2] eq "{*}"} { |
|
#strip leading {*} for analysis - but we don't need to do anything with it otherwise |
|
set arg [string range $arg 3 end] |
|
} |
|
if {[string index $arg 0] eq "\{" && [string index $arg end] eq "\}"} { |
|
#no variable or command substitution possible if arg is fully enclosed in curly braces - so we can skip analysis of command substitution within it |
|
continue |
|
} |
|
|
|
#----------------------------------------------------- |
|
#we presumably have a bareword or double quoted string. |
|
#e.g myargument[getnumber] or "my argument with [subcommand] etc" or "my argument with {*}[subcommand]" |
|
|
|
#---------------------------------------------------------------- |
|
#the parsing needs to be carefully done to avoid getting confused by square brackets that are within double quotes or curly braces. |
|
#e.g "a string {[puts emitted]xxx}" |
|
#e.g "a string [list {[puts not-emitted]yyy}]" |
|
#e.g "a string [list [puts emitted]xxx {[puts not-emitted]yyy}]" |
|
|
|
#Note that simply matching square brackets is not sufficient - nor is simply checking if a square bracket is within double quotes or curly braces |
|
#- we need to do a full parse of the argument to determine which square brackets are actually command substitutions that need to be analysed for complexity, |
|
#and which are just literal characters within the argument. |
|
#---------------------------------------------------------------- |
|
|
|
#even just running 'info complete' on the entire argument is not sufficient to determine if we can ignore it. |
|
#e.g "a string \{[puts emitted]" |
|
# |
|
# ignore - balance above curly just for editor \} |
|
|
|
#if {![info complete $arg]} { |
|
# #not a valid way to test if the argument can be ignored for the purposes of command substitution analysis |
|
# - since it could be incomplete due to unbalanced braces or quotes but still contain valid command substitutions that we need to analyse for complexity, |
|
# and it could still be a valid string as far as the cmdlist is concerned. |
|
# continue |
|
#} |
|
#----------------------------------------------------- |
|
|
|
# #arg is either enclosed in double quotes or a bareword (maybe even with trailing double quote) - we need to check for command substitution within it. |
|
# #tclword_to_scriptlist uses the tclparser library to do this parsing for us. |
|
# set sub_scriptlist [tclscript_info::tclword_to_scriptlist $arg] |
|
# #puts stderr "sub_scriptlist: $sub_scriptlist" |
|
# #set sub_tclscript_info [tclscript_info [join $sub_scriptlist \n] $nscontext] ;#wrong - we need to join the sub_scriptlist back into a string for analysis with tclscript_info, but we need to be careful to preserve the original structure of the argument for correct parsing of command substitutions within it - review this. |
|
# set combined_sub_script "" |
|
# foreach sub_script $sub_scriptlist { |
|
# append combined_sub_script [concat {*}$sub_script] \n |
|
# } |
|
# set sub_tclscript_info [tclscript_info $combined_sub_script $nscontext] |
|
|
|
|
|
#e.g script |
|
# set text " |
|
# cwd is [pwd] |
|
# tcl patch level: [info patch] |
|
# a bad command [if 1] |
|
# a variable value: $myvar |
|
# stuff: [dostuff [info name]] |
|
#" |
|
#we want to report the bad command 'if 1' as a parse fail with the correct line number, |
|
#and we want to report the other commands 'pwd' and 'info patch' as non-nested commands at the same level as the main command that the argument is for. |
|
#the 'info name' call should show up in the list of commands prior to the dostuff command, since it will be substituted before it by Tcl. |
|
|
|
set arg_start [lindex $arglr 0] |
|
puts stderr "arg_linerange: $arglr arg_start: $arg_start" |
|
|
|
#set arg_info [lindex $args_parsed 0 2] |
|
set arg_parts [lindex $args_parsed 0 2] |
|
puts stderr "arg_parts: $arg_parts" |
|
set arg_part_start $arg_start |
|
foreach ap $arg_parts { |
|
set ap_type [lindex $ap 0] |
|
set ap_range [lindex $ap 1] |
|
#expect types text, command, variable |
|
#( {*} will be a literal string, not an expansion operator even if it's at the beginning of a word - as it's within the argument) |
|
switch -- $ap_type { |
|
"text" { |
|
#ordinary text - no command substitution |
|
#we need to examine it for newlines to update the line number. |
|
set ap_text_value [parse getstring $argtest $ap_range] |
|
#puts " > text part: '$ap_text_value'" |
|
incr arg_part_start [parse countnewline $ap_text_value {}] |
|
} |
|
"command" { |
|
set ap_value [parse getstring $argtest $ap_range] |
|
puts " > command part: '$ap_value'" |
|
set ap_inner [string range $ap_value 1 end-1] ;#strip the square brackets for analysis |
|
set sub_tclscript_info [tclscript_info $ap_inner $nscontext] |
|
|
|
incr arg_part_start [parse countnewline $ap_value {}] |
|
|
|
###------------------------------------------------------------------------------------------------------------------------------------ |
|
#merge the results from the sub_tclscript_info into our main resultd dict |
|
#- we will want to keep track of the total number of commands and max nesting level across the whole script, and also keep a cumulative list of all commands found in the script for the purposes of complexity analysis. |
|
#we do not consider substitutions within the current cmdlist to be increasing the nesting level - they are non-nested commands at the same level as the current cmdlist - so we do not need to adjust the maxnesting value from the sub_tclscript_info when merging it in. |
|
dict incr resultd comment_count [dict get $sub_tclscript_info comment_count] |
|
#leave nesting level as is - we are not increasing nesting level for command substitutions within arguments |
|
dict incr resultd commands_count [dict get $sub_tclscript_info commands_count] |
|
foreach key {commands commands_proc commands_native commands_notfound commands_unknown} { |
|
set cmds_to_update [dict get $resultd $key] |
|
dict set resultd $key {} ;#unshare |
|
foreach cmd [dict get $sub_tclscript_info $key] { |
|
if {$cmd ni $cmds_to_update} { |
|
lappend cmds_to_update $cmd |
|
} |
|
} |
|
dict set resultd $key $cmds_to_update ;#restore |
|
} |
|
#todo |
|
###------------------------------------------------------------------------------------------------------------------------------------ |
|
set parsefails [dict get $sub_tclscript_info commands_parsefail] |
|
set adjusted_parsefails {} |
|
foreach pf $parsefails { |
|
puts "pf: $pf" |
|
set pf_linerange [lindex $pf 0] |
|
set pf_cmdwords [lindex $pf 1] |
|
|
|
lassign $pf_linerange pf_start pf_end |
|
|
|
#$arg_start is line 1 of the script, so we need to add $arg_start - 1 to the pf_start and pf_end to get the correct linerange relative to the whole script. |
|
set adjusted_pf_linerange [list [expr {$pf_start + ($arg_part_start -1)}] [ expr {$pf_end + ($arg_part_start-1)}]] |
|
lappend adjusted_parsefails [list $adjusted_pf_linerange $pf_cmdwords] |
|
} |
|
dict lappend resultd commands_parsefail {*}$adjusted_parsefails |
|
set parseskips [dict get $sub_tclscript_info commands_parseskip] |
|
set adjusted_parseskips {} |
|
foreach ps $parseskips { |
|
puts "ps: $ps" |
|
set ps_linerange [lindex $ps 0] |
|
set ps_cmdwords [lindex $ps 1] |
|
lassign $ps_linerange ps_start ps_end |
|
set adjusted_ps_linerange [list [expr {$ps_start + ($arg_part_start -1)}] [ expr {$ps_end + ($arg_part_start-1)}]] |
|
lappend adjusted_parseskips [list $adjusted_ps_linerange $ps_cmdwords] |
|
} |
|
dict lappend resultd commands_parseskip {*}$adjusted_parseskips |
|
|
|
###------------------------------------------------------------------------------------------------------------------------------------ |
|
|
|
|
|
|
|
} |
|
"variable" { |
|
#there can be nested command substitution within variable names when they are arrays e.g "val: $var([subcmd])]" |
|
set ap_value [parse getstring $argtest $ap_range] |
|
puts " > variable part: '$ap_value'" |
|
} |
|
default { |
|
puts stderr "Warning: unexpected arg part type '$ap_type' in argument parsing. This may indicate an edge case that is not handled correctly. arg: '$arg' arg_part: $ap" |
|
} |
|
} |
|
} |
|
#error temp |
|
|
|
|
|
} |
|
|
|
|
|
|
|
switch -- $ctype { |
|
"proc" { |
|
#dict incr resultd commands_proc |
|
if {$dispatchwords ni [dict get $resultd commands_proc]} { |
|
dict lappend resultd commands_proc $dispatchwords |
|
} |
|
} |
|
"native" { |
|
if {$dispatchwords ni [dict get $resultd commands_native]} { |
|
dict lappend resultd commands_native $dispatchwords |
|
} |
|
} |
|
"notfound" { |
|
if {$dispatchwords ni [dict get $resultd commands_notfound]} { |
|
dict lappend resultd commands_notfound $dispatchwords |
|
} |
|
} |
|
"ooclass" { |
|
#todo |
|
puts stderr "detected oo class command '$dispatchwords' - not currently analysing oo classes - review" |
|
} |
|
"ooobject" { |
|
#todo |
|
puts stderr "detected oo object command '$dispatchwords' - not currently analysing oo objects - review" |
|
} |
|
default { |
|
puts "[a+ red]tclscript_info: unhandled cmdtype '$ctype' for command '$dispatchwords' - treating as unknown command type for now - review[a]" |
|
if {$dispatchwords ni [dict get $resultd commands_unknown]} { |
|
dict lappend resultd commands_unknown $dispatchwords |
|
} |
|
} |
|
} |
|
dict incr resultd commands_count |
|
if {$dispatchwords ni [dict get $resultd commands]} { |
|
dict lappend resultd commands $dispatchwords |
|
} |
|
|
|
#we need to check each word of every command to check the quoting. |
|
#if unquoted or double quoted - we need to check any square brackets for command substitution and count those as non-nested commands at the same level. |
|
#puts "cinfo: $cinfo" |
|
|
|
#puts "cmdorigin: [dict get $cinfo origin] args_remaining: $args_remaining" |
|
|
|
|
|
|
|
#todo - nesting based on detection of if, while, for, foreach, switch, dict for etc. |
|
#consider what to do with eval, apply, uplevel. |
|
|
|
#---------------------------------------------- |
|
#namespace eval (and similar such as uplevel) |
|
#---------------------------------------------- |
|
#commonly we have structures like: |
|
# 'namespace eval ns [list cmdname arg1 arg2 ...]' |
|
#or 'namespace eval ns [linsert $args 0 cmdname]' |
|
#---------------------------------------------- |
|
#these will recognise the commands 'list' and 'linsert' as substituted commands, |
|
#but when we examine the arguments for 'namespace eval' and determine that there is a script argument, |
|
#we will miss the fact that 'cmdname' is being called. |
|
#- and we may miss detecting recursion. |
|
|
|
|
|
#e.g something more opaque 'namespace eval ns [getscript]' |
|
#in this case we won't be able to determine the actual script at all - but this is no different from |
|
# 'namespace eval ns [list nscommand]' where we would at most acknowledte the call to list but not the actual command being called - so we should at least be consistent in how we handle these cases. |
|
|
|
#In any case - the script is likely being evaluated in a different context to the current one, |
|
#so we *could* possibly not attempt to analyse the script even if it was provided in a way that is amenable to analysys: |
|
#- e.g 'namespace eval ns { |
|
# cmdname arg1 arg2 |
|
# etc ... |
|
# }' |
|
#It is arguable that if it is specified literally as a script then the complexity of the script should be counted as part of the complexity of the current script. |
|
#---------------------------------------------- |
|
|
|
#------------------------------------------------------------------------------------------------------------------------------------------------ |
|
#for this reason - we will test the commands if, while, for, foreach, switch, dict for etc for the presence of script arguments and analyse those |
|
#scripts if they are present as literal braced scripts but ignore them if they are not, since we have already analyzed command substitutions within arguments above. |
|
#this means a structure like: |
|
# namespace eval ns [list\ |
|
# if {condition} { |
|
# <multiline script> |
|
# }] |
|
#would not make the multiline script amenable to analysis despite it being coded directly in the analysed script - and could theoretically be used to hide complex scripts from analysis |
|
#- but it is not clear that this is a common or reasonable way to write code. |
|
#In a dynamic language like Tcl, there are commonly structures such as: |
|
# set script [string map $map {<multiline script>}] |
|
# namespace eval ns $script |
|
#These unfortunately are also not trivially analysable without implementing full data flow analysis to determine the possible values of 'script' at the point of the 'namespace eval' |
|
#- which is a non-trivial amount of work and is not on the roadmap for this implementation. |
|
#------------------------------------------------------------------------------------------------------------------------------------------------ |
|
#for apply, uplevel and namespace eval we will do similar, but attempt to provide the correct context for cmdinfo resolution. |
|
#This may not always be possible e.g if the namespace or level being evalled into is determined in a dynamic way |
|
#- but we will do our best to handle the common cases where the context can be determined from the script. |
|
#In practice, most 'script' accepting commands are resolved in the global scope and the context should often make little difference with regards, |
|
#to complexity analysis, but it is still worth trying to get it right where possible. |
|
#------------------------------------------------------------------------------------------------------------------------------------------------ |
|
|
|
|
|
set origin [dict get $cinfo origin] |
|
puts stderr "[a+ green]origin: $origin[a]" |
|
set parse_ok 1 ;#default assumption |
|
#we will attempt to parse the arguments with punk::args::parse to determine which arguments are scripts and analyse those as nested scripts |
|
#for commands where that is appropriate. |
|
set docid [dict get [cmdinfo $origin] docid] |
|
if {$docid ne ""} { |
|
puts stderr "[a+ green]detected command with id $origin - should be parsable with punk::args::parse for more detailed analysis of arguments[a]" |
|
#review - we have no way to substitute any variables or command substitutions within the arguments at this stage, |
|
#so we will just be parsing the raw arguments as they appear in the script |
|
#- this means that we may not be able to correctly parse some arguments if they are constructed in a complex way. |
|
# todo - a way to tell punk::args::parse to ignore type-checking for certain arguments would be useful. |
|
#In some cases the actual value of an argument may be needed to determine which 'form' of the command is being used. |
|
#e.g dynamically specified keywords can make it hard to determine the right form of the command to use for parsing - review. |
|
|
|
#get a copy of the resolved_def with overrides. |
|
#e.g for ::lrange |
|
#punk::args::resolved_def -override [list @id [list -id test-$origin] @cmd [list {-help ""}] first {-type any}] $origin |
|
|
|
|
|
#--------------------------------------- |
|
set form 0 |
|
#hack |
|
if {$origin eq "::switch"} { |
|
set form block ;#most common form of switch |
|
} else { |
|
#for other commands we will just use the default form 0 for now - review whether we want to attempt to detect other forms for other commands as well. |
|
set form 0 |
|
} |
|
#--------------------------------------- |
|
#review - a lot of what we're attempting to do here requires more serious analysis of the script structure than we are currently doing |
|
#we will need to get serious about proc/command return 'types' and variable tracking. |
|
|
|
#-------------------------- |
|
#These 3 lists should have same length and corresponding elements. |
|
set test_cmdargs [list] |
|
set test_cmdargs_quotestate [list] |
|
set test_cmdargs_linerange [list] |
|
#-------------------------- |
|
set has_expand_arg 0 |
|
set has_complex_arg 0 |
|
foreach a $args_remaining arglr $args_remaining_lineranges { |
|
|
|
#todo - avoid manually checking for leading {*} and instead use the parse tree from 'parse command' to determine if the argument includes an expansion. |
|
#consider: |
|
# {*}{a b c} - we should append a b c as separate elements - a literal list on a single line is unlikely but possible and a literal list/dict spanning multiple lines is reasonably common. |
|
#e.g {*}{ |
|
# k1 v1 |
|
# k2 v2 |
|
#} |
|
# {*}[my_getoptions x] - we don't know how many arguments (if any) this will expand to. |
|
|
|
|
|
#puts "[a+ blue]checking arg '$a' with linerange $arglr[a]" |
|
set argparseinfo [parse command $a {}] |
|
lassign $argparseinfo argCommentRange argCommandRange argRestRange argParseTree |
|
|
|
assert {[lindex $argRestRange 1] == 0} ;#we expect the parse command to consume the whole argument as a single element, so the restRange should start at 0. |
|
#if {[lindex $argRestRange 1] != 0} { |
|
# #failed sanity check. |
|
# #todo - just use assert? |
|
# puts stderr "Warning2: 'parse command <arg> {}' did not consume the whole argument. This may indicate a parsing error or an edge case that is not handled correctly. arg: '$a' restRange: $argRestRange" |
|
# error "tclscript_info: unexpected parse result for argument. arg: '$a' restRange: $argRestRange" |
|
#} |
|
|
|
if {[llength $argParseTree] == 0} { |
|
#no parse tree - This is likely for an empty argument with expansion e.g {*}{ } |
|
#This construct occurs when using {*} in place of line continuation for long lists or dicts, e.g |
|
#dict create {*}{ |
|
# } key1 $dynamic {*}{ |
|
# key2 value2 |
|
#} |
|
#review - the 'empty' argument will still have an entry in cmdlineranges - as although 'empty' in terms of how it expands it may be whitespace across multiple lines. |
|
#we will simply skip adding this to our test_ lists. |
|
continue |
|
} elseif {[llength $argParseTree] > 1} { |
|
#we have a simple {*} expansion with a parse tree that includes multiple elements for each word in the expansion, |
|
#e.g {*}{a b c} |
|
#e.g {*}{"a" b c} |
|
#% parse command {{*}{"a" {b} c}} {} |
|
# (result split into lines and whitespace added for readability - the actuall result is on a single line) |
|
# {0 0} {0 14} {14 0} { |
|
# {simple {4 3} { |
|
# {text {5 1} {}} |
|
# } |
|
# } |
|
# {simple {8 3} { |
|
# {text {9 1} {}} |
|
# } |
|
# } |
|
# {simple {12 1} { |
|
# {text {12 1} {}} |
|
# } |
|
# } |
|
# } |
|
#here we have an expansion to 3 arguments, dquoted, cquoted and bare |
|
foreach subarginfo $argParseTree { |
|
lassign $subarginfo subargtype subargRange subargParseTree |
|
#we expect a list of a single triplet in subargParseTree for each subarg, with the first element being 'text' - review this assumption. |
|
assert {$subargtype eq "simple"} |
|
assert {[llength $subargParseTree] == 1} |
|
|
|
set fullsubarg [parse getstring $a $subargRange] ;#arg possibly with quotes |
|
set textRange [lindex $subargParseTree 0 1] |
|
set textsubarg [parse getstring $a $textRange] ;#arg with quotes stripped |
|
lappend test_cmdargs $textsubarg |
|
if {[lindex $subargRange 1] - [lindex $textRange 1] == 2} { |
|
#was quoted with either {} or "" - we can determine the quotestate from the first and last character of the full argument string. |
|
if {[string index $fullsubarg 0] eq "\{" && [string index $fullsubarg end] eq "\}"} { |
|
lappend test_cmdargs_quotestate "cquoted" |
|
} elseif {[string index $fullsubarg 0] eq "\"" && [string index $fullsubarg end] eq "\""} { |
|
lappend test_cmdargs_quotestate "dquoted" |
|
} |
|
} else { |
|
lappend test_cmdargs_quotestate "bare" |
|
} |
|
lappend test_cmdargs_linerange $arglr |
|
} |
|
|
|
} else { |
|
#we have a single element in the parse tree - this could be a simple argument with no {*} expansion, or it could be a complex argument with {*} expansion that includes command substitution or variable substitution etc that prevents the tcl parser from doing the expansion at parse time, and results in a parse tree with a single element with the 'expand' keyword. |
|
if {[lindex $argParseTree 0 0] eq "expand"} { |
|
#we have an argument with leading {*} that is followed by more complex constructs such as command substitution, variable substitution or backslash substitution. |
|
#e.g {*}"a [subcmd] c" or {*}"a $var c" or {*}"a \t c" |
|
#or |
|
# {*}[ |
|
# #comment only |
|
# |
|
# #more comments |
|
#] |
|
#This comment example expands to an empty list. We should be able to determine that here and treat the same as 'llength $argparseTree == 0' case above - but for now we will just treat it as a complex expansion that we can't analyse. |
|
set has_expand_arg 1 |
|
break |
|
} |
|
if {[lindex $argParseTree 0 0] eq "simple"} { |
|
#we have a simple argument with no {*} expansion, so we can parse it as it is. |
|
set simpleRange [lindex $argParseTree 0 1] |
|
set simpletriplet [lindex $argParseTree 0 2 0] ;#expect only one text element in the parse tree for a simple argument |
|
assert {[lindex $simpletriplet 0] eq "text"} |
|
set simpletextRange [lindex $simpletriplet 1] |
|
set simpletext [parse getstring $a $simpletextRange] |
|
lappend test_cmdargs $simpletext |
|
if {[lindex $simpleRange 1] - [lindex $simpletextRange 1] == 2} { |
|
#was quoted with either {} or "" - we can determine the quotestate from the first and last character of the full argument string. |
|
set fullsimpletext [parse getstring $a $simpleRange] |
|
if {[string index $fullsimpletext 0] eq "\{" && [string index $fullsimpletext end] eq "\}"} { |
|
lappend test_cmdargs_quotestate "cquoted" |
|
} elseif {[string index $fullsimpletext 0] eq "\"" && [string index $fullsimpletext end] eq "\""} { |
|
lappend test_cmdargs_quotestate "dquoted" |
|
} |
|
} else { |
|
lappend test_cmdargs_quotestate "bare" |
|
} |
|
lappend test_cmdargs_linerange $arglr |
|
} elseif {[lindex $argParseTree 0 0] eq "word"} { |
|
#get wordparts |
|
#reconstruct the argument from the word parts - we need to do this to correctly handle cases such as "a string with [subcommand]" where the parse tree will split this into multiple |
|
#if the parts are a single 'command' or single 'variable' then we can (eventually) use type analysis on the variable or return value of the command to determine the structure of the argument for parsing with punk::args::parse - but for now we will just treat this as a complex case that we can't analyse. |
|
set wordRange [lindex $argParseTree 0 1] |
|
set wordLength [lindex $wordRange 1] ;#we compare this with the total length of the parts making up the word to determine if it was quoted or not. |
|
set wordparts [lindex $argParseTree 0 2] ;#list of triplets with types such as 'text', 'command', 'variable', 'backslash' etc. |
|
set wordparttypes [lmap wp $wordparts {lindex $wp 0}] |
|
#we know the wordparts have at least one of variable,command or backslash since otherwise it would have been parsed as a simple argument with a 'text' wordpart and not a 'word' with multiple wordparts - review this assumption. |
|
if {"variable" ni $wordparttypes && "command" ni $wordparttypes} { |
|
#we must have only text and/or backslash wordparts - so we can subst to get the final text of the argument for parsing with punk::args::parse - review this assumption. |
|
set escapedtext "" |
|
set partLengthSum 0 ;#to compare with wordLength to determine if the argument was quoted or not. |
|
foreach wp $wordparts { |
|
#we can just retrieve and join the parts and call subst on the whole thing to get the final text of the argument for parsing with punk::args::parse - review this assumption. |
|
set wptype [lindex $wp 0] |
|
set wpRange [lindex $wp 1] |
|
incr partLengthSum [lindex $wpRange 1] |
|
set wpParseTree [lindex $wp 2] |
|
set wpText [parse getstring $a $wpRange] |
|
append escapedtext $wpText |
|
} |
|
set finaltext [subst -nocommands -novariables $escapedtext] |
|
lappend test_cmdargs $finaltext |
|
if {$wordLength - $partLengthSum == 2} { |
|
#was quoted "". If it had been quoted with {} it would have been parsed as a simple argument with a 'text' wordpart and not a 'word' with multiple wordparts - review this assumption. |
|
lappend test_cmdargs_quotestate "dquoted" |
|
} else { |
|
lappend test_cmdargs_quotestate "bare" |
|
} |
|
lappend test_cmdargs_linerange $arglr |
|
} else { |
|
#we have a complex argument with command substitution or variable substitution etc that prevents the tcl parser from doing the expansion at parse time, and results in a parse tree with a single element with the 'word' keyword. |
|
#e.g "a [subcmd] c" or "a $var c" |
|
set has_complex_arg 1 |
|
break |
|
} |
|
} else { |
|
puts stderr "Warning: unexpected argParseTree element for argument '$a': [lindex $argParseTree 0] (expected type 'simple' or 'word'). This may indicate an edge case that is not handled correctly. argParseTree: $argParseTree" |
|
error "tclscript_info: unexpected argParseTree element for argument '$a': [lindex $argParseTree 0] (expected type 'simple' or 'word'). This may indicate an edge case that is not handled correctly. argParseTree: $argParseTree" |
|
} |
|
|
|
} |
|
} |
|
|
|
#temp hack for ::list |
|
if {$docid ne "::list" && ($has_expand_arg || $has_complex_arg)} { |
|
puts stderr "[a+ red]detected $origin command with arguments that include expansion and/or embedded commands/variables - this may cause inaccuracies in argument parsing since we either don't know how many arguments the expansion will produce or what type they will be. review[a]" |
|
set linerange [lindex $cmdlineranges 0] |
|
dict lappend resultd commands_parseskip [list $linerange $dispatchwords] |
|
set parse_ok 0 |
|
} else { |
|
#assert llength test_cmdargs == llength test_cmdargs_quotestate == llength args_remaining |
|
if {[catch {punk::args::parse $test_cmdargs -form $form -errorstyle minimal withid $docid} cmd_argd]} { |
|
set linerange [lindex $cmdlineranges 0] ;#we should have a linerange for the whole command at this point since we called punk::args::parse with the whole command line as the argument list. |
|
set errmsg $cmd_argd |
|
set argoutput "[a+ cyan]" |
|
foreach a $test_cmdargs { |
|
append argoutput " $a " \n |
|
} |
|
append argoutput "[a]" |
|
puts stderr "[a+ red]detected $origin command with arguments that we failed to parse with form '$form':\n$argoutput\n $errmsg[a]" |
|
dict lappend resultd commands_parsefail [list $linerange $dispatchwords] |
|
set parse_ok 0 |
|
} |
|
} |
|
} else { |
|
puts stderr "[a+ red]detected command $origin with no argdoc id - unable to use punk::args::parse for detailed analysis of arguments[a]" |
|
} |
|
|
|
if {$parse_ok} { |
|
switch -- $origin { |
|
"::if" { |
|
#script arguments that are nested are body and optionally multiple else_if clauses and an else clause. |
|
|
|
#Note that the expression argument can contain complex multiline scripts eg if {[catch {<multiline_script>} val]}... |
|
|
|
#consider: |
|
#if {$condition} {return [getcode]} |
|
#vs |
|
#if {$condition} "return [getcode]" |
|
#vs |
|
#if {$conditions} [list return [getcode]] |
|
#vs |
|
#if {$condition} [getscript] |
|
#the difference is in when the substitution of $x happens |
|
#- in the first case it happens when the if command is executed, |
|
#and in the following cases it happens when the argument is parsed. |
|
# |
|
#if we were to (above during arg substutition analysis) replace command-substitutions with some token like <opaque>, |
|
#we still wouldn't know the structure of a doublequoted argument such as "return <opaque>" as we don't know how many words <opaque> expands to. |
|
set expr1 [dict get $cmd_argd values expr1] |
|
#todo - use tclparser on expression |
|
#'parse expression $expr1 {0 end}' |
|
#todo - pull out any command substitutions within the expression and analyse those as well - review whether we want to count those as increasing complexity or not. |
|
#some complexity metrics are interested in the number of conditions in if statement conditions - review. |
|
|
|
#BODY1 - body of if command |
|
set scriptarg [dict get $cmd_argd values body1] |
|
set argindex [dict get $cmd_argd received body1] |
|
#no need to adjust for optional keywords, since body is always 2nd argument after the condition. |
|
|
|
#if {[string index $scriptarg 0] eq "\{" && [string index $scriptarg end] eq "\}"} {} |
|
if {[lindex $test_cmdargs_quotestate $argindex] in "cquoted dquoted"} { |
|
tclscript_info::update_resultd_for_script_arg $scriptarg $argindex $cmdlineranges $nscontext ;#we will upvar resultd |
|
} else { |
|
puts stderr "[a+ red]detected if command with unbraced body - not currently analysing for complexity - review\n $cmd_argd[a]" |
|
#todo - we should be able to analyse the body if it is a simple unbraced command e.g 'if {$condition} return' |
|
#this is functionally equivalent to 'if {$condition} {return}' |
|
} |
|
|
|
#elseif and else blocks should be processed similarly. |
|
if {[dict exists $cmd_argd values "elseif_clause"]} { |
|
set clauses [dict get $cmd_argd values elseif_clause] |
|
set elseif_clauses_argindices [list] |
|
dict for {k argidx} [dict get $cmd_argd received] { |
|
if {$k eq "elseif_clause"} { |
|
lappend elseif_clauses_argindices $argidx |
|
} |
|
} |
|
set elseif_number 0 |
|
foreach clause $clauses { |
|
lassign $clause _elseif elseif_cond _then elseif_script |
|
set scriptarg $elseif_script |
|
set cindex [lindex $elseif_clauses_argindices $elseif_number] |
|
#-------------------------------------------------------- |
|
#take into account optional keywords to determine the actual argindex of the scriptarg for this elseif clause. |
|
set argidx -1 |
|
set elseif_clauses [dict get $cmd_argd values "elseif_clause"] ;#list of elseif clauses in order of appearance in the arguments list |
|
foreach {clausename clauseindex} [dict get $cmd_argd received] { |
|
if {$clausename eq "elseif_clause"} { |
|
#determine length of any earlier (or current) elseif clause to adjust the argindex accordingly |
|
set prev_elseif_clause [lpop elseif_clauses 0] |
|
#clause of the form {elseif <cond> then <script>} or {elseif <cond> {} <script} |
|
#case 2 when keyword 'then' was omitted. |
|
if {[lindex $prev_elseif_clause 2] eq "then"} { |
|
incr argidx 4 ;#elseif <cond> then <script> |
|
} else { |
|
incr argidx 3 ;#elseif <cond> {} <script> |
|
} |
|
} else { |
|
incr argidx |
|
} |
|
if {$clauseindex eq $cindex} { |
|
#we have found the clause that corresponds to the current elseif clause we are processing - we can stop looking further. |
|
break |
|
} |
|
} |
|
set argindex $argidx |
|
puts stderr "[a+ cyan]detected elseif clause - checking arguments for script blocks to analyse as nested scripts (argindex $argindex in original argument list)[a]" |
|
#-------------------------------------------------------- |
|
|
|
#if {[string index $scriptarg 0] eq "\{" && [string index $scriptarg end] eq "\}"} {} |
|
if {[lindex $test_cmdargs_quotestate $argindex] in "cquoted dquoted"} { |
|
tclscript_info::update_resultd_for_script_arg $scriptarg $argindex $cmdlineranges $nscontext ;#we will upvar resultd |
|
|
|
#set scriptinner [string range $scriptarg 1 end-1] |
|
#set arg_tclscript_info [tclscript_info $scriptinner $nscontext] |
|
#dict incr resultd comment_count [dict get $arg_tclscript_info comment_count] |
|
#dict incr resultd commands_count [dict get $arg_tclscript_info commands_count] |
|
#foreach key {commands commands_proc commands_native commands_notfound commands_unknown} { |
|
# set cmds_to_update [dict get $resultd $key] |
|
# dict set resultd $key {} ;#unshare |
|
# foreach cmd [dict get $arg_tclscript_info $key] { |
|
# if {$cmd ni $cmds_to_update} { |
|
# #dict lappend resultd $key $cmd |
|
# lappend cmds_to_update $cmd |
|
# } |
|
# } |
|
# dict set resultd $key $cmds_to_update ;#restore |
|
#} |
|
} else { |
|
puts stderr "[a+ red]detected elseif clause with unbraced body - not currently analysing for complexity - review\n $cmd_argd[a]" |
|
} |
|
incr elseif_number |
|
} |
|
} |
|
|
|
#else block is optional - but if it exists it should be processed similarly to the if and elseif blocks. |
|
if {[dict exists $cmd_argd values "else_clause"]} { |
|
|
|
set elseinfo [dict get $cmd_argd values else_clause] |
|
lassign $elseinfo _else else_script |
|
set scriptarg $else_script |
|
#get the index of the else clause in the original arguments list so that we can get the linerange for any parsefail commands within the else block correct relative to the whole script rather than just the else block script. |
|
#we can use the 'received' key from cmd_argd. We can treat it as a dict in this case, even though it's actually a list (as it can in some cases have repeated keys). |
|
set cindex [dict get $cmd_argd received else_clause] |
|
#todo - we need to check the actual argument list because some keywords are optional |
|
set argindex [dict get $cmd_argd received else_clause] |
|
#-------------------------------------------------------- |
|
#take into account optional keywords to determine the actual argindex of the scriptarg for this elseif clause. |
|
set argidx -1 |
|
if {[dict exists $cmd_argd values "elseif_clause"]} { |
|
set elseif_clauses [dict get $cmd_argd values "elseif_clause"] ;#list of elseif clauses in order of appearance in the arguments list |
|
} else { |
|
set elseif_clauses [list] |
|
} |
|
foreach {clausename clauseindex} [dict get $cmd_argd received] { |
|
if {$clausename eq "elseif_clause"} { |
|
#determine length of any earlier (or current) elseif clause to adjust the argindex accordingly |
|
set prev_elseif_clause [lpop elseif_clauses 0] |
|
#clause of the form {elseif <cond> then <script>} or {elseif <cond> {} <script} |
|
#case 2 when keyword 'then' was omitted. |
|
if {[lindex $prev_elseif_clause 2] eq "then"} { |
|
incr argidx 4 ;#elseif <cond> then <script> |
|
} else { |
|
incr argidx 3 ;#elseif <cond> {} <script> |
|
} |
|
} else { |
|
incr argidx |
|
} |
|
if {$clauseindex eq $cindex} { |
|
#we have found the clause that corresponds to the current clause we are processing - we can stop looking further. |
|
break |
|
} |
|
} |
|
set argindex $argidx |
|
puts stderr "[a+ cyan]detected else clause - checking arguments for script blocks to analyse as nested scripts (argindex $argindex in original argument list)[a]" |
|
#-------------------------------------------------------- |
|
|
|
#if {[string index $scriptarg 0] eq "\{" && [string index $scriptarg end] eq "\}"} {} |
|
if {[lindex $test_cmdargs_quotestate $argindex] in "cquoted dquoted"} { |
|
tclscript_info::update_resultd_for_script_arg $scriptarg $argindex $cmdlineranges $nscontext ;#we will upvar resultd |
|
} else { |
|
puts stderr "[a+ red]detected else clause with body quotestate '[lindex $test_cmdargs_quotestate $argindex]' - not currently analysing for complexity - review\n $cmd_argd[a]" |
|
} |
|
} |
|
|
|
} |
|
::proc { |
|
#this defines a function/proc - regarding cyclomatic complexity, |
|
#we may want to analyse the body of the proc as a nested script - since it is a new scope with its own independent paths of execution. |
|
#perhaps this depends on whether the script actually calls the proc being defined |
|
#review |
|
|
|
|
|
#proc name args body |
|
|
|
#use the name to potentially update the nscontext for the body of the proc - this will allow us to correctly resolve commands within the body of the proc that are defined within the proc's namespace. |
|
set namearg [dict get $cmd_argd values name] |
|
set nsprefix [nsprefix $namearg] |
|
if {[string match ::* $nsprefix]} { |
|
#fully qualified |
|
set proc_ns $nsprefix |
|
} else { |
|
#relative. |
|
if {$nsprefix eq ""} { |
|
set proc_ns $nscontext |
|
} else { |
|
set proc_ns ${nscontext}::$nsprefix |
|
} |
|
} |
|
#ignore args ? |
|
|
|
|
|
set scriptarg [dict get $cmd_argd values body] |
|
set argindex [dict get $cmd_argd received body] |
|
#assert: our argindex is also valid into test_cmdargs and args_remaining since we are parsing the same argument list - review this. |
|
|
|
#if {[string index $scriptarg 0] eq "\{" && [string index $scriptarg end] eq "\}"} {} |
|
if {[lindex $test_cmdargs_quotestate $argindex] in "cquoted dquoted"} { |
|
tclscript_info::update_resultd_for_script_arg $scriptarg $argindex $cmdlineranges $nscontext ;#we will upvar resultd |
|
|
|
|
|
#set script [string range $scriptarg 1 end-1] |
|
#set cmd_tclscript_info [tclscript_info $script $proc_ns] |
|
##puts stderr "::proc cmd_tclscript_info: $cmd_tclscript_info" |
|
##merge the results from the cmd_tclscript_info into our main resultd dict |
|
#dict incr resultd comment_count [dict get $cmd_tclscript_info comment_count] |
|
#dict incr resultd commands_count [dict get $cmd_tclscript_info commands_count] |
|
#foreach key {commands commands_proc commands_native commands_notfound commands_unknown} { |
|
# set cmds_to_update [dict get $resultd $key] |
|
# dict set resultd $key {} ;#unshare |
|
# foreach cmd [dict get $cmd_tclscript_info $key] { |
|
# if {$cmd ni $cmds_to_update} { |
|
# #dict lappend resultd $key $cmd |
|
# lappend cmds_to_update $cmd |
|
# } |
|
# } |
|
# dict set resultd $key $cmds_to_update ;#restore |
|
#} |
|
|
|
} else { |
|
puts stderr "[a+ red]detected proc command with unbraced body - not currently analysing for complexity - review\n $cmd_argd[a]" |
|
} |
|
} |
|
"::for" { |
|
#we have a for command - we should be able to determine the script arguments and analyse those as nested scripts. |
|
#puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
# for start test next body |
|
# start is a script that is executed once at the beginning of the loop (non-nested) |
|
# test is an expression that is executed at the beginning of each iteration of the loop |
|
# next is a script that is executed at the end of each iteration of the loop |
|
# body is a script that is executed at the end of each iteration of the loop (nested) |
|
|
|
#todo: |
|
# start, test, next |
|
|
|
|
|
set scriptarg [dict get $cmd_argd values body] |
|
set argindex [dict get $cmd_argd received body] |
|
#if {[string index $scriptarg 0] eq "\{" && [string index $scriptarg end] eq "\}"} {} |
|
if {[lindex $test_cmdargs_quotestate $argindex] in "cquoted dquoted"} { |
|
tclscript_info::update_resultd_for_script_arg $scriptarg $argindex $cmdlineranges $nscontext ;#we will upvar resultd |
|
} else { |
|
puts stderr "qstate: [lindex $test_cmdargs_quotestate $argindex]" |
|
puts stderr "[a+ red]detected $origin command with unbraced body - not currently analysing for complexity - review\n $cmd_argd[a]" |
|
} |
|
|
|
|
|
} |
|
"::foreach" { |
|
#foreach varlist list [varlist list]... body |
|
set scriptarg [dict get $cmd_argd values body] |
|
set argindex [dict get $cmd_argd received body] |
|
#if {[string index $scriptarg 0] eq "\{" && [string index $scriptarg end] eq "\}"} {} |
|
if {[lindex $test_cmdargs_quotestate $argindex] eq "cquoted dquoted"} { |
|
tclscript_info::update_resultd_for_script_arg $scriptarg $argindex $cmdlineranges $nscontext ;#we will upvar resultd |
|
} else { |
|
puts stderr "[a+ red]detected $origin command with unbraced body - not currently analysing for complexity - review\n $cmd_argd[a]" |
|
} |
|
} |
|
"::switch" { |
|
#similar to ::if whith regards to cyclometric complexity. |
|
puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
} |
|
"::while" { |
|
#while test body |
|
# test is an expression that can contain command substitutions - we should analyse those for complexity but not count them as increasing nesting level. |
|
# body is a script that we should analyse as a nested script. |
|
|
|
set scriptarg [dict get $cmd_argd values body] |
|
set argindex [dict get $cmd_argd received body] |
|
#no need to adjust for optional keywords, since body is always 2nd argument after the test condition. |
|
|
|
#if {[string index $scriptarg 0] eq "\{" && [string index $scriptarg end] eq "\}"} {} |
|
if {[lindex $test_cmdargs_quotestate $argindex] in "dquoted cquoted"} { |
|
tclscript_info::update_resultd_for_script_arg $scriptarg $argindex $cmdlineranges $nscontext ;#we will upvar resultd |
|
} else { |
|
puts stderr "[a+ red]detected $origin command with unbraced body - not currently analysing for complexity - review\n $cmd_argd[a]" |
|
} |
|
|
|
} |
|
::eval { |
|
# 'namespace eval namespace arg... |
|
#where arg values are concatenated together with a space between each one to form the script to be evaluated in the specified namespace. |
|
puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
|
|
} |
|
::tcl::namespace::eval { |
|
# 'namespace eval namespace arg... |
|
#where arg values are concatenated together with a space between each one to form the script to be evaluated in the specified namespace |
|
# in the same manner as the arg command. |
|
#puts stderr "[a+ cyan] $origin REVIEW \n $cmd_argd[a]" |
|
set arglist [dict get $cmd_argd values arg] |
|
#there are no optional keywords or multivalued clauses for namespace eval |
|
#so the clauseindex of each supplied arg argument should correspond to the argindex in the original argument list, |
|
#so we can use that to determine the linerange for any parsefail commands within the bodies of the namespace eval command. |
|
#however - each arg is not necessarily a complete script in itself |
|
#- the arg values are concatenated together with a space between each one to form the script to be evaluated, |
|
#so we must analyse the concatenation of all the arg values together as a single script, rather than analysing each arg value separately as a script. |
|
|
|
#Our function: update_resultd_for_script_arg {scriptarg argindex cmdlineranges nscontext} |
|
#is designed to analyse a single script argument, so we can concatenate the arg values together and then call update_resultd_for_script_arg with the |
|
#concatenated script and the argindex of the first arg value (since that is where the linerange for the whole script will start from). |
|
#This means the end-line will not technically be valid - but it shouldn't matter for our relative line-number adjustments for any parsefail commands within the script, |
|
#since those are based on the start-line of the linerange and the line numbers within the script itself, rather than the end-line of the linerange. |
|
|
|
set first_arg_received_posn [lsearch -exact [dict get $cmd_argd received] arg] |
|
set first_arg_clauseindex [lindex [dict get $cmd_argd received] $first_arg_received_posn+1] ;#use this as our argindex for our synthetic argument - the concatenated script. |
|
set concatenated_script [join $arglist " "] |
|
tclscript_info::update_resultd_for_script_arg $concatenated_script $first_arg_clauseindex $cmdlineranges $nscontext ;#we will upvar resultd |
|
|
|
} |
|
"::tcl::dict::for" { |
|
#we have a dict for command - we should be able to determine the script arguments and analyse those as nested scripts. |
|
puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
} |
|
"::tcl::dict::with" { |
|
#we have a dict with command - we should be able to determine the script arguments and analyse those as nested scripts. |
|
puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
} |
|
::tcl::dict::filter { |
|
#form 1: 'dict filter dictionaryValue filtertype=script keyVariable ValueVariable script' |
|
puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
} |
|
::tcl::dict::update { |
|
#form 1: 'dict update dictionaryVariable key varName [key varName]... script' |
|
puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
} |
|
"::apply" { |
|
puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
} |
|
::lmap { |
|
puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
} |
|
::after { |
|
#form 1 'after ms script [script]... |
|
#form 3 'after cancel script [script]...' |
|
#form 4 'after idle script [script]...' |
|
puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
|
|
} |
|
::time { |
|
#time script [count] |
|
puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
} |
|
::timerate { |
|
#timerate [-direct] [-calibrate] [-overhead double] script [time [max-count]] |
|
puts stderr "[a+ red] $origin UNIMPLEMENTED \n $cmd_argd[a]" |
|
} |
|
default { |
|
#for other commands we will not attempt to determine which arguments are scripts to be analysed as nested scripts, |
|
#but we will still have done the analysis of command substitutions within arguments above, so we may still have picked up some nested commands that way. |
|
|
|
#todo - review tk commands, particularly with regards to callback scripts and their possible contribution to complexity metrics. |
|
} |
|
} |
|
} |
|
|
|
|
|
#todo - compare any commands found with our own command name to determine if the script is recursive. |
|
#This is very common in tcl scripts, and is a feature that complexity reports *may* want to factor in to their final score. |
|
#It is generally not a measure in standard 'cyclomatic complexity' metrics as it is more of a runtime behaviour than a static code structure feature - |
|
# ie it is not considered to increase the number of independent paths through the code, |
|
# but it is still a complexity increasing feature in the sense that it can lead to more complex runtime behaviour and harder to understand code. |
|
|
|
|
|
} |
|
return $resultd |
|
} |
|
proc tclparser_tcl {subcmd string range} { |
|
#provide a tcl parser with the same API as the tclparser c library. |
|
#https://chiselapp.com/user/aspect/repository/tclparser/index |
|
#or |
|
#https://github.com/ActiveState/teapot/tree/master/lib/tclparser |
|
|
|
set scriptlist [list] |
|
set argchars [split $string ""] |
|
set in_dq 0 ;#in double quotes |
|
set in_cb 0 ;#in curly braces |
|
set in_commandsub 0 ;#in command substitution (i.e within square brackets) |
|
#when we are in a command substitution - we should be able to keep appending whilst testing for info complete. review |
|
set escaped 0 |
|
set nesting_level 0 |
|
set scripttoken "" |
|
for {set i 0} {$i < [llength $argchars]} {incr i} { |
|
set ch [lindex $argchars $i] |
|
#todo |
|
error "tclparser_tcl not yet implemented - in the meantime install the tclparser c library (package require parser)" |
|
} |
|
|
|
} |
|
namespace eval tclscript_info { |
|
|
|
#note that we may call this (e.g for eval or namespace eval) with a scriptarg that is actually multiple arguments that are concatenated together to form the script to be evaluated, |
|
#in which case we will be passing the argindex for the first argument that forms the script. |
|
#In that case, the element of cmdlineranges that corresponds to the first argument that forms the script will have a linerange that may not cover the subsequent arguments. |
|
#We are primarily interested in the start-line for our adjustements, but we should be aware that the end-line for any parsefail commands within the script may be beyond the end-line of the linerange |
|
#for the first argument. |
|
proc update_resultd_for_script_arg {scriptarg argindex cmdlineranges nscontext} { |
|
upvar resultd resultd |
|
#puts "stderr [a+ cyan]analysing script argument for nested commands (argindex $argindex in original argument list)[a]" |
|
#puts stderr "-------------------------------------------------------------------" |
|
#puts stderr "scriptarg: $scriptarg" |
|
#puts stderr "-------------------------------------------------------------------" |
|
|
|
#set scriptinner [string range $scriptarg 1 end-1] |
|
#review - we already have inner value |
|
set scriptinner $scriptarg |
|
|
|
set arg_tclscript_info [punk::lib::tclscript_info $scriptinner $nscontext] |
|
dict incr resultd comment_count [dict get $arg_tclscript_info comment_count] |
|
dict incr resultd commands_count [dict get $arg_tclscript_info commands_count] |
|
foreach key {commands commands_proc commands_native commands_notfound commands_unknown} { |
|
set cmds_to_update [dict get $resultd $key] |
|
dict set resultd $key {} ;#unshare |
|
foreach cmd [dict get $arg_tclscript_info $key] { |
|
if {$cmd ni $cmds_to_update} { |
|
#dict lappend resultd $key $cmd |
|
lappend cmds_to_update $cmd |
|
} |
|
} |
|
dict set resultd $key $cmds_to_update ;#restore |
|
} |
|
|
|
#------------------------------------------------------------------------------------------------------------------------------------ |
|
#adjust linerange for parsefail commands found in the else block to be relative to the whole script rather than the scriptarg script |
|
#our passed in cmdlineranges list has lineranges for the whole command line and each argument |
|
#- we can use the argindex to find the linerange for the current argument, and then adjust the lineranges for any parsefail commands |
|
#within the scriptarg script to be relative to the whole script rather than just the scriptarg script. |
|
|
|
set parsefails [dict get $arg_tclscript_info commands_parsefail] |
|
#we called punk::args::parse with just the arguments - not the cmd itself. |
|
#our cmdlineranges list has a pair of start end numbers for each of: wholecmdline cmd arg1 arg2 ... |
|
set rangeindex [expr {$argindex + 2}] ;#add 2 to account for the wholecmdline and cmd elements at the start of the lineranges list |
|
set arg_linerange [lindex $cmdlineranges $rangeindex] |
|
set arg_start [lindex $arg_linerange 0] |
|
#puts stderr "arg_linerange: $arg_linerange arg_start: $arg_start" |
|
set adjusted_parsefails {} |
|
foreach pf $parsefails { |
|
set pf_linerange [lindex $pf 0] |
|
set pf_cmdwords [lindex $pf 1] |
|
|
|
lassign $pf_linerange pf_start pf_end |
|
|
|
#$arg_start is line 1 of the script, so we need to add $arg_start - 1 to the pf_start and pf_end to get the correct linerange relative to the whole script. |
|
set adjusted_pf_linerange [list [expr {$pf_start + ($arg_start -1)}] [ expr {$pf_end + ($arg_start-1)}]] |
|
lappend adjusted_parsefails [list $adjusted_pf_linerange $pf_cmdwords] |
|
} |
|
dict lappend resultd commands_parsefail {*}$adjusted_parsefails |
|
|
|
set parseskips [dict get $arg_tclscript_info commands_parseskip] |
|
#we called punk::args::parse with just the arguments - not the cmd itself. |
|
#our cmdlineranges list has a pair of start end numbers for each of: wholecmdline cmd arg1 arg2 ... |
|
set rangeindex [expr {$argindex + 2}] ;#add 2 to account for the wholecmdline and cmd elements at the start of the lineranges list |
|
set arg_linerange [lindex $cmdlineranges $rangeindex] |
|
set arg_start [lindex $arg_linerange 0] |
|
#puts stderr "arg_linerange: $arg_linerange arg_start: $arg_start" |
|
set adjusted_parseskips {} |
|
foreach pf $parseskips { |
|
set pf_linerange [lindex $pf 0] |
|
set pf_cmdwords [lindex $pf 1] |
|
|
|
lassign $pf_linerange pf_start pf_end |
|
|
|
#$arg_start is line 1 of the script, so we need to add $arg_start - 1 to the pf_start and pf_end to get the correct linerange relative to the whole script. |
|
set adjusted_pf_linerange [list [expr {$pf_start + ($arg_start -1)}] [ expr {$pf_end + ($arg_start-1)}]] |
|
lappend adjusted_parseskips [list $adjusted_pf_linerange $pf_cmdwords] |
|
} |
|
dict lappend resultd commands_parseskip {*}$adjusted_parseskips |
|
#------------------------------------------------------------------------------------------------------------------------------------ |
|
} |
|
|
|
|
|
#This is deliberately non-recursive. |
|
#It returns a list of *toplevel* commands in the string in order of evaluation. |
|
#(due to further substitutions within arguments of these toplevel commands, the final order of evaluation may include other commands before the ones we determine here) |
|
#tclinfo_script will call it for each argument that needs to be analysed for command substitution, and we will merge the results into the main resultd dict in tclscript_info. |
|
#ie tclinfo_script is responsible for the necessary recursion to analyse the full script, |
|
#whereas tclword_to_scriptlist is only responsible for parsing a single argument string for command substitutions and returning the list of toplevel commands within it. |
|
#note that a tclword in this context is a string that represents a single argument (usually) to some command, which may be a bareword, a double quoted string, or a curly braced string, |
|
#and importantly may span multiple lines. |
|
#A tcl commandline first element; ie the command itself; could be considered a tclword in this context as it could also have dynamic substitutions within it. |
|
#- commonly just a variable e.g 'set cmdname foo; $cmdname arg1 arg2' |
|
# but it could also have command subtitutions e.g 'cmdname_[get_suffix] arg1 arg2' |
|
|
|
proc tclword_to_scriptlist {string {nscontext ""}} { |
|
#consider 'list [puts a]$v(x,[puts b])[puts c]' |
|
# the command substitutions are [puts a], [puts b] and [puts c] |
|
#The order of evaluation is [puts a] then [puts b] then [puts c] but [puts b] is within the variable array syntax and so parses differently to the other two command substitutions. |
|
# (it is an element within the 'variable') so we need to recurse to get it. |
|
# |
|
if {$nscontext eq ""} { |
|
set nscontext [uplevel 1 {namespace current}] |
|
} |
|
puts stderr "------------tclword_to_scriptlist called with string: $string nscontext: $nscontext------------" |
|
#analyse a string that will undergo command substitution to determine the list of commands that will be substituted into the string. |
|
#e.g for "a string with [puts hello] and [puts world]" we would return a list of 2 items, each being a list of the command and its arguments, e.g {puts hello} and {puts world} |
|
#we need to do a full parse of the string to determine which square brackets are actually command substitutions that need to be analysed for complexity, and which are just literal characters within the string. |
|
#note that we can't just use tclscript_to_toplevelinfo as at the top level, the string is not being parsed as a list of commands - it's being parsed as a single argument that may contain command substitutions - so we need to do our own parsing to determine the command substitutions within it. |
|
#when we find a command substitution, we can use tclscript_to_toplevelinfo to parse the contents of the command substitution to get the list of commands within it. |
|
|
|
#we are given just a string that represents a single argument to some command. |
|
#ie we are parsing it 'out of context'. |
|
#The parsercommand subcommand 'command' will process a command possibly with argument(s) |
|
# (if we only passed in the argument string and parse it with 'parse command' it could have a leading # and be treated as a comment, |
|
# - so we need to add a dummy command word at the start to ensure it is parsed as a command with argument) |
|
set boguscmdline "bogus1 " |
|
append boguscmdline $string |
|
|
|
set scriptlist [list] |
|
|
|
if {![catch {package require parser}]} { |
|
#use tclparser library if available |
|
set parseinfo [::parse command $boguscmdline {0 end}] |
|
} else { |
|
puts stderr "tclparser library not available - using fallback tcl parser which may be less accurate - review" |
|
set parseinfo [::punk::lib::tclparser_tcl command $boguscmdline {0 end}] |
|
} |
|
#returns 4 items. |
|
#The first 3 are position pairs {startindex bytelen}. |
|
#pair 1 - position,len of comments |
|
#pair 2 - position,len of entire command including arguments. (may not be whole string if there are newlines or semicolons in the passed string, but in this case they should be absent) |
|
#pair 3 - position,len of rest of the string that is not part of the command or its arguments - this should be at tail and with len 0 if we are passing in a single argument string with no newlines or semicolons. |
|
#tree - a list-of-lists tree representing the parsed structure of the command and its arguments |
|
# This structure is a 'flat' structure in the sense that it is a list of lists where each sublist represents a command or argument and its position in the original string, |
|
# but it is not a 'nested' structure representing the nesting of commands within commands. |
|
# ie when we encounter a 'command' node in the tree we need to run further parsing on its contents to determine the commands within it, and their nesting structure. |
|
#Note that the position and length values are in bytes, not characters, so we need to use the parse getstring command to get the correct substring from the original string when we want to extract a command substitution string for further parsing. |
|
#------------------------------------------------------------------------------------------ |
|
# a sample from the tclparser library tests for command substitution parsing is as follows: |
|
#test parse-6.5 {ParseTokens procedure, command substitution} { |
|
# testparser {[foo $x bar]z} 0 |
|
#} {- {[foo $x bar]z} 1 word {[foo $x bar]z} 2 command {[foo $x bar]} 0 text z 0 {}} |
|
|
|
#(This has been unwrapped from the position,len*3 + tree structure by the testparser function in the test library) |
|
|
|
#The return value from parser for string {[foo $x bar]z} is as follows: |
|
# {0 0} {0 13} {13 0} {{word {0 13} {{command {0 12} {}} {text {12 1} {}}}}} |
|
#ie our tree list a list with a single element which is itself a list of 3 elements. |
|
#the first element is the type of node - in this case 'word' - which indicates that this is a single argument to a command. |
|
#the second element is the position and bytelength of this argument in the original string - in this case {0 13} which corresponds to the entire string. |
|
#the third element is a list of 2 3-element subnodes being nodes for the square-bracketed part and the immediately adjacent bare character z |
|
#------------------------------------------------------------------------------------------ |
|
|
|
#for our usecase we expect exactly two elements in our tree. |
|
#1) |
|
#the first being a node of type 'simple' which represents the span of our 'bogus1' command word that we added to ensure the string is parsed as a command with argument. |
|
# we can simply verify that it is present and has the posn,len 0 6. (the subelement is 'text {0 6} {}' which we can ignore) |
|
#2) |
|
#the second being a node of type 'simple' if curly braced (opaque string) or 'word' if it was double quoted or a bareword. |
|
# if it is of type 'simple' we can ignore it for the purposes of command substitution analysis, since it is fully opaque and can't contain command substitutions. |
|
# if it is of type 'word' then the toplevel nodes within element 3 of type command are of interest to us as subnodes representing command substitutions. |
|
|
|
lassign $parseinfo commentpos parsedpos restpos parse_tree |
|
if {[lindex $parse_tree 0 0] ne "simple" || [lindex $parse_tree 0 1 0] != 0 || [lindex $parse_tree 0 1 1] != 6} { |
|
error "Unexpected parse tree structure from tclparser for argument string '$string' - review" |
|
} |
|
if {[lindex $parse_tree 1 0] eq "simple"} { |
|
#fully opaque argument - no command substitution possible - return empty list |
|
#Note that our calling function may yet determine that this argument occupies a position in the command's argument structure that is normally occupied by a script block |
|
# (e.g for if, while, for, foreach, switch etc) |
|
#- in which case it may still call tclscript_to_toplevelinfo on this argument later, |
|
# but here we are dealing with the command substitutions only. |
|
# The *result* of these command substitutions will not be available in any case as we don't actually run the scripts. |
|
return {} |
|
} elseif {[lindex $parse_tree 1 0] eq "word"} { |
|
#potentially contains command substitutions - we need to look for subnodes of type 'command' within element 3 of this node |
|
set subnodes [lindex $parse_tree 1 2] |
|
foreach subnode $subnodes { |
|
if {[lindex $subnode 0] eq "command"} { |
|
set pos_bytes [lindex $subnode 1] |
|
#lassign [lindex $subnode 1] cmdpos cmdlen |
|
#set cmdstringfull [string range $boguscmdline $cmdpos [expr {$cmdpos + $cmdlen - 1}]] |
|
set cmdstringfull [parse getstring $boguscmdline $pos_bytes] ;#this must be used instead of string range to correctly handle any multibyte characters in the original string |
|
|
|
#get the inner part of the command substitution - stripping the square brackets - for parsing to get the list of commands within it. |
|
set cmdstring [string range $cmdstringfull 1 end-1] |
|
#we have the command string (which could be multiline or have commands separated by colons) |
|
#- we need to parse it (only at toplevel) to get the list of commands within it. |
|
#we can parse the contents of [...] using tclscript_to_toplevelinfo - ie as a script. |
|
set cmdscriptlistinfo [punk::lib::tclscript_to_toplevelinfo $cmdstring] |
|
set cmdscriptlist [dict get $cmdscriptlistinfo scriptlist] |
|
lappend scriptlist {*}$cmdscriptlist |
|
} elseif {[lindex $subnode 0] eq "variable"} { |
|
#e.g $v(x,[cmd]) - for array variables - we need to look for command substitutions within the variable subscript part. |
|
#there could in theory be some ugly constructs where there are more deeply nested command substitutions within inner array syntax |
|
#e.g $v(x,$v2(y,[cmd2])) |
|
#set var_subscript_subnodes [lindex $subnode 2] |
|
#we need to recursively examine 'variable' subnodes for any depth of nested 'variable' subnodes. |
|
#get the string for the entire variable and then call tclword_to_scriptlist on it to get any command substitutions within it. |
|
set varstringfull [parse getstring $boguscmdline [lindex $subnode 1]] |
|
#set var_subscript_cmds [punk::lib::tclscript_info::tclword_to_scriptlist $varstringfull $nscontext] |
|
set varsubnodes [lindex $subnode 2] |
|
foreach varsubnode $varsubnodes { |
|
puts stderr "[a+ cyan]varsubnodes: $varsubnodes type: [lindex $varsubnode 0][a]" |
|
if {[lindex $varsubnode 0] eq "command"} { |
|
set pos_bytes [lindex $varsubnode 1] |
|
set cmdstringfull [parse getstring $boguscmdline $pos_bytes] ;#this must be used instead of string range to correctly handle any multibyte characters in the original string |
|
|
|
#get the inner part of the command substitution - stripping the square brackets - for parsing to get the list of commands within it. |
|
set cmdstring [string range $cmdstringfull 1 end-1] |
|
#we have the command string (which could be multiline or have commands separated by colons) |
|
#- we need to parse it (only at toplevel) to get the list of commands within it. |
|
#we can parse the contents of [...] using tclscript_to_toplevelinfo - ie as a script. |
|
set cmdscriptlistinfo [punk::lib::tclscript_to_toplevelinfo $cmdstring] |
|
set cmdscriptlist [dict get $cmdscriptlistinfo scriptlist] |
|
lappend scriptlist {*}$cmdscriptlist |
|
} elseif {[lindex $varsubnode 0] eq "variable"} { |
|
set pos_bytes [lindex $varsubnode 1] |
|
set varstringfull [parse getstring $boguscmdline $pos_bytes] |
|
puts stderr "[a+ cyan]varstringfull: $varstringfull[a]" |
|
set varscriptlist [punk::lib::tclscript_info::tclword_to_scriptlist $varstringfull $nscontext] |
|
lappend scriptlist {*}$varscriptlist |
|
} |
|
} |
|
} |
|
} |
|
} else { |
|
error "Unexpected parse tree structure from tclparser for argument string '$string' - review" |
|
} |
|
return $scriptlist |
|
} |
|
} |
|
|
|
proc tclscript_showparse {script} { |
|
set scriptinfo [tclscript_info $script] |
|
#for testing purposes - show the script source with highlighted line ranges for commands_parsefail and commands_parseskip |
|
set parsefails [dict get $scriptinfo commands_parsefail] |
|
set parseskips [dict get $scriptinfo commands_parseskip] |
|
|
|
|
|
set lnum 1 |
|
set lines [split $script \n] |
|
set outlines [list] |
|
#we will prepend a single column of spaces to each line and background highlight the spaces for any lines that are within the lineranges for parsefails or parseskips, using different colours for each. |
|
set outnumbers [list] |
|
foreach ln $lines { |
|
#set ln_out " $ln" ;#default with no highlight |
|
set ln_out "" |
|
foreach rec $parseskips { |
|
#review - there may be multiple parsefails or parseskips that overlap on the same line |
|
#- we will just apply the first one we find in the list - review if this is sufficient |
|
#or if we need to apply multiple highlights or a different highlight if there are multiple overlapping parsefails or parseskips. |
|
#currently our overline/underline markers make the delimiting unclear when there are overlaps. |
|
#we could use underdotted/overdotted - but it would still only disambiguate for two overlapping ranges - if there are more than two overlapping ranges it would still be unclear. |
|
if {$lnum >= [lindex $rec 0 0] && $lnum <= [lindex $rec 0 1]} { |
|
set ln_out [grepstr -r a -highlight cyan [lindex $rec 1] $ln] |
|
set markers [list] |
|
if {$lnum == [lindex $rec 0 0]} { |
|
lappend markers overline |
|
} |
|
if {$lnum == [lindex $rec 0 1]} { |
|
lappend markers underline |
|
} |
|
set ln_out "[a+ Cyan {*}$markers] [a]$ln_out" |
|
break |
|
} |
|
} |
|
foreach rec $parsefails { |
|
if {$lnum >= [lindex $rec 0 0] && $lnum <= [lindex $rec 0 1]} { |
|
set ln_out [grepstr -r a -highlight red [lindex $rec 1] $ln] |
|
set markers [list] |
|
if {$lnum == [lindex $rec 0 0]} { |
|
lappend markers overline |
|
} |
|
if {$lnum == [lindex $rec 0 1]} { |
|
lappend markers underline |
|
} |
|
set ln_out "[a+ Red {*}$markers] [a]$ln_out" |
|
break |
|
} |
|
} |
|
if {$ln_out eq ""} { |
|
#no parsefail or parseskip override |
|
set ln_out " $ln" ;#default with no highlight |
|
} |
|
lappend outlines $ln_out |
|
lappend outnumbers $lnum |
|
incr lnum |
|
} |
|
if {[llength $outlines] == 0} { |
|
return "" |
|
} |
|
set lastnum [lindex $outnumbers end] |
|
set maxwidth [string length $lastnum] |
|
set final_lines [list] |
|
foreach lnum $outnumbers ln $outlines { |
|
set numstr [format "%${maxwidth}d" $lnum] |
|
set ln_out "$numstr $ln" |
|
lappend final_lines $ln_out |
|
} |
|
|
|
set out "" |
|
append out [join $final_lines \n] |
|
append out \n [punk::lib::showdict $scriptinfo] |
|
} |
|
|
|
proc complexity {script} { |
|
#get a single number representing the complexity of a script - based on tclscript_info |
|
set info [tclscript_info $script] |
|
#arbitrary formula - review |
|
#This is a placeholder. todo - produce better/more info from tclscript_info and use that to produce a more meaningful complexity score based on selected complexity mechanism. |
|
#todo - allow provision of 'metric' to use different weightings for different aspects of complexity |
|
#e.g basic cyclomatic complexity, which generally doesn't account for nesting depth. |
|
#e.g cognitive complexity. |
|
return [expr {[llength [dict get $info commands_proc]] * 3 + [llength [dict get $info commands_native]] * 1 + [llength [dict get $info commands_notfound]] * 4 + [llength [dict get $info commands_unknown]] * 5}] |
|
} |
|
|
|
|
|
|
|
proc invoke command { |
|
#*** !doctools |
|
#[call [fun invoke] [arg command]] |
|
#[para]Invoke an external command (using tcl open command) capturing stdout,stderr and the exitcode |
|
#[example { |
|
# set script { |
|
# puts stdout {hello on stdout} |
|
# puts stderr {hello on stderr} |
|
# exit 42 |
|
# } |
|
# invoke [list tclsh <<$script] |
|
#}] |
|
|
|
#see https://wiki.tcl-lang.org/page/open |
|
lassign [chan pipe] chanout chanin |
|
lappend command 2>@$chanin |
|
set fh [open |$command] |
|
set stdout [read $fh] |
|
close $chanin |
|
set stderr [read $chanout] |
|
close $chanout |
|
if {[catch {close $fh} cres e]} { |
|
dict with e {} |
|
lassign [set -errorcode] sysmsg pid exit |
|
if {$sysmsg eq {NONE}} { |
|
#output to stderr caused [close] to fail. Do nothing |
|
} elseif {$sysmsg eq {CHILDSTATUS}} { |
|
return [list $stdout $stderr $exit] |
|
} else { |
|
return -options $e $stderr |
|
} |
|
} |
|
return [list $stdout $stderr 0] |
|
} |
|
|
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
upvar ::punk::lib::has_punk_ansi has_punk_ansi |
|
|
|
#if {!$has_punk_ansi} { |
|
# set RST "" |
|
# set sep " = " |
|
# set sep_ \u2260 ;# equivalent [punk::ansi::convert_g0 [punk::ansi::g0 |]] (not equal symbol) |
|
#} else { |
|
# set RST [punk::ansi::a] |
|
# #set sep " [a+ Web-seagreen]=[a] " |
|
# set sep " [punk::ansi::a+ Green]=$RST " ;#stick to basic default colours for wider terminal support |
|
# #set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]mismatch$RST " |
|
# #NOTE that \u2260 not suitable for non utf-8 terminals. |
|
# set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]\u2260$RST " |
|
#} |
|
#todo - consider ascii == and != instead of unicode when terminal doesn't support utf-8. |
|
# (safe detection methods for utf-8 support?) |
|
#if colour is disabled we want to refresh this. |
|
#therefore we use @dynamic |
|
proc get_sep {} { |
|
upvar ::punk::lib::has_punk_ansi has_punk_ansi |
|
if {!$has_punk_ansi} { |
|
set sep " = " |
|
} else { |
|
#set sep " [a+ Web-seagreen]=[a] " |
|
set sep " [punk::ansi::a+ Green]=[punk::ansi::a] " ;#stick to basic default colours for wider terminal support |
|
} |
|
return $sep |
|
} |
|
proc get_sep_mismatch {} { |
|
upvar ::punk::lib::has_punk_ansi has_punk_ansi |
|
if {!$has_punk_ansi} { |
|
set sep_mismatch \u2260 ;# equivalent [punk::ansi::convert_g0 [punk::ansi::g0 |]] (not equal symbol) |
|
} else { |
|
#set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]mismatch$RST " |
|
#NOTE that \u2260 not suitable for non utf-8 terminals. |
|
set sep_mismatch " [punk::ansi::a+ Brightred undercurly underline undt-white]\u2260[punk::ansi::a] " |
|
} |
|
return $sep_mismatch |
|
} |
|
set DYN_SEP {${[get_sep]}} |
|
set DYN_SEP_MISMATCH {${[get_sep_mismatch]}} |
|
lappend PUNKARGS [list { |
|
@dynamic |
|
@id -id ::punk::lib::pdict |
|
@cmd -name pdict -help\ |
|
"Print dict keys,values to channel |
|
The pdict function operates on variable names - passing the value to the showdict function which operates on values |
|
(see also showdict)" |
|
|
|
@opts -any 1 |
|
|
|
#default separator to provide similarity to tcl's parray function |
|
-separator -default "${$DYN_SEP}" |
|
-roottype -default "dict" |
|
-substructure -default {} |
|
-channel -default stdout -help\ |
|
"existing channel - or 'none' to return as string" |
|
|
|
@values -min 1 -max -1 |
|
|
|
dictvar -type string -help "name of variable. Can be a dict, list or array" |
|
|
|
patterns -type string -default "*" -multiple 1 -help {Multiple patterns can be specified as separate arguments. |
|
Each pattern consists of 1 or more segments separated by the hierarchy separator (forward slash) |
|
The system uses similar patterns to the punk pipeline pattern-matching system. |
|
The default assumed type is dict - but an array will automatically be extracted into key value pairs so will also work. |
|
Segments are classified into list,dict and string operations. |
|
Leading % indicates a string operation - e.g %# gives string length |
|
A segment with a single @ is a list operation e.g @0 gives first list element, @1-3 gives the lrange from 1 to 3 |
|
(todo - change to indexset syntax @1..3 @1..end-1 etc) |
|
A segment containing 2 @ symbols is a dict operation. e.g @@k1 retrieves the value for dict key 'k1' |
|
The operation type indicator is not always necessary if lower segments in the hierarchy are of the same type as the previous one. |
|
e.g1 pdict env */%# |
|
the pattern starts with default type dict, so * retrieves all keys & values, |
|
the next hierarchy switches to a string operation to get the length of each value. |
|
e.g2 pdict env W* S* |
|
Here we supply 2 patterns, each in default dict mode - to display keys and values where the keys match the glob patterns |
|
e.g3 pdict punk_testd */* |
|
This displays 2 levels of the dict hierarchy. |
|
Note that if the sublevel can't actually be interpreted as a dictionary (odd number of elements or not a list at all) |
|
- then the normal = separator will be replaced with a coloured (or underlined if colour off) 'mismatch' indicator. |
|
e.g4 set list {{k1 v1 k2 v2} {k1 vv1 k2 vv2}}; pdict list @0-end/@@k2 @*/@@k1 |
|
Here we supply 2 separate pattern hierarchies, where @0-end and @* are list operations and are equivalent |
|
The second level segment in each pattern switches to a dict operation to retrieve the value by key. |
|
When a list operation such as @* is used - integer list indexes are displayed on the left side of the = for that hierarchy level. |
|
} |
|
}] |
|
} |
|
proc pdict {args} { |
|
package require punk::args |
|
variable has_punk_ansi |
|
|
|
set argd [punk::args::parse $args withid ::punk::lib::pdict] |
|
set opts [dict get $argd opts] |
|
set dvar [dict get $argd values dictvar] |
|
set patterns [dict get $argd values patterns] |
|
set isarray [uplevel 1 [list ::tcl::array::exists $dvar]] |
|
if {$isarray} { |
|
set dvalue [uplevel 1 [list ::tcl::array::get $dvar]] |
|
if {![dict exists $opts -keytemplates]} { |
|
set arrdisplay [string map [list %dvar% $dvar] {${[if {[lindex $key 1] eq "query"} {val "%dvar% [lindex $key 0]"} {val "%dvar%($key)"}]}}] |
|
dict set opts -keytemplates [list $arrdisplay] |
|
} |
|
dict set opts -keysorttype dictionary |
|
} else { |
|
set dvalue [uplevel 1 [list set $dvar]] |
|
} |
|
showdict {*}$opts $dvalue {*}$patterns |
|
} |
|
|
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
|
|
lappend PUNKARGS [list { |
|
@dynamic |
|
@id -id ::punk::lib::showdict |
|
@cmd -name punk::lib::showdict -help "display dictionary keys and values" |
|
#todo - table tableobject |
|
-return -default "tailtohead" -choices {tailtohead sidebyside} |
|
-channel -default none |
|
-trimright -default 1 -type boolean -help\ |
|
"Trim whitespace off rhs of each line. |
|
This can help prevent a single long line that wraps in terminal from making |
|
every line wrap due to long rhs padding." |
|
-separator -default "${$DYN_SEP}" -help "Separator column between keys and values" |
|
-separator_mismatch -default "${$DYN_SEP_MISMATCH}" -help "Separator to use when patterns mismatch" |
|
-roottype -default "dict" -help\ |
|
"list,dict,string" |
|
-ansibase_keys -default "" -help\ |
|
"ansi list for each level in -substructure. e.g \[list \[a+ red\] \[a+ web-green\]\]" |
|
-substructure -default {} |
|
-ansibase_values -default "" |
|
-keytemplates -default {\$\{$key\}} -type list -help\ |
|
"list of templates for keys at each level" |
|
-keysorttype -default "none" -choices {none dictionary ascii integer real} |
|
-keysortdirection -default increasing -choices {increasing decreasing} |
|
-debug -default 0 -type boolean -help\ |
|
"When enabled, produces some rudimentary debug output on stderr" |
|
-- -type none -optional 1 |
|
@values -min 1 -max -1 |
|
dictvalue -type list -help\ |
|
"dict or list value" |
|
patterns -default "*" -type string -multiple 1 -help\ |
|
"key or key glob pattern" |
|
}] |
|
} |
|
#TODO - much. |
|
#showdict needs to be able to show different branches which share a root path |
|
#e.g show key a1/b* in its entirety along with a1/c* - (or even exact duplicates) |
|
# - specify ansi colour per pattern so different branches can be highlighted? |
|
# - ideally we want to be able to use all the dict & list patterns from the punk pipeline system eg @head @tail # (count) etc |
|
# - The current version is incomplete but passably usable. |
|
# - Copy proc and attempt rework so we can get back to this as a baseline for functionality |
|
proc showdict {args} { ;# analogous to parray (except that it takes the dict as a value) |
|
package require punk::pipe |
|
#package require punk ;#we need pipeline pattern matching features |
|
package require textblock |
|
|
|
set RST [punk::ansi::a] |
|
|
|
set argd [punk::args::parse $args withid ::punk::lib::showdict] |
|
|
|
#for punk::lib - we want to reduce pkg dependencies. |
|
# - so we won't even use the tcllib debug pkg here |
|
set opt_debug [dict get $argd opts -debug] |
|
if {$opt_debug} { |
|
if {[info body debug::showdict] eq ""} { |
|
proc ::punk::lib::debug::showdict {args} { |
|
catch {puts stderr "punk::lib::showdict-> [string cat {*}$args]"} |
|
} |
|
} |
|
} else { |
|
if {[info body debug::showdict] ne ""} { |
|
proc ::punk::lib::debug::showdict {args} {} |
|
} |
|
} |
|
|
|
set opt_sep [dict get $argd opts -separator] |
|
set opt_mismatch_sep [dict get $argd opts -separator_mismatch] |
|
set opt_keysorttype [dict get $argd opts -keysorttype] |
|
set opt_keysortdirection [dict get $argd opts -keysortdirection] |
|
set opt_trimright [dict get $argd opts -trimright] |
|
set opt_keytemplates [dict get $argd opts -keytemplates] |
|
debug::showdict "keytemplates ---> $opt_keytemplates <---" |
|
set opt_ansibase_keys [dict get $argd opts -ansibase_keys] |
|
set opt_ansibase_values [dict get $argd opts -ansibase_values] |
|
set opt_return [dict get $argd opts -return] |
|
set opt_roottype [dict get $argd opts -roottype] |
|
set opt_structure [dict get $argd opts -substructure] |
|
|
|
set dval [dict get $argd values dictvalue] |
|
set patterns [dict get $argd values patterns] |
|
|
|
set result "" |
|
|
|
#pattern hierarchy |
|
# */@1/@0,%#,%str @0/@1 - patterns each one is a pattern or pattern_nest |
|
# * @1 @0,%#,%str - segments |
|
# a b 1 0 %# %str - keys |
|
|
|
set pattern_key_index [list] ;#list of pattern_nests, same length as number of keys generated |
|
set pattern_next_substructure [dict create] |
|
set pattern_this_structure [dict create] |
|
|
|
# -- --- --- --- |
|
#REVIEW |
|
#as much as possible we should pass the indices along as a query to the pipeline pattern matching system so we're not duplicating the work and introducing inconsistencies. |
|
#The main difference here is that sometimes we are treating the result as key-val pairs with the key being the query, other times the key is part of the query, or from the result itself (list/dict indices/keys). |
|
#todo - determine if there is a more consistent rule-based way to do this rather than adhoc |
|
#e.g pdict something * |
|
#we want the keys from the result as individual lines on lhs |
|
#e.g pdict something @@<key> |
|
#we want <key> on lhs result on rhs |
|
#<key> = v0 |
|
#e.g pdict something @0-2,@4 |
|
#we currently return: |
|
#0 = v0 |
|
#1 = v1 |
|
#2 = v2 |
|
#4 = v4 |
|
#This means we've effectively auto-expanded the first list - elements 0-2. (or equivalently stated: we've flattened the 3 element and 1 element lists into one list of 4 elements) |
|
#ie pdict is doing 'magic' compared to the normal pattern matching syntax, to make useage more convenient. |
|
#this is a tradeoff that could create surprises and make things messy and/or inconsistent. |
|
#todo - see if we can find a balance that gives consistency and logicality to the results whilst allowing still simplified matching syntax that is somewhat intuitive. |
|
#It may be a matter of documenting what type of indexes are used directly as keys, and which return sets of further keys |
|
#The solution for more consistency/predictability may involve being able to bracket some parts of the segment so for example we can apply an @join or %join within a segment |
|
#that involves more complex pattern syntax & parsing (to be added to the main pipeline pattern syntax) |
|
# -- --- --- --- |
|
|
|
set filtered_keys [list] |
|
if {$opt_roottype in {dict list string}} { |
|
#puts "getting keys for roottype:$opt_roottype" |
|
if {[llength $dval]} { |
|
|
|
#TODO - change to indexset notation 0..1,3..end-1 etc |
|
|
|
set re_numdashnum {^([-+]{0,1}\d+)-([-+]{0,1}\d+)$} |
|
set re_idxdashidx {^([-+]{0,1}\d+|end[-+]{1}\d+|end)-([-+]{0,1}\d+|end[-+]{1}\d+|end)$} |
|
foreach pattern_nest $patterns { |
|
set keyset [list] |
|
set keyset_structure [list] |
|
|
|
set segments [split $pattern_nest /] |
|
set levelpatterns [lindex $segments 0] ;#possibly comma separated patterns |
|
#we need to use _split_patterns to separate (e.g to protect commas that appear within quotes) |
|
set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] |
|
#puts stderr "showdict-->_split_patterns: $patterninfo" |
|
foreach v_idx $patterninfo { |
|
lassign $v_idx v idx |
|
#we don't support vars on lhs of index in this context - (because we support simplified glob patterns such as x* and literal dict keys such as kv which would otherwise be interpreted as vars with no index) |
|
set p $v$idx ;#_split_patterns has split too far in this context - the entire pattern is the index pattern |
|
if {[string index $p 0] eq "!"} { |
|
set get_not 1 |
|
set p [string range $p 1 end] |
|
} else { |
|
set get_not 0 |
|
} |
|
switch -exact -- $p { |
|
* - "" { |
|
if {$opt_roottype eq "list"} { |
|
set keys [punk::lib::range 0 [llength $dval]-1] ;#compat wrapper around subset of lseq functionality |
|
lappend keyset {*}$keys |
|
lappend keyset_structure {*}[lrepeat [llength $keys] list] |
|
dict set pattern_this_structure $p list |
|
} elseif {$opt_roottype eq "dict"} { |
|
set keys [dict keys $dval] |
|
lappend keyset {*}$keys |
|
lappend keyset_structure {*}[lrepeat [llength $keys] dict] |
|
dict set pattern_this_structure $p dict |
|
} else { |
|
lappend keyset %string |
|
lappend keyset_structure string |
|
dict set pattern_this_structure $p string |
|
} |
|
} |
|
%# { |
|
dict set pattern_this_structure $p string |
|
lappend keyset %# |
|
lappend keyset_structure string |
|
} |
|
# { |
|
#todo get_not !# is test for listiness (see punk) |
|
dict set pattern_this_structure $p list |
|
lappend keyset # |
|
lappend keyset_structure list |
|
} |
|
## { |
|
dict set pattern_this_structure $p dict |
|
lappend keyset [list ## query] |
|
lappend keyset_structure dict |
|
} |
|
@* { |
|
#puts "showdict ---->@*<----" |
|
dict set pattern_this_structure $p list |
|
set keys [punk::lib::range 0 [llength $dval]-1] |
|
lappend keyset {*}$keys |
|
lappend keyset_structure {*}[lrepeat [llength $keys] list] |
|
} |
|
@@ { |
|
#get first k v from dict |
|
dict set pattern_this_structure $p dict |
|
lappend keyset [list @@ query] |
|
lappend keyset_structure dict |
|
} |
|
@*k@* - @*K@* { |
|
#returns keys only |
|
lappend keyset [list $p query] |
|
lappend keyset_structure dict |
|
dict set pattern_this_structure $p dict |
|
} |
|
@*.@* { |
|
set keys [dict keys $dval] |
|
lappend keyset {*}$keys |
|
lappend keyset_structure {*}[lrepeat [llength $keys] dict] |
|
dict set pattern_this_structure $p dict |
|
} |
|
default { |
|
#puts stderr "===p:$p" |
|
#the basic scheme also doesn't allow commas in dict keys access via the convenience @@key - which isn't great, especially for arrays where it is common practice! |
|
#we've already sacrificed whitespace in keys - so extra limitations should be reduced if it's to be passably useful |
|
#@@"key,etc" should allow any non-whitespace key |
|
switch -glob -- $p { |
|
{@k\*@*} - {@K\*@*} { |
|
#value glob return keys |
|
#set search [string range $p 4 end] |
|
#dict for {k v} $dval { |
|
# if {[string match $search $v]} { |
|
# lappend keyset $k |
|
# } |
|
#} |
|
if {$get_not} { |
|
lappend keyset [list !$p query] |
|
} else { |
|
lappend keyset [list $p query] |
|
} |
|
lappend keyset_structure dict |
|
dict set pattern_this_structure $p dict |
|
} |
|
@@* { |
|
#exact match key - review - should raise error to match punk pipe behaviour? |
|
set k [string range $p 2 end] |
|
if {$get_not} { |
|
if {[dict exists $dval $k]} { |
|
set keys [dict keys [dict remove $dval $k]] |
|
lappend keyset {*}$keys |
|
lappend keyset_structure {*}[lrepeat [llength $keys] dict] |
|
} else { |
|
lappend keyset {*}[dict keys $dval] |
|
lappend keyset_structure {*}[lrepeat [dict size $dval] dict] |
|
} |
|
} else { |
|
if {[dict exists $dval $k]} { |
|
lappend keyset $k |
|
lappend keyset_structure dict |
|
} |
|
} |
|
dict set pattern_this_structure $p dict |
|
} |
|
@k@* - @K@* { |
|
#TODO get_not |
|
set k [string range $p 3 end] |
|
if {[dict exists $dval $k]} { |
|
lappend keyset $k |
|
lappend keyset_structure dict |
|
} |
|
dict set pattern_this_structure $p dict |
|
} |
|
{@\*@*} { |
|
#return list of values |
|
#set k [string range $p 3 end] |
|
#lappend keyset {*}[dict keys $dval $k] |
|
if {$get_not} { |
|
lappend keyset [list !$p query] |
|
} else { |
|
lappend keyset [list $p query] |
|
} |
|
lappend keyset_structure dict |
|
dict set pattern_this_structure $p dict |
|
} |
|
{@\*.@*} { |
|
#TODO get_not |
|
set k [string range $p 4 end] |
|
set keys [dict keys $dval $k] |
|
lappend keyset {*}$keys |
|
lappend keyset_structure {*}[lrepeat [llength $keys] dict] |
|
dict set pattern_this_structure $p dict |
|
} |
|
{@v\*@*} - {@V\*@*} { |
|
#value-glob return value |
|
#error "dict value-glob value-return only not supported here - bad pattern '$p' in '$pattern_nest'" |
|
if {$get_not} { |
|
lappend keyset [list !$p query] |
|
} else { |
|
lappend keyset [list $p query] |
|
} |
|
lappend keyset_structure dict |
|
dict set pattern_this_structure $p dict |
|
} |
|
{@\*v@*} - {@\*V@*} { |
|
#key-glob return value |
|
lappend keyset [list $p query] |
|
lappend keyset_structure dict |
|
dict set pattern_this_structure $p dict |
|
} |
|
{@\*@*} - {@\*v@*} - {@\*V@} { |
|
#key glob return val |
|
lappend keyset [list $p query] |
|
lappend keyset_structure dict |
|
dict set pattern_this_structure $p dict |
|
} |
|
@??@* { |
|
#exact key match - no error |
|
lappend keyset [list $p query] |
|
lappend keyset_structure dict |
|
dict set pattern_this_structure $p dict |
|
} |
|
default { |
|
set this_type $opt_roottype |
|
if {[string match @* $p]} { |
|
#list mode - trim optional list specifier @ |
|
set p [string range $p 1 end] |
|
dict set pattern_this_structure $p list |
|
set this_type list |
|
} elseif {[string match %* $p]} { |
|
dict set pattern_this_structure $p string |
|
lappend keyset $p |
|
lappend keyset_structure string |
|
set this_type string |
|
} |
|
if {$this_type eq "list"} { |
|
dict set pattern_this_structure $p list |
|
if {[string is integer -strict $p]} { |
|
if {$get_not} { |
|
set keys [punk::lib::range 0 [llength $dval]-1] |
|
set keys [lremove $keys $p] |
|
lappend keyset {*}$keys |
|
lappend keyset_structure {*}[lrepeat [llength $keys] list] |
|
} else { |
|
lappend keyset $p |
|
lappend keyset_structure list |
|
} |
|
} elseif {[punk::lib::is_indexset $p]} { |
|
set keys [punk::lib::indexset_resolve [llength $dval] $p] |
|
lappend keyset {*}$keys |
|
lappend keyset_structure {*}[lrepeat [llength $keys] list] |
|
} elseif {[string match "?*-?*" $p]} { |
|
#could be either - don't change type |
|
#list indices with tcl8.7 underscores? be careful. Before 8.7 we could have used regexp \d on integers |
|
#now we should map _ to "" first |
|
set p [string map {_ {}} $p] |
|
#lassign [textutil::split::splitx $p {\.\.}] a b |
|
if {![regexp $re_idxdashidx $p _match a b]} { |
|
error "unrecognised pattern $p" |
|
} |
|
#TODO - fix terminology. 'lower_resolve' is confusing here as range can be in descending order |
|
#change to start/end terminology? |
|
|
|
set lower_resolve [punk::lib::lindex_resolve [llength $dval] $a] ;#-Inf for too low, Inf for too high |
|
#keep lower_resolve as separate var to lower for further checks based on which side out-of-bounds |
|
if {${lower_resolve} == Inf} { |
|
##x |
|
#lower bound is above upper list range |
|
#match with decreasing indices is still possible |
|
set lower [expr {[llength $dval]-1}] ;#set to max |
|
} elseif {$lower_resolve == -Inf} { |
|
##x |
|
set lower 0 |
|
} else { |
|
set lower $lower_resolve |
|
} |
|
set upper [punk::lib::lindex_resolve [llength $dval] $b] |
|
if {$upper == -Inf} { |
|
##x |
|
#upper bound is below list range - |
|
if {$lower_resolve > -Inf} { |
|
##x |
|
set upper 0 |
|
} else { |
|
continue |
|
} |
|
} elseif {$upper == Inf} { |
|
#use max |
|
set upper [expr {[llength $dval]-1}] |
|
#assert - upper >=0 because we have ruled out empty lists |
|
} |
|
#note lower can legitimately be higher than upper - lib::range, like lseq can produce sequence in reverse order |
|
set keys [punk::lib::range $lower $upper] |
|
if {$get_not} { |
|
set fullrange [punk::lib::range 0 [llength $dval]-1] |
|
set keys [lremove $fullrange {*}$keys] |
|
if {$lower > $upper} { |
|
set keys [lreverse $keys] |
|
} |
|
} |
|
lappend keyset {*}$keys |
|
lappend keyset_structure {*}[lrepeat [llength $keys] list] |
|
} else { |
|
if {$get_not} { |
|
lappend keyset [list !@$p query] |
|
} else { |
|
lappend keyset [list @$p query] |
|
} |
|
lappend keyset_structure list |
|
} |
|
} elseif {$this_type eq "string"} { |
|
dict set pattern_this_structure $p string |
|
} elseif {$this_type eq "dict"} { |
|
#default equivalent to @\*@* |
|
dict set pattern_this_structure $p dict |
|
#puts "dict: appending keys from index '$p' keys: [dict keys $dval $p]" |
|
set keys [dict keys $dval $p] |
|
if {$get_not} { |
|
set keys [dict keys [dict remove $dval {*}$keys]] |
|
} |
|
lappend keyset {*}$keys |
|
lappend keyset_structure {*}[lrepeat [llength $keys] dict] |
|
} else { |
|
puts stderr "list: unrecognised pattern $p" |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
# -- --- --- --- |
|
#check next pattern-segment for substructure type to use |
|
# -- --- --- --- |
|
set substructure "" |
|
set pnext [lindex $segments 1] |
|
set patterninfo [punk::pipe::lib::_split_patterns $levelpatterns] |
|
if {[llength $patterninfo] == 0} { |
|
# // ? -review - what does this mean? for xpath this would mean at any level |
|
set substructure [lindex $pattern_this_structure end] |
|
} elseif {[llength $patterninfo] == 1} { |
|
#ignore the NOT operator for purposes of query-type detection |
|
if {[string index $pnext 0] eq "!"} { |
|
set pnext [string range $pnext 1 end] |
|
} |
|
# single type in segment e.g /@@something/ |
|
switch -exact -- $pnext { |
|
"" { |
|
set substructure string |
|
} |
|
@*k@* - @*K@* - @*.@* - ## { |
|
set substructure dict |
|
} |
|
# { |
|
set substructure list |
|
} |
|
## { |
|
set substructure dict |
|
} |
|
%# { |
|
set substructure string |
|
} |
|
* { |
|
#set substructure $opt_roottype |
|
#set substructure [dict get $pattern_this_structure $pattern_nest] |
|
set substructure [lindex $pattern_this_structure end] |
|
} |
|
default { |
|
switch -glob -- $pnext { |
|
@??@* - @?@* - @@* { |
|
#all 4 or 3 len prefixes bounded by @ are dict |
|
set substructure dict |
|
} |
|
default { |
|
if {[string match @* $pnext]} { |
|
set substructure list |
|
} elseif {[string match %* $pnext]} { |
|
set substructure string |
|
} else { |
|
#set substructure $opt_roottype |
|
#set substructure [dict get $pattern_this_structure $pattern_nest] |
|
set substructure [lindex $pattern_this_structure end] |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} else { |
|
#e.g /@0,%str,.../ |
|
#doesn't matter what the individual types are - we have a list result |
|
set substructure list |
|
} |
|
#puts "--pattern_nest: $pattern_nest substructure: $substructure" |
|
dict set pattern_next_substructure $pattern_nest $substructure |
|
# -- --- --- --- |
|
|
|
if {$opt_keysorttype ne "none"} { |
|
set int_keyset 1 |
|
foreach k $keyset { |
|
if {![string is integer -strict $k]} { |
|
set int_keyset 0 |
|
break |
|
} |
|
} |
|
if {$int_keyset} { |
|
set sortindices [lsort -indices -integer $keyset] |
|
#set keyset [lsort -integer $keyset] |
|
} else { |
|
#set keyset [lsort -$opt_keysorttype $keyset] |
|
set sortindices [lsort -indices -$opt_keysorttype $keyset] |
|
} |
|
set keyset [lmap i $sortindices {lindex $keyset $i}] |
|
set keyset_structure [lmap i $sortindices {lindex $keyset_structure $i}] |
|
} |
|
|
|
foreach k $keyset { |
|
lappend pattern_key_index $pattern_nest |
|
} |
|
|
|
lappend filtered_keys {*}$keyset |
|
lappend all_keyset_structure {*}$keyset_structure |
|
|
|
#puts stderr "--->pattern_nest:$pattern_nest keyset:$keyset" |
|
} |
|
} |
|
#puts stderr "[dict get $pattern_this_structure $pattern_nest] keys: $filtered_keys" |
|
} else { |
|
puts stdout "unrecognised roottype: $opt_roottype" |
|
return $dval |
|
} |
|
|
|
if {[llength $filtered_keys]} { |
|
#both keys and values could have newline characters. |
|
#simple use of 'format' won't cut it for more complex dict keys/values |
|
#use block::width or our columns won't align in some cases |
|
switch -- $opt_return { |
|
"tailtohead" { |
|
#last line of key is side by side (possibly with separator) with first line of value |
|
#This is more intelligible when terminal wrapping occurs - and is closer to what happens with parray multiline keys and values |
|
#we still pad the key to max width so that the separator appears in the same column - which in the case of wide keys could cause that to wrap for all entries |
|
|
|
set kt [lindex $opt_keytemplates 0] |
|
if {$kt eq ""} { |
|
set kt {${$key}} |
|
} |
|
#set display_keys [lmap k $filtered_keys {tcl::string::map [list %k% $k] $kt}] |
|
set display_keys [lmap key $filtered_keys {tstr -ret string -allowcommands $kt}] |
|
set maxl [::tcl::mathfunc::max {*}[lmap v $display_keys {textblock::width $v}]] |
|
|
|
set kidx 0 |
|
set last_hidekey 0 |
|
foreach keydisplay $display_keys key $filtered_keys { |
|
set thisval "?" |
|
set hidekey 0 |
|
set pattern_nest [lindex $pattern_key_index $kidx] |
|
set pattern_nest_list [split $pattern_nest /] |
|
#set this_type [dict get $pattern_this_structure $pattern_nest] |
|
#set this_type [dict get $pattern_this_structure $key] |
|
set this_type [lindex $all_keyset_structure $kidx] |
|
#puts stderr "---> kidx:$kidx key:$key - pattern_nest:$pattern_nest this_type:$this_type" |
|
|
|
set is_match 1 ;#whether to display the normal separator or bad-match separator |
|
switch -- $this_type { |
|
dict { |
|
#todo? - slower lsearch if -dupes 1 flag set so we can display duplicate 'keys' if var not a proper dict but rather a dict-shaped list that we want to display as a dict |
|
# - default highlight dupes (ansi underline?) |
|
if {[lindex $key 1] eq "query"} { |
|
set qry [lindex $key 0] |
|
% thisval.= $qry= $dval |
|
} else { |
|
set thisval [tcl::dict::get $dval $key] |
|
} |
|
|
|
#set substructure [lrange $opt_structure 1 end] |
|
|
|
set nextpatterns [list] |
|
#which pattern nest applies to this branch |
|
set nextsub [dict get $pattern_next_substructure $pattern_nest] |
|
if {[llength $pattern_nest_list]} { |
|
set nest [lrange $pattern_nest_list 1 end] |
|
lappend nextpatterns {*}[join $nest /] |
|
} |
|
set nextopts [dict get $argd opts] |
|
|
|
|
|
set subansibasekeys [lrange $opt_ansibase_keys 1 end] |
|
set nextkeytemplates [lrange $opt_keytemplates 1 end] |
|
#dict set nextopts -substructure $nextsub |
|
dict set nextopts -keytemplates $nextkeytemplates |
|
dict set nextopts -ansibase_keys $subansibasekeys |
|
dict set nextopts -roottype $nextsub |
|
dict set nextopts -channel none |
|
#puts stderr "showdict {*}$nextopts $thisval [lindex $args end]" |
|
|
|
if {[llength $nextpatterns]} { |
|
if {[catch { |
|
set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] |
|
} errMsg]} { |
|
#puts stderr ">>> nextpatterns:'$nextpatterns' nextopts:'$nextopts'" |
|
set is_match 0 |
|
} |
|
} |
|
} |
|
list { |
|
if {[string is integer -strict $key]} { |
|
set thisval [lindex $dval $key] |
|
} else { |
|
if {[lindex $key 1] eq "query"} { |
|
set qry [lindex $key 0] |
|
} else { |
|
set qry $key |
|
} |
|
#pipeline - use punk patterns. |
|
% thisval.= $qry= $dval |
|
} |
|
|
|
set nextpatterns [list] |
|
#which pattern nest applies to this branch |
|
set nextsub [dict get $pattern_next_substructure $pattern_nest] |
|
if {[llength $pattern_nest_list]} { |
|
set nest [lrange $pattern_nest_list 1 end] |
|
lappend nextpatterns {*}[join $nest /] |
|
} |
|
set nextopts [dict get $argd opts] |
|
|
|
dict set nextopts -roottype $nextsub |
|
dict set nextopts -channel none |
|
|
|
#if {![llength $nextpatterns]} { |
|
# set nextpatterns * |
|
#} |
|
if {[llength $nextpatterns]} { |
|
if {[catch { |
|
set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] |
|
} errMsg]} { |
|
set is_match 0 |
|
} |
|
} |
|
} |
|
string { |
|
set hidekey 1 |
|
switch -- $key { |
|
"%string" - "%str" { |
|
set hidekey 1 |
|
set thisval $dval |
|
} |
|
"%ansiview" { |
|
set thisval [ansistring VIEW -lf 1 $dval] |
|
} |
|
"%ansiviewstyle" { |
|
set thisval [ansistring VIEWSTYLE -lf 1 $dval] |
|
} |
|
default { |
|
switch -glob -- $key { |
|
%XXXlpad-* { |
|
#todo - remove |
|
#moved to punk patterns |
|
set hidekey 1 |
|
lassign [split $key -] _ extra |
|
set width [expr {[textblock::width $dval] + $extra}] |
|
set thisval [textblock::pad $dval -which left -width $width] |
|
} |
|
%lpadstr-* { |
|
set hidekey 1 |
|
lassign [split $key -] _ extra |
|
set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] |
|
set thisval [textblock::pad $dval -which left -width $width -padchar $extra] |
|
} |
|
%rpad-* { |
|
set hidekey 1 |
|
lassign [split $key -] _ extra |
|
set width [expr {[textblock::width $dval] + $extra}] |
|
set thisval [textblock::pad $dval -which right -width $width] |
|
} |
|
%rpadstr-* { |
|
set hidekey 1 |
|
lassign [split $key -] _ extra |
|
set width [expr {[textblock::width $dval] + [tcl::string::length $extra]}] |
|
set thisval [textblock::pad $dval -which right -width $width -padchar $extra] |
|
} |
|
%XXXsplit-* { |
|
#todo - remove |
|
# moved to punk patterns. |
|
#supported here by default branch. |
|
#split on one or more chars - review |
|
set hidekey 1 |
|
lassign [split $key -] _ splitchars |
|
set thisval [split $dval $splitchars] |
|
} |
|
default { |
|
if {[lindex $key 1] eq "query"} { |
|
set qry [lindex $key 0] |
|
} else { |
|
set qry $key |
|
} |
|
set thisval $dval |
|
if {[string index $key 0] ne "%"} { |
|
set key %$key |
|
} |
|
#puts "---key:'$key'" |
|
set key [string map {; \\;} $key] ;#review |
|
#puts "---key:'$key'" |
|
#pipeline - use punk patterns. |
|
% thisval.= $key= $thisval |
|
} |
|
} |
|
|
|
} |
|
} |
|
|
|
set nextpatterns [list] |
|
#which pattern nest applies to this branch |
|
set nextsub [dict get $pattern_next_substructure $pattern_nest] |
|
if {[llength $pattern_nest_list]} { |
|
set nest [lrange $pattern_nest_list 1 end] |
|
lappend nextpatterns {*}[join $nest /] |
|
} |
|
#set nextopts [dict get $argd opts] |
|
dict set nextopts -roottype $nextsub |
|
dict set nextopts -channel none |
|
|
|
if {[llength $nextpatterns]} { |
|
set thisval [showdict {*}$nextopts -- $thisval {*}$nextpatterns] |
|
} |
|
|
|
} |
|
} |
|
if {$this_type eq "string" && $hidekey} { |
|
lassign [textblock::size $thisval] _vw vwidth _vh vheight |
|
#set blanks_above [string repeat \n [expr {$kheight -1}]] |
|
set vblock $opt_ansibase_values$thisval$RST |
|
#append result [textblock::join_basic -- $vblock] |
|
#review - we wouldn't need this space if we had a literal %sp %sp-x ?? |
|
append result " $vblock" |
|
} else { |
|
set ansibase_key [lindex $opt_ansibase_keys 0] |
|
|
|
lassign [textblock::size $keydisplay] _kw kwidth _kh kheight |
|
lassign [textblock::size $thisval] _vw vwidth _vh vheight |
|
|
|
set totalheight [expr {$kheight + $vheight -1}] |
|
set blanks_above [string repeat \n [expr {$kheight -1}]] |
|
set blanks_below [string repeat \n [expr {$vheight -1}]] |
|
|
|
if {$is_match} { |
|
set use_sep $opt_sep |
|
} else { |
|
set use_sep $opt_mismatch_sep |
|
} |
|
|
|
|
|
set sepwidth [textblock::width $use_sep] |
|
set kblock [textblock::pad $ansibase_key$keydisplay$RST$blanks_below -width $maxl] |
|
set sblock [textblock::pad $blanks_above$use_sep$blanks_below -width $sepwidth] |
|
set vblock $blanks_above$opt_ansibase_values$thisval$RST |
|
#only vblock is ragged - we can do a basic join because we don't care about rhs whitespace |
|
if {$last_hidekey} { |
|
append result \n |
|
} |
|
#append result [textblock::join_basic -- $kblock $sblock $vblock] \n |
|
append result [textblock::join_basic_raw $kblock $sblock $vblock] \n |
|
} |
|
set last_hidekey $hidekey |
|
incr kidx |
|
} |
|
} |
|
"sidebyside" { |
|
# TODO - fix |
|
#This is nice for multiline keys and values of reasonable length, will produce unintuitive results when line-wrapping occurs. |
|
#use ansibase_key etc to make the output more comprehensible in that situation. |
|
#This is why it is not the default. (review - terminal width detection and wrapping?) |
|
set maxl [::tcl::mathfunc::max {*}[lmap v $filtered_keys {textblock::width $v}]] |
|
foreach key $filtered_keys { |
|
set kt [lindex $opt_keytemplates 0] |
|
if {$kt eq ""} { |
|
set kt "%k%" |
|
} |
|
set keydisplay $opt_ansibase_keys[string map [list %k% $key] $kt]$RST |
|
#append result [format "%-*s = %s" $maxl $key [dict get $dval $key]] \n |
|
#differing height blocks (ie ragged) so we need a full textblock::join rather than join_basic |
|
append result [textblock::join -- [textblock::pad $keydisplay -width $maxl] $opt_sep "$opt_ansibase_values[dict get $dval $key]$RST"] \n |
|
} |
|
} |
|
} |
|
} |
|
if {$opt_trimright} { |
|
set result [::join [lines_as_list -line trimright $result] \n] |
|
} |
|
if {[string last \n $result] == [string length $result]-1} { |
|
set result [string range $result 0 end-1] |
|
} |
|
#stdout/stderr can exist but not be in 'chan names' (e.g when transforms in place) |
|
set chan [dict get $argd opts -channel] |
|
switch -- $chan { |
|
stderr - stdout { |
|
puts $chan $result |
|
} |
|
none { |
|
return $result |
|
} |
|
default { |
|
#review - check member of chan names? |
|
#just try outputting to the supplied channel for now |
|
puts $chan $result |
|
} |
|
} |
|
} |
|
|
|
proc is_list_all_in_list {small large} { |
|
if {[llength $small] > [llength $large]} {return 0} |
|
foreach x $large { |
|
::set ($x) {} |
|
} |
|
foreach x $small { |
|
if {![info exists ($x)]} { |
|
return 0 |
|
} |
|
} |
|
return 1 |
|
} |
|
#v2 generally seems slower |
|
proc is_list_all_in_list2 {small large} { |
|
set small_in_large [lsort [struct::set intersect [lsort -unique $small] $large ]] |
|
return [struct::list equal [lsort $small] $small_in_large] |
|
} |
|
if {!$has_struct_list || !$has_struct_set} { |
|
set body { |
|
package require struct::list |
|
package require struct::set |
|
} |
|
append body [info body is_list_all_in_list2] |
|
proc is_list_all_in_list2 {small large} $body |
|
} |
|
|
|
proc is_list_all_ni_list {A B} { |
|
foreach x $B { |
|
::set ($x) {} |
|
} |
|
foreach x $A { |
|
if {[info exists ($x)]} { |
|
return 0 |
|
} |
|
} |
|
return 1 |
|
} |
|
proc is_list_all_ni_list2 {a b} { |
|
set i [struct::set intersect $a $b] |
|
return [expr {[llength $i] == 0}] |
|
} |
|
if {!$has_struct_set} { |
|
set body { |
|
package require struct::list |
|
} |
|
append body [info body is_list_all_ni_list2] |
|
proc is_list_all_ni_list2 {a b} $body |
|
} |
|
proc is_cachedlist_all_ni_list {a b} { |
|
upvar 0 ::punk::lib::caches::funcs_ni_list funcs |
|
if {[info exists funcs($a)]} { |
|
return [[set funcs($a)] $b] |
|
} |
|
set keybytes [encoding convertto utf-8 $a] |
|
set key [binary encode base64 $keybytes] ;#one single-line base64 string |
|
|
|
set expression "" |
|
foreach t $a { |
|
#append expression "({$t} ni \$b) && " |
|
append expression "{$t} ni \$b && " |
|
} |
|
set expression [string trimright $expression " &"] ;#trim trailing spaces and ampersands |
|
proc ::punk::lib::caches::ni_list_$key {b} [string map [list @expression@ $expression] { |
|
return [expr {@expression@}] |
|
}] |
|
|
|
set funcs($a) ::punk::lib::caches::ni_list_$key |
|
return [punk::lib::caches::ni_list_$key $b] |
|
} |
|
proc is_cachedlist_all_ni_list2 {a b} { |
|
upvar 0 ::punk::lib::caches::funcs_ni_list funcs |
|
if {[info exists funcs($a)]} { |
|
return [[set funcs($a)] $b] |
|
} |
|
set keybytes [encoding convertto utf-8 $a] |
|
set key [binary encode base64 $keybytes] ;#one single-line base64 string |
|
|
|
set d [dict create] |
|
foreach x $a { |
|
dict set d $x "" |
|
} |
|
#constructing a switch statement could be an option |
|
# - but would need to avoid using escapes in order to get a jump-table |
|
# - this would need runtime mapping of values - unlikely to be a win |
|
proc ::punk::lib::caches::ni_list_$key {b} [string map [list @d@ $d] { |
|
foreach x $b { |
|
if {[::tcl::dict::exists {@d@} $x]} { |
|
return 0 |
|
} |
|
} |
|
return 1 |
|
}] |
|
|
|
set funcs($a) ::punk::lib::caches::ni_list_$key |
|
return [punk::lib::caches::ni_list_$key $b] |
|
} |
|
|
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::ldiff |
|
@cmd -name punk::lib::ldiff\ |
|
-summary\ |
|
"Difference consisting of items with removeitems removed."\ |
|
-help\ |
|
"Somewhat like struct::set difference, but order preserving, and doesn't |
|
treat as a 'set' so preserves any duplicates in items. |
|
|
|
struct::set difference may happen to preserve ordering when items are |
|
integers, but order can't be relied on, especially as struct::set has |
|
2 differening implementations (tcl vs critcl) which return results with |
|
different ordering to each other and different deduping behaviour in |
|
some cases (e.g when 2nd arg is empty)" |
|
@values -min 2 -max 2 |
|
items -type list |
|
removeitems -type list |
|
}] |
|
} |
|
proc ldiff {items removeitems} { |
|
if {[llength $removeitems] == 0} {return $items} |
|
set result {} |
|
foreach item $items { |
|
if {$item ni $removeitems} { |
|
lappend result $item |
|
} |
|
} |
|
return $result |
|
} |
|
#with ledit (also avail in 8.6 using punk::lib::compat::ledit |
|
proc ldiff2 {fromlist removeitems} { |
|
if {[llength $removeitems] == 0} {return $fromlist} |
|
foreach item $removeitems { |
|
set posns [lsearch -all -exact $fromlist $item] |
|
foreach p $posns {ledit fromlist $p $p} |
|
} |
|
return $fromlist |
|
} |
|
proc ldiff3 {fromlist removeitems} { |
|
set doomed [list] |
|
foreach item $removeitems { |
|
lappend doomed {*}[lsearch -all -exact $fromlist $item] |
|
} |
|
lremove $fromlist {*}$doomed |
|
} |
|
|
|
#fix for tcl impl of struct::set::diff which doesn't dedupe |
|
proc struct_set_diff_unique {A B} { |
|
package require struct::set ;#require even if tcl impl - so the dependency isn't missed accidentally due to Loaded state of programmers machine. |
|
if {[struct::set::Loaded] eq "tcl"} { |
|
return [punk::lib::setdiff $A $B] |
|
} else { |
|
#use (presumably critcl) implementation for speed |
|
return [struct::set difference $A $B] |
|
} |
|
} |
|
|
|
|
|
#non-dupe preserving - for consistency in dedupe behaviour we don't shortcircuit empty B |
|
#consistent dedupe and order-maintenance of remaining items in A differentiate this from struct::set difference (dedupes with critcl, doesn't with tcl implementation 2024) |
|
#also struct::set difference with critcl is faster |
|
proc setdiff {A B} { |
|
if {[llength $A] == 0} {return {}} |
|
set d [dict create] |
|
foreach x $A {dict set d $x {}} |
|
foreach x $B {dict unset d $x} |
|
return [dict keys $d] |
|
} |
|
#bulk dict remove is slower than a foreach with dict unset |
|
#proc setdiff2 {fromlist removeitems} { |
|
# #if {[llength $fromlist] == 0} {return {}} |
|
# set d [dict create] |
|
# foreach x $fromlist { |
|
# dict set d $x {} |
|
# } |
|
# return [dict keys [dict remove $d {*}$removeitems]] |
|
#} |
|
#array is about 15% faster than dict - but unordered. (which is ok for sets - but we have struct::set for that) |
|
proc setdiff_unordered {A B} { |
|
if {[llength $A] == 0} {return {}} |
|
array set tmp {} |
|
foreach x $A {::set tmp($x) .} |
|
foreach x $B {catch {unset tmp($x)}} |
|
return [array names tmp] |
|
} |
|
|
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::lunique_unordered |
|
@cmd -name punk::lib::lunique_unordered\ |
|
-summary\ |
|
"unique values in list"\ |
|
-help\ |
|
"Return unique values in provided list. |
|
This removes duplicates but *may* rearrange the |
|
order of the returned elements compared to the |
|
original list. |
|
|
|
When struct::set is available this will be used |
|
for the implementation, as it can be *slightly* |
|
faster if acceleration is present. When struct::set |
|
is not available it will fallback to lunique and |
|
provide the same functionality with order preserved." |
|
@values -min 1 -max 1 |
|
list -type list |
|
}] |
|
} |
|
#default/fallback implementation |
|
proc lunique_unordered {list} { |
|
lunique $list |
|
} |
|
if {$has_struct_set} { |
|
if {[struct::set equal [struct::set union {a a} {}] {a}]} { |
|
proc lunique_unordered {list} { |
|
struct::set union $list {} |
|
} |
|
} else { |
|
#struct::set union operates on a 'set' - so this probably won't change, and hopefully is |
|
#consistent across unacelerated versions and those implemented in accelerators, |
|
#but if it ever does change - be a little noisy about it. |
|
puts stderr "punk::lib WARNING: struct::set union <list> <emptylist> no longer dedupes!" |
|
#we could also test a sequence of: struct::set add |
|
} |
|
} |
|
|
|
|
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::lunique |
|
@cmd -name punk::lib::lunique\ |
|
-summary\ |
|
"Order-preserving unique values in list"\ |
|
-help\ |
|
"Return unique values in provided list. |
|
This removes duplicates whilst preserving the |
|
original order of the provided list. |
|
|
|
When struct::set is available with acceleration, |
|
lunique_unordered may be slightly faster." |
|
@values -min 1 -max 1 |
|
list -type list |
|
}] |
|
} |
|
proc lunique {list} { |
|
set new {} |
|
foreach item $list { |
|
if {$item ni $new} { |
|
lappend new $item |
|
} |
|
} |
|
return $new |
|
} |
|
proc lunique2 {list} { |
|
set doomed [list] |
|
#expr 'in' probably faster than using a dict - for lists approx < 20,000 items. (wiki wisdom - url?) |
|
for {set i 0} {$i < [llength $list]} {} { |
|
set item [lindex $list $i] |
|
lappend doomed {*}[lrange [lsearch -all -exact -start $i $list $item] 1 end] |
|
while {[incr i] in $doomed} {} |
|
} |
|
lremove $list {*}$doomed |
|
} |
|
#The closure-like behaviour is *very* slow especially when called from a context such as the global namespace with lots of vars and large arrays such as ::env |
|
proc lmapflat_closure {varnames list script} { |
|
set result [list] |
|
set values [list] |
|
foreach v $varnames { |
|
lappend values "\$$v" |
|
} |
|
# -- --- --- |
|
#capture - use uplevel 1 or namespace eval depending on context |
|
set capture [uplevel 1 { |
|
apply { varnames { |
|
set capturevars [tcl::dict::create] |
|
set capturearrs [tcl::dict::create] |
|
foreach fullv $varnames { |
|
set v [tcl::namespace::tail $fullv] |
|
upvar 1 $v var |
|
if {[info exists var]} { |
|
if {(![array exists var])} { |
|
tcl::dict::set capturevars $v $var |
|
} else { |
|
tcl::dict::set capturearrs capturedarray_$v [array get var] |
|
} |
|
} else { |
|
#A variable can show in the results for 'info vars' but still not 'exist'. e.g a 'variable x' declaration in the namespace where the variable has never been set |
|
} |
|
} |
|
return [tcl::dict::create vars $capturevars arrs $capturearrs] |
|
} } [info vars] |
|
} ] |
|
# -- --- --- |
|
set cvars [tcl::dict::get $capture vars] |
|
set carrs [tcl::dict::get $capture arrs] |
|
set apply_script "" |
|
foreach arrayalias [tcl::dict::keys $carrs] { |
|
set realname [string range $arrayalias [string first _ $arrayalias]+1 end] |
|
|
|
#review |
|
append apply_script [string map [list %realname% $realname %arrayalias% $arrayalias] { |
|
array set %realname% [set %arrayalias%][unset %arrayalias%] |
|
}] |
|
} |
|
|
|
append apply_script [string map [list %script% $script] { |
|
#foreach arrayalias [info vars capturedarray_*] { |
|
# set realname [string range $arrayalias [string first _ $arrayalias]+1 end] |
|
# array set $realname [set $arrayalias][unset arrayalias] |
|
#} |
|
#return [eval %script%] |
|
%script% |
|
}] |
|
#puts "--> $apply_script" |
|
foreach $varnames $list { |
|
lappend result {*}[apply {*}{ |
|
} [list {*}{ |
|
} [concat $varnames [tcl::dict::keys $cvars] [tcl::dict::keys $carrs] ] {*}{ |
|
} $apply_script {*}{ |
|
} |
|
] {*}[subst $values] {*}[tcl::dict::values $cvars] {*}[tcl::dict::values $carrs] ] |
|
} |
|
return $result |
|
} |
|
#link version - can write to vars in calling context - but keeps varnames themselves isolated |
|
#performance much better than capture version - but still a big price to pay for the isolation |
|
proc lmapflat_link {varnames list script} { |
|
set result [list] |
|
set values [list] |
|
foreach v $varnames { |
|
lappend values "\$$v" |
|
} |
|
set linkvars [uplevel 1 [list info vars]] |
|
set nscaller [uplevel 1 [list namespace current]] |
|
|
|
set apply_script "" |
|
foreach vname $linkvars { |
|
append apply_script [string map [list %vname% $vname]\ |
|
{upvar 2 %vname% %vname%}\ |
|
] \n |
|
} |
|
append apply_script $script \n |
|
|
|
#puts "--> $apply_script" |
|
foreach $varnames $list { |
|
lappend result {*}[apply {*}{ |
|
} [list {*}{ |
|
} $varnames {*}{ |
|
} $apply_script {*}{ |
|
} $nscaller {*}{ |
|
} |
|
] {*}[subst $values]\ |
|
] |
|
} |
|
return $result |
|
} |
|
|
|
#proc lmapflat {varnames list script} { |
|
# concat {*}[uplevel 1 [list lmap $varnames $list $script]] |
|
#} |
|
#lmap can accept multiple var list pairs |
|
proc lmapflat {args} { |
|
concat {*}[uplevel 1 [list lmap {*}$args]] |
|
} |
|
proc lmapflat2 {args} { |
|
concat {*}[uplevel 1 lmap {*}$args] |
|
} |
|
|
|
#proc dict_getdef {dictValue args} { |
|
# if {[llength $args] < 1} { |
|
# error {wrong # args: should be "dict_getdef dictValue ?key ...? key default"} |
|
# } |
|
# set keys [lrange $args -1 end-1] |
|
# if {[tcl::dict::exists $dictValue {*}$keys]} { |
|
# return [tcl::dict::get $dictValue {*}$keys] |
|
# } else { |
|
# return [lindex $args end] |
|
# } |
|
#} |
|
if {[info commands ::tcl::dict::getdef] eq ""} { |
|
proc dict_getdef {dictValue args} { |
|
set keys [lrange $args 0 end-1] |
|
if {[tcl::dict::exists $dictValue {*}$keys]} { |
|
return [tcl::dict::get $dictValue {*}$keys] |
|
} else { |
|
return [lindex $args end] |
|
} |
|
} |
|
} else { |
|
#we pay a minor perf penalty for the wrap |
|
interp alias "" ::punk::lib::dict_getdef "" ::tcl::dict::getdef |
|
} |
|
|
|
|
|
#proc sample1 {p1 n args} { |
|
# #*** !doctools |
|
# #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] |
|
# #[para]Description of sample1 |
|
# #[para] Arguments: |
|
# # [list_begin arguments] |
|
# # [arg_def tring p1] A description of string argument p1. |
|
# # [arg_def integer n] A description of integer argument n. |
|
# # [list_end] |
|
# return "ok" |
|
#} |
|
|
|
#supports *safe* ultra basic offset expressions as used by lindex etc, but without the 'end' features |
|
#safe in that we don't evaluate the expression as a string. |
|
proc offset_expr {expression} { |
|
set expression [tcl::string::map {_ {}} $expression] ;#review - this is for 8.6 to understand underscored ints |
|
if {[tcl::string::is integer -strict $expression]} { |
|
return [expr {$expression}] |
|
} |
|
if {[regexp {([^+-]*)([+-])(.*)} $expression _match a op b] && [tcl::string::is integer -strict $a] && [tcl::string::is integer -strict $b]} { |
|
if {$op eq "-"} { |
|
return [expr {$a - $b}] |
|
} else { |
|
return [expr {$a + $b}] |
|
} |
|
} else { |
|
error "bad expression '$expression': must be integer?\[+-\]integer?" |
|
} |
|
} |
|
|
|
punk::args::define { |
|
@id -id ::punk::lib::is_indexset |
|
@cmd -name punk::lib::is_indexset\ |
|
-summary\ |
|
"Validate string is a comma-delimited 'indexset'."\ |
|
-help\ |
|
"Validate that a string is an 'indexset' |
|
|
|
An indexset consists of a comma delimited list of indexes or index-ranges. |
|
No particular base is assumed for the purposes of validating an indexset here. |
|
While in Tcl, lists are zero-based - an indexset can be applied to lists of any base. |
|
e.g -10..-1 is an indexset that just won't resolve any results for a list with a base >= 0. |
|
To validate if an indexset is strictly within range, both the length of the data and the base would |
|
need to be considered. |
|
|
|
The normal 'range' specifier is .. but can be of the form .x. where x is the step value. |
|
The range specifier can appear at the beginning, middle or end, or even alone to indicate the entire |
|
range of valid values. |
|
e.g the following are all valid ranges |
|
1.. |
|
(index 1 to 'max') |
|
..10 |
|
(index 'base' to 10) |
|
2..11 |
|
(index 2 to 11) |
|
.. |
|
(all indices) |
|
.3. |
|
(1st index and every 3rd index thereafter) |
|
|
|
Common whitespace elements space,tab,newlines are ignored. |
|
Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, |
|
e.g end-2 or 2+2. |
|
|
|
see indexset_resolve" |
|
@values -min 1 -max 1 |
|
indexset -type string |
|
} |
|
proc is_indexset {indexset} { |
|
#collapse internal whitespace (for basic whitespace set we allow) |
|
set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] |
|
if {![regexp {^[\-\+_end,\.0-9]*$} $indexset]} { |
|
return 0 |
|
} |
|
set ranges [split $indexset ,] |
|
foreach r $ranges { |
|
set validateindices [list] |
|
set rposn [string first .. $r] |
|
if {$rposn >= 0} { |
|
set sepsize 2 |
|
set step 1 |
|
#review - whitespace between ints? |
|
lappend validateindices {*}[string range $r 0 $rposn-1] {*}[string range $r $rposn+2 end] |
|
} elseif {[string first . $r] >= 0} { |
|
set stripped [string map {. ""} $r] |
|
if {[tcl::string::length $stripped] != [tcl::string::length $r]-2} { |
|
#if one dot exists - must be exactly 2 dots in total - possibly separated by positive/negative int (not zero) |
|
return 0 |
|
} |
|
#assert - we have exactly 2 dots separated by something. |
|
#check for .n. 'stepped' range |
|
set fdot [string first . $r] |
|
set ldot [string last . $r] |
|
set step [string range $r $fdot+1 $ldot-1] |
|
#todo - allow basic mathops for step: 2+1 2+-1 etc same as tcl lindex, lseq |
|
#1.0.10 should be valid but behave similarly to lseq 1 0 by 0 ie returns nothing |
|
|
|
#1.end.10 or similar shouldn't be valid - but we need to allow other basic index expressions. |
|
if {[string match *end* $step] || [catch {lindex {} $step}]} { |
|
return 0 |
|
} |
|
#if {![string is integer -strict $step] || $step == 0} { |
|
# return 0 |
|
#} |
|
lappend validateindices {*}[string range $r 0 $fdot-1] {*}[string range $r $ldot+1 end] |
|
} else { |
|
#'range' is just an index |
|
set validateindices [list $r] |
|
} |
|
|
|
foreach v $validateindices { |
|
if {$v eq "" || $v eq "end"} {continue} |
|
if {[string is integer -strict $v]} {continue} |
|
if {[catch {lindex {} $v}]} { |
|
return 0 |
|
} |
|
} |
|
} |
|
return 1 |
|
} |
|
#review - compare to IMAP4 methods of specifying ranges? |
|
#TODO add tests to test::punk::lib indexset_resolve is a little tricky |
|
punk::args::define { |
|
@id -id ::punk::lib::indexset_resolve |
|
@cmd -name punk::lib::indexset_resolve\ |
|
-summary\ |
|
"Resolve an indexset to a list of integers based on supplied list or string length."\ |
|
-help\ |
|
"Resolve an 'indexset' to a list of actual indices within the range of the provided numitems value. |
|
e.g in a basic case: for a list of 10 items, 'indexset_resolve 10 end' will return the index 9 |
|
|
|
An indexset consists of a comma delimited list of indexes or index-ranges. |
|
Ranges must be specified with a range-indicator such as .. as the separator, with an empty value at |
|
either side of the separator representing beginning and end of the index range respectively. |
|
The range-separator can be of the form .x. where x is an integer or basic expression |
|
(single +/- operation) that indicates the step value to use. This is equivalent to the 'by' value |
|
in the tcl9 lseq command. |
|
|
|
When the start index is lower than the end, the step value defaults to 1. |
|
ie indexset_resolve 0..7 is equivalent to indexset_resolve 0.1.7 |
|
When the start index is higher than the end, the step value defaults to -1. |
|
ie indexset_resolve 7..0 is equivalent to indexset_resolve 0.-1.7 |
|
|
|
If start and end are ommitted, increasing order is assumed if the step isn't specified. |
|
eg |
|
.. represents the range from the base to the end |
|
.-1. would represent end to base with step -1 |
|
|
|
If start is omitted and only the end is supplied: |
|
The default step is 1 indicating ascension and the missing start is equivalent to the base. |
|
indexset_resolve 5 ..2 |
|
-> 0 1 2 |
|
The default start is 'end' if the step is negative |
|
indexset_resolve 5 .-1.2 |
|
-> 4 3 2 |
|
If end is omitted and only the start is supplied: |
|
The default step is 1 indicating ascension and the missing end is equivalent to 'end' |
|
indexset_resolve 5 2.. |
|
-> 2 3 4 |
|
The default end is the base if the step is negative |
|
indexset_resolve 5 2.-1. |
|
-> 2 1 0 |
|
|
|
Like the tcl9 lseq command - a step (by) value of zero produces no results. |
|
|
|
The indexes are 0-based by default, but the base can be specified. |
|
indexset_resolve 7 .. |
|
-> 0 1 2 3 4 5 6 |
|
indexset_resolve -base -3 7 .. |
|
-> -3 -2 -1 0 1 2 3 |
|
|
|
Whitespace is ignored. |
|
Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands, |
|
e.g end-2 or 2+2. |
|
|
|
end means the last item. |
|
end-1 means the second last item. |
|
0.. is the same as 0..end |
|
|
|
indexset examples: |
|
|
|
These assume the default 0-based indices (-base 0) |
|
|
|
1,3.. |
|
output the index 1 (2nd item) followed by all from index 3 to the end. |
|
indexset_resolve 4 1,3.. |
|
-> 1 3 |
|
indexset_resolve 10 1,3.. |
|
-> 1 3 4 5 6 7 8 9 |
|
0..2,end |
|
output the first 3 indices, and the last index. |
|
end-1..0 |
|
output the indexes in reverse order from 2nd last item to first item." |
|
@leaders -min 0 -max 0 |
|
@opts |
|
-base -type integer -prefix 1 -default 0 -help\ |
|
"This is the starting index. It can be positive, negative or zero. |
|
This affects the start and end calculations, limiting what indices will be |
|
returned. |
|
e.g with base 1 'end' will give a different value from base 0 |
|
|
|
for 10 items 'end' is 10 when 1-based |
|
for 10 items 'end' is 9 when 0-based |
|
|
|
For base 1, index 0 is considered to be below the range. |
|
ie |
|
indexset_resolve -base 1 10 0..3 |
|
-> 1 2 3 |
|
indexset_resolve -base 0 10 0..3 |
|
-> 0 1 2 3 |
|
|
|
It does not *convert* indexes within the range. |
|
|
|
indexset_resolve -base 1 10 5 |
|
-> 5 |
|
indexset_resolve -base 0 10 5 |
|
-> 5 |
|
|
|
ie if you ask for a 1-based resolution of an indexset the integers that are within |
|
the range will come out the same, so the result needs to be treated as a 1-based |
|
set of indices when performing further operations. |
|
" |
|
-return -type string -default indices -choices {indices pairs} -choicecolumns 1 -choicelabels { |
|
indices |
|
" return a list of all indices in the order specified by the indexset, |
|
with duplicates if specified by the indexset. |
|
So for example |
|
indexset_resolve 6 3..0,2,4,end |
|
would return 3 2 1 0 2 4 5" |
|
pairs |
|
" return a list of index pairs representing the start and end of each range, |
|
which may be increasing or decreasing, or just a single index |
|
(where start and end are the same). |
|
So for example |
|
indexset_resolve -return pairs 6 3..0,2,4,end |
|
would return {3 0} {2 2} {4 5} |
|
indexset_resolve -return pairs 7 3..0,2,4,end |
|
would return {3 0} {2 2} {4 4} {6 6}" |
|
} |
|
@values -min 2 -max 3 |
|
numitems -type integer |
|
indexset -type indexset -help "comma delimited specification for indices to return" |
|
} |
|
|
|
#limit punk::args parsing to unhappy paths where possible |
|
proc indexset_resolve {args} { |
|
# -------------------------------------------------- |
|
# Manual parsing of happy path args instead of using punk::args::parse $args withid ::punk::lib::indexset_resolve |
|
# This is because indexset_resolve is *somewhat* low level, has only a few args, and we don't want any overhead. |
|
# for the unhappy path - the punk::args::parse is fine to generate the usage/error information. |
|
# -------------------------------------------------- |
|
if {[llength $args] < 2} { |
|
#too few args - use parser to generate error message |
|
punk::args::resolve $args withid ::punk::lib::indexset_resolve |
|
} |
|
set numitems [lindex $args end-1] |
|
set indexset [lindex $args end] |
|
if {![string is integer -strict $numitems] || ![is_indexset $indexset]} { |
|
#use parser on unhappy path only |
|
set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] |
|
uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] |
|
} |
|
#assert we have 2 or more args |
|
set optlist [lrange $args 0 end-2] |
|
if {[llength $optlist] % 2 != 0} { |
|
#options should come in pairs |
|
set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] |
|
uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] |
|
} |
|
set returntype "indices" ;#default |
|
set base 0 ;#default |
|
dict for {opt val} $optlist { |
|
set fulloptname [tcl::prefix::match -error "" {-base -return} $opt] |
|
switch -exact -- $fulloptname { |
|
-return { |
|
set fullval [tcl::prefix::match -error "" {indices pairs} $val] |
|
if {$fullval ni {indices pairs}} { |
|
set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] |
|
uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] |
|
} |
|
set returntype $fullval |
|
} |
|
-base { |
|
if {![string is integer -strict $val]} { |
|
set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] |
|
uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] |
|
} |
|
set base $val |
|
} |
|
default { |
|
set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] |
|
uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] |
|
} |
|
} |
|
} |
|
|
|
#set base 0 ;#default |
|
#if {[llength $args] > 2} { |
|
# #if more than just numitems and indexset - we expect only -base <int> ie 4 args in total |
|
# if {[llength $args] != 4} { |
|
# set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] |
|
# uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] |
|
# } |
|
# set optname [lindex $args 0] |
|
# set optval [lindex $args 1] |
|
# set fulloptname [tcl::prefix::match -error "" -base $optname] |
|
# if {$fulloptname ne "-base" || ![string is integer -strict $optval]} { |
|
# set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve] |
|
# uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg] |
|
# } |
|
# set base $optval |
|
#} |
|
# -------------------------------------------------- |
|
|
|
|
|
set indexset [string map [list " " "" \t "" \r\n "" \n ""] $indexset] ;#collapse basic whitespace |
|
set index_list [list] ;#list of actual indexes within the range |
|
set iparts [split $indexset ,] |
|
set based_max [expr {$numitems -1 + $base}] |
|
|
|
#we already did is_indexset check above, so we can make assumptions about well-formedness of each part |
|
foreach ipart $iparts { |
|
set ipart [string trim $ipart] |
|
#we need to cater for n..m as well as n.s.m where s is 'step' |
|
set rposn [string first . $ipart] |
|
if {$rposn>=0} { |
|
#if we found one dot - there must be exactly 2 dots in the ipart, separated by nothing, or a basic integer-expression |
|
set rposn2 [string last . $ipart] |
|
if {$rposn2 == $rposn+1} { |
|
#.. |
|
set step "default" ;#could be 1 or -1 |
|
} else { |
|
set step [tcl::string::range $ipart $rposn+1 $rposn2-1] |
|
} |
|
lassign [punk::lib::string_splitbefore_indices $ipart $rposn $rposn2+1] rawa _ rawb |
|
|
|
set rawa [string trim $rawa] |
|
set rawb [string trim $rawb] |
|
if {$rawa eq "" && $rawb eq ""} { |
|
if {$step eq "default"} { |
|
set step 1 ;#default ascending when no start and no end |
|
} |
|
if {$step < 0} { |
|
set rawa end |
|
set rawb $base |
|
} else { |
|
set rawa $base |
|
set rawb end |
|
} |
|
#if neither start nor end specified - we won't get out of range results from lindex_resolve |
|
set a [punk::lib::lindex_resolve $numitems $rawa $base] |
|
set b [punk::lib::lindex_resolve $numitems $rawb $base] |
|
} else { |
|
if {$rawa eq ""} { |
|
if {$step eq "default"} { |
|
#when start not specified, but end is - default direction always ascending |
|
#(even if end is base or below range) |
|
set step 1 |
|
} |
|
if {$step < 0} { |
|
set rawa end |
|
} else { |
|
set rawa $base |
|
} |
|
} |
|
set a [punk::lib::lindex_resolve $numitems $rawa $base] |
|
if {$a == -Inf} { |
|
#undershot - leave negative |
|
} elseif {$a == Inf} { |
|
#overshot |
|
set a [expr {$based_max + 1}] ;#put it outside the range on the upper side |
|
} |
|
#review - a may be -Inf |
|
|
|
if {$rawb eq ""} { |
|
if {$step eq "default"} { |
|
set step 1 |
|
} |
|
if {$step < 0} { |
|
if {$a < $base} { |
|
#make sure both <undershot> |
|
#mathfunc::isinf is tcl9+ |
|
if {[catch { |
|
if {[::tcl::mathfunc::isinf $a]} { |
|
set a [expr {$base -1}] |
|
} |
|
}]} { |
|
if {[string match -nocase *inf* $a]} { |
|
set a [expr {$base -1}] |
|
} |
|
} |
|
set rawb $a |
|
} else { |
|
set rawb $base |
|
} |
|
} else { |
|
if {$a > $based_max} { |
|
set rawb $a ;#make sure <overshot>.. doesn't return last item - should return nothing |
|
} else { |
|
set rawb end |
|
} |
|
} |
|
} |
|
set b [punk::lib::lindex_resolve $numitems $rawb $base] |
|
if {$b == -Inf} { |
|
#undershot - leave negative |
|
} elseif {$b == Inf} { |
|
#set b [expr {$numitems}] ;#overshot - put it outside the range on the upper side |
|
set b [expr {$based_max + 1}] ;#overshot - put it outside the range on the upper side |
|
} |
|
} |
|
|
|
#JJJ |
|
|
|
#e.g make sure <overshot>.. doesn't return last item - should return nothing as both are above the range. |
|
if {$a >= $base && $a <= $based_max && $b >=$base && $b <= $based_max} { |
|
#assert a & b are integers within the range |
|
if {$step eq "default"} { |
|
#unspecified step - base direction on order of a & b |
|
if {$a <= $b} { |
|
set step 1 |
|
} else { |
|
set step -1 |
|
} |
|
} |
|
lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. |
|
} else { |
|
if {$a >= $base && $a <= $based_max} { |
|
#only a is in the range |
|
if {$b < $base} { |
|
set b $base |
|
} else { |
|
set b $based_max |
|
} |
|
if {$step eq "default"} { |
|
if {$a <= $b} { |
|
set step 1 |
|
} else { |
|
set step -1 |
|
} |
|
} |
|
lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. |
|
} elseif {$b >=$base && $b <= $based_max} { |
|
#only b is in the range |
|
if {$step eq "default"} { |
|
if {$a <= $b} { |
|
set step 1 |
|
} else { |
|
set step -1 |
|
} |
|
} |
|
if {$step < 0} { |
|
if {$a < $base} { |
|
#negative step from below - doesn't matter if b is in range - recast both to an int below $base |
|
#(a may be -Inf) |
|
set a [expr {$base -1}] |
|
set b $a |
|
set step 0 ;#we should return nothing |
|
} |
|
} else { |
|
if {$a < $base} { |
|
set a $base |
|
} else { |
|
set a $based_max |
|
} |
|
} |
|
lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. |
|
} else { |
|
#both outside the range |
|
if {$a < $base && $b > $base} { |
|
#spans the range in forward order |
|
set a $base |
|
set b $based_max |
|
if {$step eq "default"} { |
|
set step 1 |
|
} |
|
lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. |
|
} elseif {$a > $base && $b < $base} { |
|
#spans the range in reverse order |
|
set a $based_max |
|
set b $base |
|
if {$step eq "default"} { |
|
set step -1 |
|
} |
|
lappend index_list {*}[punk::lib::range $a $b $step] ;#required for tcl8.6, on tcl9 this will call lseq internally. |
|
} |
|
#both outside of range on same side |
|
} |
|
} |
|
} else { |
|
set idx [punk::lib::lindex_resolve_basic $numitems $ipart $base] |
|
#returns only -Inf for out of range at either end |
|
if {$idx >= $base} { |
|
#index within the range |
|
lappend index_list $idx |
|
} |
|
} |
|
} |
|
if {$returntype eq "pairs"} { |
|
return [indices_to_pairs $index_list] |
|
} |
|
return $index_list |
|
} |
|
proc indices_to_pairs {indices} { |
|
#convert a list of indices to a list of pairs representing the start and end of contiguous runs of indices which can be increasing or decreasing. |
|
#if the direction of the run changes - we end the previous run and start a new one |
|
if {[llength $indices] == 0} { |
|
return [list] |
|
} |
|
set pairs [list] |
|
set start [lindex $indices 0] |
|
set prev $start |
|
set direction 0 ;#0 = unknown, 1 = increasing, -1 = decreasing |
|
for {set i 1} {$i < [llength $indices]} {incr i} { |
|
set idx [lindex $indices $i] |
|
if {![string is integer -strict $idx]} { |
|
error "non-integer index '$idx' in indices list" |
|
} |
|
if {$idx == $prev + 1} { |
|
#increase since prev |
|
if {$direction == 0} { |
|
set direction 1 |
|
} elseif {$direction == -1} { |
|
#direction changed - end previous run and start new one |
|
lappend pairs [list $start $prev] |
|
set start $idx |
|
set direction 0 |
|
} else { |
|
#still increasing |
|
} |
|
set prev $idx |
|
} elseif {$idx == $prev - 1} { |
|
#decrease since prev |
|
if {$direction == 0} { |
|
set direction -1 |
|
} elseif {$direction == 1} { |
|
#direction changed - end previous run and start new one |
|
lappend pairs [list $start $prev] |
|
set start $idx |
|
set direction 0 |
|
} |
|
set prev $idx |
|
} else { |
|
#run ended - add pair to list |
|
lappend pairs [list $start $prev] |
|
set start $idx |
|
set prev $idx |
|
set direction 0 |
|
} |
|
} |
|
# add final run |
|
lappend pairs [list $start $prev] |
|
return $pairs |
|
} |
|
|
|
#proc indices_to_pairs {indices} { |
|
# #convert a list of indices to a list of pairs representing the start and end of contiguous runs of indices |
|
# set pairs [list] |
|
# set start [lindex $indices 0] |
|
# set prev $start |
|
# for {set i 1} {$i < [llength $indices]} {} { |
|
# set idx [lindex $indices $i] |
|
# if {$idx == $prev + 1} { |
|
# #still in a run |
|
# set prev $idx |
|
# } else { |
|
# #run ended - add pair to list |
|
# lappend pairs [list $start $prev] |
|
# set start $idx |
|
# set prev $idx |
|
# } |
|
# } |
|
# # add final run |
|
# lappend pairs [list $start $prev] |
|
# return $pairs |
|
#} |
|
# showdict uses lindex_resolve results -Inf & Inf to determine whether index is out of bounds on lower vs upper side |
|
#This doesn't need the list itself - just the length suffices. |
|
punk::args::define { |
|
@id -id ::punk::lib::lindex_resolve |
|
@cmd -name punk::lib::lindex_resolve\ |
|
-summary\ |
|
"Resolve an indexexpression to an integer based on supplied list or string length."\ |
|
-help\ |
|
"Resolve an index which may be of the forms accepted by Tcl list or string commands such as end-2 or 2+2 |
|
to the actual integer index for the supplied list/string length, or to a negative value below -1 indicating |
|
whether the index was below or above the range of possible indices for the length supplied. |
|
|
|
Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. |
|
This means the proc may be called with something like $x+2 end-$y etc |
|
Sometimes the actual integer index is desired. |
|
|
|
We want to resolve the index used, without passing arbitrary expressions into the 'expr' function |
|
- which could have security risks. |
|
lindex_resolve will parse the index expression and return: |
|
a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) |
|
b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) |
|
The similar function lindex_resolve_basic uses -Inf to denote |
|
out of range at either end of the list/string. |
|
Otherwise it will return an integer corresponding to the position in the data. |
|
This is in stark contrast to Tcl list/string function indices which will return empty strings for out of |
|
bounds indices, or in the case of lrange, return results anyway. |
|
Like Tcl list commands - it will produce an error if the form of the index is not acceptable. |
|
For empty lists/string (datalength 0), end and end+x indices are considered to be out of bounds on the upper side |
|
- thus returning -2 |
|
|
|
Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. |
|
We will get something like 10+1 - which can be resolved safely with expr |
|
" |
|
@values -min 2 -max 2 |
|
datalength -type integer -range {0 ""} |
|
index -type indexexpression |
|
} |
|
proc lindex_resolve {len index {base 0}} { |
|
#*** !doctools |
|
#[call [fun lindex_resolve] [arg len] [arg index]] |
|
#[para]Resolve an index which may be of the forms accepted by Tcl list commands such as end-2 or 2+2 to the actual integer index for the supplied list/string length |
|
#[para]Users may define procs which accept a list/string index and wish to accept the forms understood by Tcl. |
|
#[para]This means the proc may be called with something like $x+2 end-$y etc |
|
#[para]Sometimes the actual integer index is desired. |
|
#[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks. |
|
#[para]lindex_resolve will parse the index expression and return: |
|
#[para] a) -Inf if the supplied index expression is below the lower bound for the supplied list. (< 0) |
|
#[para] b) Inf if the supplied index expression is above the upper bound for the supplied list. (> end) |
|
#[para] We don't return -1 - as the similar function lindex_resolve_basic uses this to denote out of range at either end of the list/string |
|
#[para]Otherwise it will return an integer corresponding to the position in the list. |
|
#[para]This is in stark contrast to Tcl list function indices which will return empty strings for out of bounds indices, or in the case of lrange, return results anyway. |
|
#[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable |
|
#[para]For empty lists, end and end+x indices are considered to be out of bounds on the upper side - thus returning -2 |
|
|
|
#Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which can be resolved safely with expr |
|
|
|
|
|
#REVIEW - we need compat for 1_000 etc to handle things like toml even in 8.6? |
|
#A basic string map means we aren't properly validating |
|
#todo - be stricter about malformations such as 1000_ |
|
if {![string is integer -strict 1_0]} { |
|
#basic forward compatibility with integers such as 1_000 for 8.6.x |
|
set index [tcl::string::map {_ {}} $index] |
|
set len [tcl::string::map {_ {}} $len] |
|
set base [tcl::string::map {_ {}} $base] |
|
} |
|
|
|
if {![string is integer -strict $len] || $len < 0} { |
|
error "lindex_resolve len must be a positive integer." |
|
} |
|
set based_max [expr {$len -1 + $base}] |
|
|
|
if {[string is integer -strict $index]} { |
|
#review - base? |
|
#can match +i -i |
|
if {$index < $base} { |
|
return -Inf |
|
} elseif {$index > $based_max} { |
|
return Inf |
|
} else { |
|
#integer may still have + sign - normalize with expr |
|
return [expr {$index}] |
|
} |
|
} else { |
|
if {[string match end* $index]} { |
|
if {$index ne "end"} { |
|
set op [string index $index 3] |
|
set offset [string range $index 4 end] |
|
#note - offset could have leading + or - |
|
# 'string is integer -strict +1' ==> true |
|
#e.g end+-1 is valid (end++-1 is not) |
|
if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} |
|
if {$offset == 0} { |
|
#(offset +0, -0 or 0 or 000 0_0 etc) |
|
#op either + or - is irrelevant |
|
#set index [expr {$len-1}] ;#+ base ? |
|
set index $based_max |
|
if {$index < $base} { |
|
#return -2 ;#special case - equivalent to 'end', with empty list - treat like a positive number out of bounds |
|
return Inf |
|
} else { |
|
return $index |
|
} |
|
} |
|
|
|
#set index [if {$op eq "+"} {expr {($len-1) + $offset}} else {expr {($len-1) - $offset}}] |
|
set index [if {$op eq "+"} {expr {$based_max + $offset}} else {expr {$based_max - $offset}}] |
|
if {$index < $base} { |
|
return -Inf |
|
} elseif {$index > $based_max} { |
|
return Inf |
|
} else { |
|
return $index |
|
} |
|
} else { |
|
#index is 'end' |
|
if {$len == 0} { |
|
#special case - 'end' with empty list - treat end like a positive number out of bounds |
|
return Inf |
|
} |
|
#return [expr {$len - 1 + $base}] |
|
return $based_max |
|
} |
|
} else { |
|
#plain +-<int> already handled above. (but not +-<int>+-<int> etc) |
|
#we are trying to avoid evaluating unbraced expr of potentially insecure origin |
|
#regexp must split a++b to a + +b (not a+ + b) ie first +/- is the op |
|
if {[regexp {([+-]{0,1}[^+-]*)([+-])(.*)} $index _match a op b]} { |
|
if {[string is integer -strict $a] && [string is integer -strict $b]} { |
|
if {$op eq "-"} { |
|
set index [expr {$a - $b}] |
|
} else { |
|
set index [expr {$a + $b}] |
|
} |
|
} else { |
|
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" |
|
} |
|
} else { |
|
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" |
|
} |
|
if {$index < $base} { |
|
return -Inf |
|
} elseif {$index > $based_max} { |
|
return Inf |
|
} |
|
return $index |
|
} |
|
} |
|
} |
|
proc lindex_resolve_basic {len index {base 0}} { |
|
#*** !doctools |
|
#[call [fun lindex_resolve_basic] [arg len] [arg index]] |
|
#[para] Accepts index of the forms accepted by Tcl's list commands. (e.g compound indices such as 3+1 end-2) |
|
#[para] returns -Inf for out of range at either end, or a valid integer index |
|
#[para] Unlike lindex_resolve; lindex_resolve_basic can't determine if an out of range index was out of range at the lower or upper bound |
|
#[para] This is only likely to be faster than average over lindex_resolve for small lists and for Tcl which has the builtin lseq command |
|
#[para] The performance advantage is more likely to be present when using compound indexes such as $x+1 or end-1 |
|
#[para] For pure integer indices the performance should be equivalent |
|
|
|
#REVIEW - we need compat for 1_000 etc to handle things like toml even in 8.6? |
|
#A basic string map means we aren't properly validating |
|
#todo - be stricter about malformations such as 1000_ |
|
if {![string is integer -strict 1_0]} { |
|
#basic forward compatibility with integers such as 1_000 for 8.6.x |
|
set index [tcl::string::map {_ {}} $index] |
|
set len [tcl::string::map {_ {}} $len] |
|
set base [tcl::string::map {_ {}} $base] |
|
} |
|
|
|
if {![string is integer -strict $len] || $len < 0} { |
|
error "lindex_resolve_basic len must be an integer greater than or equal to zero" |
|
} |
|
if {![string is integer -strict $base]} { |
|
#base can be negative |
|
error "lindex_resolve_basic base must be an integer" |
|
} |
|
set based_max [expr {$len -1 + $base}] |
|
|
|
set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000 |
|
if {[string is integer -strict $index]} { |
|
#can match +i -i |
|
#avoid even the lseq overhead when the index is simple |
|
if {$index < $base || ($index > $based_max)} { |
|
#even though in this case we could return -Inf or Inf like lindex_resolve; |
|
#for consistency we don't return Inf for upper-boudn violation, |
|
#as which bound is violated is not always directly determinable for compound index expressions (such as end-x) using the lseq+lindex mechanism. |
|
return -Inf |
|
} else { |
|
#!NOTE! index within range is unchanged - no matter the base |
|
#integer may still have + sign - normalize with expr |
|
return [expr {$index}] |
|
} |
|
} |
|
if {$len > 0} { |
|
#For large len - this is a wasteful allocation if no true lseq available in Tcl version. |
|
#lseq produces an 'arithseries' object which we can index into without allocating an entire list (REVIEW) |
|
set testlist [punk::lib::range $base $based_max] ;# uses lseq if available, has fallback of creating a potentially large list of numbers. |
|
} else { |
|
set testlist [list] |
|
#we want to call 'lindex' even in this case - to get the appropriate error message |
|
} |
|
set idx [lindex $testlist $index] |
|
if {$idx eq ""} { |
|
#we have no way to determine if out of bounds is at lower vs upper end |
|
return -Inf |
|
} else { |
|
return $idx |
|
} |
|
} |
|
proc lindex_get {list index} { |
|
set resultlist [lrange $list $index $index] |
|
if {![llength $resultlist]} { |
|
return -1 |
|
} else { |
|
#we still don't know the actual integer index for an index such as end-x or int-int without parsing and evaluating ourself. |
|
#we can return the value - but only in a way that won't collide with our -1 out-of-range indicator |
|
return [tcl::dict::create value [lindex $resultlist 0]] |
|
} |
|
} |
|
|
|
proc string_splitbefore {str index} { |
|
if {![string is integer -strict $index]} { |
|
set index [punk::lib::lindex_resolve [string length $str] $index] |
|
switch -- $index { |
|
-Inf { |
|
return [list "" $str] |
|
} |
|
Inf { |
|
return [list $str ""] |
|
} |
|
} |
|
} |
|
return [list [string range $str 0 $index-1] [string range $str $index end]] |
|
#scan %s stops at whitespace - not useful here. |
|
#scan $s %${p}s%s |
|
} |
|
proc string_splitbefore_indices {str args} { |
|
set parts [list $str] |
|
set sizes [list [string length $str]] |
|
set s 0 |
|
foreach index $args { |
|
if {![string is integer -strict $index]} { |
|
set index [punk::lib::lindex_resolve [string length $str] $index] |
|
switch -- $index { |
|
-Inf { |
|
if {[lindex $sizes 0] != 0} { |
|
ledit parts 0 0 {} [lindex $parts 0] |
|
ledit sizes 0 0 0 [lindex $sizes 0] |
|
} |
|
continue |
|
} |
|
Inf { |
|
if {[lindex $sizes end] != 0} { |
|
ledit parts end end [lindex $parts end] {} |
|
ledit sizes end end [lindex $sizes end] 0 |
|
} |
|
continue |
|
} |
|
} |
|
} |
|
if {$index <= 0} { |
|
if {[lindex $sizes 0] != 0} { |
|
ledit parts 0 0 {} [lindex $parts 0] |
|
ledit sizes 0 0 0 [lindex $sizes 0] |
|
} |
|
continue |
|
} |
|
if {$index >= [string length $str]} { |
|
if {[lindex $sizes end] != 0} { |
|
ledit parts end end [lindex $parts end] {} |
|
ledit sizes end end [lindex $sizes end] 0 |
|
} |
|
continue |
|
} |
|
set i -1 |
|
set a 0 |
|
foreach sz $sizes { |
|
incr i |
|
if {$a + $sz > $index} { |
|
set p [lindex $parts $i] |
|
#puts "a:$a index:$index" |
|
if {$a == $index} { |
|
break |
|
} |
|
ledit parts $i $i [string range $p 0 [expr {$index -$a -1}]] [string range $p $index-$a end] |
|
ledit sizes $i $i [expr {$index - $a}] [expr {($a + $sz)-$index}] |
|
break |
|
} |
|
incr a $sz |
|
} |
|
#puts "->parts:$parts" |
|
#puts "->sizes:$sizes" |
|
} |
|
return $parts |
|
} |
|
|
|
proc K {x y} {return $x} |
|
#*** !doctools |
|
#[call [fun K] [arg x] [arg y]] |
|
#[para]The K-combinator function - returns the first argument, x and discards y |
|
#[para]see [uri https://wiki.tcl-lang.org/page/K] |
|
#[para]It is used in cases where command-substitution at the calling-point performs some desired effect. |
|
|
|
|
|
proc is_utf8_multibyteprefix {bytes} { |
|
#*** !doctools |
|
#[call [fun is_utf8_multibyteprefix] [arg str]] |
|
#[para] Returns a boolean if str is potentially a prefix for a multibyte utf-8 character |
|
#[para] ie - tests if it is possible that appending more data will result in a utf-8 codepoint |
|
#[para] Will return false for an already complete utf-8 codepoint |
|
#[para] It is assumed the incomplete sequence is at the beginning of the bytes argument |
|
#[para] Suitable input for this might be from the unreturned tail portion of get_utf8_leading $testbytes |
|
#[para] e.g using: set head [lb]get_utf8_leading $testbytes[rb] ; set tail [lb]string range $testbytes [lb]string length $head[rb] end[rb] |
|
regexp {(?x) |
|
^ |
|
(?: |
|
[\xC0-\xDF] | #possible prefix for two-byte codepoint |
|
[\xE0-\xEF] [\x80-\xBF]{0,1} | #possible prefix for three-byte codepoint |
|
[\xF0-\xF4] [\x80-\xBF]{0,2} #possible prefix for |
|
) |
|
$ |
|
} $bytes |
|
} |
|
|
|
proc is_utf8_first {str} { |
|
regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) |
|
^ |
|
(?: |
|
[\x00-\x7F] | # Single-byte chars (ASCII range) |
|
[\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) |
|
[\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) |
|
[\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) |
|
) |
|
} $str |
|
} |
|
proc is_utf8_single {1234bytes} { |
|
#*** !doctools |
|
#[call [fun is_utf8_single] [arg 1234bytes]] |
|
#[para] Tests input of 1,2,3 or 4 bytes and responds with a boolean indicating if it is a valid utf-8 character (codepoint) |
|
regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) |
|
^ |
|
(?: |
|
[\x00-\x7F] | # Single-byte chars (ASCII range) |
|
[\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) |
|
[\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) |
|
[\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) |
|
) |
|
$ |
|
} $1234bytes |
|
} |
|
proc get_utf8_leading {rawbytes} { |
|
#*** !doctools |
|
#[call [fun get_utf8_leading] [arg rawbytes]] |
|
#[para] return the leading portion of rawbytes that is a valid utf8 sequence. |
|
#[para] This will stop at the point at which the bytes can't be interpreted as a complete utf-8 codepoint |
|
#[para] e.g It will not return the first byte or 2 of a 3-byte utf-8 character if the last byte is missing, and will return only the valid utf-8 string from before the first byte of the incomplete character. |
|
#[para] It will also only return the prefix before any bytes that cannot be part of a utf-8 sequence at all. |
|
#[para] Note that while this will return valid utf8 - it has no knowledge of grapheme clusters or diacritics |
|
#[para] This means if it is being used to process bytes split at some arbitrary point - the trailing data that isn't returned could be part of a grapheme cluster that belongs with the last character of the leading string already returned |
|
#[para] The utf-8 BOM \xEF\xBB\xBF is a valid UTF8 3-byte sequence and so can also be returned as part of the leading utf8 bytes |
|
if {[regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) |
|
\A ( |
|
[\x00-\x7F] | # Single-byte chars (ASCII range) |
|
[\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) |
|
[\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) |
|
[\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) |
|
) + |
|
} $rawbytes completeChars]} { |
|
return $completeChars |
|
} |
|
return "" |
|
} |
|
proc hex2dec {args} { |
|
#*** !doctools |
|
#[call [fun hex2dec] [opt {option value...}] [arg list_largeHex]] |
|
#[para]Convert a list of (possibly large) unprefixed hex strings to their decimal values |
|
#[para]hex2dec accepts and ignores internal underscores in the same manner as Tcl 8.7+ numbers e.g hex2dec FF_FF returns 65535 |
|
#[para]Leading and trailing underscores are ignored as a matter of implementation convenience - but this shouldn't be relied upon. |
|
#[para]Leading or trailing whitespace in each list member is allowed e.g hex2dec " F" returns 15 |
|
#[para]Internal whitespace e.g "F F" is not permitted - but a completely empty element "" is allowed and will return 0 |
|
|
|
set list_largeHex [lindex $args end] |
|
set argopts [lrange $args 0 end-1] |
|
if {[llength $argopts]%2 !=0} { |
|
error "[namespace current]::hex2dec arguments prior to list_largeHex must be option/value pairs - received '$argopts'" |
|
} |
|
set opts [tcl::dict::create {*}{ |
|
-validate 1 |
|
-empty_as_hex "INVALID set -empty_as_hex to a hex string e.g FF if empty values should be replaced" |
|
}] |
|
set known_opts [tcl::dict::keys $opts] |
|
foreach {k v} $argopts { |
|
tcl::dict::set opts [tcl::prefix match -message "options for hex2dec. Unexpected option" $known_opts $k] $v |
|
} |
|
# -- --- --- --- |
|
set opt_validate [tcl::dict::get $opts -validate] |
|
set opt_empty [tcl::dict::get $opts -empty_as_hex] |
|
# -- --- --- --- |
|
|
|
#set list_largeHex [lmap h $list_largeHex[unset list_largeHex] {string map {_ ""} [string trim $h]}] |
|
set list_largeHex [lmap h $list_largeHex[set list_largeHex {}] {string map {_ ""} [string trim $h]}] |
|
if {$opt_validate} { |
|
#Note appended F so that we accept list of empty strings as per the documentation |
|
if {![string is xdigit -strict [join $list_largeHex ""]F ]} { |
|
error "[namespace current]::hex2dec error: non-hex digits encountered after stripping underscores and leading/trailing whitespace for each element\n $list_largeHex" |
|
} |
|
} |
|
if {![string is xdigit -strict [string map {_ ""} $opt_empty]]} { |
|
#mapping empty string to a value destroys any advantage of -scanonly |
|
#todo - document that -scanonly has 2 restrictions - each element must be valid hex and less than 7 chars long |
|
#set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {0} : {[set v]}}] |
|
if {[lsearch $list_largeHex ""] >=0} { |
|
error "[namespace current]::hex2dec error: empty values in list cannot be mapped to non-hex $opt_empty" |
|
} |
|
} else { |
|
set opt_empty [string trim [string map {_ ""} $opt_empty]] |
|
if {[set first_empty [lsearch $list_largeHex ""]] >= 0} { |
|
#set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {$opt_empty} : {$v}}] |
|
set nonempty_head [lrange $list_largeHex 0 $first_empty-1] |
|
set list_largeHex [concat $nonempty_head [lmap v [lrange $list_largeHex $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] |
|
} |
|
} |
|
return [scan $list_largeHex [string repeat %llx [llength $list_largeHex]]] |
|
} |
|
|
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::dec2hex |
|
@cmd -name punk::lib::dec2hex\ |
|
-summary\ |
|
"Convert a list of decimal integers to a list of hex values."\ |
|
-help\ |
|
"" |
|
@opts |
|
-width -type integer|literal(auto) -default auto -range {1 ""} -help\ |
|
"Minimum width of each hex value in the output list, padded with leading zeroes if necessary. |
|
Default of 'auto' means the width will be just enough to fit the largest hex value in the list. |
|
The value 1 means no padding - hex values will be as wide as necessary with no leading zeroes." |
|
-case -type string -default upper -choices {upper lower} -help\ |
|
"Determines the case of the hex letters in the output. Default is upper" |
|
-empty_as_decimal -type string -default "INVALID. set -empty_as_decimal to a number if empty values should be replaced" -help\ |
|
"If set to a valid decimal string, any empty values in the input list will be replaced |
|
with this value before conversion. |
|
If not set to a valid decimal string, any empty values will cause an error" |
|
@values -min 1 -max 1 |
|
#we need a way for punk::args to specify that the items in the list are decimals |
|
#e.g -type list(integer) or something - but for now we'll just document it and rely on the name of the argument and the documentation. |
|
declist -type list |
|
}] |
|
} |
|
proc dec2hex {args} { |
|
set list_decimals [lindex $args end] |
|
set argopts [lrange $args 0 end-1] |
|
if {[llength $argopts]%2 !=0} { |
|
error "[namespace current]::dec2hex arguments prior to list_decimals must be option/value pairs - received '$argopts'" |
|
} |
|
set defaults [tcl::dict::create {*}{ |
|
-width auto |
|
-case upper |
|
-empty_as_decimal "INVALID. set -empty_as_decimal to a number if empty values should be replaced" |
|
}] |
|
set known_opts [tcl::dict::keys $defaults] |
|
set fullopts [tcl::dict::create] |
|
foreach {k v} $argopts { |
|
tcl::dict::set fullopts [tcl::prefix match -message "options for [tcl::namespace::current]::dec2hex. Unexpected option" $known_opts $k] $v |
|
} |
|
set opts [tcl::dict::merge $defaults $fullopts] |
|
# -- --- --- --- |
|
set opt_width [tcl::dict::get $opts -width] |
|
set opt_case [tcl::dict::get $opts -case] |
|
set opt_empty [tcl::dict::get $opts -empty_as_decimal] |
|
if {![string is integer -strict $opt_width]} { |
|
if {$opt_width eq "auto"} { |
|
# 'auto' means width should be just enough to fit the largest hex value in the list |
|
set non_empty [lsearch -all -inline -not $list_decimals ""] |
|
if {[string is integer -strict $opt_empty]} { |
|
lappend non_empty $opt_empty |
|
} |
|
set largestnumber [lindex [lsort -integer -decreasing $non_empty] 0] |
|
set opt_width [expr {int(log($largestnumber)/log(16)) + 1}] |
|
} else { |
|
error "[namespace current]::dec2hex error: -width must be an integer greater than or equal to 1" |
|
} |
|
} |
|
if {$opt_width < 1} { |
|
error "[namespace current]::dec2hex error: -width must be an integer greater than or equal to 1" |
|
} |
|
# -- --- --- --- |
|
|
|
|
|
set resultlist [list] |
|
set fullmatch [tcl::prefix::match -error "" {upper lower} $opt_case] |
|
switch -- $fullmatch { |
|
upper { |
|
set spec X |
|
} |
|
lower { |
|
set spec x |
|
} |
|
default { |
|
error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower" |
|
} |
|
} |
|
set fmt "%${opt_width}.${opt_width}ll${spec}" |
|
|
|
#set list_decimals [lmap d $list_decimals[unset list_decimals] {string map {_ ""} [string trim $d]}] |
|
set list_decimals [lmap d $list_decimals[set list_decimals {}] {string map {_ ""} [string trim $d]}] |
|
if {![string is digit -strict [string map {_ ""} $opt_empty]]} { |
|
if {[lsearch $list_decimals ""] >=0} { |
|
error "[namespace current]::dec2hex error: empty values in list cannot be mapped to non-decimal $opt_empty" |
|
} |
|
} else { |
|
set opt_empty [string map {_ ""} $opt_empty] |
|
if {[set first_empty [lsearch $list_decimals ""]] >= 0} { |
|
set nonempty_head [lrange $list_decimals 0 $first_empty-1] |
|
set list_decimals [concat $nonempty_head [lmap v [lrange $list_decimals $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]] |
|
} |
|
} |
|
return [format [lrepeat [llength $list_decimals] $fmt] {*}$list_decimals] |
|
} |
|
|
|
proc log2 x "expr {log(\$x)/[expr log(2)]}" |
|
#*** !doctools |
|
#[call [fun log2] [arg x]] |
|
#[para]log base2 of x |
|
#[para]This uses a 'live' proc body - the divisor for the change of base is computed once at definition time |
|
#[para](courtesy of RS [uri https://wiki.tcl-lang.org/page/Additional+math+functions]) |
|
|
|
proc logbase {b x} { |
|
#*** !doctools |
|
#[call [fun logbase] [arg b] [arg x]] |
|
#[para]log base b of x |
|
#[para]This function uses expr's natural log and the change of base division. |
|
#[para]This means for example that we can get results like: logbase 10 1000 = 2.9999999999999996 |
|
#[para]Use expr's log10() function or tcl::mathfunc::log10 for base 10 |
|
expr {log($x)/log($b)} |
|
} |
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::factors |
|
@cmd -name punk::lib::factors\ |
|
-summary\ |
|
"Sorted factors of positive integer x"\ |
|
-help\ |
|
"Return a sorted list of the positive factors of x when x > 0 |
|
|
|
When x < 0, the factors of -x are returned with their negative counterparts, so for example factors -6 will return -6 -3 -2 -1 1 2 3 6 |
|
Both positive and negative numbers have both positive and negative factors - so technically factors -200 has the same factors as factors 200. |
|
We take the approach that supplying a negative x is a request for the negative factors as well as the positive factors. |
|
|
|
This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors |
|
Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions |
|
See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers |
|
Comparisons were done with some numbers below 17 digits long |
|
For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. |
|
The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers |
|
but has the disadvantage of being slower for 'small' numbers and using more memory. |
|
If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get |
|
there than computing the whole list, even for small values of x |
|
|
|
For x = 0 we produce an error - as zero has an infinite number of factors (including zero itself) - so factors is not defined for zero in this context. |
|
|
|
math::numtheory::factors in tcllib returns {0 1} for factors 0. |
|
Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py |
|
In other mathematical contexts zero may be considered not to divide anything." |
|
@values -min 1 -max 1 |
|
x -type integer |
|
}] |
|
} |
|
proc factors {x} { |
|
#*** !doctools |
|
#[call [fun factors] [arg x]] |
|
#[para]Return a sorted list of the positive factors of x where x > 0 |
|
#[para]For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. (including zero itself in this context)* |
|
#[para]This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors |
|
#[para]Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions |
|
#[para]See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers |
|
#[para]Comparisons were done with some numbers below 17 digits long |
|
#[para]For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms. |
|
#[para]The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers |
|
#but has the disadvantage of being slower for 'small' numbers and using more memory. |
|
#[para]If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get there than computing the whole list, even for small values of x |
|
#[para]* Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py |
|
#[para] In other mathematical contexts zero may be considered not to divide anything. |
|
|
|
if {$x == 0} { |
|
error "zero has an infinite number of factors - including zero itself - so factors is not defined for zero in this context" |
|
} |
|
|
|
set posx [expr {abs($x)}] |
|
set factors [list 1] |
|
set j 2 |
|
set max [expr {sqrt($posx)}] |
|
while {$j <= $max} { |
|
if {($posx % $j) == 0} { |
|
lappend factors $j [expr {$posx / $j}] |
|
} |
|
incr j |
|
} |
|
lappend factors $posx |
|
if {$x < 0} { |
|
# add negative counterparts: for every positive factor f - there is a negative factor -f |
|
lappend factors {*}[lmap f $factors {expr {-$f}}] |
|
} |
|
return [lsort -unique -integer $factors] |
|
} |
|
proc compare_numberline_dist {paira pairb} { |
|
set dista [expr {abs([lindex $paira 0] - [lindex $paira 1])}] |
|
set distb [expr {abs([lindex $pairb 0] - [lindex $pairb 1])}] |
|
if {$dista < $distb} { |
|
return -1 |
|
} elseif {$dista > $distb} { |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::factorpairs |
|
@cmd -name punk::lib::factorpairs\ |
|
-summary\ |
|
"All unique factor pairs of x (where x != 0)"\ |
|
-help\ |
|
"Returns a list of the unique factor pairs of x, sorted in order of increasing distance on the numberline |
|
between the factors in each pair, and with the smaller factor first in each pair. |
|
For equidistant pairs, the pair with smallest factor first in the pair is returned first. |
|
|
|
Negative factors are included whether x is positive or negative - so for example factorpairs 6 will return |
|
{-6 1} {1 6} {-3 -2} {2 3} |
|
and factorpairs -6 will return |
|
{-6 1} {-1 6} {-3 2} {-2 3} |
|
|
|
Note that for perfect squares - there will be one less unique pairs of factors for a negative number than for |
|
the corresponding positive number. For example factorpairs 16 will return |
|
{-16 1} {1 16} {-8 2} {2 8} {-4 4} {4 4} |
|
and factorpairs -16 will return |
|
{-16 1} {-1 16} {-8 2} {-2 8} {-4 4} |
|
|
|
" |
|
@values -min 1 -max 1 |
|
x -type integer -help\ |
|
"Non-zero integer" |
|
}] |
|
} |
|
proc factorpairs {x} { |
|
if {$x == 0} { |
|
error "zero has an infinite number of factors - including zero itself - so factorpairs is not defined for zero in this context" |
|
} |
|
|
|
if {$x > 0} { |
|
set posx [expr {abs($x)}] |
|
set factors [list [list [expr {-$posx}] -1] [list 1 $posx]] |
|
set j 2 |
|
set max [expr {sqrt($posx)}] |
|
while {$j <= $max} { |
|
if {($posx % $j) == 0} { |
|
lappend factors [list [expr {-$posx / $j}] [expr {-$j}]] |
|
lappend factors [list $j [expr {$posx / $j}]] |
|
} |
|
incr j |
|
} |
|
} else { |
|
set posx [expr {abs($x)}] |
|
set factors [list [list [expr {-$posx}] 1] [list -1 $posx]] |
|
set j 2 |
|
set max [expr {sqrt($posx)}] |
|
while {$j <= $max} { |
|
if {($posx % $j) == 0} { |
|
if {($posx / $j) == $j} { |
|
#perfect square - only one pair of factors |
|
#for a perfect square we expect one less unique pairs of factors for a negative number than for the corresponding positive number |
|
lappend factors [list [expr {-$j}] [expr {$posx / $j}]] |
|
} else { |
|
lappend factors [list [expr {-$posx / $j}] $j] [list [expr {-$j}] [expr {$posx / $j}]] |
|
} |
|
} |
|
incr j |
|
} |
|
} |
|
#algorithm already produces pairs in order of increasing distance on the numberline, |
|
#and with the smaller factor first in the pair, |
|
#and for equidistant pairs, the pair with smallest factor first in the pair, first |
|
#- so we don't need to do any sorting here. |
|
return $factors |
|
#puts "unsorted pairs: $factors" |
|
#set dist_order [lsort -command compare_numberline_dist -decreasing $factors] |
|
#puts " sorted pairs: $dist_order" |
|
#assert $dist_order == $factors. |
|
#return $dist_order |
|
} |
|
proc oddFactors {x} { |
|
#*** !doctools |
|
#[call [fun oddFactors] [arg x]] |
|
#[para]Return a list of odd integer factors of x, sorted in ascending order |
|
if {$x == 0} { |
|
error "zero has an infinite number of factors - so oddFactors is not defined for zero in this context" |
|
} |
|
|
|
set posx [expr {abs($x)}] |
|
set j 2 |
|
set max [expr {sqrt($posx)}] |
|
set factors [list 1] |
|
while {$j <= $max} { |
|
if {$posx % $j == 0} { |
|
set other [expr {$posx / $j}] |
|
if {$other % 2} { |
|
if {$other ni $factors} { |
|
lappend factors $other |
|
} |
|
} |
|
if {$j % 2} { |
|
if {$j ni $factors} { |
|
lappend factors $j |
|
} |
|
} |
|
} |
|
incr j |
|
} |
|
if {$x < 0} { |
|
# add negative counterparts: for every positive factor f - there is a negative factor -f |
|
lappend factors {*}[lmap f $factors {expr {-$f}}] |
|
} |
|
return [lsort -integer -increasing $factors] |
|
} |
|
proc greatestFactorBelow {x} { |
|
#*** !doctools |
|
#[call [fun greatestFactorBelow] [arg x]] |
|
#[para]Return the largest factor of x excluding itself |
|
#[para]factor functions can be useful for console layout calculations |
|
#[para]See Tcllib math::numtheory for more extensive implementations |
|
if {$x == 0} { |
|
error "zero has an infinite number of factors - so greatestFactorBelow is not defined for zero in this context" |
|
} |
|
if {$x == 1 || $x < 0} { |
|
return {} |
|
} |
|
|
|
if {$x % 2 == 0 || $x == 0} { |
|
return [expr {$x / 2}] |
|
} |
|
set j 3 |
|
set max [expr {sqrt($x)}] |
|
while {$j <= $max} { |
|
if {$x % $j == 0} { |
|
return [expr {$x / $j}] |
|
} |
|
incr j 2 |
|
} |
|
return 1 |
|
} |
|
proc greatestOddFactorBelow {x} { |
|
#*** !doctools |
|
#[call [fun greatestOddFactorBelow] [arg x]] |
|
#[para]Return the largest odd integer factor of x excluding x itself |
|
if {$x == 0} { |
|
error "zero has an infinite number of factors - so greatestFactorBelow is not defined for zero in this context" |
|
} |
|
if {$x == 1 || $x < 0} { |
|
return {} |
|
} |
|
if {$x %2 == 0} { |
|
return [greatestOddFactor $x] |
|
} |
|
set j 3 |
|
#dumb brute force - time taken to compute is wildly variable on big numbers |
|
#todo - use a (memoized?) generator of primes to reduce the search space |
|
#tcllib math::numtheory has suitable functions - but do we want that dependency here? Testing shows brute-force often faster for small numbers. |
|
set god 1 |
|
set max [expr {sqrt($x)}] |
|
while { $j <= $max} { |
|
if {$x % $j == 0} { |
|
set other [expr {$x / $j}] |
|
if {$other % 2 == 0} { |
|
set god $j |
|
} else { |
|
set god [expr {$x / $j}] |
|
#lowest j - so other side must be highest |
|
break |
|
} |
|
} |
|
incr j 2 |
|
} |
|
return $god |
|
} |
|
proc greatestOddFactor {x} { |
|
#*** !doctools |
|
#[call [fun greatestOddFactor] [arg x]] |
|
#[para]Return the largest odd integer factor of x |
|
#[para]For an odd value of x - this will always return x |
|
if {$x == 0} { |
|
error "greatestOddFactor is not defined for zero" |
|
} |
|
set x [expr {abs($x)}] |
|
|
|
if {$x % 2 != 0 || $x == 0} { |
|
return $x |
|
} |
|
set r [expr {$x / 2}] |
|
while {$r % 2 == 0} { |
|
set r [expr {$r / 2}] |
|
} |
|
return $r |
|
} |
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::gcd |
|
@cmd -name punk::lib::gcd\ |
|
-summary\ |
|
"Greatest common divisor of m and n."\ |
|
-help\ |
|
"Return the greatest common divisor of m and n. |
|
See also Lars Hellström's math::numtheory library in Tcllib |
|
|
|
Graphical use: |
|
An a by b rectangle can be covered with square tiles of side-length c, |
|
only if c is a common divisor of a and b" |
|
@values -min 2 -max 2 |
|
m -type integer |
|
n -type integer |
|
}] |
|
} |
|
proc gcd {n m} { |
|
#see also Lars Hellström's math::numtheory library in Tcllib |
|
#(this version uses absolute values - whereas the numtheory version seems to return negative gcd if both inputs are negative) |
|
set n [expr {abs($n)}] |
|
set m [expr {abs($m)}] |
|
|
|
# |
|
# Apply Euclid's good old algorithm |
|
# |
|
if { $n > $m } { |
|
set t $n |
|
set n $m |
|
set m $t |
|
} |
|
|
|
while { $n > 0 } { |
|
set r [expr {$m % $n}] |
|
set m $n |
|
set n $r |
|
} |
|
|
|
return $m |
|
} |
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::lcm |
|
@cmd -name punk::lib::lcm\ |
|
-summary\ |
|
"Lowest common multiple of m and n."\ |
|
-help\ |
|
"Return the lowest common multiple of m and n." |
|
@values -min 2 -max 2 |
|
m -type integer |
|
n -type integer |
|
#@return -type integer |
|
}] |
|
} |
|
proc lcm {n m} { |
|
if {$n== 0 && $m == 0} { |
|
return 0 |
|
} |
|
#gcd implementation takes absolute values of n and m - so we don't need to worry about that here. |
|
set gcd [gcd $n $m] |
|
return [expr {abs($n)*(abs($m)/$gcd)}] |
|
} |
|
proc commonDivisors {x y} { |
|
#*** !doctools |
|
#[call [fun commonDivisors] [arg x] [arg y]] |
|
#[para]Return a list of all the common factors of x and y |
|
#[para](equivalent to factors of their gcd) |
|
return [factors [gcd $x $y]] |
|
} |
|
|
|
proc sieve n { |
|
#variation on DKF's nsieve which counts, modified to return the list of primes instead of the count. |
|
incr n |
|
variable a |
|
if {[llength $a] < $n} { |
|
set a [lrepeat $n 0] |
|
} |
|
#Bit0=sieved,bit1=nonprime |
|
for {set i 2} {$i<$n} {incr i} { |
|
if {([set v [lindex $a $i]]&2)==0} { |
|
lset a $i 1 |
|
#puts "setting $i to 1" |
|
} |
|
if {!$v} { |
|
# $v trick to take advantage of the cache in variable a. |
|
for {set j [expr {$i+$i}]} {$j<$n} {incr j $i} { |
|
lset a $j 3 |
|
} |
|
} |
|
#puts "$i \n $a" |
|
} |
|
#our list is only 1's and 3's - and we can use the indices of the 1's to get the primes |
|
lsearch -all -start 2 -integer $a 1 |
|
} |
|
|
|
#experimental - there are probably better/faster ways |
|
proc sieve1 n { |
|
#list based implementation is faster than dict based (in tcl 9 anyway) |
|
if {$n < 2} {return {}} |
|
set test [lseq 0 $n] |
|
set next 2 |
|
set limit [expr {sqrt($n)}] |
|
while {$next <= $limit} { |
|
for {set i $next} {$i <= $n} {incr i $next} {lset test $i 0} |
|
lset test $next $next |
|
set next [lsearch -inline -start $next+1 -integer -not $test 0] |
|
} |
|
lsearch -all -inline -start 2 -integer -not $test 0 |
|
} |
|
proc sieve1b n { |
|
#list based implementation is faster than dict based (in tcl 9 anyway) |
|
if {$n < 2} {return {}} |
|
#set test [lseq 0 $n] |
|
#lseq will shimmer to list upon first lset below. |
|
#There seem to be oddities with timing when using lseq. (slowdowns after a few calls - tcl 9) |
|
set test {} |
|
for {set i 0} {$i <= $n} {incr i} {lappend test $i} |
|
set next 2 |
|
set limit [expr {sqrt($n)}] |
|
while {$next <= $limit} { |
|
for {set i $next} {$i <= $n} {incr i $next} {lset test $i 0} |
|
lset test $next $next |
|
set next [lsearch -inline -start $next+1 -integer -not $test 0] |
|
} |
|
lsearch -all -inline -start 2 -integer -not $test 0 |
|
} |
|
set sievelist {} |
|
proc sieve1c n { |
|
#list based implementation is faster than dict based (in tcl 9 anyway) |
|
if {$n < 2} {return {}} |
|
variable sievelist |
|
if {[llength $sievelist] < $n} { |
|
#set sievelist [lrepeat $n 0] |
|
set sievelist [lseq 0 $n] |
|
} |
|
set next 2 |
|
set limit [expr {sqrt($n)}] |
|
while {$next <= $limit} { |
|
for {set i $next} {$i <= $n} {incr i $next} {lset sievelist $i 0} |
|
lset sievelist $next $next |
|
set next [lsearch -inline -start $next+1 -integer -not $sievelist 0] |
|
} |
|
lsearch -all -inline -start 2 -integer -not $sievelist 0 |
|
} |
|
|
|
proc sieve2 n { |
|
set primes [list] |
|
if {$n < 2} {return $primes} |
|
set test [lseq 0 $n] |
|
set next 2 |
|
set limit [expr {sqrt($n)}] |
|
while {$next <= $limit} { |
|
for {set i $next} {$i <= $n} {incr i $next} {lset test $i 0} |
|
lappend primes $next |
|
set next [lsearch -inline -start $next -integer -not $test 0] |
|
} |
|
list {*}$primes {*}[lsearch -all -inline -start $next -integer -not $test 0] |
|
} |
|
|
|
#dict based |
|
proc sieve3 n { |
|
set primes [list] |
|
if {$n < 2} {return $primes} |
|
set nums [tcl::dict::create] |
|
for {set i 2} {$i <= $n} {incr i} { |
|
tcl::dict::set nums $i "" |
|
} |
|
set next 2 |
|
set limit [expr {sqrt($n)}] |
|
while {$next <= $limit} { |
|
for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} |
|
lappend primes $next |
|
tcl::dict::for {next -} $nums break |
|
} |
|
return [concat $primes [tcl::dict::keys $nums]] |
|
} |
|
proc sieve4 n { |
|
set primes [list] |
|
if {$n < 2} {return $primes} |
|
set nums [tcl::dict::create] |
|
for {set i 2} {$i <= $n} {incr i} { |
|
tcl::dict::set nums $i "" |
|
} |
|
set next 2 |
|
set limit [expr {sqrt($n)}] |
|
while {$next <= $limit} { |
|
for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i} |
|
lappend primes $next |
|
#dict for {next -} $nums break |
|
set next [lindex $nums 0] ;#will shimmer dict to list rep |
|
} |
|
return [concat $primes [tcl::dict::keys $nums]] |
|
} |
|
|
|
#count primes up to n-1 |
|
set a {} |
|
proc nsieve n { |
|
#https://wiki.tcl-lang.org/page/Eratosthenes+Sieve |
|
#by DKF |
|
incr n |
|
variable a |
|
if {[llength $a] < $n} { |
|
set a [lrepeat $n 0] |
|
} |
|
set count 0 |
|
#Bit0=sieved,bit1=nonprime |
|
for {set i 2} {$i<$n} {incr i} { |
|
if {([set v [lindex $a $i]]&2)==0} { |
|
incr count |
|
lset a $i 1 |
|
} |
|
if {!$v} { |
|
#this $v trick is to take advantage of the cache in variable a. |
|
for {set j [expr {$i+$i}]} {$j<$n} {incr j $i} { |
|
lset a $j 3 |
|
} |
|
} |
|
} |
|
#puts "Primes up to\t[expr {$n-1}]\t$count" |
|
return $count |
|
} |
|
proc nsieve_cacheless n { |
|
#variation on DKF's nsieve which doesn't use the cache in variable a. |
|
#This is faster for the first time it's used with a given n - but slower on subsequent calls with the same or smaller n. |
|
incr n |
|
set a [lrepeat $n 0] |
|
set count 0 |
|
#Bit0=sieved,bit1=nonprime |
|
for {set i 2} {$i<$n} {incr i} { |
|
if {([lindex $a $i]&2)==0} { |
|
incr count |
|
lset a $i 1 |
|
for {set j [expr {$i+$i}]} {$j<$n} {incr j $i} { |
|
lset a $j 3 |
|
} |
|
} |
|
} |
|
return $count |
|
} |
|
proc nsieve_insane n { |
|
#test of insane bignum method. |
|
incr n |
|
set sieved 0 |
|
set primes 0 |
|
for {set i 2} {$i<$n} {incr i} { |
|
set bit [expr {2**$i}] |
|
if {($sieved & $bit)==0} { |
|
set primes [expr {$primes | $bit}] |
|
for {set j [expr {$i+$i}]} {$j<$n} {incr j $i} { |
|
set sieved [expr {$sieved | 2**$j}] |
|
} |
|
} |
|
} |
|
set count 0 |
|
set p $primes |
|
while {$p} { |
|
set p [expr {$p & ($p-1)}] |
|
incr count |
|
} |
|
return $count |
|
} |
|
|
|
proc hasglobs {str} { |
|
#*** !doctools |
|
#[call [fun hasglobs] [arg str]] |
|
#[para]Return a boolean indicating whether str contains any of the glob characters: * ? [lb] [rb] |
|
#[para]hasglobs uses append to preserve Tcls internal representation for str - so it should help avoid shimmering in the few cases where this may matter. |
|
regexp {[*?\[\]]} [append obj2 $str {}] ;# int-rep preserving |
|
} |
|
|
|
proc trimzero {number} { |
|
#*** !doctools |
|
#[call [fun trimzero] [arg number]] |
|
#[para]Return number with left-hand-side zeros trimmed off - unless all zero |
|
#[para]If number is all zero - a single 0 is returned |
|
set trimmed [string trimleft $number 0] |
|
if {[string length $trimmed] == 0} { |
|
set trimmed 0 |
|
} |
|
return $trimmed |
|
} |
|
proc substring_count {str substring} { |
|
#*** !doctools |
|
#[call [fun substring_count] [arg str] [arg substring]] |
|
#[para]Search str and return number of occurrences of substring |
|
|
|
#faster than lsearch on split for str of a few K |
|
if {$substring eq ""} {return 0} |
|
set occurrences [expr {[string length $str]-[string length [string map [list $substring {}] $str]]}] |
|
return [expr {$occurrences / [string length $substring]}] |
|
} |
|
|
|
proc dict_merge_ordered {defaults main} { |
|
#*** !doctools |
|
#[call [fun dict_merge_ordered] [arg defaults] [arg main]] |
|
#[para]The standard dict merge accepts multiple dicts with values from dicts to the right (2nd argument) taking precedence. |
|
#[para]When merging with a dict of default values - this means that any default key/vals that weren't in the main dict appear in the output before the main data. |
|
#[para]This function merges the two dicts whilst maintaining the key order of main followed by defaults. |
|
|
|
#1st merge (inner merge) with wrong values taking precedence - but right key-order - then (outer merge) restore values |
|
return [tcl::dict::merge [tcl::dict::merge $main $defaults] $main] |
|
} |
|
|
|
proc askuser {question} { |
|
#*** !doctools |
|
#[call [fun askuser] [arg question]] |
|
#[para]A basic utility to read an answer from stdin |
|
#[para]The prompt is written to the terminal and then it waits for a user to type something |
|
#[para]stdin is temporarily configured to blocking and then put back in its original state in case it wasn't already so. |
|
#[para]If the terminal is using punk::console and is in raw mode - the terminal will temporarily be put in line mode. |
|
#[para](Generic terminal raw vs linemode detection not yet present) |
|
#[para]The user must hit enter to submit the response |
|
#[para]The return value is the string if any that was typed prior to hitting enter. |
|
#[para]The question argument can be manually colourised using the various punk::ansi funcitons |
|
#[example_begin] |
|
# set answer [lb]punk::lib::askuser "[lb]a+ green bold[rb]Do you want to proceed? (Y|N)[lb]a[rb]"[rb] |
|
# if {[lb]string match y* [lb]string tolower $answer[rb][rb]} { |
|
# puts "Proceeding" |
|
# } else { |
|
# puts "Cancelled by user" |
|
# } |
|
#[example_end] |
|
puts stdout $question |
|
flush stdout |
|
set stdin_state [chan configure stdin] |
|
if {[catch { |
|
package require punk::console |
|
set console_raw [tsv::get console is_raw] |
|
} err_console]} { |
|
#assume normal line mode |
|
set console_raw 0 |
|
} |
|
try { |
|
chan configure stdin -blocking 1 |
|
if {$console_raw} { |
|
punk::console::disableRaw |
|
set answer [gets stdin] |
|
punk::console::enableRaw |
|
} else { |
|
set answer [gets stdin] |
|
} |
|
} finally { |
|
chan configure stdin -blocking [tcl::dict::get $stdin_state -blocking] |
|
} |
|
return $answer |
|
} |
|
|
|
#like textutil::adjust::indent - but doesn't strip trailing lines, and doesn't implement skip parameter. |
|
proc indent {text {prefix " "}} { |
|
set result [list] |
|
foreach line [split $text \n] { |
|
if {[string trim $line] eq ""} { |
|
lappend result "" |
|
} else { |
|
lappend result $prefix[string trimright $line] |
|
} |
|
} |
|
return [join $result \n] |
|
} |
|
#dedent? |
|
proc undent {text {max -1}} { |
|
if {$text eq ""} { |
|
return "" |
|
} |
|
set lines [split $text \n] |
|
set nonblank [list] |
|
foreach ln $lines { |
|
if {[string trim $ln] eq ""} { |
|
continue |
|
} |
|
lappend nonblank $ln |
|
} |
|
set lcp [longestCommonPrefix $nonblank] |
|
if {$lcp eq ""} { |
|
return $text |
|
} |
|
regexp {^([\t ]*)} $lcp _m lcp |
|
if {$lcp eq ""} { |
|
return $text |
|
} |
|
set len [string length $lcp] |
|
if {$max != -1} { |
|
set len [expr {min($len,$max)}] |
|
} |
|
set result [list] |
|
foreach ln $lines { |
|
if {[string trim $ln] eq ""} { |
|
lappend result "" |
|
} else { |
|
lappend result [string range $ln $len end] |
|
} |
|
} |
|
return [join $result \n] |
|
} |
|
#A version of textutil::string::longestCommonPrefixList |
|
proc longestCommonPrefix {items} { |
|
if {[llength $items] <= 1} { |
|
return [lindex $items 0] |
|
} |
|
#set items [lsort $items[unset items]] |
|
set items [lsort $items[set items {}]] |
|
set min [lindex $items 0] |
|
set max [lindex $items end] |
|
#if first and last of sorted list share a prefix - then all do (first and last of sorted list are the most different in the list) |
|
#(sort order nothing to do with length - e.g min may be longer than max) |
|
if {[string length $min] > [string length $max]} { |
|
set temp $min |
|
set min $max |
|
set max $temp |
|
} |
|
set n [string length $min] |
|
set prefix "" |
|
set i -1 |
|
while {[incr i] < $n && ([set c [string index $min $i]] eq [string index $max $i])} { |
|
append prefix $c |
|
} |
|
return $prefix |
|
} |
|
|
|
#e.g linesort -decreasing $data |
|
proc linesort {args} { |
|
#*** !doctools |
|
#[call [fun linesort] [opt {sortoption ?val?...}] [arg textblock]] |
|
#[para]Sort lines in textblock |
|
#[para]Returns another textblock with lines sorted |
|
#[para]options are flags as accepted by lsort ie -ascii -command -decreasing -dictionary -index -indices -integer -nocase -real -stride -unique |
|
if {[llength $args] < 1} { |
|
error "linesort missing lines argument" |
|
} |
|
set lines [lindex $args end] |
|
set opts [lrange $args 0 end-1] |
|
#.= list $lines |@0,sortopts/1> linelist |> .=data>1,sortopts>1* lsort |> list_as_lines <| {*}$opts |
|
list_as_lines [lsort {*}$opts [linelist $lines]] |
|
} |
|
|
|
proc list_as_lines {args} { |
|
#*** !doctools |
|
#[call [fun list_as_lines] [opt {-joinchar char}] [arg linelist]] |
|
#[para]This simply joins the elements of the list with -joinchar |
|
#[para]It is mainly intended for use in pipelines where the primary argument comes at the end - but it can also be used as a general replacement for join $lines <le> |
|
#[para]The sister function lines_as_list takes a block of text and splits it into lines - but with more options related to trimming the block and/or each line. |
|
if {[set eop [lsearch $args --]] == [llength $args]-2} { |
|
#end-of-opts not really necessary - except for consistency with lines_as_list |
|
set args [concat [lrange $args 0 $eop-1] [lrange $args $eop+1 end]] |
|
} |
|
if {[llength $args] == 3 && [lindex $args 0] eq "-joinchar"} { |
|
set joinchar [lindex $args 1] |
|
set lines [lindex $args 2] |
|
} elseif {[llength $args] == 1} { |
|
set joinchar "\n" |
|
set lines [lindex $args 0] |
|
} else { |
|
error "list_as_lines usage: list_as_lines ?-joinchar <char>? <linelist>" |
|
} |
|
return [join $lines $joinchar] |
|
} |
|
proc list_as_lines2 {args} { |
|
#eat or own dogfood version - shows the implementation is simpler - but unfortunately not suitable for a simple function like this which should be as fast as possible? |
|
lassign [tcl::dict::values [punk::args::parse $args withdef { |
|
-joinchar -default \n |
|
@values -min 1 -max 1 |
|
}]] leaders opts values |
|
|
|
return [join [tcl::dict::get $values 0] [tcl::dict::get $opts -joinchar]] |
|
} |
|
|
|
proc lines_as_list {args} { |
|
#*** !doctools |
|
#[call [fun lines_as_list] [opt {option value ...}] [arg text]] |
|
#[para]Returns a list of possibly trimmed lines depeding on options |
|
#[para]The concept of lines is raw lines from splitting on newline after crlf is mapped to lf |
|
#[para]- not console lines which may be entirely different due to control characters such as vertical tabs or ANSI movements |
|
|
|
#The underlying function linelist has the validation code which gives nicer usage errors. |
|
#we can't use a dict merge here without either duplicating the underlying validation somewhat, or risking a default message from dict merge error |
|
#..because we don't know what to say if there are odd numbers of args |
|
#we can guess that it's ok to insert our default if no -block found in $args - but as a general principle this mightn't always work |
|
#e.g if -block is also a valid value for the textblock itself. Which in this case it is - although unlikely, and our -block {} default is irrelevant in that case anyway |
|
|
|
if {[lsearch $args "--"] == [llength $args]-2} { |
|
set opts [lrange $args 0 end-2] |
|
} else { |
|
set opts [lrange $args 0 end-1] |
|
} |
|
#set opts [tcl::dict::merge {-block {}} $opts] |
|
set bposn [lsearch $opts -block] |
|
if {$bposn < 0} { |
|
lappend opts -block {} |
|
} |
|
set text [lindex $args end] |
|
#tailcall linelist {*}$opts $text |
|
return [linelist {*}$opts $text] |
|
} |
|
#this demonstrates the ease of using an args processor - but as lines_as_list is heavily used in terminal output - we can't afford the extra microseconds |
|
proc lines_as_list2 {args} { |
|
#pass -anyopts 1 so we can let the next function decide what arguments are valid - but still pass our defaults |
|
#-anyopts 1 avoids having to know what to say if odd numbers of options passed etc |
|
#we don't have to decide what is an opt vs a value |
|
#even if the caller provides the argument -block without a value the next function's validation will report a reasonable error because there is now nothing in $values (consumed by -block) |
|
lassign [tcl::dict::values [punk::args::parse $args withdef { |
|
@opts -any 1 |
|
-block -default {} |
|
}]] leaderdict opts valuedict |
|
tailcall linelist {*}$opts {*}[tcl::dict::values $valuedict] |
|
} |
|
|
|
# important for pipeline & match_assign |
|
# -line trimline|trimleft|trimright -block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty -commandprefix {string length} ? |
|
# -block trimming only trims completely empty lines. use -line trimming to remove whitespace e.g -line trimright will clear empty lines without affecting leading whitespace on other lines that aren't pure whitespace |
|
|
|
set linelist_body { |
|
set usage "linelist ?-ansiresets auto|<bool>? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix <cmdlist> text" |
|
if {[llength $args] == 0} { |
|
error "linelist missing textchunk argument usage:$usage" |
|
} |
|
set text [lindex $args end] |
|
set text [string map {\r\n \n} $text] ;#review - option? |
|
|
|
set arglist [lrange $args 0 end-1] |
|
set opts [tcl::dict::create {*}{ |
|
-block {trimhead1 trimtail1} |
|
-line {} |
|
-commandprefix "" |
|
-ansiresets auto |
|
-ansireplays 0 |
|
}] |
|
foreach {o v} $arglist { |
|
switch -- $o { |
|
-block - -line - -commandprefix - -ansiresets - -ansireplays { |
|
tcl::dict::set opts $o $v |
|
} |
|
default { |
|
error "linelist: Unrecognized option '$o' usage:$usage" |
|
} |
|
} |
|
} |
|
# -- --- --- --- --- --- |
|
set opt_block [tcl::dict::get $opts -block] |
|
if {[llength $opt_block]} { |
|
foreach bo $opt_block { |
|
switch -- $bo { |
|
trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} |
|
default { |
|
set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] |
|
error "linelist: unknown -block option value: $bo known values: $known_blockopts" |
|
} |
|
} |
|
} |
|
#normalize certain combos |
|
if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { |
|
#set opt_block [lreplace $opt_block $posn $posn] |
|
ledit opt_block $posn $posn |
|
} |
|
if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { |
|
#set opt_block [lreplace $opt_block $posn $posn] |
|
ledit opt_block $posn $posn |
|
} |
|
if {"trimall" in $opt_block} { |
|
#no other block options make sense in combination with this |
|
set opt_block [list "trimall"] |
|
} |
|
|
|
#TODO |
|
if {"triminner" in $opt_block } { |
|
error "linelist -block triminner not implemented - sorry" |
|
} |
|
|
|
} |
|
|
|
|
|
# -- --- --- --- --- --- |
|
set opt_line [tcl::dict::get $opts -line] |
|
set tl_left 0 |
|
set tl_right 0 |
|
set tl_both 0 |
|
foreach lo $opt_line { |
|
switch -- $lo { |
|
trimline { |
|
set tl_both 1 |
|
} |
|
trimleft { |
|
set tl_left 1 |
|
} |
|
trimright { |
|
set tl_right 1 |
|
} |
|
default { |
|
set known_lineopts [list trimline trimleft trimright] |
|
error "linelist: unknown -line option value: $lo known values: $known_lineopts" |
|
} |
|
} |
|
} |
|
#normalize trimleft trimright combo |
|
if {$tl_left && $tl_right} { |
|
set opt_line [list "trimline"] |
|
set tl_both 1 |
|
} |
|
# -- --- --- --- --- --- |
|
set opt_commandprefix [tcl::dict::get $opts -commandprefix] |
|
# -- --- --- --- --- --- |
|
set opt_ansiresets [tcl::dict::get $opts -ansiresets] |
|
# -- --- --- --- --- --- |
|
set opt_ansireplays [tcl::dict::get $opts -ansireplays] |
|
if {$opt_ansireplays} { |
|
if {$opt_ansiresets eq "auto"} { |
|
set opt_ansiresets 1 |
|
} |
|
} else { |
|
if {$opt_ansiresets eq "auto"} { |
|
set opt_ansiresets 0 |
|
} |
|
} |
|
# -- --- --- --- --- --- |
|
set linelist [list] |
|
set nlsplit [split $text \n] |
|
if {![llength $opt_line]} { |
|
set linelist $nlsplit |
|
#lappend linelist {*}$nlsplit |
|
} else { |
|
#already normalized trimleft+trimright to trimline |
|
if {$tl_both} { |
|
foreach ln $nlsplit { |
|
lappend linelist [string trim $ln] |
|
} |
|
} elseif {$tl_left} { |
|
foreach ln $nlsplit { |
|
lappend linelist [string trimleft $ln] |
|
} |
|
} elseif {$tl_right} { |
|
foreach ln $nlsplit { |
|
lappend linelist [string trimright $ln] |
|
} |
|
} |
|
} |
|
|
|
if {"collateempty" in $opt_block} { |
|
set inputlist $linelist[set linelist [list]] |
|
set last "-" |
|
foreach input $inputlist { |
|
if {$input ne ""} { |
|
lappend linelist $input |
|
set last "-" |
|
} else { |
|
if {$last ne ""} { |
|
lappend linelist "" |
|
} |
|
set last "" |
|
} |
|
} |
|
} |
|
|
|
if {"trimall" in $opt_block} { |
|
set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] |
|
} else { |
|
set start 0 |
|
if {"trimhead" in $opt_block} { |
|
set idx 0 |
|
set lastempty -1 |
|
foreach ln $linelist { |
|
if {[lindex $linelist $idx] ne ""} { |
|
break |
|
} else { |
|
set lastempty $idx |
|
} |
|
incr idx |
|
} |
|
if {$lastempty >=0} { |
|
set start [expr {$lastempty +1}] |
|
} |
|
} |
|
set linelist [lrange $linelist $start end] |
|
|
|
if {"trimtail" in $opt_block} { |
|
set revlinelist [lreverse $linelist][set linelist {}] |
|
set i 0 |
|
foreach ln $revlinelist { |
|
if {$ln ne ""} { |
|
set linelist [lreverse [lrange $revlinelist $i end]] |
|
break |
|
} |
|
incr i |
|
} |
|
} |
|
|
|
# --- --- |
|
set start 0 |
|
set end "end" |
|
if {"trimhead1" in $opt_block} { |
|
if {[lindex $linelist 0] eq ""} { |
|
set start 1 |
|
} |
|
} |
|
if {"trimtail1" in $opt_block} { |
|
if {[lindex $linelist end] eq ""} { |
|
set end "end-1" |
|
} |
|
} |
|
set linelist [lrange $linelist $start $end] |
|
} |
|
|
|
#review - we need to make sure ansiresets don't accumulate/grow on any line |
|
#Each resulting line should have a reset of some type at start and a pure-reset at end to stop |
|
#see if we can find an ST sequence that most terminals will not display for marking sections? |
|
if {$opt_ansireplays} { |
|
<require_punk_ansi> ;#package require punk::ansi |
|
if {$opt_ansiresets} { |
|
set RST "\x1b\[0m" |
|
} else { |
|
set RST "" |
|
} |
|
set replaycodes $RST ;#todo - default? |
|
set transformed [list] |
|
#shortcircuit common case of no ansi |
|
#NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. |
|
#This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) |
|
#ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable |
|
#detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures) |
|
|
|
#we use detectcode_in_list instead of detect_in_list |
|
#detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message) |
|
# - but the main reason is it is slightly faster. |
|
if {![punk::ansi::ta::detectcode_in_list $linelist]} { |
|
if {$opt_ansiresets} { |
|
foreach ln $linelist { |
|
lappend transformed $RST$ln$RST |
|
} |
|
set linelist $transformed |
|
} |
|
} else { |
|
|
|
#INLINE punk::ansi::codetype::is_sgr_reset |
|
#regexp {\x1b\[0*m$} $code |
|
set re_is_sgr_reset {\x1b\[0*m$} |
|
#INLINE punk::ansi::codetype::is_sgr |
|
#regexp {\033\[[0-9;:]*m$} $code |
|
set re_is_sgr {\x1b\[[0-9;:]*m$} |
|
|
|
foreach ln $linelist { |
|
#set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable |
|
|
|
#set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. |
|
#get_codes_single lists only the codes. no plaintext or empty elements |
|
set ansisplits [punk::ansi::ta::get_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. |
|
if {[llength $ansisplits] == 0} { |
|
#plaintext only - no ansi codes in line |
|
lappend transformed [string cat $replaycodes $ln $RST] |
|
#leave replaycodes as is for next line |
|
set nextreplay $replaycodes |
|
} else { |
|
set tail $RST |
|
set lastcode [lindex $ansisplits end] ;#may or may not be SGR |
|
set lastcodeoffset [expr {[string length $lastcode]-1}] |
|
if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { |
|
if {[string range $ln end-$lastcodeoffset end] eq $lastcode} { |
|
#last plaintext is empty. So the line is already suffixed with a reset |
|
set tail "" |
|
set nextreplay $RST |
|
} else { |
|
#trailing text has been reset within line - but no tail reset present |
|
#we normalize by putting a tail reset on anyway |
|
set tail $RST |
|
set nextreplay $RST |
|
} |
|
} elseif {[string range $ln end-$lastcodeoffset end] eq $lastcode && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { |
|
#code is at tail (no trailing plaintext) |
|
#No tail reset - and no need to examine whole line to determine stack that is in effect |
|
set tail $RST |
|
set nextreplay $lastcode |
|
} else { |
|
#last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect |
|
#last codeset doesn't end in a pure-reset |
|
#whether code was at very end or not - add a reset tail |
|
set tail $RST |
|
#determine effective replay for line |
|
set codestack [list start] |
|
foreach code $ansisplits { |
|
if {[punk::ansi::codetype::is_sgr_reset $code]} { |
|
set codestack [list] ;#different from 'start' marked - this means we've had a reset |
|
} elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { |
|
set codestack [list $code] |
|
} else { |
|
if {[punk::ansi::codetype::is_sgr $code]} { |
|
#todo - proper test of each code - so we only take latest background/foreground etc. |
|
#requires handling codes with varying numbers of parameters. |
|
#basic simplification - remove straight dupes. |
|
set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. |
|
set codestack [lremove $codestack {*}$dup_posns] |
|
lappend codestack $code |
|
} ;#else gx0 or other code - we don't want to stack it with SGR codes |
|
} |
|
} |
|
if {$codestack eq [list start]} { |
|
#No SGRs - may have been other codes |
|
set line_has_sgr 0 |
|
} else { |
|
#list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes |
|
set line_has_sgr 1 |
|
if {[lindex $codestack 0] eq "start"} { |
|
set codestack [lrange $codestack 1 end] |
|
} |
|
} |
|
|
|
#set newreplay [join $codestack ""] |
|
#set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] |
|
set newreplay [punk::ansi::codetype::sgr_merge $codestack] |
|
|
|
if {$line_has_sgr && $newreplay ne $replaycodes} { |
|
#adjust if it doesn't already does a reset at start |
|
if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { |
|
set nextreplay $newreplay |
|
} else { |
|
set nextreplay $RST$newreplay |
|
} |
|
} else { |
|
set nextreplay $replaycodes |
|
} |
|
} |
|
if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { |
|
#no point attaching any replay |
|
lappend transformed [string cat $ln $tail] |
|
} else { |
|
lappend transformed [string cat $replaycodes $ln $tail] |
|
} |
|
} |
|
set replaycodes $nextreplay |
|
} |
|
set linelist $transformed |
|
} |
|
} |
|
|
|
if {[llength $opt_commandprefix]} { |
|
set transformed [list] |
|
foreach ln $linelist { |
|
lappend transformed [{*}$opt_commandprefix $ln] |
|
} |
|
set linelist $transformed |
|
} |
|
|
|
return $linelist |
|
} |
|
if {$has_punk_ansi} { |
|
#optimise linelist as much as possible |
|
set linelist_body [string map {<require_punk_ansi> ""} $linelist_body] |
|
} else { |
|
#punk ansi not avail at time of package load. |
|
#by putting in calls to punk::ansi the user will get appropriate error messages |
|
set linelist_body [string map {<require_punk_ansi> "package require punk::ansi"} $linelist_body] |
|
} |
|
|
|
set linelist_body_original { |
|
set usage "linelist ?-ansiresets auto|<bool>? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix <cmdlist> text" |
|
if {[llength $args] == 0} { |
|
error "linelist missing textchunk argument usage:$usage" |
|
} |
|
set text [lindex $args end] |
|
set text [string map {\r\n \n} $text] ;#review - option? |
|
|
|
set arglist [lrange $args 0 end-1] |
|
set opts [tcl::dict::create {*}{ |
|
-block {trimhead1 trimtail1} |
|
-line {} |
|
-commandprefix "" |
|
-ansiresets auto |
|
-ansireplays 0 |
|
}] |
|
foreach {o v} $arglist { |
|
switch -- $o { |
|
-block - -line - -commandprefix - -ansiresets - -ansireplays { |
|
tcl::dict::set opts $o $v |
|
} |
|
default { |
|
error "linelist: Unrecognized option '$o' usage:$usage" |
|
} |
|
} |
|
} |
|
# -- --- --- --- --- --- |
|
set opt_block [tcl::dict::get $opts -block] |
|
if {[llength $opt_block]} { |
|
foreach bo $opt_block { |
|
switch -- $bo { |
|
trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} |
|
default { |
|
set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] |
|
error "linelist: unknown -block option value: $bo known values: $known_blockopts" |
|
} |
|
} |
|
} |
|
#normalize certain combos |
|
if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { |
|
#set opt_block [lreplace $opt_block $posn $posn] |
|
ledit opt_block $posn $posn |
|
} |
|
if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { |
|
#set opt_block [lreplace $opt_block $posn $posn] |
|
ledit opt_block $posn $posn |
|
} |
|
if {"trimall" in $opt_block} { |
|
#no other block options make sense in combination with this |
|
set opt_block [list "trimall"] |
|
} |
|
|
|
#TODO |
|
if {"triminner" in $opt_block } { |
|
error "linelist -block triminner not implemented - sorry" |
|
} |
|
|
|
} |
|
|
|
|
|
# -- --- --- --- --- --- |
|
set opt_line [tcl::dict::get $opts -line] |
|
set tl_left 0 |
|
set tl_right 0 |
|
set tl_both 0 |
|
foreach lo $opt_line { |
|
switch -- $lo { |
|
trimline { |
|
set tl_both 1 |
|
} |
|
trimleft { |
|
set tl_left 1 |
|
} |
|
trimright { |
|
set tl_right 1 |
|
} |
|
default { |
|
set known_lineopts [list trimline trimleft trimright] |
|
error "linelist: unknown -line option value: $lo known values: $known_lineopts" |
|
} |
|
} |
|
} |
|
#normalize trimleft trimright combo |
|
if {$tl_left && $tl_right} { |
|
set opt_line [list "trimline"] |
|
set tl_both 1 |
|
} |
|
# -- --- --- --- --- --- |
|
set opt_commandprefix [tcl::dict::get $opts -commandprefix] |
|
# -- --- --- --- --- --- |
|
set opt_ansiresets [tcl::dict::get $opts -ansiresets] |
|
# -- --- --- --- --- --- |
|
set opt_ansireplays [tcl::dict::get $opts -ansireplays] |
|
if {$opt_ansireplays} { |
|
if {$opt_ansiresets eq "auto"} { |
|
set opt_ansiresets 1 |
|
} |
|
} else { |
|
if {$opt_ansiresets eq "auto"} { |
|
set opt_ansiresets 0 |
|
} |
|
} |
|
# -- --- --- --- --- --- |
|
set linelist [list] |
|
set nlsplit [split $text \n] |
|
if {![llength $opt_line]} { |
|
set linelist $nlsplit |
|
#lappend linelist {*}$nlsplit |
|
} else { |
|
#already normalized trimleft+trimright to trimline |
|
if {$tl_both} { |
|
foreach ln $nlsplit { |
|
lappend linelist [string trim $ln] |
|
} |
|
} elseif {$tl_left} { |
|
foreach ln $nlsplit { |
|
lappend linelist [string trimleft $ln] |
|
} |
|
} elseif {$tl_right} { |
|
foreach ln $nlsplit { |
|
lappend linelist [string trimright $ln] |
|
} |
|
} |
|
} |
|
|
|
if {"collateempty" in $opt_block} { |
|
set inputlist $linelist[set linelist [list]] |
|
set last "-" |
|
foreach input $inputlist { |
|
if {$input ne ""} { |
|
lappend linelist $input |
|
set last "-" |
|
} else { |
|
if {$last ne ""} { |
|
lappend linelist "" |
|
} |
|
set last "" |
|
} |
|
} |
|
} |
|
|
|
if {"trimall" in $opt_block} { |
|
set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] |
|
} else { |
|
set start 0 |
|
if {"trimhead" in $opt_block} { |
|
set idx 0 |
|
set lastempty -1 |
|
foreach ln $linelist { |
|
if {[lindex $linelist $idx] ne ""} { |
|
break |
|
} else { |
|
set lastempty $idx |
|
} |
|
incr idx |
|
} |
|
if {$lastempty >=0} { |
|
set start [expr {$lastempty +1}] |
|
} |
|
} |
|
set linelist [lrange $linelist $start end] |
|
|
|
if {"trimtail" in $opt_block} { |
|
set revlinelist [lreverse $linelist][set linelist {}] |
|
set i 0 |
|
foreach ln $revlinelist { |
|
if {$ln ne ""} { |
|
set linelist [lreverse [lrange $revlinelist $i end]] |
|
break |
|
} |
|
incr i |
|
} |
|
} |
|
|
|
# --- --- |
|
set start 0 |
|
set end "end" |
|
if {"trimhead1" in $opt_block} { |
|
if {[lindex $linelist 0] eq ""} { |
|
set start 1 |
|
} |
|
} |
|
if {"trimtail1" in $opt_block} { |
|
if {[lindex $linelist end] eq ""} { |
|
set end "end-1" |
|
} |
|
} |
|
set linelist [lrange $linelist $start $end] |
|
} |
|
|
|
#review - we need to make sure ansiresets don't accumulate/grow on any line |
|
#Each resulting line should have a reset of some type at start and a pure-reset at end to stop |
|
#see if we can find an ST sequence that most terminals will not display for marking sections? |
|
if {$opt_ansireplays} { |
|
#package require punk::ansi |
|
<require_punk_ansi> |
|
if {$opt_ansiresets} { |
|
set RST "\x1b\[0m" |
|
} else { |
|
set RST "" |
|
} |
|
set replaycodes $RST ;#todo - default? |
|
set transformed [list] |
|
#shortcircuit common case of no ansi |
|
#NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. |
|
#This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) |
|
#ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable |
|
#detect_in_list will check at first level. (not intended for detecting ansi in deeper structures) |
|
if {![punk::ansi::ta::detect_in_list $linelist]} { |
|
if {$opt_ansiresets} { |
|
foreach ln $linelist { |
|
lappend transformed $RST$ln$RST |
|
} |
|
set linelist $transformed |
|
} |
|
} else { |
|
|
|
#INLINE punk::ansi::codetype::is_sgr_reset |
|
#regexp {\x1b\[0*m$} $code |
|
set re_is_sgr_reset {\x1b\[0*m$} |
|
#INLINE punk::ansi::codetype::is_sgr |
|
#regexp {\033\[[0-9;:]*m$} $code |
|
set re_is_sgr {\x1b\[[0-9;:]*m$} |
|
|
|
foreach ln $linelist { |
|
#set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable |
|
if {![punk::ansi::ta::detect $ln]} { |
|
#plaintext only - no ansi codes in line |
|
lappend transformed [string cat $replaycodes $ln $RST] |
|
set nextreplay $replaycodes |
|
set replaycodes $nextreplay |
|
continue |
|
} |
|
|
|
set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split seems to account for a large portion of the time taken to run this function. |
|
#if {[llength $ansisplits]<= 1} { |
|
# #plaintext only - no ansi codes in line |
|
# lappend transformed [string cat $replaycodes $ln $RST] |
|
# #leave replaycodes as is for next line |
|
# set nextreplay $replaycodes |
|
#} else { |
|
set tail $RST |
|
set lastcode [lindex $ansisplits end-1] ;#may or may not be SGR |
|
if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { |
|
if {[lindex $ansisplits end] eq ""} { |
|
#last plaintext is empty. So the line is already suffixed with a reset |
|
set tail "" |
|
set nextreplay $RST |
|
} else { |
|
#trailing text has been reset within line - but no tail reset present |
|
#we normalize by putting a tail reset on anyway |
|
set tail $RST |
|
set nextreplay $RST |
|
} |
|
} elseif {[lindex $ansisplits end] ne "" && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { |
|
#No tail reset - and no need to examine whole line to determine stack that is in effect |
|
set tail $RST |
|
set nextreplay $lastcode |
|
} else { |
|
#last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect |
|
#last codeset doesn't end in a pure-reset |
|
#whether code was at very end or not - add a reset tail |
|
set tail $RST |
|
#determine effective replay for line |
|
set codestack [list start] |
|
foreach {pt code} $ansisplits { |
|
if {[punk::ansi::codetype::is_sgr_reset $code]} { |
|
set codestack [list] ;#different from 'start' marked - this means we've had a reset |
|
} elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { |
|
set codestack [list $code] |
|
} else { |
|
if {[punk::ansi::codetype::is_sgr $code]} { |
|
#todo - proper test of each code - so we only take latest background/foreground etc. |
|
#requires handling codes with varying numbers of parameters. |
|
#basic simplification - remove straight dupes. |
|
set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. |
|
set codestack [lremove $codestack {*}$dup_posns] |
|
lappend codestack $code |
|
} ;#else gx0 or other code - we don't want to stack it with SGR codes |
|
} |
|
} |
|
if {$codestack eq [list start]} { |
|
#No SGRs - may have been other codes |
|
set line_has_sgr 0 |
|
} else { |
|
#list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes |
|
set line_has_sgr 1 |
|
if {[lindex $codestack 0] eq "start"} { |
|
set codestack [lrange $codestack 1 end] |
|
} |
|
} |
|
|
|
#set newreplay [join $codestack ""] |
|
#set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] |
|
set newreplay [punk::ansi::codetype::sgr_merge $codestack] |
|
|
|
if {$RST ne "" && $line_has_sgr && $newreplay ne $replaycodes} { |
|
#adjust if it doesn't already does a reset at start |
|
if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { |
|
set nextreplay $newreplay |
|
} else { |
|
set nextreplay $RST$newreplay |
|
} |
|
} else { |
|
set nextreplay $replaycodes |
|
} |
|
} |
|
if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} { |
|
#no point attaching any replay |
|
lappend transformed [string cat $ln $tail] |
|
} else { |
|
lappend transformed [string cat $replaycodes $ln $tail] |
|
} |
|
#} |
|
set replaycodes $nextreplay |
|
} |
|
set linelist $transformed |
|
} |
|
} |
|
|
|
if {[llength $opt_commandprefix]} { |
|
set transformed [list] |
|
foreach ln $linelist { |
|
lappend transformed [{*}$opt_commandprefix $ln] |
|
} |
|
set linelist $transformed |
|
} |
|
|
|
return $linelist |
|
} |
|
if {$has_punk_ansi} { |
|
#optimise linelist as much as possible |
|
set linelist_body [string map {<require_punk_ansi> ""} $linelist_body] |
|
} else { |
|
#punk ansi not avail at time of package load. |
|
#by putting in calls to punk::ansi the user will get appropriate error messages |
|
set linelist_body [string map {<require_punk_ansi> "package require punk::ansi"} $linelist_body] |
|
} |
|
proc linelist {args} $linelist_body |
|
set linelist_body2 { |
|
set usage "linelist ?-ansiresets auto|<bool>? ?-ansireplays 0|1? ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix <cmdlist> text" |
|
if {[llength $args] == 0} { |
|
error "linelist missing textchunk argument usage:$usage" |
|
} |
|
set text [lindex $args end] |
|
set text [string map {\r\n \n} $text] ;#review - option? |
|
|
|
set arglist [lrange $args 0 end-1] |
|
set opts [tcl::dict::create {*}{ |
|
-block {trimhead1 trimtail1} |
|
-line {} |
|
-commandprefix "" |
|
-ansiresets auto |
|
-ansireplays 0 |
|
}] |
|
foreach {o v} $arglist { |
|
switch -- $o { |
|
-block - -line - -commandprefix - -ansiresets - -ansireplays { |
|
tcl::dict::set opts $o $v |
|
} |
|
default { |
|
error "linelist: Unrecognized option '$o' usage:$usage" |
|
} |
|
} |
|
} |
|
# -- --- --- --- --- --- |
|
set opt_block [tcl::dict::get $opts -block] |
|
if {[llength $opt_block]} { |
|
foreach bo $opt_block { |
|
switch -- $bo { |
|
trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {} |
|
default { |
|
set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty] |
|
error "linelist: unknown -block option value: $bo known values: $known_blockopts" |
|
} |
|
} |
|
} |
|
#normalize certain combos |
|
if {"trimhead" in $opt_block && [set posn [lsearch $opt_block trimhead1]] >=0} { |
|
#set opt_block [lreplace $opt_block $posn $posn] |
|
ledit opt_block $posn $posn |
|
} |
|
if {"trimtail" in $opt_block && [set posn [lsearch $opt_block trimtail1]] >=0} { |
|
#set opt_block [lreplace $opt_block $posn $posn] |
|
ledit opt_block $posn $posn |
|
} |
|
if {"trimall" in $opt_block} { |
|
#no other block options make sense in combination with this |
|
set opt_block [list "trimall"] |
|
} |
|
|
|
#TODO |
|
if {"triminner" in $opt_block } { |
|
error "linelist -block triminner not implemented - sorry" |
|
} |
|
|
|
} |
|
|
|
|
|
# -- --- --- --- --- --- |
|
set opt_line [tcl::dict::get $opts -line] |
|
set tl_left 0 |
|
set tl_right 0 |
|
set tl_both 0 |
|
foreach lo $opt_line { |
|
switch -- $lo { |
|
trimline { |
|
set tl_both 1 |
|
} |
|
trimleft { |
|
set tl_left 1 |
|
} |
|
trimright { |
|
set tl_right 1 |
|
} |
|
default { |
|
set known_lineopts [list trimline trimleft trimright] |
|
error "linelist: unknown -line option value: $lo known values: $known_lineopts" |
|
} |
|
} |
|
} |
|
#normalize trimleft trimright combo |
|
if {$tl_left && $tl_right} { |
|
set opt_line [list "trimline"] |
|
set tl_both 1 |
|
} |
|
# -- --- --- --- --- --- |
|
set opt_commandprefix [tcl::dict::get $opts -commandprefix] |
|
# -- --- --- --- --- --- |
|
set opt_ansiresets [tcl::dict::get $opts -ansiresets] |
|
# -- --- --- --- --- --- |
|
set opt_ansireplays [tcl::dict::get $opts -ansireplays] |
|
if {$opt_ansireplays} { |
|
if {$opt_ansiresets eq "auto"} { |
|
set opt_ansiresets 1 |
|
} |
|
} else { |
|
if {$opt_ansiresets eq "auto"} { |
|
set opt_ansiresets 0 |
|
} |
|
} |
|
# -- --- --- --- --- --- |
|
#set linelist [list] |
|
#set nlsplit [split $text \n] |
|
|
|
set linelist [split $text \n] |
|
set original_length [llength $linelist] |
|
|
|
#--------------------------- |
|
#todo - consider applying these inline later |
|
if {![llength $opt_line]} { |
|
#set linelist $nlsplit |
|
#lappend linelist {*}$nlsplit |
|
} else { |
|
#already normalized trimleft+trimright to trimline |
|
set nlsplit $linelist |
|
#set linelist [list] |
|
if {$tl_both} { |
|
set i 0 |
|
foreach ln $linelist { |
|
#lappend linelist [string trim $ln] |
|
lset linelist $i [string trim $ln] |
|
incr i |
|
} |
|
} elseif {$tl_left} { |
|
set i 0 |
|
foreach ln $linelist { |
|
#lappend linelist [string trimleft $ln] |
|
lset linelist $i [string trimleft $ln] |
|
incr i |
|
} |
|
} elseif {$tl_right} { |
|
set i 0 |
|
foreach ln $nlsplit { |
|
#lappend linelist [string trimright $ln] |
|
lset linelist $i [string trimright $ln] |
|
incr i |
|
} |
|
} |
|
} |
|
#--------------------------- |
|
|
|
set remove_indices [list] |
|
|
|
if {"collateempty" in $opt_block} { |
|
set last "-" |
|
for {set i 0} {$i < $original_length} {incr i} { |
|
if {[lindex $linelist $i] ne ""} { |
|
set last "-" |
|
} else { |
|
if {$last ne ""} { |
|
lappend remove_indices $i |
|
set last "" |
|
} |
|
} |
|
} |
|
} |
|
|
|
if {"trimall" in $opt_block} { |
|
#we have already made sure there are no other block options that would conflict with this |
|
#set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""] |
|
#set remove_indices [list] |
|
for {set i 0} {$i < $original_length} {incr i} { |
|
if {[lindex $linelist $i] eq ""} { |
|
lappend remove_indices $i |
|
} |
|
} |
|
} else { |
|
if {"trimhead" in $opt_block} { |
|
#set remove_indices [list] |
|
for {set i 0} {$i < $original_length} {incr i} { |
|
if {[lindex $linelist $i] ne ""} { |
|
break |
|
} else { |
|
lappend remove_indices $i |
|
} |
|
} |
|
} |
|
|
|
if {"trimtail" in $opt_block} { |
|
set remove_indices [list] |
|
for {set i [expr {$original_length-1}]} {$i >=0} {incr i -1} { |
|
if {[lindex $linelist $i] ne ""} { |
|
break |
|
} else { |
|
lappend remove_indices $i |
|
} |
|
} |
|
|
|
#set revlinelist [lreverse $linelist][set linelist {}] |
|
#set i 0 |
|
#foreach ln $revlinelist { |
|
# if {$ln ne ""} { |
|
# set linelist [lreverse [lrange $revlinelist $i end]] |
|
# break |
|
# } |
|
# incr i |
|
#} |
|
} |
|
|
|
# --- --- |
|
set start 0 |
|
set end "end" |
|
if {"trimhead1" in $opt_block} { |
|
if {[lindex $linelist 0] eq ""} { |
|
lappend remove_indices 0 |
|
} |
|
} |
|
if {"trimtail1" in $opt_block} { |
|
if {[lindex $linelist end] eq ""} { |
|
lappend remove_indices [expr {$original_length-1}] |
|
} |
|
} |
|
#set linelist [lrange $linelist $start $end] |
|
} |
|
|
|
#review - we need to make sure ansiresets don't accumulate/grow on any line |
|
#Each resulting line should have a reset of some type at start and a pure-reset at end to stop |
|
#see if we can find an ST sequence that most terminals will not display for marking sections? |
|
if {$opt_ansireplays} { |
|
<require_punk_ansi> ;#package require punk::ansi |
|
if {$opt_ansiresets} { |
|
set RST "\x1b\[0m" |
|
} else { |
|
set RST "" |
|
} |
|
set replaycodes $RST ;#todo - default? |
|
#set transformed [list] |
|
#shortcircuit common case of no ansi |
|
#NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour. |
|
#This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled) |
|
#ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable |
|
#detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures) |
|
|
|
#we use detectcode_in_list instead of detect_in_list |
|
#detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message) |
|
# - but the main reason is it is slightly faster. |
|
if {![punk::ansi::ta::detectcode_in_list $linelist]} { |
|
if {$opt_ansiresets} { |
|
for {set i 0} {$i < $original_length} {incr i} { |
|
if {$i in $remove_indices} { |
|
continue |
|
} |
|
lset linelist $i $RST[lindex $linelist $i]$RST |
|
} |
|
} |
|
} else { |
|
|
|
#INLINE punk::ansi::codetype::is_sgr_reset |
|
#regexp {\x1b\[0*m$} $code |
|
set re_is_sgr_reset {\x1b\[0*m$} |
|
#INLINE punk::ansi::codetype::is_sgr |
|
#regexp {\033\[[0-9;:]*m$} $code |
|
set re_is_sgr {\x1b\[[0-9;:]*m$} |
|
|
|
|
|
#foreach ln $linelist {} |
|
for {set i 0} {$i < $original_length} {incr i} { |
|
if {$i in $remove_indices} { |
|
continue |
|
} |
|
#set ln [lindex $linelist $i] |
|
#set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable |
|
|
|
#set ansisplits [punk::ansi::ta::split_codes_single $ln] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. |
|
#get_codes_single lists only the codes. no plaintext or empty elements |
|
|
|
set ansisplits [punk::ansi::ta::get_codes_single [lindex $linelist $i]] ;#REVIEW - this split accounts for a large portion of the time taken to run this function. |
|
|
|
if {[llength $ansisplits] == 0} { |
|
#plaintext only - no ansi codes in line |
|
#lappend transformed [string cat $replaycodes $ln $RST] |
|
lset linelist $i $replaycodes[lindex $linelist $i]$RST |
|
#leave replaycodes as is for next line |
|
set nextreplay $replaycodes |
|
} else { |
|
set tail $RST |
|
set lastcode [lindex $ansisplits end] ;#may or may not be SGR |
|
set lastcodeoffset [expr {[string length $lastcode]-1}] |
|
if {[punk::ansi::codetype::is_sgr_reset $lastcode]} { |
|
if {[string range [lindex $linelist $i] end-$lastcodeoffset end] eq $lastcode} { |
|
#last plaintext is empty. So the line is already suffixed with a reset |
|
set tail "" |
|
} else { |
|
#trailing text has been reset within line - but no tail reset present |
|
#we normalize by putting a tail reset on anyway |
|
set tail $RST |
|
} |
|
set nextreplay $RST |
|
} elseif {[string range [lindex $linelist $i] end-$lastcodeoffset end] eq $lastcode && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} { |
|
#code is at tail (no trailing plaintext) |
|
#No tail reset - and no need to examine whole line to determine stack that is in effect |
|
set tail $RST |
|
set nextreplay $lastcode |
|
} else { |
|
#last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect |
|
#last codeset doesn't end in a pure-reset |
|
#whether code was at very end or not - add a reset tail |
|
set tail $RST |
|
#determine effective replay for line |
|
set codestack [list start] |
|
foreach code $ansisplits { |
|
if {[tcl::string::index $code end] eq "m"} { |
|
if {[punk::ansi::codetype::is_sgr_reset $code]} { |
|
set codestack [list] ;#different from 'start' marked - this means we've had a reset |
|
} elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { |
|
set codestack [list $code] |
|
} else { |
|
if {[punk::ansi::codetype::is_sgr $code]} { |
|
#todo - proper test of each code - so we only take latest background/foreground etc. |
|
#requires handling codes with varying numbers of parameters. |
|
#basic simplification - remove straight dupes. |
|
set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars. |
|
set codestack [lremove $codestack {*}$dup_posns] |
|
lappend codestack $code |
|
} |
|
} |
|
} |
|
;#else gx0 or other code - we don't want to stack it with SGR codes |
|
} |
|
if {[llength $codestack] == 1 && [lindex $codestack 0] eq "start"} { |
|
#No SGRs - may have been other codes |
|
set line_has_sgr 0 |
|
} else { |
|
#list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes |
|
set line_has_sgr 1 |
|
if {[lindex $codestack 0] eq "start"} { |
|
#set codestack [lrange $codestack 1 end] |
|
ledit codestack 0 0 |
|
} |
|
} |
|
|
|
if {$line_has_sgr} { |
|
#set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack] |
|
set newreplay [punk::ansi::codetype::sgr_merge $codestack] |
|
if {$newreplay ne $replaycodes} { |
|
#adjust if it doesn't already does a reset at start |
|
if {$RST ne ""} { |
|
if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} { |
|
set nextreplay $newreplay |
|
} else { |
|
set nextreplay $RST$newreplay |
|
} |
|
} else { |
|
set nextreplay $newreplay |
|
} |
|
} else { |
|
set nextreplay $replaycodes |
|
} |
|
} else { |
|
set nextreplay $replaycodes |
|
} |
|
} |
|
if {"$replaycodes$tail" ne ""} { |
|
if {[punk::ansi::codetype::has_sgr_leadingreset [lindex $linelist $i]]} { |
|
#no point attaching any replay |
|
#lappend transformed [string cat $ln $tail] |
|
if {$tail ne ""} { |
|
lset linelist $i [lindex $linelist $i]$tail |
|
} |
|
} else { |
|
#lappend transformed [string cat $replaycodes $ln $tail] |
|
lset linelist $i $replaycodes[lindex $linelist $i]$tail |
|
} |
|
} |
|
} |
|
set replaycodes $nextreplay |
|
} |
|
#jjj |
|
#set linelist $transformed |
|
} |
|
} |
|
|
|
#todo - run this before ansireplay processing and adjust indices accordingly? or just run it after as is and accept that commandprefix will be added to each line after replay processing? |
|
if {[llength $opt_commandprefix]} { |
|
for {set i 0} {$i < $original_length} {incr i} { |
|
if {$i in $remove_indices} { |
|
continue |
|
} |
|
lset linelist $i [{*}$opt_commandprefix [lindex $linelist $i]] |
|
} |
|
#set transformed [list] |
|
#foreach ln $linelist { |
|
# lappend transformed [{*}$opt_commandprefix $ln] |
|
#} |
|
#set linelist $transformed |
|
} |
|
if {[llength $remove_indices]} { |
|
set linelist [lremove $linelist {*}$remove_indices] |
|
} |
|
return $linelist |
|
} |
|
if {$has_punk_ansi} { |
|
#optimise linelist as much as possible |
|
set linelist_body2 [string map {<require_punk_ansi> ""} $linelist_body2] |
|
} else { |
|
#punk ansi not avail at time of package load. |
|
#by putting in calls to punk::ansi the user will get appropriate error messages |
|
set linelist_body2 [string map {<require_punk_ansi> "package require punk::ansi"} $linelist_body2] |
|
} |
|
proc linelist {args} $linelist_body2 |
|
|
|
|
|
interp alias {} errortime {} punk::lib::errortime |
|
proc errortime {script groupsize {iters 2}} { |
|
#by use MAK from https://wiki.tcl-lang.org/page/How+to+Measure+Performance |
|
set i 0 |
|
set times {} |
|
if {$iters < 2} {set iters 2} |
|
|
|
for {set i 0} {$i < $iters} {incr i} { |
|
set result [uplevel [list time $script $groupsize]] |
|
lappend times [lindex $result 0] |
|
} |
|
|
|
set average 0.0 |
|
set s2 0.0 |
|
|
|
foreach time $times { |
|
set average [expr {$average + double($time)/$iters}] |
|
} |
|
|
|
foreach time $times { |
|
set s2 [expr {$s2 + (($time-$average)*($time-$average) / ($iters-1))}] |
|
} |
|
|
|
set sigma [expr {int(sqrt($s2))}] |
|
set average [expr {int($average)}] |
|
|
|
return "$average +/- $sigma microseconds per iteration" |
|
} |
|
|
|
#test function to use with show_jump_tables |
|
#todo - check if switch compilation to jump tables differs by Tcl version |
|
proc switch_char_test {c} { |
|
set dec [scan $c %c] |
|
foreach t [list 1 2 3] { |
|
switch -- $c { |
|
x { |
|
return [list $dec x $t] |
|
} |
|
y { |
|
return [list $dec y $t] |
|
} |
|
z { |
|
return [list $dec z $t] |
|
} |
|
} |
|
} |
|
|
|
#tcl 8.6/8.7 (at least) |
|
#curlies must be unescaped and unbraced to work as literals in switch and enable it to compile to jumpTable |
|
switch -- $c { |
|
a { |
|
return [list $dec a] |
|
} |
|
{"} { |
|
return [list $dec dquote] |
|
} |
|
{[} {return [list $dec lb]} |
|
{]} {return [list $dec rb]} |
|
"{" { |
|
return [list $dec lbrace] |
|
} |
|
"}" { |
|
return [list $dec rbrace] |
|
} |
|
default { |
|
return [list $dec $c] |
|
} |
|
} |
|
|
|
|
|
|
|
} |
|
|
|
#we are interested in seeing jumpTable line and following lines up until next line starting with "Command" or bracketed number e.g (164) |
|
proc show_jump_tables {args} { |
|
#avoiding use of 'info cmdtype' as unavaliable in safe interps as at 2024-06. |
|
if {[llength $args] == 1} { |
|
set data [tcl::unsupported::disassemble proc [lindex $args 0]] |
|
} elseif {[llength $args] == 2} { |
|
#review - this looks for direct methods on the supplied object/class, and then tries to disassemble method on the supplied class or class of supplied object if it isn't a class itself. |
|
#not sure if this handles more complex hierarchies or mixins etc. |
|
lassign $args obj method |
|
if {![info object isa object $obj]} { |
|
error "show_jump_tables unable to examine '$args'. $obj is not an oo object" |
|
} |
|
#classes are objects too and can have direct methods |
|
if {$method in [info object methods $obj]} { |
|
set data [tcl::unsupported::disassemble objmethod $obj $method] |
|
} else { |
|
if {![info object isa class $obj]} { |
|
set obj [info object class $obj] |
|
} |
|
set data [tcl::unsupported::disassemble method $obj $method] |
|
} |
|
} else { |
|
error "show_jump_tables expected a procname or a class/object and method" |
|
} |
|
set result "" |
|
set in_jt 0 |
|
foreach ln [split $data \n] { |
|
set tln [::tcl::string::trim $ln] |
|
if {!$in_jt} { |
|
if {[::tcl::string::match *jumpTable* $ln]} { |
|
punk::ns::call_frame |
|
append result $ln \n |
|
set in_jt 1 |
|
} |
|
} else { |
|
if {[::tcl::string::match Command* $tln] || [::tcl::string::match "(*) *" $tln]} { |
|
set in_jt 0 |
|
} else { |
|
append result $ln \n |
|
} |
|
} |
|
} |
|
return $result |
|
} |
|
|
|
#a test |
|
# punk::ns::cmdtracereturn punk::lib::disassemble ::punk::ns::test_switch4 |
|
# Note the different disassemble result when trace is running. |
|
proc disassemble {procname} { |
|
tcl::unsupported::disassemble proc $procname |
|
} |
|
|
|
proc temperature_f_to_c {deg_fahrenheit} { |
|
return [expr {($deg_fahrenheit -32) * (5/9.0)}] |
|
} |
|
proc temperature_c_to_f {deg_celsius} { |
|
return [expr {($deg_celsius * (9/5.0)) + 32}] |
|
} |
|
|
|
proc interp_sync_package_paths {interp} { |
|
if {![interp exists $interp]} { |
|
error "interp_sync_package_paths error. interp '$interp' not found. Create it first with \[interp create $interp\]" |
|
} |
|
interp eval $interp [list set ::auto_path $::auto_path] |
|
interp eval $interp {tcl::tm::remove {*}[tcl::tm::list]} |
|
interp eval $interp [list tcl::tm::add {*}[lreverse [tcl::tm::list]]] |
|
} |
|
|
|
proc valcopy {obj} { |
|
append obj2 $obj {} |
|
} |
|
proc set_valcopy {varname obj} { |
|
#used by repl's codeinterp. Maintains internal rep, easier to call e.g interp eval code [list punk::set_valcopy varnmame $val] |
|
append obj2 $obj {} |
|
uplevel 1 [list set $varname $obj2] |
|
} |
|
|
|
|
|
|
|
proc format_number {numbers_or_commaformattednumbers {delim ""} {groupsize ""}} { |
|
variable has_twapi |
|
if {$has_twapi} { |
|
if {$delim eq "" && $groupsize eq ""} { |
|
set localeid [twapi::get_system_default_lcid] |
|
} |
|
} |
|
#when using twapi we currently only get the localeid - not the specific defaults |
|
#when not using twapi, or on non-windows platforms - we don't currently have a mechanism to look up user preferences for this |
|
set default_delim "," |
|
set default_groupsize 3 |
|
|
|
set results [list] |
|
set nums [valcopy $numbers_or_commaformattednumbers] ;#stops single num from getting internal rep of list |
|
foreach inputnum $nums { |
|
set number [valcopy $inputnum] |
|
#also handle tcl 8.7+ underscores in numbers |
|
set number [string map [list _ "" , ""] $number] |
|
#normalize e.g 2e4 -> 20000.0 |
|
set number [expr {$number}] |
|
|
|
if {$has_twapi} { |
|
if {$delim eq "" && $groupsize eq ""} { |
|
lappend results [twapi::format_number $number $localeid -idigits -1] |
|
continue |
|
} else { |
|
#setting just one of delim or groupsize means we don't get the user's localeid based default for the non-set one |
|
#todo - document it? Find a way to lookup localeid based defaults whenever either is unspecified? |
|
if {$delim eq ""} {set delim $default_delim} |
|
if {$groupsize eq ""} {set groupsize $default_groupsize} |
|
lappend results [twapi::format_number $number 0 -idigits -1 -sthousand $delim -sgrouping $groupsize] |
|
continue |
|
} |
|
} |
|
#todo - get configured user defaults |
|
if {$delim eq ""} { |
|
set delim $default_delim |
|
} |
|
if {$groupsize eq ""} { |
|
set groupsize $default_groupsize |
|
} |
|
|
|
lappend results [delimit_number $number $delim $groupsize] |
|
} |
|
|
|
if {[llength $results] == 1} { |
|
#keep intrep as string rather than list |
|
return [lindex $results 0] |
|
} |
|
return $results |
|
} |
|
|
|
|
|
#from wiki https://wiki.tcl-lang.org/page/Delimiting+Numberse |
|
# Given a number represented as a string, insert delimiters to break it up for |
|
# readability. Normally, the delimiter will be a comma which will be inserted every |
|
# three digits. However, the delimiter and groupsize are optional arguments, |
|
# permitting use in other locales. |
|
# |
|
# The string is assumed to consist of digits, possibly preceded by spaces, |
|
# and possibly containing a decimal point, i.e.: [:space:]*[:digit:]*\.[:digit:]* |
|
|
|
proc delimit_number {unformattednumber {delim ","} {GroupSize 3}} { |
|
set number [valcopy $unformattednumber] |
|
set number [string map {_ ""} $number] |
|
#normalize using expr - e.g 2e4 -> 20000.0 |
|
set number [expr {$number}] |
|
# First, extract right hand part of number, up to and including decimal point |
|
set point [string last "." $number]; |
|
if {$point >= 0} { |
|
set PostDecimal [string range $number $point+1 end]; |
|
set PostDecimalP 1; |
|
} else { |
|
set point [expr {[string length $number] + 1}] |
|
set PostDecimal ""; |
|
set PostDecimalP 0; |
|
} |
|
|
|
# Now extract any leading spaces. review - regex for whitespace instead of just ascii space? |
|
set ind 0; |
|
while {[string equal [string index $number $ind] \u0020]} { |
|
incr ind; |
|
} |
|
set FirstNonSpace $ind; |
|
set LastSpace [expr {$FirstNonSpace - 1}]; |
|
set LeadingSpaces [string range $number 0 $LastSpace]; |
|
|
|
# Now extract the non-fractional part of the number, omitting leading spaces. |
|
set MainNumber [string range $number $FirstNonSpace $point-1]; |
|
|
|
# Insert commas into the non-fractional part. |
|
set Length [string length $MainNumber]; |
|
set Phase [expr {$Length % $GroupSize}] |
|
set PhaseMinusOne [expr {$Phase -1}]; |
|
set DelimitedMain ""; |
|
|
|
#First we deal with the extra stuff. |
|
if {$Phase > 0} { |
|
append DelimitedMain [string range $MainNumber 0 $PhaseMinusOne]; |
|
} |
|
set FirstInGroup $Phase; |
|
set LastInGroup [expr {$FirstInGroup + $GroupSize -1}]; |
|
while {$LastInGroup < $Length} { |
|
if {$FirstInGroup > 0} { |
|
append DelimitedMain $delim; |
|
} |
|
append DelimitedMain [string range $MainNumber $FirstInGroup $LastInGroup]; |
|
incr FirstInGroup $GroupSize |
|
incr LastInGroup $GroupSize |
|
} |
|
|
|
# Reassemble the number. |
|
if {$PostDecimalP} { |
|
return [format "%s%s.%s" $LeadingSpaces $DelimitedMain $PostDecimal]; |
|
} else { |
|
return [format "%s%s" $LeadingSpaces $DelimitedMain]; |
|
} |
|
} |
|
|
|
#sugar |
|
#exclusive end - more intuitive for some cases and more consistent with other languages |
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::FOR |
|
@cmd -name punk::lib::FOR\ |
|
-summary\ |
|
"BASIC style integer 'for loop'"\ |
|
-help\ |
|
"Sugar syntax for a common looping pattern. |
|
The loop variable takes on values from start to end (exclusive) in increments of step. |
|
If step is not specified, it defaults to 1 or -1 depending on the relative values of start and end. |
|
This is a common looping pattern that isn't directly supported by Tcl's built in control structures, |
|
and this syntax is more concise for convenient interactive usage. For example: |
|
|
|
FOR i 0 10 {puts $i} |
|
|
|
will print the numbers 0 to 9. |
|
|
|
This wrapper necessarily has some slight overhead compared to builtin Tcl for, foreach and while loops, |
|
so may not be suitable for performance critical inner loops. |
|
|
|
See also: https://wiki.tcl-lang.org/page/Simple+shorthand+%27for%27+loop |
|
" |
|
@values -min 3 -max 4 |
|
varname -type string -help "loop variable name" |
|
start -type integer -help "initial value for loop variable" |
|
end -type integer -help "end value for loop variable (exclusive)" |
|
step -type integer -optional 1 -help "step value for loop variable (defaults to 1 or -1 depending on start and end values)" |
|
script -type script -help "script to execute for each loop iteration" |
|
}] |
|
} |
|
proc FOR { var args } { |
|
switch -- [llength $args] { |
|
3 { |
|
# FOR x start end {} |
|
lassign $args start end script |
|
set step [ expr {$start > $end ? - 1 : 1} ] |
|
} |
|
4 { |
|
# FOR x start end step {} |
|
lassign $args start end step script |
|
} |
|
default { |
|
error "FOR: wrong # args, should be: FOR varName startValue endValue ?stepValue? script" |
|
} |
|
} |
|
if {![string is integer -strict $start] || ![string is integer -strict $end] || ![string is integer -strict $step]} { |
|
error "FOR: start,end and step values must be integers" |
|
} |
|
upvar $var loopVar |
|
set loopVar [expr {$start - $step}] |
|
#to support 'continue' we have to increment the loopVar prior to the script evaluation, within the loop condition |
|
if {$start < $end} { |
|
if {$step <= 0} { |
|
error "FOR: step value must be positive when start < end" |
|
} |
|
while {[incr loopVar $step] < $end} { |
|
uplevel $script |
|
} |
|
} else { |
|
if {$step >= 0} { |
|
error "FOR: step value must be negative when start > end" |
|
} |
|
while {[incr loopVar $step] > $end} { |
|
uplevel $script |
|
} |
|
} |
|
} |
|
|
|
#review - there are various type of uuid - we should use something consistent across platforms |
|
#twapi is used on windows because it's about 5 times faster - but is this more important than consistency? |
|
#twapi is much slower to load in the first place (e.g 75ms vs 6ms if package names already loaded) - so for oneshots tcllib uuid is better anyway |
|
#(counterpoint: in the case of punk - we currently need twapi anyway on windows) |
|
#does tcllib's uuid use the same mechanisms on different platforms anyway? |
|
if {$has_twapi} { |
|
interp alias "" ::punk::lib::uuid "" twapi::new_uuid |
|
} else { |
|
catch {package require uuid} |
|
interp alias "" ::punk::lib::uuid "" uuid::uuid generate |
|
} |
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace punk::lib ---}] |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
tcl::namespace::eval punk::lib::flatgrid { |
|
namespace export filler_count rows cols col row block |
|
|
|
#WARNING - requires lseq and 'lsearch -stride' |
|
#WARNING - lsearch -stride oddity with empty strings https://core.tcl-lang.org/tcl/tktview/edebb6a4 |
|
#todo - 8.6 fallback? |
|
|
|
proc filler_count {listlen numcolumns} { |
|
#if {$numcolumns <= 0} {error "filler_count requires 1 or more numcolumns"} ;#or allow divide by zero error |
|
#if {$listlen == 0} {return $numcolumns} ;#an option - but returning zero might make more sense |
|
expr {($numcolumns - ($listlen % $numcolumns)) % $numcolumns} |
|
} |
|
proc rows {list numcolumns {blank NULL}} { |
|
set numblanks [filler_count [llength $list] $numcolumns] |
|
set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] |
|
set splits [lseq 0 to [llength $padded_list] by $numcolumns] |
|
set rows [list] |
|
set i 1 |
|
foreach s [lrange $splits 0 end-1] { |
|
lappend rows [lrange $padded_list $s [lindex $splits $i]-1] |
|
incr i |
|
} |
|
return $rows |
|
} |
|
proc cols {list numcolumns {blank NULL}} { |
|
set cols [list] |
|
foreach colindex [lseq 0 $numcolumns-1] { |
|
lappend cols [lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] *] |
|
} |
|
return $cols |
|
} |
|
proc cols2 {list numcolumns {blank NULL}} { |
|
set cols [list] |
|
foreach colindex [lseq 0 $numcolumns-1] { |
|
lappend cols [col2 $list $numcolumns $colindex $blank] |
|
} |
|
return $cols |
|
} |
|
proc col {list numcolumns colindex {blank NULL}} { |
|
lsearch -stride $numcolumns -index [list $colindex 0] -subindices -all -inline [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] * |
|
} |
|
proc col2 {list numcolumns colindex {blank NULL}} { |
|
set numblanks [filler_count [llength $list] $numcolumns] |
|
set padded_list [list {*}$list {*}[lrepeat $numblanks $blank]] |
|
set splits [lseq 0 to [llength $padded_list] by $numcolumns] |
|
set col [list] |
|
foreach s [lrange $splits 0 end-1] { |
|
lappend col [lindex $padded_list $s+$colindex] |
|
} |
|
return $col |
|
} |
|
proc col3 {list numcolumns colindex {blank NULL}} { |
|
set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] |
|
lmap s [lrange [lseq 0 to [llength $padded_list] by $numcolumns] 0 end-1] {lindex $padded_list $s+$colindex} |
|
} |
|
proc col4 {list numcolumns colindex {blank NULL}} { |
|
#slow |
|
set vars [lrepeat $numcolumns _] |
|
lset vars $colindex v |
|
if {$blank eq ""} { |
|
return [lmap $vars $list {set v}] |
|
} |
|
set padded_list [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] |
|
lmap $vars [list {*}$list {*}[lrepeat [filler_count [llength $list] $numcolumns] $blank]] {set v} |
|
} |
|
|
|
proc block {list numcolumns {blank NULL}} { |
|
set colblocks [list] |
|
foreach c [cols $list $numcolumns $blank] { |
|
lappend colblocks [join $c \n] " " |
|
} |
|
textblock::join -- {*}$colblocks |
|
} |
|
proc block2 {list numcolumns {blank NULL}} { |
|
set colblocks [list] |
|
foreach c [cols2 $list $numcolumns $blank] { |
|
lappend colblocks [join $c \n] " " |
|
} |
|
textblock::join -- {*}$colblocks |
|
} |
|
} |
|
|
|
tcl::namespace::eval punk::lib::test { |
|
|
|
|
|
|
|
} |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#todo - way to generate 'internal' docs separately? |
|
#*** !doctools |
|
#[section Internal] |
|
tcl::namespace::eval punk::lib::system { |
|
#*** !doctools |
|
#[subsection {Namespace punk::lib::system}] |
|
#[para] Internal functions that are not part of the API |
|
#[list_begin definitions] |
|
namespace eval argdoc { |
|
#non-colour SGR codes |
|
set I "\x1b\[3m" ;# [a+ italic] |
|
set NI "\x1b\[23m" ;# [a+ noitalic] |
|
set B "\x1b\[1m" ;# [a+ bold] |
|
set N "\x1b\[22m" ;# [a+ normal] |
|
set T "\x1b\[1\;4m" ;# [a+ bold underline] |
|
set NT "\x1b\[22\;24m\x1b\[4:0m" ;# [a+ normal nounderline] |
|
} |
|
|
|
|
|
|
|
proc mostFactorsBelow {n} { |
|
##*** !doctools |
|
#[call [fun mostFactorsBelow] [arg n]] |
|
#[para]Find the number below $n which has the greatest number of factors |
|
#[para]This will get slow quickly as n increases (100K = 1s+ 2024) |
|
set most 0 |
|
set mostcount 0 |
|
for {set i 1} {$i < $n} {incr i} { |
|
set fc [llength [punk::lib::factors $i]] |
|
if {$fc > $mostcount} { |
|
set most $i |
|
set mostcount $fc |
|
} |
|
} |
|
return [list number $most numfactors $mostcount] |
|
} |
|
proc factorCountBelow_punk {n} { |
|
##*** !doctools |
|
#[call [fun factorCountBelow] [arg n]] |
|
#[para]For numbers 1 to n - keep a tally of the total count of factors |
|
#[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result |
|
#[para]and as a rudimentary performance comparison |
|
#[para]gets slow quickly! |
|
set tally 0 |
|
for {set i 1} {$i <= $n} {incr i} { |
|
incr tally [llength [punk::lib::factors $i]] |
|
} |
|
return $tally |
|
} |
|
proc factorCountBelow_numtheory {n} { |
|
##*** !doctools |
|
#[call [fun factorCountBelow] [arg n]] |
|
#[para]For numbers 1 to n - keep a tally of the total count of factors |
|
#[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result |
|
#[para]and as a rudimentary performance comparison |
|
#[para]gets slow quickly! (significantly slower than factorCountBelow_punk) |
|
package require math::numtheory |
|
set tally 0 |
|
for {set i 1} {$i <= $n} {incr i} { |
|
incr tally [llength [math::numtheory::factors $i]] |
|
} |
|
return $tally |
|
} |
|
|
|
proc factors2 {x} { |
|
##*** !doctools |
|
#[call [fun factors2] [arg x]] |
|
#[para]Return a sorted list of factors of x |
|
#[para]A similar brute-force mechanism to factors - but keeps result ordering as we go. |
|
set smallfactors [list 1] |
|
set j 2 |
|
set max [expr {sqrt($x)}] |
|
while {$j < $max} { |
|
if {($x % $j) == 0} { |
|
lappend smallfactors $j |
|
lappend largefactors [expr {$x / $j}] |
|
} |
|
incr j |
|
} |
|
#handle sqrt outside loop so we don't have to sort/dedup or check list membership in main loop |
|
if {($x % $j) == 0} { |
|
if {$j == ($x / $j)} { |
|
lappend smallfactors $j |
|
} |
|
} |
|
return [concat $smallfactors [lreverse $largefactors] $x] |
|
} |
|
|
|
|
|
|
|
namespace eval argdoc { |
|
variable PUNKARGS |
|
lappend PUNKARGS [list { |
|
@id -id ::punk::lib::system::incomplete |
|
@cmd -name punk::lib::system::incomplete\ |
|
-summary\ |
|
"return stack of block-opening characters that have not been closed in a command/script"\ |
|
-help\ |
|
"This is used by punk::repl to determine what characters are needed to complete a command/script |
|
for the purposes of auto-completion and multi-line input. |
|
It returns a list of characters for which the corresponding close characters are still *potentially* |
|
needed to complete the command/script. |
|
|
|
For example, for input |
|
'puts \"Hello' |
|
- the command/script is incomplete because it is missing the closing quote. |
|
This function will return a list containing a single double-quote character. |
|
|
|
For input |
|
'puts \{hello' |
|
This function will return a list containing a single left curly brace character. |
|
|
|
For input 'puts \{hello \"there' |
|
This function will return a list containing a left curly brace character and a double-quote character. |
|
NOTE - this does *not* mean the double quote must be closed in order to make the script complete. |
|
In this case 'puts \{hello \"there\} is both complete, and valid TCL syntax. |
|
In other cases a script may be complete at the toplevel, but invalid syntax within one of the arguments. |
|
|
|
In processing a Tcl script, we can't always be sure that a block of text being passed as an argument is even |
|
intended to be Tcl code. It may deliberately be a partical Tcl script - or something entirely different. |
|
|
|
Nevertheless, the overall command/script must satisify Tcl's syntax rules and so arbitrary text can't be |
|
used within apparent subelements unless they are properly quoted or braced with respect to the enclosing |
|
structure. |
|
" |
|
@values -min 1 -max 1 |
|
partial -type string |
|
}] |
|
} |
|
|
|
#report which is the innermost bracket/quote etc awaiting completion for a Tcl command |
|
#----------------------------------------------------------------------------- |
|
#important - used by punk::repl |
|
#Do not touch this until you have something better implemented and tested. |
|
#----------------------------------------------------------------------------- |
|
proc incomplete {partial {debug 0}} { |
|
#we can apparently get away without concatenating current innerpartial to previous in list - REVIEW. |
|
if {[info complete $partial]} { |
|
return [list] |
|
} |
|
set clist [split $partial ""] |
|
#puts stderr "-->$clist<--" |
|
set opened [list ""] |
|
set innerpartials [list ""] |
|
set escaped 0 |
|
set i 0 |
|
foreach c $clist { |
|
if {$c eq "\\"} { |
|
set escaped [expr {!$escaped}] |
|
incr i |
|
continue |
|
} ;# set escaped 0 at end |
|
set p [lindex $innerpartials end] |
|
if {$escaped == 0} { |
|
#NOTE - curly braces as switch arm keys must be unescaped and balanced. (escapes stop byte-compilation to jumpTable for switch statements for tcl8.6/8.7 at least) |
|
switch -- $c { |
|
{"} { |
|
if {![info complete ${p}]} { |
|
lappend opened {"} |
|
lappend innerpartials "" |
|
} else { |
|
if {[lindex $opened end] eq {"}} { |
|
#this quote is endquote |
|
ledit opened end end |
|
ledit innerpartials end end |
|
} else { |
|
if {![info complete ${p}$c]} { |
|
lappend opened {"} |
|
lappend innerpartials "" |
|
} else { |
|
append p $c |
|
lset innerpartials end $p |
|
} |
|
} |
|
} |
|
} |
|
{[} { |
|
if {![info complete ${p}$c]} { |
|
lappend opened "\[" |
|
lappend innerpartials "" |
|
} else { |
|
append p $c |
|
lset innerpartials end $p |
|
} |
|
} |
|
"{" { |
|
if {![info complete ${p}$c]} { |
|
lappend opened "\{" |
|
lappend innerpartials "" |
|
} else { |
|
append p $c |
|
lset innerpartials end $p |
|
} |
|
} |
|
"}" { |
|
#brace has higher precedence and can close off intermediates such as double quote or square bracket. |
|
set rposn [expr {[llength $opened] - 1}] |
|
foreach o [lreverse $opened] { |
|
if {$o eq "\{"} { |
|
break |
|
} |
|
incr rposn -1 |
|
} |
|
if {$rposn >= 0} { |
|
ledit opened $rposn end |
|
ledit innerpartials $rposn end |
|
} else { |
|
#unmatched close brace - treat as literal |
|
append p $c |
|
lset innerpartials end $p |
|
} |
|
|
|
} |
|
{]} { |
|
if {[lindex $opened end] eq "\["} { |
|
ledit opened end end |
|
ledit innerpartials end end |
|
} else { |
|
append p $c |
|
lset innerpartials end $p |
|
} |
|
} |
|
default { |
|
append p $c |
|
lset innerpartials end $p |
|
} |
|
} |
|
} else { |
|
append p $c |
|
lset innerpartials end $p |
|
} |
|
set escaped 0 |
|
incr i |
|
} |
|
if {$debug} { |
|
foreach o $opened p $innerpartials { |
|
puts stderr "->awaiting close of :'$o' partial: '$p'" |
|
} |
|
} |
|
return [lrange $opened 1 end] |
|
} |
|
|
|
proc incomplete1 {partial} { |
|
#we can apparently get away without concatenating current innerpartial to previous in list - REVIEW. |
|
if {[info complete $partial]} { |
|
return [list] |
|
} |
|
set clist [split $partial ""] |
|
#puts stderr "-->$clist<--" |
|
set waiting [list ""] |
|
set innerpartials [list ""] |
|
set escaped 0 |
|
set i 0 |
|
foreach c $clist { |
|
if {$c eq "\\"} { |
|
set escaped [expr {!$escaped}] |
|
incr i |
|
continue |
|
} ;# set escaped 0 at end |
|
set p [lindex $innerpartials end] |
|
if {$escaped == 0} { |
|
#NOTE - curly braces as switch arm keys must be unescaped and balanced. (escapes stop byte-compilation to jumpTable for switch statements for tcl8.6/8.7 at least) |
|
switch -- $c { |
|
{"} { |
|
if {![info complete ${p}]} { |
|
lappend waiting {"} |
|
lappend innerpartials "" |
|
} else { |
|
if {[lindex $waiting end] eq {"}} { |
|
#this quote is endquote |
|
set waiting [lrange $waiting 0 end-1] |
|
set innerpartials [lrange $innerpartials 0 end-1] |
|
} else { |
|
if {![info complete ${p}$c]} { |
|
lappend waiting {"} |
|
lappend innerpartials "" |
|
} else { |
|
set p ${p}${c} |
|
lset innerpartials end $p |
|
} |
|
} |
|
} |
|
} |
|
{[} { |
|
if {![info complete ${p}$c]} { |
|
lappend waiting "\]" |
|
lappend innerpartials "" |
|
} else { |
|
set p ${p}${c} |
|
lset innerpartials end $p |
|
} |
|
} |
|
"{" { |
|
if {![info complete ${p}$c]} { |
|
lappend waiting "\}" |
|
lappend innerpartials "" |
|
} else { |
|
set p ${p}${c} |
|
lset innerpartials end $p |
|
} |
|
} |
|
"}" - |
|
default { |
|
set waitingfor [lindex $waiting end] |
|
if {$c eq "$waitingfor"} { |
|
set waiting [lrange $waiting 0 end-1] |
|
set innerpartials [lrange $innerpartials 0 end-1] |
|
} else { |
|
set p ${p}${c} |
|
lset innerpartials end $p |
|
} |
|
} |
|
} |
|
} else { |
|
set p ${p}${c} |
|
lset innerpartials end $p |
|
} |
|
set escaped 0 |
|
incr i |
|
} |
|
set incomplete [list] |
|
foreach w $waiting { |
|
#to be treated as literals - curly braces must be unescaped here - and balanced - hence the commented left-curly empty arm. |
|
switch -- $w { |
|
{"} { |
|
lappend incomplete $w |
|
} |
|
{]} { |
|
lappend incomplete "\[" |
|
} |
|
"{" {} |
|
"}" { |
|
lappend incomplete "\{" |
|
} |
|
} |
|
} |
|
set debug 0 |
|
if {$debug} { |
|
foreach w $waiting p $innerpartials { |
|
puts stderr "->awaiting closure of:'$w' partial: '$p'" |
|
} |
|
} |
|
return $incomplete |
|
} |
|
|
|
|
|
#-------------------------------------------------------- |
|
# incomplete_naive. |
|
#-------------------------------------------------------- |
|
#This only works for very simple cases |
|
if 0 { |
|
incomplete "list x \}aa\{ \"" |
|
returns {"} |
|
incomplete_naive "list x \}aa\{ \"" |
|
returns "\{" {"} |
|
} |
|
|
|
#review: |
|
# {set x "a["""} |
|
#-------------------------------------------------------- |
|
proc incomplete_naive {partial} { |
|
if {[info complete $partial]} { |
|
return [list] |
|
} |
|
set clist [split $partial ""] |
|
set waiting [list] |
|
set escaped 0 |
|
foreach c $clist { |
|
if {$c eq "\\"} { |
|
set escaped [expr {!$escaped}] |
|
continue |
|
} |
|
if {!$escaped} { |
|
if {$c eq {"}} { |
|
if {[lindex $waiting end] eq {"}} { |
|
set waiting [lrange $waiting 0 end-1] |
|
} else { |
|
lappend waiting {"} |
|
} |
|
} elseif {$c eq "\["} { |
|
lappend waiting "\]" |
|
} elseif {$c eq "\{"} { |
|
lappend waiting "\}" |
|
} else { |
|
set waitingfor [lindex $waiting end] |
|
if {$c eq "$waitingfor"} { |
|
set waiting [lrange $waiting 0 end-1] |
|
} |
|
} |
|
} |
|
} |
|
set incomplete [list] |
|
foreach w $waiting { |
|
if {$w eq {"}} { |
|
lappend incomplete $w |
|
} elseif {$w eq "\]"} { |
|
lappend incomplete "\[" |
|
} elseif {$w eq "\}"} { |
|
lappend incomplete "\{" |
|
} |
|
} |
|
return $incomplete |
|
} |
|
|
|
|
|
#get info about punk nestindex key ie type: list,dict,undetermined |
|
# pdict devel |
|
proc nestindex_info {args} { |
|
set argd [punk::args::parse $args withdef { |
|
-parent -default "" |
|
nestindex |
|
}] |
|
set opt_parent [dict get $argd opts -parent] |
|
if {$opt_parent eq ""} { |
|
set parent_type undetermined |
|
} else { |
|
set parent_type [nestindex_info -parent "" $opt_parent] ;#make sure we explicitly set parent of parent to empty so we don't just recurse forever doing nothing |
|
} |
|
|
|
#??? |
|
|
|
} |
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace punk::lib::system ---}] |
|
} |
|
|
|
tcl::namespace::eval punk::lib::caches { |
|
|
|
} |
|
|
|
tcl::namespace::eval punk::lib::debug { |
|
proc showdict {args} {} |
|
} |
|
|
|
namespace eval ::punk::args::register { |
|
#use fully qualified so 8.6 doesn't find existing var in global namespace |
|
lappend ::punk::args::register::NAMESPACES ::punk::lib ::punk::lib::ensemble ::punk::lib::system |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Ready |
|
package provide punk::lib [tcl::namespace::eval punk::lib { |
|
variable pkg punk::lib |
|
variable version |
|
set version 999999.0a1.0 |
|
}] |
|
return |
|
|
|
#*** !doctools |
|
#[manpage_end] |
|
|
|
|