diff --git a/src/modules/opunk/str-999999.0a1.0.tm b/src/modules/opunk/str-999999.0a1.0.tm index cf1c5562..78c05a43 100644 --- a/src/modules/opunk/str-999999.0a1.0.tm +++ b/src/modules/opunk/str-999999.0a1.0.tm @@ -26,6 +26,7 @@ package require voo package require punk::assertion package require punk::char package require punk::ansi +package require punk::args #Note we are doing a heavy trade for space vs time when using punk::str @@ -48,31 +49,49 @@ tcl::namespace::eval ::opunk::str { #--------------------------------------- } - set unshare_script { - #--------------------------------------- - #unshare fields - set o_class [lindex $this 0] - lset this 0 {} - set o_string [lindex $this 1] - lset this 1 {} - set o_count [lindex $this 2] - lset this 2 {} - set o_ansisplits [lindex $this 3] - lset this 3 {} - set o_elements [lindex $this 4] - lset this 4 {} - set o_graphemes [lindex $this 5] - lset this 5 {} - set o_ptindex [lindex $this 5] - lset this 6 {} - set o_ptlist [lindex $this 7] - lset this 7 {} - set o_sgrstacks [lindex $this 8] - lset this 8 {} - set o_gx0states [lindex $this 9] - lset this 9 {} - #--------------------------------------- + set unshare_script1 { + #--------------------------------------- + #unshare fields + #Note VOO's somewhat unfortunate choice to use the user's field names as namespace variables for the field index values. + #these shouldn't collide unless you use 'variable o_fieldname' in a method + set o_class [lindex $this 0] + lset this 0 {} + set o_string [lindex $this 1] + lset this 1 {} + set o_count [lindex $this 2] + lset this 2 {} + set o_ansisplits [lindex $this 3] + lset this 3 {} + set o_elements [lindex $this 4] + lset this 4 {} + set o_graphemes [lindex $this 5] + lset this 5 {} + set o_ptindex [lindex $this 6] + lset this 6 {} + set o_ptlist [lindex $this 7] + lset this 7 {} + set o_sgrstacks [lindex $this 8] + lset this 8 {} + set o_gx0states [lindex $this 9] + lset this 9 {} + #--------------------------------------- } + set unshare_script [punk::args::lib::tstr { + #--------------------------------------- + #unshare fields + lassign $this o_class o_string o_count o_ansisplits o_elements o_graphemes o_ptindex o_ptlist o_sgrstacks o_gx0states + lset this 0 {} + lset this 1 {} + lset this 2 {} + lset this 3 {} + lset this 4 {} + lset this 5 {} + lset this 6 {} + lset this 7 {} + lset this 8 {} + lset this 9 {} + #--------------------------------------- + }] set restore_script_runtime { @@ -85,26 +104,27 @@ tcl::namespace::eval ::opunk::str { } #--------------------------------------- } - set restore_script { - #--------------------------------------- - #restore fields - lset this 0 $o_class - lset this 1 $o_string - lset this 2 $o_count - lset this 3 $o_ansisplits - lset this 4 $o_elements - lset this 5 $o_graphemes - lset this 6 $o_ptindex - lset this 7 $o_ptlist - lset this 8 $o_sgrstacks - lset this 9 $o_gx0states - #--------------------------------------- - } + + set restore_script [punk::args::lib::tstr { + #--------------------------------------- + #restore fields + lset this 0 $o_class + lset this 1 $o_string + lset this 2 $o_count + lset this 3 $o_ansisplits + lset this 4 $o_elements + lset this 5 $o_graphemes + lset this 6 $o_ptindex + lset this 7 $o_ptlist + lset this 8 $o_sgrstacks + lset this 9 $o_gx0states + #--------------------------------------- + }] variable etype_grapheme "g" - variable etype_sgr "sgr" - variable etype_gx0 "gx0" - variable etype_other "o" + variable etype_sgr "sgr" + variable etype_gx0 "gx0" + variable etype_other "o" #warning: uses tcl::unsupported::representation. proc estimate_list_mem {listVar {seen {}}} { @@ -150,7 +170,8 @@ tcl::namespace::eval ::opunk::str { #catches 'value is a pure string' #for utf-8 strings the actual memory usage could be up to 3 bytes per char, but we will estimate based on the common case of 1 byte per char for ascii, #and the fact that many strings will be ascii or mostly ascii. - incr total_size [string length $item] + #incr total_size [string length $item] + incr total_size [string length [encoding convertto utf-8 $item]] } list { # Recursively estimate memory for nested lists @@ -197,7 +218,7 @@ tcl::namespace::eval ::opunk::str { private { #we still use o_ prefix for private variables even though voo has the 'my' prefix, because we still want to distinguish them from local variables in 'update' methods. #This also gives consistency with the tclOO implementations of similar classes (e.g ansistring) and makes it easier to copy code between them. - string_t o_class "opunk::Str" + string_t o_class "::opunk::Str" ;#fully qualified string_t o_string "" int_t o_count -1 ;#count first updated when string appended or a method causes my.MakeSplit to run (or by count method if constructor argument was empty string) list_t o_ansisplits [list] ;#split version of string - odd number of elements - alternating plain text and ansicodestrings. Updated by my.makesplit @@ -217,10 +238,10 @@ tcl::namespace::eval ::opunk::str { #my.makesplit is called whenever string is modified (e.g by append) or when count method is called with an empty string argument (e.g constructor) #my.makesplit should update count to be the number of plain text graphemes in the split version of string - upvar ::opunk::str::etype_grapheme etype_grapheme - upvar ::opunk::str::etype_sgr etype_sgr - upvar ::opunk::str::etype_gx0 etype_gx0 - upvar ::opunk::str::etype_other etype_other + #g upvar ::opunk::str::etype_grapheme etype_grapheme + #sgr upvar ::opunk::str::etype_sgr etype_sgr + #gx0 upvar ::opunk::str::etype_gx0 etype_gx0 + #o upvar ::opunk::str::etype_other etype_other #The split with each code as it's own element is more generally useful. set o_ansisplits [punk::ansi::ta::split_codes_single $o_string]; @@ -253,7 +274,7 @@ tcl::namespace::eval ::opunk::str { lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state foreach grapheme [punk::char::grapheme_split $pt] { - lappend o_elements [list $etype_grapheme $grapheme] + lappend o_elements [list g $grapheme] lappend o_graphemes $grapheme lappend o_ptindex $ptindex @@ -274,26 +295,26 @@ tcl::namespace::eval ::opunk::str { #set codestack [list "\x1b\[m"] #vs set codestack [list $code] ;#pass through as is. - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::is_sgr $code]} { #basic simplification first - remove straight dupes set dup_posns [lsearch -all -exact $codestack $code] ;#must be -exact because of square-bracket glob chars set codestack [lremove $codestack {*}$dup_posns] lappend codestack $code - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } else { if {[punk::ansi::codetype::is_gx_open $code]} { set gx0_state 1 - lappend o_elements [list $etype_gx0 [expr {true}]] ;#don't store code - will complicate debugging if we spit it out and jump character sets + lappend o_elements [list gx0 [expr {true}]] ;#don't store code - will complicate debugging if we spit it out and jump character sets } elseif {[punk::ansi::codetype::is_gx_close $code]} { set gx0_state 0 - lappend o_elements [list $etype_gx0 [expr {false}]] ;#don't store code - will complicate debugging if we spit it out and jump character sets + lappend o_elements [list gx0 [expr {false}]] ;#don't store code - will complicate debugging if we spit it out and jump character sets } else { #lappend o_elements [list other $code] - lappend o_elements [list $etype_other $code] + lappend o_elements [list o $code] } } #after each code (ignoring bogus empty final due to foreach with 2 vars on odd-length list) increment the current_split_index @@ -308,13 +329,13 @@ tcl::namespace::eval ::opunk::str { #set num_splits [llength [my.get.o_ansisplits $this]] assert {[llength $o_ptlist] == [llength $o_sgrstacks] && [llength $o_ptlist] == [llength $o_gx0states] && [llength $o_graphemes] == [llength $o_ptindex]} - - } - method test {} { + if {[llength $this] != 10} { + error "invalid object" + } puts stdout "string is '[my.get.o_string $this]' and count is '[my.get.o_count $this]'" } } @@ -322,18 +343,38 @@ tcl::namespace::eval ::opunk::str { method get.class {} { return [my.get.o_class $this] } + method debug {} { + if {[llength $this] != 10} { + error "invalid object" + } + set fields [punk::lib::showdict -roottype list [class.fields]] + set data [punk::lib::showdict -roottype list $this] + set result [textblock::join -- $fields " " $data] + + set o_ptlist [my.get.o_ptlist $this] + set o_sgrstacks [my.get.o_sgrstacks $this] + set o_gx0states [my.get.o_gx0states $this] + set o_graphemes [my.get.o_graphemes $this] + set o_ptindex [my.get.o_ptindex $this] + if {[llength $o_ptlist] != [llength $o_sgrstacks]} { + set result "$result\n\x1b\[31mERROR: length of ptlist and sgrstacks do not match - [llength $o_ptlist] vs [llength $o_sgrstacks]\x1b\[m" + } + if {[llength $o_ptlist] != [llength $o_gx0states]} { + set result "$result\n\x1b\[31mERROR: length of ptlist and gx0states do not match - [llength $o_ptlist] vs [llength $o_gx0states]\x1b\[m" + } + if {[llength $o_graphemes] != [llength $o_ptindex]} { + set result "$result\n\x1b\[31mERROR: length of graphemes and ptindex do not match - [llength $o_graphemes] vs [llength $o_ptindex]\x1b\[m" + } + + return $result + } #This is the count of visible graphemes + non-ansi control chars. Not equal to column width or to the Tcl string length of the ansistripped string!!! - method count {} -upvar { - if {[my.get.o_count $this] == -1} { - #only initial string present - if {[my.get.o_string $this] eq ""} { - my.set.o_count this 0 - return 0 - } - my.makesplit this + method count {} { + if {[llength $this] != 10} { + error "invalid object" } - return [my.get.o_count $this] + my.get.o_count $this } method storage_estimate2 {} { #very slow @@ -343,11 +384,16 @@ tcl::namespace::eval ::opunk::str { method storage_estimate {} { #we can speed it up a little by not doing the recursive estimation for the top level lists that we know are just lists of strings or ints #this involves many less calls to tcl::unsupported::representation. + if {[llength $this] != 10} { + error "invalid object" + } array set seen {} set total_size 0 set idx 0 - upvar o_elements idx_o_elements - upvar o_ptindex idx_o_ptindex + # if the voo author had named these sensibly to avoid collisions we could have just done: variable idx_o_string. + upvar ::opunk::Str::o_elements idx_o_elements + upvar ::opunk::Str::o_graphemes idx_o_graphemes + upvar ::opunk::Str::o_ptindex idx_o_ptindex foreach element $this { if {$idx == $idx_o_elements} { #o_elements @@ -362,10 +408,13 @@ tcl::namespace::eval ::opunk::str { } incr total_size 48 } - } elseif {$idx = $idx_o_graphemes} { + } elseif {$idx == $idx_o_graphemes} { incr total_size 48 incr total_size 8 - incr total_size 1 + set count [llength $element] + #ascii vs utf-8 - we will estimate based on the common case of 1 byte per char for ascii, but some graphemes may be multiple bytes. + #review + incr total_size [expr {$count * 1}] } elseif {$idx == $idx_o_ptindex} { #o_splitindex set count [llength $element] @@ -380,55 +429,44 @@ tcl::namespace::eval ::opunk::str { return $total_size } - #we can't use -update if we also use other helper methods that take 'this' as an argument and may modify any of the same fields - #- so we have to do the upvar-detach-try-finally dance ourselves. - method append {args} -upvar [string map [list %unshare% $::opunk::str::unshare_script_runtime %restore% $::opunk::str::restore_script_runtime] { - upvar $thisVar this - upvar o_string idx_o_string - if {![llength $args]} { + # -undent 0 so source of proc will match indentation as in this file. (just as for any other 'info proc' output). + method append {args} -upvar [punk::args::lib::tstr -undent 0 { + if {[llength $this] != 10} { + error "invalid object $thisVar" + } + if {![llength $args] || [set catstr [string cat {*}$args]] eq ""} { #nothing to append - just return current string - return [lindex $this $idx_o_string] + return [my.get.o_string $this] } - upvar o_ansisplits idx_o_ansisplits - - set catstr [::join $args ""] set catstr_has_ansi [punk::ansi::ta::detect $catstr] - if {!$catstr_has_ansi} { - #if no initial internal-split - generate it without first appending our additions - as we can more efficiently append them to the internal state - if {![llength [lindex $this $idx_o_ansisplits 0]]} { - #initialise o_count because we need to add to it. - #The count method will do this by calling makesplit only if it needs to. (which will create ansisplits for anything except empty string) - count this - } - } - - if {!$catstr_has_ansi} { -%unshare% - + ${$::opunk::str::unshare_script} try { - upvar ::opunk::str::etype_grapheme etype_grapheme + #upvar ::opunk::str::etype_grapheme etype_grapheme #ansi-free additions #puts stderr "this: $this" ::append o_string $catstr;# only append after updating using count method above #puts stderr "after append: $o_string" + + + #TODO if {![llength $o_ptlist]} { #If the object was initialised with empty string - we can still have empty lists for o_ptlist and o_ansisplits lappend o_ptlist $catstr lappend o_sgrstacks [list] lappend o_gx0states [expr {false}] } else { - lset o_ptlist end [tcl::string::cat [lindex $o_ptlist end] $catstr] - lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] $catstr] + ledit o_ptlist end end [tcl::string::cat [lindex $o_ptlist end] $catstr] } + ledit o_ansisplits end end [tcl::string::cat [lindex $o_ansisplits end] $catstr] set ptindex [expr {[llength $o_ptlist] - 1}] foreach grapheme [punk::char::grapheme_split $catstr] { - lappend o_elements [list $etype_grapheme $grapheme] + lappend o_elements [list g $grapheme] lappend o_graphemes $grapheme lappend o_ptindex $ptindex @@ -437,28 +475,28 @@ tcl::namespace::eval ::opunk::str { } #incr o_count [my DoCount $catstr] ;#from before we were doing grapheme split.. review return $o_string + } trap * {emsg eopt} { + puts stderr "Error in append method: $emsg" } finally { -%restore% + ${$::opunk::str::restore_script} } - - + #todo error? } #appending string with ANSI codes. - upvar o_string idx_o_string - upvar o_ansisplits idx_o_ansisplits - if {![llength [lindex $this $idx_o_ansisplits]]} { + if {![llength [my.get.o_ansisplits $this]]} { + #string will only have no ansisplits if it is empty string. + #----- + # obsolete. #if we have an initial string - but no internal split-state because this is our first append and no methods have caused its generation - we can run more efficiently by combining it with the first append - #set o_string [my.get.o_string $this] - set o_string [lindex $this $idx_o_string] + #----- + set o_string [my.get.o_string $this] lset this $idx_o_string {} puts stderr "initial string: '$o_string' and catstr: '$catstr'" ::append o_string $catstr ;#append before split and count on whole lot lset this $idx_o_string $o_string my.makesplit this ;#update o_count - #set combined_plaintext [join $o_ptlist ""] - #set o_count [my DoCount $combined_plaintext] #assert {[llength $o_ptlist] == [llength $o_sgrstacks] && [llength $o_ptlist] == [llength $o_gx0states] && [llength $o_graphemes] == [llength $o_ptindex]} return $o_string } @@ -466,19 +504,18 @@ tcl::namespace::eval ::opunk::str { #we have pre-existing ansisplits. #update each element of internal state incrementally without reprocessing what is already there. -%unshare% + ${$::opunk::str::unshare_script} try { - upvar ::opunk::str::etype_grapheme etype_grapheme - upvar ::opunk::str::etype_sgr etype_sgr - upvar ::opunk::str::etype_gx0 etype_gx0 - upvar ::opunk::str::etype_other etype_other + # upvar ::opunk::str::etype_grapheme etype_grapheme + # upvar ::opunk::str::etype_sgr etype_sgr + # upvar ::opunk::str::etype_gx0 etype_gx0 + # upvar ::opunk::str::etype_other etype_other ::append o_string $catstr set newsplits [punk::ansi::ta::split_codes_single $catstr] set ptnew "" set codestack [lindex $o_sgrstacks end] set gx0_state [lindex $o_gx0states end] - set current_split_index [expr {[llength $o_ansisplits] -1}] #first pt must be merged with last element of o_ptlist set new_pt_list [list] @@ -494,40 +531,38 @@ tcl::namespace::eval ::opunk::str { ::append ptnew $pt foreach grapheme [punk::char::grapheme_split $pt] { - lappend o_elements [list $etype_grapheme $grapheme] + lappend o_elements [list g $grapheme] lappend o_graphemes $grapheme lappend o_ptindex $ptindex incr o_count } - incr current_split_index ;#increment 1 of 2 within each loop if {$code ne ""} { #maintenance - dup in MakeSplit! if {[punk::ansi::codetype::is_sgr_reset $code]} { set codestack [list $code] ;#pass through as is. #vs #set codestack [list "\x1b\[m"] ;#normalize. - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::is_sgr $code]} { #basic simplification first - remove straight dupes set dup_posns [lsearch -all -exact $codestack $code] ;#must be -exact because of square-bracket glob chars set codestack [lremove $codestack {*}$dup_posns] lappend codestack $code - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } else { if {[punk::ansi::codetype::is_gx_open $code]} { set gx0_state 1 - lappend o_elements [list $etype_gx0 1] ;#don't store code - will complicate debugging if we spit it out and jump character sets + lappend o_elements [list gx0 1] ;#don't store code - will complicate debugging if we spit it out and jump character sets } elseif {[punk::ansi::codetype::is_gx_close $code]} { set gx0_state 0 - lappend o_elements [list $etype_gx0 0] ;#don't store code - will complicate debugging if we spit it out and jump character sets + lappend o_elements [list gx0 0] ;#don't store code - will complicate debugging if we spit it out and jump character sets } else { - lappend o_elements [list $etype_other $code] + lappend o_elements [list o $code] } } - incr current_split_index ;#increment 2 of 2 } incr ptindex } @@ -540,36 +575,155 @@ tcl::namespace::eval ::opunk::str { lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] [lindex $newsplits 0]] lappend o_ansisplits {*}[lrange $newsplits 1 end] - #if {$o_count eq ""} { - # #we have splits - but didn't count graphemes? - # set o_count [my DoCount [join $o_ptlist ""]] ;#o_ptlist already has ptnew parts - #} else { - # incr o_count [my DoCount $ptnew] - #} - assert {[llength $o_ptlist] == [llength $o_sgrstacks] && [llength $o_ptlist] == [llength $o_gx0states] && [llength $o_graphemes] == [llength $o_ptindex]} return $o_string } finally { -%restore% + ${$::opunk::str::restore_script} } }] - #returns the ansiless string - doesn't affect the stored state other than initialising it's internal state if it wasn't already - method stripped -upvar {} { - if {![llength [my.get.o_ansisplits $this]]} {my.makesplit this} - return [join [my.get.o_ptlist $this] ""] + + + # accept multiple opunk::Str objects to append to this object + method appendobj {args} -upvar [punk::args::lib::tstr -undent 0 { + if {[llength $this] != 10} { + error "invalid object" + } + if {![llength $args]} { + #nothing to append - just return current value + return $this + } + + ${$::opunk::str::unshare_script} + #it would be safer to retain the original values in case of error, + #but this seems likely to reduce the efficiency of our copy on write avoidance strategy. review + #set orig_o_string $o_string + #set orig_o_count $o_count + #set orig_o_ansisplits $o_ansisplits + #set orig_o_elements $o_elements + #set orig_o_graphemes $o_graphemes + #set orig_o_ptindex $o_ptindex + #set orig_o_ptlist $o_ptlist + #set orig_o_sgrstacks $o_sgrstacks + #set orig_o_gx0states $o_gx0states + try { + foreach a $args { + if {[llength $a] != 10} { + error "invalid object in appendobj argument list" + } + lassign $a new_o_class new_o_string new_o_count new_o_ansisplits new_o_elements new_o_graphemes new_o_ptindex new_o_ptlist new_o_sgrstacks new_o_gx0states + + ::append o_string $new_o_string + + incr o_count $new_o_count + + lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] [lindex $new_o_ansisplits 0]] + lappend o_ansisplits {*}[lrange $new_o_ansisplits 1 end] + + lappend o_elements {*}$new_o_elements + + lappend o_graphemes {*}$new_o_graphemes + + #o_ptindex is a mapping of grapheme to ptlist index - we need to update the new list's indices before appending + #we also need to recognise that the first pt in the new list will be merged with the last pt in the existing list + #example data: + # o_graphemes: x x x b l a h + # o_ptindex : 1 1 1 2 2 2 2 + # o_ptlist : {} xxx blah + # o_graphemes: y y y + # o_ptindex : 0 0 1 + # o_ptlist : yy y + # r merge: + # o_graphemes: x x x b l a h y y y + # o_ptindex : 1 1 1 2 2 2 2 2 2 3 + # o_ptlist : {} xxx blahyy y + + set last_ptindex [expr {[llength $o_ptlist]-1}] + #note there may be skipped ptindex values for empty string pt blocks. (since there are no graphemes corresponding to them). + #i.e last_ptindex is the index of the last pt block - not necessarily the value of the last entry in o_ptindex (which only tracks graphemes) + #by adding last_ptindex, new_o_ptindex zero items in appended string will be merged to the same value as the last pt block index of the existing string, and the rest will be incremented accordingly. + foreach v $new_o_ptindex { + lappend o_ptindex [expr {$v + $last_ptindex}] + } + + lset o_ptlist end [tcl::string::cat [lindex $o_ptlist end] [lindex $new_o_ptlist 0]] + lappend o_ptlist {*}[lrange $new_o_ptlist 1 end] + + + #prepend the previous sgr stack to all stacks in the new list. + #This allows us to use only list operations to keep the sgr data valid - but we don't yet make it canonical/flat by examining each for resets etc. + #ie just call sgr_merge_list once now. + set laststack [lindex $o_sgrstacks end] + #set mergedtail [punk::ansi::codetype::sgr_merge_list "" {*}$laststack] + set mergedtail [punk::ansi::codetype::sgr_merge $laststack] + lset o_sgrstacks end [list $mergedtail {*}[lindex $new_o_sgrstacks 0]] + foreach n [lrange $new_o_sgrstacks 1 end] { + lappend o_sgrstacks [list $mergedtail {*}$n] + } + + set last_gx0state [lindex $o_gx0states end] + #merge the last gx0 state of the existing string with the first gx0 state of the appended string. + #use the policy that the new gx0 state wins. + set first_new_gx0state [lindex $new_o_gx0states 0] + #set merged_gx0state [expr {$last_gx0state || $first_new_gx0state}] ;experimental. + set merged_gx0state $first_new_gx0state + lset o_gx0states end $merged_gx0state + lappend o_gx0states {*}[lrange $new_o_gx0states 1 end] + + } + } finally { + ${$::opunk::str::restore_script} + #If our assert fails - we have already written invalid state to the object, + #but we will at least know about it and can fix the bug. + #Trying to restore the original state on error would be safer but would potentially reduce the efficiency of our copy on write avoidance strategy, + #and it may be helpful to be able to inspect the invalid state that we have written to the object when debugging the cause of the failure. + #review. + assert { + [llength $o_ptlist] == [llength $o_sgrstacks] && + [llength $o_ptlist] == [llength $o_gx0states] && + [llength $o_graphemes] == [llength $o_ptindex] + } + } + return $this + }] + + #returns the ansiless string - doesn't affect the stored state + method stripped {} { + #review - the ptlist has original gx0 characters in it, so it differs to ansistrip which replaces them with their gx0 unicode equivalents + join [my.get.o_ptlist $this] "" } method get {} { - return [my.get.o_string $this] + my.get.o_string $this } method has_ansi {} { - if {![llength [my.get.o_ansisplits $this]]} { + if {[llength [my.get.o_ansisplits $this]] > 1} { #initial string - for large strings,it's faster to run detect than update the internal split-state. return [punk::ansi::ta::detect [my.get.o_string $this]] } else { + #empty string = zero ansisplits, or string with no ANSI codes = single ansisplits element containing whole string. #string will continue to have a single o_ansisplits element if only non-ansi appended - return [expr {[llength [my.get.o_ansisplits $this]] != 1}] + return [expr {false}] + } + } + #review - has_other is roughly equivalent to a test for whether string is 'rendered' + method has_other {} { + #we expect only g,sgr and gx0 element types in 'rendered' strings + set posn [lsearch -index 0 -exact [my.get.o_elements] o] + return [expr {$posn != -1}] + } + method has_linefeed {} { + #test if there is a grapheme that is exactly \n or a graphme that is exactly \r\n + #we don't recognise a lone \r as a linefeed in this context. review. + #if present, we expect linefeeds to be near the end of the grapheme list slightly more often than the start, so we will search from the end. + set o_graphemes [lreverse [my.get.o_graphemes $this]] + if {[lsearch -exact $o_graphemes "\n"] != -1} { + return true + } + if {[lsearch -exact $o_graphemes "\r\n"] != -1} { + return true } + return false } + method length_raw {} { tcl::string::length [my.get.o_string $this] } @@ -592,8 +746,8 @@ tcl::namespace::eval ::opunk::str { " @opts @values -min 2 -max 2 - thisVar -type varname -help\ - "The variable name of the Str object to query. This is passed by name and will be upvar'd to 'this' within the method body." + strobj -type list -help\ + "The value of the Str object to query." idx -type int -help\ "The index of the character to return. Zero-based index into the original string, excluding ANSI codes. @@ -601,13 +755,16 @@ tcl::namespace::eval ::opunk::str { }] } - method INDEXCHAR -upvar {idx} { + method INDEXCHAR {idx} { #this is not the same as the character at that index in the original string, or even in the ansistripped string - which may be an ANSI code or *part of a grapheme cluster* - set o_graphemes [my.get.o_graphemes $this] - if {![llength $o_graphemes]} { - my.makesplit this - set o_ographemes [my.get.o_graphemes $this] + if {[llength $this] != 10} { + error "invalid object $this" } + set o_graphemes [my.get.o_graphemes $this] + #if {![llength $o_graphemes]} { + # my.makesplit this + # set o_ographemes [my.get.o_graphemes $this] + #} return [lindex $o_graphemes $idx] #experimental. @@ -620,23 +777,40 @@ tcl::namespace::eval ::opunk::str { # return [lindex $o_elements $required_posn 1] #} } - method INDEX -upvar {idx} { + method INDEX {idx} { #returns the index in the original string of the character at the given grapheme index - i.e the index of the start of the grapheme cluster in the original string - upvar o_graphemes idx_o_graphemes - set o_graphemes [lindex $this $idx_o_graphemes] - if {![llength $o_graphemes]} { - my.makesplit this - set o_graphemes [lindex $this $idx_o_graphemes] + if {[llength $this] != 10} { + error "invalid object $this" } - upvar o_sgrstacks idx_o_sgrstacks - upvar o_ptindex idx_o_ptindex + set o_graphemes [my.get.o_graphemes $this] - set ptindex [lindex $this $idx_o_ptindex] - set ptidx [lindex $ptindex $idx] + set ptindex [my.get.o_ptindex $this] + set ptidx [lindex $ptindex $idx] + #idx could be out of range + if {$ptidx eq ""} { + #review + return "" + } + set g [lindex $o_graphemes $idx] - set stack [lindex $this $idx_o_sgrstacks $ptidx] + set stack [lindex [my.get.o_sgrstacks $this] $ptidx] + set mergedstack [punk::ansi::codetype::sgr_merge $stack] set ansi [punk::ansi::codetype::sgr_merge $stack] - return $ansi[lindex $o_graphemes $idx] + + set obj [list {*}{ + } ::opunk::Str {*}{ + } $ansi$g {*}{ + } 1 {*}{ + } [list {} $ansi $g] {*}{ + } [list [list sgr $ansi] [list g $g]] {*}{ + } [list $g] {*}{ + } [list 1] {*}{ + } [list "" $g] {*}{ + } [list "" [list $mergedstack]] {*}{ + } [list [expr {false}] [expr {false}]] + ] + #puts stdout $ansi$g + return $obj } namespace eval argdoc { variable PUNKARGS @@ -656,8 +830,8 @@ tcl::namespace::eval ::opunk::str { This is effectively the ANSI code that would need to be applied to the character returned by INDEX to get the same visual effect as the original string at that position." @values -min 2 -max 2 - string -type string -help\ - "String to index into, which may contain ANSI codes." + strobj -type list -help\ + "Value of string object to index into, which may contain ANSI codes." index -type indexexpression -help\ "Index of character to get code for, or as a special case: the index ${$B}end+1${$N} or numerical equivalent to get the code in effect after the last character (if any). @@ -665,27 +839,337 @@ tcl::namespace::eval ::opunk::str { effect after the last character." }] } - method INDEXCODE -upvar {idx} { + method INDEXCODE {idx} { + #todo - tests for out of bounds indices at both ends, and for end+1 case. + if {[llength $this] != 10} { + error "invalid object $this" + } #TODO - special cases for pure ANSI string and end+1 equivalent index to get code in effect after last character. - upvar o_graphemes idx_o_graphemes - set o_graphemes [lindex $this $idx_o_graphemes] - if {![llength $o_graphemes]} { - my.makesplit this - set o_graphemes [lindex $this $idx_o_graphemes] - } - upvar o_sgrstacks idx_o_sgrstacks - upvar o_ptindex idx_o_ptindex - set ptindex [lindex $this $idx_o_ptindex] - set ptidx [lindex $ptindex $idx] - set stack [lindex $this $idx_o_sgrstacks $ptidx] - set ansi [punk::ansi::codetype::sgr_merge $stack] + #set o_graphemes [my.get.o_graphemes $this] + set ptindexlist [my.get.o_ptindex $this] + if {[llength $ptindexlist] == 0} { + #no graphemes - so no ptindex entries + #we may still have a ptlist of any length - all empty strings, and a corresponding list of the same length for sgrstacks. + #(a string made of multiple separate ansi codes with no graphemes would be an example of this) + #we can consider the INDEXCODE for any index in this case to be the merge of all the sgrstacks + #- which will be the same as the merge of all the codes in the string since there are no graphemes to cause splits in the stacks. + #- it will also be the same as the merge of the single stack at the end of the o_sgrstacks list, + #since it was built by merging all the codes in the string as we went along. + return [punk::ansi::codetype::sgr_merge [lindex [my.get.o_sgrstacks $this] end]] + } + + set ptidx [lindex $ptindexlist $idx] + if {[string is integer -strict $ptidx]} { + #in bounds index - we can just return the merged stack for that index. + set stack [lindex [my.get.o_sgrstacks $this] $ptidx] + set ansi [punk::ansi::codetype::sgr_merge $stack] + return $ansi + } + #if $ptidx is empty string - using it as the subsequent index for lindex into sgrstacks will return the entire list. + #this merged result is technically what we want for the end+1 case - except that for large ansi-alternating strings this is very inefficient. + #also - relying on the out-of-bounds empty string from lindex would also do the same for out-of-bounds indices at the lower end - which would be incorrect. + #we can use the use the ptindex of the first or last grapheme to determine which stacks to use from the o_sgrstacks list. + #use the lindex_resolve helper so that we cater for end-n etc in the index expression. + set idxinfo [punk::lib::lindex_resolve [my.get.o_count $this] $idx] + #we should only get -Inf for lower end out-of-bounds, and Inf for upper end out-of-bounds. + switch -exact $idxinfo { + "-Inf" { + #out of bounds at lower end - same as the ansi we would get for the first grapheme. + #assert - we would already have returned if there were no graphemes, so we know there is at least one grapheme and therefore at least one ptindex entry. + set ptidx [lindex $ptindexlist 0] + set stack [lindex [my.get.o_sgrstacks $this] $ptidx] + set ansi [punk::ansi::codetype::sgr_merge $stack] + } + "Inf" { + #out of bounds at upper end - return merged stack for end of string. + set stack [lindex [my.get.o_sgrstacks $this] end] + set ansi [punk::ansi::codetype::sgr_merge $stack] + } + default { + #we expect this case to be unreachable. + #If the original idx was in bounds we would have returned above + error "opunk::Str::INDEXCODE unexpected index resolution result: $idxinfo for index $idx with count [my.get.o_count $this]" + } + } return $ansi } + #test performance of get method vs upvar access to internal state. + method perftest1 {} { + if {[llength $this] != 10} { + error "invalid object $this" + } + upvar ::opunk::Str::o_graphemes idx_o_graphemes + upvar ::opunk::Str::o_ansisplits idx_o_ansisplits + upvar ::opunk::Str::o_ptlist idx_o_ptlist + upvar ::opunk::Str::o_ptindex idx_o_ptindex + upvar ::opunk::Str::o_gx0states idx_o_gx0states + upvar ::opunk::Str::o_sgrstacks idx_o_sgrstacks + set o_graphemes [lindex $this $idx_o_graphemes] + set o_ansisplits [lindex $this $idx_o_ansisplits] + set o_ptlist [lindex $this $idx_o_ptlist] + set o_ptindex [lindex $this $idx_o_ptindex] + set o_gx0states [lindex $this $idx_o_gx0states] + set o_sgrstacks [lindex $this $idx_o_sgrstacks] + return [list $o_graphemes $o_ansisplits $o_ptlist $o_ptindex $o_gx0states $vo_sgrstacks] + } + method perftest2 {} { + if {[llength $this] != 10} { + error "invalid object $this" + } + #this is faster than upvaring the internal index variables. + set o_graphemes [my.get.o_graphemes $this] ;#test get method + set o_ansisplits [my.get.o_ansisplits $this] ;#test get method + set o_ptlist [my.get.o_ptlist $this] ;#test get method + set o_ptindex [my.get.o_ptindex $this] ;#test get method + set o_gx0states [my.get.o_gx0states $this] ;#test get method + set o_sgrstacks [my.get.o_sgrstacks $this] ;#test get method + return [list $o_graphemes $o_ansisplits $o_ptlist $o_ptindex $o_gx0states $o_sgrstacks] + } + + + #Returns the substring of string between the character positions startindex and endindex, inclusive, + #where the character positions are determined by INDEX. + #The returned substring will include any ANSI codes that are in effect for those characters. + method RANGE {startindex endindex} { + #TODO - special case processing for end+n and equivalents to return trailing ANSI codes as well. + #(similar to INDEXCODE) + + set o_graphemes [my.get.o_graphemes $this] + #set o_ansisplits [my.get.o_ansisplits $this] + set o_ptlist [my.get.o_ptlist $this] + set o_ptindex [my.get.o_ptindex $this] + set o_gx0states [my.get.o_gx0states $this] + set o_sgrstacks [my.get.o_sgrstacks $this] + + set o_count [my.get.o_count $this] ;#assert same as [llength $graphemes] + + #for range we may need to return a result even if one or both indices are out of bounds + #if both are out of bounds at the same end - we return empty string for consistency with Tcl string index behaviour + set index_start [punk::lib::lindex_resolve $o_count $startindex] + set index_end [punk::lib::lindex_resolve $o_count $endindex] + if {$index_start eq "-Inf" && $index_end eq "-Inf"} {return ""} ;#out of bounds at lower end + if {$index_start eq "Inf" && $index_end eq "Inf"} {return ""} ;#out of bounds at upper end + #if the start is out bounds at the upper end and end is out of bounds at the lower end we also return empty string - as the range is effectively empty + #Also if start index > end index we return empty string - as the range is empty + #the expr intrinsic to if can test with -Inf and Inf against each other and integers - so we can do this with a single condition + if {$index_start > $index_end} {return ""} + #we can now assert that our range covers at least part of the string. + + #any pt could be empty if using split_codes_single (or just first and last pt if split_codes) + set low -1 + set high -1 + set pt_found -1 + set char "" + #set grapheme_codestacks [list] ;#stack of codes per grapheme - will be flattened/coalesced + set codestack [list] + #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go + #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) + if {$index_start eq "-Inf"} { + set index_start 0 + } + if {$index_end eq "Inf"} { + set index_end [expr {$o_count - 1}] + } + if {$index_start == 0 && $index_end == [expr {$o_count - 1}]} { + #shortcut for whole string - just return the original string object? + #review end vs end+1 trailing codes or not? + #if INDEX n doesn't return codes after each character n, then RANGE m n shouldn't either, + #but if RANGE 0 end doesn't return trailing codes that may violate user expectations. + #consider that joining INDEX 0 .. INDEX end should give the same result as RANGE 0 end + #- which would argue for including trailing codes in both cases or neither case. + #- returning trailing codes for INDEX n - would however also violate user expectations. + #we may have to solve some of this with tests and documentation to clarify the behaviour and set user expectations appropriately. + #ANSI strings are complicated and there may not be a single behaviour that meets all user expectations in all cases. + #The option to treat end differently to numeric value that happens to be the same as end should be considered, + # - there is a precedent for this in the Tcl linsert command which treats an end-relative index differently to a start-relative index. + return $this + } + + set START $index_start + set END $index_end + set grapheme_base 0 + set range_o_class ::opunk::Str + set range_o_string "" ;#string value - todo - change to return opunk::Str object representing the range not just the string. + set range_o_count 0 + set range_o_ansisplits [list] + set range_o_elements [list] + set range_o_graphemes [list] + set range_o_ptindex [list] + set range_o_ptlist [list] + set range_o_sgrstacks [list] + set range_o_gx0states [list] + + set ptindex_from [lindex $o_ptindex $index_start] + set ptindex_to [lindex $o_ptindex $index_end] + + #set idx -1 + set idx [expr {$ptindex_from - 1}] + set grapheme_base [lsearch -integer $o_ptindex $ptindex_from] ;#index at which this pt block starts in the grapheme list. + set high [expr {$grapheme_base - 1}] + + #foreach {pt code} [lrange $o_ansisplits [expr {$ptindex_from * 2}] end] {} + foreach pt [lrange $o_ptlist $ptindex_from end] codestack [lrange $o_sgrstacks $ptindex_from end] gx0state [lrange $o_gx0states $ptindex_from end] { + incr idx + #puts "idx: $idx pt: '$pt'" + set include_code [punk::ansi::codetype::sgr_merge $codestack] + if {$pt ne ""} { + #set graphemes [lindex $pt_graphemes $idx] + #set grapheme_to_pt_index [lsearch -all -inline $o_ptindex $idx] ;# e.g {0 0 0 0} if first pt is 4 graphemes, then {1 1} if next pt is 2 graphemes etc. + set pt_graphemes [list] + for {set i $grapheme_base} {$i < [llength $o_ptindex]} {incr i} { + set gidx [lindex $o_ptindex $i] + if {$gidx != $idx} { + break + } + lappend pt_graphemes [lindex $o_graphemes $i] + } + #puts "--->pt_graphemes: $pt_graphemes for pt: '$pt' with idx: $idx and grapheme_base: $grapheme_base" + + set low [expr {$high + 1}] ;#last high + set high [expr {$low + [llength $pt_graphemes]-1}] + + #if {$START >= $low && $START <= $high} {} + if {$index_start >= $low && $index_start <= $high} { + #puts "--->start in this pt '$pt' - index_start:$index_start >= low:$low and <= high:$high index_end: $index_end" + #starts in this pt + if {$END >= $low && $END <= $high} { + #both start and end in this pt + set include_graphemes [lrange $pt_graphemes $START-$low $END-$low] + set include_pt [::string cat {*}$include_graphemes] + ::append range_o_string $include_code$include_pt + incr range_o_count [llength $include_graphemes] + #start of ansisplits is always PT. + #end of ansisplits is always PT too + if {$include_code ne ""} { + lappend range_o_ansisplits "" $include_code $include_pt + lappend range_o_ptlist "" $include_pt + lappend range_o_sgrstacks [list] + lappend range_o_gx0states [expr {false}] + } else { + lappend range_o_ansisplits $include_pt + lappend range_o_ptlist $include_pt + } + lappend range_o_elements [list sgr $include_code] + lappend range_o_elements {*}[lmap g $include_graphemes {list g $g}] + lappend range_o_graphemes {*}$include_graphemes + lappend range_o_ptindex {*}[lrepeat [llength $include_graphemes] $idx] + #lappend range_o_sgrstacks $codestack + lappend range_o_sgrstacks [list $include_code] + lappend range_o_gx0states [lindex $o_gx0states $idx] + break + } else { + #start in this pt but end is not - so we want to take the rest of this pt and then keep going until we find the end + set include_graphemes [lrange $pt_graphemes $START-$low end] + set include_pt [::string cat {*}$include_graphemes] + ::append range_o_string $include_code$include_pt + incr range_o_count [llength $include_graphemes] + if {$include_code ne ""} { + lappend range_o_ansisplits "" $include_code $include_pt + lappend range_o_ptlist "" $include_pt + lappend range_o_sgrstacks [list] + lappend range_o_gx0states [expr {false}] + } else { + lappend range_o_ansisplits $include_pt + lappend range_o_ptlist $include_pt + } + lappend range_o_elements [list sgr $include_code] + lappend range_o_elements {*}[lmap g $include_graphemes {list g $g}] + lappend range_o_graphemes {*}$include_graphemes + lappend range_o_ptindex {*}[lrepeat [llength $include_graphemes] $idx] + #lappend range_o_sgrstacks $codestack + lappend range_o_sgrstacks [list $include_code] + lappend range_o_gx0states $gx0state + incr START [expr {[llength $pt_graphemes] - ($START - $low)}] ;#we want to start from the next character after this pt + } + } else { + #does not start in this pt + #if {$START < $low} {} + if {$index_start < $low} { + #already passed the start + if {$END >= $low && $END <= $high} { + #end in this pt but start is not - so we want to take the start of this pt up to the end index and then break + #puts "--->end in this pt but start is not - so we want to take the start of this pt up to the end index and then break" + set include_graphemes [lrange $pt_graphemes 0 $END-$low] + set include_pt [::string cat {*}$include_graphemes] + + ::append range_o_string $include_code$include_pt + lappend range_o_ansisplits $include_code $include_pt + incr range_o_count [llength $include_graphemes] + lappend range_o_elements [list sgr $include_code] + lappend range_o_elements {*}[lmap g $include_graphemes {list g $g}] + lappend range_o_graphemes {*}$include_graphemes + lappend range_o_ptindex {*}[lrepeat [llength $include_graphemes] $idx] + lappend range_o_ptlist $include_pt + #lappend range_o_sgrstacks $codestack + lappend range_o_sgrstacks [list $include_code] + lappend range_o_gx0states $gx0state + #puts stdout "adding gx0state: [lindex $o_gx0states $idx] for pt: '$pt' idx $idx with include_graphemes: $include_graphemes" + break + } else { + #neither start nor end in this pt - so we want to take the whole pt and keep going + set include_pt [::string cat {*}$pt_graphemes] + ::append range_o_string $include_code[string cat {*}$pt_graphemes] + lappend range_o_ansisplits $include_code $include_pt + incr range_o_count [llength $pt_graphemes] + lappend range_o_elements [list sgr $include_code] + lappend range_o_elements {*}[lmap g $pt_graphemes {list g $g}] + lappend range_o_graphemes {*}$pt_graphemes + lappend range_o_ptindex {*}[lrepeat [llength $pt_graphemes] $idx] + lappend range_o_ptlist $include_pt + #lappend range_o_sgrstacks $codestack + lappend range_o_sgrstacks [list $include_code] + lappend range_o_gx0states $gx0state + #? + incr START [expr {[llength $pt_graphemes] - ($START - $low)}] ;#we want to start from the next character after this pt + } + } + } + incr grapheme_base [llength $pt_graphemes];#set up for next loop + } else { + #leave grapheme_base as is. + #if {$START < $high && $END > $high} {} + if {$index_start < $high && $index_end > $high} { + #we are in a gap between pts that covers part of our range - we want to include any codes in this gap that are in effect + ::append range_o_string $include_code + lappend range_o_ansisplits $include_code "" + lappend range_o_elements [list sgr $include_code] + #no graphemes or ptindex to add in this case. + lappend range_o_ptlist "" + #lappend range_o_sgrstacks $codestack + lappend range_o_sgrstacks [list $include_code] + lappend range_o_gx0states $gx0state + } + } + } + #avoid line continuation to support debugging with punk::lib::cmdtrace (proper linenumber matching) + set obj [list {*}{ + } ::opunk::Str {*}{ + } $range_o_string {*}{ + } $range_o_count {*}{ + } $range_o_ansisplits {*}{ + } $range_o_elements {*}{ + } $range_o_graphemes {*}{ + } $range_o_ptindex {*}{ + } $range_o_ptlist {*}{ + } $range_o_sgrstacks {*}{ + } $range_o_gx0states + ] + #puts stdout "[a] $obj" + #puts stdout "[a] $range_o_string" + #return $range_o_string + return $obj + } + + } constructor {initial_string} { - return [list "opunk::Str" $initial_string -1 [list] [list] [list] [list] [list] [list] [list]] + #use fully qualified classname - so that upvar in methods will not fail due to namespace not existing when (erroneously) value passed to a method instead of varname. + #We can then test in the methods whether the upvar worked or not (using info exists) to give a more helpful error message about passing a value instead of a variable name. + set init [list "::opunk::Str" $initial_string -1 [list] [list] [list] [list] [list] [list] [list]] + my.makesplit init + return $init } } } @@ -750,10 +1234,10 @@ tcl::namespace::eval opunk::str { proc default_topics {} {return [list Description *]} # ------------------------------------------------------------- - # get_topic_ functions add more to auto-include in about topics + # get_topic_ functions add more to auto-include in about topics # ------------------------------------------------------------- proc get_topic_Description {} { - punk::args::lib::tstr [string trim { + punk::args::lib::tstr [string trim { package opunk::str voo classes for ANSI-aware strings. } \n] @@ -786,7 +1270,7 @@ tcl::namespace::eval opunk::str { # we re-use the argument definition from punk::args::standard_about and override some items set overrides [dict create] dict set overrides @id -id "::opunk::str::about" - dict set overrides @cmd -name "opunk::str::about" + dict set overrides @cmd -name "opunk::str::about" dict set overrides @cmd -help [string trim [punk::args::lib::tstr { About opunk::str }] \n] diff --git a/src/modules/overtype-999999.0a1.0.tm b/src/modules/overtype-999999.0a1.0.tm index 678341dc..e5bcbfec 100644 --- a/src/modules/overtype-999999.0a1.0.tm +++ b/src/modules/overtype-999999.0a1.0.tm @@ -253,7 +253,6 @@ tcl::namespace::eval overtype { coloured as this doesn't affect the display width. Default is \uFFFD - the unicode replacement char.} - -experimental -default 0 -cp437 -default 0 -type boolean -looplimit -default \uFFEF\ -type integer -help\ "internal failsafe - experimental" @@ -263,8 +262,8 @@ tcl::namespace::eval overtype { -wrap -default 0 -type boolean -info -default 0 -type boolean -help\ "When set to 1, return a dictionary (experimental)" - -format -default ansi -type string -choices {ansi binarytext-bios binarytext-ice xbin} - -binarytext -default "" -type string -choices {"" bios ice} + -format -default ansi -type string -choices {ansi binarytext-bios binarytext-ice xbin} -help\ + "Format of new data being applied as an overlay" -console -default {stdin stdout stderr} -type list @values -min 1 -max 2 @@ -329,7 +328,6 @@ tcl::namespace::eval overtype { -transparent 0 -exposed1 \uFFFD -exposed2 \uFFFD - -experimental 0 -cp437 0 -looplimit \uFFEF -crm_mode 0 @@ -337,7 +335,6 @@ tcl::namespace::eval overtype { -insert_mode 0 -wrap 0 -info 0 - -binarytext "" -format ansi -console {stdin stdout stderr} }] @@ -355,11 +352,11 @@ tcl::namespace::eval overtype { foreach {k v} $argsflags { switch -- $k { -looplimit - -width - -height - -startcolumn - -startrow - -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - - -transparent - -exposed1 - -exposed2 - -experimental + - -transparent - -exposed1 - -exposed2 - -expand_right - -appendlines - -reverse_mode - -crm_mode - -insert_mode - -cp437 - - -info - -binarytext - -format - -console { + - -info - -format - -console { tcl::dict::set opts $k $v } -wrap - -autowrap_mode { @@ -381,6 +378,7 @@ tcl::namespace::eval overtype { set opt_height [tcl::dict::get $opts -height] set opt_startcolumn [tcl::dict::get $opts -startcolumn] set opt_startrow [tcl::dict::get $opts -startrow] + #review -appendlines - this needs thought regarding interaction with terminal height concept and scrolling set opt_appendlines [tcl::dict::get $opts -appendlines] set opt_transparent [tcl::dict::get $opts -transparent] set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] @@ -399,7 +397,6 @@ tcl::namespace::eval overtype { # -- --- --- --- --- --- set opt_cp437 [tcl::dict::get $opts -cp437] set opt_info [tcl::dict::get $opts -info] - set opt_binarytext [tcl::dict::get $opts -binarytext] set opt_format [tcl::dict::get $opts -format] set opt_console [tcl::dict::get $opts -console] @@ -419,24 +416,6 @@ tcl::namespace::eval overtype { #} #-------------------------------------------------------------------------- - # ---------------------------- - # -experimental dev flag to set flags etc - # ---------------------------- - set data_mode 0 - set edit_mode 0 - set opt_experimental [tcl::dict::get $opts -experimental] - foreach o $opt_experimental { - switch -- $o { - data_mode { - set data_mode 1 - } - edit_mode { - set edit_mode 1 - } - } - } - # ---------------------------- - #--------------------------------------------------------- #underblock is expected to be pre-rendered - ie any ANSI codes have already been processed and rendered into the text. @@ -484,12 +463,17 @@ tcl::namespace::eval overtype { } insert_mode $opt_insert_mode {*}{ } autowrap_mode $opt_autowrap_mode {*}{ } cp437 $opt_cp437 {*}{ + } row 1 {*}{ + } col 1 {*}{ + } topmargin 1 {*}{ + } bottommargin $renderheight {*}{ } ] #modes #e.g insert_mode can be toggled by insert key or ansi IRM sequence CSI 4 h|l #opt_startcolumn ?? - DECSLRM ? set vtstate $initial_state + dict set vtstate col $opt_startcolumn # -- --- --- --- #REVIEW - do we need ansi resets in the underblock? @@ -497,7 +481,6 @@ tcl::namespace::eval overtype { set blankline [string repeat \u0000 $renderwidth] set underlines [lrepeat $renderheight $blankline] } else { - #---- #this splits into lines - only to rejoin - which is inefficient. #It also has code to handle joining multiple blocks - but we only have one in this case. @@ -511,16 +494,8 @@ tcl::namespace::eval overtype { } else { set underlines [split $underblock \n] } - } - #if {$underblock eq ""} { - # set blank "\x1b\[0m\x1b\[0m" - # #set underlines [list "\x1b\[0m\x1b\[0m"] - # set underlines [lrepeat $renderheight $blank] - #} else { - # #lines_as_list -ansiresets 1 will do nothing if -ansiresets 1 isn't specified - REVIEW - # set underlines [lines_as_list -ansiresets 1 $underblock] - #} + #underlines are not necessarily processed in order - depending on cursor-moves applied from overtext # -- --- --- --- #todo - reconsider the 'line' as the natural chunking mechanism for the overlay. @@ -542,270 +517,82 @@ tcl::namespace::eval overtype { #overblock height/width isn't useful in the presence of an ansi input overlay with movements. The number of lines may bear little relationship to the output height #lassign [blocksize $overblock] _w overblock_width _h overblock_height - #temporary scheme selector for experimenting with different approaches to chunking the input overlay for processing. - set scheme 4 - switch -- $scheme { - 0 { - #one big chunk - set inputchunks [list mixed $overblock] - } - 1 { - #todo - set inputchunks [punk::ansi::ta::split_codes $overblock] - } - 2 { - #todo - #split into lines if possible first - then into plaintext/ansi-sequence chunks ? - set inputchunks [list ""] ;#put an empty plaintext split in for starters - set i 1 - set lines [split $overblock \n] - foreach ln $lines { - if {$i < [llength $lines]} { - append ln \n - } - set sequence_split [punk::ansi::ta::split_codes_single $ln] ;#use split_codes Not split_codes_single? - set lastpt [lindex $inputchunks end] - lset inputchunks end [tcl::string::cat $lastpt [lindex $sequence_split 0]] - lappend inputchunks {*}[lrange $sequence_split 1 end] - incr i - } - } - 3 { - #todo - #it turns out line based chunks are faster than the above.. probably because some of those end up doing the regex splitting twice - set lflines [list] - set inputchunks [split $overblock \n] - foreach ln $inputchunks { - append ln \n - lappend lflines $ln + set inputchunks [list] + switch -- $opt_format { + ansi { + #ansi is commonly but not always line-based. + #some ansi is a string of data with ansi movements and no linefeeds. + set overblock [tcl::string::map {\r\n \n} $overblock] + foreach ln [split $overblock \n] { + lappend inputchunks [list mixed $ln\n] } - if {[llength $lflines]} { - lset lflines end [tcl::string::range [lindex $lflines end] 0 end-1] + if {[llength $inputchunks]} { + #strip trailing newline from last line. + lset inputchunks end 1 [tcl::string::range [lindex $inputchunks end 1] 0 end-1] } - #set inputchunks $lflines[unset lflines] - set inputchunks [lindex [list $lflines [unset lflines]] 0] - } - 4 { - #active development scheme - 2026. - set inputchunks [list] - switch -- $opt_format { - ansi { - set overblock [tcl::string::map {\r\n \n} $overblock] - foreach ln [split $overblock \n] { - lappend inputchunks [list mixed $ln\n] - } - if {[llength $inputchunks]} { - lset inputchunks end 1 [tcl::string::range [lindex $inputchunks end 1] 0 end-1] - } + binarytext-bios { + #16 fg, 8 fg + possible blink + set input "" + set ansisplit [list ""] + set charpair 0 + foreach {ch at} [split $overblock ""] { + #review - does binarytext only apply to cp437??? we need to know the original encoding + if {[catch {punk::ansi::colour::byteAnsi $at} code]} { + puts stderr "renderspace err at charpair: $charpair [punk::ansi::ansistring VIEW ${ch}${at}]" + #append input [punk::ansi::a+ brightred White] \uFFef + set code [punk::ansi::a+ brightred White] } - binarytext-bios { - #16 fg, 8 fg + possible blink - set input "" - set ansisplit [list ""] - set charpair 0 - foreach {ch at} [split $overblock ""] { - #review - does binarytext only apply to cp437??? we need to know the original encoding - set at [encoding convertto cp437 $at] - if {[catch {punk::ansi::colour::byteAnsi $at} code]} { - puts stderr "renderspace err at charpair: $charpair [punk::ansi::ansistring VIEW ${ch}${at}]" - #append input [punk::ansi::a+ brightred White] \uFFef - set code [punk::ansi::a+ brightred White] - set ch \uFFeF - } - append input $code $ch - lappend ansisplit $code $ch - incr charpair - } - #lappend inputchunks [list mixed $input] - lappend inputchunks [list ansisplit $ansisplit] - } - binarytext-ice { - #16 fg, 16 bg (no blink) - set input "" - foreach {ch at} [split $overblock ""] { - set at [encoding convertto cp437 $at] - append input [punk::ansi::colour::byteAnsiIce $at]$ch - } - lappend inputchunks [list mixed $input] + if {[catch {encoding convertfrom cp437 $ch} ch]} { + puts stderr "renderspace err at charpair: $charpair [punk::ansi::ansistring VIEW ${ch}${at}]" + set ch \uFFeF } - xbin { - set xbin_header [string range $overblock 0 10] ;#11 bytes - set xbin_header_info [punk::ansi::xbin::parse_header $xbin_header] - set overblock [string range $overblock 11 end] - - set flags [dict get $xbin_header_info flags] - set xbin_width [dict get $xbin_header_info width] - set xbin_height [dict get $xbin_header_info height] - set expected_cells [expr {$xbin_width * $xbin_height}] - set xbin_nonblink [expr {"nonblink" in $flags}] ;# ice. - set xbin_palette [punk::ansi::xbin::default_palette] - - puts "xbin ${xbin_width}x${xbin_height}" - puts "xbin flags $flags" - - #optional 16-entry palette, 3 bytes per entry, RGB values 0..63 - if {"palette" in $flags} { - #puts stderr "renderspace warning - palette unimplemented" - set xbin_palette [punk::ansi::xbin::parse_palette [string range $overblock 0 47]] - set overblock [string range $overblock 48 end] - } - - #todo - font. - #hack - skip over font 256 x fontsize or 512 x fontsize - if {"512chars" in $flags} { - set sz 512 - } else { - set sz 256 - } - #temp - set skip [expr {$sz * [dict get $xbin_header_info fontsize]}] - if {"font" in $flags} { - #todo - consider sixel or similar for font data - but for now we just skip over it. - puts stderr "renderspace warning - xbin font unimplemented" - set overblock [string range $overblock $skip end] - } - puts stdout "xbin image data size [string length $overblock]" - - set ansisplit [list ""] - if {"compress" in $flags} { - #puts stderr "renderspace warning - compress experimental" - #process 'repeatcounter' bytes - #first 2 bits - compression type - # 00 - no compression - # 01 - character compression - # 10 - attribute compression - # 11 - character/attribute compression - #remaining 6 bits - counter - set input "" - set bytes [split $overblock ""] - set byte_count [llength $bytes] - set decoded_cells 0 - for {set b 0} {$b < $byte_count} {} { - set rc [lindex $bytes $b] - set dec [scan $rc %c] - set ctype [expr {$dec >> 6}] - #0x3F - 00111111 - set count [expr {$dec & 0x3F}] - incr count ;#count stored as 1 less than actual number of repeats - if {$count < 1 || $count > 64} { - puts stderr "xbin - something wrong - max must be between 1 and 64 inclusive. received $count" - } - incr b - if {$decoded_cells + $count > $expected_cells} { - error "overtype::renderspace xbin decode overflow: record would emit $count cells at decoded offset $decoded_cells, expected total $expected_cells cells for image dimensions ${xbin_width}x${xbin_height}" - } - switch -exact -- $ctype { - 0 { - set needed [expr {$count * 2}] - } - 1 - - 2 { - set needed [expr {$count + 1}] - } - 3 { - set needed 2 - } - default { - error "overtype::renderspace xbin invalid compression type $ctype in repeatcounter byte '$rc' at offset $b" - } - } - if {$b + $needed > $byte_count} { - error "overtype::renderspace xbin truncated record: type $ctype requires $needed bytes at payload offset $b, but only [expr {$byte_count - $b}] bytes remain." - } - switch -exact -- $ctype { - 0 { - #no compression - for {set c 0} {$c < $count*2} {incr c 2} { - set ch [lindex $bytes $b+$c] - set ch [encoding convertfrom cp437 $ch] - set at [lindex $bytes [expr {$b+$c+1}]] - #binary scan $at cu code - #set clr [a+ term-$code] - #set clr [a+ red] ;#debug - set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] - lappend ansisplit $clr $ch - } - incr b [expr {$count*2}] - } - 1 { - #char compression - set ch [lindex $bytes $b] - set ch [encoding convertfrom cp437 $ch] - incr b - for {set c 0} {$c < $count} {incr c} { - set at [lindex $bytes $b+$c] - #binary scan $at cu code - #set clr [a+ term-$code] - #set clr [a+ cyan] ;#debug - set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] - lappend ansisplit $clr $ch - } - incr b [expr {$count}] - } - 2 { - #attribute compression - set at [lindex $bytes $b] - #binary scan $at cu code - #set clr [a+ term-$code] - #set clr [a+ green] ;#debug - set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] - incr b - for {set c 0} {$c < $count} {incr c} { - set ch [lindex $bytes $b+$c] - set ch [encoding convertfrom cp437 $ch] - lappend ansisplit $clr $ch - } - incr b $count - } - 3 { - #attribute and char compression - set ch [lindex $bytes $b] - set ch [encoding convertfrom cp437 $ch] - set at [lindex $bytes $b+1] - #binary scan $at cu code - #set clr [a+ term-$code] - #set clr [a+ white] ;#debug - set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] - for {set c 0} {$c < $count} {incr c} { - lappend ansisplit $clr $ch - } - incr b 2 - } - } - incr decoded_cells $count - } - if {$decoded_cells != $expected_cells} { - puts stderr "overtype::renderspace xbin decoded $decoded_cells cells, expected $expected_cells cells for image dimensions ${xbin_width}x${xbin_height}" - } - lappend inputchunks [list ansisplit $ansisplit] - } else { - foreach {ch at} [split $overblock ""] { - #binary scan $at cu code - #set clr [a+ term-$code] - if {$at eq ""} { - #eg src/testansi/formatsamples/image/xbin/test.xb - #has trailing nul byte. for now just warn. - puts stderr "renderspace warning - xbin attribute byte is empty at char '[ansistring VIEW $ch]'" - #break ? - #experiment - treat as a reset. - lappend ansisplit [a+] $ch - } else { - set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] - set ch [encoding convertfrom cp437 $ch] - lappend ansisplit $clr $ch - } - } - lappend inputchunks [list ansisplit $ansisplit] - } - puts stdout "xbin decoded" - flush stdout + append input $code $ch + lappend ansisplit $code $ch + incr charpair + } + #lappend inputchunks [list mixed $input] + lappend inputchunks [list ansisplit $ansisplit] + } + binarytext-ice { + #16 fg, 16 bg (no blink) + set input "" + foreach {ch at} [split $overblock ""] { + if {$at ne ""} { + append input [punk::ansi::colour::byteAnsiIce $at] } + set ch [encoding convertfrom cp437 $ch] + append input $ch } + lappend inputchunks [list mixed $input] } - } + xbin { + set parse_dict [punk::ansi::xbin::parse $overblock] + set ansisplit [dict get $parse_dict ansisplit] + set xbin_header_info [dict get $parse_dict header] + set flags [dict get $xbin_header_info flags] + set xbin_width [dict get $xbin_header_info width] + set xbin_height [dict get $xbin_header_info height] + puts stdout "xbin dimensions ${xbin_width}x${xbin_height} decoded [dict get $parse_dict decoded_cells] of [dict get $parse_dict expected_cells] expected cells" + puts stdout "xbin flags $flags" + set warnings [dict get $parse_dict warnings] + foreach w $warnings { + puts stderr "xbin warning: $w" + } + puts stdout "xbin decoded" + flush stdout + lappend inputchunks [list ansisplit $ansisplit] + } + } + #we have a list of 2 element input chunks {overtext_type overtext} in $inputchunks + #- each chunk is either a string of text with embedded ANSI codes (type 'mixed') + #- or a list of alternating ANSI code and text segments (type 'ansisplit') + #For ansi files each chunk may commonly correspond to a line of text - but this is not necessarily the case, as ANSI cursor movements and other codes may be present which affect the layout in ways that can't be determined until processing. + #for binary files - there may be no newlines at all - just a stream of bytes with ANSI codes interspersed to control the layout and colours. + #The chunks are processed in order, with the output of each chunk being rendered onto the current 'underlay' of the output, + #and then becoming the new 'underlay' for the next chunk to render onto. set replay_codes_underlay [tcl::dict::create 1 ""] @@ -819,13 +606,6 @@ tcl::namespace::eval overtype { set outputlines $underlines set overidx 0 - #underlines are not necessarily processed in order - depending on cursor-moves applied from overtext - set row 1 - #if {$data_mode} { - # set col [_get_row_append_column $row] - #} else { - set col $opt_startcolumn - #} set instruction_stats [tcl::dict::create] @@ -843,7 +623,10 @@ tcl::namespace::eval overtype { continue } #puts "----->[ansistring VIEW -lf 1 -vt 1 -nul 1 $overtext]<----" - set undertext [lindex $outputlines [expr {$row -1}]] + set undertext [lindex $outputlines [dict get $vtstate row]-1] + if {[tcl::dict::exists $replay_codes_underlay [dict get $vtstate row]]} { + set undertext [tcl::dict::get $replay_codes_underlay [dict get $vtstate row]]$undertext + } #renderline pads each underly line to width with spaces and should track where end of data is @@ -878,19 +661,17 @@ tcl::namespace::eval overtype { #} ###################### - set renderedrow $row + #remember the row we are just about to render. + set renderedrow [dict get $vtstate row] if {$renderedrow > $renderedrow_max} { set renderedrow_max $renderedrow } - if {[tcl::dict::exists $replay_codes_underlay $row]} { - set undertext [tcl::dict::get $replay_codes_underlay $row]$undertext - } #review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary - #but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l - set renderopts [list -experimental $opt_experimental {*}{ + set renderopts [list {*}{ } -cp437 $opt_cp437 {*}{ } -info 1 {*}{ } -crm_mode [tcl::dict::get $vtstate crm_mode] {*}{ @@ -903,8 +684,8 @@ tcl::namespace::eval overtype { } -exposed1 $opt_exposed1 {*}{ } -exposed2 $opt_exposed2 {*}{ } -expand_right $opt_expand_right {*}{ - } -cursor_column $col {*}{ - } -cursor_row $row {*}{ + } -cursor_column [tcl::dict::get $vtstate col] {*}{ + } -cursor_row [tcl::dict::get $vtstate row] {*}{ } -overtext_type $overtext_type {*}{ } ] @@ -935,6 +716,8 @@ tcl::namespace::eval overtype { set visualwidth [tcl::dict::get $rinfo visualwidth] ;#column width of what is 'rendered' for the line set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] + + #review - this assumes lines are rendered in order - but this isn't always true. tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] #lset replay_codes_overlay [expr $overidx+1] [tcl::dict::get $rinfo replay_codes_overlay] @@ -973,7 +756,7 @@ tcl::namespace::eval overtype { #-- todo - detect looping properly - if {$row > 1 && $overtext ne "" && $unapplied eq $overtext && $post_render_row == $row && $instruction eq ""} { + if {[dict get $vtstate row] > 1 && $overtext ne "" && $unapplied eq $overtext && $post_render_row == [dict get $vtstate row] && $instruction eq ""} { puts stderr "overtype::renderspace loop?" puts [ansistring VIEW $rinfo] break @@ -999,25 +782,23 @@ tcl::namespace::eval overtype { tcl::dict::incr instruction_stats $instruction_type switch -- $instruction_type { reset { - #reset the 'renderspace terminal' (not underlying terminal) - set row 1 - set col 1 + #reset the 'renderspace virtual terminal' (not underlying terminal) set vtstate [tcl::dict::merge $vtstate $initial_state] #todo - clear screen } {} { #end of supplied line input #lf included in data - set row $post_render_row - set col $post_render_col + dict set vtstate row $post_render_row + #dict set vtstate col $post_render_col if {![llength $unapplied_list]} { if {$overflow_right ne ""} { - incr row + dict incr vtstate row } } else { puts stderr "renderspace end of input line - has unapplied: [ansistring VIEW $unapplied] (review)" } - set col $opt_startcolumn + dict set vtstate col $opt_startcolumn } up { @@ -1031,87 +812,42 @@ tcl::namespace::eval overtype { #puts stderr "up $post_render_row" #puts stderr "$rinfo" - #puts stdout "1 row:$row col $col" - set row $post_render_row - #data_mode (naming?) determines if we move to end of existing data or not. - #data_mode 0 means ignore existing line length and go to exact column - #set by -experimental flag - if {$data_mode == 0} { - set col $post_render_col - } else { - #This doesn't really work if columns are pre-filled with spaces..we can't distinguish them from data - #we need renderline to return the number of the maximum column filled (or min if we ever do r-to-l) - set existingdata [lindex $outputlines [expr {$post_render_row -1}]] - set lastdatacol [punk::ansi::printing_length $existingdata] - if {$lastdatacol < $renderwidth} { - set col [expr {$lastdatacol+1}] - } else { - set col $renderwidth - } - } - - #puts stdout "2 row:$row col $col" - #puts stdout "-----------------------" - #puts stdout $rinfo - #flush stdout + dict set vtstate row $post_render_row + dict set vtstate col $post_render_col } down { - if {$data_mode == 0} { + #cursor down. Will not force scroll if at bottom of screen. + if {$post_render_row > [llength $outputlines]} { #renderline doesn't know how far down we can go.. - if {$post_render_row > [llength $outputlines]} { - if {$opt_appendlines} { - set diff [expr {$post_render_row - [llength $outputlines]}] - set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - if {$diff > 0} { - lappend outputlines {*}[lrepeat $diff $bce_line] - #lappend outputlines {*}[lrepeat $diff ""] - } - lappend outputlines $bce_line - #lappend outputlines "" - } - } - set row $post_render_row - set col $post_render_col - } else { - puts stderr "renderspace down - data_mode 1 - review" - if {$post_render_row > [llength $outputlines]} { - if {$opt_appendlines} { - set diff [expr {$post_render_row - [llength $outputlines]}] - set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - if {$diff > 0} { - lappend outputlines {*}[lrepeat $diff $bce_line] - #lappend outputlines {*}[lrepeat $diff ""] - } - lappend outputlines $bce_line - #lappend outputlines "" + set post_render_row [llength $outputlines] + } + dict set vtstate row $post_render_row + dict set vtstate col $post_render_col + } + down_scrolling { + #todo - scrolling region. take account of decstbm. + + #renderline doesn't know how far down we can go.. + if {$post_render_row > [llength $outputlines]} { + if {$opt_appendlines} { + set diff [expr {$post_render_row - [llength $outputlines]}] + set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] + if {$diff > 0} { + lappend outputlines {*}[lrepeat $diff $bce_line] } + lappend outputlines $bce_line } - # ---- - # review - set col $post_render_col - #just because it's out of range of the renderwidth - doesn't mean a move down should jump to within the range - 2025 - #---- - - #set existingdata [lindex $outputlines [expr {$post_render_row -1}]] - #set lastdatacol [punk::ansi::printing_length $existingdata] - - #set col [expr {$lastdatacol+1}] - - #if {$lastdatacol < $renderwidth} { - # set col [expr {$lastdatacol+1}] - #} else { - # set col $renderwidth - #} - } + dict set vtstate row $post_render_row + dict set vtstate col $post_render_col } restore_cursor { #testfile belinda.ans uses this #puts stdout "[a+ blue bold]CURSOR_RESTORE[a]" if {[tcl::dict::exists $cursor_saved_position row]} { - set row [tcl::dict::get $cursor_saved_position row] - set col [tcl::dict::get $cursor_saved_position column] + dict set vtstate row [tcl::dict::get $cursor_saved_position row] + dict set vtstate col [tcl::dict::get $cursor_saved_position column] #puts stdout "restoring: row $row col $col [ansistring VIEW $cursor_saved_attributes] [a] unapplied [ansistring VIEWCODES $unapplied]" #set nextprefix $cursor_saved_attributes #lset replay_codes_overlay [expr $overidx+1] $cursor_saved_attributes @@ -1159,6 +895,47 @@ tcl::namespace::eval overtype { set overflow_handled 1 } + decstbm { + #scrolling region - CSI r + #renderline will have rendered the line based on the current vtstate row/col + #- but the scrolling region change may have caused a move to be rendered to the output which changes the row/col for the next line + #- so we need to update our vtstate cursor position. + lassign $instruction _ margin_top margin_bottom + + set margins_valid 1 ;#default assumption + #todo - validate margins. A valid scrolling region requires at least two lines. + if {$margin_top eq ""} {set margin_top 1} + if {$margin_bottom eq ""} {set margin_bottom $screen_rows} + if {![string is integer -strict $margin_top] || $margin_top < 1} { + puts stderr "DECSTBM invalid margin_top '$margin_top' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {![string is integer -strict $margin_bottom] || $margin_bottom < 1} { + puts stderr "DECSTBM invalid margin_bottom '$margin_bottom' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {$margin_bottom - $margin_top < 1} { + puts stderr "DECSTBM invalid margins - bottom '$margin_bottom' must be greater than top '$margin_top' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {$margins_valid} { + #todo - return these for the caller to process.. + puts stderr "overtype::renderspace DECSTBM set top and bottom margin not implemented" + #Also moves the cursor to col 1 line 1 of the page + #review - examine DECOM state to determine new cursor position? + dict set vtstate row 1 + dict set vtstate col 1 + + #incr idx_over + #priv::render_to_unapplied $overlay_grapheme_control_list $gci + #set instruction [list decstbm $margin_top $margin_bottom] + dict set vtstate topmargin $margin_top + dict set vtstate bottommargin $margin_bottom + } else { + puts stderr "overtype::renderspace DECSTBM invalid margins - ignoring [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + #don't update the vtstate margins. + } + } move { ######## if {$post_render_row > [llength $outputlines]} { @@ -1170,67 +947,95 @@ tcl::namespace::eval overtype { if {$diff > 0} { lappend outputlines {*}[lrepeat $diff $bce_line] } - set row $post_render_row + dict set vtstate row $post_render_row } else { - set row [llength $outputlines] + dict set vtstate row [llength $outputlines] } } else { - set row $post_render_row + dict set vtstate row $post_render_row } ####### - set col $post_render_col + dict set vtstate col $post_render_col #overflow + unapplied? } + clear_to_end_display { + #ED 0 + #review - needs to operate within top and bottom margins if set (decstbm) - but for now we assume full screen clear + #Current row already partially erased by renderline. + if {$post_render_row > [llength $outputlines]} { + dict set vtstate row [llength $outputlines] + } else { + dict set vtstate row $post_render_row + } + dict set vtstate col $post_render_col + set overflow_right "" + + set start_idx [expr {[dict get $vtstate row]}] + if {$start_idx < 0} {set start_idx 0} + for {set i $start_idx} {$i < [llength $outputlines]} {incr i} { + lset outputlines $i \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth]\x1b\[0m + } + } + clear_to_start_display { + #ED 1 + #Current row already partially erased by renderline. + if {$post_render_row > [llength $outputlines]} { + dict set vtstate row [llength $outputlines] + } else { + dict set vtstate row $post_render_row + } + dict set vtstate col $post_render_col + set overflow_right "" + + set stop_idx [expr {[dict get $vtstate row] - 1}] + if {$stop_idx >= [llength $outputlines]} { + set stop_idx [expr {[llength $outputlines] - 1}] + } + for {set i 0} {$i < $stop_idx} {incr i} { + lset outputlines $i \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth]\x1b\[0m + } + } clear_and_move { - #e.g 2J + #ED 2J if {$post_render_row > [llength $outputlines]} { - set row [llength $outputlines] + dict set vtstate row [llength $outputlines] } else { - set row $post_render_row + dict set vtstate row $post_render_row } - set col $post_render_col + dict set vtstate col $post_render_col set overflow_right "" ;#if we're clearing - any overflow due to insert_mode is irrelevant - set clearedlines [list] - foreach ln $outputlines { - lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth]\x1b\[0m - - #set lineparts [punk::ansi::ta::split_codes $ln] - #set numcells 0 - #foreach {pt _code} $lineparts { - # if {$pt ne ""} { - # foreach grapheme [punk::char::grapheme_split $pt] { - # switch -- $grapheme { - # " " - - - _ - ! - @ - # - $ - % - ^ - & - * - = - + - : - . - , - / - | - ? - - # a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - - # z - A - B - C - D - E - F - G - H - I - J - K - L - M - N - O - P - Q - R - S - T - U - V - W - X - Y - Z - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { - # incr numcells 1 - # } - # default { - # if {$grapheme eq "\u0000"} { - # #review - # incr numcells 1 - # } elseif {$grapheme eq "\t"} { - # #set tstops [lsort -integer -unique [punk::console::get_tabstops]] - # puts stderr "tab at numcells: $numcells - REVIEW renderspace" - # set nexttabstop [expr {((int($numcells / 8) + 1) * 8)}] - # incr numcells [expr {$nexttabstop - $numcells}] - # } else { - # incr numcells [grapheme_width_cached $grapheme] - # } - # } - # } - - # } - # } - #} - ##replays/resets each line - #lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $numcells]\x1b\[0m + for {set i 0} {$i < [llength $outputlines]} {incr i} { + lset outputlines $i \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth]\x1b\[0m } - set outputlines $clearedlines #todo - determine background/default to be in effect - DECECM ? puts stderr "replay_codes_overlay: [ansistring VIEW $replay_codes_overlay]" - #lset outputlines 0 $replay_codes_overlay[lindex $outputlines 0] - + } + delete_lines { + #DL n + set delete_count [lindex $instruction 1] + set r $renderedrow + puts stderr "delete_lines $delete_count at row $r" + if {$delete_count > 0} { + #set outputlines [lreplace $outputlines [dict get $vtstate row] [expr {[dict get $vtstate row] + $delete_count - 1}]] + set delidx_first [expr {$r - 1}] ;#convert to 0-based index + set delidx_last [expr {$delidx_first + ($delete_count - 1)}] ;#inclusive index of last line to delete + #if delete_count is 1 - we are only deleting the current line. + ledit outputlines $delidx_first $delidx_last + } + dict set vtstate row $renderedrow + if {[llength $outputlines] < [dict get $vtstate row]} { + dict set vtstate row [llength $outputlines] + } + #we need to ensure 'unapplied' data is still applied to the current line after deletion. + #Any overflow on the current line should be abandoned. + if {[llength $unapplied_ansisplit]} { + #set inputchunks [linsert $inputchunks 0 $nextprefix] + #JMN - assume backwards compat ledit available from punk::lib (for tcl <9) + ledit inputchunks -1 -1 [list ansisplit $unapplied_ansisplit] + } + incr overidx + incr loop + continue } lf_start { #raw newlines @@ -1239,9 +1044,9 @@ tcl::namespace::eval overtype { #test - treating as newline below... #append rendered $overflow_right #set overflow_right "" - set row $renderedrow - incr row - if {$row > [llength $outputlines]} { + + dict set vtstate row [expr {$renderedrow + 1}] + if {[dict get $vtstate row] > [llength $outputlines]} { #lappend outputlines "" # BCE lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] @@ -1250,137 +1055,88 @@ tcl::namespace::eval overtype { # ---------------------- } lf_mid { - set edit_mode 0 - if {$edit_mode} { - #set inputchunks [linsert $inputchunks 0 $overflow_right$unapplied] - #JMN - #ledit inputchunks -1 -1 $overflow_right$unapplied - - set pt_ansi_pt [punk::ansi::ta::split_codes_single $overflow_right] - #join the trailing and leading pt parts of the 2 lists - ledit pt_ansi_pt end end "[lindex $pt_ansi_pt end][lindex $unapplied_ansisplit 0]" - lappend pt_ansi_pt [lrange $unapplied_ansisplit 1 end] - - ledit inputchunks -1 -1 [list ansisplit $pt_ansi_pt] ;#combined overflow_right and unapplied - in ansisplit form - + if {$overflow_right ne ""} { + if {$opt_expand_right} { + append rendered $overflow_right set overflow_right "" - set unapplied "" - set unapplied_list [list] - set unapplied_ansisplit [list] - - set row $post_render_row - #set col $post_render_col - set col $opt_startcolumn - if {$row > [llength $outputlines]} { - #lappend outputlines {*}[lrepeat 1 ""] - lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - } } else { - if 1 { - if {$overflow_right ne ""} { - if {$opt_expand_right} { - append rendered $overflow_right - set overflow_right "" - } else { - #review - we should really make renderline do the work...? - set overflow_width [punk::ansi::printing_length $overflow_right] - if {$visualwidth + $overflow_width <= $renderwidth} { - append rendered $overflow_right - set overflow_right "" - } else { - if {[tcl::dict::get $vtstate autowrap_mode]} { - #set outputlines [linsert $outputlines $renderedrow $overflow_right] - #ledit outputlines $renderedrow $renderedrow-1 $overflow_right - puts stderr "REVIEW wrap overflow_right to next line: $overflow_right" - #this looks wrong - ledit outputlines $renderedrow -1 $overflow_right - set overflow_right "" - set row [expr {$renderedrow + 2}] - } else { - set overflow_right "" ;#abandon - } + #review - we should really make renderline do the work...? + set overflow_width [punk::ansi::printing_length $overflow_right] + if {$visualwidth + $overflow_width <= $renderwidth} { + append rendered $overflow_right + set overflow_right "" + } else { + if {[tcl::dict::get $vtstate autowrap_mode]} { + #set outputlines [linsert $outputlines $renderedrow $overflow_right] + #ledit outputlines $renderedrow $renderedrow-1 $overflow_right + puts stderr "REVIEW wrap overflow_right to next line: $overflow_right" + #this looks wrong + ledit outputlines $renderedrow -1 $overflow_right + set overflow_right "" + #review - why are we setting this here when we override it below? + dict set vtstate row [expr {$renderedrow + 2}] + } else { + set overflow_right "" ;#abandon + } - if {0 && $visualwidth < $renderwidth} { - puts stderr "visualwidth: $visualwidth < renderwidth:$renderwidth" - error "incomplete - abandon?" - set overflowparts [punk::ansi::ta::split_codes $overflow_right] - set remaining_overflow $overflowparts - set filled 0 - foreach {pt code} $overflowparts { - lpop remaining_overflow 0 - if {$pt ne ""} { - set graphemes [punk::char::grapheme_split $pt] - set add "" - set addlen $visualwidth - foreach g $graphemes { - set w [overtype::grapheme_width_cached $g] - if {$addlen + $w <= $renderwidth} { - append add $g - incr addlen $w - } else { - set filled 1 - break - } - } - append rendered $add - } - if {!$filled} { - lpop remaining_overflow 0 ;#pop code - } + if {0 && $visualwidth < $renderwidth} { + puts stderr "visualwidth: $visualwidth < renderwidth:$renderwidth" + error "incomplete - abandon?" + set overflowparts [punk::ansi::ta::split_codes $overflow_right] + set remaining_overflow $overflowparts + set filled 0 + foreach {pt code} $overflowparts { + lpop remaining_overflow 0 + if {$pt ne ""} { + set graphemes [punk::char::grapheme_split $pt] + set add "" + set addlen $visualwidth + foreach g $graphemes { + set w [overtype::grapheme_width_cached $g] + if {$addlen + $w <= $renderwidth} { + append add $g + incr addlen $w + } else { + set filled 1 + break } - set overflow_right [join $remaining_overflow ""] } + append rendered $add + } + if {!$filled} { + lpop remaining_overflow 0 ;#pop code } } - } - set row $post_render_row - set col $opt_startcolumn - if {$row > [llength $outputlines]} { - #lappend outputlines {*}[lrepeat 1 ""] - # BCE - #puts "row: $row > outputlines: [llength $outputlines] overlay replay:[ansistring VIEW $replay_codes_overlay]" - lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - } - } else { - #old version - known to work with various ansi graphics - e.g fruit.ans - # - but fails to limit lines to renderwidth when expand_right == 0 - append rendered $overflow_right - set overflow_right "" - set row $post_render_row - set col $opt_startcolumn - if {$row > [llength $outputlines]} { - lappend outputlines {*}[lrepeat 1 ""] + set overflow_right [join $remaining_overflow ""] } } } + } + dict set vtstate row $post_render_row + dict set vtstate col $opt_startcolumn + if {[dict get $vtstate row] > [llength $outputlines]} { + #lappend outputlines {*}[lrepeat 1 ""] + # BCE + #puts "row: $row > outputlines: [llength $outputlines] overlay replay:[ansistring VIEW $replay_codes_overlay]" + lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] + } } lf_overflow { - #linefeed after renderwidth e.g at column 81 for an 80 col width - #we may also have other control sequences that came after col 80 e.g cursor save + #linefeed after renderwidth e.g at column 81 for an 80 col width + #we may also have other control sequences that came after col 80 e.g cursor save - if 0 { - set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] - set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] - set rhs "" - - #assertion - there should be no overflow.. - puts $lhs - } - if {![tcl::dict::get $vtstate insert_mode]} { - assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right when not insert_mode - } - - set row $post_render_row - #set row $renderedrow - #incr row - #only add newline if we're at the bottom - if {$row > [llength $outputlines]} { - #lappend outputlines {*}[lrepeat 1 ""] - # BCE - lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - } - set col $opt_startcolumn + if {![tcl::dict::get $vtstate insert_mode]} { + assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right when not insert_mode + } + dict set vtstate row $post_render_row + #only add newline if we're at the bottom + if {[dict get $vtstate row] > [llength $outputlines]} { + #lappend outputlines {*}[lrepeat 1 ""] + # BCE + lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] + } + dict set vtstate col $opt_startcolumn } newlines_above { #we get a newlines_above instruction when received at column 1 @@ -1390,76 +1146,53 @@ tcl::namespace::eval overtype { puts "--->newlines_above" puts "rinfo: $rinfo" #renderline doesn't advance the row for us - the caller has the choice to implement or not - set row $post_render_row - set col $post_render_col + set temp_row $post_render_row if {$insert_lines_above > 0} { - set row $renderedrow + set temp_row $renderedrow #set outputlines [linsert $outputlines $renderedrow-1 {*}[lrepeat $insert_lines_above ""]] #ledit outputlines $renderedrow-1 $renderedrow-2 {*}[lrepeat $insert_lines_above ""] # BCE (background color erase) set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] ledit outputlines $renderedrow-1 -1 {*}[lrepeat $insert_lines_above $bce_line] #ledit outputlines $renderedrow-1 -1 {*}[lrepeat $insert_lines_above ""] - incr row [expr {$insert_lines_above -1}] ;#we should end up on the same line of text (at a different index), with new empties inserted above + incr temp_row [expr {$insert_lines_above -1}] ;#we should end up on the same line of text (at a different index), with new empties inserted above #? set row $post_render_row #can renderline tell us? } + dict set vtstate row $temp_row + dict set vtstate col $post_render_col } newlines_below { #obsolete? - use for ANSI insert lines sequence - if {$data_mode == 0} { - puts --->nl_below - set row $post_render_row - set col $post_render_col - if {$insert_lines_below == 1} { - #set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $rendered]] - #set lhs [textblock::frame -title rendered -subtitle "row-$renderedrow" $lhs] - #set rhs "" - #if {$overflow_right ne ""} { - # set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $overflow_right]] - # set rhs [textblock::frame -title overflow_right $rhs] - #} - #puts [textblock::join $lhs $rhs] - - #rendered - append rendered $overflow_right - # + puts --->nl_below + set temp_row $post_render_row + set temp_col $post_render_col + if {$insert_lines_below == 1} { + #set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $rendered]] + #set lhs [textblock::frame -title rendered -subtitle "row-$renderedrow" $lhs] + #set rhs "" + #if {$overflow_right ne ""} { + # set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $overflow_right]] + # set rhs [textblock::frame -title overflow_right $rhs] + #} + #puts [textblock::join $lhs $rhs] + #rendered + append rendered $overflow_right + # - set overflow_right "" - set row $renderedrow - #only add newline if we're at the bottom - if {$row > [llength $outputlines]} { - set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - lappend outputlines {*}[lrepeat $insert_lines_below $bce_line] - #lappend outputlines {*}[lrepeat $insert_lines_below ""] - } - incr row $insert_lines_below - set col $opt_startcolumn - } - } else { - set row $post_render_row - if {$post_render_row > [llength $outputlines]} { - if {$opt_appendlines} { - set diff [expr {$post_render_row - [llength $outputlines]}] - set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - - if {$diff > 0} { - lappend outputlines {*}[lrepeat $diff $bce_line] - #lappend outputlines {*}[lrepeat $diff ""] - } - lappend outputlines $bce_line - #lappend outputlines "" - } - } else { - set existingdata [lindex $outputlines [expr {$post_render_row -1}]] - set lastdatacol [punk::ansi::printing_length $existingdata] - if {$lastdatacol < $renderwidth} { - set col [expr {$lastdatacol+1}] - } else { - set col $renderwidth - } + set overflow_right "" + set temp_row $renderedrow + #only add newline if we're at the bottom + if {$temp_row > [llength $outputlines]} { + set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] + lappend outputlines {*}[lrepeat $insert_lines_below $bce_line] + #lappend outputlines {*}[lrepeat $insert_lines_below ""] } + incr temp_row $insert_lines_below + set temp_col $opt_startcolumn } + dict set vtstate row $temp_row + dict set vtstate col $temp_col } wrapmoveforward { #doesn't seem to be used by fruit.ans testfile @@ -1493,8 +1226,8 @@ tcl::namespace::eval overtype { set c $post_render_col } #puts stderr "wrapmoveforward - moving from row $row col $col to row $r col $c" - set row $r - set col $c + dict set vtstate row $r + dict set vtstate col $c } wrapmovebackward { set c $renderwidth @@ -1522,8 +1255,8 @@ tcl::namespace::eval overtype { } else { puts stderr "Wrapmovebackward - but postrendercol >= 1???" } - set row $r - set col $c + dict set vtstate row $r + dict set vtstate col $c } overflow { #normal single-width grapheme overflow @@ -1539,13 +1272,13 @@ tcl::namespace::eval overtype { #example trigger: textblock::frame -width 80 [ansicat us-pump_up_the_jam-355.ans 355x100] - set row $post_render_row ;#renderline will not advance row when reporting overflow char + set r $post_render_row ;#renderline will not advance row when reporting overflow char #puts stderr "overflow autowrap - wrap to next line row: $row autowrap: [tcl::dict::get $vtstate autowrap_mode] renderwidth: $renderwidth visualwidth: $visualwidth [ansistring VIEW $unapplied]" if {[tcl::dict::get $vtstate autowrap_mode]} { - incr row - set col $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? + incr r + set c $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? } else { - set col $post_render_col + set c $post_render_col #The overflow can only be triggered by a grapheme (todo cluster?) - but our unapplied could contain SGR codes prior to the grapheme that triggered overflow - so we need to skip beyond any SGRs #There may be more than one, because although the stack leading up to overflow may have been merged - codes between the last column and the overflowing grapheme will remain separate #We don't expect any movement or other ANSI codes - as if they came before the grapheme, they would have triggered a different instruction to 'overflow' @@ -1598,9 +1331,12 @@ tcl::namespace::eval overtype { set overflow_handled 1 #handled by dropping overflow if any } + dict set vtstate row $r + dict set vtstate col $c } overflow_splitchar { - set row $post_render_row ;#renderline will not advance row when reporting overflow char + set r $post_render_row ;#renderline will not advance row when reporting overflow char + set c $post_render_col #2nd half of grapheme would overflow - treggering grapheme is returned in unapplied. There may also be overflow_right from earlier inserts #todo - consider various options .. re-render a single trailing space or placeholder on same output line, etc @@ -1622,8 +1358,8 @@ tcl::namespace::eval overtype { #review - inefficient to re-split (return unapplied_tagged instead of unapplied_ansisplit?) set unapplied_ansisplit [punk::ansi::ta::split_codes_single $unapplied] } else { - set col $opt_startcolumn - incr row + set c $opt_startcolumn + incr r } } else { set overflow_handled 1 @@ -1646,13 +1382,14 @@ tcl::namespace::eval overtype { set unapplied_ansisplit [punk::ansi::ta::split_codes_single $unapplied] } } - + dict set vtstate row $r + dict set vtstate col $c } vt { #can vt add a line like a linefeed can? - set row $post_render_row - set col $post_render_col + dict set vtstate row $post_render_row + dict set vtstate col $post_render_col } set_window_title { set newtitle [lindex $instruction 1] @@ -1735,19 +1472,6 @@ tcl::namespace::eval overtype { lappend nextprefix_list {*}[lrange $unapplied_ansisplit 1 end] } - if 0 { - if {$nextprefix ne ""} { - set nextoveridx [expr {$overidx+1}] - if {$nextoveridx >= [llength $inputchunks]} { - lappend inputchunks $nextprefix - } else { - #lset overlines $nextoveridx $nextprefix[lindex $overlines $nextoveridx] - #set inputchunks [linsert $inputchunks $nextoveridx $nextprefix] - ledit inputchunks $nextoveridx -1 $nextprefix - } - } - } - if {[llength $nextprefix_list]} { #set inputchunks [linsert $inputchunks 0 $nextprefix] #JMN - assume backwards compat ledit available from punk::lib (for tcl <9) @@ -1766,7 +1490,6 @@ tcl::namespace::eval overtype { set debugmsg "" append debugmsg "${Y}${sep_header}${RST}" \n append debugmsg "looplimit $looplimit reached\n" - append debugmsg "data_mode:$data_mode\n" append debugmsg "opt_appendlines:$opt_appendlines\n" append debugmsg "prev_row :[tcl::dict::get $renderopts -cursor_row]\n" append debugmsg "prev_col :[tcl::dict::get $renderopts -cursor_column]\n" @@ -2446,7 +2169,6 @@ tcl::namespace::eval overtype { Default is \uFFFD - the unicode replacement char.} -cursor_restore_attributes -default "" -cp437 -default 0 -type boolean - -experimental -default {} -overtext_type -type string -choices {mixed plain ansisplit} -default mixed @values -min 2 -max 2 undertext -type string -help\ @@ -2564,7 +2286,6 @@ tcl::namespace::eval overtype { -exposed2 \uFFFD -cursor_restore_attributes "" -cp437 0 - -experimental {} -overtext_type mixed }] #-overtext_type plain|mixed|ansisplit @@ -2580,7 +2301,7 @@ tcl::namespace::eval overtype { set argsflags [lrange $args 0 end-2] tcl::dict::for {k v} $argsflags { switch -- $k { - -experimental - -cp437 - -width - -expand_right - -transparent - -startcolumn - -cursor_column - -cursor_row + -cp437 - -width - -expand_right - -transparent - -startcolumn - -cursor_column - -cursor_row - -crm_mode - -insert_mode - -autowrap_mode - -reverse_mode - -etabs - -info - -exposed1 - -exposed2 - -cursor_restore_attributes - -overtext_type { tcl::dict::set opts $k $v @@ -4053,6 +3774,7 @@ tcl::namespace::eval overtype { } B { #CUD - Cursor Down + #CSI n B #Row move - down lassign [split $param {;}] num modifierkey set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] @@ -4379,10 +4101,34 @@ tcl::namespace::eval overtype { if {$param eq ""} {set param 0} switch -exact -- $param { 0 { - #clear from cursor to end of screen + #ED 0 - clear from cursor to end of screen (including cursor position) + #Current-line part can be done here; remaining lines are handled by caller. + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] + incr idx_over + if {[llength $outcols] && $idx < [llength $outcols]} { + priv::render_erasechar $idx [expr {[llength $outcols] - $idx}] + } + priv::render_to_unapplied $overlay_grapheme_control_list $gci + set instruction clear_to_end_display + break } 1 { - #clear from cursor to beginning of screen + #ED 1 - clear from start of screen to cursor + #Current-line part can be done here; previous lines are handled by caller. + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] + incr idx_over + if {[llength $outcols] && $idx >= 0} { + set count [expr {$idx + 1}] + if {$count > [llength $outcols]} { + set count [llength $outcols] + } + if {$count > 0} { + priv::render_erasechar 0 $count + } + } + priv::render_to_unapplied $overlay_grapheme_control_list $gci + set instruction clear_to_start_display + break } 2 { #clear entire screen CSI 2J @@ -4400,7 +4146,8 @@ tcl::namespace::eval overtype { break } 3 { - #clear entire screen. presumably cursor doesn't move - otherwise it would be same as 2J ? + #clear entire screen. As well as scrollback buffer if supported (unimplemented) + puts stderr "overtype::renderline ED 3 - clear entire screen and scrollback buffer if supported (unimplemented) [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" } default { @@ -4461,8 +4208,79 @@ tcl::namespace::eval overtype { } M { #CSI Pn M - DL - Delete Line - puts stderr "overtype::renderline DL - Delete Line - not implemented [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + if {$param eq "" || $param eq "0"} {set param 1}; #param=number of chars to delete + if {![string is integer -strict $param] || $param < 0} { + puts stderr "overtype::renderline DCH bad param '$param' - assuming 1 [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set param 1 + } + priv::render_to_unapplied $overlay_grapheme_control_list $gci + #The current line will be deleted by the calling function - along with more below if param > 1 + #we clear the outcols so that the result for this line is empty. + ledit outcols 0 end + ledit understacks 0 end + ledit understacks_gx 0 end + #puts stderr "overtype::renderline DL - Delete Line - not implemented [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + #todo - rename insert_lines_below to affect_lines_below or something equally generic (use for multiple instructions) + set instruction [list delete_lines $param] + break + } + P { + #DCH - Delete Character(s) + #Deletes Pn characters from cursor position, shifts line left, + #and fills vacated rightmost cells with erased cells. + if {$param eq "" || $param eq "0"} {set param 1}; #param=number of chars to delete + if {![string is integer -strict $param] || $param < 0} { + puts stderr "overtype::renderline DCH bad param '$param' - assuming 1 [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set param 1 + } + set orig_len [llength $outcols] + for {set di 0} {$di < $param} {incr di} { + priv::render_delchar $idx + } + #Maintain line width by padding erased cells at right edge. + set removed [expr {$orig_len - [llength $outcols]}] + for {set fi 0} {$fi < $removed} {incr fi} { + lappend outcols \u0000 + lappend understacks [list $replay_codes_overlay] + lappend understacks_gx [list] + #review - should we be appending gx0state here? or just empty list? + #- presumably we should be appending gx0state from the end of the line - which may be different from current gx0state if there are codes in the line that change it - but we don't want to track those changes as we delete chars - so maybe we should be appending the gx0state from the end of the line before deletion started? + #lappend understacks_gx [list $gx0state] + } + #cursor position doesn't change. + } + @ { + #ICH - Insert Character(s) + #Inserts Pn blank characters at the cursor position, shifts line right, + #and fills vacated leftmost cells with erased cells. + #The characters shifted beyond the right margin are lost. + if {$param eq "" || $param eq "0"} {set param 1}; #param=number of chars to insert + if {![string is integer -strict $param] || $param < 1} { + puts stderr "overtype::renderline ICH bad param '$param' - assuming 1 [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set param 1 + } + set orig_len [llength $outcols] + if {$overflow_idx != -1 && $param > [llength $outcols]} { + #since characters at rhs are lost, we can't insert more than the width. + set param $orig_len + } + set this_sgrstack [lindex $overlay_grapheme_control_stacks $gci] + set this_gxstack [lindex $overstacks_gx $idx_over] + #use space for inserted blanks; helper handles tab reflow + priv::render_insertgraphemes $idx [lrepeat $param " "] $this_sgrstack $this_gxstack + #Keep line width fixed unless expand-right mode is active. + if {$overflow_idx != -1} { + if {[llength $outcols] > $orig_len} { + #truncate + ledit outcols $orig_len end + ledit understacks $orig_len end + ledit understacks_gx $orig_len end + } + + } + + #cursor position doesn't change. } T { #CSI Pn T - SD Pan Up (empty lines introduced at top) @@ -4518,16 +4336,36 @@ tcl::namespace::eval overtype { #This control function sets the top and bottom margins for the current page. You cannot perform scrolling outside the margins lassign [split $param {;}] margin_top margin_bottom - #todo - return these for the caller to process.. - puts stderr "overtype::renderline DECSTBM set top and bottom margin not implemented" - #Also moves the cursor to col 1 line 1 of the page - set cursor_column 1 - set cursor_row 1 + set margins_valid 1 ;#default assumption + #todo - validate margins. A valid scrolling region requires at least two lines. + if {$margin_top eq ""} {set margin_top 1} + if {$margin_bottom eq ""} {set margin_bottom $screen_rows} + if {![string is integer -strict $margin_top] || $margin_top < 1} { + puts stderr "DECSTBM invalid margin_top '$margin_top' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {![string is integer -strict $margin_bottom] || $margin_bottom < 1} { + puts stderr "DECSTBM invalid margin_bottom '$margin_bottom' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {$margin_bottom - $margin_top < 1} { + puts stderr "DECSTBM invalid margins - bottom '$margin_bottom' must be greater than top '$margin_top' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {$margins_valid} { + #todo - return these for the caller to process.. + puts stderr "overtype::renderline DECSTBM set top and bottom margin not implemented" + #Also moves the cursor to col 1 line 1 of the page + set cursor_column 1 + set cursor_row 1 - incr idx_over - priv::render_to_unapplied $overlay_grapheme_control_list $gci - set instruction move ;#own instruction? decstbm? - break + incr idx_over + priv::render_to_unapplied $overlay_grapheme_control_list $gci + set instruction [list decstbm $margin_top $margin_bottom] + break + } else { + puts stderr "overtype::renderline DECSTBM invalid margins - ignoring [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } } s { #code conflict between ansi emulation and DECSLRM - REVIEW @@ -5023,12 +4861,12 @@ tcl::namespace::eval overtype { } D { #\x84 - #index (IND) + #index (IND) ESC D #vt102-docs: "Moves cursor down one line in same column. If cursor is at bottom margin, screen performs a scroll-up" puts stderr "renderline ESC D not fully implemented" incr cursor_row priv::render_to_unapplied $overlay_grapheme_control_list $gci - set instruction down + set instruction down_scrolling #retain cursor_column break } @@ -5062,7 +4900,7 @@ tcl::namespace::eval overtype { } #ensure rest of *overlay* is emitted to remainder priv::render_to_unapplied $overlay_grapheme_control_list $gci - set instruction up ;#need instruction for scroll-down? + set instruction up ;#need instruction for screen to scroll-down? #retain cursor_column break } @@ -5588,17 +5426,9 @@ tcl::namespace::eval overtype { set in_tab_expansion [dict create idx $i remaining [expr {$this_tab_width -1}]] set this_char \t } elseif {$ch eq "\u0000"} { - if {$cp437_glyphs} { - #map all nulls including at tail to space - set this_char " " - } else { - set this_char " " - #if {$trailing_nulls && $i < $first_tail_null_posn} { - # append outstring " " ;#map inner nulls to space - #} else { - # append outstring \u0000 - #} - } + #map all nulls including at tail to space + set this_char " " + #review } else { set this_char $ch } diff --git a/src/modules/punk-999999.0a1.0.tm b/src/modules/punk-999999.0a1.0.tm index ca6ec1f4..964855b0 100644 --- a/src/modules/punk-999999.0a1.0.tm +++ b/src/modules/punk-999999.0a1.0.tm @@ -2466,6 +2466,7 @@ namespace eval punk { set splitchars "" set assigned [split $leveldata $splitchars] }] + puts "---split script: $script" set level_script_complete 1 #todo %splitat- %splitn- ?? @@ -4205,7 +4206,7 @@ namespace eval punk { #avoid use of regexp match on each element - or we will unnecessarily force string reps on lists #same with lsearch with a string pattern - #wouldn't matter for small lists - but we need to be able to handle large ones efficiently without unneccessary string reps - set script [string map [list $scopepattern $equalsrhs] { + set script [string map [list [list $scopepattern] $equalsrhs] { #script built by punk::match_assign if {[llength $args]} { #scan for existence of any pipe operator (|*> or <*|) only - we don't need position @@ -4214,11 +4215,12 @@ namespace eval punk { # x= <| # x= |> #both leave x empty. To assign a pipelike value to x we would have to do: x= <| |> (equiv: set x |>) + set scopep foreach a $args { if {![catch {llength $a} sublen]} { #don't enforce sublen == 1. Legal to have whitespace including newlines {| x >} if {[string match |*> $a] || [string match <*| $a]} { - tailcall punk::pipeline = "" "" {*}$args + tailcall punk::pipeline = $scopep "" {*}$args } } } @@ -4594,6 +4596,10 @@ namespace eval punk { #debug.punk.pipe.rep {[rep_listname fulltail]} 6 + #review + set equalsrhs [string map [list {;} {\;}] $equalsrhs] + + #--------------------------------------------------------------------- # test if we have an initial x.=y.= or x.= y.= @@ -4643,26 +4649,31 @@ namespace eval punk { #var1 will contain ETC (from entire pipeline), var2 will contain etc (from associated segment) # - if {([set nexteposn [string last = $next1]] >= 0) && (![punk::pipe::lib::arg_is_script_shaped $next1]) } { - set nexttail [lrange $args 1 end] - #*SUB* pipeline recursion. - #puts "======> recurse based on next1:$next1 " - if {[string index $next1 $nexteposn-1] eq {.}} { - #var1.= var2.= ... - #non pipelined call to self - return result + + if {([set nexteposn [string last = $next1]] >= 0)} { + set next1 [string map [list {;} {\;}] $next1] ;#review + #do we really need to test for script_shaped if last char is = ? + if {![punk::pipe::lib::arg_is_script_shaped $next1]} { + set nexttail [lrange $args 1 end] + #*SUB* pipeline recursion. + #puts "======> recurse based on next1:$next1 " + if {[string index $next1 $nexteposn-1] eq {.}} { + #var1.= var2.= ... + #non pipelined call to self - return result + set results [uplevel 1 [list $next1 {*}$nexttail]] + #debug.punk.pipe.rep {==> rep recursive results: [rep $results]} 5 + #debug.punk.pipe {>>> results: $results} 1 + return [_handle_bind_result [_multi_bind_result $initial_returnvarspec $results]] + } + #puts "======> recurse assign based on next1:$next1 " + #if {[regexp {^([^ \t\r\n=\{]*)=(.*)} $next1 _ nextreturnvarspec nextrhs]} { + #} + #non pipelined call to plain = assignment - return result set results [uplevel 1 [list $next1 {*}$nexttail]] - #debug.punk.pipe.rep {==> rep recursive results: [rep $results]} 5 #debug.punk.pipe {>>> results: $results} 1 - return [_handle_bind_result [_multi_bind_result $initial_returnvarspec $results]] + set d [_multi_bind_result $initial_returnvarspec $results] + return [_handle_bind_result $d] } - #puts "======> recurse assign based on next1:$next1 " - #if {[regexp {^([^ \t\r\n=\{]*)=(.*)} $next1 _ nextreturnvarspec nextrhs]} { - #} - #non pipelined call to plain = assignment - return result - set results [uplevel 1 [list $next1 {*}$nexttail]] - #debug.punk.pipe {>>> results: $results} 1 - set d [_multi_bind_result $initial_returnvarspec $results] - return [_handle_bind_result $d] } } @@ -5981,6 +5992,9 @@ namespace eval punk { tailcall {*}[list ::punk::pipeline = "" "" {*}$arglist] } + #review + set assign [string map {; \\;} $assign] + set is_script [punk::pipe::lib::arg_is_script_shaped $assign] if {!$is_script && [string index $assign end] eq "="} { @@ -5999,7 +6013,7 @@ namespace eval punk { if {$is_script} { set cmdlist [list ::punk::pipeline "script" "" "" {*}$args] } else { - set cmdlist [list ::punk::pipeline ".=" "" "" {*}$args] + set cmdlist [list ::punk::pipeline ".=" "" "" $assign {*}$arglist] } } tailcall {*}$cmdlist diff --git a/src/modules/punk/aliascore-999999.0a1.0.tm b/src/modules/punk/aliascore-999999.0a1.0.tm index e7e7e2df..daa42de2 100644 --- a/src/modules/punk/aliascore-999999.0a1.0.tm +++ b/src/modules/punk/aliascore-999999.0a1.0.tm @@ -123,6 +123,7 @@ tcl::namespace::eval punk::aliascore { ansistrip ::punk::ansi::ansistrip stripansi ::punk::ansi::ansistrip ansiwrap ::punk::ansi::ansiwrap + ansisplit ::punk::ansi::ta::split_codes_single grepstr ::punk::ansi::grepstr untabify ::punk::ansi::untabify colour ::punk::console::colour diff --git a/src/modules/punk/ansi-999999.0a1.0.tm b/src/modules/punk/ansi-999999.0a1.0.tm index 78b23d3d..111d1bf5 100644 --- a/src/modules/punk/ansi-999999.0a1.0.tm +++ b/src/modules/punk/ansi-999999.0a1.0.tm @@ -127,8 +127,8 @@ tcl::namespace::eval punk::ansi::class { -width -type integer -default "" -height -type integer -default "" -crm_mode -type boolean -default 0 - -binarytext -type string -default "" -choices {"" bios ice} - -format -type string -choices {ansi binarytext-bios binarytext-ice xbin} + -format -type string -choices {ansi binarytext-bios binarytext-ice xbin} -help\ + "Format of new data being applied as an overlay" @values -min 0 -max 0 }] method rendertest {args} { @@ -136,7 +136,6 @@ tcl::namespace::eval punk::ansi::class { set opt_width [dict get $argd opts -width] set opt_height [dict get $argd opts -height] set opt_crm_mode [dict get $argd opts -crm_mode] - set opt_binarytext [dict get $argd opts -binarytext] set opt_format [dict get $argd opts -format] set existing_dimensions $o_render_dimensions @@ -152,8 +151,6 @@ tcl::namespace::eval punk::ansi::class { set o_render_dimensions ${w}x${h} - - #set rendered [overtype::renderspace -binarytext $opt_binarytext -cp437 1 -crm_mode $opt_crm_mode -expand_right 0 -wrap 1 -width $w -height $h -appendlines 1 "" [$o_ansistringobj get]] set rendered [overtype::renderspace -format $opt_format -cp437 1 -crm_mode $opt_crm_mode -expand_right 0 -wrap 1 -width $w -height $h -appendlines 1 "" [$o_ansistringobj get]] return $rendered } @@ -699,7 +696,6 @@ tcl::namespace::eval punk::ansi { } set opt_crm_mode [dict get $opts -crm_mode] - set binarytext "" set sdict [dict create] #if SAUCE data is present - it may give an indication of encoding as well as number of columns/lines if {![catch {package require punk::ansi::sauce}]} { @@ -720,8 +716,7 @@ tcl::namespace::eval punk::ansi { switch -exact -- [string tolower [file extension $fname]] { .bin { #In the absence of SAUCE data - assume .bin is binary text - set binarytext bios ;#16 fg, 8 bg + blink - set format binarytext-bios + set format binarytext-bios ;#16 fg, 8 bg + blink } .xb { set format xbin @@ -744,12 +739,10 @@ tcl::namespace::eval punk::ansi { if {[dict exists $sdict datatype_name]} { switch -- [dict get $sdict datatype_name] { binarytext { - #SAUCE ANSiFlags - ice vs default bios + #SAUCE ANSiFlags - iCE vs default bios if {[dict exists $sdict ansiflags_ice] && [dict get $sdict ansiflags_ice]} { - set binarytext ice set format binarytext-ice } else { - set binarytext bios set format binarytext-bios } } @@ -825,13 +818,14 @@ tcl::namespace::eval punk::ansi { #set ansidata $hdr$data #don't convert at all - compressed is binary? + } elseif {[string match binarytext* $format]} { + #don't convert - this is binary data - the rendering obj will handle it as binary } else { set ansidata [encoding convertfrom $encoding $ansidata] } set obj [punk::ansi::class::class_ansi new $ansidata] if {$encoding eq "cp437"} { - #set result [$obj rendertest -binarytext $binarytext -width $cols -height $rows -crm_mode $opt_crm_mode] set result [$obj rendertest -format $format -width $cols -height $rows -crm_mode $opt_crm_mode] } else { set result [$obj render $dimensions] @@ -6251,24 +6245,12 @@ be as if this was off - ie lone CR. #counterintuitively "x\b" still shows the x ie length is still one. The backspace just moves the position. There must be a char following \b for it to affect the length. #(we are not interested in the destructive backspace case present in editors,terminals etc - that is a different context) - set n 0 - #set chars [split $line ""] ; #review - graphemes vs chars? Terminals differ in how they treat this. - set chars [punk::char::grapheme_split $line] - set cr_posns [lsearch -all $chars \r] - set bs_posns [lsearch -all $chars \b] - foreach p $cr_posns { - lset chars $p - } - foreach p $bs_posns { - lset chars $p - } #mintty seems more 'correct'. It will backspace over an entire grapheme (char+combiners) whereas windows terminal/wezterm etc remove a combiner #build an output set idx 0 set outchars [list] - set outsizes [list] # -- #tcl8.6/8.7 we can get a fast byte-compiled switch statement only with literals in the source code #this is difficult/risky to maintain - hence the lsearch and grapheme-replacement above @@ -6278,39 +6260,65 @@ be as if this was off - ie lone CR. #set cr ? # -- - - #consider also that AB\0\bC will usually render as AC not ABC - foreach c $chars { - switch -- $c { - { - if {$idx > 0} { - incr idx -1 - } - } - { - set idx 0 - } - default { - if {$c eq "\0"} { - #nulls don't print - and backspace will ignore them and remove the preceding char if there is one. - #we need to remove them here or else backspace processing will only remove the null and not the preceding char - which is not how terminals work. - #review - other zero-width chars? - continue - } - #set nxt [llength $outchars] - if {$idx < [llength $outchars]} { - #overstrike? - should usually have no impact on width - width taken as last grapheme in that column - #e.g nroff would organise text such that underline written first, then backspace, then the character - so that terminals without overstrike would display something useful if no overstriking is done - #Conceivably double_wide_char then backspace then underscore would underreport the length if overstriking were intended. - lset outchars $idx $c - } else { - lappend outchars $c - } - #punk::ansi::internal::printing_length_addchar $idx $c - incr idx - } + set graphemes [punk::char::grapheme_split $line] + foreach g $graphemes { + if {$g eq "\0"} { + #nulls don't print - and backspace will ignore them and remove the preceding char if there is one. + #we need to remove them here or else backspace processing will only remove the null and not the preceding char - which is not how terminals work. + #review - other zero-width chars? + continue + } elseif {$g eq "\r"} { + set idx 0 + } elseif {$g eq "\b"} { + incr idx -1 + set idx [expr {max(0,$idx)}] + } else { + lset outchars $idx $g ;#lset will append if $idx is equal to the current length of the list - since we only increment idx by 1, this should be safe to do without checking the length first + #if {$idx < [llength $outchars]} { + # #overstrike? - should usually have no impact on width - width taken as last grapheme in that column + # #e.g nroff would organise text such that underline written first, then backspace, then the character - so that terminals without overstrike would display something useful if no overstriking is done + # #Conceivably double_wide_char then backspace then underscore would underreport the length if overstriking were intended. + # lset outchars $idx $g + #} else { + # lappend outchars $g + #} + incr idx } } + + + + #consider also that AB\0\bC will usually render as AC not ABC + #foreach g $graphemes { + # switch -exact -- $g { + # { + # if {$idx > 0} { + # incr idx -1 + # } + # } + # { + # set idx 0 + # } + # { + # #nulls don't print - and backspace will ignore them and remove the preceding char if there is one. + # #we need to remove them here or else backspace processing will only remove the null and not the preceding char - which is not how terminals work. + # #review - other zero-width chars? + # continue + # } + # default { + # #set nxt [llength $outchars] + # if {$idx < [llength $outchars]} { + # #overstrike? - should usually have no impact on width - width taken as last grapheme in that column + # #e.g nroff would organise text such that underline written first, then backspace, then the character - so that terminals without overstrike would display something useful if no overstriking is done + # #Conceivably double_wide_char then backspace then underscore would underreport the length if overstriking were intended. + # lset outchars $idx $g + # } else { + # lappend outchars $g + # } + # incr idx + # } + # } + #} #we already have the string split into grapheme clusters. #we should calculate length as the sum of the widths of the graphemes in the output list rather #than passing to a function that will need to split into graphemes again. @@ -6345,7 +6353,7 @@ be as if this was off - ie lone CR. set max_component_width 1 } } - } elseif {$c < 917504 || $c > 917631} { + } elseif {$dec < 917504 || $dec > 917631} { #codepoint not in the zero-width unicode tag block - \UE0000-\UE000F #set w [punk::char::char_width $dec] set w [textutil::wcswidth_char $dec] @@ -6372,19 +6380,6 @@ be as if this was off - ie lone CR. return $sumwidth #return [punk::char::ansifreestring_width [join $outchars ""]] } - namespace eval internal { - proc printing_length_addchar {i c} { - #review - upvar outchars outc - upvar outsizes outs - set nxt [llength $outc] - if {$i < $nxt} { - lset outc $i $c - } else { - lappend outc $c - } - } - } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- @@ -7579,6 +7574,10 @@ tcl::namespace::eval punk::ansi { #} #------------------------------------------------------- proc sgr_merge {codelist args} { + if {[llength $codelist] == 0 && [llength $args] == 0} { + return "" + } + #pass through even single code or empty codelist to sgr_merge_singles - as there may be arguments such as -info or -filter_* set allparts [list] foreach c $codelist { #set cparts [punk::ansi::ta::split_codes_single $c] @@ -9023,7 +9022,6 @@ tcl::namespace::eval punk::ansi::class { -overflow 0 -appendlines 1 -looplimit 15000 - -experimental {} -cursor_column 1 -cursor_row 1 -insert_mode 0 @@ -9034,7 +9032,7 @@ tcl::namespace::eval punk::ansi::class { foreach {k v} $argsflags { switch -- $k { -width - -height - - -overflow - -appendlines - -looplimit - -experimental - + -overflow - -appendlines - -looplimit - -autowrap_mode - -insert_mode - -initial_ansistring { @@ -9735,7 +9733,8 @@ tcl::namespace::eval punk::ansi::class { upvar ${ns}::o_gx0states new_gx0states upvar ${ns}::o_splitindex new_splitindex - lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] [lindex $new_ansisplits 0]] lappend o_ansisplits {*}[lrange $new_ansisplits 1 end] + lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] [lindex $new_ansisplits 0]] + lappend o_ansisplits {*}[lrange $new_ansisplits 1 end] lset o_ptlist end [tcl::string::cat [lindex $o_ptlist end] [lindex $new_ptlist 0]] lappend o_ptlist {*}[lrange $new_ptlist 1 end] @@ -10625,6 +10624,7 @@ tcl::namespace::eval punk::ansi::ansistring { #Note that trim/trimleft/trimright will trim spaces at the extremities that are styled with background colour, underline etc #that may be unexpected, but it's probably the only thing that makes sense. Plain string trim can chop off whitespace that is extraneous to the ansi entirely. proc trimleft {string args} { + #todo - don't just trim whitespace - need to accept optional ?chars? to trim. set intext 0 set out "" #for split_codes only first or last pt can be empty string - but we can also get an empty ansiblock by using foreach with 2 vars on an odd-length list @@ -12028,6 +12028,827 @@ tcl::namespace::eval punk::ansi::xbin { return [punk::ansi::a+ $blink rgb-$fr-$fg-$fb Rgb-$br-$bg-$bb] } + proc parse {xbindata} { + set bytenum 0 + set xbin_header [string range $xbindata 0 10] ;#11 bytes + set xbin_header_info [punk::ansi::xbin::parse_header $xbin_header] + set xbin_body [string range $xbindata 11 end] + incr bytenum 11 + + set flags [dict get $xbin_header_info flags] + set xbin_width [dict get $xbin_header_info width] + set xbin_height [dict get $xbin_header_info height] + set expected_cells [expr {$xbin_width * $xbin_height}] + set xbin_nonblink [expr {"nonblink" in $flags}] ;# ice. + set xbin_palette [punk::ansi::xbin::default_palette] + + set parse_warnings [list] + + #optional 16-entry palette, 3 bytes per entry, RGB values 0..63 + if {"palette" in $flags} { + #puts stderr "renderspace warning - palette unimplemented" + set xbin_palette [punk::ansi::xbin::parse_palette [string range $xbin_body 0 47]] + set xbin_body_after_palette [string range $xbin_body 48 end] + incr bytenum 48 + } else { + set xbin_body_after_palette $xbin_body + } + + #todo - font. + #hack - skip over font 256 x fontsize or 512 x fontsize + if {"512chars" in $flags} { + set sz 512 + } else { + set sz 256 + } + #temp + set skip [expr {$sz * [dict get $xbin_header_info fontsize]}] + if {"font" in $flags} { + #todo - consider sixel or similar for font data - but for now we just skip over it. + #puts stderr "punk::ansi::xbin::parse warning - xbin font unimplemented" + lappend parse_warnings "XBIN_FONT_UNIMPLEMENTED skipping over font data" + set celldata [string range $xbin_body_after_palette $skip end] + incr bytenum $skip + } else { + set celldata $xbin_body_after_palette + } + set celldata_bytes [split $celldata ""] + #puts stdout "xbin image data size [llength $celldata_bytes]" + + set decoded_cells 0 + set ansisplit [list ""] + if {"compress" in $flags} { + #puts stderr "renderspace warning - compress experimental" + #process 'repeatcounter' bytes + #first 2 bits - compression type + # 00 - no compression + # 01 - character compression + # 10 - attribute compression + # 11 - character/attribute compression + #remaining 6 bits - counter + set input "" + set byte_count [llength $celldata_bytes] + for {set b 0} {$b < $byte_count} {} { + set rc [lindex $celldata_bytes $b] + set dec [scan $rc %c] + set ctype [expr {$dec >> 6}] + #0x3F - 00111111 + set count [expr {$dec & 0x3F}] + incr count ;#count stored as 1 less than actual number of repeats + if {$count < 1 || $count > 64} { + #generally unlikely to occur if we are decoding 6 bits of count correctly. + # - but will be zero for example if we have a trailing carriage return. + puts stderr "punk::ansi::xbin::parse - max count must be between 1 and 64 inclusive. received $count" + } + incr b + if {$decoded_cells + $count > $expected_cells} { + #some of the more common causes of this could be additional non xbin data after the expected end of celldata, eg: + #\x1a (ctrl-z) decimal value 26 (= count 27) delimiter for start of SAUCE record. + #\r (carriage regurn) decimal value 13 (= count 14) + #\n (line feed) decimal value 10 (= count 11) + # or it could be more celldata but the header dimensions are wrong + #- either way we should probably just warn and stop processing. + lappend parse_warnings "XBIN_OVERFLOW - record would emit $count cells at decoded offset $decoded_cells, expected total $expected_cells cells for header dimensions ${xbin_width}x${xbin_height} (possible trailing SAUCE record or newlines)" + break + } + switch -exact -- $ctype { + 0 { + set needed [expr {$count * 2}] + } + 1 - + 2 { + set needed [expr {$count + 1}] + } + 3 { + set needed 2 + } + default { + #hard error - will probably cause desynchronization between decoder and byte stream + error "punk::ansi::xbin::parse - invalid compression type $ctype in repeatcounter byte '$rc' at offset $b" + } + } + if {$b + $needed > $byte_count} { + lappend parse_warnings "XBIN_BAD_RECORD - truncated record: type $ctype requires $needed bytes at payload offset $b, but only [expr {$byte_count - $b}] bytes remain." + #abort processing - would probably raise an error in the compression switch cases below. + #This may indicate a truncated file, but it could also be a file with additional data after the expected end of celldata. + #This is likely to happen if the xbindata includes a trailing SAUCE record. + #we shouldn't raise a hard error - as the caller may want to salvage what data they can from the file, and report the issue via warnings. + break + } + switch -exact -- $ctype { + 0 { + #no compression + for {set c 0} {$c < $count*2} {incr c 2} { + set ch [lindex $celldata_bytes $b+$c] + set ch [encoding convertfrom cp437 $ch] + set at [lindex $celldata_bytes [expr {$b+$c+1}]] + #binary scan $at cu code + #set clr [a+ term-$code] + #set clr [a+ red] ;#debug + set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] + lappend ansisplit $clr $ch + } + incr b [expr {$count*2}] + } + 1 { + #char compression + set ch [lindex $celldata_bytes $b] + set ch [encoding convertfrom cp437 $ch] + incr b + for {set c 0} {$c < $count} {incr c} { + set at [lindex $celldata_bytes $b+$c] + #binary scan $at cu code + #set clr [a+ term-$code] + #set clr [a+ cyan] ;#debug + set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] + lappend ansisplit $clr $ch + } + incr b [expr {$count}] + } + 2 { + #attribute compression + set at [lindex $celldata_bytes $b] + #binary scan $at cu code + #set clr [a+ term-$code] + #set clr [a+ green] ;#debug + set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] + incr b + for {set c 0} {$c < $count} {incr c} { + set ch [lindex $celldata_bytes $b+$c] + set ch [encoding convertfrom cp437 $ch] + lappend ansisplit $clr $ch + } + incr b $count + } + 3 { + #attribute and char compression + set ch [lindex $celldata_bytes $b] + set ch [encoding convertfrom cp437 $ch] + set at [lindex $celldata_bytes $b+1] + #binary scan $at cu code + #set clr [a+ term-$code] + #set clr [a+ white] ;#debug + set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] + for {set c 0} {$c < $count} {incr c} { + lappend ansisplit $clr $ch + } + incr b 2 + } + } + incr decoded_cells $count + } + if {$decoded_cells != $expected_cells} { + lappend parse_warnings "XBIN_CELLCOUNT_MISMATCH decoded $decoded_cells cells, expected $expected_cells cells for image dimensions ${xbin_width}x${xbin_height}" + } + } else { + foreach {ch at} $celldata_bytes { + #binary scan $at cu code + #set clr [a+ term-$code] + if {$at eq ""} { + #eg src/testansi/formatsamples/image/xbin/test.xb + #has missing last byte. for now just warn. + #puts stderr "renderspace warning - xbin attribute byte is empty at char '[ansistring VIEW $ch]'" + lappend parse_warnings "XBIN_MISSING_BYTE attribute byte is empty at byte [expr {$bytenum + 1}] char '[ansistring VIEW $ch]'" + #experiment - treat as a reset. + lappend ansisplit [a+] $ch + } else { + set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] + set ch [encoding convertfrom cp437 $ch] + lappend ansisplit $clr $ch + } + incr bytenum 2 + incr decoded_cells + } + } + #lappend inputchunks [list ansisplit $ansisplit] + + #_reset key with ansi reset to ensure direct display of dict in terminal is readable. + return [dict create header $xbin_header_info palette $xbin_palette ansisplit $ansisplit _reset \x1b\[m warnings $parse_warnings decoded_cells $decoded_cells expected_cells $expected_cells] + } + +} +tcl::namespace::eval punk::ansi::png { + + proc paethPredictor {a b c} { + #A Paeth PNG filter is a pre-compression image processing algorithm used in the Portable Network Graphics (PNG) format. + #It is designed to prepare image data for the format's lossless compression by predicting the color of a pixel based on + #its neighbors + set p [expr {$a + $b - $c}] + set pa [expr {abs($p - $a)}] + set pb [expr {abs($p - $b)}] + set pc [expr {abs($p - $c)}] + if {$pa <= $pb && $pa <= $pc} { return $a } + if {$pb <= $pc} { return $b } + return $c + } + + proc pngdataToAnsi {pngdata} { + #This will create very large ansi images as the smallest possible colorised cell is the half-block character. + #To create smaller images, we could consider some kind of lossy conversion to a smaller palette, or even to monochrome with dithering. + #A better alternative might be sixel or similar. + + #if {[::png::validate $filename] ne "OK"} { + # error "Invalid PNG file." + #} + # Extract PNG header metadata + #set info [::png::imageInfo $filename] + + if {[string range $pngdata 0 7] ne "\x89PNG\r\n\x1a\n"} { + error "pngdataToAnsi: Invalid PNG data - missing PNG signature" + } + + #----------------------------------------------------------------------------------------- + #set info [::png::imageInfo $filename] + #----------------------------------------------------------------------------------------- + set posn [expr {8}] ;# Skip PNG signature + binary scan [string range $pngdata $posn [expr {$posn + 7}]] Ia4 len type + incr posn 8 + set r [string range $pngdata $posn [expr {$posn + $len - 1}]] + incr posn $len + if {$type eq "IHDR"} { + binary scan $r IIccccc width height depth color compression filter interlace + binary scan [string range $pngdata $posn [expr {$posn + 3}]] I check + if {$check < 0} { + set check [format %u [expr {$check & 0xffffffff}]] + } + if {![catch {package present crc32}] && [::crc32::crc32 IHDR$r] != $check} { + error "pngdataToAnsi: Invalid PNG data - IHDR chunk CRC mismatch" + } + set info [list width $width height $height depth $depth color $color compression $compression filter $filter interlace $interlace] + } else { + error "pngdataToAnsi: Invalid PNG data - missing IHDR chunk" + } + #----------------------------------------------------------------------------------------- + + + set width [dict get $info width] + set height [dict get $info height] + set depth [dict get $info depth] + set color [dict get $info color] + set filter [dict get $info filter] + set interlace [dict get $info interlace] + set compression [dict get $info compression] + if {$compression != 0} { + #true as at PNG-3 2025 + error "pngdataToAnsi: Unsupported PNG compression method $compression - only method 0 (deflate/inflate) is supported." + } + puts stderr "pngdataToAnsi: PNG image info - width $width height $height depth $depth color $color interlace $interlace filter $filter" + + set color_types { + 0 Grayscale + 2 TrueColor (RGB) + 3 Indexed-color + 4 Grayscale with alpha + 6 TrueColor with alpha (RGBA) + } + switch -exact $color { + 0 { + error "pngdataToAnsi warning - PNG color type 0 (grayscale) not supported - todo: treat as RGB with R=G=B ?" + set ctype "grayscale" + if {$depth ni {1 2 4 8 16}} { + error "Unsupported format. Only grayscale PNGs with bit depths of 1, 2, 4, 8, or 16 are supported." + } + } + 2 { + # RGB TrueColor - supported + set ctype "rgb" + #todo depth 16 + if {$depth != 8} { + error "Unsupported format. Only 8-bit RGB or RGBA PNGs are supported." + } + set bpp 3 + } + 3 { + set ctype "indexed" + puts stderr "pngdataToAnsi warning - PNG color type 3 (indexed colour)" + if {$depth ni {1 2 4 8}} { + error "Unsupported format. Only indexed-color PNGs with 1,2,4 or 8 bit depth are supported." + } + set bpp 1 + } + 4 { + error "pngdataToAnsi warning - PNG color type 4 (grayscale with alpha) not supported - todo: treat as RGBA with R=G=B and alpha channel" + set ctype "grayscale_alpha" + set bpp 3 ;#Bytes per pixel + if {$depth ni {8 16}} { + error "Unsupported format. Only grayscale PNGs with bit depths of 8 or 16 are supported." + } + } + 6 { + puts stderr "pngdataToAnsi warning - PNG color type 6 (truecolor with alpha)" + set ctype "rgba" + if {$depth == 8} { + set bpp 4 ;#Bytes per pixel + } elseif {$depth == 16} { + set bpp 8 ;#Bytes per pixel + } else { + error "Unsupported format. Only depths of 8 or 16 bits per channel are supported for RGBA PNGs." + } + } + default { + error "pngdataToAnsi: Unsupported PNG color type $color" + } + } + + + #------------------------------------------ + # Extract raw compressed IDAT stream chunks + #set chunks [::png::getChunks $filename] + set chunks [list] + set posn [expr {8}] ;# Skip PNG signature + while {[set r [string range $pngdata $posn [incr posn 8]]] ne ""} { + binary scan $r Ia4 len type + if {$type eq "IEND"} { + #end of PNG data - stop processing chunks + #(important to stop before we try to process any trailing non-PNG data such as a SAUCE record) + break + } + lappend chunks [list $type $posn $len] + incr posn [expr {$len + 4}] + } + #------------------------------------------ + puts stderr "pngdataToAnsi: found [llength $chunks] chunks in PNG data" + foreach chunk $chunks { + puts stderr "pngdataToAnsi: chunk type '[lindex $chunk 0]' length [lindex $chunk 2]" + } + + + set paletteRaw "" + + set idatData "" + foreach chunk $chunks { + switch -exact -- [lindex $chunk 0] { + "IDAT" { + set posn [lindex $chunk 1] + append idatData [string range $pngdata $posn [expr {$posn + [lindex $chunk 2] - 1}]] + } + "PLTE" { + set posn [lindex $chunk 1] + puts stderr "pngdataToAnsi warning - PNG PLTE chunk" + #implement PLTE chunk parsing and support for indexed colour PNGs + append paletteRaw [string range $pngdata $posn [expr {$posn + [lindex $chunk 2] - 1}]] + } + "tEXt" { + set posn [lindex $chunk 1] + #todo - consider supporting tEXt chunks for metadata such as title, author, description etc. + set textData [string range $pngdata $posn [expr {$posn + [lindex $chunk 2] - 1}]] + set nullpos [string first \x00 $textData] + #neither the keyword nor text data is supposed to contain nulls. + if {$nullpos >= 0} { + set keyword [string range $textData 0 [expr {$nullpos - 1}]] + set text [string range $textData [expr {$nullpos + 1}] end]] + puts stderr "pngdataToAnsi warning - PNG tEXt chunk - keyword '$keyword' text '$text'" + } else { + puts stderr "pngdataToAnsi warning - PNG tEXt chunk - no separator null found: $textData" + } + } + "zTXt" { + #set posn [lindex $chunk 1] + #todo - consider supporting zTXt chunks for compressed metadata such as title, author, description etc. + #puts stderr "pngdataToAnsi warning - PNG zTXt chunk - [string range $pngdata $posn [expr {$posn + [lindex $chunk 2] - 1}]]" + } + "iTXt" { + #set posn [lindex $chunk 1] + #todo - consider supporting iTXt chunks for international text metadata such as title, author, description etc. + #puts stderr "pngdataToAnsi warning - PNG iTXt chunk - [string range $pngdata $posn [expr {$posn + [lindex $chunk 2] - 1}]]" + } + "IEND" { + } + default { + #ignore other chunk types for now + } + } + } + if {$ctype eq "indexed" && $paletteRaw eq ""} { + error "pngdataToAnsi: Indexed colour PNG missing PLTE chunk" + } + if {[string match grayscale* $ctype] && $paletteRaw ne ""} { + puts stderr "pngdataToAnsi warning - PNG PLTE chunk present in grayscale image - ignoring palette data" + } + if {$paletteRaw ne ""} { + set palette [list] + binary scan $paletteRaw c* components + puts "components: $components '[ansistring VIEW $paletteRaw]'" + foreach {r g b} $components { + lappend palette [list [expr {$r & 0xFF}] [expr {$g & 0xFF}] [expr {$b & 0xFF}]] + } + } + + # Decompress using raw Tcl zlib inflation + set decompressed [zlib decompress $idatData] + #set decompressed [zlib deflate $idatData] + #PLTE data is not compressed. + + #set stride [expr {1 + ($width * $bpp)}] + #set prevLine [binary format x[expr {$width * $bpp}]] ;# Row 0 baseline + + if {$ctype eq "indexed"} { + set bytesPerLine [expr {($width * $depth + 7) / 8}] + } else { + set bytesPerLine [expr {$width * $bpp}] + } + set stride [expr {1 + $bytesPerLine}] ;# Filter type byte + pixel data bytes + set prevLine [binary format x$bytesPerLine] ;# Row 0 baseline + set allRows [list] + + + # Process rows + for {set y 0} {$y < $height} {incr y} { + set offset [expr {$y * $stride}] + + # Unpack the filter type byte at start of each scanline + #puts "---> filter type byte: [ansistring VIEW [string range $decompressed $offset $offset]] at offset $offset for row $y" + binary scan [string range $decompressed $offset $offset] c filterType + set filterType [expr {$filterType & 0xFF}] + if {$filterType < 0 || $filterType > 4} { + puts stderr "pngdataToAnsi warning - invalid filter type $filterType at row $y - treating as no filter" + set filterType 0 + } + + # Get filtered pixel payload bytes for the row + set rawRow [string range $decompressed [expr {$offset + 1}] [expr {$offset + $stride - 1}]] + set currentLine "" + + # Defilter scanline bytes based on specification types + for {set xBytes 0} {$xBytes < $bytesPerLine} {incr xBytes} { + binary scan [string range $rawRow $xBytes $xBytes] c origByte + set origByte [expr {$origByte & 0xFF}] + + # Get left byte (A) and upper byte (B) and upper-left byte (C) + #set leftVal [expr {$xBytes >= $bpp ? [string index $currentLine [expr {$xBytes - $bpp}]] : 0}] + #binary scan $leftVal c a + #set a [expr {$a & 0xFF}] + if {$xBytes >= $bpp} { + binary scan [string index $currentLine [expr {$xBytes - $bpp}]] c a + set a [expr {$a & 0xFF}] + } else { + set a 0 + } + + binary scan [string range $prevLine $xBytes $xBytes] c b; + set b [expr {$b & 0xFF}] + + #set upLeftVal [expr {$xBytes >= $bpp ? [string index $prevLine [expr {$xBytes - $bpp}]] : 0}] + #binary scan $upLeftVal c c + #set c [expr {$c & 0xFF}] + if {$xBytes >= $bpp} { + binary scan [string index $prevLine [expr {$xBytes - $bpp}]] c c + set c [expr {$c & 0xFF}] + } else { + set c 0 + } + + # Reverse the PNG filter transformations + switch -- $filterType { + 0 { set reconByte $origByte } ;# None + 1 { set reconByte [expr {($origByte + $a) % 256}] } ;# Sub + 2 { set reconByte [expr {($origByte + $b) % 256}] } ;# Up + 3 { set reconByte [expr {($origByte + (($a + $b) / 2)) % 256}] } ;# Average + 4 { set reconByte [expr {($origByte + [paethPredictor $a $b $c]) % 256}] } ;# Paeth + default { + } + } + append currentLine [binary format c $reconByte] + } + set prevLine $currentLine + + if {$ctype eq "indexed"} { + # For indexed colour PNGs, map pixel values to RGB using the PLTE chunk palette + set pixelRow [list] + set pixelCount 0 + + #pre-calculate masks and steps based on depth + # depth 4: mask = 15 (0x0F), pixels per byte = 2 + # depth 2: mask = 3 (0x03), pixels per byte = 4 + # depth 1: mask = 1 (0x01), pixels per byte = 8 + set mask [expr {(1 << $depth) - 1}] + set pixelsPerByte [expr {8 / $depth}] + + for {set x 0} {$x < $bytesPerLine} {incr x} { + binary scan [string range $currentLine $x $x] c packedByte + set byteVal [expr {$packedByte & 0xFF}] + + #read left-to-right within the byte, extracting pixel values based on depth and mask + for {set p 0} {$p < $pixelsPerByte} {incr p} { + if {$pixelCount < $width} { + #set shift [expr {($pixelsPerByte - 1 - $p) * $depth}] + set shift [expr {8 - $depth - ($p * $depth)}] + set idx [expr {($byteVal >> $shift) & $mask}] + set rgb [lindex $palette $idx] + #append outputBuffer [format "\x1b\[48\;2\;%d\;%d\;%dm " [lindex $rgb 0] [lindex $rgb 1] [lindex $rgb 2]] + #lappend pixelRow $idx + lappend pixelRow [list [lindex $rgb 0] [lindex $rgb 1] [lindex $rgb 2]] + incr pixelCount + } + } + + } + } else { + #RGB + set pixelRow [list] + for {set x 0} {$x < $width} {incr x} { + set idx [expr {$x * $bpp}] + #pull either 3 bytes (RGB) or 4 bytes (RGBA) for the pixel, depending on bpp + if {$depth == 16} { + binary scan [string range $currentLine $idx [expr {$idx + 3}]] c4 rgba + set r [expr {[lindex $rgba 0] & 0xFF}] + set g [expr {[lindex $rgba 1] & 0xFF}] + set b [expr {[lindex $rgba 2] & 0xFF}] + set a [expr {[lindex $rgba 3] & 0xFF}] + + #terminal fallback background colour .eg dark terminal grey + set bgR 30 + set bgG 30 + set bgB 30 + set alpha [expr {$a / 255.0}] + + set r [expr {int(($r * $alpha) + ($bgR * (1 - $alpha)))}] + set g [expr {int(($g * $alpha) + ($bgG * (1 - $alpha)))}] + set b [expr {int(($b * $alpha) + ($bgB * (1 - $alpha)))}] + } else { + binary scan [string range $currentLine $idx [expr {$idx + 2}]] c3 rgb + set r [expr {[lindex $rgb 0] & 0xFF}] + set g [expr {[lindex $rgb 1] & 0xFF}] + set b [expr {[lindex $rgb 2] & 0xFF}] + #puts stderr "pixel $x,$y - RGB($r,$g,$b)" + } + + + # Use background-color escape sequence with two blank spaces to build a square pixel + #append outputBuffer "\x1b\[48\;2\;${r}\;${g}\;${b}m " + lappend pixelRow [list $r $g $b] + } + #append outputBuffer "\x1b\[m\n" ;# Reset row styling + } + lappend allRows $pixelRow + } + + set symbols 1 + # ------------------------------------------------------------- + # Unicode Quadrant Mosaic Definition Matrix + # ------------------------------------------------------------- + # Maps a 4-bit representation of a 2x2 grid to a structural character. + # Layout: Bit 3 = TopLeft, Bit 2 = TopRight, Bit 1 = BottomLeft, Bit 0 = BottomRight + variable MOSAIC_MAP + array set MOSAIC_MAP { + 0 " " 1 "▗" 2 "▖" 3 "▄" + 4 "▝" 5 "▐" 6 "▞" 7 "▟" + 8 "▘" 9 "▚" 10 "▌" 11 "▙" + 12 "▀" 13 "▜" 14 "▛" 15 "█" + } + + # ------------------------------------------------------------- + # Sub-Pixel Structural Rendering Engine + # ------------------------------------------------------------- + proc renderSymbols {allRows width height} { + variable MOSAIC_MAP + set outputBuffer "" + fconfigure stdout -encoding utf-8 + + # Process chunks of 2 vertical rows and 2 horizontal columns + for {set y 0} {$y < $height} {incr y 2} { + set rowTop [lindex $allRows $y] + + # Edge safety padding for odd vertical bounds + if {($y + 1) < $height} { + set rowBottom [lindex $allRows [expr {$y + 1}]] + } else { + set rowBottom $rowTop + } + + for {set x 0} {$x < $width} {incr x 2} { + # Extract 4 pixels of the 2x2 cluster + set p_tl [lindex $rowTop $x] + + if {($x + 1) < $width} { + set p_tr [lindex $rowTop [expr {$x + 1}]] + set p_bl [lindex $rowBottom $x] + set p_br [lindex $rowBottom [expr {$x + 1}]] + } else { + # Pad horizontally if image width is odd + set p_tr $p_tl; set p_bl $p_tl; set p_br $p_tl + } + + # Calculate individual pixel luminance values (Standard Rec. 601 weights) + set l_tl [expr {[lindex $p_tl 0]*0.299 + [lindex $p_tl 1]*0.587 + [lindex $p_tl 2]*0.114}] + set l_tr [expr {[lindex $p_tr 0]*0.299 + [lindex $p_tr 1]*0.587 + [lindex $p_tr 2]*0.114}] + set l_bl [expr {[lindex $p_bl 0]*0.299 + [lindex $p_bl 1]*0.587 + [lindex $p_bl 2]*0.114}] + set l_br [expr {[lindex $p_br 0]*0.299 + [lindex $p_br 1]*0.587 + [lindex $p_br 2]*0.114}] + + # Block Threshold: Local average brightness + set avg_lum [expr {($l_tl + $l_tr + $l_bl + $l_br) / 4.0}] + + # Build the 4-bit structure index mapping bitwise states + set bitmask 0 + if {$l_tl >= $avg_lum} { set bitmask [expr {$bitmask | 8}] } + if {$l_tr >= $avg_lum} { set bitmask [expr {$bitmask | 4}] } + if {$l_bl >= $avg_lum} { set bitmask [expr {$bitmask | 2}] } + if {$l_br >= $avg_lum} { set bitmask [expr {$bitmask | 1}] } + + # Segregate pixels into foreground (bright) and background (dark) sets + set fg_r 0; set fg_g 0; set fg_b 0; set fg_count 0 + set bg_r 0; set bg_g 0; set bg_b 0; set bg_count 0 + + foreach p [list $p_tl $p_tr $p_bl $p_br] lum [list $l_tl $l_tr $l_bl $l_br] { + if {$lum >= $avg_lum} { + incr fg_r [lindex $p 0]; incr fg_g [lindex $p 1]; incr fg_b [lindex $p 2] + incr fg_count + } else { + incr bg_r [lindex $p 0]; incr bg_g [lindex $p 1]; incr bg_b [lindex $p 2] + incr bg_count + } + } + + # Compute color averages for both states + if {$fg_count > 0} { + set fR [expr {$fg_r / $fg_count}]; set fG [expr {$fg_g / $fg_count}]; set fB [expr {$fg_b / $fg_count}] + } else { + set fR 0; set fG 0; set fB 0 + } + if {$bg_count > 0} { + set bR [expr {$bg_r / $bg_count}]; set bG [expr {$bg_g / $bg_count}]; set bB [expr {$bg_b / $bg_count}] + } else { + # If everything is uniform, match foreground color to prevent ghosting borders + set bR $fR; set bG $fG; set bB $fB + } + + # Pull symbol match out of the layout map + set symbol $MOSAIC_MAP($bitmask) + + # Generate the combined true color escape output string + append outputBuffer "\x1b\[48\;2\;${bR}\;${bG}\;${bB}m\x1b\[38\;2\;${fR}\;${fG}\;${fB}m${symbol}" + } + append outputBuffer "\x1b\[m\n" + } + return $outputBuffer + } + # ------------------------------------------------------------- + # High-Density 8x4 Block (Braille Mosaic) Rendering Engine + # ------------------------------------------------------------- + proc renderBrailleDensity {allRows width height} { + set outputBuffer "" + fconfigure stdout -encoding utf-8 + + # We skip 8 vertical rows and 4 horizontal pixels per cell cycle + # to achieve a 4x reduction factor (accounting for terminal aspect ratios) + for {set y 0} {$y < $height} {incr y 8} { + + # Buffer up to 8 rows for processing this line + set activeRows [list] + for {set r 0} {$r < 8} {incr r} { + if {($y + $r) < $height} { + lappend activeRows [lindex $allRows [expr {$y + $r}]] + } else { + lappend activeRows "" ;# Pad vertical overflow with empty lines + } + } + + for {set x 0} {$x < $width} {incr x 4} { + + # --- 1. Downsample the 8x4 cluster into a 4x2 grid for Braille --- + # Each cell in our 4x2 grid averages a 2x2 pixel area from the image + set subGridLums [list 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0] + set subGridRgbs [list] + set totalBlockLum 0.0 + + set cellIdx 0 + for {set subY 0} {$subY < 8} {incr subY 2} { + for {set subX 0} {$subX < 4} {incr subX 2} { + + # Accumulate colors for this specific 2x2 sub-pixel zone + set sR 0; set sG 0; set sB 0; set sCount 0 + for {set dy 0} {$dy < 2} {incr dy} { + set rowIdx [expr {$subY + $dy}] + set currRow [lindex $activeRows $rowIdx] + if {$currRow eq ""} { continue } + + for {set dx 0} {$dx < 2} {incr dx} { + set pixelX [expr {$x + $subX + $dx}] + if {$pixelX >= $width} { continue } + + set pixel [lindex $currRow $pixelX] + incr sR [lindex $pixel 0] + incr sG [lindex $pixel 1] + incr sB [lindex $pixel 2] + incr sCount + } + } + + # Store sub-zone averages + if {$sCount > 0} { + set sR [expr {$sR / $sCount}]; set sG [expr {$sG / $sCount}]; set sB [expr {$sB / $sCount}] + } else { + set sR 0; set sG 0; set sB 0 + } + + set sLum [expr {$sR*0.299 + $sG*0.587 + $sB*0.114}] + lset subGridLums $cellIdx $sLum + lappend subGridRgbs [list $sR $sG $sB] + set totalBlockLum [expr {$totalBlockLum + $sLum}] + incr cellIdx + } + } + + # --- 2. Calculate Thresholding & Grouping --- + set avgBlockLum [expr {$totalBlockLum / 8.0}] + + set fg_r 0; set fg_g 0; set fg_b 0; set fg_count 0 + set bg_r 0; set bg_g 0; set bg_b 0; set bg_count 0 + set brailleOffset 0 + + # Unicode Braille bitmask generation table for 4x2 cells + # Maps sequential list index (0-7) to Unicode Braille bit flags + set bitWeights [list 1 8 2 16 4 32 64 128] + + for {set i 0} {$i < 8} {incr i} { + set sLum [lindex $subGridLums $i] + set sRgb [lindex $subGridRgbs $i] + + if {$sLum >= $avgBlockLum} { + # This sub-zone is bright: Turn on the Braille dot + set brailleOffset [expr {$brailleOffset | [lindex $bitWeights $i]}] + incr fg_r [lindex $sRgb 0]; incr fg_g [lindex $sRgb 1]; incr fg_b [lindex $sRgb 2] + incr fg_count + } else { + # This sub-zone is dark + incr bg_r [lindex $sRgb 0]; incr bg_g [lindex $sRgb 1]; incr bg_b [lindex $sRgb 2] + incr bg_count + } + } + + # --- 3. Compute Final Colors --- + if {$fg_count > 0} { + set fR [expr {$fg_r / $fg_count}]; set fG [expr {$fg_g / $fg_count}]; set fB [expr {$fg_b / $fg_count}] + } else { + set fR 0; set fG 0; set fB 0 + } + if {$bg_count > 0} { + set bR [expr {$bg_r / $bg_count}]; set bG [expr {$bg_g / $bg_count}]; set bB [expr {$bg_b / $bg_count}] + } else { + set bR $fR; set bG $fG; set bB $fB + } + + # Construct the final Unicode character using the Braille base boundary block (\u2800) + set brailleChar [format %c [expr {0x2800 + $brailleOffset}]] + + # Append the ANSI sequence + append outputBuffer "\x1b\[48\;2\;${bR}\;${bG}\;${bB}m\x1b\[38\;2\;${fR}\;${fG}\;${fB}m${brailleChar}" + } + append outputBuffer "\x1b\[m\n" + } + return $outputBuffer + } + + if {$symbols} { + # return [renderSymbols $allRows $width $height] + return [renderBrailleDensity $allRows $width $height] + } + + set outputBuffer "" + for {set y 0} {$y < $height} {incr y 2} { + set topRow [lindex $allRows $y] + #if image has an odd height, use pure black {0 0 0} for the missing bottom row of the final half-block character row. + set hasBottom [expr {$y + 1 < $height}] + if {$hasBottom} { + set bottomRow [lindex $allRows [expr {$y + 1}]] + } + for {set x 0} {$x < $width } {incr x} { + #set topIdx [lindex $topRow $x] + set topRgb [lindex $topRow $x] + set tR [lindex $topRgb 0] + set tG [lindex $topRgb 1] + set tB [lindex $topRgb 2] + if {$hasBottom} { + #set bottomIdx [lindex $bottomRow $x] + set bottomRgb [lindex $bottomRow $x] + set bR [lindex $bottomRgb 0] + set bG [lindex $bottomRgb 1] + set bB [lindex $bottomRgb 2] + } else { + set bR 0 + set bG 0 + set bB 0 + } + foreach v {bR bG bB tR tG tB} { + if {[set $v] eq ""} { + set $v 0 + } + } + append outputBuffer [format "\x1b\[38\;2\;%d\;%d\;%dm\x1b\[48\;2\;%d\;%d\;%dm▄" $tR $tG $tB $bR $bG $bB] + } + append outputBuffer "\x1b\[m\n" ;# Reset row styling + } + + return $outputBuffer + } + + proc pngfileToAnsi {filename} { + set f [open $filename rb] + set pngdata [read $f] + close $f + return [pngdataToAnsi $pngdata] + } + } tcl::namespace::eval punk::ansi::internal { proc splitn {str {len 1}} { diff --git a/src/modules/punk/ansi/sauce-999999.0a1.0.tm b/src/modules/punk/ansi/sauce-999999.0a1.0.tm index eb6ea865..2e9d40b4 100644 --- a/src/modules/punk/ansi/sauce-999999.0a1.0.tm +++ b/src/modules/punk/ansi/sauce-999999.0a1.0.tm @@ -218,7 +218,9 @@ tcl::namespace::eval punk::ansi::sauce { - + #--------------------------------------------------------------------------------------------------------------------------------------------- + # This data comes from the sauce spec. + #--------------------------------------------------------------------------------------------------------------------------------------------- #todo - fontName - which can also specify e.g code page 437 ## Font name [1] Font size Resolution [2] Aspect ratio [3] Vertical stretch [6] Description ## Display [4] Pixel [5] @@ -226,7 +228,14 @@ tcl::namespace::eval punk::ansi::sauce { set fontnames [dict create] ## IBM VGA 9×16 [7] 720×400 4:3 20:27 (1:1.35) 35% Standard hardware font on VGA cards for 80×25 text mode (code page 437) - dict set fontnames "IBM VGA" [list fontsize "9x16" resolution "720x400" aspect_ratio_display "4:3" aspect_ratio_pixel "20:27 (1:1.35)" vertical_stretch "35%" description "Standard hardware font on VGA cards for 80×25 text mode (code page 437)"] + dict set fontnames "IBM VGA" [list {*}{ + fontsize "9x16" + resolution "720x400" + aspect_ratio_display "4:3" + aspect_ratio_pixel "20:27 (1:1.35)" + vertical_stretch "35%" + description "Standard hardware font on VGA cards for 80×25 text mode (code page 437)" + }] ## IBM VGA ### [8] 9×16 [7] 720×400 4:3 20:27 (1:1.35) 35% Software installed code page font for VGA 80×25 text mode # - where ### is placeholder for 437,720,737 etc @@ -252,6 +261,7 @@ tcl::namespace::eval punk::ansi::sauce { ## C64 PETSCII unshifted 8×8 [10] 320×200 4:3 5:6 (1:1.2) 20% Original Commodore PETSCII font (PET, VIC-20, C64, CBM-II, Plus/4, C16, C116 and C128) in the unshifted mode. Unshifted mode (graphics) only has uppercase letters and additional graphic characters. This is the normal boot font. ## C64 PETSCII shifted 8×8 [10] 320×200 4:3 5:6 (1:1.2) 20% Original PETSCII font in shifted mode. Shifted mode (text) has both uppercase and lowercase letters. This mode is actuated by pressing Shift+Commodore key. ## Atari ATASCII 8×8 [11] 320×192 4:3 4:5 (1:1.25) 25% Original ATASCII font (Atari 400, 800, XL, XE) + #--------------------------------------------------------------------------------------------------------------------------------------------- #expect a 128 Byte sauce record @@ -261,6 +271,7 @@ tcl::namespace::eval punk::ansi::sauce { variable datatypes variable filetypes variable encodings + set warnings [list] if {[string length $saucerecord] != 128} { error "punk::ansi::sauce::to_dict: Unable to interpret data as a SAUCE record - length != 128" } @@ -326,6 +337,8 @@ tcl::namespace::eval punk::ansi::sauce { dict set sdict filetype_name "" } } else { + #how can a byte fail to scan with cu? is this even reachable? + puts stderr "punk::ansi::sauce::to_dict filetype byte failed to scan - setting filetype and filetype_name to empty string byte: [ansistring VIEW -lf 1 [string range $saucerecord 95 95]]" dict set sdict filetype "" dict set sdict filetype_name "" } @@ -422,25 +435,40 @@ tcl::namespace::eval punk::ansi::sauce { 5 { #binarytext #filetype is supposed to represent half the characterwidth (only widths with multiples of 2 can be specified) - #HOWEVER - in the wild we may see width/height specified in tinfo1/tinfo2 with some other value in filetype (eg 1) - #If both tinfo1 and tinfo2 are non zero - we will use them, even though it's not in spec. - set t1 [dict get $sdict tinfo1] - if {$t1 eq ""} { - set t1 0 - } - set t2 [dict get $sdict tinfo2] - if {$t2 eq ""} { - set t2 0 + #HOWEVER - in the wild we may see width/height specified in tinfo1/tinfo2 with some apparently unrelated value in filetype (eg 0 or 1) that doesn't match the intended image dimensions. + #If both tinfo1 and tinfo2 are non zero - we *could* use them, even though it's not in spec. + #An example file (us-used1.bin) has filetype 0 and tinfo1/tinfo2 640/350 + #It's possible tinfo1/tinfo2 represent pixel dimensions for a 'standard' 8x16 font, but this image is 160 columns wide, so we would expect tinfo1 to be 1280. + #The sauce spec seems to indicate we should ignore tinfo1/tinfo2 for binarytext and only use filetype to determine width. + #the default for binarytext is 160 columns. + + #filetype 1 is theoretically possible, representing 2 columns + #in practice we see this value for binarytext images that are definitely not intended to be 2 columns wide. Why? + #is there some assumption that that images are at least a certain width, and filetype has been repurposed to indicate something else? + #The spec would seem to rule out images of a single column due to filetype being half the character width but a value of 0.5 isn't supported. + #It specifically mentions that only even widths up to 510 can be specified. ($filetype * 2 where filetype is 1-255?) + + + #proper mechanism to specify columns for binarytext is the datatype field. + set cols [expr {2*[dict get $sdict filetype]}] + if {$cols == 0} { + lappend warnings "binarytext filetype value of [dict get $sdict filetype] - using binarytext default cols of 160" + #default for binarytext is 160 columns + set cols 160 } - if {$t1 != 0 && $t2 != 0} { + if {$cols == 2 && [dict get $sdict tinfo1] != 0 && [dict get $sdict tinfo2] != 0} { #not to spec - but we will assume these have values for a reason.. - puts stderr "punk::ansi::sauce::to_dict using tinfo1/tinfo2 data for columns/rows (non-compliant SAUCE data)" - dict set sdict columns [expr {2 * $t1}] - dict set sdict rows $t2 + #--------------------------------------------------------------------------------------------------------------------- + #The sample file src/testansi/formatsamples/image/binaryText/test.bin has a filetype 1 and tinfo1 40 and tinfo2 25. + #(similarly ppe-ansi.bin has tinfo1 80 and tinfo2 26) + #They seem to use the 1 in filetype to indicate that the tinfo1/tinfo2 values should be used. + #(The 80 cols wide test.bin binaryText image matches the xbin sample file src/testansi/formatsamples/image/xbin/test.xb which is a more fully specified format using a header) + #--------------------------------------------------------------------------------------------------------------------- + lappend warnings "binarytext filetype of 1 with non-zero tinfo1/tinfo2 - using tinfo1/tinfo2 data for columns/rows (possibly non-conforming SAUCE data - matching observed data in the wild)" + set cols [expr {2 * [dict get $sdict tinfo1]}] + dict set sdict columns $cols + dict set sdict rows [dict get $sdict tinfo2] } else { - #proper mechanism to specify columns for binarytext is the datatype field. - - set cols [expr {2*[dict get $sdict filetype]}] dict set sdict columns $cols #rows must be calculated from file size #rows = (filesize - sauceinfosize)/ filetype * 2 * 2 @@ -481,6 +509,9 @@ tcl::namespace::eval punk::ansi::sauce { } } } + if {[llength $warnings]} { + dict set sdict warnings $warnings + } return $sdict } diff --git a/src/modules/punk/char-999999.0a1.0.tm b/src/modules/punk/char-999999.0a1.0.tm index d4091473..0fd284b4 100644 --- a/src/modules/punk/char-999999.0a1.0.tm +++ b/src/modules/punk/char-999999.0a1.0.tm @@ -3039,8 +3039,10 @@ tcl::namespace::eval punk::char { set csplits [combiner_split $text] foreach {pt combiners} [lrange $csplits 0 end-1] { set clist [split $pt ""] - lappend components {*}[lrange $clist 0 end-1] - lappend components [tcl::string::cat [lindex $clist end] $combiners] + lset clist end [tcl::string::cat [lindex $clist end] $combiners] + lappend components {*}$clist + #lappend components {*}[lrange $clist 0 end-1] + #lappend components [tcl::string::cat [lindex $clist end] $combiners] } #last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme if {[lindex $csplits end] ne ""} { @@ -3066,126 +3068,121 @@ tcl::namespace::eval punk::char { #review \uFE0F variation selector 16 - forces emoji presentation for preceding char - if 1 { - #This is a basic implementation that does not check that all combinations are valid. - set graphemes [list] - set current_cluster "" - - set cluster_base 0 ;#is the current cluster based on a char that can be combined with modifiers/ZWJs (e.g emoji or other cluster-based char) - # or is it based on a char that can't be combined with modifiers/ZWJs (e.g ascii letter) - set cluster_base_RI 0 ;#is the current cluster based on a regional indicator char - which can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. - - set current_cluster_is_extensible 0 - for {set i 0} {$i < [llength $components] } {incr i} { - set component [lindex $components $i] - if {$component eq "\r" && [lindex $components $i+1] eq "\n"} { - if {$current_cluster ne ""} { - lappend graphemes $current_cluster - } - lappend graphemes "\r\n" - incr i ;#skip the \n as we've already processed it as part of the cluster - set current_cluster "" - grapheme_split::reset_base + #This is a basic implementation that does not check that all combinations are valid. + set graphemes [list] + set current_cluster "" + + set cluster_base 0 ;#is the current cluster based on a char that can be combined with modifiers/ZWJs (e.g emoji or other cluster-based char) + # or is it based on a char that can't be combined with modifiers/ZWJs (e.g ascii letter) + set cluster_base_RI 0 ;#is the current cluster based on a regional indicator char - which can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. + + set current_cluster_is_extensible 0 + for {set i 0} {$i < [llength $components] } {incr i} { + set component [lindex $components $i] + if {$component eq "\r" && [lindex $components $i+1] eq "\n"} { + if {$current_cluster ne ""} { + lappend graphemes $current_cluster + } + lappend graphemes "\r\n" + incr i ;#skip the \n as we've already processed it as part of the cluster + set current_cluster "" + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base + set current_cluster_is_extensible 0 + } elseif {$component eq "\u200d"} { + if {$current_cluster eq ""} { + #ZWJ at start of string - treat as separate grapheme cluster - but isn't a valid base for further combining with more ZWJs or modifiers + set current_cluster $component + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base set current_cluster_is_extensible 0 - } elseif {$component eq "\u200d"} { - if {$current_cluster eq ""} { - #ZWJ at start of string - treat as separate grapheme cluster - but isn't a valid base for further combining with more ZWJs or modifiers - set current_cluster $component - grapheme_split::reset_base - set current_cluster_is_extensible 0 - } else { - if {$cluster_base} { - if {$current_cluster_is_extensible} { - #a double (or longer) ZWJ sequence in a row is part of the last cluster - but not extensible anymore. - append current_cluster $component - set current_is_cluster_extensible 0 - } else { - append current_cluster $component - if {$cluster_base_RI} { - #regional indicators can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. - grapheme_split::reset_base - set current_cluster_is_extensible 0 - #we can keep adding ZWJs or modifiers though - } else { - set current_cluster_is_extensible 1 - } - } + } else { + if {$cluster_base} { + if {$current_cluster_is_extensible} { + #a double (or longer) ZWJ sequence in a row is part of the last cluster - but not extensible anymore. + append current_cluster $component + set current_is_cluster_extensible 0 } else { - #ZWJ after non-cluster-based char - non extensible but we continue appending ZWJs to the current cluster. append current_cluster $component - set current_cluster_is_extensible 0 - } - - } - } elseif {[regexp {[\U1f3fb-\U1f3ff]} $component]} { - #emoji modifier - join with previous component - if {$current_cluster eq ""} { - #modifier at start of string - not a valid base for further combining with more modifiers or ZWJs - but we continue appending modifiers to the current cluster. - set current_cluster $component - grapheme_split::reset_base - } else { - if {$cluster_base} { - if {$current_cluster_is_extensible} { - append current_cluster $component - #invalidate the base! - grapheme_split::reset_base + if {$cluster_base_RI} { + #regional indicators can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base + set current_cluster_is_extensible 0 + #we can keep adding ZWJs or modifiers though } else { - append current_cluster $component + set current_cluster_is_extensible 1 } + } + } else { + #ZWJ after non-cluster-based char - non extensible but we continue appending ZWJs to the current cluster. + append current_cluster $component + set current_cluster_is_extensible 0 + } + + } + } elseif {[regexp {[\U1f3fb-\U1f3ff]} $component]} { + #emoji modifier - join with previous component + if {$current_cluster eq ""} { + #modifier at start of string - not a valid base for further combining with more modifiers or ZWJs - but we continue appending modifiers to the current cluster. + set current_cluster $component + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base + } else { + if {$cluster_base} { + if {$current_cluster_is_extensible} { + append current_cluster $component + #invalidate the base! + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base } else { - #modifier after non-cluster-based char - non extensible but we continue appending modifiers to the current cluster. append current_cluster $component } - #review - # \u1f33e\u1f3fe\u200d\u2f3fe\u200d\u1f33e is 2 clusters - #This is because after first zwj, we applied a modifier - not a valid base. + } else { + #modifier after non-cluster-based char - non extensible but we continue appending modifiers to the current cluster. + append current_cluster $component } - set current_cluster_is_extensible 0 + #review + # \u1f33e\u1f3fe\u200d\u2f3fe\u200d\u1f33e is 2 clusters + #This is because after first zwj, we applied a modifier - not a valid base. + } + set current_cluster_is_extensible 0 + } else { + if {$current_cluster eq ""} { + grapheme_split::start_cluster $component } else { - if {$current_cluster eq ""} { - grapheme_split::start_cluster $component - } else { - #have existing cluster data - if {$current_cluster_is_extensible} { - #assert - if current_cluster_is_extensible then cluster_base should currently be true. - #if the current char is a base - we can append to existing cluster, but if it's not a base, then we start a new cluster even if we had seen a ZWJ before. - if {[regexp {[\U1f600-\U1f64f\U1f300-\U1f5ff\U1f900-\U1f9ff\U1fa70-\U1faff\U1f680-\U1f6ff\U2700-\U27bf\U2600-\u26ff]} $component]} { - append current_cluster $component - set cluster_base 1 - } else { - lappend graphemes $current_cluster - set current_cluster $component - grapheme_split::reset_base - } - set current_cluster_is_extensible 0 - } elseif {$cluster_base_RI} { - #regional indicators can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. - if {[regexp {[\U1f1e6-\U1f1ff]} $component]} { - append current_cluster $component - - #invalidate the base - we can't combine more than 2 RIs in a cluster, and they don't combine with modifiers or ZWJs to form longer clusters. - #we can however add more ZWJs or modifiers to the cluster - but they don't make it extensible for combining with more RIs - grapheme_split::reset_base - } else { - #something else while RI cluster is open - end the current cluster and start a new one with the current char. - lappend graphemes $current_cluster - grapheme_split::start_cluster $component - } - set current_cluster_is_extensible 0 + #have existing cluster data + if {$current_cluster_is_extensible} { + #assert - if current_cluster_is_extensible then cluster_base should currently be true. + #if the current char is a base - we can append to existing cluster, but if it's not a base, then we start a new cluster even if we had seen a ZWJ before. + if {[regexp {[\U1f600-\U1f64f\U1f300-\U1f5ff\U1f900-\U1f9ff\U1fa70-\U1faff\U1f680-\U1f6ff\U2700-\U27bf\U2600-\u26ff]} $component]} { + append current_cluster $component + set cluster_base 1 } else { + lappend graphemes $current_cluster + set current_cluster $component + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base + } + set current_cluster_is_extensible 0 + } elseif {$cluster_base_RI} { + #regional indicators can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. + if {[regexp {[\U1f1e6-\U1f1ff]} $component]} { + append current_cluster $component + + #invalidate the base - we can't combine more than 2 RIs in a cluster, and they don't combine with modifiers or ZWJs to form longer clusters. + #we can however add more ZWJs or modifiers to the cluster - but they don't make it extensible for combining with more RIs + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base + } else { + #something else while RI cluster is open - end the current cluster and start a new one with the current char. lappend graphemes $current_cluster grapheme_split::start_cluster $component } + set current_cluster_is_extensible 0 + } else { + lappend graphemes $current_cluster + grapheme_split::start_cluster $component } } } - if {$current_cluster ne ""} { - lappend graphemes $current_cluster - } - } else { - set graphemes $components } - + if {$current_cluster ne ""} { + lappend graphemes $current_cluster + } return $graphemes } namespace eval grapheme_split { diff --git a/src/modules/punk/lib-999999.0a1.0.tm b/src/modules/punk/lib-999999.0a1.0.tm index e0a84a89..208e0050 100644 --- a/src/modules/punk/lib-999999.0a1.0.tm +++ b/src/modules/punk/lib-999999.0a1.0.tm @@ -4210,6 +4210,9 @@ namespace eval punk::lib { if {[string index $key 0] ne "%"} { set key %$key } + #puts "---key:'$key'" + set key [string map {; \\;} $key] ;#review + #puts "---key:'$key'" #pipeline - use punk patterns. % thisval.= $key= $thisval } diff --git a/src/modules/punk/ns-999999.0a1.0.tm b/src/modules/punk/ns-999999.0a1.0.tm index d35287f9..b7e1a8d5 100644 --- a/src/modules/punk/ns-999999.0a1.0.tm +++ b/src/modules/punk/ns-999999.0a1.0.tm @@ -775,13 +775,8 @@ tcl::namespace::eval punk::ns { #set parent [nsprefix $ns_absolute] #set tail [nstail $ns_absolute] - #jjj #set allchildren [lsort [nseval $base [list ::namespace children]]] - #set allchildren [lsort [tcl::namespace::eval $base [list ::namespace children]]] - set allchildren [lsort [nseval $base [list ::namespace children]]] - #puts "->base:$base tailparts:$tailparts allchildren: $allchildren" - #puts "->base:$base tailparts:$tailparts childcount: [llength $allchildren]" #** only significant when it is the trailing part of a segment eg ::**::xxx ::a**::xxx if {[llength $tailparts]} { @@ -790,6 +785,7 @@ tcl::namespace::eval punk::ns { set nslist [nstree_list $base -subnslist {} -allbelow 1] } elseif {[regexp {[*]{2}$} $nextglob]} { set nslist [list] + set allchildren [lsort [nseval $base [list ::namespace children]]] lappend nsmatches {*}[lsearch -all -inline -glob $allchildren [nsjoin ${base} $nextglob]] foreach ch $nsmatches { lappend nslist $ch @@ -799,6 +795,7 @@ tcl::namespace::eval punk::ns { } else { #lsearch with -glob ok even if nextglob has no globchars (no discernable speed diff, and earlier parts may have globchars anyway) set nslist [list] + set allchildren [lsort [nseval $base [list ::namespace children]]] lappend nsmatches {*}[lsearch -all -inline -glob $allchildren [nsjoin ${base} $nextglob]] if {[llength $tailparts] >1 || $allbelow} { foreach ch $nsmatches { @@ -812,6 +809,7 @@ tcl::namespace::eval punk::ns { } } else { #puts "nstree_list: no tailparts base:$base" + set allchildren [lsort [nseval $base [list ::namespace children]]] if {$allbelow} { set nsmatches $allchildren set nslist [list] @@ -2134,8 +2132,8 @@ y" {return quirkykeyscript} tcl::dict::set tinfo($target) procoffset 0 tcl::dict::set tinfo($target) level [expr {[::tcl::info::level]+1}] tcl::dict::set tinfo($target) subcmds 0 - puts "enter: $target -- $args" - puts "frame-2: [::tcl::info::frame -2]" + puts stderr "enter: $target -- $args" + #puts stderr "frame-2: [::tcl::info::frame -2]" set _cmdtrace_disabled false } @@ -2481,7 +2479,7 @@ y" {return quirkykeyscript} set line $traceline dict set linedict $target eval_base $traceline dict set linedict $target eval_offset 1 - puts " step type: proc traceline:$traceline ** $args" + puts " step type: proc traceline:$traceline ** $args\x1b\[m" #puts "** $callinfo" if {[dict exists $callinfo cmd]} { #set cmd [string trim [dict get $callinfo cmd]] ;#raw 'unexpanded' script from the stack frame @@ -2504,8 +2502,8 @@ y" {return quirkykeyscript} set eval_base [dict get $linedict $target eval_base] set eval_offset [dict get $linedict $target eval_offset] set line [expr {$eval_base + ($eval_offset-1) + ($traceline-1)}] - puts "stack-- $callinfo" - puts " step type: eval traceline: $traceline -- " + #puts "stack-- $callinfo" + puts stderr " step type: eval traceline: $traceline -- " if {[dict exists $callinfo cmd]} { #set cmd [string trim [dict get $callinfo cmd]] set cmdlist [lindex $args 0] @@ -2627,6 +2625,8 @@ y" {return quirkykeyscript} }] } proc cmdtrace {args} { + #review - displaying argument values has to be done carefully. Small values are ok, but large lists or dicts can be overwhelming. + #Potentially we could apply some heuristics to truncate or summarise them. package require dictn ;#convenience to allow dictn::incr d {key subkey} variable tinfo array unset tinfo @@ -2676,7 +2676,7 @@ y" {return quirkykeyscript} #if the target command has a leading colon (e.g expr alternative :) we can't put a trace directly on a fully qualified name with a triple colon such as ::: #we will need to evaluate in the namespace foreach {tgt_cmd ns nscmd} $resolved_targets { - puts "tracing target: $tgt_cmd whilst running: $origin $arglist" + puts stderr "tracing target: $tgt_cmd whilst running: $origin $arglist" #::tcl::namespace::eval $ns [list ::trace add execution $nscmd enter [list ::punk::ns::_cmdtrace_enter ::punk::ns::linedict $tgt_cmd]] #::tcl::namespace::eval $ns [list ::trace add execution $nscmd enterstep [list ::punk::ns::_cmdtrace_enterstep ::punk::ns::linedict $tgt_cmd]] diff --git a/src/modules/punk/path-999999.0a1.0.tm b/src/modules/punk/path-999999.0a1.0.tm index cc1d1e7f..351c0af4 100644 --- a/src/modules/punk/path-999999.0a1.0.tm +++ b/src/modules/punk/path-999999.0a1.0.tm @@ -565,10 +565,45 @@ namespace eval punk::path { end]] } + + ## for comparison + #proc nsglob_as_re {glob} { + # #any segment that is not just * must match exactly one segment in the path + # set pats [list] + # foreach seg [nsparts_cached $glob] { + # switch -exact -- $seg { + # "" { + # lappend pats "" + # } + # * { + # #review - ::g*t will not find ::got:it (won't match single inner colon) - this should be fixed + # #lappend pats {[^:]*} + # #negative lookahead + # #any number of chars not followed by ::, followed by any number of non : + # lappend pats {(?:.(?!::))*[^:]*} + # } + # ** { + # lappend pats {.*} + # } + # default { + # set seg [string map {. [.]} $seg] + # if {[regexp {[*?]} $seg]} { + # #set pat [string map [list ** {.*} * {[^:]*} ? {[^:]}] $seg] + # set pat [string map [list ** {.*} * {(?:.(?!::))*[^:]*} ? {[^:]}] $seg] + # lappend pats "$pat" + # } else { + # lappend pats "$seg" + # } + # } + # } + # } + # return "^[join $pats ::]\$" + #} proc pathglob_as_re {pathglob} { #*** !doctools #[call [fun pathglob_as_re] [arg pathglob]] #[para] Returns a regular expression for matching a path to a glob pattern which can contain glob chars *|? in any segment of the path structure + #[para] Does not support square bracket globs or character classes. #[para] ** matches any number of subdirectories. #[para] e.g /etc/**/*.txt will match any .txt files at any depth below /etc (except directly within /etc itself) #[para] e.g /etc/**.txt will match any .txt files at any depth below /etc @@ -589,7 +624,7 @@ namespace eval punk::path { * {lappend pats {[^/]*}} ** {lappend pats {.*}} default { - set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals + set seg [string map [list ^ {\^} $ {\$} \[ {\[} \] {\]} ( {\(} ) {\)} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters (or tcl glob square bracket chars) in the input as literals #set seg [string map [list . {[.]}] $seg] set seg [string map {. [.]} $seg] if {[regexp {[*?]} $seg]} { @@ -603,6 +638,52 @@ namespace eval punk::path { } return "^[join $pats /]\$" } + + punk::args::define { + @id -id ::punk::path::globmatchpath + @cmd -name punk::path::globmatchpath\ + -summary\ + "Match path to *|**|? glob patterns"\ + -help\ + "Return a boolean indicating whether the path matches the specialised glob pattern. + A pattern such as /usr/*/bin will match any path that has /usr as the first segment and bin as the third segment, + with any single segment in between. + A pattern such as /usr/**/bin will match any path that has /usr as the first segment and bin as the last segment, + with 1 or more segments in between (so it will not match /usr/bin). + A pattern such as /usr/** will match any path that has /usr as the first segment, with 1 or more segments + following (so it will not match /usr itself). + A pattern such as **/*.txt will match any path that ends with .txt, with 1 or more leading segments + (so it will not match test.txt or .txt). + A pattern such as ** will match any path. + The glob characters * and ? are the only special characters in the pathglob syntax. + - they are treated as glob characters regardless of where they appear in the pathglob string. + Note that this is different from other Tcl glob contexts where square brackets can be used. + The pathglob syntax treats other characters, including square brackets as literals. + For example, the pattern /usr/te?t will match /usr/test and /usr/text but not /usr/texxt, and the pattern /usr/te*t + will match /usr/test, /usr/teat, and /usr/teeeet but not /usr/te/t. + The pathglob syntax does not support escaping of glob characters - any glob characters in the pathglob are treated + as glob characters. For example, the pattern /usr/* will match any path that has /usr as the first segment and any + single segment as the second segment, but there is no way to specify a pattern that matches any path that has /usr + as the first segment and a literal * as the second segment. + Caller must ensure that file separator is forward slash. (e.g use file normalize on windows) + + options: + -nocase 0|1 (default 0 - case sensitive) + If -nocase is not supplied - default to case sensitive *except for driveletter* + ie - the driveletter alone in paths such as c:/etc will still be case insensitive. (ie c:/ETC/* will match C:/ETC/blah but not C:/etc/blah) + Explicitly specifying -nocase 0 will require the entire case to match including the driveletter. + " + @leaders + pathglob -type string -help "glob pattern to match path against. See [fun pathglob_as_re] for syntax of glob patterns" + path -type string -help "path to match against glob pattern" + @opts + -nocase -type boolean -default 0 -help\ + "case insensitive matching (default false - case sensitive) + - except for driveletter on windows which is always case insensitive + unless -nocase 0 is explicitly specified" + @values -min 0 -max 0 + } + # -id proc globmatchpath {pathglob path args} { #*** !doctools #[call [fun globmatchpath] [arg pathglob] [arg path] [opt {option value...}]] @@ -669,46 +750,182 @@ namespace eval punk::path { @opts -recursive -type none -help\ "" + -exclude-paths -type list -default {} -help\ + "list of path patterns to exclude from results. + May include * and ** path segments e.g /usr/** + A single /*/ will match any single segment in the path, and a single /**/ will match any number of segments in the path. + + e.g to exclude any path with _aside as a segment in the middle: -exclude-paths **/_aside/** + i.e this would exclude /usr/_aside/etc and /usr/x/_aside/etc but not /usr/x/_aside or _aside/etc + + To exclude all paths with _aside as a segment anywhere: -exclude-paths { **/_aside/** **/_aside _aside/**} + " #todo -depth @values -min 0 -max 1 path -type directory -optional 1 -help\ - "Path of folder. If not supplied current directory is used." + "Path of folder. If not supplied current directory is used. + This may be a relative or absolute path. Relative paths are treated as relative to current directory. + When using relative paths - the result will also be relative paths with the same relative prefix. + (e.g if path is ../test - the results will be ../test/subfolder1 ../test/subfolder2 etc) + Patterns in -exclude-paths are matched against the resulting paths + (so should be written to match the same relative prefix if path is relative)" } + proc subfolders {args} { + #NOTE - this algorithm based on omit_only_patterns and prune_base_patterns was suggested by a 2026 AI model - it is apparent to this programmer that it is inadequate for the purpose. + #e.g consider subfolders -recursion -exclude {**/vfs/** **/src/**} + #This can still return something like c:/repo/etc/src/vfs - which should be excluded by the pattern **/src/** + #todo - review and fix properly. set argd [punk::args::parse $args withid ::punk::path::subfolders] lassign [dict values $argd] leaders opts values received - set do_recursion [dict exists $received -recursive] + set do_recursion [dict exists $received -recursive] + set exclude_paths [dict get $opts -exclude-paths] + if {"**" in $exclude_paths} { + #if ** is in exclude_paths - then we can skip all glob matching and just return empty list + #This is likely user error - so we'll be loud about it for now but will still return empty list rather than erroring. + #If user code is building exclude_paths dynamically - they can check for this case themselves and avoid the call to subfolders to suppress this message. + puts stderr "punk::path::subfolders Warning - exclude_paths contains '**' - all paths will be excluded" + return [list] + } if {[dict exists $received path]} { set path [dict get $values path] } else { set path [pwd] } - set folders [glob -nocomplain -directory $path -types d *] + + set all_subfolders [glob -nocomplain -directory $path -types d *] + + + #example of expected exclude_paths pattern behaviour when recursion is enabled: + # **/dirname -> omit /x/y/dirname, but still visit /x/y/dirname/* + + # **/dirname/* -> include /x/y/dirname and /x/y/dirname/a/b but omit directories that are a single level below /x/y/dirname such as /x/y/dirname/a + + #c:/** - would exclude all subfolders below c: but not c: itself + + # **/test/** - would exclude any path with test as a segment and all its subfolders + #- but not paths with test as a segment that is the final segment + + + set omit_only_patterns [list] + set prune_base_patterns [list] + foreach pat $exclude_paths { + set pat_parts [file split $pat] ;#note file split c:/test gives {c:/ test} but file split **/test gives {** test} + #also note that file split on windows treats forward slashes and backslashes the same. + #by using file split, we gain some flexibility in syntax of paths and patterns, + #but lose the ability to use backslashes as escapes to allow literal glob characters in path segments. + #This is almost always a non-issue on windows since * and ? are not valid in path segments there, and is rarely an issue on unix even though + # * and ? are technically valid in path segments, but it is inadvisable there anyway for compatibility with shells etc. + if {[llength $pat_parts] >= 2 && [lindex $pat_parts end] eq "**"} { + #** at end of pattern - e.g /dir/etc/** + #Convert ".../" to base "...", and prune descendants of that base. + lappend prune_base_patterns [file join {*}[lrange $pat_parts 0 end-1]] + } else { + lappend omit_only_patterns $pat + } + } + + set folders [list] + set recurse_subdirs [list] + + foreach f $all_subfolders { + set include_in_results 1 + set allow_recurse 1 + foreach pat $omit_only_patterns { + if {[globmatchpath $pat $f]} { + set include_in_results 0 + break + } + } + if {$allow_recurse && [llength $prune_base_patterns]} { + foreach base_pat $prune_base_patterns { + #prune both the matched base node and its decendants. + if {[globmatchpath $base_pat $f]} { + set allow_recurse 0 + break + } + if {[globmatchpath "${base_pat}/**" $f]} { + set include_in_results 0 + set allow_recurse 0 + break + } + } + } + if {$include_in_results} { + lappend folders $f + } + if {$allow_recurse} { + lappend recurse_subdirs $f + } + } if {$do_recursion} { - foreach subdir $folders { - lappend folders {*}[subfolders -recursive $subdir] + foreach subdir $recurse_subdirs { + lappend folders {*}[subfolders -exclude-paths $exclude_paths -recursive $subdir] } } + + #if {[llength $exclude_paths]} { + # set folders [list] + # foreach f $all_subfolders { + # set skip 0 + # foreach pat $exclude_paths { + # #review - this is slightly too simplistic. + # # for exclusion pattern **/dirname + # # this will exclude any path with dirname as final segment - but it will also exclude any path with dirname as a segment anywhere in the path - which is not intended. + # #puts stderr "Checking exclude pat '$pat' against '$f'" + # if {[globmatchpath $pat $f]} { + # set skip 1 + # break + # } + # } + # if {!$skip} { + # lappend folders $f + # } + # } + #} else { + # set folders $all_subfolders + #} + #if {$do_recursion} { + # foreach subdir $folders { + # lappend folders {*}[subfolders -exclude-paths $exclude_paths -recursive $subdir] + # } + #} return $folders } #todo - treefolders with similar search caps as treefilenames punk::args::define { - @id -id ::punk::path::treefilenames + @id -id ::punk::path::treefilenames + @cmd -name punk::path::treefilenames\ + -summary\ + "List of filenames below supplied path."\ + -help\ + "List of filenames below path. + The resulting list is unsorted." -directory -type directory -help\ "folder in which to begin recursive scan for files." - -call-depth-internal -default 0 -type integer - -sort -type any -default natural -choices {none ascii dictionary natural} + -call-depth-internal -default 0 -type integer -help "internal use only - caller should not specify - used to track depth of recursive calls for internal logic" + -call-subvector -default {} -type list -help "internal use only - caller should not specify - used to track path vector of recursive calls for internal logic" + -call-allbelow -default 1 -type boolean -help "internal use only - caller should not specify - used to track whether we are in a subtree below a match for glob_paths (which means we can skip glob matching and antiglob_paths checks and just include all files below here)" + -sort -type any -default natural -choices {none ascii dictionary natural} -antiglob_paths -default {} -help\ "list of path patterns to exclude may include * and ** path segments e.g - /usr/** (exlude subfolders based at /usr but not + /usr/** (exclude subfolders based at /usr but not files within /usr itself) - **/_aside (exlude files where _aside is last segment) + **/_aside (exclude files where _aside is last segment) **/_aside/* (exclude folders one below an _aside folder) **/_aside/** (exclude all folders with _aside as a segment)" -antiglob_files -default {} + -glob_paths -default {*} -help\ + "list of path patterns to include + may include * and ** path segments e.g + /usr/** (include subfolders based at /usr but not + files within /usr itself) + **/_aside (include files where _aside is last segment) + **/_aside/* (include folders one below an _aside folder) + **/_aside/** (include all folders with _aside as a segment)" @values -min 0 -max -1 -optional 1 -type string tailglobs -default * -multiple 1 -help\ "Patterns to match against filename portion (last segment) of each file path @@ -732,12 +949,20 @@ namespace eval punk::path { lassign [dict values $argd] leaders opts values received set tailglobs [dict get $values tailglobs] # -- --- --- --- --- --- --- - set opt_sort [dict get $opts -sort] - set opt_antiglob_paths [dict get $opts -antiglob_paths] - set opt_antiglob_files [dict get $opts -antiglob_files] - set CALLDEPTH [dict get $opts -call-depth-internal] + set opt_sort [dict get $opts -sort] + set opt_antiglob_paths [dict get $opts -antiglob_paths] + set opt_glob_paths [dict get $opts -glob_paths] + set opt_antiglob_files [dict get $opts -antiglob_files] + + set CALLDEPTH [dict get $opts -call-depth-internal] + set callsubvector [dict get $opts -call-subvector] + set callallbelow [dict get $opts -call-allbelow] ;#whether to return matches longer than the matched glob-path # -- --- --- --- --- --- --- # -- --- --- --- --- --- --- + if {"*" in $opt_glob_paths} { + #if we have a * in the default glob_paths - then any other entries are redundant. + set opt_glob_paths {*} + } set files [list] if {$CALLDEPTH == 0} { @@ -745,14 +970,17 @@ namespace eval punk::path { package require natsort } #set opts [dict merge $opts [list -directory $opt_dir]] - if {![dict exists $received -directory]} { - set opt_dir [pwd] - } else { + if {[dict exists $received -directory]} { set opt_dir [dict get $opts -directory] + } else { + + set opt_dir [pwd] } if {![file isdirectory $opt_dir]} { return [list] } + + } else { #assume/require to exist in any recursive call set opt_dir [dict get $opts -directory] @@ -831,19 +1059,35 @@ namespace eval punk::path { lappend okdirs $dir } } - if {[llength $okdirs]} { + if {$opt_glob_paths eq {*}} { + set matchdirs $okdirs + } else { + #** only significant when it is the trailing part of a segment eg /**/xxx /a**/xxx + + + + set matchdirs [list] + foreach dir $okdirs { + foreach gp $opt_glob_paths { + if {[globmatchpath $gp $dir] || [globmatchpath "$gp/**" $dir]} { + lappend matchdirs $dir + } + } + } + } + if {[llength $matchdirs]} { switch -- $opt_sort { ascii { - set finaldirs [lsort $okdirs] + set finaldirs [lsort $matchdirs] } dictionary { - set finaldirs [lsort -dictionary $okdirs] + set finaldirs [lsort -dictionary $matchdirs] } natural { - set finaldirs [natsort::sort $okdirs] + set finaldirs [natsort::sort $matchdirs] } default { - set finaldirs $okdirs + set finaldirs $matchdirs } } foreach dir $finaldirs { diff --git a/src/modules/punk/pipe-999999.0a1.0.tm b/src/modules/punk/pipe-999999.0a1.0.tm index 0ad82b57..829bb542 100644 --- a/src/modules/punk/pipe-999999.0a1.0.tm +++ b/src/modules/punk/pipe-999999.0a1.0.tm @@ -169,8 +169,8 @@ tcl::namespace::eval punk::pipe::lib { #This stops us matching {/@**@x x} vs {/@**@x x} #--- - set rhs [tcl::string::map {: ? * [ ] \\ {"} " " } $rhs] - #review - we don't expect other command-incompatible chars such as colon? + set rhs [tcl::string::map {: ; ? * [ ] \\ {"} " " } $rhs] + #review - we don't expect other command-incompatible chars? return $rhs } @@ -187,6 +187,7 @@ tcl::namespace::eval punk::pipe::lib { #exclude quoted whitespace proc arg_is_script_shaped {arg} { + set arg [string map {\\; ""} $arg] if {[tcl::string::first \n $arg] >= 0} { return 1 } elseif {[tcl::string::first ";" $arg] >= 0} { diff --git a/src/modules/punk/repo-999999.0a1.0.tm b/src/modules/punk/repo-999999.0a1.0.tm index 93c02e17..11695d8e 100644 --- a/src/modules/punk/repo-999999.0a1.0.tm +++ b/src/modules/punk/repo-999999.0a1.0.tm @@ -1817,17 +1817,13 @@ namespace eval punk::repo { error "unimplemented" } - #file normalize is expensive so this is too + #file normalize can be a little expensive so this is too proc norm {path {platform env}} { - #kettle::path::norm - #see also wiki - #full path normalization - - set platform [string tolower $platform] - if {$platform eq "env"} { - set platform $::tcl_platform(platform) - } + #set platform [string tolower $platform] + #if {$platform eq "env"} { + # set platform $::tcl_platform(platform) + #} #No - don't do this sort of path translation here - leave as option for specific utils only such as ./ #Windows volume-relative syntax with specific volume specified is somewhat broken in Tcl - but leading slash volume-relative does work #We shouldn't break it totally just because accessing WSL/mingw paths is slightly more useful @@ -1835,6 +1831,9 @@ namespace eval punk::repo { #return [file dirname [file normalize [punk::unixywindows::towinpath $path]/__]] #} + #kettle::path::norm + #see also wiki + #full path normalization return [file dirname [file normalize $path/__]] } diff --git a/src/modules/test/#modpod-overtype-999999.0a1.0/overtype-1.7.4_testsuites/overtype/renderline.test b/src/modules/test/#modpod-overtype-999999.0a1.0/overtype-1.7.4_testsuites/overtype/renderline.test index ba831b1c..6fcef11d 100644 --- a/src/modules/test/#modpod-overtype-999999.0a1.0/overtype-1.7.4_testsuites/overtype/renderline.test +++ b/src/modules/test/#modpod-overtype-999999.0a1.0/overtype-1.7.4_testsuites/overtype/renderline.test @@ -1,4 +1,5 @@ package require tcltest +package require overtype namespace eval ::testspace { namespace import ::tcltest::* @@ -6,6 +7,32 @@ namespace eval ::testspace { set result "" } + # Temporarily replaces punk::console::get_tabstops so tab-related tests are deterministic. + proc with_tabstops {tabstops body} { + variable __tabstops + set __tabstops $tabstops + set had_original [expr {[llength [info commands ::punk::console::get_tabstops]] > 0}] + if {$had_original} { + rename ::punk::console::get_tabstops ::testspace::__orig_get_tabstops + } + namespace eval ::punk::console {} + proc ::punk::console::get_tabstops {{inoutchannels {stdin stdout}}} { + return [set ::testspace::__tabstops] + } + + set code [catch {uplevel 1 $body} out opts] + + rename ::punk::console::get_tabstops {} + if {$had_original} { + rename ::testspace::__orig_get_tabstops ::punk::console::get_tabstops + } + return -options $opts $out + } + proc vis {s} { + string map [list "\u0000" "" "\x1b" "" "\n" "" "\r" "" "\t" ""] $s + } + + test renderline_basic_noansi {basic renderline calls with no ansi in underlay or overlay}\ -setup $common -body { set undertext "abcdefghij" @@ -26,6 +53,734 @@ namespace eval ::testspace { ABCDEfghij ABCDEabcde ] + test renderline_empty_overlay_passthrough {empty overlay returns undertext unchanged} \ + -setup $common -body { + overtype::renderline abcdef "" + }\ + -cleanup { + }\ + -result abcdef + + test renderline_startcolumn_overtype_plain {startcolumn with overtype mode replaces at offset} \ + -setup $common -body { + overtype::renderline -insert_mode 0 -startcolumn 3 abcdef XY + }\ + -cleanup { + }\ + -result abXYef + + test renderline_error_newline_undertext {undertext cannot contain newline} \ + -setup $common -body { + overtype::renderline "ab\ncd" XX + }\ + -cleanup { + }\ + -returnCodes error \ + -match glob \ + -result "*not allowed to contain newlines in undertext*" + + test renderline_error_unknown_option {unknown option should error} \ + -setup $common -body { + overtype::renderline -bogus 1 abc XX + }\ + -cleanup { + }\ + -returnCodes error \ + -match glob \ + -result "*unknown option*" + + test renderline_error_cursor_row_non_integer {cursor_row must be integer if specified} \ + -setup $common -body { + overtype::renderline -cursor_row x abc XX + }\ + -cleanup { + }\ + -returnCodes error \ + -match glob \ + -result "*-cursor_row must be empty for unspecified/unknown or a non-zero positive integer*" + + test renderline_error_cursor_row_non_positive {cursor_row must be positive if specified} \ + -setup $common -body { + overtype::renderline -cursor_row 0 abc XX + }\ + -cleanup { + }\ + -returnCodes error \ + -match glob \ + -result "*-cursor_row must be empty for unspecified/unknown or a non-zero positive integer*" + + test renderline_info_basic_contract {info mode returns expected key shape for simple case} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 abcdef XX] + expr { + [dict exists $d result] + && [dict exists $d instruction] + && [dict exists $d cursor_column] + && [dict exists $d overflow_right] + && [dict exists $d unapplied] + && [dict get $d result] eq "XXcdef" + } + }\ + -cleanup { + }\ + -result 1 + + test renderline_basic_sgr_overlay_no_forced_reset {sgr overlay is preserved and no unconditional reset is appended} \ + -setup $common -body { + set out [overtype::renderline aaaa "\x1b\[31mB"] + expr { + [string first "\x1b\[31m" $out] >= 0 + && [string first B $out] >= 0 + && ![string match "*\x1b\[0m" $out] + } + }\ + -cleanup { + }\ + -result 1 + + test renderline_transparent_default_space {transparent 1 makes spaces in overlay pass through underlay} \ + -setup $common -body { + overtype::renderline -insert_mode 0 -transparent 1 abcde " X " + }\ + -cleanup { + }\ + -result aXcde + + test renderline_transparent_custom_regex {custom transparent regexp is honored} \ + -setup $common -body { + overtype::renderline -insert_mode 0 -transparent {[#]} abcde #Y# + }\ + -cleanup { + }\ + -result aYcde + + test renderline_expand_right_off_tracks_unapplied {non-expanding insert leaves unapplied/overflow info} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 1 -expand_right 0 abc WXYZ] + expr { + [string length [dict get $d result]] == 3 + && ([string length [dict get $d unapplied]] > 0 || [string length [dict get $d overflow_right]] > 0) + } + }\ + -cleanup { + }\ + -result 1 + + test renderline_expand_right_on_grows_result {expanding insert allows output growth on same shape} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 1 -expand_right 1 abc WXYZ] + expr { + [string length [dict get $d result]] >= 4 + && [dict get $d unapplied] eq "" + } + }\ + -cleanup { + }\ + -result 1 + + test renderline_control_cr_repositions_to_start {carriage return moves cursor back to start column} \ + -setup $common -body { + overtype::renderline -insert_mode 0 abcde "A\rZ" + }\ + -cleanup { + }\ + -result Zbcde + + test renderline_control_bs_moves_back_one {backspace rewinds one column before next char} \ + -setup $common -body { + overtype::renderline -insert_mode 0 abcde "AB\bZ" + }\ + -cleanup { + }\ + -result AZcde + + test renderline_control_lf_sets_instruction_and_unapplied {linefeed reports lf_mid and keeps tail unapplied} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 abcde "A\nZ"] + list [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list lf_mid Z] + + test renderline_widechar_transparency_default {without transparency wide first-half can be replaced directly} \ + -setup $common -body { + overtype::renderline -insert_mode 0 "A\uFF5EB" " X" + }\ + -cleanup { + }\ + -result " XB" + + test renderline_widechar_transparency_enabled {transparency preserves first half and marks exposed second half} \ + -setup $common -body { + overtype::renderline -insert_mode 0 -transparent 1 "A\uFF5EB" " X" + }\ + -cleanup { + }\ + -result "A\uFFFDXB" + + test renderline_expand_right_off_overflow_column {overflow_right_column is tracked in non-expanding insert mode} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 1 -expand_right 0 abc WXYZ] + dict get $d overflow_right_column + }\ + -cleanup { + }\ + -result 4 + + test renderline_tab_uses_stubbed_tabstops {tab advances to deterministic stop when tabstops are stubbed} \ + -setup $common -body { + with_tabstops {1 9 17 25} { + set d [overtype::renderline -info 1 -insert_mode 0 abcdefghij "A\tZ"] + list [dict get $d result] [dict get $d cursor_column] [dict get $d instruction] [dict get $d unapplied] + } + }\ + -cleanup { + }\ + -result [list AbcdefghZj 10 "" ""] + + test renderline_cursor_save_restore_esc7_esc8 {ESC7 and ESC8 restore cursor and leave trailing data unapplied} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 abcde "AB\x1b7CD\x1b8Z"] + list [dict get $d result] [dict get $d instruction] [dict get $d unapplied] [dict get $d cursor_saved_position] + }\ + -cleanup { + }\ + -result [list ABCDe restore_cursor Z [dict create row 1 column 3]] + + test renderline_cursor_save_restore_csi_s_u {CSI s and CSI u restore cursor and leave trailing data unapplied} \ + -setup $common -body { + set over "AB\x1b\[sCD\x1b\[uZ" + set d [overtype::renderline -info 1 -insert_mode 0 abcde $over] + list [dict get $d result] [dict get $d instruction] [dict get $d unapplied] [dict get $d cursor_saved_position] + }\ + -cleanup { + }\ + -result [list ABCDe restore_cursor Z [dict create row 1 column 3]] + + test renderline_csi_forward_basic {CSI nC moves cursor forward before rendering next grapheme} \ + -setup $common -body { + set over "A\x1b\[3CZ" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list AbcdZf 6 "" ""] + + test renderline_csi_back_basic {CSI nD moves cursor backward before rendering next grapheme} \ + -setup $common -body { + set over "ABCD\x1b\[2DZ" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list ABZDef 4 "" ""] + + test renderline_csi_forward_wrapmoveforward_instruction {large CSI nC can trigger wrapmoveforward with unapplied tail} \ + -setup $common -body { + set over "A\x1b\[20CZ" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $over] + list [dict get $d result] [dict get $d instruction] [dict get $d cursor_column] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list Abcdef wrapmoveforward 22 Z] + + test renderline_csi_back_wrapmovebackward_instruction {CSI nD from start can trigger wrapmovebackward with unapplied tail} \ + -setup $common -body { + set over "A\x1b\[2DZ" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $over] + list [dict get $d result] [dict get $d instruction] [dict get $d cursor_column] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list Abcdef wrapmovebackward 0 Z] + + test renderline_mode_irm_on_from0 {CSI 4h enables insert mode state} \ + -setup $common -body { + set over "\x1b\[4hX" + set d [overtype::renderline -info 1 -insert_mode 0 -autowrap_mode 1 abcdef $over] + list [dict get $d insert_mode] [dict get $d result] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list 1 Xabcde "" ""] + + test renderline_mode_irm_off_from1 {CSI 4l disables insert mode state} \ + -setup $common -body { + set over "\x1b\[4lX" + set d [overtype::renderline -info 1 -insert_mode 1 -autowrap_mode 1 abcdef $over] + list [dict get $d insert_mode] [dict get $d result] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list 0 Xbcdef "" ""] + + test renderline_mode_awm_off_from1 {CSI ?7l disables autowrap state} \ + -setup $common -body { + set over "\x1b\[?7lX" + set d [overtype::renderline -info 1 -insert_mode 0 -autowrap_mode 1 abcdef $over] + list [dict get $d autowrap_mode] [dict get $d result] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list 0 Xbcdef "" ""] + + test renderline_mode_awm_on_from0 {CSI ?7h enables autowrap state} \ + -setup $common -body { + set over "\x1b\[?7hX" + set d [overtype::renderline -info 1 -insert_mode 0 -autowrap_mode 0 abcdef $over] + list [dict get $d autowrap_mode] [dict get $d result] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list 1 Xbcdef "" ""] + + test renderline_mode_crm_on_from0 {CSI 3h enables CRM mode state} \ + -setup $common -body { + set over "\x1b\[3hX" + set d [overtype::renderline -info 1 -insert_mode 0 -autowrap_mode 1 -crm_mode 0 abcdef $over] + expr { + [dict get $d crm_mode] == 1 + && [dict get $d instruction] eq "" + && [dict get $d unapplied] eq "" + && [string length [dict get $d result]] >= 1 + } + }\ + -cleanup { + }\ + -result 1 + + test renderline_mode_crm_off_from1 {CSI 3l disables CRM mode state} \ + -setup $common -body { + set over "\x1b\[3lX" + set d [overtype::renderline -info 1 -insert_mode 0 -autowrap_mode 1 -crm_mode 1 abcdef $over] + list [dict get $d crm_mode] [dict get $d result] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list 0 Xbcdef "" ""] + + #todo + #test renderline_erase_line_sequences_currently_noop {CSI K variants keep current rendered content in this implementation} \ + # -setup $common -body { + # set k0 "AB\x1b\[0K" + # set k1 "AB\x1b\[1K" + # set k2 "AB\x1b\[2K" + # set d0 [overtype::renderline -info 1 -insert_mode 0 abcdef $k0] + # set d1 [overtype::renderline -info 1 -insert_mode 0 abcdef $k1] + # set d2 [overtype::renderline -info 1 -insert_mode 0 abcdef $k2] + # list \ + # [dict get $d0 result] [dict get $d0 instruction] [dict get $d0 cursor_column] \ + # [dict get $d1 result] [dict get $d1 instruction] [dict get $d1 cursor_column] \ + # [dict get $d2 result] [dict get $d2 instruction] [dict get $d2 cursor_column] + # }\ + # -cleanup { + # }\ + # -result [list ABcdef "" 3 ABcdef "" 3 ABcdef "" 3] + + #test renderline_erase_display_0_and_1_currently_noop {CSI J0 and J1 keep current rendered content in this implementation} \ + # -setup $common -body { + # set j0 "AB\x1b\[0J" + # set j1 "AB\x1b\[1J" + # set d0 [overtype::renderline -info 1 -insert_mode 0 abcdef $j0] + # set d1 [overtype::renderline -info 1 -insert_mode 0 abcdef $j1] + # list \ + # [dict get $d0 result] [dict get $d0 instruction] [dict get $d0 cursor_column] \ + # [dict get $d1 result] [dict get $d1 instruction] [dict get $d1 cursor_column] + # }\ + # -cleanup { + # }\ + # -result [list ABcdef "" 3 ABcdef "" 3] + + test renderline_erase_display_2_clears_and_moves {CSI J2 clears line and returns clear_and_move instruction} \ + -setup $common -body { + set j2 "AB\x1b\[2J" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $j2] + list [dict get $d result] [dict get $d instruction] [dict get $d cursor_column] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list " " clear_and_move 1 ""] + + test renderline_csi_G_absolute_column {CSI G sets absolute column and renders next grapheme there} \ + -setup $common -body { + set over "A\x1b\[5GZ" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list AbcdZf 6 "" ""] + + test renderline_csi_tick_absolute_column_alias {CSI backtick behaves as HPA alias like CSI G} \ + -setup $common -body { + set over "A\x1b\[5`Z" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list AbcdZf 6 "" ""] + + test renderline_csi_G_zero_column {CSI 0G moves cursor to column zero in this implementation} \ + -setup $common -body { + set over "A\x1b\[0GZ" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list Abcdef 0 "" ""] + + test renderline_csi_H_same_row_applies {CSI H on current row repositions and applies overlay} \ + -setup $common -body { + set over "A\x1b\[1;2HZ" + set d [overtype::renderline -info 1 -insert_mode 0 -cursor_row 1 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d cursor_row] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list AZcdef 3 1 "" ""] + + test renderline_csi_H_other_row_returns_move {CSI H to another row reports move and leaves tail unapplied} \ + -setup $common -body { + set over "A\x1b\[2;1HZ" + set d [overtype::renderline -info 1 -insert_mode 0 -cursor_row 1 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d cursor_row] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list Abcdef 1 2 move Z] + + test renderline_csi_up_reports_up_instruction {CSI A moves to a previous row and returns up instruction} \ + -setup $common -body { + set over "A\x1b\[1AZ" + set d [overtype::renderline -info 1 -insert_mode 0 -cursor_row 2 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d cursor_row] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list Abcdef 2 1 up Z] + + test renderline_csi_up_multiple_clamps_to_first_row {CSI multiple A updates row and leaves remaining text unapplied} \ + -setup $common -body { + set over "A\x1b\[2AZ" + set d [overtype::renderline -info 1 -insert_mode 0 -cursor_row 3 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d cursor_row] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list Abcdef 2 1 up Z] + + test renderline_csi_down_reports_down_instruction {CSI B moves to a later row and returns down instruction} \ + -setup $common -body { + set over "A\x1b\[1BZ" + set d [overtype::renderline -info 1 -insert_mode 0 -cursor_row 1 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d cursor_row] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list Abcdef 2 2 down Z] + + test renderline_csi_down_multiple_updates_target_row {CSI multiple B updates row and leaves remaining text unapplied} \ + -setup $common -body { + set over "A\x1b\[2BZ" + set d [overtype::renderline -info 1 -insert_mode 0 -cursor_row 1 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d cursor_row] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list Abcdef 2 3 down Z] + + test renderline_csi_next_line_moves_to_column_one {CSI E moves to next row column one and returns move instruction} \ + -setup $common -body { + set over "A\x1b\[1EZ" + set d [overtype::renderline -info 1 -insert_mode 0 -cursor_row 1 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d cursor_row] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list Abcdef 1 2 move Z] + + test renderline_csi_prev_line_moves_to_column_one {CSI F moves to previous row column one and returns move instruction} \ + -setup $common -body { + set over "A\x1b\[1FZ" + set d [overtype::renderline -info 1 -insert_mode 0 -cursor_row 2 abcdef $over] + list [dict get $d result] [dict get $d cursor_column] [dict get $d cursor_row] [dict get $d instruction] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list Abcdef 1 1 move Z] + + test renderline_info_cursor_saved_attributes_esc7 {ESC7 save captures active SGR and ESC8 restore leaves tail unapplied} \ + -setup $common -body { + set over "\x1b\[31mA\x1b7\x1b\[32mB\x1b8Z" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $over] + list \ + [vis [dict get $d result]] \ + [dict get $d instruction] \ + [dict get $d cursor_saved_position] \ + [vis [dict get $d cursor_saved_attributes]] \ + [vis [dict get $d unapplied]] + }\ + -cleanup { + }\ + -result [list {[31mA[32mB[0mcdef} restore_cursor [dict create row 1 column 2] {[31m} Z] + + test renderline_info_cursor_saved_attributes_csi_s_u {CSI s save captures active SGR and CSI u restore leaves tail unapplied} \ + -setup $common -body { + set over "\x1b\[31mA\x1b\[s\x1b\[32mB\x1b\[uZ" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $over] + list \ + [vis [dict get $d result]] \ + [dict get $d instruction] \ + [dict get $d cursor_saved_position] \ + [vis [dict get $d cursor_saved_attributes]] \ + [vis [dict get $d unapplied]] + }\ + -cleanup { + }\ + -result [list {[31mA[32mB[0mcdef} restore_cursor [dict create row 1 column 2] {[31m} Z] + + test renderline_info_replay_codes_underlay_tracks_open_underlay_sgr {open underlay SGR is reported in replay_codes and replay_codes_underlay} \ + -setup $common -body { + set under "\x1b\[34mabcdef" + set d [overtype::renderline -info 1 -insert_mode 0 $under X] + list \ + [vis [dict get $d result]] \ + [vis [dict get $d replay_codes]] \ + [vis [dict get $d replay_codes_underlay]] \ + [vis [dict get $d replay_codes_overlay]] + }\ + -cleanup { + }\ + -result [list {X[34mbcdef} {[34m} {[34m} {}] + + test renderline_info_replay_codes_overlay_tracks_overlay_sgr {overlay SGR is reported in replay_codes_overlay} \ + -setup $common -body { + set over "\x1b\[31mX" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $over] + list \ + [vis [dict get $d result]] \ + [vis [dict get $d replay_codes]] \ + [vis [dict get $d replay_codes_underlay]] \ + [vis [dict get $d replay_codes_overlay]] + }\ + -cleanup { + }\ + -result [list {[31mX[0mbcdef} {} {} {[31m}] + + test renderline_info_replay_codes_preserve_underlay_reset_boundary {underlay reset becomes replay_codes_underlay while overlay keeps its own replay} \ + -setup $common -body { + set under "\x1b\[34mabcdef\x1b\[0m" + set over "\x1b\[31mX" + set d [overtype::renderline -info 1 -insert_mode 0 $under $over] + list \ + [vis [dict get $d result]] \ + [vis [dict get $d replay_codes]] \ + [vis [dict get $d replay_codes_underlay]] \ + [vis [dict get $d replay_codes_overlay]] + }\ + -cleanup { + }\ + -result [list {[31mX[0;34mbcdef[0m} {[0m} {[0m} {[31m}] + + test renderline_exposed_custom_overtype_default {custom exposed markers are unused when overlay fully replaces wide char second half without transparency} \ + -setup $common -body { + overtype::renderline -insert_mode 0 -exposed1 L -exposed2 R "A\uFF5EB" " X" + }\ + -cleanup { + }\ + -result { XB} + + test renderline_exposed_custom_transparent_uses_exposed1 {transparent split over wide char uses custom exposed1 marker} \ + -setup $common -body { + overtype::renderline -insert_mode 0 -transparent 1 -exposed1 L -exposed2 R "A\uFF5EB" " X" + }\ + -cleanup { + }\ + -result ALXB + + test renderline_exposed_custom_startcolumn_uses_exposed2 {starting on wide char second half uses custom exposed2 marker} \ + -setup $common -body { + overtype::renderline -insert_mode 0 -startcolumn 2 -exposed1 L -exposed2 R "A\uFF5EB" X + }\ + -cleanup { + }\ + -result AXRB + + test renderline_exposed_custom_insert_mode_preserves_shifted_wide_char {insert mode keeps shifted wide char rather than exposing second half marker} \ + -setup $common -body { + overtype::renderline -insert_mode 1 -exposed1 L -exposed2 R "A\uFF5EB" X + }\ + -cleanup { + }\ + -result "XA\uFF5E" + + test renderline_exposed_custom_info_contract {info mode reports stable fields for custom exposed transparency case} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 -transparent 1 -exposed1 L -exposed2 R "A\uFF5EB" " X"] + list [dict get $d result] [dict get $d instruction] [dict get $d cursor_column] [dict get $d overflow_right] [dict get $d unapplied] + }\ + -cleanup { + }\ + -result [list ALXB "" 4 "" ""] + + test renderline_del_deletes_at_cursor {literal DEL deletes at current cursor position} \ + -setup $common -body { + overtype::renderline -insert_mode 0 abcdef "AB\x7f" + }\ + -cleanup { + }\ + -result ABdef + + test renderline_del_after_backspace_deletes_previous_column {backspace followed by DEL removes the backed-up character position} \ + -setup $common -body { + overtype::renderline -insert_mode 0 abcdef "AB\b\x7f" + }\ + -cleanup { + }\ + -result Acdef + + test renderline_del_info_contract {DEL reports stable info fields} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 abcdef "AB\x7f"] + list [dict get $d result] [dict get $d cursor_column] [dict get $d instruction] [dict get $d unapplied] [dict get $d overflow_right] + }\ + -cleanup { + }\ + -result [list ABdef 3 "" "" ""] + + test renderline_ech_one_erases_single_character_to_space {CSI X with count 1 replaces one character with a space} \ + -setup $common -body { + overtype::renderline -insert_mode 0 abcdef "AB\x1b\[1X" + }\ + -cleanup { + }\ + -result {AB def} + + test renderline_ech_two_erases_two_characters_to_spaces {CSI X with count 2 replaces two characters with spaces} \ + -setup $common -body { + overtype::renderline -insert_mode 0 abcdef "AB\x1b\[2X" + }\ + -cleanup { + }\ + -result {AB ef} + + test renderline_ech_info_contract {ECH reports stable info fields} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 abcdef "AB\x1b\[1X"] + list [dict get $d result] [dict get $d cursor_column] [dict get $d instruction] [dict get $d unapplied] [dict get $d overflow_right] + }\ + -cleanup { + }\ + -result [list {AB def} 3 "" "" ""] + + test renderline_cp437_off_preserves_control_byte {without cp437 flag, low control byte is preserved in the rendered output} \ + -setup $common -body { + set c [format "%c" 1] + overtype::renderline -insert_mode 0 abcdef "A${c}Z" + }\ + -cleanup { + }\ + -result "A\x01Zef" + + test renderline_cp437_on_maps_control_to_glyph {cp437 flag maps low control bytes to printable cp437 glyphs} \ + -setup $common -body { + set c [format "%c" 1] + overtype::renderline -cp437 1 -insert_mode 0 abcdef "A${c}Z" + }\ + -cleanup { + }\ + -result "A\u263aZdef" + + test renderline_gx_overlay_passthrough {overlay gx0 on/off sequences are preserved in rendered output} \ + -setup $common -body { + set gx "\x1b(0x\x1b(By" + set d [overtype::renderline -info 1 -insert_mode 0 abcdef $gx] + list [vis [dict get $d result]] [vis [dict get $d replay_codes]] [vis [dict get $d replay_codes_underlay]] [vis [dict get $d replay_codes_overlay]] + }\ + -cleanup { + }\ + -result [list {(0x(Bycdef} {} {} {}] + + test renderline_gx_underlay_replay_tracks_gx_close {gx underlay with plain overlay returns gx close in replay_codes} \ + -setup $common -body { + set under "\x1b(0abcdef\x1b(B" + set d [overtype::renderline -info 1 -insert_mode 0 $under X] + list [vis [dict get $d result]] [vis [dict get $d replay_codes]] [vis [dict get $d replay_codes_underlay]] [vis [dict get $d replay_codes_overlay]] + }\ + -cleanup { + }\ + -result [list {X(0bcdef(B} {(B} {} {}] + + test renderline_dch_one_deletes_and_pads_right_edge {CSI P deletes one character at cursor and pads the right edge with an empty cell} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 abcdef "AB\x1b\[1P"] + list \ + [vis [dict get $d result]] \ + [dict get $d cursor_column] \ + [dict get $d instruction] \ + [vis [dict get $d unapplied]] \ + [vis [dict get $d overflow_right]] + }\ + -cleanup { + }\ + -result [list {ABdef } 3 "" "" ""] + + test renderline_dch_two_deletes_and_pads_right_edge {CSI P with count 2 deletes two characters and pads two empty cells on the right} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 abcdef "AB\x1b\[2P"] + list \ + [vis [dict get $d result]] \ + [dict get $d cursor_column] \ + [dict get $d instruction] \ + [vis [dict get $d unapplied]] \ + [vis [dict get $d overflow_right]] + }\ + -cleanup { + }\ + -result [list {ABef } 3 "" "" ""] + + test renderline_dch_defaults_count_to_one {CSI P with empty parameter behaves like count 1} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 abcdef "AB\x1b\[P"] + list [vis [dict get $d result]] [dict get $d cursor_column] + }\ + -cleanup { + }\ + -result [list {ABdef } 3] + + test renderline_ich_one_inserts_blank_and_shifts_right {CSI @ with count 1 inserts one blank at the cursor position} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 abcdef "AB\x1b\[1@"] + list [dict get $d result] [dict get $d cursor_column] [dict get $d instruction] [dict get $d unapplied] [dict get $d overflow_right] + }\ + -cleanup { + }\ + -result [list {AB cde} 3 "" "" ""] + + test renderline_ich_two_inserts_two_blanks_and_shifts_right {CSI @ with count 2 inserts two blanks and shifts the rest of the line right} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 abcdef "AB\x1b\[2@"] + list [dict get $d result] [dict get $d cursor_column] [dict get $d instruction] [dict get $d unapplied] [dict get $d overflow_right] + }\ + -cleanup { + }\ + -result [list {AB cd} 3 "" "" ""] + + test renderline_ich_clamps_insert_count_to_line_width {CSI @ clamps very large insert counts to the current line width} \ + -setup $common -body { + set d [overtype::renderline -info 1 -insert_mode 0 abcdef "AB\x1b\[10@"] + list [dict get $d result] [dict get $d cursor_column] [dict get $d instruction] [dict get $d unapplied] [dict get $d overflow_right] + }\ + -cleanup { + }\ + -result [list {AB } 3 "" "" ""] + #todo - test #P% overtype::left -transparent 1 [textblock::block 10 2 -] " [a+ underline yellow].\n [a+ underline yellow]yyy" #- --.------- diff --git a/src/modules/test/runtestmodules.tcl b/src/modules/test/runtestmodules.tcl index b7846da6..3d46cea0 100644 --- a/src/modules/test/runtestmodules.tcl +++ b/src/modules/test/runtestmodules.tcl @@ -3,6 +3,65 @@ #(plain tclsh may stall - todo - review reasons for this and whether shellfilter can be modified to support ordinary tclsh) #A known working copy of a punk shell executable should be placed on the path and the shebang line updated to reflect this +#------------------------------------ +lassign [split [info tclversion] .] tcl_major tcl_minor +set script_dir [file dirname [file normalize [info script]]] +set modules_posn [string first /modules/ $script_dir] +if {$modules_posn < 0} { + puts stderr "Error: script dir $script_dir does not contain /modules/" + #exit 2 ;#don't call exit. If run in a single proc it can cause the hole test suite exit before summary can be printed. + return -code error "Error: script dir $script_dir does not contain /modules/" +} +set modules_base [string range $script_dir 0 $modules_posn-1] +if {[file tail $modules_base] eq "src"} { + set project_root [file dirname $modules_base] +} else { + set project_root $modules_base +} +puts stderr "runtestmodules.tcl project_root: $project_root" +#use the unbuilt modules/libraries under development rather than the installed versions. +#The unbuilt modules should have a higher version number (such as the magic version number 999999.0a1.0) than any installed versions to ensure they are preferred. +tcl::tm::add [file normalize $project_root/src/modules] +tcl::tm::add [file normalize $project_root/src/modules_tcl$tcl_major] +tcl::tm::add [file normalize $project_root/src/vendormodules] +tcl::tm::add [file normalize $project_root/src/vendormodules_tcl$tcl_major] + +# add 'package ifneeded' definitions for unbuilt #modpod modules. +#first gather subdirectories of modules that contain #modpod-*-999999.0a1.0 in their name - these should be the unbuilt versions of zip based modules. +#set subfolders [punk::path::subfolders -recursive [file normalize $project_root/src/modules] -match */#modpod-*-999999.0a1.0] +#'punk::path::subfolders' currently only supports negative matching with -exclude, so we have to filter for the positive match ourselves. +set subfolders [punk::path::subfolders -recursive [file normalize $project_root/src/modules] -exclude {**/_build/** **/_build}] +foreach sub $subfolders { + #In most cases we could use string match - but the * within modpod-*-999999.0a1.0 could match a forward slash which could then match some other file under a #modpod- folder structure, + #so we use globmatchpath which treats * as matching any characters except path separators. + if {[globmatchpath "**/#modpod-*-999999.0a1.0" $sub]} { + set modname [file tail $sub] + set modname [string range $modname 8 end-12] ;#strip off #modpod- and -999999.0a1.0 + set modpath [file join $sub "$modname-999999.0a1.0.tm"] + #!!!! + #todo - calculate fully qualified module name based on path relative to the modules folder we added to the tcl::tm path. + if {[file exists $modpath]} { + puts stderr "runtestmodules.tcl adding package ifneeded for modpod module $modname at path $modpath" + package ifneeded $modname 999999.0a1.0 [list source $modpath] + } else { + puts stderr "runtestmodules.tcl warning: expected mod.tcl not found for modpod module $modname at path $modpath" + } + } +} + +set libdir [file normalize $project_root/src/lib] +set libvdir [file normalize $project_root/src/lib/tcl$tcl_major] +set libvldir [file normalize $project_root/src/vendorlib] +set libvlvdir [file normalize $project_root/src/vendorlib_tcl$tcl_major] +foreach d [list $libdir $libvdir $libvldir $libvlvdir] { + if {$d ni $::auto_path} { + lappend ::auto_path $d + } +} +#------------------------------------ +puts stderr "runtestmodules.tcl ::auto_path: $::auto_path" +puts stderr "runtestmodules.tcl tcl::tm::list: [tcl::tm::list]" + package require punk package require punk::args @@ -122,7 +181,7 @@ foreach pkg $punktestpkgs { foreach ln [split $chunk \n] { incr i if {[string match "Tests ended at*" $ln]} { - puts stdout " [punk::ansi::ansistring VIEW -lf 2 -cr 1 "$pkg $ln"]" + puts stdout "<$pkg> $ln" } elseif {[string match "*:*Total*Passed*Skipped*Failed*" $ln]} { set fields [lrange $ln 1 end] dict for {K v} $fields { @@ -136,16 +195,26 @@ foreach pkg $punktestpkgs { } } } - puts stdout "$pkg $ln" + puts stdout "<$pkg> $ln" + } elseif {[string match "*Sourced * Test Files*" $ln]} { + puts stdout "<$pkg> $ln" } else { - puts stdout " $ln" + if {[string trim $ln] ne ""} { + puts stdout " $ln" + } else { + puts -nonewline stdout "\n" + } #puts stdout "$i" } } flush stdout } stderr { - puts stderr " [punk::ansi::ansistring VIEW -lf 2 -cr 1 $chunk]" + #puts stderr " [punk::ansi::ansistring VIEW -lf 2 -cr 1 $chunk]" + set chunkview [punk::ansi::ansistring VIEW -lf 2 -cr 1 $chunk] + foreach ln [split $chunkview \n] { + puts stderr " $ln" + } flush stderr } default { diff --git a/src/tests/all.tcl b/src/tests/all.tcl new file mode 100644 index 00000000..66c8e779 --- /dev/null +++ b/src/tests/all.tcl @@ -0,0 +1,66 @@ +#!tclsh +#This script uses shellfilter::run calls under the hood +lassign [split [info tclversion] .] tcl_major tcl_minor + +set script_dir [file dirname [info script]] + +#------------------------------------ +#use the unbuilt modules/libraries under development rather than the installed versions. +set original_tmlist [tcl::tm::list] +tcl::tm::remove {*}$original_tmlist +tcl::tm::add [file normalize $script_dir/../modules] ;#ie /src/modules +tcl::tm::add [file normalize $script_dir/../modules_tcl$tcl_major] +tcl::tm::add {*}[lreverse $original_tmlist] +set libdir [file normalize $script_dir/../lib] +set libvdir [file normalize $script_dir/../lib/tcl$tcl_major] +if {$libdir ni $::auto_path} { + lappend ::auto_path $libdir +} +if {$libvdir ni $::auto_path} { + lappend ::auto_path $libvdir +} +#------------------------------------ +package require tcltest + + +package require punk +package require punk::args +punk::args::define { + @id -id (script)::runtestmodules + @cmd -name runtestmodules -help\ + "Run test:: modules that support the packagetest api + (have RUN command)" + -tcltestoptions -type dict -default "" -help\ + "pairs of flags/values that will be passed to tcltest::configure before running the tests. + For example, to run only tests with names matching *foo* and *bar* you could use: + -tcltestoptions {-file {*foo* *bar*}} + " + @values -min 0 -max -1 + glob -type string -multiple 1 -optional 1 -help\ + " names or glob patterns of test files to run." +} +set argd [punk::args::parse $::argv withid (script)::runtestmodules] +lassign [dict values $argd] leaders opts values received +set tcltestoptions [dict get $opts -tcltestoptions] +if {![dict exists $received glob]} { + set file_globs [list *.test] +} else { + set file_globs [dict get $values glob] +} + +set ::argv $tcltestoptions +set ::argc [llength $tcltestoptions] +#set ::argv {} +#set ::argc 0 + +tcltest::configure -verbose "body pass skip error usec" +tcltest::configure -testdir $script_dir +tcltest::configure -file $file_globs +#review - single process has less isolation - but works better in this case. +#(some tclsh shells can hang when running with -singleproc false - needs investigation) +#tclte::configure -singleproc true +tcltest::configure -singleproc true +dict for {k v} $tcltestoptions { + tcltest::configure $k $v +} +tcltest::runAllTests \ No newline at end of file diff --git a/src/tests/modules/opunk/str/tests/all.tcl b/src/tests/modules/opunk/str/tests/all.tcl new file mode 100644 index 00000000..28b21c97 --- /dev/null +++ b/src/tests/modules/opunk/str/tests/all.tcl @@ -0,0 +1,39 @@ + +if {[llength $::argv]} { + puts stderr "$script_dir ::argv $::argv" +} + + +#------------------------------------ +lassign [split [info tclversion] .] tcl_major tcl_minor +set script_dir [file dirname [file normalize [info script]]] +set src_tests_posn [string first /src/tests/ $script_dir] +if {$src_tests_posn < 0} { + puts stderr "Error: script dir $script_dir does not contain /src/tests/" + #exit 2 ;#don't call exit. If run in a single proc it can cause the hole test suite exit before summary can be printed. + return -code error "Error: script dir $script_dir does not contain /src/tests/" +} +set project_root [string range $script_dir 0 $src_tests_posn-1] +#use the unbuilt modules/libraries under development rather than the installed versions. +#The unbuilt modules should have a higher version number (such as the magic version number 999999.0a1.0) than any installed versions to ensure they are preferred. +tcl::tm::add [file normalize $project_root/src/modules] +tcl::tm::add [file normalize $project_root/src/modules_tcl$tcl_major] +set libdir [file normalize $project_root/src/lib] +set libvdir [file normalize $project_root/src/lib/tcl$tcl_major] +if {$libdir ni $::auto_path} { + lappend ::auto_path $libdir +} +if {$libvdir ni $::auto_path} { + lappend ::auto_path $libvdir +} +#------------------------------------ + +package require tcltest +puts "----------------- [tcl::tm::list]" + +#tcltest::configure -debug 1 +tcltest::configure -singleproc true +tcltest::configure {*}$::argv +tcltest::configure -testdir $script_dir +tcltest::configure -asidefromdir * ;#only the toplevel all.tcl should recurse. +tcltest::runAllTests \ No newline at end of file diff --git a/src/tests/modules/opunk/str/tests/str.test b/src/tests/modules/opunk/str/tests/str.test new file mode 100644 index 00000000..25d3ceed --- /dev/null +++ b/src/tests/modules/opunk/str/tests/str.test @@ -0,0 +1,84 @@ +package require tcltest +tcltest::configure {*}$::argv + +#------------------------------------ +lassign [split [info tclversion] .] tcl_major tcl_minor +set script_dir [file dirname [file normalize [info script]]] +set src_tests_posn [string first /src/tests/ $script_dir] +if {$src_tests_posn < 0} { + puts stderr "Error: script dir $script_dir does not contain /src/tests/" + #exit 2 ;#don't call exit. If run in a single proc it can cause the hole test suite exit before summary can be printed. + return -code error "Error: script dir $script_dir does not contain /src/tests/" +} +set project_root [string range $script_dir 0 $src_tests_posn-1] +#use the unbuilt modules/libraries under development rather than the installed versions. +#The unbuilt modules should have a higher version number (such as the magic version number 999999.0a1.0) than any installed versions to ensure they are preferred. +tcl::tm::add [file normalize $project_root/src/modules] +tcl::tm::add [file normalize $project_root/src/modules_tcl$tcl_major] +set libdir [file normalize $project_root/src/lib] +set libvdir [file normalize $project_root/src/lib/tcl$tcl_major] +if {$libdir ni $::auto_path} { + lappend ::auto_path $libdir +} +if {$libvdir ni $::auto_path} { + lappend ::auto_path $libvdir +} +#------------------------------------ + +puts stdout "==================== $::argv" +puts stdout "==================== [tcl::tm::list]" +package require overtype +package require opunk::str + +namespace eval ::testspace { + namespace import ::tcltest::* + variable common { + set result "" + } + + # Temporarily replaces punk::console::get_tabstops so tab-related tests are deterministic. + proc with_tabstops {tabstops body} { + variable __tabstops + set __tabstops $tabstops + set had_original [expr {[llength [info commands ::punk::console::get_tabstops]] > 0}] + if {$had_original} { + rename ::punk::console::get_tabstops ::testspace::__orig_get_tabstops + } + namespace eval ::punk::console {} + proc ::punk::console::get_tabstops {{inoutchannels {stdin stdout}}} { + return [set ::testspace::__tabstops] + } + + set code [catch {uplevel 1 $body} out opts] + + rename ::punk::console::get_tabstops {} + if {$had_original} { + rename ::testspace::__orig_get_tabstops ::punk::console::get_tabstops + } + return -options $opts $out + } + proc vis {s} { + string map [list "\u0000" "" "\x1b" "" "\n" "" "\r" "" "\t" ""] $s + } + + + test opunkstr_basic {basic string object creation and properties}\ + -setup $common -body { + set text "abcde" + set o [opunk::Str new $text] + set result [list \ + [string equal $text [opunk::Str::get $o]] \ + [opunk::Str::count $o] \ + [expr {[opunk::Str::has_ansi $o] && true}] \ + ] + + }\ + -cleanup { + }\ + -result [list {*}{ + 1 5 0 + }] + +} + +tcltest::cleanupTests ;#needed to produce test summary. diff --git a/src/tests/modules/punk/path/tests/all.tcl b/src/tests/modules/punk/path/tests/all.tcl new file mode 100644 index 00000000..cdee0917 --- /dev/null +++ b/src/tests/modules/punk/path/tests/all.tcl @@ -0,0 +1,38 @@ + +lassign [split [info tclversion] .] tcl_major tcl_minor + + +set script_dir [file dirname [file normalize [info script]]] +if {[llength $::argv]} { + puts stderr "$script_dir ::argv $::argv" +} + +set src_tests_posn [string first /src/tests/ $script_dir] +if {$src_tests_posn < 0} { + puts "Error: script dir $script_dir does not contain /src/tests/" + exit 2 +} +set project_root [string range $script_dir 0 $src_tests_posn-1] + +#------------------------------------ +#use the unbuilt modules/libraries under development rather than the installed versions. +#The unbuilt modules should have a higher version number (such as the magic version number 999999.0a1.0) than any installed versions to ensure they are preferred. +tcl::tm::add [file normalize $project_root/src/modules] +tcl::tm::add [file normalize $project_root/src/modules_tcl$tcl_major] +set libdir [file normalize $project_root/src/lib] +set libvdir [file normalize $project_root/src/lib/tcl$tcl_major] +if {$libdir ni $::auto_path} { + lappend ::auto_path $libdir +} +if {$libvdir ni $::auto_path} { + lappend ::auto_path $libvdir +} +#------------------------------------ +package require tcltest + +#tcltest::configure -debug 1 +tcltest::configure -singleproc true +tcltest::configure {*}$::argv +tcltest::configure -testdir $script_dir +tcltest::configure -asidefromdir * ;#only the toplevel all.tcl should recurse. +tcltest::runAllTests \ No newline at end of file diff --git a/src/tests/modules/punk/path/tests/path.test b/src/tests/modules/punk/path/tests/path.test new file mode 100644 index 00000000..a20eb5a4 --- /dev/null +++ b/src/tests/modules/punk/path/tests/path.test @@ -0,0 +1,33 @@ +package require tcltest +tcltest::configure {*}$::argv + + +package require overtype +package require punk::path + +namespace eval ::testspace { + namespace import ::tcltest::* + variable common { + set result "" + } + + + test globmatchpath_basic {Test single star between slashes pathglob argument will match exactly a single level}\ + -setup $common -body { + + set result [list {*}{ + } [punk::path::globmatchpath /etc/*/*.doc /etc/A/test.doc] {*}{ + } [punk::path::globmatchpath /etc/*/*.doc /etc/A/B/test.doc] {*}{ + } [punk::path::globmatchpath /etc/*/*.doc /etc/test.doc] + ] + + }\ + -cleanup { + }\ + -result [list {*}{ + 1 0 0 + }] + +} + +tcltest::cleanupTests ;#needed to produce test summary. \ No newline at end of file diff --git a/src/vfs/_vfscommon.vfs/modules/opunk/str-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/opunk/str-0.1.0.tm index d81a8ac9..11c9794b 100644 --- a/src/vfs/_vfscommon.vfs/modules/opunk/str-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/opunk/str-0.1.0.tm @@ -26,6 +26,7 @@ package require voo package require punk::assertion package require punk::char package require punk::ansi +package require punk::args #Note we are doing a heavy trade for space vs time when using punk::str @@ -48,31 +49,49 @@ tcl::namespace::eval ::opunk::str { #--------------------------------------- } - set unshare_script { - #--------------------------------------- - #unshare fields - set o_class [lindex $this 0] - lset this 0 {} - set o_string [lindex $this 1] - lset this 1 {} - set o_count [lindex $this 2] - lset this 2 {} - set o_ansisplits [lindex $this 3] - lset this 3 {} - set o_elements [lindex $this 4] - lset this 4 {} - set o_graphemes [lindex $this 5] - lset this 5 {} - set o_ptindex [lindex $this 5] - lset this 6 {} - set o_ptlist [lindex $this 7] - lset this 7 {} - set o_sgrstacks [lindex $this 8] - lset this 8 {} - set o_gx0states [lindex $this 9] - lset this 9 {} - #--------------------------------------- + set unshare_script1 { + #--------------------------------------- + #unshare fields + #Note VOO's somewhat unfortunate choice to use the user's field names as namespace variables for the field index values. + #these shouldn't collide unless you use 'variable o_fieldname' in a method + set o_class [lindex $this 0] + lset this 0 {} + set o_string [lindex $this 1] + lset this 1 {} + set o_count [lindex $this 2] + lset this 2 {} + set o_ansisplits [lindex $this 3] + lset this 3 {} + set o_elements [lindex $this 4] + lset this 4 {} + set o_graphemes [lindex $this 5] + lset this 5 {} + set o_ptindex [lindex $this 6] + lset this 6 {} + set o_ptlist [lindex $this 7] + lset this 7 {} + set o_sgrstacks [lindex $this 8] + lset this 8 {} + set o_gx0states [lindex $this 9] + lset this 9 {} + #--------------------------------------- } + set unshare_script [punk::args::lib::tstr { + #--------------------------------------- + #unshare fields + lassign $this o_class o_string o_count o_ansisplits o_elements o_graphemes o_ptindex o_ptlist o_sgrstacks o_gx0states + lset this 0 {} + lset this 1 {} + lset this 2 {} + lset this 3 {} + lset this 4 {} + lset this 5 {} + lset this 6 {} + lset this 7 {} + lset this 8 {} + lset this 9 {} + #--------------------------------------- + }] set restore_script_runtime { @@ -85,26 +104,27 @@ tcl::namespace::eval ::opunk::str { } #--------------------------------------- } - set restore_script { - #--------------------------------------- - #restore fields - lset this 0 $o_class - lset this 1 $o_string - lset this 2 $o_count - lset this 3 $o_ansisplits - lset this 4 $o_elements - lset this 5 $o_graphemes - lset this 6 $o_ptindex - lset this 7 $o_ptlist - lset this 8 $o_sgrstacks - lset this 9 $o_gx0states - #--------------------------------------- - } + + set restore_script [punk::args::lib::tstr { + #--------------------------------------- + #restore fields + lset this 0 $o_class + lset this 1 $o_string + lset this 2 $o_count + lset this 3 $o_ansisplits + lset this 4 $o_elements + lset this 5 $o_graphemes + lset this 6 $o_ptindex + lset this 7 $o_ptlist + lset this 8 $o_sgrstacks + lset this 9 $o_gx0states + #--------------------------------------- + }] variable etype_grapheme "g" - variable etype_sgr "sgr" - variable etype_gx0 "gx0" - variable etype_other "o" + variable etype_sgr "sgr" + variable etype_gx0 "gx0" + variable etype_other "o" #warning: uses tcl::unsupported::representation. proc estimate_list_mem {listVar {seen {}}} { @@ -150,7 +170,8 @@ tcl::namespace::eval ::opunk::str { #catches 'value is a pure string' #for utf-8 strings the actual memory usage could be up to 3 bytes per char, but we will estimate based on the common case of 1 byte per char for ascii, #and the fact that many strings will be ascii or mostly ascii. - incr total_size [string length $item] + #incr total_size [string length $item] + incr total_size [string length [encoding convertto utf-8 $item]] } list { # Recursively estimate memory for nested lists @@ -197,7 +218,7 @@ tcl::namespace::eval ::opunk::str { private { #we still use o_ prefix for private variables even though voo has the 'my' prefix, because we still want to distinguish them from local variables in 'update' methods. #This also gives consistency with the tclOO implementations of similar classes (e.g ansistring) and makes it easier to copy code between them. - string_t o_class "opunk::Str" + string_t o_class "::opunk::Str" ;#fully qualified string_t o_string "" int_t o_count -1 ;#count first updated when string appended or a method causes my.MakeSplit to run (or by count method if constructor argument was empty string) list_t o_ansisplits [list] ;#split version of string - odd number of elements - alternating plain text and ansicodestrings. Updated by my.makesplit @@ -217,10 +238,10 @@ tcl::namespace::eval ::opunk::str { #my.makesplit is called whenever string is modified (e.g by append) or when count method is called with an empty string argument (e.g constructor) #my.makesplit should update count to be the number of plain text graphemes in the split version of string - upvar ::opunk::str::etype_grapheme etype_grapheme - upvar ::opunk::str::etype_sgr etype_sgr - upvar ::opunk::str::etype_gx0 etype_gx0 - upvar ::opunk::str::etype_other etype_other + #g upvar ::opunk::str::etype_grapheme etype_grapheme + #sgr upvar ::opunk::str::etype_sgr etype_sgr + #gx0 upvar ::opunk::str::etype_gx0 etype_gx0 + #o upvar ::opunk::str::etype_other etype_other #The split with each code as it's own element is more generally useful. set o_ansisplits [punk::ansi::ta::split_codes_single $o_string]; @@ -253,7 +274,7 @@ tcl::namespace::eval ::opunk::str { lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state foreach grapheme [punk::char::grapheme_split $pt] { - lappend o_elements [list $etype_grapheme $grapheme] + lappend o_elements [list g $grapheme] lappend o_graphemes $grapheme lappend o_ptindex $ptindex @@ -274,26 +295,26 @@ tcl::namespace::eval ::opunk::str { #set codestack [list "\x1b\[m"] #vs set codestack [list $code] ;#pass through as is. - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::is_sgr $code]} { #basic simplification first - remove straight dupes set dup_posns [lsearch -all -exact $codestack $code] ;#must be -exact because of square-bracket glob chars set codestack [lremove $codestack {*}$dup_posns] lappend codestack $code - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } else { if {[punk::ansi::codetype::is_gx_open $code]} { set gx0_state 1 - lappend o_elements [list $etype_gx0 [expr {true}]] ;#don't store code - will complicate debugging if we spit it out and jump character sets + lappend o_elements [list gx0 [expr {true}]] ;#don't store code - will complicate debugging if we spit it out and jump character sets } elseif {[punk::ansi::codetype::is_gx_close $code]} { set gx0_state 0 - lappend o_elements [list $etype_gx0 [expr {false}]] ;#don't store code - will complicate debugging if we spit it out and jump character sets + lappend o_elements [list gx0 [expr {false}]] ;#don't store code - will complicate debugging if we spit it out and jump character sets } else { #lappend o_elements [list other $code] - lappend o_elements [list $etype_other $code] + lappend o_elements [list o $code] } } #after each code (ignoring bogus empty final due to foreach with 2 vars on odd-length list) increment the current_split_index @@ -308,13 +329,13 @@ tcl::namespace::eval ::opunk::str { #set num_splits [llength [my.get.o_ansisplits $this]] assert {[llength $o_ptlist] == [llength $o_sgrstacks] && [llength $o_ptlist] == [llength $o_gx0states] && [llength $o_graphemes] == [llength $o_ptindex]} - - } - method test {} { + if {[llength $this] != 10} { + error "invalid object" + } puts stdout "string is '[my.get.o_string $this]' and count is '[my.get.o_count $this]'" } } @@ -322,18 +343,38 @@ tcl::namespace::eval ::opunk::str { method get.class {} { return [my.get.o_class $this] } + method debug {} { + if {[llength $this] != 10} { + error "invalid object" + } + set fields [punk::lib::showdict -roottype list [class.fields]] + set data [punk::lib::showdict -roottype list $this] + set result [textblock::join -- $fields " " $data] + + set o_ptlist [my.get.o_ptlist $this] + set o_sgrstacks [my.get.o_sgrstacks $this] + set o_gx0states [my.get.o_gx0states $this] + set o_graphemes [my.get.o_graphemes $this] + set o_ptindex [my.get.o_ptindex $this] + if {[llength $o_ptlist] != [llength $o_sgrstacks]} { + set result "$result\n\x1b\[31mERROR: length of ptlist and sgrstacks do not match - [llength $o_ptlist] vs [llength $o_sgrstacks]\x1b\[m" + } + if {[llength $o_ptlist] != [llength $o_gx0states]} { + set result "$result\n\x1b\[31mERROR: length of ptlist and gx0states do not match - [llength $o_ptlist] vs [llength $o_gx0states]\x1b\[m" + } + if {[llength $o_graphemes] != [llength $o_ptindex]} { + set result "$result\n\x1b\[31mERROR: length of graphemes and ptindex do not match - [llength $o_graphemes] vs [llength $o_ptindex]\x1b\[m" + } + + return $result + } #This is the count of visible graphemes + non-ansi control chars. Not equal to column width or to the Tcl string length of the ansistripped string!!! - method count {} -upvar { - if {[my.get.o_count $this] == -1} { - #only initial string present - if {[my.get.o_string $this] eq ""} { - my.set.o_count this 0 - return 0 - } - my.makesplit this + method count {} { + if {[llength $this] != 10} { + error "invalid object" } - return [my.get.o_count $this] + my.get.o_count $this } method storage_estimate2 {} { #very slow @@ -343,11 +384,16 @@ tcl::namespace::eval ::opunk::str { method storage_estimate {} { #we can speed it up a little by not doing the recursive estimation for the top level lists that we know are just lists of strings or ints #this involves many less calls to tcl::unsupported::representation. + if {[llength $this] != 10} { + error "invalid object" + } array set seen {} set total_size 0 set idx 0 - upvar o_elements idx_o_elements - upvar o_ptindex idx_o_ptindex + # if the voo author had named these sensibly to avoid collisions we could have just done: variable idx_o_string. + upvar ::opunk::Str::o_elements idx_o_elements + upvar ::opunk::Str::o_graphemes idx_o_graphemes + upvar ::opunk::Str::o_ptindex idx_o_ptindex foreach element $this { if {$idx == $idx_o_elements} { #o_elements @@ -362,10 +408,13 @@ tcl::namespace::eval ::opunk::str { } incr total_size 48 } - } elseif {$idx = $idx_o_graphemes} { + } elseif {$idx == $idx_o_graphemes} { incr total_size 48 incr total_size 8 - incr total_size 1 + set count [llength $element] + #ascii vs utf-8 - we will estimate based on the common case of 1 byte per char for ascii, but some graphemes may be multiple bytes. + #review + incr total_size [expr {$count * 1}] } elseif {$idx == $idx_o_ptindex} { #o_splitindex set count [llength $element] @@ -380,55 +429,44 @@ tcl::namespace::eval ::opunk::str { return $total_size } - #we can't use -update if we also use other helper methods that take 'this' as an argument and may modify any of the same fields - #- so we have to do the upvar-detach-try-finally dance ourselves. - method append {args} -upvar [string map [list %unshare% $::opunk::str::unshare_script_runtime %restore% $::opunk::str::restore_script_runtime] { - upvar $thisVar this - upvar o_string idx_o_string - if {![llength $args]} { + # -undent 0 so source of proc will match indentation as in this file. (just as for any other 'info proc' output). + method append {args} -upvar [punk::args::lib::tstr -undent 0 { + if {[llength $this] != 10} { + error "invalid object $thisVar" + } + if {![llength $args] || [set catstr [string cat {*}$args]] eq ""} { #nothing to append - just return current string - return [lindex $this $idx_o_string] + return [my.get.o_string $this] } - upvar o_ansisplits idx_o_ansisplits - - set catstr [::join $args ""] set catstr_has_ansi [punk::ansi::ta::detect $catstr] - if {!$catstr_has_ansi} { - #if no initial internal-split - generate it without first appending our additions - as we can more efficiently append them to the internal state - if {![llength [lindex $this $idx_o_ansisplits 0]]} { - #initialise o_count because we need to add to it. - #The count method will do this by calling makesplit only if it needs to. (which will create ansisplits for anything except empty string) - count this - } - } - - if {!$catstr_has_ansi} { -%unshare% - + ${$::opunk::str::unshare_script} try { - upvar ::opunk::str::etype_grapheme etype_grapheme + #upvar ::opunk::str::etype_grapheme etype_grapheme #ansi-free additions #puts stderr "this: $this" ::append o_string $catstr;# only append after updating using count method above #puts stderr "after append: $o_string" + + + #TODO if {![llength $o_ptlist]} { #If the object was initialised with empty string - we can still have empty lists for o_ptlist and o_ansisplits lappend o_ptlist $catstr lappend o_sgrstacks [list] lappend o_gx0states [expr {false}] } else { - lset o_ptlist end [tcl::string::cat [lindex $o_ptlist end] $catstr] - lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] $catstr] + ledit o_ptlist end end [tcl::string::cat [lindex $o_ptlist end] $catstr] } + ledit o_ansisplits end end [tcl::string::cat [lindex $o_ansisplits end] $catstr] set ptindex [expr {[llength $o_ptlist] - 1}] foreach grapheme [punk::char::grapheme_split $catstr] { - lappend o_elements [list $etype_grapheme $grapheme] + lappend o_elements [list g $grapheme] lappend o_graphemes $grapheme lappend o_ptindex $ptindex @@ -437,28 +475,28 @@ tcl::namespace::eval ::opunk::str { } #incr o_count [my DoCount $catstr] ;#from before we were doing grapheme split.. review return $o_string + } trap * {emsg eopt} { + puts stderr "Error in append method: $emsg" } finally { -%restore% + ${$::opunk::str::restore_script} } - - + #todo error? } #appending string with ANSI codes. - upvar o_string idx_o_string - upvar o_ansisplits idx_o_ansisplits - if {![llength [lindex $this $idx_o_ansisplits]]} { + if {![llength [my.get.o_ansisplits $this]]} { + #string will only have no ansisplits if it is empty string. + #----- + # obsolete. #if we have an initial string - but no internal split-state because this is our first append and no methods have caused its generation - we can run more efficiently by combining it with the first append - #set o_string [my.get.o_string $this] - set o_string [lindex $this $idx_o_string] + #----- + set o_string [my.get.o_string $this] lset this $idx_o_string {} puts stderr "initial string: '$o_string' and catstr: '$catstr'" ::append o_string $catstr ;#append before split and count on whole lot lset this $idx_o_string $o_string my.makesplit this ;#update o_count - #set combined_plaintext [join $o_ptlist ""] - #set o_count [my DoCount $combined_plaintext] #assert {[llength $o_ptlist] == [llength $o_sgrstacks] && [llength $o_ptlist] == [llength $o_gx0states] && [llength $o_graphemes] == [llength $o_ptindex]} return $o_string } @@ -466,19 +504,18 @@ tcl::namespace::eval ::opunk::str { #we have pre-existing ansisplits. #update each element of internal state incrementally without reprocessing what is already there. -%unshare% + ${$::opunk::str::unshare_script} try { - upvar ::opunk::str::etype_grapheme etype_grapheme - upvar ::opunk::str::etype_sgr etype_sgr - upvar ::opunk::str::etype_gx0 etype_gx0 - upvar ::opunk::str::etype_other etype_other + # upvar ::opunk::str::etype_grapheme etype_grapheme + # upvar ::opunk::str::etype_sgr etype_sgr + # upvar ::opunk::str::etype_gx0 etype_gx0 + # upvar ::opunk::str::etype_other etype_other ::append o_string $catstr set newsplits [punk::ansi::ta::split_codes_single $catstr] set ptnew "" set codestack [lindex $o_sgrstacks end] set gx0_state [lindex $o_gx0states end] - set current_split_index [expr {[llength $o_ansisplits] -1}] #first pt must be merged with last element of o_ptlist set new_pt_list [list] @@ -494,40 +531,38 @@ tcl::namespace::eval ::opunk::str { ::append ptnew $pt foreach grapheme [punk::char::grapheme_split $pt] { - lappend o_elements [list $etype_grapheme $grapheme] + lappend o_elements [list g $grapheme] lappend o_graphemes $grapheme lappend o_ptindex $ptindex incr o_count } - incr current_split_index ;#increment 1 of 2 within each loop if {$code ne ""} { #maintenance - dup in MakeSplit! if {[punk::ansi::codetype::is_sgr_reset $code]} { set codestack [list $code] ;#pass through as is. #vs #set codestack [list "\x1b\[m"] ;#normalize. - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::is_sgr $code]} { #basic simplification first - remove straight dupes set dup_posns [lsearch -all -exact $codestack $code] ;#must be -exact because of square-bracket glob chars set codestack [lremove $codestack {*}$dup_posns] lappend codestack $code - lappend o_elements [list $etype_sgr $code] + lappend o_elements [list sgr $code] } else { if {[punk::ansi::codetype::is_gx_open $code]} { set gx0_state 1 - lappend o_elements [list $etype_gx0 1] ;#don't store code - will complicate debugging if we spit it out and jump character sets + lappend o_elements [list gx0 1] ;#don't store code - will complicate debugging if we spit it out and jump character sets } elseif {[punk::ansi::codetype::is_gx_close $code]} { set gx0_state 0 - lappend o_elements [list $etype_gx0 0] ;#don't store code - will complicate debugging if we spit it out and jump character sets + lappend o_elements [list gx0 0] ;#don't store code - will complicate debugging if we spit it out and jump character sets } else { - lappend o_elements [list $etype_other $code] + lappend o_elements [list o $code] } } - incr current_split_index ;#increment 2 of 2 } incr ptindex } @@ -540,36 +575,155 @@ tcl::namespace::eval ::opunk::str { lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] [lindex $newsplits 0]] lappend o_ansisplits {*}[lrange $newsplits 1 end] - #if {$o_count eq ""} { - # #we have splits - but didn't count graphemes? - # set o_count [my DoCount [join $o_ptlist ""]] ;#o_ptlist already has ptnew parts - #} else { - # incr o_count [my DoCount $ptnew] - #} - assert {[llength $o_ptlist] == [llength $o_sgrstacks] && [llength $o_ptlist] == [llength $o_gx0states] && [llength $o_graphemes] == [llength $o_ptindex]} return $o_string } finally { -%restore% + ${$::opunk::str::restore_script} } }] - #returns the ansiless string - doesn't affect the stored state other than initialising it's internal state if it wasn't already - method stripped -upvar {} { - if {![llength [my.get.o_ansisplits $this]]} {my.makesplit this} - return [join [my.get.o_ptlist $this] ""] + + + # accept multiple opunk::Str objects to append to this object + method appendobj {args} -upvar [punk::args::lib::tstr -undent 0 { + if {[llength $this] != 10} { + error "invalid object" + } + if {![llength $args]} { + #nothing to append - just return current value + return $this + } + + ${$::opunk::str::unshare_script} + #it would be safer to retain the original values in case of error, + #but this seems likely to reduce the efficiency of our copy on write avoidance strategy. review + #set orig_o_string $o_string + #set orig_o_count $o_count + #set orig_o_ansisplits $o_ansisplits + #set orig_o_elements $o_elements + #set orig_o_graphemes $o_graphemes + #set orig_o_ptindex $o_ptindex + #set orig_o_ptlist $o_ptlist + #set orig_o_sgrstacks $o_sgrstacks + #set orig_o_gx0states $o_gx0states + try { + foreach a $args { + if {[llength $a] != 10} { + error "invalid object in appendobj argument list" + } + lassign $a new_o_class new_o_string new_o_count new_o_ansisplits new_o_elements new_o_graphemes new_o_ptindex new_o_ptlist new_o_sgrstacks new_o_gx0states + + ::append o_string $new_o_string + + incr o_count $new_o_count + + lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] [lindex $new_o_ansisplits 0]] + lappend o_ansisplits {*}[lrange $new_o_ansisplits 1 end] + + lappend o_elements {*}$new_o_elements + + lappend o_graphemes {*}$new_o_graphemes + + #o_ptindex is a mapping of grapheme to ptlist index - we need to update the new list's indices before appending + #we also need to recognise that the first pt in the new list will be merged with the last pt in the existing list + #example data: + # o_graphemes: x x x b l a h + # o_ptindex : 1 1 1 2 2 2 2 + # o_ptlist : {} xxx blah + # o_graphemes: y y y + # o_ptindex : 0 0 1 + # o_ptlist : yy y + # r merge: + # o_graphemes: x x x b l a h y y y + # o_ptindex : 1 1 1 2 2 2 2 2 2 3 + # o_ptlist : {} xxx blahyy y + + set last_ptindex [expr {[llength $o_ptlist]-1}] + #note there may be skipped ptindex values for empty string pt blocks. (since there are no graphemes corresponding to them). + #i.e last_ptindex is the index of the last pt block - not necessarily the value of the last entry in o_ptindex (which only tracks graphemes) + #by adding last_ptindex, new_o_ptindex zero items in appended string will be merged to the same value as the last pt block index of the existing string, and the rest will be incremented accordingly. + foreach v $new_o_ptindex { + lappend o_ptindex [expr {$v + $last_ptindex}] + } + + lset o_ptlist end [tcl::string::cat [lindex $o_ptlist end] [lindex $new_o_ptlist 0]] + lappend o_ptlist {*}[lrange $new_o_ptlist 1 end] + + + #prepend the previous sgr stack to all stacks in the new list. + #This allows us to use only list operations to keep the sgr data valid - but we don't yet make it canonical/flat by examining each for resets etc. + #ie just call sgr_merge_list once now. + set laststack [lindex $o_sgrstacks end] + #set mergedtail [punk::ansi::codetype::sgr_merge_list "" {*}$laststack] + set mergedtail [punk::ansi::codetype::sgr_merge $laststack] + lset o_sgrstacks end [list $mergedtail {*}[lindex $new_o_sgrstacks 0]] + foreach n [lrange $new_o_sgrstacks 1 end] { + lappend o_sgrstacks [list $mergedtail {*}$n] + } + + set last_gx0state [lindex $o_gx0states end] + #merge the last gx0 state of the existing string with the first gx0 state of the appended string. + #use the policy that the new gx0 state wins. + set first_new_gx0state [lindex $new_o_gx0states 0] + #set merged_gx0state [expr {$last_gx0state || $first_new_gx0state}] ;experimental. + set merged_gx0state $first_new_gx0state + lset o_gx0states end $merged_gx0state + lappend o_gx0states {*}[lrange $new_o_gx0states 1 end] + + } + } finally { + ${$::opunk::str::restore_script} + #If our assert fails - we have already written invalid state to the object, + #but we will at least know about it and can fix the bug. + #Trying to restore the original state on error would be safer but would potentially reduce the efficiency of our copy on write avoidance strategy, + #and it may be helpful to be able to inspect the invalid state that we have written to the object when debugging the cause of the failure. + #review. + assert { + [llength $o_ptlist] == [llength $o_sgrstacks] && + [llength $o_ptlist] == [llength $o_gx0states] && + [llength $o_graphemes] == [llength $o_ptindex] + } + } + return $this + }] + + #returns the ansiless string - doesn't affect the stored state + method stripped {} { + #review - the ptlist has original gx0 characters in it, so it differs to ansistrip which replaces them with their gx0 unicode equivalents + join [my.get.o_ptlist $this] "" } method get {} { - return [my.get.o_string $this] + my.get.o_string $this } method has_ansi {} { - if {![llength [my.get.o_ansisplits $this]]} { + if {[llength [my.get.o_ansisplits $this]] > 1} { #initial string - for large strings,it's faster to run detect than update the internal split-state. return [punk::ansi::ta::detect [my.get.o_string $this]] } else { + #empty string = zero ansisplits, or string with no ANSI codes = single ansisplits element containing whole string. #string will continue to have a single o_ansisplits element if only non-ansi appended - return [expr {[llength [my.get.o_ansisplits $this]] != 1}] + return [expr {false}] + } + } + #review - has_other is roughly equivalent to a test for whether string is 'rendered' + method has_other {} { + #we expect only g,sgr and gx0 element types in 'rendered' strings + set posn [lsearch -index 0 -exact [my.get.o_elements] o] + return [expr {$posn != -1}] + } + method has_linefeed {} { + #test if there is a grapheme that is exactly \n or a graphme that is exactly \r\n + #we don't recognise a lone \r as a linefeed in this context. review. + #if present, we expect linefeeds to be near the end of the grapheme list slightly more often than the start, so we will search from the end. + set o_graphemes [lreverse [my.get.o_graphemes $this]] + if {[lsearch -exact $o_graphemes "\n"] != -1} { + return true + } + if {[lsearch -exact $o_graphemes "\r\n"] != -1} { + return true } + return false } + method length_raw {} { tcl::string::length [my.get.o_string $this] } @@ -592,8 +746,8 @@ tcl::namespace::eval ::opunk::str { " @opts @values -min 2 -max 2 - thisVar -type varname -help\ - "The variable name of the Str object to query. This is passed by name and will be upvar'd to 'this' within the method body." + strobj -type list -help\ + "The value of the Str object to query." idx -type int -help\ "The index of the character to return. Zero-based index into the original string, excluding ANSI codes. @@ -601,13 +755,16 @@ tcl::namespace::eval ::opunk::str { }] } - method INDEXCHAR -upvar {idx} { + method INDEXCHAR {idx} { #this is not the same as the character at that index in the original string, or even in the ansistripped string - which may be an ANSI code or *part of a grapheme cluster* - set o_graphemes [my.get.o_graphemes $this] - if {![llength $o_graphemes]} { - my.makesplit this - set o_ographemes [my.get.o_graphemes $this] + if {[llength $this] != 10} { + error "invalid object $this" } + set o_graphemes [my.get.o_graphemes $this] + #if {![llength $o_graphemes]} { + # my.makesplit this + # set o_ographemes [my.get.o_graphemes $this] + #} return [lindex $o_graphemes $idx] #experimental. @@ -620,23 +777,40 @@ tcl::namespace::eval ::opunk::str { # return [lindex $o_elements $required_posn 1] #} } - method INDEX -upvar {idx} { + method INDEX {idx} { #returns the index in the original string of the character at the given grapheme index - i.e the index of the start of the grapheme cluster in the original string - upvar o_graphemes idx_o_graphemes - set o_graphemes [lindex $this $idx_o_graphemes] - if {![llength $o_graphemes]} { - my.makesplit this - set o_graphemes [lindex $this $idx_o_graphemes] + if {[llength $this] != 10} { + error "invalid object $this" } - upvar o_sgrstacks idx_o_sgrstacks - upvar o_ptindex idx_o_ptindex + set o_graphemes [my.get.o_graphemes $this] - set ptindex [lindex $this $idx_o_ptindex] - set ptidx [lindex $ptindex $idx] + set ptindex [my.get.o_ptindex $this] + set ptidx [lindex $ptindex $idx] + #idx could be out of range + if {$ptidx eq ""} { + #review + return "" + } + set g [lindex $o_graphemes $idx] - set stack [lindex $this $idx_o_sgrstacks $ptidx] + set stack [lindex [my.get.o_sgrstacks $this] $ptidx] + set mergedstack [punk::ansi::codetype::sgr_merge $stack] set ansi [punk::ansi::codetype::sgr_merge $stack] - return $ansi[lindex $o_graphemes $idx] + + set obj [list {*}{ + } ::opunk::Str {*}{ + } $ansi$g {*}{ + } 1 {*}{ + } [list {} $ansi $g] {*}{ + } [list [list sgr $ansi] [list g $g]] {*}{ + } [list $g] {*}{ + } [list 1] {*}{ + } [list "" $g] {*}{ + } [list "" [list $mergedstack]] {*}{ + } [list [expr {false}] [expr {false}]] + ] + #puts stdout $ansi$g + return $obj } namespace eval argdoc { variable PUNKARGS @@ -656,8 +830,8 @@ tcl::namespace::eval ::opunk::str { This is effectively the ANSI code that would need to be applied to the character returned by INDEX to get the same visual effect as the original string at that position." @values -min 2 -max 2 - string -type string -help\ - "String to index into, which may contain ANSI codes." + strobj -type list -help\ + "Value of string object to index into, which may contain ANSI codes." index -type indexexpression -help\ "Index of character to get code for, or as a special case: the index ${$B}end+1${$N} or numerical equivalent to get the code in effect after the last character (if any). @@ -665,27 +839,337 @@ tcl::namespace::eval ::opunk::str { effect after the last character." }] } - method INDEXCODE -upvar {idx} { + method INDEXCODE {idx} { + #todo - tests for out of bounds indices at both ends, and for end+1 case. + if {[llength $this] != 10} { + error "invalid object $this" + } #TODO - special cases for pure ANSI string and end+1 equivalent index to get code in effect after last character. - upvar o_graphemes idx_o_graphemes - set o_graphemes [lindex $this $idx_o_graphemes] - if {![llength $o_graphemes]} { - my.makesplit this - set o_graphemes [lindex $this $idx_o_graphemes] - } - upvar o_sgrstacks idx_o_sgrstacks - upvar o_ptindex idx_o_ptindex - set ptindex [lindex $this $idx_o_ptindex] - set ptidx [lindex $ptindex $idx] - set stack [lindex $this $idx_o_sgrstacks $ptidx] - set ansi [punk::ansi::codetype::sgr_merge $stack] + #set o_graphemes [my.get.o_graphemes $this] + set ptindexlist [my.get.o_ptindex $this] + if {[llength $ptindexlist] == 0} { + #no graphemes - so no ptindex entries + #we may still have a ptlist of any length - all empty strings, and a corresponding list of the same length for sgrstacks. + #(a string made of multiple separate ansi codes with no graphemes would be an example of this) + #we can consider the INDEXCODE for any index in this case to be the merge of all the sgrstacks + #- which will be the same as the merge of all the codes in the string since there are no graphemes to cause splits in the stacks. + #- it will also be the same as the merge of the single stack at the end of the o_sgrstacks list, + #since it was built by merging all the codes in the string as we went along. + return [punk::ansi::codetype::sgr_merge [lindex [my.get.o_sgrstacks $this] end]] + } + + set ptidx [lindex $ptindexlist $idx] + if {[string is integer -strict $ptidx]} { + #in bounds index - we can just return the merged stack for that index. + set stack [lindex [my.get.o_sgrstacks $this] $ptidx] + set ansi [punk::ansi::codetype::sgr_merge $stack] + return $ansi + } + #if $ptidx is empty string - using it as the subsequent index for lindex into sgrstacks will return the entire list. + #this merged result is technically what we want for the end+1 case - except that for large ansi-alternating strings this is very inefficient. + #also - relying on the out-of-bounds empty string from lindex would also do the same for out-of-bounds indices at the lower end - which would be incorrect. + #we can use the use the ptindex of the first or last grapheme to determine which stacks to use from the o_sgrstacks list. + #use the lindex_resolve helper so that we cater for end-n etc in the index expression. + set idxinfo [punk::lib::lindex_resolve [my.get.o_count $this] $idx] + #we should only get -Inf for lower end out-of-bounds, and Inf for upper end out-of-bounds. + switch -exact $idxinfo { + "-Inf" { + #out of bounds at lower end - same as the ansi we would get for the first grapheme. + #assert - we would already have returned if there were no graphemes, so we know there is at least one grapheme and therefore at least one ptindex entry. + set ptidx [lindex $ptindexlist 0] + set stack [lindex [my.get.o_sgrstacks $this] $ptidx] + set ansi [punk::ansi::codetype::sgr_merge $stack] + } + "Inf" { + #out of bounds at upper end - return merged stack for end of string. + set stack [lindex [my.get.o_sgrstacks $this] end] + set ansi [punk::ansi::codetype::sgr_merge $stack] + } + default { + #we expect this case to be unreachable. + #If the original idx was in bounds we would have returned above + error "opunk::Str::INDEXCODE unexpected index resolution result: $idxinfo for index $idx with count [my.get.o_count $this]" + } + } return $ansi } + #test performance of get method vs upvar access to internal state. + method perftest1 {} { + if {[llength $this] != 10} { + error "invalid object $this" + } + upvar ::opunk::Str::o_graphemes idx_o_graphemes + upvar ::opunk::Str::o_ansisplits idx_o_ansisplits + upvar ::opunk::Str::o_ptlist idx_o_ptlist + upvar ::opunk::Str::o_ptindex idx_o_ptindex + upvar ::opunk::Str::o_gx0states idx_o_gx0states + upvar ::opunk::Str::o_sgrstacks idx_o_sgrstacks + set o_graphemes [lindex $this $idx_o_graphemes] + set o_ansisplits [lindex $this $idx_o_ansisplits] + set o_ptlist [lindex $this $idx_o_ptlist] + set o_ptindex [lindex $this $idx_o_ptindex] + set o_gx0states [lindex $this $idx_o_gx0states] + set o_sgrstacks [lindex $this $idx_o_sgrstacks] + return [list $o_graphemes $o_ansisplits $o_ptlist $o_ptindex $o_gx0states $vo_sgrstacks] + } + method perftest2 {} { + if {[llength $this] != 10} { + error "invalid object $this" + } + #this is faster than upvaring the internal index variables. + set o_graphemes [my.get.o_graphemes $this] ;#test get method + set o_ansisplits [my.get.o_ansisplits $this] ;#test get method + set o_ptlist [my.get.o_ptlist $this] ;#test get method + set o_ptindex [my.get.o_ptindex $this] ;#test get method + set o_gx0states [my.get.o_gx0states $this] ;#test get method + set o_sgrstacks [my.get.o_sgrstacks $this] ;#test get method + return [list $o_graphemes $o_ansisplits $o_ptlist $o_ptindex $o_gx0states $o_sgrstacks] + } + + + #Returns the substring of string between the character positions startindex and endindex, inclusive, + #where the character positions are determined by INDEX. + #The returned substring will include any ANSI codes that are in effect for those characters. + method RANGE {startindex endindex} { + #TODO - special case processing for end+n and equivalents to return trailing ANSI codes as well. + #(similar to INDEXCODE) + + set o_graphemes [my.get.o_graphemes $this] + #set o_ansisplits [my.get.o_ansisplits $this] + set o_ptlist [my.get.o_ptlist $this] + set o_ptindex [my.get.o_ptindex $this] + set o_gx0states [my.get.o_gx0states $this] + set o_sgrstacks [my.get.o_sgrstacks $this] + + set o_count [my.get.o_count $this] ;#assert same as [llength $graphemes] + + #for range we may need to return a result even if one or both indices are out of bounds + #if both are out of bounds at the same end - we return empty string for consistency with Tcl string index behaviour + set index_start [punk::lib::lindex_resolve $o_count $startindex] + set index_end [punk::lib::lindex_resolve $o_count $endindex] + if {$index_start eq "-Inf" && $index_end eq "-Inf"} {return ""} ;#out of bounds at lower end + if {$index_start eq "Inf" && $index_end eq "Inf"} {return ""} ;#out of bounds at upper end + #if the start is out bounds at the upper end and end is out of bounds at the lower end we also return empty string - as the range is effectively empty + #Also if start index > end index we return empty string - as the range is empty + #the expr intrinsic to if can test with -Inf and Inf against each other and integers - so we can do this with a single condition + if {$index_start > $index_end} {return ""} + #we can now assert that our range covers at least part of the string. + + #any pt could be empty if using split_codes_single (or just first and last pt if split_codes) + set low -1 + set high -1 + set pt_found -1 + set char "" + #set grapheme_codestacks [list] ;#stack of codes per grapheme - will be flattened/coalesced + set codestack [list] + #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go + #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) + if {$index_start eq "-Inf"} { + set index_start 0 + } + if {$index_end eq "Inf"} { + set index_end [expr {$o_count - 1}] + } + if {$index_start == 0 && $index_end == [expr {$o_count - 1}]} { + #shortcut for whole string - just return the original string object? + #review end vs end+1 trailing codes or not? + #if INDEX n doesn't return codes after each character n, then RANGE m n shouldn't either, + #but if RANGE 0 end doesn't return trailing codes that may violate user expectations. + #consider that joining INDEX 0 .. INDEX end should give the same result as RANGE 0 end + #- which would argue for including trailing codes in both cases or neither case. + #- returning trailing codes for INDEX n - would however also violate user expectations. + #we may have to solve some of this with tests and documentation to clarify the behaviour and set user expectations appropriately. + #ANSI strings are complicated and there may not be a single behaviour that meets all user expectations in all cases. + #The option to treat end differently to numeric value that happens to be the same as end should be considered, + # - there is a precedent for this in the Tcl linsert command which treats an end-relative index differently to a start-relative index. + return $this + } + + set START $index_start + set END $index_end + set grapheme_base 0 + set range_o_class ::opunk::Str + set range_o_string "" ;#string value - todo - change to return opunk::Str object representing the range not just the string. + set range_o_count 0 + set range_o_ansisplits [list] + set range_o_elements [list] + set range_o_graphemes [list] + set range_o_ptindex [list] + set range_o_ptlist [list] + set range_o_sgrstacks [list] + set range_o_gx0states [list] + + set ptindex_from [lindex $o_ptindex $index_start] + set ptindex_to [lindex $o_ptindex $index_end] + + #set idx -1 + set idx [expr {$ptindex_from - 1}] + set grapheme_base [lsearch -integer $o_ptindex $ptindex_from] ;#index at which this pt block starts in the grapheme list. + set high [expr {$grapheme_base - 1}] + + #foreach {pt code} [lrange $o_ansisplits [expr {$ptindex_from * 2}] end] {} + foreach pt [lrange $o_ptlist $ptindex_from end] codestack [lrange $o_sgrstacks $ptindex_from end] gx0state [lrange $o_gx0states $ptindex_from end] { + incr idx + #puts "idx: $idx pt: '$pt'" + set include_code [punk::ansi::codetype::sgr_merge $codestack] + if {$pt ne ""} { + #set graphemes [lindex $pt_graphemes $idx] + #set grapheme_to_pt_index [lsearch -all -inline $o_ptindex $idx] ;# e.g {0 0 0 0} if first pt is 4 graphemes, then {1 1} if next pt is 2 graphemes etc. + set pt_graphemes [list] + for {set i $grapheme_base} {$i < [llength $o_ptindex]} {incr i} { + set gidx [lindex $o_ptindex $i] + if {$gidx != $idx} { + break + } + lappend pt_graphemes [lindex $o_graphemes $i] + } + #puts "--->pt_graphemes: $pt_graphemes for pt: '$pt' with idx: $idx and grapheme_base: $grapheme_base" + + set low [expr {$high + 1}] ;#last high + set high [expr {$low + [llength $pt_graphemes]-1}] + + #if {$START >= $low && $START <= $high} {} + if {$index_start >= $low && $index_start <= $high} { + #puts "--->start in this pt '$pt' - index_start:$index_start >= low:$low and <= high:$high index_end: $index_end" + #starts in this pt + if {$END >= $low && $END <= $high} { + #both start and end in this pt + set include_graphemes [lrange $pt_graphemes $START-$low $END-$low] + set include_pt [::string cat {*}$include_graphemes] + ::append range_o_string $include_code$include_pt + incr range_o_count [llength $include_graphemes] + #start of ansisplits is always PT. + #end of ansisplits is always PT too + if {$include_code ne ""} { + lappend range_o_ansisplits "" $include_code $include_pt + lappend range_o_ptlist "" $include_pt + lappend range_o_sgrstacks [list] + lappend range_o_gx0states [expr {false}] + } else { + lappend range_o_ansisplits $include_pt + lappend range_o_ptlist $include_pt + } + lappend range_o_elements [list sgr $include_code] + lappend range_o_elements {*}[lmap g $include_graphemes {list g $g}] + lappend range_o_graphemes {*}$include_graphemes + lappend range_o_ptindex {*}[lrepeat [llength $include_graphemes] $idx] + #lappend range_o_sgrstacks $codestack + lappend range_o_sgrstacks [list $include_code] + lappend range_o_gx0states [lindex $o_gx0states $idx] + break + } else { + #start in this pt but end is not - so we want to take the rest of this pt and then keep going until we find the end + set include_graphemes [lrange $pt_graphemes $START-$low end] + set include_pt [::string cat {*}$include_graphemes] + ::append range_o_string $include_code$include_pt + incr range_o_count [llength $include_graphemes] + if {$include_code ne ""} { + lappend range_o_ansisplits "" $include_code $include_pt + lappend range_o_ptlist "" $include_pt + lappend range_o_sgrstacks [list] + lappend range_o_gx0states [expr {false}] + } else { + lappend range_o_ansisplits $include_pt + lappend range_o_ptlist $include_pt + } + lappend range_o_elements [list sgr $include_code] + lappend range_o_elements {*}[lmap g $include_graphemes {list g $g}] + lappend range_o_graphemes {*}$include_graphemes + lappend range_o_ptindex {*}[lrepeat [llength $include_graphemes] $idx] + #lappend range_o_sgrstacks $codestack + lappend range_o_sgrstacks [list $include_code] + lappend range_o_gx0states $gx0state + incr START [expr {[llength $pt_graphemes] - ($START - $low)}] ;#we want to start from the next character after this pt + } + } else { + #does not start in this pt + #if {$START < $low} {} + if {$index_start < $low} { + #already passed the start + if {$END >= $low && $END <= $high} { + #end in this pt but start is not - so we want to take the start of this pt up to the end index and then break + #puts "--->end in this pt but start is not - so we want to take the start of this pt up to the end index and then break" + set include_graphemes [lrange $pt_graphemes 0 $END-$low] + set include_pt [::string cat {*}$include_graphemes] + + ::append range_o_string $include_code$include_pt + lappend range_o_ansisplits $include_code $include_pt + incr range_o_count [llength $include_graphemes] + lappend range_o_elements [list sgr $include_code] + lappend range_o_elements {*}[lmap g $include_graphemes {list g $g}] + lappend range_o_graphemes {*}$include_graphemes + lappend range_o_ptindex {*}[lrepeat [llength $include_graphemes] $idx] + lappend range_o_ptlist $include_pt + #lappend range_o_sgrstacks $codestack + lappend range_o_sgrstacks [list $include_code] + lappend range_o_gx0states $gx0state + #puts stdout "adding gx0state: [lindex $o_gx0states $idx] for pt: '$pt' idx $idx with include_graphemes: $include_graphemes" + break + } else { + #neither start nor end in this pt - so we want to take the whole pt and keep going + set include_pt [::string cat {*}$pt_graphemes] + ::append range_o_string $include_code[string cat {*}$pt_graphemes] + lappend range_o_ansisplits $include_code $include_pt + incr range_o_count [llength $pt_graphemes] + lappend range_o_elements [list sgr $include_code] + lappend range_o_elements {*}[lmap g $pt_graphemes {list g $g}] + lappend range_o_graphemes {*}$pt_graphemes + lappend range_o_ptindex {*}[lrepeat [llength $pt_graphemes] $idx] + lappend range_o_ptlist $include_pt + #lappend range_o_sgrstacks $codestack + lappend range_o_sgrstacks [list $include_code] + lappend range_o_gx0states $gx0state + #? + incr START [expr {[llength $pt_graphemes] - ($START - $low)}] ;#we want to start from the next character after this pt + } + } + } + incr grapheme_base [llength $pt_graphemes];#set up for next loop + } else { + #leave grapheme_base as is. + #if {$START < $high && $END > $high} {} + if {$index_start < $high && $index_end > $high} { + #we are in a gap between pts that covers part of our range - we want to include any codes in this gap that are in effect + ::append range_o_string $include_code + lappend range_o_ansisplits $include_code "" + lappend range_o_elements [list sgr $include_code] + #no graphemes or ptindex to add in this case. + lappend range_o_ptlist "" + #lappend range_o_sgrstacks $codestack + lappend range_o_sgrstacks [list $include_code] + lappend range_o_gx0states $gx0state + } + } + } + #avoid line continuation to support debugging with punk::lib::cmdtrace (proper linenumber matching) + set obj [list {*}{ + } ::opunk::Str {*}{ + } $range_o_string {*}{ + } $range_o_count {*}{ + } $range_o_ansisplits {*}{ + } $range_o_elements {*}{ + } $range_o_graphemes {*}{ + } $range_o_ptindex {*}{ + } $range_o_ptlist {*}{ + } $range_o_sgrstacks {*}{ + } $range_o_gx0states + ] + #puts stdout "[a] $obj" + #puts stdout "[a] $range_o_string" + #return $range_o_string + return $obj + } + + } constructor {initial_string} { - return [list "opunk::Str" $initial_string -1 [list] [list] [list] [list] [list] [list] [list]] + #use fully qualified classname - so that upvar in methods will not fail due to namespace not existing when (erroneously) value passed to a method instead of varname. + #We can then test in the methods whether the upvar worked or not (using info exists) to give a more helpful error message about passing a value instead of a variable name. + set init [list "::opunk::Str" $initial_string -1 [list] [list] [list] [list] [list] [list] [list]] + my.makesplit init + return $init } } } @@ -750,10 +1234,10 @@ tcl::namespace::eval opunk::str { proc default_topics {} {return [list Description *]} # ------------------------------------------------------------- - # get_topic_ functions add more to auto-include in about topics + # get_topic_ functions add more to auto-include in about topics # ------------------------------------------------------------- proc get_topic_Description {} { - punk::args::lib::tstr [string trim { + punk::args::lib::tstr [string trim { package opunk::str voo classes for ANSI-aware strings. } \n] @@ -786,7 +1270,7 @@ tcl::namespace::eval opunk::str { # we re-use the argument definition from punk::args::standard_about and override some items set overrides [dict create] dict set overrides @id -id "::opunk::str::about" - dict set overrides @cmd -name "opunk::str::about" + dict set overrides @cmd -name "opunk::str::about" dict set overrides @cmd -help [string trim [punk::args::lib::tstr { About opunk::str }] \n] diff --git a/src/vfs/_vfscommon.vfs/modules/overtype-1.7.4.tm b/src/vfs/_vfscommon.vfs/modules/overtype-1.7.4.tm index dec8e80f..1ca40672 100644 --- a/src/vfs/_vfscommon.vfs/modules/overtype-1.7.4.tm +++ b/src/vfs/_vfscommon.vfs/modules/overtype-1.7.4.tm @@ -253,7 +253,6 @@ tcl::namespace::eval overtype { coloured as this doesn't affect the display width. Default is \uFFFD - the unicode replacement char.} - -experimental -default 0 -cp437 -default 0 -type boolean -looplimit -default \uFFEF\ -type integer -help\ "internal failsafe - experimental" @@ -263,8 +262,8 @@ tcl::namespace::eval overtype { -wrap -default 0 -type boolean -info -default 0 -type boolean -help\ "When set to 1, return a dictionary (experimental)" - -format -default ansi -type string -choices {ansi binarytext-bios binarytext-ice xbin} - -binarytext -default "" -type string -choices {"" bios ice} + -format -default ansi -type string -choices {ansi binarytext-bios binarytext-ice xbin} -help\ + "Format of new data being applied as an overlay" -console -default {stdin stdout stderr} -type list @values -min 1 -max 2 @@ -329,7 +328,6 @@ tcl::namespace::eval overtype { -transparent 0 -exposed1 \uFFFD -exposed2 \uFFFD - -experimental 0 -cp437 0 -looplimit \uFFEF -crm_mode 0 @@ -337,7 +335,6 @@ tcl::namespace::eval overtype { -insert_mode 0 -wrap 0 -info 0 - -binarytext "" -format ansi -console {stdin stdout stderr} }] @@ -355,11 +352,11 @@ tcl::namespace::eval overtype { foreach {k v} $argsflags { switch -- $k { -looplimit - -width - -height - -startcolumn - -startrow - -bias - -ellipsis - -ellipsistext - -ellipsiswhitespace - - -transparent - -exposed1 - -exposed2 - -experimental + - -transparent - -exposed1 - -exposed2 - -expand_right - -appendlines - -reverse_mode - -crm_mode - -insert_mode - -cp437 - - -info - -binarytext - -format - -console { + - -info - -format - -console { tcl::dict::set opts $k $v } -wrap - -autowrap_mode { @@ -381,6 +378,7 @@ tcl::namespace::eval overtype { set opt_height [tcl::dict::get $opts -height] set opt_startcolumn [tcl::dict::get $opts -startcolumn] set opt_startrow [tcl::dict::get $opts -startrow] + #review -appendlines - this needs thought regarding interaction with terminal height concept and scrolling set opt_appendlines [tcl::dict::get $opts -appendlines] set opt_transparent [tcl::dict::get $opts -transparent] set opt_ellipsistext [tcl::dict::get $opts -ellipsistext] @@ -399,7 +397,6 @@ tcl::namespace::eval overtype { # -- --- --- --- --- --- set opt_cp437 [tcl::dict::get $opts -cp437] set opt_info [tcl::dict::get $opts -info] - set opt_binarytext [tcl::dict::get $opts -binarytext] set opt_format [tcl::dict::get $opts -format] set opt_console [tcl::dict::get $opts -console] @@ -419,24 +416,6 @@ tcl::namespace::eval overtype { #} #-------------------------------------------------------------------------- - # ---------------------------- - # -experimental dev flag to set flags etc - # ---------------------------- - set data_mode 0 - set edit_mode 0 - set opt_experimental [tcl::dict::get $opts -experimental] - foreach o $opt_experimental { - switch -- $o { - data_mode { - set data_mode 1 - } - edit_mode { - set edit_mode 1 - } - } - } - # ---------------------------- - #--------------------------------------------------------- #underblock is expected to be pre-rendered - ie any ANSI codes have already been processed and rendered into the text. @@ -484,12 +463,17 @@ tcl::namespace::eval overtype { } insert_mode $opt_insert_mode {*}{ } autowrap_mode $opt_autowrap_mode {*}{ } cp437 $opt_cp437 {*}{ + } row 1 {*}{ + } col 1 {*}{ + } topmargin 1 {*}{ + } bottommargin $renderheight {*}{ } ] #modes #e.g insert_mode can be toggled by insert key or ansi IRM sequence CSI 4 h|l #opt_startcolumn ?? - DECSLRM ? set vtstate $initial_state + dict set vtstate col $opt_startcolumn # -- --- --- --- #REVIEW - do we need ansi resets in the underblock? @@ -497,7 +481,6 @@ tcl::namespace::eval overtype { set blankline [string repeat \u0000 $renderwidth] set underlines [lrepeat $renderheight $blankline] } else { - #---- #this splits into lines - only to rejoin - which is inefficient. #It also has code to handle joining multiple blocks - but we only have one in this case. @@ -511,16 +494,8 @@ tcl::namespace::eval overtype { } else { set underlines [split $underblock \n] } - } - #if {$underblock eq ""} { - # set blank "\x1b\[0m\x1b\[0m" - # #set underlines [list "\x1b\[0m\x1b\[0m"] - # set underlines [lrepeat $renderheight $blank] - #} else { - # #lines_as_list -ansiresets 1 will do nothing if -ansiresets 1 isn't specified - REVIEW - # set underlines [lines_as_list -ansiresets 1 $underblock] - #} + #underlines are not necessarily processed in order - depending on cursor-moves applied from overtext # -- --- --- --- #todo - reconsider the 'line' as the natural chunking mechanism for the overlay. @@ -542,270 +517,82 @@ tcl::namespace::eval overtype { #overblock height/width isn't useful in the presence of an ansi input overlay with movements. The number of lines may bear little relationship to the output height #lassign [blocksize $overblock] _w overblock_width _h overblock_height - #temporary scheme selector for experimenting with different approaches to chunking the input overlay for processing. - set scheme 4 - switch -- $scheme { - 0 { - #one big chunk - set inputchunks [list mixed $overblock] - } - 1 { - #todo - set inputchunks [punk::ansi::ta::split_codes $overblock] - } - 2 { - #todo - #split into lines if possible first - then into plaintext/ansi-sequence chunks ? - set inputchunks [list ""] ;#put an empty plaintext split in for starters - set i 1 - set lines [split $overblock \n] - foreach ln $lines { - if {$i < [llength $lines]} { - append ln \n - } - set sequence_split [punk::ansi::ta::split_codes_single $ln] ;#use split_codes Not split_codes_single? - set lastpt [lindex $inputchunks end] - lset inputchunks end [tcl::string::cat $lastpt [lindex $sequence_split 0]] - lappend inputchunks {*}[lrange $sequence_split 1 end] - incr i - } - } - 3 { - #todo - #it turns out line based chunks are faster than the above.. probably because some of those end up doing the regex splitting twice - set lflines [list] - set inputchunks [split $overblock \n] - foreach ln $inputchunks { - append ln \n - lappend lflines $ln + set inputchunks [list] + switch -- $opt_format { + ansi { + #ansi is commonly but not always line-based. + #some ansi is a string of data with ansi movements and no linefeeds. + set overblock [tcl::string::map {\r\n \n} $overblock] + foreach ln [split $overblock \n] { + lappend inputchunks [list mixed $ln\n] } - if {[llength $lflines]} { - lset lflines end [tcl::string::range [lindex $lflines end] 0 end-1] + if {[llength $inputchunks]} { + #strip trailing newline from last line. + lset inputchunks end 1 [tcl::string::range [lindex $inputchunks end 1] 0 end-1] } - #set inputchunks $lflines[unset lflines] - set inputchunks [lindex [list $lflines [unset lflines]] 0] - } - 4 { - #active development scheme - 2026. - set inputchunks [list] - switch -- $opt_format { - ansi { - set overblock [tcl::string::map {\r\n \n} $overblock] - foreach ln [split $overblock \n] { - lappend inputchunks [list mixed $ln\n] - } - if {[llength $inputchunks]} { - lset inputchunks end 1 [tcl::string::range [lindex $inputchunks end 1] 0 end-1] - } + binarytext-bios { + #16 fg, 8 fg + possible blink + set input "" + set ansisplit [list ""] + set charpair 0 + foreach {ch at} [split $overblock ""] { + #review - does binarytext only apply to cp437??? we need to know the original encoding + if {[catch {punk::ansi::colour::byteAnsi $at} code]} { + puts stderr "renderspace err at charpair: $charpair [punk::ansi::ansistring VIEW ${ch}${at}]" + #append input [punk::ansi::a+ brightred White] \uFFef + set code [punk::ansi::a+ brightred White] } - binarytext-bios { - #16 fg, 8 fg + possible blink - set input "" - set ansisplit [list ""] - set charpair 0 - foreach {ch at} [split $overblock ""] { - #review - does binarytext only apply to cp437??? we need to know the original encoding - set at [encoding convertto cp437 $at] - if {[catch {punk::ansi::colour::byteAnsi $at} code]} { - puts stderr "renderspace err at charpair: $charpair [punk::ansi::ansistring VIEW ${ch}${at}]" - #append input [punk::ansi::a+ brightred White] \uFFef - set code [punk::ansi::a+ brightred White] - set ch \uFFeF - } - append input $code $ch - lappend ansisplit $code $ch - incr charpair - } - #lappend inputchunks [list mixed $input] - lappend inputchunks [list ansisplit $ansisplit] - } - binarytext-ice { - #16 fg, 16 bg (no blink) - set input "" - foreach {ch at} [split $overblock ""] { - set at [encoding convertto cp437 $at] - append input [punk::ansi::colour::byteAnsiIce $at]$ch - } - lappend inputchunks [list mixed $input] + if {[catch {encoding convertfrom cp437 $ch} ch]} { + puts stderr "renderspace err at charpair: $charpair [punk::ansi::ansistring VIEW ${ch}${at}]" + set ch \uFFeF } - xbin { - set xbin_header [string range $overblock 0 10] ;#11 bytes - set xbin_header_info [punk::ansi::xbin::parse_header $xbin_header] - set overblock [string range $overblock 11 end] - - set flags [dict get $xbin_header_info flags] - set xbin_width [dict get $xbin_header_info width] - set xbin_height [dict get $xbin_header_info height] - set expected_cells [expr {$xbin_width * $xbin_height}] - set xbin_nonblink [expr {"nonblink" in $flags}] ;# ice. - set xbin_palette [punk::ansi::xbin::default_palette] - - puts "xbin ${xbin_width}x${xbin_height}" - puts "xbin flags $flags" - - #optional 16-entry palette, 3 bytes per entry, RGB values 0..63 - if {"palette" in $flags} { - #puts stderr "renderspace warning - palette unimplemented" - set xbin_palette [punk::ansi::xbin::parse_palette [string range $overblock 0 47]] - set overblock [string range $overblock 48 end] - } - - #todo - font. - #hack - skip over font 256 x fontsize or 512 x fontsize - if {"512chars" in $flags} { - set sz 512 - } else { - set sz 256 - } - #temp - set skip [expr {$sz * [dict get $xbin_header_info fontsize]}] - if {"font" in $flags} { - #todo - consider sixel or similar for font data - but for now we just skip over it. - puts stderr "renderspace warning - xbin font unimplemented" - set overblock [string range $overblock $skip end] - } - puts stdout "xbin image data size [string length $overblock]" - - set ansisplit [list ""] - if {"compress" in $flags} { - #puts stderr "renderspace warning - compress experimental" - #process 'repeatcounter' bytes - #first 2 bits - compression type - # 00 - no compression - # 01 - character compression - # 10 - attribute compression - # 11 - character/attribute compression - #remaining 6 bits - counter - set input "" - set bytes [split $overblock ""] - set byte_count [llength $bytes] - set decoded_cells 0 - for {set b 0} {$b < $byte_count} {} { - set rc [lindex $bytes $b] - set dec [scan $rc %c] - set ctype [expr {$dec >> 6}] - #0x3F - 00111111 - set count [expr {$dec & 0x3F}] - incr count ;#count stored as 1 less than actual number of repeats - if {$count < 1 || $count > 64} { - puts stderr "xbin - something wrong - max must be between 1 and 64 inclusive. received $count" - } - incr b - if {$decoded_cells + $count > $expected_cells} { - error "overtype::renderspace xbin decode overflow: record would emit $count cells at decoded offset $decoded_cells, expected total $expected_cells cells for image dimensions ${xbin_width}x${xbin_height}" - } - switch -exact -- $ctype { - 0 { - set needed [expr {$count * 2}] - } - 1 - - 2 { - set needed [expr {$count + 1}] - } - 3 { - set needed 2 - } - default { - error "overtype::renderspace xbin invalid compression type $ctype in repeatcounter byte '$rc' at offset $b" - } - } - if {$b + $needed > $byte_count} { - error "overtype::renderspace xbin truncated record: type $ctype requires $needed bytes at payload offset $b, but only [expr {$byte_count - $b}] bytes remain." - } - switch -exact -- $ctype { - 0 { - #no compression - for {set c 0} {$c < $count*2} {incr c 2} { - set ch [lindex $bytes $b+$c] - set ch [encoding convertfrom cp437 $ch] - set at [lindex $bytes [expr {$b+$c+1}]] - #binary scan $at cu code - #set clr [a+ term-$code] - #set clr [a+ red] ;#debug - set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] - lappend ansisplit $clr $ch - } - incr b [expr {$count*2}] - } - 1 { - #char compression - set ch [lindex $bytes $b] - set ch [encoding convertfrom cp437 $ch] - incr b - for {set c 0} {$c < $count} {incr c} { - set at [lindex $bytes $b+$c] - #binary scan $at cu code - #set clr [a+ term-$code] - #set clr [a+ cyan] ;#debug - set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] - lappend ansisplit $clr $ch - } - incr b [expr {$count}] - } - 2 { - #attribute compression - set at [lindex $bytes $b] - #binary scan $at cu code - #set clr [a+ term-$code] - #set clr [a+ green] ;#debug - set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] - incr b - for {set c 0} {$c < $count} {incr c} { - set ch [lindex $bytes $b+$c] - set ch [encoding convertfrom cp437 $ch] - lappend ansisplit $clr $ch - } - incr b $count - } - 3 { - #attribute and char compression - set ch [lindex $bytes $b] - set ch [encoding convertfrom cp437 $ch] - set at [lindex $bytes $b+1] - #binary scan $at cu code - #set clr [a+ term-$code] - #set clr [a+ white] ;#debug - set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] - for {set c 0} {$c < $count} {incr c} { - lappend ansisplit $clr $ch - } - incr b 2 - } - } - incr decoded_cells $count - } - if {$decoded_cells != $expected_cells} { - puts stderr "overtype::renderspace xbin decoded $decoded_cells cells, expected $expected_cells cells for image dimensions ${xbin_width}x${xbin_height}" - } - lappend inputchunks [list ansisplit $ansisplit] - } else { - foreach {ch at} [split $overblock ""] { - #binary scan $at cu code - #set clr [a+ term-$code] - if {$at eq ""} { - #eg src/testansi/formatsamples/image/xbin/test.xb - #has trailing nul byte. for now just warn. - puts stderr "renderspace warning - xbin attribute byte is empty at char '[ansistring VIEW $ch]'" - #break ? - #experiment - treat as a reset. - lappend ansisplit [a+] $ch - } else { - set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] - set ch [encoding convertfrom cp437 $ch] - lappend ansisplit $clr $ch - } - } - lappend inputchunks [list ansisplit $ansisplit] - } - puts stdout "xbin decoded" - flush stdout + append input $code $ch + lappend ansisplit $code $ch + incr charpair + } + #lappend inputchunks [list mixed $input] + lappend inputchunks [list ansisplit $ansisplit] + } + binarytext-ice { + #16 fg, 16 bg (no blink) + set input "" + foreach {ch at} [split $overblock ""] { + if {$at ne ""} { + append input [punk::ansi::colour::byteAnsiIce $at] } + set ch [encoding convertfrom cp437 $ch] + append input $ch } + lappend inputchunks [list mixed $input] } - } + xbin { + set parse_dict [punk::ansi::xbin::parse $overblock] + set ansisplit [dict get $parse_dict ansisplit] + set xbin_header_info [dict get $parse_dict header] + set flags [dict get $xbin_header_info flags] + set xbin_width [dict get $xbin_header_info width] + set xbin_height [dict get $xbin_header_info height] + puts stdout "xbin dimensions ${xbin_width}x${xbin_height} decoded [dict get $parse_dict decoded_cells] of [dict get $parse_dict expected_cells] expected cells" + puts stdout "xbin flags $flags" + set warnings [dict get $parse_dict warnings] + foreach w $warnings { + puts stderr "xbin warning: $w" + } + puts stdout "xbin decoded" + flush stdout + lappend inputchunks [list ansisplit $ansisplit] + } + } + #we have a list of 2 element input chunks {overtext_type overtext} in $inputchunks + #- each chunk is either a string of text with embedded ANSI codes (type 'mixed') + #- or a list of alternating ANSI code and text segments (type 'ansisplit') + #For ansi files each chunk may commonly correspond to a line of text - but this is not necessarily the case, as ANSI cursor movements and other codes may be present which affect the layout in ways that can't be determined until processing. + #for binary files - there may be no newlines at all - just a stream of bytes with ANSI codes interspersed to control the layout and colours. + #The chunks are processed in order, with the output of each chunk being rendered onto the current 'underlay' of the output, + #and then becoming the new 'underlay' for the next chunk to render onto. set replay_codes_underlay [tcl::dict::create 1 ""] @@ -819,13 +606,6 @@ tcl::namespace::eval overtype { set outputlines $underlines set overidx 0 - #underlines are not necessarily processed in order - depending on cursor-moves applied from overtext - set row 1 - #if {$data_mode} { - # set col [_get_row_append_column $row] - #} else { - set col $opt_startcolumn - #} set instruction_stats [tcl::dict::create] @@ -843,7 +623,10 @@ tcl::namespace::eval overtype { continue } #puts "----->[ansistring VIEW -lf 1 -vt 1 -nul 1 $overtext]<----" - set undertext [lindex $outputlines [expr {$row -1}]] + set undertext [lindex $outputlines [dict get $vtstate row]-1] + if {[tcl::dict::exists $replay_codes_underlay [dict get $vtstate row]]} { + set undertext [tcl::dict::get $replay_codes_underlay [dict get $vtstate row]]$undertext + } #renderline pads each underly line to width with spaces and should track where end of data is @@ -878,19 +661,17 @@ tcl::namespace::eval overtype { #} ###################### - set renderedrow $row + #remember the row we are just about to render. + set renderedrow [dict get $vtstate row] if {$renderedrow > $renderedrow_max} { set renderedrow_max $renderedrow } - if {[tcl::dict::exists $replay_codes_underlay $row]} { - set undertext [tcl::dict::get $replay_codes_underlay $row]$undertext - } #review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary - #but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l - set renderopts [list -experimental $opt_experimental {*}{ + set renderopts [list {*}{ } -cp437 $opt_cp437 {*}{ } -info 1 {*}{ } -crm_mode [tcl::dict::get $vtstate crm_mode] {*}{ @@ -903,8 +684,8 @@ tcl::namespace::eval overtype { } -exposed1 $opt_exposed1 {*}{ } -exposed2 $opt_exposed2 {*}{ } -expand_right $opt_expand_right {*}{ - } -cursor_column $col {*}{ - } -cursor_row $row {*}{ + } -cursor_column [tcl::dict::get $vtstate col] {*}{ + } -cursor_row [tcl::dict::get $vtstate row] {*}{ } -overtext_type $overtext_type {*}{ } ] @@ -935,6 +716,8 @@ tcl::namespace::eval overtype { set visualwidth [tcl::dict::get $rinfo visualwidth] ;#column width of what is 'rendered' for the line set insert_lines_above [tcl::dict::get $rinfo insert_lines_above] set insert_lines_below [tcl::dict::get $rinfo insert_lines_below] + + #review - this assumes lines are rendered in order - but this isn't always true. tcl::dict::set replay_codes_underlay [expr {$renderedrow+1}] [tcl::dict::get $rinfo replay_codes_underlay] #lset replay_codes_overlay [expr $overidx+1] [tcl::dict::get $rinfo replay_codes_overlay] @@ -973,7 +756,7 @@ tcl::namespace::eval overtype { #-- todo - detect looping properly - if {$row > 1 && $overtext ne "" && $unapplied eq $overtext && $post_render_row == $row && $instruction eq ""} { + if {[dict get $vtstate row] > 1 && $overtext ne "" && $unapplied eq $overtext && $post_render_row == [dict get $vtstate row] && $instruction eq ""} { puts stderr "overtype::renderspace loop?" puts [ansistring VIEW $rinfo] break @@ -999,25 +782,23 @@ tcl::namespace::eval overtype { tcl::dict::incr instruction_stats $instruction_type switch -- $instruction_type { reset { - #reset the 'renderspace terminal' (not underlying terminal) - set row 1 - set col 1 + #reset the 'renderspace virtual terminal' (not underlying terminal) set vtstate [tcl::dict::merge $vtstate $initial_state] #todo - clear screen } {} { #end of supplied line input #lf included in data - set row $post_render_row - set col $post_render_col + dict set vtstate row $post_render_row + #dict set vtstate col $post_render_col if {![llength $unapplied_list]} { if {$overflow_right ne ""} { - incr row + dict incr vtstate row } } else { puts stderr "renderspace end of input line - has unapplied: [ansistring VIEW $unapplied] (review)" } - set col $opt_startcolumn + dict set vtstate col $opt_startcolumn } up { @@ -1031,87 +812,42 @@ tcl::namespace::eval overtype { #puts stderr "up $post_render_row" #puts stderr "$rinfo" - #puts stdout "1 row:$row col $col" - set row $post_render_row - #data_mode (naming?) determines if we move to end of existing data or not. - #data_mode 0 means ignore existing line length and go to exact column - #set by -experimental flag - if {$data_mode == 0} { - set col $post_render_col - } else { - #This doesn't really work if columns are pre-filled with spaces..we can't distinguish them from data - #we need renderline to return the number of the maximum column filled (or min if we ever do r-to-l) - set existingdata [lindex $outputlines [expr {$post_render_row -1}]] - set lastdatacol [punk::ansi::printing_length $existingdata] - if {$lastdatacol < $renderwidth} { - set col [expr {$lastdatacol+1}] - } else { - set col $renderwidth - } - } - - #puts stdout "2 row:$row col $col" - #puts stdout "-----------------------" - #puts stdout $rinfo - #flush stdout + dict set vtstate row $post_render_row + dict set vtstate col $post_render_col } down { - if {$data_mode == 0} { + #cursor down. Will not force scroll if at bottom of screen. + if {$post_render_row > [llength $outputlines]} { #renderline doesn't know how far down we can go.. - if {$post_render_row > [llength $outputlines]} { - if {$opt_appendlines} { - set diff [expr {$post_render_row - [llength $outputlines]}] - set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - if {$diff > 0} { - lappend outputlines {*}[lrepeat $diff $bce_line] - #lappend outputlines {*}[lrepeat $diff ""] - } - lappend outputlines $bce_line - #lappend outputlines "" - } - } - set row $post_render_row - set col $post_render_col - } else { - puts stderr "renderspace down - data_mode 1 - review" - if {$post_render_row > [llength $outputlines]} { - if {$opt_appendlines} { - set diff [expr {$post_render_row - [llength $outputlines]}] - set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - if {$diff > 0} { - lappend outputlines {*}[lrepeat $diff $bce_line] - #lappend outputlines {*}[lrepeat $diff ""] - } - lappend outputlines $bce_line - #lappend outputlines "" + set post_render_row [llength $outputlines] + } + dict set vtstate row $post_render_row + dict set vtstate col $post_render_col + } + down_scrolling { + #todo - scrolling region. take account of decstbm. + + #renderline doesn't know how far down we can go.. + if {$post_render_row > [llength $outputlines]} { + if {$opt_appendlines} { + set diff [expr {$post_render_row - [llength $outputlines]}] + set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] + if {$diff > 0} { + lappend outputlines {*}[lrepeat $diff $bce_line] } + lappend outputlines $bce_line } - # ---- - # review - set col $post_render_col - #just because it's out of range of the renderwidth - doesn't mean a move down should jump to within the range - 2025 - #---- - - #set existingdata [lindex $outputlines [expr {$post_render_row -1}]] - #set lastdatacol [punk::ansi::printing_length $existingdata] - - #set col [expr {$lastdatacol+1}] - - #if {$lastdatacol < $renderwidth} { - # set col [expr {$lastdatacol+1}] - #} else { - # set col $renderwidth - #} - } + dict set vtstate row $post_render_row + dict set vtstate col $post_render_col } restore_cursor { #testfile belinda.ans uses this #puts stdout "[a+ blue bold]CURSOR_RESTORE[a]" if {[tcl::dict::exists $cursor_saved_position row]} { - set row [tcl::dict::get $cursor_saved_position row] - set col [tcl::dict::get $cursor_saved_position column] + dict set vtstate row [tcl::dict::get $cursor_saved_position row] + dict set vtstate col [tcl::dict::get $cursor_saved_position column] #puts stdout "restoring: row $row col $col [ansistring VIEW $cursor_saved_attributes] [a] unapplied [ansistring VIEWCODES $unapplied]" #set nextprefix $cursor_saved_attributes #lset replay_codes_overlay [expr $overidx+1] $cursor_saved_attributes @@ -1159,6 +895,47 @@ tcl::namespace::eval overtype { set overflow_handled 1 } + decstbm { + #scrolling region - CSI r + #renderline will have rendered the line based on the current vtstate row/col + #- but the scrolling region change may have caused a move to be rendered to the output which changes the row/col for the next line + #- so we need to update our vtstate cursor position. + lassign $instruction _ margin_top margin_bottom + + set margins_valid 1 ;#default assumption + #todo - validate margins. A valid scrolling region requires at least two lines. + if {$margin_top eq ""} {set margin_top 1} + if {$margin_bottom eq ""} {set margin_bottom $screen_rows} + if {![string is integer -strict $margin_top] || $margin_top < 1} { + puts stderr "DECSTBM invalid margin_top '$margin_top' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {![string is integer -strict $margin_bottom] || $margin_bottom < 1} { + puts stderr "DECSTBM invalid margin_bottom '$margin_bottom' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {$margin_bottom - $margin_top < 1} { + puts stderr "DECSTBM invalid margins - bottom '$margin_bottom' must be greater than top '$margin_top' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {$margins_valid} { + #todo - return these for the caller to process.. + puts stderr "overtype::renderspace DECSTBM set top and bottom margin not implemented" + #Also moves the cursor to col 1 line 1 of the page + #review - examine DECOM state to determine new cursor position? + dict set vtstate row 1 + dict set vtstate col 1 + + #incr idx_over + #priv::render_to_unapplied $overlay_grapheme_control_list $gci + #set instruction [list decstbm $margin_top $margin_bottom] + dict set vtstate topmargin $margin_top + dict set vtstate bottommargin $margin_bottom + } else { + puts stderr "overtype::renderspace DECSTBM invalid margins - ignoring [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + #don't update the vtstate margins. + } + } move { ######## if {$post_render_row > [llength $outputlines]} { @@ -1170,67 +947,95 @@ tcl::namespace::eval overtype { if {$diff > 0} { lappend outputlines {*}[lrepeat $diff $bce_line] } - set row $post_render_row + dict set vtstate row $post_render_row } else { - set row [llength $outputlines] + dict set vtstate row [llength $outputlines] } } else { - set row $post_render_row + dict set vtstate row $post_render_row } ####### - set col $post_render_col + dict set vtstate col $post_render_col #overflow + unapplied? } + clear_to_end_display { + #ED 0 + #review - needs to operate within top and bottom margins if set (decstbm) - but for now we assume full screen clear + #Current row already partially erased by renderline. + if {$post_render_row > [llength $outputlines]} { + dict set vtstate row [llength $outputlines] + } else { + dict set vtstate row $post_render_row + } + dict set vtstate col $post_render_col + set overflow_right "" + + set start_idx [expr {[dict get $vtstate row]}] + if {$start_idx < 0} {set start_idx 0} + for {set i $start_idx} {$i < [llength $outputlines]} {incr i} { + lset outputlines $i \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth]\x1b\[0m + } + } + clear_to_start_display { + #ED 1 + #Current row already partially erased by renderline. + if {$post_render_row > [llength $outputlines]} { + dict set vtstate row [llength $outputlines] + } else { + dict set vtstate row $post_render_row + } + dict set vtstate col $post_render_col + set overflow_right "" + + set stop_idx [expr {[dict get $vtstate row] - 1}] + if {$stop_idx >= [llength $outputlines]} { + set stop_idx [expr {[llength $outputlines] - 1}] + } + for {set i 0} {$i < $stop_idx} {incr i} { + lset outputlines $i \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth]\x1b\[0m + } + } clear_and_move { - #e.g 2J + #ED 2J if {$post_render_row > [llength $outputlines]} { - set row [llength $outputlines] + dict set vtstate row [llength $outputlines] } else { - set row $post_render_row + dict set vtstate row $post_render_row } - set col $post_render_col + dict set vtstate col $post_render_col set overflow_right "" ;#if we're clearing - any overflow due to insert_mode is irrelevant - set clearedlines [list] - foreach ln $outputlines { - lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth]\x1b\[0m - - #set lineparts [punk::ansi::ta::split_codes $ln] - #set numcells 0 - #foreach {pt _code} $lineparts { - # if {$pt ne ""} { - # foreach grapheme [punk::char::grapheme_split $pt] { - # switch -- $grapheme { - # " " - - - _ - ! - @ - # - $ - % - ^ - & - * - = - + - : - . - , - / - | - ? - - # a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - - # z - A - B - C - D - E - F - G - H - I - J - K - L - M - N - O - P - Q - R - S - T - U - V - W - X - Y - Z - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { - # incr numcells 1 - # } - # default { - # if {$grapheme eq "\u0000"} { - # #review - # incr numcells 1 - # } elseif {$grapheme eq "\t"} { - # #set tstops [lsort -integer -unique [punk::console::get_tabstops]] - # puts stderr "tab at numcells: $numcells - REVIEW renderspace" - # set nexttabstop [expr {((int($numcells / 8) + 1) * 8)}] - # incr numcells [expr {$nexttabstop - $numcells}] - # } else { - # incr numcells [grapheme_width_cached $grapheme] - # } - # } - # } - - # } - # } - #} - ##replays/resets each line - #lappend clearedlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $numcells]\x1b\[0m + for {set i 0} {$i < [llength $outputlines]} {incr i} { + lset outputlines $i \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth]\x1b\[0m } - set outputlines $clearedlines #todo - determine background/default to be in effect - DECECM ? puts stderr "replay_codes_overlay: [ansistring VIEW $replay_codes_overlay]" - #lset outputlines 0 $replay_codes_overlay[lindex $outputlines 0] - + } + delete_lines { + #DL n + set delete_count [lindex $instruction 1] + set r $renderedrow + puts stderr "delete_lines $delete_count at row $r" + if {$delete_count > 0} { + #set outputlines [lreplace $outputlines [dict get $vtstate row] [expr {[dict get $vtstate row] + $delete_count - 1}]] + set delidx_first [expr {$r - 1}] ;#convert to 0-based index + set delidx_last [expr {$delidx_first + ($delete_count - 1)}] ;#inclusive index of last line to delete + #if delete_count is 1 - we are only deleting the current line. + ledit outputlines $delidx_first $delidx_last + } + dict set vtstate row $renderedrow + if {[llength $outputlines] < [dict get $vtstate row]} { + dict set vtstate row [llength $outputlines] + } + #we need to ensure 'unapplied' data is still applied to the current line after deletion. + #Any overflow on the current line should be abandoned. + if {[llength $unapplied_ansisplit]} { + #set inputchunks [linsert $inputchunks 0 $nextprefix] + #JMN - assume backwards compat ledit available from punk::lib (for tcl <9) + ledit inputchunks -1 -1 [list ansisplit $unapplied_ansisplit] + } + incr overidx + incr loop + continue } lf_start { #raw newlines @@ -1239,9 +1044,9 @@ tcl::namespace::eval overtype { #test - treating as newline below... #append rendered $overflow_right #set overflow_right "" - set row $renderedrow - incr row - if {$row > [llength $outputlines]} { + + dict set vtstate row [expr {$renderedrow + 1}] + if {[dict get $vtstate row] > [llength $outputlines]} { #lappend outputlines "" # BCE lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] @@ -1250,137 +1055,88 @@ tcl::namespace::eval overtype { # ---------------------- } lf_mid { - set edit_mode 0 - if {$edit_mode} { - #set inputchunks [linsert $inputchunks 0 $overflow_right$unapplied] - #JMN - #ledit inputchunks -1 -1 $overflow_right$unapplied - - set pt_ansi_pt [punk::ansi::ta::split_codes_single $overflow_right] - #join the trailing and leading pt parts of the 2 lists - ledit pt_ansi_pt end end "[lindex $pt_ansi_pt end][lindex $unapplied_ansisplit 0]" - lappend pt_ansi_pt [lrange $unapplied_ansisplit 1 end] - - ledit inputchunks -1 -1 [list ansisplit $pt_ansi_pt] ;#combined overflow_right and unapplied - in ansisplit form - + if {$overflow_right ne ""} { + if {$opt_expand_right} { + append rendered $overflow_right set overflow_right "" - set unapplied "" - set unapplied_list [list] - set unapplied_ansisplit [list] - - set row $post_render_row - #set col $post_render_col - set col $opt_startcolumn - if {$row > [llength $outputlines]} { - #lappend outputlines {*}[lrepeat 1 ""] - lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - } } else { - if 1 { - if {$overflow_right ne ""} { - if {$opt_expand_right} { - append rendered $overflow_right - set overflow_right "" - } else { - #review - we should really make renderline do the work...? - set overflow_width [punk::ansi::printing_length $overflow_right] - if {$visualwidth + $overflow_width <= $renderwidth} { - append rendered $overflow_right - set overflow_right "" - } else { - if {[tcl::dict::get $vtstate autowrap_mode]} { - #set outputlines [linsert $outputlines $renderedrow $overflow_right] - #ledit outputlines $renderedrow $renderedrow-1 $overflow_right - puts stderr "REVIEW wrap overflow_right to next line: $overflow_right" - #this looks wrong - ledit outputlines $renderedrow -1 $overflow_right - set overflow_right "" - set row [expr {$renderedrow + 2}] - } else { - set overflow_right "" ;#abandon - } + #review - we should really make renderline do the work...? + set overflow_width [punk::ansi::printing_length $overflow_right] + if {$visualwidth + $overflow_width <= $renderwidth} { + append rendered $overflow_right + set overflow_right "" + } else { + if {[tcl::dict::get $vtstate autowrap_mode]} { + #set outputlines [linsert $outputlines $renderedrow $overflow_right] + #ledit outputlines $renderedrow $renderedrow-1 $overflow_right + puts stderr "REVIEW wrap overflow_right to next line: $overflow_right" + #this looks wrong + ledit outputlines $renderedrow -1 $overflow_right + set overflow_right "" + #review - why are we setting this here when we override it below? + dict set vtstate row [expr {$renderedrow + 2}] + } else { + set overflow_right "" ;#abandon + } - if {0 && $visualwidth < $renderwidth} { - puts stderr "visualwidth: $visualwidth < renderwidth:$renderwidth" - error "incomplete - abandon?" - set overflowparts [punk::ansi::ta::split_codes $overflow_right] - set remaining_overflow $overflowparts - set filled 0 - foreach {pt code} $overflowparts { - lpop remaining_overflow 0 - if {$pt ne ""} { - set graphemes [punk::char::grapheme_split $pt] - set add "" - set addlen $visualwidth - foreach g $graphemes { - set w [overtype::grapheme_width_cached $g] - if {$addlen + $w <= $renderwidth} { - append add $g - incr addlen $w - } else { - set filled 1 - break - } - } - append rendered $add - } - if {!$filled} { - lpop remaining_overflow 0 ;#pop code - } + if {0 && $visualwidth < $renderwidth} { + puts stderr "visualwidth: $visualwidth < renderwidth:$renderwidth" + error "incomplete - abandon?" + set overflowparts [punk::ansi::ta::split_codes $overflow_right] + set remaining_overflow $overflowparts + set filled 0 + foreach {pt code} $overflowparts { + lpop remaining_overflow 0 + if {$pt ne ""} { + set graphemes [punk::char::grapheme_split $pt] + set add "" + set addlen $visualwidth + foreach g $graphemes { + set w [overtype::grapheme_width_cached $g] + if {$addlen + $w <= $renderwidth} { + append add $g + incr addlen $w + } else { + set filled 1 + break } - set overflow_right [join $remaining_overflow ""] } + append rendered $add + } + if {!$filled} { + lpop remaining_overflow 0 ;#pop code } } - } - set row $post_render_row - set col $opt_startcolumn - if {$row > [llength $outputlines]} { - #lappend outputlines {*}[lrepeat 1 ""] - # BCE - #puts "row: $row > outputlines: [llength $outputlines] overlay replay:[ansistring VIEW $replay_codes_overlay]" - lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - } - } else { - #old version - known to work with various ansi graphics - e.g fruit.ans - # - but fails to limit lines to renderwidth when expand_right == 0 - append rendered $overflow_right - set overflow_right "" - set row $post_render_row - set col $opt_startcolumn - if {$row > [llength $outputlines]} { - lappend outputlines {*}[lrepeat 1 ""] + set overflow_right [join $remaining_overflow ""] } } } + } + dict set vtstate row $post_render_row + dict set vtstate col $opt_startcolumn + if {[dict get $vtstate row] > [llength $outputlines]} { + #lappend outputlines {*}[lrepeat 1 ""] + # BCE + #puts "row: $row > outputlines: [llength $outputlines] overlay replay:[ansistring VIEW $replay_codes_overlay]" + lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] + } } lf_overflow { - #linefeed after renderwidth e.g at column 81 for an 80 col width - #we may also have other control sequences that came after col 80 e.g cursor save + #linefeed after renderwidth e.g at column 81 for an 80 col width + #we may also have other control sequences that came after col 80 e.g cursor save - if 0 { - set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -nul 1 -lf 1 -vt 1 $rendered]] - set lhs [textblock::frame -title "rendered $visualwidth cols" -subtitle "row-$renderedrow" $lhs] - set rhs "" - - #assertion - there should be no overflow.. - puts $lhs - } - if {![tcl::dict::get $vtstate insert_mode]} { - assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right when not insert_mode - } - - set row $post_render_row - #set row $renderedrow - #incr row - #only add newline if we're at the bottom - if {$row > [llength $outputlines]} { - #lappend outputlines {*}[lrepeat 1 ""] - # BCE - lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - } - set col $opt_startcolumn + if {![tcl::dict::get $vtstate insert_mode]} { + assert {$overflow_right eq ""} lf_overflow should not get data in overflow_right when not insert_mode + } + dict set vtstate row $post_render_row + #only add newline if we're at the bottom + if {[dict get $vtstate row] > [llength $outputlines]} { + #lappend outputlines {*}[lrepeat 1 ""] + # BCE + lappend outputlines \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] + } + dict set vtstate col $opt_startcolumn } newlines_above { #we get a newlines_above instruction when received at column 1 @@ -1390,76 +1146,53 @@ tcl::namespace::eval overtype { puts "--->newlines_above" puts "rinfo: $rinfo" #renderline doesn't advance the row for us - the caller has the choice to implement or not - set row $post_render_row - set col $post_render_col + set temp_row $post_render_row if {$insert_lines_above > 0} { - set row $renderedrow + set temp_row $renderedrow #set outputlines [linsert $outputlines $renderedrow-1 {*}[lrepeat $insert_lines_above ""]] #ledit outputlines $renderedrow-1 $renderedrow-2 {*}[lrepeat $insert_lines_above ""] # BCE (background color erase) set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] ledit outputlines $renderedrow-1 -1 {*}[lrepeat $insert_lines_above $bce_line] #ledit outputlines $renderedrow-1 -1 {*}[lrepeat $insert_lines_above ""] - incr row [expr {$insert_lines_above -1}] ;#we should end up on the same line of text (at a different index), with new empties inserted above + incr temp_row [expr {$insert_lines_above -1}] ;#we should end up on the same line of text (at a different index), with new empties inserted above #? set row $post_render_row #can renderline tell us? } + dict set vtstate row $temp_row + dict set vtstate col $post_render_col } newlines_below { #obsolete? - use for ANSI insert lines sequence - if {$data_mode == 0} { - puts --->nl_below - set row $post_render_row - set col $post_render_col - if {$insert_lines_below == 1} { - #set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $rendered]] - #set lhs [textblock::frame -title rendered -subtitle "row-$renderedrow" $lhs] - #set rhs "" - #if {$overflow_right ne ""} { - # set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $overflow_right]] - # set rhs [textblock::frame -title overflow_right $rhs] - #} - #puts [textblock::join $lhs $rhs] - - #rendered - append rendered $overflow_right - # + puts --->nl_below + set temp_row $post_render_row + set temp_col $post_render_col + if {$insert_lines_below == 1} { + #set lhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $rendered]] + #set lhs [textblock::frame -title rendered -subtitle "row-$renderedrow" $lhs] + #set rhs "" + #if {$overflow_right ne ""} { + # set rhs [overtype::left -width 40 -wrap 1 "" [ansistring VIEWSTYLE -lf 1 -vt 1 $overflow_right]] + # set rhs [textblock::frame -title overflow_right $rhs] + #} + #puts [textblock::join $lhs $rhs] + #rendered + append rendered $overflow_right + # - set overflow_right "" - set row $renderedrow - #only add newline if we're at the bottom - if {$row > [llength $outputlines]} { - set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - lappend outputlines {*}[lrepeat $insert_lines_below $bce_line] - #lappend outputlines {*}[lrepeat $insert_lines_below ""] - } - incr row $insert_lines_below - set col $opt_startcolumn - } - } else { - set row $post_render_row - if {$post_render_row > [llength $outputlines]} { - if {$opt_appendlines} { - set diff [expr {$post_render_row - [llength $outputlines]}] - set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] - - if {$diff > 0} { - lappend outputlines {*}[lrepeat $diff $bce_line] - #lappend outputlines {*}[lrepeat $diff ""] - } - lappend outputlines $bce_line - #lappend outputlines "" - } - } else { - set existingdata [lindex $outputlines [expr {$post_render_row -1}]] - set lastdatacol [punk::ansi::printing_length $existingdata] - if {$lastdatacol < $renderwidth} { - set col [expr {$lastdatacol+1}] - } else { - set col $renderwidth - } + set overflow_right "" + set temp_row $renderedrow + #only add newline if we're at the bottom + if {$temp_row > [llength $outputlines]} { + set bce_line \x1b\[0m$replay_codes_overlay[string repeat \u0000 $renderwidth] + lappend outputlines {*}[lrepeat $insert_lines_below $bce_line] + #lappend outputlines {*}[lrepeat $insert_lines_below ""] } + incr temp_row $insert_lines_below + set temp_col $opt_startcolumn } + dict set vtstate row $temp_row + dict set vtstate col $temp_col } wrapmoveforward { #doesn't seem to be used by fruit.ans testfile @@ -1493,8 +1226,8 @@ tcl::namespace::eval overtype { set c $post_render_col } #puts stderr "wrapmoveforward - moving from row $row col $col to row $r col $c" - set row $r - set col $c + dict set vtstate row $r + dict set vtstate col $c } wrapmovebackward { set c $renderwidth @@ -1522,8 +1255,8 @@ tcl::namespace::eval overtype { } else { puts stderr "Wrapmovebackward - but postrendercol >= 1???" } - set row $r - set col $c + dict set vtstate row $r + dict set vtstate col $c } overflow { #normal single-width grapheme overflow @@ -1539,13 +1272,13 @@ tcl::namespace::eval overtype { #example trigger: textblock::frame -width 80 [ansicat us-pump_up_the_jam-355.ans 355x100] - set row $post_render_row ;#renderline will not advance row when reporting overflow char + set r $post_render_row ;#renderline will not advance row when reporting overflow char #puts stderr "overflow autowrap - wrap to next line row: $row autowrap: [tcl::dict::get $vtstate autowrap_mode] renderwidth: $renderwidth visualwidth: $visualwidth [ansistring VIEW $unapplied]" if {[tcl::dict::get $vtstate autowrap_mode]} { - incr row - set col $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? + incr r + set c $opt_startcolumn ;#whether wrap or not - next data is at column 1 ?? } else { - set col $post_render_col + set c $post_render_col #The overflow can only be triggered by a grapheme (todo cluster?) - but our unapplied could contain SGR codes prior to the grapheme that triggered overflow - so we need to skip beyond any SGRs #There may be more than one, because although the stack leading up to overflow may have been merged - codes between the last column and the overflowing grapheme will remain separate #We don't expect any movement or other ANSI codes - as if they came before the grapheme, they would have triggered a different instruction to 'overflow' @@ -1598,9 +1331,12 @@ tcl::namespace::eval overtype { set overflow_handled 1 #handled by dropping overflow if any } + dict set vtstate row $r + dict set vtstate col $c } overflow_splitchar { - set row $post_render_row ;#renderline will not advance row when reporting overflow char + set r $post_render_row ;#renderline will not advance row when reporting overflow char + set c $post_render_col #2nd half of grapheme would overflow - treggering grapheme is returned in unapplied. There may also be overflow_right from earlier inserts #todo - consider various options .. re-render a single trailing space or placeholder on same output line, etc @@ -1622,8 +1358,8 @@ tcl::namespace::eval overtype { #review - inefficient to re-split (return unapplied_tagged instead of unapplied_ansisplit?) set unapplied_ansisplit [punk::ansi::ta::split_codes_single $unapplied] } else { - set col $opt_startcolumn - incr row + set c $opt_startcolumn + incr r } } else { set overflow_handled 1 @@ -1646,13 +1382,14 @@ tcl::namespace::eval overtype { set unapplied_ansisplit [punk::ansi::ta::split_codes_single $unapplied] } } - + dict set vtstate row $r + dict set vtstate col $c } vt { #can vt add a line like a linefeed can? - set row $post_render_row - set col $post_render_col + dict set vtstate row $post_render_row + dict set vtstate col $post_render_col } set_window_title { set newtitle [lindex $instruction 1] @@ -1735,19 +1472,6 @@ tcl::namespace::eval overtype { lappend nextprefix_list {*}[lrange $unapplied_ansisplit 1 end] } - if 0 { - if {$nextprefix ne ""} { - set nextoveridx [expr {$overidx+1}] - if {$nextoveridx >= [llength $inputchunks]} { - lappend inputchunks $nextprefix - } else { - #lset overlines $nextoveridx $nextprefix[lindex $overlines $nextoveridx] - #set inputchunks [linsert $inputchunks $nextoveridx $nextprefix] - ledit inputchunks $nextoveridx -1 $nextprefix - } - } - } - if {[llength $nextprefix_list]} { #set inputchunks [linsert $inputchunks 0 $nextprefix] #JMN - assume backwards compat ledit available from punk::lib (for tcl <9) @@ -1766,7 +1490,6 @@ tcl::namespace::eval overtype { set debugmsg "" append debugmsg "${Y}${sep_header}${RST}" \n append debugmsg "looplimit $looplimit reached\n" - append debugmsg "data_mode:$data_mode\n" append debugmsg "opt_appendlines:$opt_appendlines\n" append debugmsg "prev_row :[tcl::dict::get $renderopts -cursor_row]\n" append debugmsg "prev_col :[tcl::dict::get $renderopts -cursor_column]\n" @@ -2446,7 +2169,6 @@ tcl::namespace::eval overtype { Default is \uFFFD - the unicode replacement char.} -cursor_restore_attributes -default "" -cp437 -default 0 -type boolean - -experimental -default {} -overtext_type -type string -choices {mixed plain ansisplit} -default mixed @values -min 2 -max 2 undertext -type string -help\ @@ -2564,7 +2286,6 @@ tcl::namespace::eval overtype { -exposed2 \uFFFD -cursor_restore_attributes "" -cp437 0 - -experimental {} -overtext_type mixed }] #-overtext_type plain|mixed|ansisplit @@ -2580,7 +2301,7 @@ tcl::namespace::eval overtype { set argsflags [lrange $args 0 end-2] tcl::dict::for {k v} $argsflags { switch -- $k { - -experimental - -cp437 - -width - -expand_right - -transparent - -startcolumn - -cursor_column - -cursor_row + -cp437 - -width - -expand_right - -transparent - -startcolumn - -cursor_column - -cursor_row - -crm_mode - -insert_mode - -autowrap_mode - -reverse_mode - -etabs - -info - -exposed1 - -exposed2 - -cursor_restore_attributes - -overtext_type { tcl::dict::set opts $k $v @@ -4053,6 +3774,7 @@ tcl::namespace::eval overtype { } B { #CUD - Cursor Down + #CSI n B #Row move - down lassign [split $param {;}] num modifierkey set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] @@ -4379,10 +4101,34 @@ tcl::namespace::eval overtype { if {$param eq ""} {set param 0} switch -exact -- $param { 0 { - #clear from cursor to end of screen + #ED 0 - clear from cursor to end of screen (including cursor position) + #Current-line part can be done here; remaining lines are handled by caller. + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] + incr idx_over + if {[llength $outcols] && $idx < [llength $outcols]} { + priv::render_erasechar $idx [expr {[llength $outcols] - $idx}] + } + priv::render_to_unapplied $overlay_grapheme_control_list $gci + set instruction clear_to_end_display + break } 1 { - #clear from cursor to beginning of screen + #ED 1 - clear from start of screen to cursor + #Current-line part can be done here; previous lines are handled by caller. + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] + incr idx_over + if {[llength $outcols] && $idx >= 0} { + set count [expr {$idx + 1}] + if {$count > [llength $outcols]} { + set count [llength $outcols] + } + if {$count > 0} { + priv::render_erasechar 0 $count + } + } + priv::render_to_unapplied $overlay_grapheme_control_list $gci + set instruction clear_to_start_display + break } 2 { #clear entire screen CSI 2J @@ -4400,7 +4146,8 @@ tcl::namespace::eval overtype { break } 3 { - #clear entire screen. presumably cursor doesn't move - otherwise it would be same as 2J ? + #clear entire screen. As well as scrollback buffer if supported (unimplemented) + puts stderr "overtype::renderline ED 3 - clear entire screen and scrollback buffer if supported (unimplemented) [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" } default { @@ -4461,8 +4208,79 @@ tcl::namespace::eval overtype { } M { #CSI Pn M - DL - Delete Line - puts stderr "overtype::renderline DL - Delete Line - not implemented [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + if {$param eq "" || $param eq "0"} {set param 1}; #param=number of chars to delete + if {![string is integer -strict $param] || $param < 0} { + puts stderr "overtype::renderline DCH bad param '$param' - assuming 1 [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set param 1 + } + priv::render_to_unapplied $overlay_grapheme_control_list $gci + #The current line will be deleted by the calling function - along with more below if param > 1 + #we clear the outcols so that the result for this line is empty. + ledit outcols 0 end + ledit understacks 0 end + ledit understacks_gx 0 end + #puts stderr "overtype::renderline DL - Delete Line - not implemented [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + #todo - rename insert_lines_below to affect_lines_below or something equally generic (use for multiple instructions) + set instruction [list delete_lines $param] + break + } + P { + #DCH - Delete Character(s) + #Deletes Pn characters from cursor position, shifts line left, + #and fills vacated rightmost cells with erased cells. + if {$param eq "" || $param eq "0"} {set param 1}; #param=number of chars to delete + if {![string is integer -strict $param] || $param < 0} { + puts stderr "overtype::renderline DCH bad param '$param' - assuming 1 [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set param 1 + } + set orig_len [llength $outcols] + for {set di 0} {$di < $param} {incr di} { + priv::render_delchar $idx + } + #Maintain line width by padding erased cells at right edge. + set removed [expr {$orig_len - [llength $outcols]}] + for {set fi 0} {$fi < $removed} {incr fi} { + lappend outcols \u0000 + lappend understacks [list $replay_codes_overlay] + lappend understacks_gx [list] + #review - should we be appending gx0state here? or just empty list? + #- presumably we should be appending gx0state from the end of the line - which may be different from current gx0state if there are codes in the line that change it - but we don't want to track those changes as we delete chars - so maybe we should be appending the gx0state from the end of the line before deletion started? + #lappend understacks_gx [list $gx0state] + } + #cursor position doesn't change. + } + @ { + #ICH - Insert Character(s) + #Inserts Pn blank characters at the cursor position, shifts line right, + #and fills vacated leftmost cells with erased cells. + #The characters shifted beyond the right margin are lost. + if {$param eq "" || $param eq "0"} {set param 1}; #param=number of chars to insert + if {![string is integer -strict $param] || $param < 1} { + puts stderr "overtype::renderline ICH bad param '$param' - assuming 1 [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set param 1 + } + set orig_len [llength $outcols] + if {$overflow_idx != -1 && $param > [llength $outcols]} { + #since characters at rhs are lost, we can't insert more than the width. + set param $orig_len + } + set this_sgrstack [lindex $overlay_grapheme_control_stacks $gci] + set this_gxstack [lindex $overstacks_gx $idx_over] + #use space for inserted blanks; helper handles tab reflow + priv::render_insertgraphemes $idx [lrepeat $param " "] $this_sgrstack $this_gxstack + #Keep line width fixed unless expand-right mode is active. + if {$overflow_idx != -1} { + if {[llength $outcols] > $orig_len} { + #truncate + ledit outcols $orig_len end + ledit understacks $orig_len end + ledit understacks_gx $orig_len end + } + + } + + #cursor position doesn't change. } T { #CSI Pn T - SD Pan Up (empty lines introduced at top) @@ -4518,16 +4336,36 @@ tcl::namespace::eval overtype { #This control function sets the top and bottom margins for the current page. You cannot perform scrolling outside the margins lassign [split $param {;}] margin_top margin_bottom - #todo - return these for the caller to process.. - puts stderr "overtype::renderline DECSTBM set top and bottom margin not implemented" - #Also moves the cursor to col 1 line 1 of the page - set cursor_column 1 - set cursor_row 1 + set margins_valid 1 ;#default assumption + #todo - validate margins. A valid scrolling region requires at least two lines. + if {$margin_top eq ""} {set margin_top 1} + if {$margin_bottom eq ""} {set margin_bottom $screen_rows} + if {![string is integer -strict $margin_top] || $margin_top < 1} { + puts stderr "DECSTBM invalid margin_top '$margin_top' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {![string is integer -strict $margin_bottom] || $margin_bottom < 1} { + puts stderr "DECSTBM invalid margin_bottom '$margin_bottom' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {$margin_bottom - $margin_top < 1} { + puts stderr "DECSTBM invalid margins - bottom '$margin_bottom' must be greater than top '$margin_top' [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + set margins_valid 0 + } + if {$margins_valid} { + #todo - return these for the caller to process.. + puts stderr "overtype::renderline DECSTBM set top and bottom margin not implemented" + #Also moves the cursor to col 1 line 1 of the page + set cursor_column 1 + set cursor_row 1 - incr idx_over - priv::render_to_unapplied $overlay_grapheme_control_list $gci - set instruction move ;#own instruction? decstbm? - break + incr idx_over + priv::render_to_unapplied $overlay_grapheme_control_list $gci + set instruction [list decstbm $margin_top $margin_bottom] + break + } else { + puts stderr "overtype::renderline DECSTBM invalid margins - ignoring [ansistring VIEW -lf 1 -vt 1 -nul 1 $code]" + } } s { #code conflict between ansi emulation and DECSLRM - REVIEW @@ -5023,12 +4861,12 @@ tcl::namespace::eval overtype { } D { #\x84 - #index (IND) + #index (IND) ESC D #vt102-docs: "Moves cursor down one line in same column. If cursor is at bottom margin, screen performs a scroll-up" puts stderr "renderline ESC D not fully implemented" incr cursor_row priv::render_to_unapplied $overlay_grapheme_control_list $gci - set instruction down + set instruction down_scrolling #retain cursor_column break } @@ -5062,7 +4900,7 @@ tcl::namespace::eval overtype { } #ensure rest of *overlay* is emitted to remainder priv::render_to_unapplied $overlay_grapheme_control_list $gci - set instruction up ;#need instruction for scroll-down? + set instruction up ;#need instruction for screen to scroll-down? #retain cursor_column break } @@ -5588,17 +5426,9 @@ tcl::namespace::eval overtype { set in_tab_expansion [dict create idx $i remaining [expr {$this_tab_width -1}]] set this_char \t } elseif {$ch eq "\u0000"} { - if {$cp437_glyphs} { - #map all nulls including at tail to space - set this_char " " - } else { - set this_char " " - #if {$trailing_nulls && $i < $first_tail_null_posn} { - # append outstring " " ;#map inner nulls to space - #} else { - # append outstring \u0000 - #} - } + #map all nulls including at tail to space + set this_char " " + #review } else { set this_char $ch } diff --git a/src/vfs/_vfscommon.vfs/modules/punk-0.1.1.tm b/src/vfs/_vfscommon.vfs/modules/punk-0.1.1.tm index b6e32cff..c2fdce0e 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk-0.1.1.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk-0.1.1.tm @@ -2466,6 +2466,7 @@ namespace eval punk { set splitchars "" set assigned [split $leveldata $splitchars] }] + puts "---split script: $script" set level_script_complete 1 #todo %splitat- %splitn- ?? @@ -4205,7 +4206,7 @@ namespace eval punk { #avoid use of regexp match on each element - or we will unnecessarily force string reps on lists #same with lsearch with a string pattern - #wouldn't matter for small lists - but we need to be able to handle large ones efficiently without unneccessary string reps - set script [string map [list $scopepattern $equalsrhs] { + set script [string map [list [list $scopepattern] $equalsrhs] { #script built by punk::match_assign if {[llength $args]} { #scan for existence of any pipe operator (|*> or <*|) only - we don't need position @@ -4214,11 +4215,12 @@ namespace eval punk { # x= <| # x= |> #both leave x empty. To assign a pipelike value to x we would have to do: x= <| |> (equiv: set x |>) + set scopep foreach a $args { if {![catch {llength $a} sublen]} { #don't enforce sublen == 1. Legal to have whitespace including newlines {| x >} if {[string match |*> $a] || [string match <*| $a]} { - tailcall punk::pipeline = "" "" {*}$args + tailcall punk::pipeline = $scopep "" {*}$args } } } @@ -4594,6 +4596,10 @@ namespace eval punk { #debug.punk.pipe.rep {[rep_listname fulltail]} 6 + #review + set equalsrhs [string map [list {;} {\;}] $equalsrhs] + + #--------------------------------------------------------------------- # test if we have an initial x.=y.= or x.= y.= @@ -4643,26 +4649,31 @@ namespace eval punk { #var1 will contain ETC (from entire pipeline), var2 will contain etc (from associated segment) # - if {([set nexteposn [string last = $next1]] >= 0) && (![punk::pipe::lib::arg_is_script_shaped $next1]) } { - set nexttail [lrange $args 1 end] - #*SUB* pipeline recursion. - #puts "======> recurse based on next1:$next1 " - if {[string index $next1 $nexteposn-1] eq {.}} { - #var1.= var2.= ... - #non pipelined call to self - return result + + if {([set nexteposn [string last = $next1]] >= 0)} { + set next1 [string map [list {;} {\;}] $next1] ;#review + #do we really need to test for script_shaped if last char is = ? + if {![punk::pipe::lib::arg_is_script_shaped $next1]} { + set nexttail [lrange $args 1 end] + #*SUB* pipeline recursion. + #puts "======> recurse based on next1:$next1 " + if {[string index $next1 $nexteposn-1] eq {.}} { + #var1.= var2.= ... + #non pipelined call to self - return result + set results [uplevel 1 [list $next1 {*}$nexttail]] + #debug.punk.pipe.rep {==> rep recursive results: [rep $results]} 5 + #debug.punk.pipe {>>> results: $results} 1 + return [_handle_bind_result [_multi_bind_result $initial_returnvarspec $results]] + } + #puts "======> recurse assign based on next1:$next1 " + #if {[regexp {^([^ \t\r\n=\{]*)=(.*)} $next1 _ nextreturnvarspec nextrhs]} { + #} + #non pipelined call to plain = assignment - return result set results [uplevel 1 [list $next1 {*}$nexttail]] - #debug.punk.pipe.rep {==> rep recursive results: [rep $results]} 5 #debug.punk.pipe {>>> results: $results} 1 - return [_handle_bind_result [_multi_bind_result $initial_returnvarspec $results]] + set d [_multi_bind_result $initial_returnvarspec $results] + return [_handle_bind_result $d] } - #puts "======> recurse assign based on next1:$next1 " - #if {[regexp {^([^ \t\r\n=\{]*)=(.*)} $next1 _ nextreturnvarspec nextrhs]} { - #} - #non pipelined call to plain = assignment - return result - set results [uplevel 1 [list $next1 {*}$nexttail]] - #debug.punk.pipe {>>> results: $results} 1 - set d [_multi_bind_result $initial_returnvarspec $results] - return [_handle_bind_result $d] } } @@ -5981,6 +5992,9 @@ namespace eval punk { tailcall {*}[list ::punk::pipeline = "" "" {*}$arglist] } + #review + set assign [string map {; \\;} $assign] + set is_script [punk::pipe::lib::arg_is_script_shaped $assign] if {!$is_script && [string index $assign end] eq "="} { @@ -5999,7 +6013,7 @@ namespace eval punk { if {$is_script} { set cmdlist [list ::punk::pipeline "script" "" "" {*}$args] } else { - set cmdlist [list ::punk::pipeline ".=" "" "" {*}$args] + set cmdlist [list ::punk::pipeline ".=" "" "" $assign {*}$arglist] } } tailcall {*}$cmdlist diff --git a/src/vfs/_vfscommon.vfs/modules/punk/aliascore-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/aliascore-0.1.0.tm index 296fa148..bea6a48f 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/aliascore-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/aliascore-0.1.0.tm @@ -123,6 +123,7 @@ tcl::namespace::eval punk::aliascore { ansistrip ::punk::ansi::ansistrip stripansi ::punk::ansi::ansistrip ansiwrap ::punk::ansi::ansiwrap + ansisplit ::punk::ansi::ta::split_codes_single grepstr ::punk::ansi::grepstr untabify ::punk::ansi::untabify colour ::punk::console::colour diff --git a/src/vfs/_vfscommon.vfs/modules/punk/ansi-0.1.1.tm b/src/vfs/_vfscommon.vfs/modules/punk/ansi-0.1.1.tm index cd5a3ac1..407782ff 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/ansi-0.1.1.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/ansi-0.1.1.tm @@ -127,8 +127,8 @@ tcl::namespace::eval punk::ansi::class { -width -type integer -default "" -height -type integer -default "" -crm_mode -type boolean -default 0 - -binarytext -type string -default "" -choices {"" bios ice} - -format -type string -choices {ansi binarytext-bios binarytext-ice xbin} + -format -type string -choices {ansi binarytext-bios binarytext-ice xbin} -help\ + "Format of new data being applied as an overlay" @values -min 0 -max 0 }] method rendertest {args} { @@ -136,7 +136,6 @@ tcl::namespace::eval punk::ansi::class { set opt_width [dict get $argd opts -width] set opt_height [dict get $argd opts -height] set opt_crm_mode [dict get $argd opts -crm_mode] - set opt_binarytext [dict get $argd opts -binarytext] set opt_format [dict get $argd opts -format] set existing_dimensions $o_render_dimensions @@ -152,8 +151,6 @@ tcl::namespace::eval punk::ansi::class { set o_render_dimensions ${w}x${h} - - #set rendered [overtype::renderspace -binarytext $opt_binarytext -cp437 1 -crm_mode $opt_crm_mode -expand_right 0 -wrap 1 -width $w -height $h -appendlines 1 "" [$o_ansistringobj get]] set rendered [overtype::renderspace -format $opt_format -cp437 1 -crm_mode $opt_crm_mode -expand_right 0 -wrap 1 -width $w -height $h -appendlines 1 "" [$o_ansistringobj get]] return $rendered } @@ -699,7 +696,6 @@ tcl::namespace::eval punk::ansi { } set opt_crm_mode [dict get $opts -crm_mode] - set binarytext "" set sdict [dict create] #if SAUCE data is present - it may give an indication of encoding as well as number of columns/lines if {![catch {package require punk::ansi::sauce}]} { @@ -720,8 +716,7 @@ tcl::namespace::eval punk::ansi { switch -exact -- [string tolower [file extension $fname]] { .bin { #In the absence of SAUCE data - assume .bin is binary text - set binarytext bios ;#16 fg, 8 bg + blink - set format binarytext-bios + set format binarytext-bios ;#16 fg, 8 bg + blink } .xb { set format xbin @@ -744,12 +739,10 @@ tcl::namespace::eval punk::ansi { if {[dict exists $sdict datatype_name]} { switch -- [dict get $sdict datatype_name] { binarytext { - #SAUCE ANSiFlags - ice vs default bios + #SAUCE ANSiFlags - iCE vs default bios if {[dict exists $sdict ansiflags_ice] && [dict get $sdict ansiflags_ice]} { - set binarytext ice set format binarytext-ice } else { - set binarytext bios set format binarytext-bios } } @@ -825,13 +818,14 @@ tcl::namespace::eval punk::ansi { #set ansidata $hdr$data #don't convert at all - compressed is binary? + } elseif {[string match binarytext* $format]} { + #don't convert - this is binary data - the rendering obj will handle it as binary } else { set ansidata [encoding convertfrom $encoding $ansidata] } set obj [punk::ansi::class::class_ansi new $ansidata] if {$encoding eq "cp437"} { - #set result [$obj rendertest -binarytext $binarytext -width $cols -height $rows -crm_mode $opt_crm_mode] set result [$obj rendertest -format $format -width $cols -height $rows -crm_mode $opt_crm_mode] } else { set result [$obj render $dimensions] @@ -6251,24 +6245,12 @@ be as if this was off - ie lone CR. #counterintuitively "x\b" still shows the x ie length is still one. The backspace just moves the position. There must be a char following \b for it to affect the length. #(we are not interested in the destructive backspace case present in editors,terminals etc - that is a different context) - set n 0 - #set chars [split $line ""] ; #review - graphemes vs chars? Terminals differ in how they treat this. - set chars [punk::char::grapheme_split $line] - set cr_posns [lsearch -all $chars \r] - set bs_posns [lsearch -all $chars \b] - foreach p $cr_posns { - lset chars $p - } - foreach p $bs_posns { - lset chars $p - } #mintty seems more 'correct'. It will backspace over an entire grapheme (char+combiners) whereas windows terminal/wezterm etc remove a combiner #build an output set idx 0 set outchars [list] - set outsizes [list] # -- #tcl8.6/8.7 we can get a fast byte-compiled switch statement only with literals in the source code #this is difficult/risky to maintain - hence the lsearch and grapheme-replacement above @@ -6278,39 +6260,65 @@ be as if this was off - ie lone CR. #set cr ? # -- - - #consider also that AB\0\bC will usually render as AC not ABC - foreach c $chars { - switch -- $c { - { - if {$idx > 0} { - incr idx -1 - } - } - { - set idx 0 - } - default { - if {$c eq "\0"} { - #nulls don't print - and backspace will ignore them and remove the preceding char if there is one. - #we need to remove them here or else backspace processing will only remove the null and not the preceding char - which is not how terminals work. - #review - other zero-width chars? - continue - } - #set nxt [llength $outchars] - if {$idx < [llength $outchars]} { - #overstrike? - should usually have no impact on width - width taken as last grapheme in that column - #e.g nroff would organise text such that underline written first, then backspace, then the character - so that terminals without overstrike would display something useful if no overstriking is done - #Conceivably double_wide_char then backspace then underscore would underreport the length if overstriking were intended. - lset outchars $idx $c - } else { - lappend outchars $c - } - #punk::ansi::internal::printing_length_addchar $idx $c - incr idx - } + set graphemes [punk::char::grapheme_split $line] + foreach g $graphemes { + if {$g eq "\0"} { + #nulls don't print - and backspace will ignore them and remove the preceding char if there is one. + #we need to remove them here or else backspace processing will only remove the null and not the preceding char - which is not how terminals work. + #review - other zero-width chars? + continue + } elseif {$g eq "\r"} { + set idx 0 + } elseif {$g eq "\b"} { + incr idx -1 + set idx [expr {max(0,$idx)}] + } else { + lset outchars $idx $g ;#lset will append if $idx is equal to the current length of the list - since we only increment idx by 1, this should be safe to do without checking the length first + #if {$idx < [llength $outchars]} { + # #overstrike? - should usually have no impact on width - width taken as last grapheme in that column + # #e.g nroff would organise text such that underline written first, then backspace, then the character - so that terminals without overstrike would display something useful if no overstriking is done + # #Conceivably double_wide_char then backspace then underscore would underreport the length if overstriking were intended. + # lset outchars $idx $g + #} else { + # lappend outchars $g + #} + incr idx } } + + + + #consider also that AB\0\bC will usually render as AC not ABC + #foreach g $graphemes { + # switch -exact -- $g { + # { + # if {$idx > 0} { + # incr idx -1 + # } + # } + # { + # set idx 0 + # } + # { + # #nulls don't print - and backspace will ignore them and remove the preceding char if there is one. + # #we need to remove them here or else backspace processing will only remove the null and not the preceding char - which is not how terminals work. + # #review - other zero-width chars? + # continue + # } + # default { + # #set nxt [llength $outchars] + # if {$idx < [llength $outchars]} { + # #overstrike? - should usually have no impact on width - width taken as last grapheme in that column + # #e.g nroff would organise text such that underline written first, then backspace, then the character - so that terminals without overstrike would display something useful if no overstriking is done + # #Conceivably double_wide_char then backspace then underscore would underreport the length if overstriking were intended. + # lset outchars $idx $g + # } else { + # lappend outchars $g + # } + # incr idx + # } + # } + #} #we already have the string split into grapheme clusters. #we should calculate length as the sum of the widths of the graphemes in the output list rather #than passing to a function that will need to split into graphemes again. @@ -6345,7 +6353,7 @@ be as if this was off - ie lone CR. set max_component_width 1 } } - } elseif {$c < 917504 || $c > 917631} { + } elseif {$dec < 917504 || $dec > 917631} { #codepoint not in the zero-width unicode tag block - \UE0000-\UE000F #set w [punk::char::char_width $dec] set w [textutil::wcswidth_char $dec] @@ -6372,19 +6380,6 @@ be as if this was off - ie lone CR. return $sumwidth #return [punk::char::ansifreestring_width [join $outchars ""]] } - namespace eval internal { - proc printing_length_addchar {i c} { - #review - upvar outchars outc - upvar outsizes outs - set nxt [llength $outc] - if {$i < $nxt} { - lset outc $i $c - } else { - lappend outc $c - } - } - } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- @@ -7579,6 +7574,10 @@ tcl::namespace::eval punk::ansi { #} #------------------------------------------------------- proc sgr_merge {codelist args} { + if {[llength $codelist] == 0 && [llength $args] == 0} { + return "" + } + #pass through even single code or empty codelist to sgr_merge_singles - as there may be arguments such as -info or -filter_* set allparts [list] foreach c $codelist { #set cparts [punk::ansi::ta::split_codes_single $c] @@ -9023,7 +9022,6 @@ tcl::namespace::eval punk::ansi::class { -overflow 0 -appendlines 1 -looplimit 15000 - -experimental {} -cursor_column 1 -cursor_row 1 -insert_mode 0 @@ -9034,7 +9032,7 @@ tcl::namespace::eval punk::ansi::class { foreach {k v} $argsflags { switch -- $k { -width - -height - - -overflow - -appendlines - -looplimit - -experimental - + -overflow - -appendlines - -looplimit - -autowrap_mode - -insert_mode - -initial_ansistring { @@ -9735,7 +9733,8 @@ tcl::namespace::eval punk::ansi::class { upvar ${ns}::o_gx0states new_gx0states upvar ${ns}::o_splitindex new_splitindex - lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] [lindex $new_ansisplits 0]] lappend o_ansisplits {*}[lrange $new_ansisplits 1 end] + lset o_ansisplits end [tcl::string::cat [lindex $o_ansisplits end] [lindex $new_ansisplits 0]] + lappend o_ansisplits {*}[lrange $new_ansisplits 1 end] lset o_ptlist end [tcl::string::cat [lindex $o_ptlist end] [lindex $new_ptlist 0]] lappend o_ptlist {*}[lrange $new_ptlist 1 end] @@ -10625,6 +10624,7 @@ tcl::namespace::eval punk::ansi::ansistring { #Note that trim/trimleft/trimright will trim spaces at the extremities that are styled with background colour, underline etc #that may be unexpected, but it's probably the only thing that makes sense. Plain string trim can chop off whitespace that is extraneous to the ansi entirely. proc trimleft {string args} { + #todo - don't just trim whitespace - need to accept optional ?chars? to trim. set intext 0 set out "" #for split_codes only first or last pt can be empty string - but we can also get an empty ansiblock by using foreach with 2 vars on an odd-length list @@ -12028,6 +12028,827 @@ tcl::namespace::eval punk::ansi::xbin { return [punk::ansi::a+ $blink rgb-$fr-$fg-$fb Rgb-$br-$bg-$bb] } + proc parse {xbindata} { + set bytenum 0 + set xbin_header [string range $xbindata 0 10] ;#11 bytes + set xbin_header_info [punk::ansi::xbin::parse_header $xbin_header] + set xbin_body [string range $xbindata 11 end] + incr bytenum 11 + + set flags [dict get $xbin_header_info flags] + set xbin_width [dict get $xbin_header_info width] + set xbin_height [dict get $xbin_header_info height] + set expected_cells [expr {$xbin_width * $xbin_height}] + set xbin_nonblink [expr {"nonblink" in $flags}] ;# ice. + set xbin_palette [punk::ansi::xbin::default_palette] + + set parse_warnings [list] + + #optional 16-entry palette, 3 bytes per entry, RGB values 0..63 + if {"palette" in $flags} { + #puts stderr "renderspace warning - palette unimplemented" + set xbin_palette [punk::ansi::xbin::parse_palette [string range $xbin_body 0 47]] + set xbin_body_after_palette [string range $xbin_body 48 end] + incr bytenum 48 + } else { + set xbin_body_after_palette $xbin_body + } + + #todo - font. + #hack - skip over font 256 x fontsize or 512 x fontsize + if {"512chars" in $flags} { + set sz 512 + } else { + set sz 256 + } + #temp + set skip [expr {$sz * [dict get $xbin_header_info fontsize]}] + if {"font" in $flags} { + #todo - consider sixel or similar for font data - but for now we just skip over it. + #puts stderr "punk::ansi::xbin::parse warning - xbin font unimplemented" + lappend parse_warnings "XBIN_FONT_UNIMPLEMENTED skipping over font data" + set celldata [string range $xbin_body_after_palette $skip end] + incr bytenum $skip + } else { + set celldata $xbin_body_after_palette + } + set celldata_bytes [split $celldata ""] + #puts stdout "xbin image data size [llength $celldata_bytes]" + + set decoded_cells 0 + set ansisplit [list ""] + if {"compress" in $flags} { + #puts stderr "renderspace warning - compress experimental" + #process 'repeatcounter' bytes + #first 2 bits - compression type + # 00 - no compression + # 01 - character compression + # 10 - attribute compression + # 11 - character/attribute compression + #remaining 6 bits - counter + set input "" + set byte_count [llength $celldata_bytes] + for {set b 0} {$b < $byte_count} {} { + set rc [lindex $celldata_bytes $b] + set dec [scan $rc %c] + set ctype [expr {$dec >> 6}] + #0x3F - 00111111 + set count [expr {$dec & 0x3F}] + incr count ;#count stored as 1 less than actual number of repeats + if {$count < 1 || $count > 64} { + #generally unlikely to occur if we are decoding 6 bits of count correctly. + # - but will be zero for example if we have a trailing carriage return. + puts stderr "punk::ansi::xbin::parse - max count must be between 1 and 64 inclusive. received $count" + } + incr b + if {$decoded_cells + $count > $expected_cells} { + #some of the more common causes of this could be additional non xbin data after the expected end of celldata, eg: + #\x1a (ctrl-z) decimal value 26 (= count 27) delimiter for start of SAUCE record. + #\r (carriage regurn) decimal value 13 (= count 14) + #\n (line feed) decimal value 10 (= count 11) + # or it could be more celldata but the header dimensions are wrong + #- either way we should probably just warn and stop processing. + lappend parse_warnings "XBIN_OVERFLOW - record would emit $count cells at decoded offset $decoded_cells, expected total $expected_cells cells for header dimensions ${xbin_width}x${xbin_height} (possible trailing SAUCE record or newlines)" + break + } + switch -exact -- $ctype { + 0 { + set needed [expr {$count * 2}] + } + 1 - + 2 { + set needed [expr {$count + 1}] + } + 3 { + set needed 2 + } + default { + #hard error - will probably cause desynchronization between decoder and byte stream + error "punk::ansi::xbin::parse - invalid compression type $ctype in repeatcounter byte '$rc' at offset $b" + } + } + if {$b + $needed > $byte_count} { + lappend parse_warnings "XBIN_BAD_RECORD - truncated record: type $ctype requires $needed bytes at payload offset $b, but only [expr {$byte_count - $b}] bytes remain." + #abort processing - would probably raise an error in the compression switch cases below. + #This may indicate a truncated file, but it could also be a file with additional data after the expected end of celldata. + #This is likely to happen if the xbindata includes a trailing SAUCE record. + #we shouldn't raise a hard error - as the caller may want to salvage what data they can from the file, and report the issue via warnings. + break + } + switch -exact -- $ctype { + 0 { + #no compression + for {set c 0} {$c < $count*2} {incr c 2} { + set ch [lindex $celldata_bytes $b+$c] + set ch [encoding convertfrom cp437 $ch] + set at [lindex $celldata_bytes [expr {$b+$c+1}]] + #binary scan $at cu code + #set clr [a+ term-$code] + #set clr [a+ red] ;#debug + set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] + lappend ansisplit $clr $ch + } + incr b [expr {$count*2}] + } + 1 { + #char compression + set ch [lindex $celldata_bytes $b] + set ch [encoding convertfrom cp437 $ch] + incr b + for {set c 0} {$c < $count} {incr c} { + set at [lindex $celldata_bytes $b+$c] + #binary scan $at cu code + #set clr [a+ term-$code] + #set clr [a+ cyan] ;#debug + set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] + lappend ansisplit $clr $ch + } + incr b [expr {$count}] + } + 2 { + #attribute compression + set at [lindex $celldata_bytes $b] + #binary scan $at cu code + #set clr [a+ term-$code] + #set clr [a+ green] ;#debug + set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] + incr b + for {set c 0} {$c < $count} {incr c} { + set ch [lindex $celldata_bytes $b+$c] + set ch [encoding convertfrom cp437 $ch] + lappend ansisplit $clr $ch + } + incr b $count + } + 3 { + #attribute and char compression + set ch [lindex $celldata_bytes $b] + set ch [encoding convertfrom cp437 $ch] + set at [lindex $celldata_bytes $b+1] + #binary scan $at cu code + #set clr [a+ term-$code] + #set clr [a+ white] ;#debug + set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] + for {set c 0} {$c < $count} {incr c} { + lappend ansisplit $clr $ch + } + incr b 2 + } + } + incr decoded_cells $count + } + if {$decoded_cells != $expected_cells} { + lappend parse_warnings "XBIN_CELLCOUNT_MISMATCH decoded $decoded_cells cells, expected $expected_cells cells for image dimensions ${xbin_width}x${xbin_height}" + } + } else { + foreach {ch at} $celldata_bytes { + #binary scan $at cu code + #set clr [a+ term-$code] + if {$at eq ""} { + #eg src/testansi/formatsamples/image/xbin/test.xb + #has missing last byte. for now just warn. + #puts stderr "renderspace warning - xbin attribute byte is empty at char '[ansistring VIEW $ch]'" + lappend parse_warnings "XBIN_MISSING_BYTE attribute byte is empty at byte [expr {$bytenum + 1}] char '[ansistring VIEW $ch]'" + #experiment - treat as a reset. + lappend ansisplit [a+] $ch + } else { + set clr [punk::ansi::xbin::attribute_ansi $at $xbin_palette $xbin_nonblink] + set ch [encoding convertfrom cp437 $ch] + lappend ansisplit $clr $ch + } + incr bytenum 2 + incr decoded_cells + } + } + #lappend inputchunks [list ansisplit $ansisplit] + + #_reset key with ansi reset to ensure direct display of dict in terminal is readable. + return [dict create header $xbin_header_info palette $xbin_palette ansisplit $ansisplit _reset \x1b\[m warnings $parse_warnings decoded_cells $decoded_cells expected_cells $expected_cells] + } + +} +tcl::namespace::eval punk::ansi::png { + + proc paethPredictor {a b c} { + #A Paeth PNG filter is a pre-compression image processing algorithm used in the Portable Network Graphics (PNG) format. + #It is designed to prepare image data for the format's lossless compression by predicting the color of a pixel based on + #its neighbors + set p [expr {$a + $b - $c}] + set pa [expr {abs($p - $a)}] + set pb [expr {abs($p - $b)}] + set pc [expr {abs($p - $c)}] + if {$pa <= $pb && $pa <= $pc} { return $a } + if {$pb <= $pc} { return $b } + return $c + } + + proc pngdataToAnsi {pngdata} { + #This will create very large ansi images as the smallest possible colorised cell is the half-block character. + #To create smaller images, we could consider some kind of lossy conversion to a smaller palette, or even to monochrome with dithering. + #A better alternative might be sixel or similar. + + #if {[::png::validate $filename] ne "OK"} { + # error "Invalid PNG file." + #} + # Extract PNG header metadata + #set info [::png::imageInfo $filename] + + if {[string range $pngdata 0 7] ne "\x89PNG\r\n\x1a\n"} { + error "pngdataToAnsi: Invalid PNG data - missing PNG signature" + } + + #----------------------------------------------------------------------------------------- + #set info [::png::imageInfo $filename] + #----------------------------------------------------------------------------------------- + set posn [expr {8}] ;# Skip PNG signature + binary scan [string range $pngdata $posn [expr {$posn + 7}]] Ia4 len type + incr posn 8 + set r [string range $pngdata $posn [expr {$posn + $len - 1}]] + incr posn $len + if {$type eq "IHDR"} { + binary scan $r IIccccc width height depth color compression filter interlace + binary scan [string range $pngdata $posn [expr {$posn + 3}]] I check + if {$check < 0} { + set check [format %u [expr {$check & 0xffffffff}]] + } + if {![catch {package present crc32}] && [::crc32::crc32 IHDR$r] != $check} { + error "pngdataToAnsi: Invalid PNG data - IHDR chunk CRC mismatch" + } + set info [list width $width height $height depth $depth color $color compression $compression filter $filter interlace $interlace] + } else { + error "pngdataToAnsi: Invalid PNG data - missing IHDR chunk" + } + #----------------------------------------------------------------------------------------- + + + set width [dict get $info width] + set height [dict get $info height] + set depth [dict get $info depth] + set color [dict get $info color] + set filter [dict get $info filter] + set interlace [dict get $info interlace] + set compression [dict get $info compression] + if {$compression != 0} { + #true as at PNG-3 2025 + error "pngdataToAnsi: Unsupported PNG compression method $compression - only method 0 (deflate/inflate) is supported." + } + puts stderr "pngdataToAnsi: PNG image info - width $width height $height depth $depth color $color interlace $interlace filter $filter" + + set color_types { + 0 Grayscale + 2 TrueColor (RGB) + 3 Indexed-color + 4 Grayscale with alpha + 6 TrueColor with alpha (RGBA) + } + switch -exact $color { + 0 { + error "pngdataToAnsi warning - PNG color type 0 (grayscale) not supported - todo: treat as RGB with R=G=B ?" + set ctype "grayscale" + if {$depth ni {1 2 4 8 16}} { + error "Unsupported format. Only grayscale PNGs with bit depths of 1, 2, 4, 8, or 16 are supported." + } + } + 2 { + # RGB TrueColor - supported + set ctype "rgb" + #todo depth 16 + if {$depth != 8} { + error "Unsupported format. Only 8-bit RGB or RGBA PNGs are supported." + } + set bpp 3 + } + 3 { + set ctype "indexed" + puts stderr "pngdataToAnsi warning - PNG color type 3 (indexed colour)" + if {$depth ni {1 2 4 8}} { + error "Unsupported format. Only indexed-color PNGs with 1,2,4 or 8 bit depth are supported." + } + set bpp 1 + } + 4 { + error "pngdataToAnsi warning - PNG color type 4 (grayscale with alpha) not supported - todo: treat as RGBA with R=G=B and alpha channel" + set ctype "grayscale_alpha" + set bpp 3 ;#Bytes per pixel + if {$depth ni {8 16}} { + error "Unsupported format. Only grayscale PNGs with bit depths of 8 or 16 are supported." + } + } + 6 { + puts stderr "pngdataToAnsi warning - PNG color type 6 (truecolor with alpha)" + set ctype "rgba" + if {$depth == 8} { + set bpp 4 ;#Bytes per pixel + } elseif {$depth == 16} { + set bpp 8 ;#Bytes per pixel + } else { + error "Unsupported format. Only depths of 8 or 16 bits per channel are supported for RGBA PNGs." + } + } + default { + error "pngdataToAnsi: Unsupported PNG color type $color" + } + } + + + #------------------------------------------ + # Extract raw compressed IDAT stream chunks + #set chunks [::png::getChunks $filename] + set chunks [list] + set posn [expr {8}] ;# Skip PNG signature + while {[set r [string range $pngdata $posn [incr posn 8]]] ne ""} { + binary scan $r Ia4 len type + if {$type eq "IEND"} { + #end of PNG data - stop processing chunks + #(important to stop before we try to process any trailing non-PNG data such as a SAUCE record) + break + } + lappend chunks [list $type $posn $len] + incr posn [expr {$len + 4}] + } + #------------------------------------------ + puts stderr "pngdataToAnsi: found [llength $chunks] chunks in PNG data" + foreach chunk $chunks { + puts stderr "pngdataToAnsi: chunk type '[lindex $chunk 0]' length [lindex $chunk 2]" + } + + + set paletteRaw "" + + set idatData "" + foreach chunk $chunks { + switch -exact -- [lindex $chunk 0] { + "IDAT" { + set posn [lindex $chunk 1] + append idatData [string range $pngdata $posn [expr {$posn + [lindex $chunk 2] - 1}]] + } + "PLTE" { + set posn [lindex $chunk 1] + puts stderr "pngdataToAnsi warning - PNG PLTE chunk" + #implement PLTE chunk parsing and support for indexed colour PNGs + append paletteRaw [string range $pngdata $posn [expr {$posn + [lindex $chunk 2] - 1}]] + } + "tEXt" { + set posn [lindex $chunk 1] + #todo - consider supporting tEXt chunks for metadata such as title, author, description etc. + set textData [string range $pngdata $posn [expr {$posn + [lindex $chunk 2] - 1}]] + set nullpos [string first \x00 $textData] + #neither the keyword nor text data is supposed to contain nulls. + if {$nullpos >= 0} { + set keyword [string range $textData 0 [expr {$nullpos - 1}]] + set text [string range $textData [expr {$nullpos + 1}] end]] + puts stderr "pngdataToAnsi warning - PNG tEXt chunk - keyword '$keyword' text '$text'" + } else { + puts stderr "pngdataToAnsi warning - PNG tEXt chunk - no separator null found: $textData" + } + } + "zTXt" { + #set posn [lindex $chunk 1] + #todo - consider supporting zTXt chunks for compressed metadata such as title, author, description etc. + #puts stderr "pngdataToAnsi warning - PNG zTXt chunk - [string range $pngdata $posn [expr {$posn + [lindex $chunk 2] - 1}]]" + } + "iTXt" { + #set posn [lindex $chunk 1] + #todo - consider supporting iTXt chunks for international text metadata such as title, author, description etc. + #puts stderr "pngdataToAnsi warning - PNG iTXt chunk - [string range $pngdata $posn [expr {$posn + [lindex $chunk 2] - 1}]]" + } + "IEND" { + } + default { + #ignore other chunk types for now + } + } + } + if {$ctype eq "indexed" && $paletteRaw eq ""} { + error "pngdataToAnsi: Indexed colour PNG missing PLTE chunk" + } + if {[string match grayscale* $ctype] && $paletteRaw ne ""} { + puts stderr "pngdataToAnsi warning - PNG PLTE chunk present in grayscale image - ignoring palette data" + } + if {$paletteRaw ne ""} { + set palette [list] + binary scan $paletteRaw c* components + puts "components: $components '[ansistring VIEW $paletteRaw]'" + foreach {r g b} $components { + lappend palette [list [expr {$r & 0xFF}] [expr {$g & 0xFF}] [expr {$b & 0xFF}]] + } + } + + # Decompress using raw Tcl zlib inflation + set decompressed [zlib decompress $idatData] + #set decompressed [zlib deflate $idatData] + #PLTE data is not compressed. + + #set stride [expr {1 + ($width * $bpp)}] + #set prevLine [binary format x[expr {$width * $bpp}]] ;# Row 0 baseline + + if {$ctype eq "indexed"} { + set bytesPerLine [expr {($width * $depth + 7) / 8}] + } else { + set bytesPerLine [expr {$width * $bpp}] + } + set stride [expr {1 + $bytesPerLine}] ;# Filter type byte + pixel data bytes + set prevLine [binary format x$bytesPerLine] ;# Row 0 baseline + set allRows [list] + + + # Process rows + for {set y 0} {$y < $height} {incr y} { + set offset [expr {$y * $stride}] + + # Unpack the filter type byte at start of each scanline + #puts "---> filter type byte: [ansistring VIEW [string range $decompressed $offset $offset]] at offset $offset for row $y" + binary scan [string range $decompressed $offset $offset] c filterType + set filterType [expr {$filterType & 0xFF}] + if {$filterType < 0 || $filterType > 4} { + puts stderr "pngdataToAnsi warning - invalid filter type $filterType at row $y - treating as no filter" + set filterType 0 + } + + # Get filtered pixel payload bytes for the row + set rawRow [string range $decompressed [expr {$offset + 1}] [expr {$offset + $stride - 1}]] + set currentLine "" + + # Defilter scanline bytes based on specification types + for {set xBytes 0} {$xBytes < $bytesPerLine} {incr xBytes} { + binary scan [string range $rawRow $xBytes $xBytes] c origByte + set origByte [expr {$origByte & 0xFF}] + + # Get left byte (A) and upper byte (B) and upper-left byte (C) + #set leftVal [expr {$xBytes >= $bpp ? [string index $currentLine [expr {$xBytes - $bpp}]] : 0}] + #binary scan $leftVal c a + #set a [expr {$a & 0xFF}] + if {$xBytes >= $bpp} { + binary scan [string index $currentLine [expr {$xBytes - $bpp}]] c a + set a [expr {$a & 0xFF}] + } else { + set a 0 + } + + binary scan [string range $prevLine $xBytes $xBytes] c b; + set b [expr {$b & 0xFF}] + + #set upLeftVal [expr {$xBytes >= $bpp ? [string index $prevLine [expr {$xBytes - $bpp}]] : 0}] + #binary scan $upLeftVal c c + #set c [expr {$c & 0xFF}] + if {$xBytes >= $bpp} { + binary scan [string index $prevLine [expr {$xBytes - $bpp}]] c c + set c [expr {$c & 0xFF}] + } else { + set c 0 + } + + # Reverse the PNG filter transformations + switch -- $filterType { + 0 { set reconByte $origByte } ;# None + 1 { set reconByte [expr {($origByte + $a) % 256}] } ;# Sub + 2 { set reconByte [expr {($origByte + $b) % 256}] } ;# Up + 3 { set reconByte [expr {($origByte + (($a + $b) / 2)) % 256}] } ;# Average + 4 { set reconByte [expr {($origByte + [paethPredictor $a $b $c]) % 256}] } ;# Paeth + default { + } + } + append currentLine [binary format c $reconByte] + } + set prevLine $currentLine + + if {$ctype eq "indexed"} { + # For indexed colour PNGs, map pixel values to RGB using the PLTE chunk palette + set pixelRow [list] + set pixelCount 0 + + #pre-calculate masks and steps based on depth + # depth 4: mask = 15 (0x0F), pixels per byte = 2 + # depth 2: mask = 3 (0x03), pixels per byte = 4 + # depth 1: mask = 1 (0x01), pixels per byte = 8 + set mask [expr {(1 << $depth) - 1}] + set pixelsPerByte [expr {8 / $depth}] + + for {set x 0} {$x < $bytesPerLine} {incr x} { + binary scan [string range $currentLine $x $x] c packedByte + set byteVal [expr {$packedByte & 0xFF}] + + #read left-to-right within the byte, extracting pixel values based on depth and mask + for {set p 0} {$p < $pixelsPerByte} {incr p} { + if {$pixelCount < $width} { + #set shift [expr {($pixelsPerByte - 1 - $p) * $depth}] + set shift [expr {8 - $depth - ($p * $depth)}] + set idx [expr {($byteVal >> $shift) & $mask}] + set rgb [lindex $palette $idx] + #append outputBuffer [format "\x1b\[48\;2\;%d\;%d\;%dm " [lindex $rgb 0] [lindex $rgb 1] [lindex $rgb 2]] + #lappend pixelRow $idx + lappend pixelRow [list [lindex $rgb 0] [lindex $rgb 1] [lindex $rgb 2]] + incr pixelCount + } + } + + } + } else { + #RGB + set pixelRow [list] + for {set x 0} {$x < $width} {incr x} { + set idx [expr {$x * $bpp}] + #pull either 3 bytes (RGB) or 4 bytes (RGBA) for the pixel, depending on bpp + if {$depth == 16} { + binary scan [string range $currentLine $idx [expr {$idx + 3}]] c4 rgba + set r [expr {[lindex $rgba 0] & 0xFF}] + set g [expr {[lindex $rgba 1] & 0xFF}] + set b [expr {[lindex $rgba 2] & 0xFF}] + set a [expr {[lindex $rgba 3] & 0xFF}] + + #terminal fallback background colour .eg dark terminal grey + set bgR 30 + set bgG 30 + set bgB 30 + set alpha [expr {$a / 255.0}] + + set r [expr {int(($r * $alpha) + ($bgR * (1 - $alpha)))}] + set g [expr {int(($g * $alpha) + ($bgG * (1 - $alpha)))}] + set b [expr {int(($b * $alpha) + ($bgB * (1 - $alpha)))}] + } else { + binary scan [string range $currentLine $idx [expr {$idx + 2}]] c3 rgb + set r [expr {[lindex $rgb 0] & 0xFF}] + set g [expr {[lindex $rgb 1] & 0xFF}] + set b [expr {[lindex $rgb 2] & 0xFF}] + #puts stderr "pixel $x,$y - RGB($r,$g,$b)" + } + + + # Use background-color escape sequence with two blank spaces to build a square pixel + #append outputBuffer "\x1b\[48\;2\;${r}\;${g}\;${b}m " + lappend pixelRow [list $r $g $b] + } + #append outputBuffer "\x1b\[m\n" ;# Reset row styling + } + lappend allRows $pixelRow + } + + set symbols 1 + # ------------------------------------------------------------- + # Unicode Quadrant Mosaic Definition Matrix + # ------------------------------------------------------------- + # Maps a 4-bit representation of a 2x2 grid to a structural character. + # Layout: Bit 3 = TopLeft, Bit 2 = TopRight, Bit 1 = BottomLeft, Bit 0 = BottomRight + variable MOSAIC_MAP + array set MOSAIC_MAP { + 0 " " 1 "▗" 2 "▖" 3 "▄" + 4 "▝" 5 "▐" 6 "▞" 7 "▟" + 8 "▘" 9 "▚" 10 "▌" 11 "▙" + 12 "▀" 13 "▜" 14 "▛" 15 "█" + } + + # ------------------------------------------------------------- + # Sub-Pixel Structural Rendering Engine + # ------------------------------------------------------------- + proc renderSymbols {allRows width height} { + variable MOSAIC_MAP + set outputBuffer "" + fconfigure stdout -encoding utf-8 + + # Process chunks of 2 vertical rows and 2 horizontal columns + for {set y 0} {$y < $height} {incr y 2} { + set rowTop [lindex $allRows $y] + + # Edge safety padding for odd vertical bounds + if {($y + 1) < $height} { + set rowBottom [lindex $allRows [expr {$y + 1}]] + } else { + set rowBottom $rowTop + } + + for {set x 0} {$x < $width} {incr x 2} { + # Extract 4 pixels of the 2x2 cluster + set p_tl [lindex $rowTop $x] + + if {($x + 1) < $width} { + set p_tr [lindex $rowTop [expr {$x + 1}]] + set p_bl [lindex $rowBottom $x] + set p_br [lindex $rowBottom [expr {$x + 1}]] + } else { + # Pad horizontally if image width is odd + set p_tr $p_tl; set p_bl $p_tl; set p_br $p_tl + } + + # Calculate individual pixel luminance values (Standard Rec. 601 weights) + set l_tl [expr {[lindex $p_tl 0]*0.299 + [lindex $p_tl 1]*0.587 + [lindex $p_tl 2]*0.114}] + set l_tr [expr {[lindex $p_tr 0]*0.299 + [lindex $p_tr 1]*0.587 + [lindex $p_tr 2]*0.114}] + set l_bl [expr {[lindex $p_bl 0]*0.299 + [lindex $p_bl 1]*0.587 + [lindex $p_bl 2]*0.114}] + set l_br [expr {[lindex $p_br 0]*0.299 + [lindex $p_br 1]*0.587 + [lindex $p_br 2]*0.114}] + + # Block Threshold: Local average brightness + set avg_lum [expr {($l_tl + $l_tr + $l_bl + $l_br) / 4.0}] + + # Build the 4-bit structure index mapping bitwise states + set bitmask 0 + if {$l_tl >= $avg_lum} { set bitmask [expr {$bitmask | 8}] } + if {$l_tr >= $avg_lum} { set bitmask [expr {$bitmask | 4}] } + if {$l_bl >= $avg_lum} { set bitmask [expr {$bitmask | 2}] } + if {$l_br >= $avg_lum} { set bitmask [expr {$bitmask | 1}] } + + # Segregate pixels into foreground (bright) and background (dark) sets + set fg_r 0; set fg_g 0; set fg_b 0; set fg_count 0 + set bg_r 0; set bg_g 0; set bg_b 0; set bg_count 0 + + foreach p [list $p_tl $p_tr $p_bl $p_br] lum [list $l_tl $l_tr $l_bl $l_br] { + if {$lum >= $avg_lum} { + incr fg_r [lindex $p 0]; incr fg_g [lindex $p 1]; incr fg_b [lindex $p 2] + incr fg_count + } else { + incr bg_r [lindex $p 0]; incr bg_g [lindex $p 1]; incr bg_b [lindex $p 2] + incr bg_count + } + } + + # Compute color averages for both states + if {$fg_count > 0} { + set fR [expr {$fg_r / $fg_count}]; set fG [expr {$fg_g / $fg_count}]; set fB [expr {$fg_b / $fg_count}] + } else { + set fR 0; set fG 0; set fB 0 + } + if {$bg_count > 0} { + set bR [expr {$bg_r / $bg_count}]; set bG [expr {$bg_g / $bg_count}]; set bB [expr {$bg_b / $bg_count}] + } else { + # If everything is uniform, match foreground color to prevent ghosting borders + set bR $fR; set bG $fG; set bB $fB + } + + # Pull symbol match out of the layout map + set symbol $MOSAIC_MAP($bitmask) + + # Generate the combined true color escape output string + append outputBuffer "\x1b\[48\;2\;${bR}\;${bG}\;${bB}m\x1b\[38\;2\;${fR}\;${fG}\;${fB}m${symbol}" + } + append outputBuffer "\x1b\[m\n" + } + return $outputBuffer + } + # ------------------------------------------------------------- + # High-Density 8x4 Block (Braille Mosaic) Rendering Engine + # ------------------------------------------------------------- + proc renderBrailleDensity {allRows width height} { + set outputBuffer "" + fconfigure stdout -encoding utf-8 + + # We skip 8 vertical rows and 4 horizontal pixels per cell cycle + # to achieve a 4x reduction factor (accounting for terminal aspect ratios) + for {set y 0} {$y < $height} {incr y 8} { + + # Buffer up to 8 rows for processing this line + set activeRows [list] + for {set r 0} {$r < 8} {incr r} { + if {($y + $r) < $height} { + lappend activeRows [lindex $allRows [expr {$y + $r}]] + } else { + lappend activeRows "" ;# Pad vertical overflow with empty lines + } + } + + for {set x 0} {$x < $width} {incr x 4} { + + # --- 1. Downsample the 8x4 cluster into a 4x2 grid for Braille --- + # Each cell in our 4x2 grid averages a 2x2 pixel area from the image + set subGridLums [list 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0] + set subGridRgbs [list] + set totalBlockLum 0.0 + + set cellIdx 0 + for {set subY 0} {$subY < 8} {incr subY 2} { + for {set subX 0} {$subX < 4} {incr subX 2} { + + # Accumulate colors for this specific 2x2 sub-pixel zone + set sR 0; set sG 0; set sB 0; set sCount 0 + for {set dy 0} {$dy < 2} {incr dy} { + set rowIdx [expr {$subY + $dy}] + set currRow [lindex $activeRows $rowIdx] + if {$currRow eq ""} { continue } + + for {set dx 0} {$dx < 2} {incr dx} { + set pixelX [expr {$x + $subX + $dx}] + if {$pixelX >= $width} { continue } + + set pixel [lindex $currRow $pixelX] + incr sR [lindex $pixel 0] + incr sG [lindex $pixel 1] + incr sB [lindex $pixel 2] + incr sCount + } + } + + # Store sub-zone averages + if {$sCount > 0} { + set sR [expr {$sR / $sCount}]; set sG [expr {$sG / $sCount}]; set sB [expr {$sB / $sCount}] + } else { + set sR 0; set sG 0; set sB 0 + } + + set sLum [expr {$sR*0.299 + $sG*0.587 + $sB*0.114}] + lset subGridLums $cellIdx $sLum + lappend subGridRgbs [list $sR $sG $sB] + set totalBlockLum [expr {$totalBlockLum + $sLum}] + incr cellIdx + } + } + + # --- 2. Calculate Thresholding & Grouping --- + set avgBlockLum [expr {$totalBlockLum / 8.0}] + + set fg_r 0; set fg_g 0; set fg_b 0; set fg_count 0 + set bg_r 0; set bg_g 0; set bg_b 0; set bg_count 0 + set brailleOffset 0 + + # Unicode Braille bitmask generation table for 4x2 cells + # Maps sequential list index (0-7) to Unicode Braille bit flags + set bitWeights [list 1 8 2 16 4 32 64 128] + + for {set i 0} {$i < 8} {incr i} { + set sLum [lindex $subGridLums $i] + set sRgb [lindex $subGridRgbs $i] + + if {$sLum >= $avgBlockLum} { + # This sub-zone is bright: Turn on the Braille dot + set brailleOffset [expr {$brailleOffset | [lindex $bitWeights $i]}] + incr fg_r [lindex $sRgb 0]; incr fg_g [lindex $sRgb 1]; incr fg_b [lindex $sRgb 2] + incr fg_count + } else { + # This sub-zone is dark + incr bg_r [lindex $sRgb 0]; incr bg_g [lindex $sRgb 1]; incr bg_b [lindex $sRgb 2] + incr bg_count + } + } + + # --- 3. Compute Final Colors --- + if {$fg_count > 0} { + set fR [expr {$fg_r / $fg_count}]; set fG [expr {$fg_g / $fg_count}]; set fB [expr {$fg_b / $fg_count}] + } else { + set fR 0; set fG 0; set fB 0 + } + if {$bg_count > 0} { + set bR [expr {$bg_r / $bg_count}]; set bG [expr {$bg_g / $bg_count}]; set bB [expr {$bg_b / $bg_count}] + } else { + set bR $fR; set bG $fG; set bB $fB + } + + # Construct the final Unicode character using the Braille base boundary block (\u2800) + set brailleChar [format %c [expr {0x2800 + $brailleOffset}]] + + # Append the ANSI sequence + append outputBuffer "\x1b\[48\;2\;${bR}\;${bG}\;${bB}m\x1b\[38\;2\;${fR}\;${fG}\;${fB}m${brailleChar}" + } + append outputBuffer "\x1b\[m\n" + } + return $outputBuffer + } + + if {$symbols} { + # return [renderSymbols $allRows $width $height] + return [renderBrailleDensity $allRows $width $height] + } + + set outputBuffer "" + for {set y 0} {$y < $height} {incr y 2} { + set topRow [lindex $allRows $y] + #if image has an odd height, use pure black {0 0 0} for the missing bottom row of the final half-block character row. + set hasBottom [expr {$y + 1 < $height}] + if {$hasBottom} { + set bottomRow [lindex $allRows [expr {$y + 1}]] + } + for {set x 0} {$x < $width } {incr x} { + #set topIdx [lindex $topRow $x] + set topRgb [lindex $topRow $x] + set tR [lindex $topRgb 0] + set tG [lindex $topRgb 1] + set tB [lindex $topRgb 2] + if {$hasBottom} { + #set bottomIdx [lindex $bottomRow $x] + set bottomRgb [lindex $bottomRow $x] + set bR [lindex $bottomRgb 0] + set bG [lindex $bottomRgb 1] + set bB [lindex $bottomRgb 2] + } else { + set bR 0 + set bG 0 + set bB 0 + } + foreach v {bR bG bB tR tG tB} { + if {[set $v] eq ""} { + set $v 0 + } + } + append outputBuffer [format "\x1b\[38\;2\;%d\;%d\;%dm\x1b\[48\;2\;%d\;%d\;%dm▄" $tR $tG $tB $bR $bG $bB] + } + append outputBuffer "\x1b\[m\n" ;# Reset row styling + } + + return $outputBuffer + } + + proc pngfileToAnsi {filename} { + set f [open $filename rb] + set pngdata [read $f] + close $f + return [pngdataToAnsi $pngdata] + } + } tcl::namespace::eval punk::ansi::internal { proc splitn {str {len 1}} { diff --git a/src/vfs/_vfscommon.vfs/modules/punk/ansi/sauce-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/ansi/sauce-0.1.0.tm index bcc22ec1..0d3b53de 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/ansi/sauce-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/ansi/sauce-0.1.0.tm @@ -218,7 +218,9 @@ tcl::namespace::eval punk::ansi::sauce { - + #--------------------------------------------------------------------------------------------------------------------------------------------- + # This data comes from the sauce spec. + #--------------------------------------------------------------------------------------------------------------------------------------------- #todo - fontName - which can also specify e.g code page 437 ## Font name [1] Font size Resolution [2] Aspect ratio [3] Vertical stretch [6] Description ## Display [4] Pixel [5] @@ -226,7 +228,14 @@ tcl::namespace::eval punk::ansi::sauce { set fontnames [dict create] ## IBM VGA 9×16 [7] 720×400 4:3 20:27 (1:1.35) 35% Standard hardware font on VGA cards for 80×25 text mode (code page 437) - dict set fontnames "IBM VGA" [list fontsize "9x16" resolution "720x400" aspect_ratio_display "4:3" aspect_ratio_pixel "20:27 (1:1.35)" vertical_stretch "35%" description "Standard hardware font on VGA cards for 80×25 text mode (code page 437)"] + dict set fontnames "IBM VGA" [list {*}{ + fontsize "9x16" + resolution "720x400" + aspect_ratio_display "4:3" + aspect_ratio_pixel "20:27 (1:1.35)" + vertical_stretch "35%" + description "Standard hardware font on VGA cards for 80×25 text mode (code page 437)" + }] ## IBM VGA ### [8] 9×16 [7] 720×400 4:3 20:27 (1:1.35) 35% Software installed code page font for VGA 80×25 text mode # - where ### is placeholder for 437,720,737 etc @@ -252,6 +261,7 @@ tcl::namespace::eval punk::ansi::sauce { ## C64 PETSCII unshifted 8×8 [10] 320×200 4:3 5:6 (1:1.2) 20% Original Commodore PETSCII font (PET, VIC-20, C64, CBM-II, Plus/4, C16, C116 and C128) in the unshifted mode. Unshifted mode (graphics) only has uppercase letters and additional graphic characters. This is the normal boot font. ## C64 PETSCII shifted 8×8 [10] 320×200 4:3 5:6 (1:1.2) 20% Original PETSCII font in shifted mode. Shifted mode (text) has both uppercase and lowercase letters. This mode is actuated by pressing Shift+Commodore key. ## Atari ATASCII 8×8 [11] 320×192 4:3 4:5 (1:1.25) 25% Original ATASCII font (Atari 400, 800, XL, XE) + #--------------------------------------------------------------------------------------------------------------------------------------------- #expect a 128 Byte sauce record @@ -261,6 +271,7 @@ tcl::namespace::eval punk::ansi::sauce { variable datatypes variable filetypes variable encodings + set warnings [list] if {[string length $saucerecord] != 128} { error "punk::ansi::sauce::to_dict: Unable to interpret data as a SAUCE record - length != 128" } @@ -326,6 +337,8 @@ tcl::namespace::eval punk::ansi::sauce { dict set sdict filetype_name "" } } else { + #how can a byte fail to scan with cu? is this even reachable? + puts stderr "punk::ansi::sauce::to_dict filetype byte failed to scan - setting filetype and filetype_name to empty string byte: [ansistring VIEW -lf 1 [string range $saucerecord 95 95]]" dict set sdict filetype "" dict set sdict filetype_name "" } @@ -422,25 +435,40 @@ tcl::namespace::eval punk::ansi::sauce { 5 { #binarytext #filetype is supposed to represent half the characterwidth (only widths with multiples of 2 can be specified) - #HOWEVER - in the wild we may see width/height specified in tinfo1/tinfo2 with some other value in filetype (eg 1) - #If both tinfo1 and tinfo2 are non zero - we will use them, even though it's not in spec. - set t1 [dict get $sdict tinfo1] - if {$t1 eq ""} { - set t1 0 - } - set t2 [dict get $sdict tinfo2] - if {$t2 eq ""} { - set t2 0 + #HOWEVER - in the wild we may see width/height specified in tinfo1/tinfo2 with some apparently unrelated value in filetype (eg 0 or 1) that doesn't match the intended image dimensions. + #If both tinfo1 and tinfo2 are non zero - we *could* use them, even though it's not in spec. + #An example file (us-used1.bin) has filetype 0 and tinfo1/tinfo2 640/350 + #It's possible tinfo1/tinfo2 represent pixel dimensions for a 'standard' 8x16 font, but this image is 160 columns wide, so we would expect tinfo1 to be 1280. + #The sauce spec seems to indicate we should ignore tinfo1/tinfo2 for binarytext and only use filetype to determine width. + #the default for binarytext is 160 columns. + + #filetype 1 is theoretically possible, representing 2 columns + #in practice we see this value for binarytext images that are definitely not intended to be 2 columns wide. Why? + #is there some assumption that that images are at least a certain width, and filetype has been repurposed to indicate something else? + #The spec would seem to rule out images of a single column due to filetype being half the character width but a value of 0.5 isn't supported. + #It specifically mentions that only even widths up to 510 can be specified. ($filetype * 2 where filetype is 1-255?) + + + #proper mechanism to specify columns for binarytext is the datatype field. + set cols [expr {2*[dict get $sdict filetype]}] + if {$cols == 0} { + lappend warnings "binarytext filetype value of [dict get $sdict filetype] - using binarytext default cols of 160" + #default for binarytext is 160 columns + set cols 160 } - if {$t1 != 0 && $t2 != 0} { + if {$cols == 2 && [dict get $sdict tinfo1] != 0 && [dict get $sdict tinfo2] != 0} { #not to spec - but we will assume these have values for a reason.. - puts stderr "punk::ansi::sauce::to_dict using tinfo1/tinfo2 data for columns/rows (non-compliant SAUCE data)" - dict set sdict columns [expr {2 * $t1}] - dict set sdict rows $t2 + #--------------------------------------------------------------------------------------------------------------------- + #The sample file src/testansi/formatsamples/image/binaryText/test.bin has a filetype 1 and tinfo1 40 and tinfo2 25. + #(similarly ppe-ansi.bin has tinfo1 80 and tinfo2 26) + #They seem to use the 1 in filetype to indicate that the tinfo1/tinfo2 values should be used. + #(The 80 cols wide test.bin binaryText image matches the xbin sample file src/testansi/formatsamples/image/xbin/test.xb which is a more fully specified format using a header) + #--------------------------------------------------------------------------------------------------------------------- + lappend warnings "binarytext filetype of 1 with non-zero tinfo1/tinfo2 - using tinfo1/tinfo2 data for columns/rows (possibly non-conforming SAUCE data - matching observed data in the wild)" + set cols [expr {2 * [dict get $sdict tinfo1]}] + dict set sdict columns $cols + dict set sdict rows [dict get $sdict tinfo2] } else { - #proper mechanism to specify columns for binarytext is the datatype field. - - set cols [expr {2*[dict get $sdict filetype]}] dict set sdict columns $cols #rows must be calculated from file size #rows = (filesize - sauceinfosize)/ filetype * 2 * 2 @@ -481,6 +509,9 @@ tcl::namespace::eval punk::ansi::sauce { } } } + if {[llength $warnings]} { + dict set sdict warnings $warnings + } return $sdict } diff --git a/src/vfs/_vfscommon.vfs/modules/punk/char-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/char-0.1.0.tm index 53ef8ec1..9ec42b88 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/char-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/char-0.1.0.tm @@ -3039,8 +3039,10 @@ tcl::namespace::eval punk::char { set csplits [combiner_split $text] foreach {pt combiners} [lrange $csplits 0 end-1] { set clist [split $pt ""] - lappend components {*}[lrange $clist 0 end-1] - lappend components [tcl::string::cat [lindex $clist end] $combiners] + lset clist end [tcl::string::cat [lindex $clist end] $combiners] + lappend components {*}$clist + #lappend components {*}[lrange $clist 0 end-1] + #lappend components [tcl::string::cat [lindex $clist end] $combiners] } #last csplit never has a combiner (_perlish_split style) - and may be empty - in which case we don't append it as a grapheme if {[lindex $csplits end] ne ""} { @@ -3066,126 +3068,121 @@ tcl::namespace::eval punk::char { #review \uFE0F variation selector 16 - forces emoji presentation for preceding char - if 1 { - #This is a basic implementation that does not check that all combinations are valid. - set graphemes [list] - set current_cluster "" - - set cluster_base 0 ;#is the current cluster based on a char that can be combined with modifiers/ZWJs (e.g emoji or other cluster-based char) - # or is it based on a char that can't be combined with modifiers/ZWJs (e.g ascii letter) - set cluster_base_RI 0 ;#is the current cluster based on a regional indicator char - which can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. - - set current_cluster_is_extensible 0 - for {set i 0} {$i < [llength $components] } {incr i} { - set component [lindex $components $i] - if {$component eq "\r" && [lindex $components $i+1] eq "\n"} { - if {$current_cluster ne ""} { - lappend graphemes $current_cluster - } - lappend graphemes "\r\n" - incr i ;#skip the \n as we've already processed it as part of the cluster - set current_cluster "" - grapheme_split::reset_base + #This is a basic implementation that does not check that all combinations are valid. + set graphemes [list] + set current_cluster "" + + set cluster_base 0 ;#is the current cluster based on a char that can be combined with modifiers/ZWJs (e.g emoji or other cluster-based char) + # or is it based on a char that can't be combined with modifiers/ZWJs (e.g ascii letter) + set cluster_base_RI 0 ;#is the current cluster based on a regional indicator char - which can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. + + set current_cluster_is_extensible 0 + for {set i 0} {$i < [llength $components] } {incr i} { + set component [lindex $components $i] + if {$component eq "\r" && [lindex $components $i+1] eq "\n"} { + if {$current_cluster ne ""} { + lappend graphemes $current_cluster + } + lappend graphemes "\r\n" + incr i ;#skip the \n as we've already processed it as part of the cluster + set current_cluster "" + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base + set current_cluster_is_extensible 0 + } elseif {$component eq "\u200d"} { + if {$current_cluster eq ""} { + #ZWJ at start of string - treat as separate grapheme cluster - but isn't a valid base for further combining with more ZWJs or modifiers + set current_cluster $component + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base set current_cluster_is_extensible 0 - } elseif {$component eq "\u200d"} { - if {$current_cluster eq ""} { - #ZWJ at start of string - treat as separate grapheme cluster - but isn't a valid base for further combining with more ZWJs or modifiers - set current_cluster $component - grapheme_split::reset_base - set current_cluster_is_extensible 0 - } else { - if {$cluster_base} { - if {$current_cluster_is_extensible} { - #a double (or longer) ZWJ sequence in a row is part of the last cluster - but not extensible anymore. - append current_cluster $component - set current_is_cluster_extensible 0 - } else { - append current_cluster $component - if {$cluster_base_RI} { - #regional indicators can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. - grapheme_split::reset_base - set current_cluster_is_extensible 0 - #we can keep adding ZWJs or modifiers though - } else { - set current_cluster_is_extensible 1 - } - } + } else { + if {$cluster_base} { + if {$current_cluster_is_extensible} { + #a double (or longer) ZWJ sequence in a row is part of the last cluster - but not extensible anymore. + append current_cluster $component + set current_is_cluster_extensible 0 } else { - #ZWJ after non-cluster-based char - non extensible but we continue appending ZWJs to the current cluster. append current_cluster $component - set current_cluster_is_extensible 0 - } - - } - } elseif {[regexp {[\U1f3fb-\U1f3ff]} $component]} { - #emoji modifier - join with previous component - if {$current_cluster eq ""} { - #modifier at start of string - not a valid base for further combining with more modifiers or ZWJs - but we continue appending modifiers to the current cluster. - set current_cluster $component - grapheme_split::reset_base - } else { - if {$cluster_base} { - if {$current_cluster_is_extensible} { - append current_cluster $component - #invalidate the base! - grapheme_split::reset_base + if {$cluster_base_RI} { + #regional indicators can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base + set current_cluster_is_extensible 0 + #we can keep adding ZWJs or modifiers though } else { - append current_cluster $component + set current_cluster_is_extensible 1 } + } + } else { + #ZWJ after non-cluster-based char - non extensible but we continue appending ZWJs to the current cluster. + append current_cluster $component + set current_cluster_is_extensible 0 + } + + } + } elseif {[regexp {[\U1f3fb-\U1f3ff]} $component]} { + #emoji modifier - join with previous component + if {$current_cluster eq ""} { + #modifier at start of string - not a valid base for further combining with more modifiers or ZWJs - but we continue appending modifiers to the current cluster. + set current_cluster $component + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base + } else { + if {$cluster_base} { + if {$current_cluster_is_extensible} { + append current_cluster $component + #invalidate the base! + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base } else { - #modifier after non-cluster-based char - non extensible but we continue appending modifiers to the current cluster. append current_cluster $component } - #review - # \u1f33e\u1f3fe\u200d\u2f3fe\u200d\u1f33e is 2 clusters - #This is because after first zwj, we applied a modifier - not a valid base. + } else { + #modifier after non-cluster-based char - non extensible but we continue appending modifiers to the current cluster. + append current_cluster $component } - set current_cluster_is_extensible 0 + #review + # \u1f33e\u1f3fe\u200d\u2f3fe\u200d\u1f33e is 2 clusters + #This is because after first zwj, we applied a modifier - not a valid base. + } + set current_cluster_is_extensible 0 + } else { + if {$current_cluster eq ""} { + grapheme_split::start_cluster $component } else { - if {$current_cluster eq ""} { - grapheme_split::start_cluster $component - } else { - #have existing cluster data - if {$current_cluster_is_extensible} { - #assert - if current_cluster_is_extensible then cluster_base should currently be true. - #if the current char is a base - we can append to existing cluster, but if it's not a base, then we start a new cluster even if we had seen a ZWJ before. - if {[regexp {[\U1f600-\U1f64f\U1f300-\U1f5ff\U1f900-\U1f9ff\U1fa70-\U1faff\U1f680-\U1f6ff\U2700-\U27bf\U2600-\u26ff]} $component]} { - append current_cluster $component - set cluster_base 1 - } else { - lappend graphemes $current_cluster - set current_cluster $component - grapheme_split::reset_base - } - set current_cluster_is_extensible 0 - } elseif {$cluster_base_RI} { - #regional indicators can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. - if {[regexp {[\U1f1e6-\U1f1ff]} $component]} { - append current_cluster $component - - #invalidate the base - we can't combine more than 2 RIs in a cluster, and they don't combine with modifiers or ZWJs to form longer clusters. - #we can however add more ZWJs or modifiers to the cluster - but they don't make it extensible for combining with more RIs - grapheme_split::reset_base - } else { - #something else while RI cluster is open - end the current cluster and start a new one with the current char. - lappend graphemes $current_cluster - grapheme_split::start_cluster $component - } - set current_cluster_is_extensible 0 + #have existing cluster data + if {$current_cluster_is_extensible} { + #assert - if current_cluster_is_extensible then cluster_base should currently be true. + #if the current char is a base - we can append to existing cluster, but if it's not a base, then we start a new cluster even if we had seen a ZWJ before. + if {[regexp {[\U1f600-\U1f64f\U1f300-\U1f5ff\U1f900-\U1f9ff\U1fa70-\U1faff\U1f680-\U1f6ff\U2700-\U27bf\U2600-\u26ff]} $component]} { + append current_cluster $component + set cluster_base 1 } else { + lappend graphemes $current_cluster + set current_cluster $component + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base + } + set current_cluster_is_extensible 0 + } elseif {$cluster_base_RI} { + #regional indicators can only combine in pairs, and don't combine with modifiers or ZWJs to form longer clusters - so not extensible. + if {[regexp {[\U1f1e6-\U1f1ff]} $component]} { + append current_cluster $component + + #invalidate the base - we can't combine more than 2 RIs in a cluster, and they don't combine with modifiers or ZWJs to form longer clusters. + #we can however add more ZWJs or modifiers to the cluster - but they don't make it extensible for combining with more RIs + set cluster_base 0; set cluster_base_RI 0 ;#grapheme_split::reset_base + } else { + #something else while RI cluster is open - end the current cluster and start a new one with the current char. lappend graphemes $current_cluster grapheme_split::start_cluster $component } + set current_cluster_is_extensible 0 + } else { + lappend graphemes $current_cluster + grapheme_split::start_cluster $component } } } - if {$current_cluster ne ""} { - lappend graphemes $current_cluster - } - } else { - set graphemes $components } - + if {$current_cluster ne ""} { + lappend graphemes $current_cluster + } return $graphemes } namespace eval grapheme_split { diff --git a/src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm b/src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm index ada0f900..b393fdaa 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm @@ -4210,6 +4210,9 @@ namespace eval punk::lib { if {[string index $key 0] ne "%"} { set key %$key } + #puts "---key:'$key'" + set key [string map {; \\;} $key] ;#review + #puts "---key:'$key'" #pipeline - use punk patterns. % thisval.= $key= $thisval } diff --git a/src/vfs/_vfscommon.vfs/modules/punk/mix/commandset/loadedlib-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/mix/commandset/loadedlib-0.1.0.tm index a07aca09..2cc6ff98 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/mix/commandset/loadedlib-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/mix/commandset/loadedlib-0.1.0.tm @@ -54,13 +54,18 @@ namespace eval punk::mix::commandset::loadedlib { if {$opt_refresh} { catch {package require frobznodule666} ;#ensure pkg system has loaded/searched for everything REVIEW - this doesn't result in full scans foreach tm_path [tcl::tm::list] { + #review - todo - adjust punk::path::subfolders to take arguments to do some filtering itself rather than recurse down unnecessary branches. set paths_below [punk::path::subfolders -recursive $tm_path] foreach folder $paths_below { + if {[string match */_build/* $folder]} {continue} set tail [file tail $folder] - if {[string match #modpod-* $tail] || [string match #tarjar-* $tail]} { + if {[string match #tarjar-* $tail]} { + continue + } + if {[string match #modpod-* $tail]} { + #manually do a 'package ifneeded' fore each module found here. continue } - if {[string match */_build/* $folder]} {continue} set relpath [string tolower [punk::path::relative $tm_path $folder]] set modpath [string map {/ ::} $relpath] catch {package require ${modpath}::flobrudder99} diff --git a/src/vfs/_vfscommon.vfs/modules/punk/ns-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/ns-0.1.0.tm index db6acbb4..99981784 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/ns-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/ns-0.1.0.tm @@ -775,13 +775,8 @@ tcl::namespace::eval punk::ns { #set parent [nsprefix $ns_absolute] #set tail [nstail $ns_absolute] - #jjj #set allchildren [lsort [nseval $base [list ::namespace children]]] - #set allchildren [lsort [tcl::namespace::eval $base [list ::namespace children]]] - set allchildren [lsort [nseval $base [list ::namespace children]]] - #puts "->base:$base tailparts:$tailparts allchildren: $allchildren" - #puts "->base:$base tailparts:$tailparts childcount: [llength $allchildren]" #** only significant when it is the trailing part of a segment eg ::**::xxx ::a**::xxx if {[llength $tailparts]} { @@ -790,6 +785,7 @@ tcl::namespace::eval punk::ns { set nslist [nstree_list $base -subnslist {} -allbelow 1] } elseif {[regexp {[*]{2}$} $nextglob]} { set nslist [list] + set allchildren [lsort [nseval $base [list ::namespace children]]] lappend nsmatches {*}[lsearch -all -inline -glob $allchildren [nsjoin ${base} $nextglob]] foreach ch $nsmatches { lappend nslist $ch @@ -799,6 +795,7 @@ tcl::namespace::eval punk::ns { } else { #lsearch with -glob ok even if nextglob has no globchars (no discernable speed diff, and earlier parts may have globchars anyway) set nslist [list] + set allchildren [lsort [nseval $base [list ::namespace children]]] lappend nsmatches {*}[lsearch -all -inline -glob $allchildren [nsjoin ${base} $nextglob]] if {[llength $tailparts] >1 || $allbelow} { foreach ch $nsmatches { @@ -812,6 +809,7 @@ tcl::namespace::eval punk::ns { } } else { #puts "nstree_list: no tailparts base:$base" + set allchildren [lsort [nseval $base [list ::namespace children]]] if {$allbelow} { set nsmatches $allchildren set nslist [list] @@ -2134,8 +2132,8 @@ y" {return quirkykeyscript} tcl::dict::set tinfo($target) procoffset 0 tcl::dict::set tinfo($target) level [expr {[::tcl::info::level]+1}] tcl::dict::set tinfo($target) subcmds 0 - puts "enter: $target -- $args" - puts "frame-2: [::tcl::info::frame -2]" + puts stderr "enter: $target -- $args" + #puts stderr "frame-2: [::tcl::info::frame -2]" set _cmdtrace_disabled false } @@ -2481,7 +2479,7 @@ y" {return quirkykeyscript} set line $traceline dict set linedict $target eval_base $traceline dict set linedict $target eval_offset 1 - puts " step type: proc traceline:$traceline ** $args" + puts " step type: proc traceline:$traceline ** $args\x1b\[m" #puts "** $callinfo" if {[dict exists $callinfo cmd]} { #set cmd [string trim [dict get $callinfo cmd]] ;#raw 'unexpanded' script from the stack frame @@ -2504,8 +2502,8 @@ y" {return quirkykeyscript} set eval_base [dict get $linedict $target eval_base] set eval_offset [dict get $linedict $target eval_offset] set line [expr {$eval_base + ($eval_offset-1) + ($traceline-1)}] - puts "stack-- $callinfo" - puts " step type: eval traceline: $traceline -- " + #puts "stack-- $callinfo" + puts stderr " step type: eval traceline: $traceline -- " if {[dict exists $callinfo cmd]} { #set cmd [string trim [dict get $callinfo cmd]] set cmdlist [lindex $args 0] @@ -2627,6 +2625,8 @@ y" {return quirkykeyscript} }] } proc cmdtrace {args} { + #review - displaying argument values has to be done carefully. Small values are ok, but large lists or dicts can be overwhelming. + #Potentially we could apply some heuristics to truncate or summarise them. package require dictn ;#convenience to allow dictn::incr d {key subkey} variable tinfo array unset tinfo @@ -2676,7 +2676,7 @@ y" {return quirkykeyscript} #if the target command has a leading colon (e.g expr alternative :) we can't put a trace directly on a fully qualified name with a triple colon such as ::: #we will need to evaluate in the namespace foreach {tgt_cmd ns nscmd} $resolved_targets { - puts "tracing target: $tgt_cmd whilst running: $origin $arglist" + puts stderr "tracing target: $tgt_cmd whilst running: $origin $arglist" #::tcl::namespace::eval $ns [list ::trace add execution $nscmd enter [list ::punk::ns::_cmdtrace_enter ::punk::ns::linedict $tgt_cmd]] #::tcl::namespace::eval $ns [list ::trace add execution $nscmd enterstep [list ::punk::ns::_cmdtrace_enterstep ::punk::ns::linedict $tgt_cmd]] diff --git a/src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm index aff97595..b5593d12 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm @@ -565,10 +565,45 @@ namespace eval punk::path { end]] } + + ## for comparison + #proc nsglob_as_re {glob} { + # #any segment that is not just * must match exactly one segment in the path + # set pats [list] + # foreach seg [nsparts_cached $glob] { + # switch -exact -- $seg { + # "" { + # lappend pats "" + # } + # * { + # #review - ::g*t will not find ::got:it (won't match single inner colon) - this should be fixed + # #lappend pats {[^:]*} + # #negative lookahead + # #any number of chars not followed by ::, followed by any number of non : + # lappend pats {(?:.(?!::))*[^:]*} + # } + # ** { + # lappend pats {.*} + # } + # default { + # set seg [string map {. [.]} $seg] + # if {[regexp {[*?]} $seg]} { + # #set pat [string map [list ** {.*} * {[^:]*} ? {[^:]}] $seg] + # set pat [string map [list ** {.*} * {(?:.(?!::))*[^:]*} ? {[^:]}] $seg] + # lappend pats "$pat" + # } else { + # lappend pats "$seg" + # } + # } + # } + # } + # return "^[join $pats ::]\$" + #} proc pathglob_as_re {pathglob} { #*** !doctools #[call [fun pathglob_as_re] [arg pathglob]] #[para] Returns a regular expression for matching a path to a glob pattern which can contain glob chars *|? in any segment of the path structure + #[para] Does not support square bracket globs or character classes. #[para] ** matches any number of subdirectories. #[para] e.g /etc/**/*.txt will match any .txt files at any depth below /etc (except directly within /etc itself) #[para] e.g /etc/**.txt will match any .txt files at any depth below /etc @@ -589,7 +624,7 @@ namespace eval punk::path { * {lappend pats {[^/]*}} ** {lappend pats {.*}} default { - set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals + set seg [string map [list ^ {\^} $ {\$} \[ {\[} \] {\]} ( {\(} ) {\)} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters (or tcl glob square bracket chars) in the input as literals #set seg [string map [list . {[.]}] $seg] set seg [string map {. [.]} $seg] if {[regexp {[*?]} $seg]} { @@ -603,6 +638,52 @@ namespace eval punk::path { } return "^[join $pats /]\$" } + + punk::args::define { + @id -id ::punk::path::globmatchpath + @cmd -name punk::path::globmatchpath\ + -summary\ + "Match path to *|**|? glob patterns"\ + -help\ + "Return a boolean indicating whether the path matches the specialised glob pattern. + A pattern such as /usr/*/bin will match any path that has /usr as the first segment and bin as the third segment, + with any single segment in between. + A pattern such as /usr/**/bin will match any path that has /usr as the first segment and bin as the last segment, + with 1 or more segments in between (so it will not match /usr/bin). + A pattern such as /usr/** will match any path that has /usr as the first segment, with 1 or more segments + following (so it will not match /usr itself). + A pattern such as **/*.txt will match any path that ends with .txt, with 1 or more leading segments + (so it will not match test.txt or .txt). + A pattern such as ** will match any path. + The glob characters * and ? are the only special characters in the pathglob syntax. + - they are treated as glob characters regardless of where they appear in the pathglob string. + Note that this is different from other Tcl glob contexts where square brackets can be used. + The pathglob syntax treats other characters, including square brackets as literals. + For example, the pattern /usr/te?t will match /usr/test and /usr/text but not /usr/texxt, and the pattern /usr/te*t + will match /usr/test, /usr/teat, and /usr/teeeet but not /usr/te/t. + The pathglob syntax does not support escaping of glob characters - any glob characters in the pathglob are treated + as glob characters. For example, the pattern /usr/* will match any path that has /usr as the first segment and any + single segment as the second segment, but there is no way to specify a pattern that matches any path that has /usr + as the first segment and a literal * as the second segment. + Caller must ensure that file separator is forward slash. (e.g use file normalize on windows) + + options: + -nocase 0|1 (default 0 - case sensitive) + If -nocase is not supplied - default to case sensitive *except for driveletter* + ie - the driveletter alone in paths such as c:/etc will still be case insensitive. (ie c:/ETC/* will match C:/ETC/blah but not C:/etc/blah) + Explicitly specifying -nocase 0 will require the entire case to match including the driveletter. + " + @leaders + pathglob -type string -help "glob pattern to match path against. See [fun pathglob_as_re] for syntax of glob patterns" + path -type string -help "path to match against glob pattern" + @opts + -nocase -type boolean -default 0 -help\ + "case insensitive matching (default false - case sensitive) + - except for driveletter on windows which is always case insensitive + unless -nocase 0 is explicitly specified" + @values -min 0 -max 0 + } + # -id proc globmatchpath {pathglob path args} { #*** !doctools #[call [fun globmatchpath] [arg pathglob] [arg path] [opt {option value...}]] @@ -669,46 +750,182 @@ namespace eval punk::path { @opts -recursive -type none -help\ "" + -exclude-paths -type list -default {} -help\ + "list of path patterns to exclude from results. + May include * and ** path segments e.g /usr/** + A single /*/ will match any single segment in the path, and a single /**/ will match any number of segments in the path. + + e.g to exclude any path with _aside as a segment in the middle: -exclude-paths **/_aside/** + i.e this would exclude /usr/_aside/etc and /usr/x/_aside/etc but not /usr/x/_aside or _aside/etc + + To exclude all paths with _aside as a segment anywhere: -exclude-paths { **/_aside/** **/_aside _aside/**} + " #todo -depth @values -min 0 -max 1 path -type directory -optional 1 -help\ - "Path of folder. If not supplied current directory is used." + "Path of folder. If not supplied current directory is used. + This may be a relative or absolute path. Relative paths are treated as relative to current directory. + When using relative paths - the result will also be relative paths with the same relative prefix. + (e.g if path is ../test - the results will be ../test/subfolder1 ../test/subfolder2 etc) + Patterns in -exclude-paths are matched against the resulting paths + (so should be written to match the same relative prefix if path is relative)" } + proc subfolders {args} { + #NOTE - this algorithm based on omit_only_patterns and prune_base_patterns was suggested by a 2026 AI model - it is apparent to this programmer that it is inadequate for the purpose. + #e.g consider subfolders -recursion -exclude {**/vfs/** **/src/**} + #This can still return something like c:/repo/etc/src/vfs - which should be excluded by the pattern **/src/** + #todo - review and fix properly. set argd [punk::args::parse $args withid ::punk::path::subfolders] lassign [dict values $argd] leaders opts values received - set do_recursion [dict exists $received -recursive] + set do_recursion [dict exists $received -recursive] + set exclude_paths [dict get $opts -exclude-paths] + if {"**" in $exclude_paths} { + #if ** is in exclude_paths - then we can skip all glob matching and just return empty list + #This is likely user error - so we'll be loud about it for now but will still return empty list rather than erroring. + #If user code is building exclude_paths dynamically - they can check for this case themselves and avoid the call to subfolders to suppress this message. + puts stderr "punk::path::subfolders Warning - exclude_paths contains '**' - all paths will be excluded" + return [list] + } if {[dict exists $received path]} { set path [dict get $values path] } else { set path [pwd] } - set folders [glob -nocomplain -directory $path -types d *] + + set all_subfolders [glob -nocomplain -directory $path -types d *] + + + #example of expected exclude_paths pattern behaviour when recursion is enabled: + # **/dirname -> omit /x/y/dirname, but still visit /x/y/dirname/* + + # **/dirname/* -> include /x/y/dirname and /x/y/dirname/a/b but omit directories that are a single level below /x/y/dirname such as /x/y/dirname/a + + #c:/** - would exclude all subfolders below c: but not c: itself + + # **/test/** - would exclude any path with test as a segment and all its subfolders + #- but not paths with test as a segment that is the final segment + + + set omit_only_patterns [list] + set prune_base_patterns [list] + foreach pat $exclude_paths { + set pat_parts [file split $pat] ;#note file split c:/test gives {c:/ test} but file split **/test gives {** test} + #also note that file split on windows treats forward slashes and backslashes the same. + #by using file split, we gain some flexibility in syntax of paths and patterns, + #but lose the ability to use backslashes as escapes to allow literal glob characters in path segments. + #This is almost always a non-issue on windows since * and ? are not valid in path segments there, and is rarely an issue on unix even though + # * and ? are technically valid in path segments, but it is inadvisable there anyway for compatibility with shells etc. + if {[llength $pat_parts] >= 2 && [lindex $pat_parts end] eq "**"} { + #** at end of pattern - e.g /dir/etc/** + #Convert ".../" to base "...", and prune descendants of that base. + lappend prune_base_patterns [file join {*}[lrange $pat_parts 0 end-1]] + } else { + lappend omit_only_patterns $pat + } + } + + set folders [list] + set recurse_subdirs [list] + + foreach f $all_subfolders { + set include_in_results 1 + set allow_recurse 1 + foreach pat $omit_only_patterns { + if {[globmatchpath $pat $f]} { + set include_in_results 0 + break + } + } + if {$allow_recurse && [llength $prune_base_patterns]} { + foreach base_pat $prune_base_patterns { + #prune both the matched base node and its decendants. + if {[globmatchpath $base_pat $f]} { + set allow_recurse 0 + break + } + if {[globmatchpath "${base_pat}/**" $f]} { + set include_in_results 0 + set allow_recurse 0 + break + } + } + } + if {$include_in_results} { + lappend folders $f + } + if {$allow_recurse} { + lappend recurse_subdirs $f + } + } if {$do_recursion} { - foreach subdir $folders { - lappend folders {*}[subfolders -recursive $subdir] + foreach subdir $recurse_subdirs { + lappend folders {*}[subfolders -exclude-paths $exclude_paths -recursive $subdir] } } + + #if {[llength $exclude_paths]} { + # set folders [list] + # foreach f $all_subfolders { + # set skip 0 + # foreach pat $exclude_paths { + # #review - this is slightly too simplistic. + # # for exclusion pattern **/dirname + # # this will exclude any path with dirname as final segment - but it will also exclude any path with dirname as a segment anywhere in the path - which is not intended. + # #puts stderr "Checking exclude pat '$pat' against '$f'" + # if {[globmatchpath $pat $f]} { + # set skip 1 + # break + # } + # } + # if {!$skip} { + # lappend folders $f + # } + # } + #} else { + # set folders $all_subfolders + #} + #if {$do_recursion} { + # foreach subdir $folders { + # lappend folders {*}[subfolders -exclude-paths $exclude_paths -recursive $subdir] + # } + #} return $folders } #todo - treefolders with similar search caps as treefilenames punk::args::define { - @id -id ::punk::path::treefilenames + @id -id ::punk::path::treefilenames + @cmd -name punk::path::treefilenames\ + -summary\ + "List of filenames below supplied path."\ + -help\ + "List of filenames below path. + The resulting list is unsorted." -directory -type directory -help\ "folder in which to begin recursive scan for files." - -call-depth-internal -default 0 -type integer - -sort -type any -default natural -choices {none ascii dictionary natural} + -call-depth-internal -default 0 -type integer -help "internal use only - caller should not specify - used to track depth of recursive calls for internal logic" + -call-subvector -default {} -type list -help "internal use only - caller should not specify - used to track path vector of recursive calls for internal logic" + -call-allbelow -default 1 -type boolean -help "internal use only - caller should not specify - used to track whether we are in a subtree below a match for glob_paths (which means we can skip glob matching and antiglob_paths checks and just include all files below here)" + -sort -type any -default natural -choices {none ascii dictionary natural} -antiglob_paths -default {} -help\ "list of path patterns to exclude may include * and ** path segments e.g - /usr/** (exlude subfolders based at /usr but not + /usr/** (exclude subfolders based at /usr but not files within /usr itself) - **/_aside (exlude files where _aside is last segment) + **/_aside (exclude files where _aside is last segment) **/_aside/* (exclude folders one below an _aside folder) **/_aside/** (exclude all folders with _aside as a segment)" -antiglob_files -default {} + -glob_paths -default {*} -help\ + "list of path patterns to include + may include * and ** path segments e.g + /usr/** (include subfolders based at /usr but not + files within /usr itself) + **/_aside (include files where _aside is last segment) + **/_aside/* (include folders one below an _aside folder) + **/_aside/** (include all folders with _aside as a segment)" @values -min 0 -max -1 -optional 1 -type string tailglobs -default * -multiple 1 -help\ "Patterns to match against filename portion (last segment) of each file path @@ -732,12 +949,20 @@ namespace eval punk::path { lassign [dict values $argd] leaders opts values received set tailglobs [dict get $values tailglobs] # -- --- --- --- --- --- --- - set opt_sort [dict get $opts -sort] - set opt_antiglob_paths [dict get $opts -antiglob_paths] - set opt_antiglob_files [dict get $opts -antiglob_files] - set CALLDEPTH [dict get $opts -call-depth-internal] + set opt_sort [dict get $opts -sort] + set opt_antiglob_paths [dict get $opts -antiglob_paths] + set opt_glob_paths [dict get $opts -glob_paths] + set opt_antiglob_files [dict get $opts -antiglob_files] + + set CALLDEPTH [dict get $opts -call-depth-internal] + set callsubvector [dict get $opts -call-subvector] + set callallbelow [dict get $opts -call-allbelow] ;#whether to return matches longer than the matched glob-path # -- --- --- --- --- --- --- # -- --- --- --- --- --- --- + if {"*" in $opt_glob_paths} { + #if we have a * in the default glob_paths - then any other entries are redundant. + set opt_glob_paths {*} + } set files [list] if {$CALLDEPTH == 0} { @@ -745,14 +970,17 @@ namespace eval punk::path { package require natsort } #set opts [dict merge $opts [list -directory $opt_dir]] - if {![dict exists $received -directory]} { - set opt_dir [pwd] - } else { + if {[dict exists $received -directory]} { set opt_dir [dict get $opts -directory] + } else { + + set opt_dir [pwd] } if {![file isdirectory $opt_dir]} { return [list] } + + } else { #assume/require to exist in any recursive call set opt_dir [dict get $opts -directory] @@ -831,19 +1059,35 @@ namespace eval punk::path { lappend okdirs $dir } } - if {[llength $okdirs]} { + if {$opt_glob_paths eq {*}} { + set matchdirs $okdirs + } else { + #** only significant when it is the trailing part of a segment eg /**/xxx /a**/xxx + + + + set matchdirs [list] + foreach dir $okdirs { + foreach gp $opt_glob_paths { + if {[globmatchpath $gp $dir] || [globmatchpath "$gp/**" $dir]} { + lappend matchdirs $dir + } + } + } + } + if {[llength $matchdirs]} { switch -- $opt_sort { ascii { - set finaldirs [lsort $okdirs] + set finaldirs [lsort $matchdirs] } dictionary { - set finaldirs [lsort -dictionary $okdirs] + set finaldirs [lsort -dictionary $matchdirs] } natural { - set finaldirs [natsort::sort $okdirs] + set finaldirs [natsort::sort $matchdirs] } default { - set finaldirs $okdirs + set finaldirs $matchdirs } } foreach dir $finaldirs { diff --git a/src/vfs/_vfscommon.vfs/modules/punk/pipe-1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/pipe-1.0.tm index 034fae01..eae8731c 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/pipe-1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/pipe-1.0.tm @@ -169,8 +169,8 @@ tcl::namespace::eval punk::pipe::lib { #This stops us matching {/@**@x x} vs {/@**@x x} #--- - set rhs [tcl::string::map {: ? * [ ] \\ {"} " " } $rhs] - #review - we don't expect other command-incompatible chars such as colon? + set rhs [tcl::string::map {: ; ? * [ ] \\ {"} " " } $rhs] + #review - we don't expect other command-incompatible chars? return $rhs } @@ -187,6 +187,7 @@ tcl::namespace::eval punk::pipe::lib { #exclude quoted whitespace proc arg_is_script_shaped {arg} { + set arg [string map {\\; ""} $arg] if {[tcl::string::first \n $arg] >= 0} { return 1 } elseif {[tcl::string::first ";" $arg] >= 0} { diff --git a/src/vfs/_vfscommon.vfs/modules/punk/repo-0.1.1.tm b/src/vfs/_vfscommon.vfs/modules/punk/repo-0.1.1.tm index 5fd534dc..049ed2e7 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/repo-0.1.1.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/repo-0.1.1.tm @@ -1817,17 +1817,13 @@ namespace eval punk::repo { error "unimplemented" } - #file normalize is expensive so this is too + #file normalize can be a little expensive so this is too proc norm {path {platform env}} { - #kettle::path::norm - #see also wiki - #full path normalization - - set platform [string tolower $platform] - if {$platform eq "env"} { - set platform $::tcl_platform(platform) - } + #set platform [string tolower $platform] + #if {$platform eq "env"} { + # set platform $::tcl_platform(platform) + #} #No - don't do this sort of path translation here - leave as option for specific utils only such as ./ #Windows volume-relative syntax with specific volume specified is somewhat broken in Tcl - but leading slash volume-relative does work #We shouldn't break it totally just because accessing WSL/mingw paths is slightly more useful @@ -1835,6 +1831,9 @@ namespace eval punk::repo { #return [file dirname [file normalize [punk::unixywindows::towinpath $path]/__]] #} + #kettle::path::norm + #see also wiki + #full path normalization return [file dirname [file normalize $path/__]] } diff --git a/src/vfs/_vfscommon.vfs/modules/test/overtype-1.7.4.tm b/src/vfs/_vfscommon.vfs/modules/test/overtype-1.7.4.tm index 7b6c2e93..43361b67 100644 Binary files a/src/vfs/_vfscommon.vfs/modules/test/overtype-1.7.4.tm and b/src/vfs/_vfscommon.vfs/modules/test/overtype-1.7.4.tm differ diff --git a/src/vfs/_vfscommon.vfs/modules/test/runtestmodules.tcl b/src/vfs/_vfscommon.vfs/modules/test/runtestmodules.tcl index b7846da6..3d46cea0 100644 --- a/src/vfs/_vfscommon.vfs/modules/test/runtestmodules.tcl +++ b/src/vfs/_vfscommon.vfs/modules/test/runtestmodules.tcl @@ -3,6 +3,65 @@ #(plain tclsh may stall - todo - review reasons for this and whether shellfilter can be modified to support ordinary tclsh) #A known working copy of a punk shell executable should be placed on the path and the shebang line updated to reflect this +#------------------------------------ +lassign [split [info tclversion] .] tcl_major tcl_minor +set script_dir [file dirname [file normalize [info script]]] +set modules_posn [string first /modules/ $script_dir] +if {$modules_posn < 0} { + puts stderr "Error: script dir $script_dir does not contain /modules/" + #exit 2 ;#don't call exit. If run in a single proc it can cause the hole test suite exit before summary can be printed. + return -code error "Error: script dir $script_dir does not contain /modules/" +} +set modules_base [string range $script_dir 0 $modules_posn-1] +if {[file tail $modules_base] eq "src"} { + set project_root [file dirname $modules_base] +} else { + set project_root $modules_base +} +puts stderr "runtestmodules.tcl project_root: $project_root" +#use the unbuilt modules/libraries under development rather than the installed versions. +#The unbuilt modules should have a higher version number (such as the magic version number 999999.0a1.0) than any installed versions to ensure they are preferred. +tcl::tm::add [file normalize $project_root/src/modules] +tcl::tm::add [file normalize $project_root/src/modules_tcl$tcl_major] +tcl::tm::add [file normalize $project_root/src/vendormodules] +tcl::tm::add [file normalize $project_root/src/vendormodules_tcl$tcl_major] + +# add 'package ifneeded' definitions for unbuilt #modpod modules. +#first gather subdirectories of modules that contain #modpod-*-999999.0a1.0 in their name - these should be the unbuilt versions of zip based modules. +#set subfolders [punk::path::subfolders -recursive [file normalize $project_root/src/modules] -match */#modpod-*-999999.0a1.0] +#'punk::path::subfolders' currently only supports negative matching with -exclude, so we have to filter for the positive match ourselves. +set subfolders [punk::path::subfolders -recursive [file normalize $project_root/src/modules] -exclude {**/_build/** **/_build}] +foreach sub $subfolders { + #In most cases we could use string match - but the * within modpod-*-999999.0a1.0 could match a forward slash which could then match some other file under a #modpod- folder structure, + #so we use globmatchpath which treats * as matching any characters except path separators. + if {[globmatchpath "**/#modpod-*-999999.0a1.0" $sub]} { + set modname [file tail $sub] + set modname [string range $modname 8 end-12] ;#strip off #modpod- and -999999.0a1.0 + set modpath [file join $sub "$modname-999999.0a1.0.tm"] + #!!!! + #todo - calculate fully qualified module name based on path relative to the modules folder we added to the tcl::tm path. + if {[file exists $modpath]} { + puts stderr "runtestmodules.tcl adding package ifneeded for modpod module $modname at path $modpath" + package ifneeded $modname 999999.0a1.0 [list source $modpath] + } else { + puts stderr "runtestmodules.tcl warning: expected mod.tcl not found for modpod module $modname at path $modpath" + } + } +} + +set libdir [file normalize $project_root/src/lib] +set libvdir [file normalize $project_root/src/lib/tcl$tcl_major] +set libvldir [file normalize $project_root/src/vendorlib] +set libvlvdir [file normalize $project_root/src/vendorlib_tcl$tcl_major] +foreach d [list $libdir $libvdir $libvldir $libvlvdir] { + if {$d ni $::auto_path} { + lappend ::auto_path $d + } +} +#------------------------------------ +puts stderr "runtestmodules.tcl ::auto_path: $::auto_path" +puts stderr "runtestmodules.tcl tcl::tm::list: [tcl::tm::list]" + package require punk package require punk::args @@ -122,7 +181,7 @@ foreach pkg $punktestpkgs { foreach ln [split $chunk \n] { incr i if {[string match "Tests ended at*" $ln]} { - puts stdout " [punk::ansi::ansistring VIEW -lf 2 -cr 1 "$pkg $ln"]" + puts stdout "<$pkg> $ln" } elseif {[string match "*:*Total*Passed*Skipped*Failed*" $ln]} { set fields [lrange $ln 1 end] dict for {K v} $fields { @@ -136,16 +195,26 @@ foreach pkg $punktestpkgs { } } } - puts stdout "$pkg $ln" + puts stdout "<$pkg> $ln" + } elseif {[string match "*Sourced * Test Files*" $ln]} { + puts stdout "<$pkg> $ln" } else { - puts stdout " $ln" + if {[string trim $ln] ne ""} { + puts stdout " $ln" + } else { + puts -nonewline stdout "\n" + } #puts stdout "$i" } } flush stdout } stderr { - puts stderr " [punk::ansi::ansistring VIEW -lf 2 -cr 1 $chunk]" + #puts stderr " [punk::ansi::ansistring VIEW -lf 2 -cr 1 $chunk]" + set chunkview [punk::ansi::ansistring VIEW -lf 2 -cr 1 $chunk] + foreach ln [split $chunkview \n] { + puts stderr " $ln" + } flush stderr } default {