diff --git a/src/bootsupport/modules/overtype-1.7.4.tm b/src/bootsupport/modules/overtype-1.7.4.tm index c5bc1a6a..967de934 100644 --- a/src/bootsupport/modules/overtype-1.7.4.tm +++ b/src/bootsupport/modules/overtype-1.7.4.tm @@ -90,7 +90,9 @@ package require punk::assertion # - need to extract and replace ansi codes? tcl::namespace::eval overtype { - namespace import ::punk::assertion::assert + if {[info commands ::overtype::assert] eq ""} { + namespace import ::punk::assertion::assert + } punk::assertion::active true namespace path ::punk::lib @@ -625,7 +627,7 @@ tcl::namespace::eval overtype { #set overtext [lpop inputchunks 0] ;#could be a list 'ansisplit' or text 'plain|mixed' lassign [lpop inputchunks 0] overtext_type overtext - #use eq test with emptystring instead of 'string length' - test for emptiness shouldn't cause shimmering if popped inputchunks member if an 'ansisplit' list + #use eq test with emptystring instead of 'string length' - test for emptiness shouldn't cause shimmering if popped inputchunks member is an 'ansisplit' list if {$overtext eq ""} { incr loop continue @@ -728,7 +730,7 @@ tcl::namespace::eval overtype { set existing_reverse_state 0 #split_codes_single is single esc sequence - but could have multiple sgr codes within one esc sequence #e.g \x1b\[0;31;7m has a reset,colour red and reverse - set codeinfo [punk::ansi::codetype::sgr_merge [list $replay_codes_overlay] -info 1] + set codeinfo [punk::ansi::codetype::sgr_merge [list $replay_codes_overlay] -info] set codestate_reverse [dict get $codeinfo codestate reverse] switch -- $codestate_reverse { 7 { @@ -863,7 +865,7 @@ tcl::namespace::eval overtype { # ---- # review set col $post_render_col - #just because it's out of range of the renderwidth - doesn't mean a move down should jump to witin the range - 2025 + #just because it's out of range of the renderwidth - doesn't mean a move down should jump to within the range - 2025 #---- #set existingdata [lindex $outputlines [expr {$post_render_row -1}]] @@ -908,7 +910,7 @@ tcl::namespace::eval overtype { #It would perhaps be more properly handled as a queue of instructions from our initial renderline call #we don't need to worry about overflow next call (?)- but we should carry forward our gx and ansi stacks - puts stdout ">>>[a+ red bold]overflow_right during restore_cursor[a]" + puts stdout ">>>renderspace<<<[a+ red bold]overflow_right during restore_cursor[a]" set sub_info [overtype::renderline\ -info 1\ @@ -924,7 +926,7 @@ tcl::namespace::eval overtype { tcl::dict::set vtstate autowrap_mode [tcl::dict::get $sub_info autowrap_mode] ;#nor this.. #todo!!! - # 2025 fix - this does nothing - so what uses it?? create a test! + # 2025 fix - this does nothing - so what is the intention?? create a test! linsert outputlines $renderedrow $foldline #review - row & col set by restore - but not if there was no save.. } @@ -1053,7 +1055,9 @@ tcl::namespace::eval overtype { set overflow_right "" } else { if {[tcl::dict::get $vtstate autowrap_mode]} { - set outputlines [linsert $outputlines $renderedrow $overflow_right] + #set outputlines [linsert $outputlines $renderedrow $overflow_right] + #ledit outputlines $renderedrow $renderedrow-1 $overflow_right + ledit outputlines $renderedrow -1 $overflow_right set overflow_right "" set row [expr {$renderedrow + 2}] } else { @@ -1150,7 +1154,8 @@ tcl::namespace::eval overtype { if {$insert_lines_above > 0} { set row $renderedrow #set outputlines [linsert $outputlines $renderedrow-1 {*}[lrepeat $insert_lines_above ""]] - ledit outputlines $renderedrow-1 $renderedrow-2 {*}[lrepeat $insert_lines_above ""] + #ledit outputlines $renderedrow-1 $renderedrow-2 {*}[lrepeat $insert_lines_above ""] + ledit outputlines $renderedrow-1 -1 {*}[lrepeat $insert_lines_above ""] incr row [expr {$insert_lines_above -1}] ;#we should end up on the same line of text (at a different index), with new empties inserted above #? set row $post_render_row #can renderline tell us? } @@ -1461,6 +1466,7 @@ tcl::namespace::eval overtype { set nextprefix_list $overflow_right_pt_code_pt } else { #merge tail and head + #ledit end end will work with empty list (ledit end does not) ledit nextprefix_list end end "[lindex $nextprefix_list end][lindex $overflow_right_pt_code_pt 0]" lappend nextprefix_list {*}[lrange $overflow_right_pt_code_pt 1 end] } @@ -1476,16 +1482,17 @@ tcl::namespace::eval overtype { } if 0 { - if {$nextprefix ne ""} { - set nextoveridx [expr {$overidx+1}] - if {$nextoveridx >= [llength $inputchunks]} { - lappend inputchunks $nextprefix - } else { - #lset overlines $nextoveridx $nextprefix[lindex $overlines $nextoveridx] - set inputchunks [linsert $inputchunks $nextoveridx $nextprefix] + if {$nextprefix ne ""} { + set nextoveridx [expr {$overidx+1}] + if {$nextoveridx >= [llength $inputchunks]} { + lappend inputchunks $nextprefix + } else { + #lset overlines $nextoveridx $nextprefix[lindex $overlines $nextoveridx] + #set inputchunks [linsert $inputchunks $nextoveridx $nextprefix] + ledit inputchunks $nextoveridx -1 $nextprefix + } } } - } if {[llength $nextprefix_list]} { #set inputchunks [linsert $inputchunks 0 $nextprefix] @@ -1669,13 +1676,17 @@ tcl::namespace::eval overtype { } } } - lappend outputlines $rendered + #JULZ + #lappend outputlines $rendered + lappend outputlines $rendered\x1b\[m #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent $undertext $overtext] } else { #background block is wider than or equal to data for this line #lappend outputlines [renderline -insert_mode 0 -startcolumn [expr {$left_exposed + 1}] -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] set rinfo [renderline -info 1 -insert_mode 0 -startcolumn [expr {$left_exposed + 1}] -transparent $opt_transparent -exposed1 $opt_exposed1 -exposed2 $opt_exposed2 $undertext $overtext] - lappend outputlines [tcl::dict::get $rinfo result] + #JULZ + #lappend outputlines [tcl::dict::get $rinfo result] + lappend outputlines [tcl::dict::get $rinfo result]\x1b\[m } set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] set replay_codes_overlay [tcl::dict::get $rinfo replay_codes_overlay] @@ -1787,6 +1798,9 @@ tcl::namespace::eval overtype { set overflowlength [expr {$overtext_datalen - $renderwidth}] if {$overflowlength > 0} { #raw overtext wider than undertext column + #broken: + #todo - renderline -overflow is invalid. + # we need renderline to support -expand_left ?? set rinfo [renderline\ -info 1\ -insert_mode 0\ @@ -1814,13 +1828,18 @@ tcl::namespace::eval overtype { } } } - lappend outputlines $rendered + #JULZ + #lappend outputlines $rendered + lappend outputlines $rendered\x1b\[m } else { #padded overtext #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent -startcolumn [expr {$left_exposed + 1}] $undertext $overtext] #Note - we still need overflow(exapnd_right) here - as although the overtext is short - it may oveflow due to the startoffset set rinfo [renderline -info 1 -insert_mode 0 -transparent $opt_transparent -expand_right $opt_overflow -startcolumn [expr {$left_exposed + 1 + $startoffset}] $undertext $overtext] - lappend outputlines [tcl::dict::get $rinfo result] + + #JULZ + #lappend outputlines [tcl::dict::get $rinfo result] + lappend outputlines [tcl::dict::get $rinfo result]\x1b\[m } set replay_codes [tcl::dict::get $rinfo replay_codes] set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] @@ -2014,7 +2033,8 @@ tcl::namespace::eval overtype { # } #} } - lappend outputlines $rendered + #JULZ + lappend outputlines $rendered\x1b\[m } else { #padded overtext #lappend outputlines [renderline -insert_mode 0 -transparent $opt_transparent -startcolumn [expr {$left_exposed + 1}] $undertext $overtext] @@ -2023,7 +2043,9 @@ tcl::namespace::eval overtype { #puts stderr "--> [ansistring VIEW -lf 1 -nul 1 $rinfo] <--" set overflow_right [tcl::dict::get $rinfo overflow_right] set unapplied [tcl::dict::get $rinfo unapplied] - lappend outputlines [tcl::dict::get $rinfo result] + #JULZ + #lappend outputlines [tcl::dict::get $rinfo result] + lappend outputlines [tcl::dict::get $rinfo result]\x1b\[m } set replay_codes [tcl::dict::get $rinfo replay_codes] set replay_codes_underlay [tcl::dict::get $rinfo replay_codes_underlay] @@ -2136,6 +2158,24 @@ tcl::namespace::eval overtype { }] } + proc stack_eq {a b} { + #single level list equality test to avoid generating internal string representations of the lists unnecessarily. + if {[llength $a] != [llength $b]} { + return 0 + } + foreach code1 $a code2 $b { + if {$code1 ne $code2} { + return 0 + } + } + return 1 + } + + #todo: tests + #set j [overtype::renderline -transparent " " -insert_mode 0 -expand_right 1 "[a+ red underline]xxx[a+ blue][a+ nounderline]" "[a green]J" ]yyy + # yyy should be blue with no underline - and the J should be green - and the x's should be red with underline and the J should overwrite the first x + #At the moment we return a reset at the end of the renderline result instead of the replay codes. + proc renderline {args} { #todo - fix 'unapplied' mechanism.This is particularly inefficient for long lines, or data such as binarytext which is not line-based. #All unapplied data is re-split/reprocessed repeatedly for each line! This is very wasteful and slow. @@ -2476,7 +2516,9 @@ tcl::namespace::eval overtype { if {$maybemouse ne "<" && [tcl::string::index $code end] eq "m"} { if {[punk::ansi::codetype::is_sgr_reset $code]} { - set u_codestack [list "\x1b\[m"] + #will normalize all resets to the same code - including 8bit reset. + #set u_codestack [list "\x1b\[m"] + set u_codestack [list $code] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set u_codestack [list $code] } else { @@ -2557,6 +2599,17 @@ tcl::namespace::eval overtype { } } + #---------------------------------------- + #set test_c [showlist $undercols] + ##set test_s [showlist $understacks %ansiview] + #set sview [list] + #foreach us $understacks { + # lappend sview [ansistring VIEW $us] + #} + #set test_s [showlist $sview] + #puts stderr "undercols/stacks:\n[textblock::join -- $test_c " " $test_s]" + #---------------------------------------- + if {$opt_width ne "\uFFEF"} { set renderwidth $opt_width } else { @@ -2567,7 +2620,10 @@ tcl::namespace::eval overtype { #trailing codes in effect for underlay if {[llength $u_codestack]} { #set replay_codes_underlay [join $u_codestack ""] - set replay_codes_underlay [punk::ansi::codetype::sgr_merge_list {*}$u_codestack] + #set replay_codes_underlay [punk::ansi::codetype::sgr_merge_list {*}$u_codestack] + #u_codestack was built from codes split using split_codes_single + #- so should already be simplified to single codes with no multiple SGR params in one code + set replay_codes_underlay [punk::ansi::codetype::sgr_merge_singles $u_codestack] } else { set replay_codes_underlay "" } @@ -2767,13 +2823,17 @@ tcl::namespace::eval overtype { } else { lappend overlay_grapheme_control_stacks $o_codestack #there will always be an empty code at end due to foreach on 2 vars with odd-sized list ending with pt (overmap coming from perlish split) - if {[punk::ansi::codetype::is_sgr_reset $code]} { - set o_codestack [list "\x1b\[m"] ;#reset better than empty list - fixes some ansi art issues + + set code_endswith_m [expr {[tcl::string::index $code end] eq "m"}] ;#skip SGR regexp testing for cases that don't end with m - as they can't be SGR + if {$code_endswith_m && [punk::ansi::codetype::is_sgr_reset $code]} { + #reset better than empty list - fixes some ansi art issues + #set o_codestack [list "\x1b\[m"] + set o_codestack [list $code] lappend overlay_grapheme_control_list [list sgr $code] - } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + } elseif {$code_endswith_m && [punk::ansi::codetype::has_sgr_leadingreset $code]} { set o_codestack [list $code] lappend overlay_grapheme_control_list [list sgr $code] - } elseif {[priv::is_sgr $code]} { + } elseif {$code_endswith_m && [priv::is_sgr $code]} { #basic simplification first - remove straight dupes set dup_posns [lsearch -all -exact $o_codestack $code] ;#must be -exact because of square-bracket glob chars set o_codestack [lremove $o_codestack {*}$dup_posns] @@ -2827,7 +2887,12 @@ tcl::namespace::eval overtype { lappend overstacks_gx $o_gxstack #set replay_codes_overlay [join $o_codestack ""] - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}$o_codestack] + if {[llength $o_codestack]} { + #set replay_codes_overlay [join $o_codestack ""] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge_singles $o_codestack] + } else { + set replay_codes_overlay [list] + } #if {[tcl::dict::exists $overstacks $max_overlay_grapheme_index]} { # set replay_codes_overlay [join [tcl::dict::get $overstacks $max_overlay_grapheme_index] ""] @@ -2952,7 +3017,7 @@ tcl::namespace::eval overtype { #specials - each shoud have it's own test of what to do if it happens after overflow_idx reached switch -- $chtest { "" { - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] if {$idx == 0} { #puts "---a at col 1" #linefeed at column 1 @@ -3069,8 +3134,7 @@ tcl::namespace::eval overtype { set next_gc [lindex $overlay_grapheme_control_list $gci+1] ;#next grapheme or control lassign $next_gc next_type next_item if {$autowrap_mode} { - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci-1]] - #set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci-1]] #don't incr idx beyond the overflow_idx #idx_over already incremented - decrement so current overlay grapheme stacks go to unapplied incr idx_over -1 @@ -3087,7 +3151,7 @@ tcl::namespace::eval overtype { #no point throwing back to caller for each grapheme that is overflowing #without this branch - renderline would be called with overtext reducing only by one grapheme per call #processing a potentially long overtext each time (ie - very slow) - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] #JMN4 } @@ -3427,7 +3491,7 @@ tcl::namespace::eval overtype { switch -exact -- $code_end { A { #Row move - up - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] #todo lassign [split $param {;}] num modifierkey if {$modifierkey ne ""} { @@ -3452,7 +3516,7 @@ tcl::namespace::eval overtype { #CUD - Cursor Down #Row move - down lassign [split $param {;}] num modifierkey - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] #move down if {$modifierkey ne ""} { puts stderr "modifierkey:$modifierkey" @@ -3503,7 +3567,7 @@ tcl::namespace::eval overtype { incr cursor_column $num } else { if {$autowrap_mode} { - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] #jmn if {$idx == $overflow_idx} { incr num @@ -3598,7 +3662,7 @@ tcl::namespace::eval overtype { set cursor_column 1 set idx 0 } else { - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] incr cursor_column -$num priv::render_to_unapplied $overlay_grapheme_control_list $gci set instruction wrapmovebackward @@ -3626,7 +3690,9 @@ tcl::namespace::eval overtype { set cursor_column 1 set cursor_row [expr {$cursor_row + $downmove}] set idx [expr {$cursor_column -1}] - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #sgr_merge_list + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] + #sgr_merge_singles ?? incr idx_over priv::render_to_unapplied $overlay_grapheme_control_list $gci set instruction move @@ -3647,7 +3713,7 @@ tcl::namespace::eval overtype { set cursor_row 1 } set idx [expr {$cursor_column - 1}] - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] incr idx_over priv::render_to_unapplied $overlay_grapheme_control_list $gci set instruction move @@ -3656,6 +3722,7 @@ tcl::namespace::eval overtype { } G { #CHA - Cursor Horizontal Absolute (move to absolute column no) + #see also HPA - Horizontal Position Absolute (same functionality) if {$param eq ""} { set targetcol 1 } else { @@ -3680,6 +3747,29 @@ tcl::namespace::eval overtype { set cursor_column $targetcol #puts stderr "renderline absolute col move ESC G (TEST)" } + ` { + #https://vt100.net/docs/vt510-rm/HPA.html + #docs don't mention that it defaults to one if $parm omitted - but it seems to do in practice + if {$param eq ""} { + set targetcol 1 + } else { + set targetcol $param + if {![string is integer -strict $targetcol]} { + puts stderr "renderline HPA (Horizontal Position Absolute) error. Unrecognised parameter '$param'" + } + set targetcol [expr {$param}] + set max [llength $outcols] + if {$overflow_idx == -1} { + incr max + } + if {$targetcol > $max} { + puts stderr "renderline HPA (Horizontal Position Absolute) error. Param '$param' > max: $max" + set targetcol $max + } + } + set idx [expr {($targetcol -1) + $opt_colstart -1}] + set cursor_column $targetcol + } H - f { #CSI n;m H - CUP - Cursor Position @@ -3727,7 +3817,7 @@ tcl::namespace::eval overtype { set cursor_row $target_row set cursor_column $target_column set idx [expr {$cursor_column -1}] - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] incr idx_over priv::render_to_unapplied $overlay_grapheme_control_list $gci set instruction move @@ -3758,7 +3848,7 @@ tcl::namespace::eval overtype { set cursor_row 1 set cursor_column 1 set idx [expr {$cursor_column -1}] - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] incr idx_over if {[llength $outcols]} { priv::render_erasechar 0 [llength $outcols] @@ -4000,7 +4090,8 @@ tcl::namespace::eval overtype { } } #append cursor_saved_attributes [join $sgr_stack ""] - append cursor_saved_attributes [punk::ansi::codetype::sgr_merge_list {*}$sgr_stack] + #append cursor_saved_attributes [punk::ansi::codetype::sgr_merge_list {*}$sgr_stack] + append cursor_saved_attributes [punk::ansi::codetype::sgr_merge $sgr_stack] #as there is apparently only one cursor storage element we don't need to throw back to the calling loop for a save. @@ -4024,7 +4115,7 @@ tcl::namespace::eval overtype { # set replay_codes_overlay $cursor_saved_attributes ;#empty - or last save if it happend in this input chunk #} else { #jj - #set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + #set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] set replay_codes_overlay "" #} @@ -4398,7 +4489,7 @@ tcl::namespace::eval overtype { #vt102-docs: "Moves cursor up one line in same column. If cursor is at top margin, screen performs a scroll-down" puts stderr "overtype::renderline ESC M not fully implemented" - set replay_codes_overlay [punk::ansi::codetype::sgr_merge_list {*}[lindex $overlay_grapheme_control_stacks $gci]] + set replay_codes_overlay [punk::ansi::codetype::sgr_merge [lindex $overlay_grapheme_control_stacks $gci]] #move up incr cursor_row -1 if {$cursor_row < 1} { @@ -4743,6 +4834,9 @@ tcl::namespace::eval overtype { #puts stderr "first_tail_null_posn: $first_tail_null_posn" #puts stderr "colview: [ansistring VIEW $outcols]" + + #NOTE understacks has been updated with data from the overlay - so it should reflect the final state of the stacks for each grapheme in outcols + foreach ch $outcols { #puts "---- [ansistring VIEW $ch]" @@ -4766,15 +4860,58 @@ tcl::namespace::eval overtype { if {$i < [llength $understacks]} { #set cstack [tcl::dict::get $understacks $i] set cstack [lindex $understacks $i] - if {$cstack ne $prevstack} { - if {[llength $prevstack] && ![llength $cstack]} { - #This reset is important e.g testfile fruit.ans - we get overhang on rhs without it. But why is cstack empty? - append sgrleader \033\[m + #use stack_eq for depth 1 comparison without generating string rep. + if {![stack_eq $cstack $prevstack]} { + #possible SGR attribute change. + if {[llength $prevstack]} { + if {![llength $cstack]} { + #why is cstack empty? + #a) no ansi in underlay and we are at a position 2 after an overlay insertion. + # (position 1 after overlay insertion should already have had a reset inserted) + #b) no ansi in overlay and we are at an overlay insertion point. + + #-------------- + #review + #todo? consider testing next-char's understack when applying each overlay char in the main loop. + #if empty or has no leading reset - we need to add a leading reset at that point. + #-------------- + + + #-------- + #following statement is FALSE - (historical info). Doesn't seem to apply. + #This reset is important e.g testfile fruit.ans - we get overhang on rhs without it. + #append sgrleader \033\[m + #-------- + + ##test + #set view_prev "" + #foreach ps $prevstack { + # append view_prev [ansistring VIEW -lf 1 -vt 1 -nul 1 $ps] + #} + #puts stderr "col $i, ch: $ch - cstack empty vs prevstack $view_prev" + } else { + #without this we get extra redundant codes in some places. + #e.g a continuous string of underlay that originally had \x1b\[31m red text, + #but then when an overlay char is inserted near the start, the following underlay char (insertion index +1) codestack had a reset added. + #All subsequent underlay chars in the same run of plaintext don't have the reset and so appear 'different' but are actually part of the same run. + #check if actually different. ie if current stack actually changes anything from previous stack when merged together. + set prevmerge [punk::ansi::codetype::sgr_merge $prevstack] + set currmerge [punk::ansi::codetype::sgr_merge $cstack] + set together [punk::ansi::codetype::sgr_merge [list $prevmerge $currmerge]] + if {$together ne $prevmerge} { + #stacks are different enough that we need to output something + #if {{[punk::ansi::codetype::has_sgr_leading_reset $currmerge]}} { + #} + append sgrleader $currmerge + } + } } else { - append sgrleader [punk::ansi::codetype::sgr_merge_list {*}$cstack] + if {[llength $cstack]} { + append sgrleader [punk::ansi::codetype::sgr_merge $cstack] + } } + set prevstack $cstack } - set prevstack $cstack } else { set prevstack [list] } @@ -4797,7 +4934,8 @@ tcl::namespace::eval overtype { #if {[llength $prevstack] && ![llength $cstack]} { # append sgrleader \033\[m #} - append sgrleader [punk::ansi::codetype::sgr_merge_list {*}$cstack] + #append sgrleader [punk::ansi::codetype::sgr_merge_list {*}$cstack] + append sgrleader [punk::ansi::codetype::sgr_merge $cstack] append overflow_right $sgrleader append overflow_right $ch } else { @@ -4853,14 +4991,50 @@ tcl::namespace::eval overtype { set replay_codes "" if {[llength $understacks] > 0} { if {$overflow_idx == -1} { - #set tail_idx [tcl::dict::size $understacks] set tail_idx [llength $understacks] } else { set tail_idx [llength $undercols] } - if {$tail_idx-1 < [llength $understacks]} { + if {$tail_idx == [llength $undercols]} { + #we got to the end of the original underlay + #- so we want the full stack at the end of the original underlay ie including trailing codes which are not associated with any grapheme in the underlay + #but would be in effect for any text after the underlay. + + #--------------------- + #REVIEW - determine if last col was overwritten by overlay? + #how best to determine if last underlay column was overwritten by overlay? + #we could track in the main loop whether each underlay column was overwritten by overlay + #This seems like the best mechanism, because the overlay ANSI can include movement codes, so the underlay can be overwritten in any order. + #We should consider that just because the last grapheme was overwritten, that doesn't necessarily mean we should disregard the trailing codes + #perhaps trailing underlay codes are never overwritten unless the overlay extends beyond the end of the underlay - in which case we can just check if overlay extends beyond end of underlay to determine whether to include trailing underlay codes in replay or not. + #if overlay extends beyond end of underlay - we use the overlay stack at the end of the underlay as the replay codes, which won't include any trailing underlay codes. + #--------------------- + + if {[lindex $undermap end] eq ""} { + #there were trailing codes in the underlay with no grapheme - we want to include those in the replay as they would affect any text after the underlay + #we need to backtrack from the end of the underlay to find the last grapheme with codes, and merge those codes with any trailing codes in the underlay with no grapheme + set tailcodes [list] ;#build in reverse order. + foreach {pt code} [lreverse $undermap] { + if {$pt ne ""} { + break + } + lappend tailcodes $code + } + set tailcodes [lreverse $tailcodes] + + #set tailcodes [lindex $undermap end-1] + set laststack [lindex $understacks $tail_idx-1] + lappend laststack {*}$tailcodes + set replay_codes [punk::ansi::codetype::sgr_merge $laststack] ;#stack at end of underlay including trailing codes + } else { + #last part of underlay was plain text with no trailing codes - we can just use the stack at the last grapheme of the underlay + set replay_codes [punk::ansi::codetype::sgr_merge [lindex $understacks $tail_idx-1]] ;#stack at end of underlay + } + + } elseif {$tail_idx-1 < [llength $understacks]} { #set replay_codes [join [lindex $understacks $tail_idx-1] ""] ;#tail replay codes - set replay_codes [punk::ansi::codetype::sgr_merge_list {*}[lindex $understacks $tail_idx-1]] ;#tail replay codes + #set replay_codes [punk::ansi::codetype::sgr_merge_list {*}[lindex $understacks $tail_idx-1]] ;#tail replay codes + set replay_codes [punk::ansi::codetype::sgr_merge [lindex $understacks $tail_idx-1]] ;#tail replay codes } if {$tail_idx-1 < [llength $understacks_gx]} { set gx0 [lindex $understacks_gx $tail_idx-1] @@ -4876,10 +5050,33 @@ tcl::namespace::eval overtype { #pdict $understacks if {[punk::ansi::ta::detect_sgr $outstring]} { - append outstring [punk::ansi::a] ;#without this - we would get for example, trailing backgrounds after rightmost column + #JULZ + + #The caller is responsible for adding a reset at the end of returned lines depending on how they want to use it - so we don't add one here. + # + #append outstring [punk::ansi::a] ;#without this - we would get for example, trailing backgrounds after rightmost column + # + + #we only want to append the replay codes if they are different to those already in effect at the end of the rendered line. + if {$overflow_idx == -1} { + set tail_idx [llength $understacks] + } else { + set tail_idx [llength $undercols] + } + set laststack [lindex $understacks $tail_idx-1] + set laststackmerge [punk::ansi::codetype::sgr_merge $laststack] + if {$replay_codes ne $laststackmerge} { + append outstring $replay_codes + } + #review #close off any open gx? - #probably should - and overflow_right reopen? + #probably not, this is akin to adding a reset to close off open SGR codes, which we specifically don't do. + #caller will need to close off any open gx at the end of the line if they want to, and provide appropriate replay codes for the next line if they want to maintain gx state across lines. + #we just need to make sure we provide all necessary info in the result dictionary. + #todo - tests and examples. + + #and overflow_right reopen? } if {$opt_returnextra} { @@ -4902,29 +5099,29 @@ tcl::namespace::eval overtype { set result [tcl::dict::create\ result $outstring\ - visualwidth [punk::ansi::printing_length $outstring]\ - instruction $instruction\ - stringlen [string length $outstring]\ - overflow_right_column $overflow_right_column\ - overflow_right $overflow_right\ - unapplied $unapplied\ - unapplied_list $unapplied_list\ - unapplied_ansisplit $unapplied_ansisplit\ - insert_mode $insert_mode\ - autowrap_mode $autowrap_mode\ - crm_mode $crm_mode\ - reverse_mode $reverse_mode\ - insert_lines_above $insert_lines_above\ - insert_lines_below $insert_lines_below\ - cursor_saved_position $cursor_saved_position\ + visualwidth [punk::ansi::printing_length $outstring]\ + instruction $instruction\ + stringlen [string length $outstring]\ + overflow_right_column $overflow_right_column\ + overflow_right $overflow_right\ + unapplied $unapplied\ + unapplied_list $unapplied_list\ + unapplied_ansisplit $unapplied_ansisplit\ + insert_mode $insert_mode\ + autowrap_mode $autowrap_mode\ + crm_mode $crm_mode\ + reverse_mode $reverse_mode\ + insert_lines_above $insert_lines_above\ + insert_lines_below $insert_lines_below\ + cursor_saved_position $cursor_saved_position\ cursor_saved_attributes $cursor_saved_attributes\ - cursor_column $cursor_column\ - cursor_row $cursor_row\ - expand_right $opt_expand_right\ - replay_codes $replay_codes\ - replay_codes_underlay $replay_codes_underlay\ - replay_codes_overlay $replay_codes_overlay\ - pm_list $pm_list\ + cursor_column $cursor_column\ + cursor_row $cursor_row\ + expand_right $opt_expand_right\ + replay_codes $replay_codes\ + replay_codes_underlay $replay_codes_underlay\ + replay_codes_overlay $replay_codes_overlay\ + pm_list $pm_list\ ] if {$opt_returnextra == 1} { #puts stderr "renderline: $result" @@ -5073,6 +5270,11 @@ tcl::namespace::eval overtype::priv { #caching the answer saves some regex expense - possibly a few uS to lookup vs under 1uS #todo - test if still worthwhile after a large cache is built up. (limit cache size?) proc is_sgr {code} { + set code_endswith_m [expr {[tcl::string::index $code end] eq "m"}] ;#skip SGR regexp testing for cases that don't end with m - as they can't be SGR + if {!$code_endswith_m} { + #don't even cache. + return 0 + } variable cache_is_sgr if {[tcl::dict::exists $cache_is_sgr $code]} { return [tcl::dict::get $cache_is_sgr $code] @@ -5081,6 +5283,7 @@ tcl::namespace::eval overtype::priv { tcl::dict::set cache_is_sgr $code $answer return $answer } + proc render_to_unapplied {overlay_grapheme_control_list gci} { upvar idx_over idx_over @@ -5104,7 +5307,8 @@ tcl::namespace::eval overtype::priv { set unapplied_ansisplit [list ""] #append unapplied [join [lindex $overstacks $idx_over] ""] #append unapplied [punk::ansi::codetype::sgr_merge_list {*}[lindex $overstacks $idx_over]] - set sgr_merged [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] + #set sgr_merged [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] + set sgr_merged [punk::ansi::codetype::sgr_merge [lindex $og_stacks $gci]] if {$sgr_merged ne ""} { lappend unapplied_list $sgr_merged lappend unapplied_ansisplit $sgr_merged "" @@ -5167,7 +5371,8 @@ tcl::namespace::eval overtype::priv { set unapplied_list [list] set unapplied_ansisplit [list ""] ;#remove empty entry at end if nothing added - set sgr_merged [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] + #set sgr_merged [punk::ansi::codetype::sgr_merge_list {*}[lindex $og_stacks $gci]] + set sgr_merged [punk::ansi::codetype::sgr_merge [lindex $og_stacks $gci]] if {$sgr_merged ne ""} { lappend unapplied_list $sgr_merged lappend unapplied_ansisplit $sgr_merged "" @@ -5217,9 +5422,13 @@ tcl::namespace::eval overtype::priv { upvar understacks_gx gxstacks set nxt [llength $o] if {$i < $nxt} { - set o [lreplace $o $i $i] - set ustacks [lreplace $ustacks $i $i] - set gxstacks [lreplace $gxstacks $i $i] + #set o [lreplace $o $i $i] + ledit o $i $i + #set ustacks [lreplace $ustacks $i $i] + ledit ustacks $i $i + #review - do we need to ensure that stack at new $i has a reset code at the start? + #set gxstacks [lreplace $gxstacks $i $i] + ledit gxstacks $i $i } elseif {$i == 0 || $i == $nxt} { #nothing to do } else { @@ -5329,6 +5538,27 @@ tcl::namespace::eval overtype::priv { } if {$i < [llength $ustacks]} { lset ustacks $i $sgrstack + #check if next ustacks entry has a reset. + #It will need one if it doesn't already have one because our inserted char should not affect the pre-existing ansi state of the underlay. + #we have just replaced an entry into the ustacks at position i but we are still at the same position - so the next entry is still at position i+1 + if {[llength $sgrstack] && $i+1 < [llength $ustacks]} { + set next_ustack [lindex $ustacks $i+1] + #could be a reset or just empty - either way we need to add a reset if it's not already there + #(empty if underlay had no ansi) + #temporarily emit something to stderr + if {![llength $next_ustack]} { + #puts -nonewline stderr " next_ustack (empty) at position [expr {$i+1}] after replacing position $i with '$c' and sgrstack '[join $sgrstack ""]'\n" + lset ustacks $i+1 [list "\x1b\[m"] + } else { + #review - next_ustack is a list - has_sgr_leadingreset will not work as expected if called on whole next_ustack as a list. + #As the stack will need merging anyway - we can just prepend a reset without checking. + #REVIEW. + #puts -nonewline stderr "check next_ustack '$next_ustack' for reset at position [expr {$i+1}] after replacing position $i with '$c' and sgrstack '[join $sgrstack ""]'\n" + #set next_ustack [linsert $next_ustack 0 [a+ reset]] + ledit next_ustack -1 -1 "\x1b\[m" + lset ustacks $i+1 $next_ustack + } + } lset gxstacks $i $gx0stack } else { lappend ustacks $sgrstack @@ -5339,7 +5569,8 @@ tcl::namespace::eval overtype::priv { if {$i < $nxt} { #set o [linsert $o $i $c] #JMN insert via ledit - ledit o $i $i-1 $c + #ledit o $i $i-1 $c + ledit o $i -1 $c } else { lappend o $c } @@ -5347,8 +5578,10 @@ tcl::namespace::eval overtype::priv { #set ustacks [linsert $ustacks $i $sgrstack] #set gxstacks [linsert $gxstacks $i $gx0stack] #insert via ledit - ledit ustacks $i $i-1 $sgrstack - ledit gxstacks $i $i-1 $gx0stack + #ledit ustacks $i $i-1 $sgrstack + ledit ustacks $i -1 $sgrstack + #ledit gxstacks $i $i-1 $gx0stack + ledit gxstacks $i -1 $gx0stack } else { lappend ustacks $sgrstack lappend gxstacks $gx0stack diff --git a/src/bootsupport/modules/punk-0.1.tm b/src/bootsupport/modules/punk-0.1.tm index 6d181f27..521f8cf7 100644 --- a/src/bootsupport/modules/punk-0.1.tm +++ b/src/bootsupport/modules/punk-0.1.tm @@ -1743,7 +1743,7 @@ namespace eval punk { append script \n {set assigned [ansistring length $leveldata]} set level_script_complete 1 } - %str { + %str - %string { set active_key_type "string" if {$get_not} { error "!%str - not string-get is not supported" @@ -1752,6 +1752,9 @@ namespace eval punk { append script \n {# set active_key_type "" index_operation: string-get} append script \n {set assigned $leveldata} set level_script_complete 1 + + #todo - %lpad- %lpadstr- %join- etc as in punk::lib::showdict + #review - merge code shared with showdict for these operations } %sp { #experimental @@ -1822,6 +1825,8 @@ namespace eval punk { set level_script_complete 1 } %ansiview { + #review - implemented differently in showdict. + #(showdict uses ansistring VIEW -lf 1 ) set active_key_type "string" if {$get_not} { error "!%# not string-ansiview is not supported" @@ -2446,7 +2451,41 @@ namespace eval punk { set index }] } + %split-* { + #split on one or more chars - review + #set hidekey 1 + #lassign [split $key -] _ splitchars + #set thisval [split $dval $splitchars] + set active_key_type "string" + set splitchars [string range $index 7 end] + append script \n [string map [list $splitchars] { + # set active_key_type "string" index_operation: split-string + #e.g supports %split-"\\n"= "l1\n\nl3" -> {l1 "" l3} + set splitchars "" + set assigned [split $leveldata $splitchars] + }] + set level_script_complete 1 + + #todo %splitat- %splitn- ?? + } + %lpad-* { + #moved from punk::lib::showdict patterns. + #set hidekey 1 + #lassign [split $key -] _ extra + #set width [expr {[textblock::width $dval] + $extra}] + #set thisval [textblock::pad $dval -which left -width $width] + set active_key_type "string" + set extra [string range $index 6 end] + append script \n [string map [list $extra] { + # set active_key_type "string" index_operation: lpad-string + set extra "" + set width [expr {[textblock::width $leveldata] + $extra}] + set assigned [textblock::pad $leveldata -which left -width $width] + }] + set level_script_complete 1 + } %* { + #see above re %lpad- etc and synchronizing with showdict set active_key_type "string" set do_bounds_check 0 set index [string range $index 1 end] @@ -2827,11 +2866,21 @@ namespace eval punk { } else { if {$is_range} { lappend INDEX_OPERATIONS list-range + #todo - if we know it's a contiguous range, we could use lrange here instead of lindex + #we would also need to detect if it's a reverse range such as @5..1 and handle that correctly + #- lrange doesn't support reverse ranges, but we could resolve the indexset to a list of indices + #and then use lindex with that list of indices to get the correct result. + #we don't always know at this point if the range is in reverse or not because we don't know the size of the list until + #runtime - so we will handle both cases in the same way for now. + #e.g for index 5..end-6 - this could be forward or reverse depending on the length of the list. + set assign_script { + set assigned [lmap i [punk::lib::indexset_resolve [llength $leveldata] ] {lindex $leveldata $i}] + } } else { lappend INDEX_OPERATIONS listindex - } - set assign_script { - set assigned [lmap i [punk::lib::indexset_resolve [llength $leveldata] ] {lindex $leveldata $i}] + set assign_script { + set assigned [lindex $leveldata [punk::lib::indexset_resolve [llength $leveldata] ]] + } } } @@ -2881,6 +2930,8 @@ namespace eval punk { } set script [string map [list $index] $script] } elseif {[string first "end" $index] >=0} { + #review - obsoleted by indexset syntax. prune branch? + puts stderr "index with end detected - review if this branch still reachable - prune? $index" if {[regexp {^end([-+]{1,2}[0-9]+)$} $index _match endspec]} { if {$get_not} { @@ -2923,6 +2974,8 @@ namespace eval punk { } } elseif {[regexp {^([0-9]+|end|end[-+]{1,2}[0-9]+)-([0-9]+|end|end[-+]{1,2}([0-9]+))$} $index _ start end]} { + #review - obsoleted by indexset syntax. prune branch? + puts stderr "index with range and end detected - review if this branch still reachable - prune? $index" if {$get_not} { lappend INDEX_OPERATIONS list-range-not set assign_script [string map [list $start $end ] { @@ -3012,6 +3065,10 @@ namespace eval punk { error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector] } } elseif {[string first - $index] > 0} { + puts stderr "index with - detected - review if this branch still reachable - prune? $index" + #review - we changed to detect indexset above. + #syntax @m-n should be deprecated in favour of @m..n + #todo - check if this branch still reachable - prune? #e.g @1-3 gets here #JMN if {$get_not} { @@ -3089,19 +3146,61 @@ namespace eval punk { } } } elseif {$active_key_type eq "string"} { - if {[string match *-* $index]} { - lappend INDEX_OPERATIONS string-range - set re_idxdashidx {^([-+]{0,1}\d+|end[-+]{1}\d+|end)-([-+]{0,1}\d+|end[-+]{1}\d+|end)$} - #todo - support more complex indices: 0-end-1 etc + #changed to indexset notation m..n allowing eg 2..end-1 etc. + #if {[string match *-* $index]} {} + + if {[punk::lib::is_indexset $index]} { + #review - we are assuming a single element indexset here - ie no comma separated sets. + + #todo - support $get_not + #todo - consider bounds_check for string indices. + # - Tcl doesn't do bounds checking for string index, but we need to consider in the context of pattern-matching + # whether we want to support syntaxes for with and without bounds checking on string indices. + + set is_range [expr {[string first ".." $index] >= 0}] + if {$is_range} { + lappend INDEX_OPERATIONS string-range + #review - not efficient for contiguous monotonically increasing ranges + #because we are retrievinng each character individually and concatenating + #- but it is more flexible because it also supports reverse ranges and could support non-contiguous ranges such as @0,2,4..6 + set assign_script { + set assigned [join [lmap i [punk::lib::indexset_resolve [string length $leveldata] ] {string index $leveldata $i}] ""] + } + } else { + lappend INDEX_OPERATIONS string-index + set assign_script { + set assigned [string index $leveldata [punk::lib::indexset_resolve [string length $leveldata] ]] + } + } + + #set assign_script { + # set assigned [lmap i [punk::lib::indexset_resolve [llength $leveldata] ] {lindex $leveldata $i}] + #} - lassign [split $index -] a b + #todo - consider where/if we can support 'ansistring INDEX' for ANSI strings. + #if so - it shouldn't overload the % operator we currently use for string access. append script \n [tstr -return string -allowcommands { - # set active_key_type "string" - set assigned [string range $leveldata ${$a} ${$b}] + if {$leveldata eq ""} { + set assigned "" + } else { + ${$assign_script} + } }] + set script [string map [list $index] $script] + + + #set re_idxdashidx {^([-+]{0,1}\d+|end[-+]{1}\d+|end)-([-+]{0,1}\d+|end[-+]{1}\d+|end)$} + ##todo - support more complex indices: 0-end-1 etc + + #lassign [split $index -] a b + #append script \n [tstr -return string -allowcommands { + # # set active_key_type "string" + # set assigned [string range $leveldata ${$a} ${$b}] + #}] } else { if {$index eq "*"} { + #equivalent to indexset ".." lappend INDEX_OPERATIONS string-all append script \n [tstr -return string -allowcommands { # set active_key_type "string" @@ -4294,6 +4393,7 @@ namespace eval punk { } #todo check end-x bounds? } + #todo - change to ledit if {$isint} { append script [string map [list $listvar $index $exp $data] { set [linsert [lindex [list $ [unset ]] 0] ] @@ -4350,7 +4450,8 @@ namespace eval punk { #last element has no -, so we are inserting at the final position - not replacing append script [string map [list $listvar [lrange $parts 0 end-1] $last $exp $data] { set target [lindex $ ] - set target [linsert $target ] + #set target [linsert $target ] + ledit target -1 lset $target }] } @@ -8564,7 +8665,7 @@ namespace eval punk { lappend chunks [list stdout $text] } console - term - terminal { - set term_env_vars {TERM TERM_PROGRAM TERM_PROGRAM_VERSION} + set term_env_vars {TERM TERM_PROGRAM TERM_PROGRAM_VERSION COLORTERM} set term_dict [dict create] foreach e $term_env_vars { if {[info exists ::env($e)]} { @@ -8577,6 +8678,7 @@ namespace eval punk { append text [punk::lib::showdict $term_dict] \n lappend chunks [list stdout $text] set text "" + set indent [string repeat " " [string length "WARNING: "]] if {[catch {package require punk::console} result]} { set text "Unable to load punk::console package - cannot test\n$result" @@ -8591,7 +8693,6 @@ namespace eval punk { } lappend chunks [list stdout $text] - set indent [string repeat " " [string length "WARNING: "]] lappend cstring_tests [dict create\ type "PM "\ msg "UN"\ @@ -8686,10 +8787,45 @@ namespace eval punk { } } } + set posn [punk::console::get_cursor_pos] ;#warmup call - and test if works + if {$posn eq ""} { + append warningblock \n "WARNING: terminal doesn't respond to cursor position query - may cause display bugs in some cases." + } else { + set timeresult [timerate {set cpos [punk::console::get_cursor_pos]}] + lassign [split $cpos {;}] row col + if {![string is integer -strict $row] || ![string is integer -strict $col]} { + append warningblock \n "WARNING: terminal returns non-integer values for cursor position query - may cause display bugs in some cases. got row:'$row' col:'$col'" + } else { + set micros [lindex $timeresult 0] + if {$micros > 2000} { + append warningblock \n "WARNING: terminal cursor position query is very slow ($micros microseconds - expect < 2000us )" + append warningblock \n $indent "- may cause display lag/bugs in some cases." + } else { + if {$micros > 1000} { + set text "\n[a+ yellow]Terminal cursor position query test passed." + append text \n $indent "Response time: ${micros} microseconds (OK, good would be <= 1000us).[a]" + + } else { + set text "[a+ green]Terminal cursor position query test passed." + append text \n $indent "Response time: ${micros} microseconds (GOOD).[a]" + } + lappend chunks [list stdout $text] + } + } + } + + if {![string length $warningblock]} { set text "[a+ green]No terminal warnings[a]\n" lappend chunks [list stdout $text] + } else { + set mode [punk::console::mode] + if {$mode eq "line"} { + append warningblock \n "Terminal appears to be in line mode. Consider switching to raw mode and re-testing (command: punk::console::mode raw)." + } } + puts stdout [punk::ansi::move_back 200] ;#hack for some horizontal position bugs where the above tests can leave the cursor in the wrong place for the next output. + #200 is arbitrary large number to move back enough to get to start of line. } } topics - help { @@ -8815,10 +8951,11 @@ namespace eval punk { - #interp alias {} c {} clear ;#external executable 'clear' may not always be available #todo - review - interp alias {} clear {} ::punk::reset - interp alias {} c {} ::punk::reset + #interp alias {} clear {} ::punk::reset + #interp alias {} c {} ::punk::reset + + interp alias {} reset {} ::punk::reset proc reset {} { if {[llength [info commands ::punk::repl::reset_terminal]]} { #punk::repl::reset_terminal notifies prompt system of reset @@ -8828,6 +8965,91 @@ namespace eval punk { } } + namespace eval argdoc { + punk::args::define { + @id -id ::punk::ansi8 + @cmd -name punk::ansi8\ + -summary\ + "Tell terminal to enable 8-bit ANSI codes."\ + -help\ + "Enable 8-bit ANSI codes in the terminal. + May not be supported by all terminals. + Some terminals may already have 8-bit ANSI enabled, but some may require an explicit command to enable it. + 7-bit ANSI codes are generally preferred - and will still work on terminals with 8-bit ANSI support. + + (This is nothing to do with 8-bit colors - it is about the underlying bytes used for ANSI control sequences). + The ANSI sequence sent to the terminal to enable 8-bit codes is: ESC 7 + + To disable 8-bit ANSI support - a reset of the terminal may be required. + " + @opts + @values -min 0 -max 0 + } + } + proc ansi8 {} { + punk::console::S8C1R + } + namespace eval argdoc { + punk::args::define { + @id -id ::punk::clear + @cmd -name punk::clear\ + -summary\ + "Clear the terminal screen (and scrollback buffer by default)."\ + -help\ + "Clear the terminal screen. + By default this will also clear scrollback if supported by the terminal. + With -x option it will preserve scrollback but clear the screen. + " + @opts + -x -optional 1 -type none -mash 1 -help\ + "Preserve scrollback (if supported by terminal) but clear screen." + -s -optional 1 -type none -mash 1 -help\ + "Stay at the current cursor position instead of moving to top-left after clearing." + @values -min 0 -max 0 + } + } + proc clear {args} { + set argd [punk::args::parse $args withid ::punk::clear] + lassign [dict values $argd] leaders opts values received + set opt_x [dict exists $received -x] + set opt_s [dict exists $received -s] + # -x preserves scrollback but clears screen + if {$opt_s} { + #set pre_move_cmd [punk::ansi::move_up 1] + #review - terminal support for save/restore. + #we can just move up one line before clearing to preserve the line we're on, + #but this won't work if we're already at the last line. + #save/restore would be better if widely supported. + + #review - get_size already calls get_cursor pos - maybe we can optimize by not calling get_cursor_pos separately? + #review - consider turning off cursor updating while doing this to avoid flicker? + set cpos [punk::console::get_cursor_pos] + set row [lindex $cpos 0] + set size [punk::console::get_size] + set lastrow [dict get $size rows] + if {$row >= $lastrow} { + set pre_move_cmd [punk::ansi::cursor_save_dec] + } else { + set pre_move_cmd [punk::ansi::move_up 1][punk::ansi::cursor_save_dec] + } + set move_cmd [punk::ansi::cursor_restore_dec] + + #set pre_move_cmd [punk::ansi::move_up 1] + #set move_cmd "" + + } else { + set pre_move_cmd "" + set move_cmd [punk::ansi::move 1 1] + } + if {$opt_x} { + puts -nonewline stdout $pre_move_cmd[punk::ansi::clear]$move_cmd + } else { + puts -nonewline stdout $pre_move_cmd[punk::ansi::clear_all]$move_cmd + } + } + #c aliased to clear -xs + #cc aliases to clear -x + #fileutil::cat except with checking for windows illegal path names (when on windows platform) diff --git a/src/bootsupport/modules/punk/aliascore-0.1.0.tm b/src/bootsupport/modules/punk/aliascore-0.1.0.tm index e56e00ea..c93365e7 100644 --- a/src/bootsupport/modules/punk/aliascore-0.1.0.tm +++ b/src/bootsupport/modules/punk/aliascore-0.1.0.tm @@ -125,6 +125,10 @@ tcl::namespace::eval punk::aliascore { grepstr ::punk::ansi::grepstr\ colour ::punk::console::colour\ color ::punk::console::colour\ + ansi8 ::punk::ansi8\ + clear ::punk::clear\ + c {::punk::clear -xs}\ + cc {::punk::clear -x}\ ansi ::punk::console::ansi\ a? ::punk::console::code_a?\ A? {::punk::console::code_a? forcecolor}\ diff --git a/src/bootsupport/modules/punk/ansi-0.1.1.tm b/src/bootsupport/modules/punk/ansi-0.1.1.tm index fb607e8e..d4f24480 100644 --- a/src/bootsupport/modules/punk/ansi-0.1.1.tm +++ b/src/bootsupport/modules/punk/ansi-0.1.1.tm @@ -66,12 +66,12 @@ package require punk::assertion #*** !doctools #[section API] - - tcl::namespace::eval punk::ansi::class { + if {![llength [tcl::info::commands class_ansi]]} { oo::class create class_ansi { + variable o_ansistringobj variable o_render_dimensions ;#last dimensions at which we rendered @@ -84,7 +84,7 @@ tcl::namespace::eval punk::ansi::class { #a straight string compare may be faster.. but a checksum is much smaller in memory, so we'll use that by default. set o_rendered_what "" - #There may also be advantages to renering to a class_ansistring class object + #There may also be advantages to rendering to a class_ansistring class object set o_render_dimensions $dimensions set o_ansistringobj [ansistring NEW $ansitext] @@ -651,7 +651,7 @@ tcl::namespace::eval punk::ansi { If SAUCE data specifies dimensions, they will be used if this value is not supplied. " - -crm_mode -type binary -default 0 + -crm_mode -type boolean -default 0 -sauce -type none -help\ "Display SAUCE and comments after image" @values -min 1 -max 1 @@ -814,41 +814,45 @@ tcl::namespace::eval punk::ansi { return $result } - proc Get_ansifolder {} { - if {[catch {punk::repo::find_project} base]} { - set base "" - } - if {$base eq ""} { - #pwd not avail in safe interp - if {![catch {pwd} CWD]} { - set base $CWD + + namespace eval argdoc { + proc Get_ansifolder {} { + if {[catch {punk::repo::find_project} base]} { + set base "" } + if {$base eq ""} { + #pwd not avail in safe interp + if {![catch {pwd} CWD]} { + set base $CWD + } + } + if {[info commands file] eq ""} { + #probably a safe interp + return "UNAVAILABLE" + } + return [file join $base src/testansi] } - if {[info commands file] eq ""} { - #probably a safe interp - return "UNAVAILABLE" - } - return [file join $base src/testansi] - } + variable DYN_ANSIFOLDER {${[Get_ansifolder]}} - lappend PUNKARGS [list { - @dynamic - @id -id ::punk::ansi::example - @cmd -name punk::ansi::example -help "Display .ans image files in a grid that will fit in console - " - -colwidth -default 82 -help\ - "Width of each column - default of 82 will fit a standard 80wide ansi image - (when framed) - You can specify a narrower width to truncate images on the right side" - -folder -default "${[punk::ansi::Get_ansifolder]}" -help\ - "Base folder for files if relative paths are used. - Defaults to /src/testansi - where projectbase is determined - from the current directory. - " - @values -min 0 -max -1 - files -default {belinda.ans bot.ans flower.ans fish.ans} -multiple true -help\ - "List of filenames - leave empty to display 4 defaults" - } ""] + lappend PUNKARGS [list { + @dynamic + @id -id ::punk::ansi::example + @cmd -name punk::ansi::example -help "Display .ans image files in a grid that will fit in console + " + -colwidth -default 82 -help\ + "Width of each column - default of 82 will fit a standard 80wide ansi image + (when framed) + You can specify a narrower width to truncate images on the right side" + -folder -default "${$DYN_ANSIFOLDER}" -help\ + "Base folder for files if relative paths are used. + Defaults to /src/testansi - where projectbase is determined + from the current directory. + " + @values -min 0 -max -1 + files -default {belinda.ans bot.ans flower.ans fish.ans} -multiple true -help\ + "List of filenames - leave empty to display 4 defaults" + } ""] + } proc example {args} { set argd [punk::args::parse $args withid ::punk::ansi::example] @@ -1138,14 +1142,22 @@ tcl::namespace::eval punk::ansi { return $result } + #proc dynamic_eg {args} { + # return { + # ${[::punk::args::helpers::example -syntax none { + # grepstr -r m -highlight "red" {string} [i i] + # }]} + # } + #} lappend PUNKARGS [list { @id -id ::punk::ansi::grepstr + #REVIEW - using example -syntax none to avoid recursion problems, because punk::args::helpers::example uses grepstr internally. @cmd -name punk::ansi::grepstr\ -summary\ - "Grep for regex pattern in plaintext of supplied (possibly ANSI) string."\ + "Grep (line by line) for regex pattern within supplied (possibly ANSI) string - matching as if plaintext"\ -help\ - "The grepstr command can find strings in ANSI text even if there are interspersed + {The grepstr command can find strings in ANSI text even if there are interspersed ANSI colour codes etc. Even if a word has different coloured/styled letters, the regex can match the plaintext. (Search is performed on ansistripped text, and then the matched sections are highlighted and overlayed on the original styled/coloured @@ -1155,11 +1167,85 @@ tcl::namespace::eval punk::ansi { searchable because the parts of a word may be separated by various codes and other plain text. To search such an input string, the string should first be 'rendered' to a form where the ANSI only represents SGR styling (and perhaps other non-movement - codes) using something like overtype::renderline or overtype::rendertext." + codes) using something like overtype::renderline or overtype::rendertext. + + + Examples for returning matched lines only in the help output of the 'i' command: + + Highlight each word 'string' in red - retaining all other ANSI in the input: + ${[punk::args::helpers::example -syntax none { + grepstr -r m -highlight "red" {string} [i i] + }]} + + Return each matched line as is including all original ANSI. + ${[punk::args::helpers::example -syntax none { + #The empty capturing group means nothing is highlighted. + grepstr -r m {string()} [i i] + #Alternatively, using -h "+" - additive highlight with empty ANSI also works to leave original ANSI in place + grepstr -r m -highlight "+" {string} [i i] + }]} + + Return each matched line with all ANSI removed. + The pattern matches the whole line and an empty highlight list is applied, + overriding any existing ANSI. + ${[punk::args::helpers::example -syntax none { + grepstr -r m -highlight "" {.*string.*} [i i] + }]} + + Return each matched line with all original ANSI removed, but with the matched + portion of each line highlighted with the default highlighting. + ${[punk::args::helpers::example -syntax none { + grepstr -r m -ansistrip {string} [i i] + #or - using short flag -s for -ansistrip which can be combined with other short flags + grepstr -sr m string [i i] + }]} + + Return lines that don't begin with 2 # characters using negative match patterns. + ${[punk::args::helpers::example -syntax none { + #negative match on ^## - the first set of brackets are not a capturing group. + #For the highlight to be applied to the whole line (or part thereof) we need something after the negative match + #e.g the .* to highlight the whole non-matching line. + grepstr -r m -highlight "red" {^(?!##).*} [s ::after] + + #match all lines that don't begin with ## but do contain 'cancel' + #use a capturing group around 'cancel' to highlight only that word. + #The + red means the original italics will remain but the 'cancel' word will be additionally highlighted in red. + grepstr -r m -highlight "+ red" {^(?!##).*(cancel).*} [s ::after] + + }]} + + Return lines that don't begin with 2 # characters using -v for -invert-match. + ${[punk::args::helpers::example -syntax none { + #Applying no highlight without a + effectively strips ANSI from the (inversely) matched lines. + #if -r all was used, the other lines (those *with* leading ##) would be returned with their original ANSI intact. + grepstr -r m -highlight "" -v {^##} [s ::after] ;#-v for -invert-match + + #Returns Lines that don't begin with 2 # characters, but with ANSI intact. + grepstr -r m -highlight "+" -v {^##} [s ::after] ;#-v for -invert-match + + #Returns Lines that don't begin with 2 # characters, but with ANSI intact except that all + #foreground ANSI is overridden with green. + grepstr -r m -highlight "+ green" -v {^##} [s ::after] ;#-v for -invert-match + + }]} + + Return lines containing the string 'cancelid' with default highlighting. + ${[punk::args::helpers::example -syntax none { + #Default return is breaksandmatches + #lines with matches are returned (by default) with their original ANSI intact aside from the applied highlight. + #non-matching lines in between non-consecutive matches are indicated with a summary using the default break string. + grepstr {cancelid} [s ::after] + + grepstr -C 2 {cancelid} [s ::after] ;#with 2 lines of context + + grepstr -r m -C 2 {cancelid} [s ::after] ;#with 2 lines of context and no break indicators. + }]} + + } @leaders -min 0 -max 0 @opts - -returnlines -type string -typesynopsis matched|all -default breaksandmatches -choicecolumns 1 -choices {matched all breaksandmatches} -choicelabels { + -r|-return -mash true -type string -typesynopsis matched|all -default breaksandmatches -choicecolumns 1 -choices {matched all breaksandmatches summarydict} -choicelabels { "matched"\ " Return only lines that matched." "breaksandmatches"\ @@ -1169,15 +1255,24 @@ tcl::namespace::eval punk::ansi { This has a similar effect to the 'grep' trick of matching on 'pattern|$' (The $ matches all lines that have an end; ie all lines, but there is no associated character to which to apply highlighting) - except that when instead using -returnlines all with --line-number, the * + except that when instead using -return all with --line-number, the * indicator after the linenumber will only be highlighted for lines with matches, and the following matchcount will indicate zero for non-matching lines." + "summarydict"\ + " Return a summary dict with keys 'matches', 'matchlines' and 'totallines' giving + the count of matched patterns, lines with matches and total number of lines + respectively. + If -n is also supplied, the summary dict will also include a 'linemap' key + with a dict value mapping line numbers (1-based) to match counts for each line. + This allows the caller to determine which lines had matches and how many + matches each line had, without returning the matched lines themselves. + " } -B|--before-context= -parsekey "--before-context" -default 0 -type integer -typesynopsis num -C|--context= -parsekey "--context" -default 0 -type integer -typesynopsis num -help\ - "Print num lines of leading and trailing context surrounding each match." + "Print num lines of leading and trailing context surrounding each match." -A|--after-context= -parsekey "--after-context" -default 0 -type integer -typesynopsis num - --break= -type string -default "-- %c%\U2260" -help\ + -b|--break= -mash true -type string -default "-- %c%\U2260" -help\ "When returning matched lines and there is a break in consecutive output, display the break with the given string. %c% is a placeholder for the number of lines skipped. @@ -1190,24 +1285,47 @@ tcl::namespace::eval punk::ansi { Customisation example: grepstr -n \"--break=(skipped %c% lines)\" needle $haystacklines " - -ansistrip -type none -help\ + -s|-ansistrip -mash true -type none -help\ "Strip all ansi codes from the input string before processing. This is not necessary for regex matching purposes, as the matching is always performed on the ansistripped characters anyway, but by stripping ANSI, the result only has the ANSI supplied by the -highlight option." #-n|--line-number as per grep utility, except that we include a * for matches - -n|--line-number -type none -help\ + -n|--line-number -mash true -type none -help\ "Each output line is preceded by its relative line number in the file, starting at line 1. For lines that matched the regex, the line number will be suffixed with a * indicator with the same highlighting as the matched string(s). The number of matches in the line immediately follows the * For lines with no matches the * indicator is present with no highlighting and suffixed with zeros." - -i|--ignore-case -type none -help\ + -i|--ignore-case -mash true -type none -help\ "Perform case insensitive matching." - -highlight -type list -typesynopsis ansinames -default {green bold Black underline overline} -help\ - "list of ANSI SGR style codes as supported by and documented in punk::ansi::a?" + -h|-highlight -mash true -type list -typesynopsis ansinames -default {reset green bold Black underline overline} -help\ + "list of ANSI SGR style codes as supported by and documented in punk::ansi::a? + + If the highlight string contains a ${$B}+${$N} character, the highlight will be additive + - meaning that the supplied ANSI will be added to any existing ANSI in the matched text, + rather than replacing it. In this case the + character is stripped from the supplied ANSI before use. + + If there is no ${$B}+${$N} character, the supplied ANSI will replace any existing ANSI *within* the matched text. + The matched portion of the text will be highlighted with the supplied ANSI codes. + Depending on whether the matched portion of the text has ANSI due to codes prior to the match, or due to ANSI + within the matched text, the supplied highlight ANSI may be the only ANSI in the result for the matched text, + or it may be combined with existing ANSI from prior codes in the original string. + A -highlight can include the word 'reset' at the start to reset all existing ANSI before applying the new ANSI. + + Note that supplying -highlight with an empty list will effectively overtype the matched + text with itself, but with no internal ANSI - ie it effectively removes any ANSI codes that were inside the matched text, + while leaving the non-matched text unchanged. To ensure the matched text has no inherited ANSI - the highlight list should + include 'reset' to clear any existing ANSI before applying the empty highlight. e.g -highlight {reset}. + + Supplying a -highlight with just the ${$B}+${$N} character and no other ANSI codes will be a no-op in terms of styling + - it will leave all existing ANSI in place and not add any new ANSI. + This can be useful in conjunction with negating the match with -v to return non-matching lines with their original ANSI intact. + " + -v|-invert-match -mash true -type none -help\ + "Invert the sense of matching, to select non-matching lines." -- -type none @values pattern -type string -help\ @@ -1233,6 +1351,7 @@ tcl::namespace::eval punk::ansi { }] proc grepstr {args} { + #puts stderr "grepstr args: $args" lassign [dict values [punk::args::parse $args withid ::punk::ansi::grepstr]] leaders opts values received set pattern [dict get $values pattern] set data [dict get $values string] @@ -1241,7 +1360,8 @@ tcl::namespace::eval punk::ansi { set data [punk::ansi::ansistrip $data] } set highlight [dict get $opts -highlight] - set opt_returnlines [dict get $opts -returnlines] + set opt_returnlines [dict get $opts -return] + set opt_invert [dict exists $received -invert-match] set context [dict get $opts --context] ;#int set beforecontext [dict get $opts --before-context] set beforecontext [expr {max($beforecontext,$context)}] @@ -1262,10 +1382,16 @@ tcl::namespace::eval punk::ansi { set do_linenums 0 } + + set highlight_is_additive 0 if {[llength $highlight] == 0} { set H "" set R "" } else { + if {[string first + $highlight] >= 0} { + set highlight_is_additive 1 + set highlight [string map {+ ""} $highlight] + } set H [a+ {*}$highlight] set R \x1b\[m } @@ -1282,7 +1408,11 @@ tcl::namespace::eval punk::ansi { } set plainlines [split $plain \n] set lines [split $data \n] - set matched_line_indices [lsearch -all {*}$nocase -regexp $plainlines $pattern] + if {$opt_invert} { + set matched_line_indices [lsearch -all {*}$nocase -regexp -not $plainlines $pattern] + } else { + set matched_line_indices [lsearch -all {*}$nocase -regexp $plainlines $pattern] + } if {$opt_returnlines eq "all"} { if {[llength $lines] > 0} { set return_line_indices [punk::lib::range 0 [llength $lines]-1] @@ -1303,24 +1433,42 @@ tcl::namespace::eval punk::ansi { set resultlines [dict create] foreach lineindex $return_line_indices { set ln [lindex $lines $lineindex] + set underlay $ln ;#copy of underlay that we may need to insert trailing replays related to highlights into. set col1 "" if {$do_linenums} { set col1 [format "%${w1}s " [expr {$lineindex+1}]] } if {$lineindex in $matched_line_indices} { set plain_ln [lindex $plainlines $lineindex] - #first - determine the number of capturing groups (subexpressions) - #option 1: test the regexp with a single match - #set testparts [regexp {*}$nocase -inline -- $pattern $plain_ln] ;#get fullmatch capturegroup... - #set numgroups [expr {[llength $testparts] -1}] - #option 2: use the regexp -about flag - set numgroups [lindex [regexp -about $pattern] 0] - - set allparts [regexp -all {*}$nocase -indices -inline -- $pattern $plain_ln] - #allparts includes each full match as well as each capturing group - #early versions of tcl 8.6 still didn't support 'lsearch -stride' - avoid for now. - set matchcount [expr {[llength $allparts] / ($numgroups + 1)}] - #set matchcount [llength $allparts] + + if {$opt_invert} { + set numgroups 0 + #In invert-match mode, we want to return the lines that don't match the pattern + set allparts [list [list 0 [expr {[string length $plain_ln]-1}]]] + #our highlight will apply to the whole non-matching line - so we want to return a single range covering the whole line for highlighting purposes. + #our match count will always be 1 in this case and capture groups don't make sense in the context of an inverted match - so we can ignore them. + set matchcount 1 + } else { + #first - determine the number of capturing groups (subexpressions) + #option 1: test the regexp with a single match + #set testparts [regexp {*}$nocase -inline -- $pattern $plain_ln] ;#get fullmatch capturegroup... + #set numgroups [expr {[llength $testparts] -1}] + #option 2: use the regexp -about flag + set numgroups [lindex [regexp -about $pattern] 0] + set allparts [regexp -all {*}$nocase -indices -inline -- $pattern $plain_ln] + #allparts includes each full match as well as each capturing group + #early versions of tcl 8.6 still didn't support 'lsearch -stride' - avoid for now. + set matchcount [expr {[llength $allparts] / ($numgroups + 1)}] + } + + + + if {$opt_returnlines eq "summarydict"} { + #for summarydict we only need the matchcount for each line - no need to do the rest of the processing + dict set resultlines $lineindex $matchcount + continue + } + if {$matchcount == 0} { #This probably can't happen (?) @@ -1338,6 +1486,9 @@ tcl::namespace::eval punk::ansi { set i 0 #{-1 -1} returned for non-matching group when there are capture-group alternatives #e.g {(a)|(b)} + #consider also a negative pattern: {^(?!##)} - we want to return all lines that don't start with ## + #this has no capture groups and no matches - so we get a single {0 -1} range for the whole line - but we want to ignore that and return an empty highlight range list instead. + #more generally we want to ignore any match where the start index is greater than the end index - which indicates a non-matching group. foreach range $allparts { if {($i % ($numgroups+1)) != 0} { lassign $range a b @@ -1349,7 +1500,16 @@ tcl::namespace::eval punk::ansi { } } else { #No capture group in the regex, each index range is just a full match - set highlight_ranges $allparts + #we could still have a part such as {0 -1} for a non-match e.g with {^(?!##)}. + #set highlight_ranges $allparts + set highlight_ranges [list] + foreach range $allparts { + lassign $range a b + if {$range ne {-1 -1} & $a <= $b} { + lappend highlight_ranges $range + } + } + } # ------------------------------------ @@ -1361,33 +1521,88 @@ tcl::namespace::eval punk::ansi { append col1 $H*$R[format %03s $matchcount] } - if {$raw_has_ansi} { - set overlay "" - set i 0 - foreach hrange $highlight_ranges { - lassign $hrange s e - set prelen [expr {$s - $i}] - #append overlay [string repeat $placeholder $prelen] $H[string range $plain_ln $s $e]$R - append overlay [string repeat $placeholder $prelen] $H[string range $plain_ln $s $e] - set i [expr {$e + 1}] - } - set tail [string range $plain_ln $e+1 end] - append overlay [string repeat $placeholder [string length $tail]] - #puts "$overlay" - #puts "$ln" - #set rendered [overtype::renderline -transparent $placeholder -insert_mode 0 $ln $overlay] - set rendered [overtype::renderspace -transparent $placeholder -insert_mode 0 $ln $overlay] + if {[llength $highlight_ranges] == 0} { + #e.g consider 'grepstr -r m {^(?!##).*()} [s after] + #we want to return all synopsis lines that don't start with ##, and we provide an empty capture group to avlid the whole line being highlighted. + #compare with 'grepstr -r m -highlight "" {^(?!##).*} [s after] + #This would 'highlight the whole line with itself but no ANSI ie *stripping* any existing ANSI that was in the line. + set rendered $ln } else { - set rendered "" - set i 0 - foreach hrange $highlight_ranges { - lassign $hrange s e - set prelen [expr {$s - $i}] - #append overlay [string repeat $placeholder $prelen] $H[string range $plain_ln $s $e] - append rendered [string range $plain_ln $i $s-1] $H [string range $plain_ln $s $e] $R - set i [expr {$e + 1}] + if {$raw_has_ansi} { + set overlay "" + set i 0 + foreach hrange $highlight_ranges { + #puts stderr "hrange: $hrange" + lassign $hrange s e + set prelen [expr {$s - $i}] + #we don't want a plain reset after our highlight - but we do want a reset + *replay*. + #(bear in mind that the original line may or may not have a leading reset + #- so we can't assume that the highlight will be fully reset at the end of the highlighted section, + # if we only append a replay.) + #The replay is dependent on the position of the highlight. + #However: a trailing ANSI code on a renderline overlay will be ignored + #- so we have to insert the replay before the highlight if we want it to be effective. + #do so on the copy of ln that is in underlay + + set ln_chars [ansistring COUNT $ln] + set replay [ansistring INDEXCODE $ln $e+1] + #if {$replay ne ""} { + # set underlay [ansistring INSERT $underlay $e+1 $replay] + #} + if {$e < $ln_chars-1} { + #only add reset and replay if we are not right at the end of the line. + set underlay [ansistring INSERT $underlay $e+1 \x1b\[m$replay] + } else { + #INDEXCODE will retrieve trailing ANSI if given end+1 index. + #$underlay [ansistring INSERT $underlay $e+1 $replay] + } + + if {$highlight_is_additive} { + #we want to add the highlight to any existing ANSI in the highlighted section, rather than replacing it + set targetstring [ansistring RANGE $ln $s $e] + #the targetstring may have multiple resets and overriding codes within the string. + #foreach set of codes that we want to insert our highlight at the end. + set ansiparts [punk::ansi::ta::split_codes $targetstring] + set modifiedtarget "$H" + foreach {plainpart codes} $ansiparts { + # [string repeat $placeholder [string length $plainpart]] $H$codes + append modifiedtarget $plainpart$codes$H + } + append overlay [string repeat $placeholder $prelen] $modifiedtarget + } else { + append overlay [string repeat $placeholder $prelen] $H[string range $plain_ln $s $e] + } + + set i [expr {$e + 1}] + } + set tail [string range $plain_ln $e+1 end] + append overlay [string repeat $placeholder [string length $tail]] + #puts "$overlay" + #puts "$ln" + + #------------- + set rendered [overtype::renderline -transparent $placeholder -insert_mode 0 $underlay $overlay] + + #renderspace will introduce extra ANSI resets at the beginning of the line and within ANSI codes. + #ie it is functionaly equivalent from a display perspective but not byte faithful. + #this is not ideal here in a grep-like context. + #We want to preserve original content that is outside the highlighted sections. + #set rendered [overtype::renderspace -transparent $placeholder -insert_mode 0 $ln $overlay] + #------------- + + } else { + #review - highlight_is_additive. + set rendered "" + set i 0 + foreach hrange $highlight_ranges { + lassign $hrange s e + set prelen [expr {$s - $i}] + #append overlay [string repeat $placeholder $prelen] $H[string range $plain_ln $s $e] + append rendered [string range $plain_ln $i $s-1] $H [string range $plain_ln $s $e] $R + set i [expr {$e + 1}] + } + append rendered [string range $plain_ln $e+1 end] } - append rendered [string range $plain_ln $e+1 end] } if {$do_linenums} { @@ -1438,8 +1653,26 @@ tcl::namespace::eval punk::ansi { } dict set resultlines $lineindex $show } - + } ;#end foreach lineindex + + if {$opt_returnlines eq "summarydict"} { + #when we display lines - we use the 1-based line number. + #we should similarly report line numbers rather than the 0-based lineindex in the linemap. + set total_lines [llength $lines] + set matchlines [llength $matched_line_indices] + set matches 0 + set linemap [dict create] + #foreach v [dict values $resultlines] { + # incr matches $v + #} + dict for {k v} $resultlines { + dict set linemap [expr {$k+1}] $v + incr matches $v + } + return [dict create matches $matches matchlines $matchlines totallines $total_lines linemap $linemap] } + + #resultlines keys may be out of order due to context lines - so order them before returning result set ordered_resultlines [lsort -integer [dict keys $resultlines]] set result "" set i -1 @@ -3386,7 +3619,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } errM]} { set termwidth 80 } - set termwidth [expr [$termwidth -3]] + set termwidth [expr {$termwidth -3}] set out "" set linelen 0 set RST [a] @@ -4436,11 +4669,12 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu ] $c1c2] 0 3] switch -- $leadernorm { 7CSI - 8CSI { - if {[punk::ansi::codetype::is_sgr_reset $code]} { + set code_endswith_m [expr {[tcl::string::index $code end] eq "m"}] + if {$code_endswith_m && [punk::ansi::codetype::is_sgr_reset $code]} { set codestack [list "\x1b\[m"] - } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + } elseif {$code_endswith_m && [punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] - } elseif {[punk::ansi::codetype::is_sgr $code]} { + } elseif {$code_endswith_m && [punk::ansi::codetype::is_sgr $code]} { #todo - make caching is_sgr method set dup_posns [lsearch -all -exact $codestack $code] set codestack [lremove $codestack {*}$dup_posns] @@ -4543,11 +4777,12 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu ] $c1c2] 0 3] switch -- $leadernorm { 7CSI - 8CSI { - if {[punk::ansi::codetype::is_sgr_reset $code]} { + set code_endswith_m [expr {[tcl::string::index $code end] eq "m"}] + if {$code_endswith_m && [punk::ansi::codetype::is_sgr_reset $code]} { set codestack [list "\x1b\[m"] - } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + } elseif {$code_endswith_m && [punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] - } elseif {[punk::ansi::codetype::is_sgr $code]} { + } elseif {$code_endswith_m && [punk::ansi::codetype::is_sgr $code]} { #todo - make caching is_sgr method set dup_posns [lsearch -all -exact $codestack $code] set codestack [lremove $codestack {*}$dup_posns] @@ -4634,6 +4869,11 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } set res } + + proc S8C1R {} { + #enable support for the 8-bit C1 control characters. + return "\x1b 7" + } proc reset {} { #*** !doctools #[call [fun reset]] @@ -4679,9 +4919,11 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu proc vt52clear_below {} { return \033J } - proc clear_all {} { - # - doesn't work?? + #clear screen and scrollback - but don't move to home. + return \033\[2J\033\[3J + } + proc clear_scrollback {} { return \033\[3J } #see also erase_ functions @@ -4813,7 +5055,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu set char [format %c [expr {$int + 31}]] return \x1bb${char} } - proc move_emit {row col data args} { + #*** !doctools #[call [fun move_emit] [arg row] [arg col] [arg data] [opt {row col data...}]] #[para]Return an ansi string representing a move to row col with data appended @@ -4827,7 +5069,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu #[para]The following example shows how to do this manually, emitting the string blah at screen position 10,10 and emitting DONE back at the line we started: #[para][example {punk::ansi::move_emit 10 10 blah {*}[punk::console::get_cursor_pos_list] DONE}] #[para]A string created by any move_emit_return for punk::ansi would not behave in an intuitive manner compared to other punk::ansi move functions - so is deliberately omitted. - + proc move_emit1 {row col data args} { set out "" if {$row eq "this"} { append out \033\[\;${col}G$data @@ -4843,6 +5085,34 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu } return $out } + proc move_emit {row col data args} { + set out "" + if {$row eq "this"} { + #append out \033\[\;${col}G$data + append out [move_column $col]$data + } else { + set drow $row + foreach ln [split $data \n] { + append out \x1b\[${drow}\;${col}H$ln + incr drow + } + #append out \033\[${row}\;${col}H$data + } + foreach {row col data} $args { + if {$row eq "this"} { + #append out \033\[\;${col}G$data + append out [move_column $col]$data + } else { + set drow $row + foreach ln [split $data \n] { + append out \x1b\[${drow}\;${col}H$ln + incr drow + } + #append out \033\[${row}\;${col}H$data + } + } + return $out + } proc vt52move_emit {row col data args} { #Todo - G code? set out "" @@ -4919,8 +5189,24 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu proc move_column {col} { #*** !doctools #[call [fun move_column] [arg col]] + #some systems support a direct column move code - but it's not universal - and not supported by vt52 - so we provide a fallback for vt52 in vt52move_column + + #CHA doesn't seem to be supported in kermit generally - but seems to work if terminal set to 'linux' + #review + return \x1b\[${col}G\x1b\[${col}` ;#try both G and HPA - as support seems to vary + } + proc move_column_CHA {col} { + #CHA - Cursor Horizontal Absolute - seems to be more widely supported than HPA - but not supported in vt52 + #https://vt100.net/docs/vt510-rm/CHA.html return \x1b\[${col}G } + proc move_column_HPA {col} { + #https://vt100.net/docs/vt510-rm/HPA.html + return \x1b\[${col}` + } + + + proc vt52move_column {col} { #This is a bit of a fudge - as there is no command to move to a specific column. #without tracking state - we settle for moving back enough times to ensure we're at column 1 - and then move forward. @@ -5071,7 +5357,7 @@ In VT52 mode - use \x1b< to exit. {origin DEC reset-state "jump scroll" set-state "smooth scroll" description "DECSCLM - Scrolling Mode" names {DECSCLM}} } 5 { - {origin DEC reset-state "dark mode" set-state "light mode" description "DECSCNM - Screen Mode (light or dark screen)" names {DECSNM lightmode}} + {origin DEC reset-state "normal display" set-state "reverse video" description "DECSCNM - Screen Mode (light or dark screen)" names {DECSNM reverse_video}} } 6 { {origin DEC reset-state "upper-left corner" set-state "within margins" @@ -5145,6 +5431,9 @@ some emulators may reverse" see "https://github.com/dankamongmen/notcurses/issues/1782" names {sixel_display DECSDM}} } + 81 { + {origin DEC reset-state "disabled" set-state "enabled" description "DECKPM - Key Position Mode (report via DECEKPD)" see "https://vt100.net/docs/vt510-rm/DECKPM.html" names {DECKPM}} + } 95 { {origin DEC reset-state "clear screen on column change" set-state "no clear on column change" description "No Clearing Screen on Column Change Mode" see "" @@ -5955,11 +6244,26 @@ tcl::namespace::eval punk::ansi { #in some cases multiple sequences or leading trailing strings are ok - but the proc docs should note where the function is looking #review - separate namespace for functions that operate on multiple or embedded? + lappend PUNKARGS [list { + @id -id ::punk::ansi::codetype::is_sgr + @cmd -name punk::ansi::codetype::is_sgr\ + -summary\ + "Test if ANSI at tail of code is SGR"\ + -help\ + "Test for SGR (Select Graphic Rendition) - code ending in 'm' - e.g colour/underline + we will accept the less common colon separator (ITU Open Document Architecture) + + The ITU syntax with colon for params should theoretically be more reliable to parse and would + avoid problems some terminals have with 256 colour SGR codes with many parameters + being mistinterpreted as multiple SGR codes - but it is unfortunately not widely supported. (2026) + " + @values -min 1 + code -type string + } ] proc is_sgr {code} { - #SGR (Select Graphic Rendition) - codes ending in 'm' - e.g colour/underline - #we will accept and pass through the less common colon separator (ITU Open Document Architecture) #Terminals should generally ignore it if they don't use it - regexp {\033\[[0-9;:]*m$} $code + #regexp {\033\[[0-9;:]*m$} $code + regexp {(?:\033\[|\x9b)[0-9;:]*m$} $code } #review - has_cursor_move_in_line? Are we wanting to allow strings/sequences and detect that there are no moves that *aren't* within line? @@ -5977,6 +6281,7 @@ tcl::namespace::eval punk::ansi { return 0 } #pure SGR reset with no other functions + #ie will not detect a redundant code such as \x1b\[31;0m - but it will detect \x1b\[0m or \x1b\[0\;...m] proc is_sgr_reset {code} { #*** !doctools #[call [fun is_sgr_reset] [arg code]] @@ -5985,7 +6290,9 @@ tcl::namespace::eval punk::ansi { #[para]This is primarily intended for testing a single ansi code sequence, but code can be any string where the trailing SGR code is to be tested. #todo 8-bit csi - regexp {\x1b\[0*m$} $code + #regexp {\x1b\[0*m$} $code + + regexp {(?:\x1b\[|\x9b)0*m$} $code } @@ -6016,11 +6323,44 @@ tcl::namespace::eval punk::ansi { } } + + + + lappend PUNKARGS [list { + @id -id ::punk::ansi::codetype::has_any_effective + @cmd -name punk::ansi::codetype::has_any_effective\ + -summary\ + "Test if string has effective SGR states by the end."\ + -help\ + {Test if string has effective SGR states by the end. + This is intended to be used on strings that may have multiple SGR codes in them + - but we are only interested in the final state after all codes have been processed. + + For example - if a string has \x1b\[31m red text, then \x1b\[0m reset + - then the final state is reset and the red is ineffective + so ${$B}has_any_effective${$N} $str fg would return false. + + If the string had \x1b\[31m red text, then \x1b\[46m cyan background + - then the final state is red on cyan background + so ${$B}has_any_effective${$N} $str fg would return true and ${$B}has_any_effective${$N} $str bg would also return true. + } + + @values -min 2 + str -type string + state -type string -multiple 1\ + -choiceprefix 0\ + -choices {sgr unmergeable othercodes intensity italic underline underextended blink reverse hide strike font gothic doubleunderline proportional frame_or_circle ideogram_underline ideogram_doubleunderline ideogram_clear overline underlinecolour superscript subscript nosupersub fg bg}\ + } ] #e.g has_any_effective $str bg fg - proc has_any_effective {str args} { + proc has_any_effective {args} { + if {[llength $args] < 2} { + error "USAGE:\n[punk::args::synopsis ::punk::ansi::codetype::has_any_effective]" + } + set arglist $args + set str [lpop arglist 0] set singlecodes [punk::ansi::ta::get_codes_single $str] set mergeinfo [punk::ansi::codetype::sgr_merge_singles $singlecodes -info 1] - foreach t $args { + foreach t $arglist { switch -- $t { sgr - unmergeable - othercodes { if {[dict get $mergeinfo $t] ne ""} { @@ -6045,7 +6385,8 @@ tcl::namespace::eval punk::ansi { } } default { - error "punk::ansi::ta::has_any_effective invalid type '$t' specified" + error "[punk::ns::cmdhelp ::punk::ansi::codetype::has_any_effective {*}$args]" + #error "punk::ansi::ta::has_any_effective invalid type '$t' specified" } } } @@ -6155,16 +6496,20 @@ tcl::namespace::eval punk::ansi { variable metastate_empty tcl::dict::set metastate_empty underline_active "" ;#a meta state for whether underlines are on|off - values 1,0,"" + + #------------------------------------------------------- #misnomer should have been sgr_merge_args ? :/ #as a common case optimisation - it will not merge a single element list, even if that code contains redundant elements - proc sgr_merge_list {args} { - if {[llength $args] == 0} { - return "" - } elseif {[llength $args] == 1} { - return [lindex $args 0] - } - sgr_merge $args - } + #TODO - deprecate + #proc sgr_merge_list {args} { + # if {[llength $args] == 0} { + # return "" + # } elseif {[llength $args] == 1} { + # return [lindex $args 0] + # } + # sgr_merge $args + #} + #------------------------------------------------------- proc sgr_merge {codelist args} { set allparts [list] foreach c $codelist { @@ -6175,34 +6520,96 @@ tcl::namespace::eval punk::ansi { sgr_merge_singles $allparts {*}$args } - variable defaultopts_sgr_merge_singles - set defaultopts_sgr_merge_singles [tcl::dict::create\ - -filter_fg 0\ - -filter_bg 0\ - -filter_reset 0\ - -info 0\ - ] + lappend PUNKARGS [list { + @id -id ::punk::ansi::codetype::sgr_merge_singles + @cmd -name punk::ansi::codetype::sgr_merge_singles\ + -summary\ + "Merge a list of SGR codes to a single SGR when possible."\ + -help\ + { + Merge a list of SGR codes to a single SGR when possible, + removing redundant or overridden codes and preserving non-SGR codes. + + codes *must* already have been split so that one CSI (\x1b[ or \x9b) + per element in codelist. + e.g codelist [a+ Yellow Red underline] [a+ blue] [a+ red] is ok + but codelist "[a+ Yellow Red underline][a+ blue]" [a+ red] is not + (use punk::ansi::ta::split_codes_single to produce such a list if needed) + } + + @leaders -min 1 + codelist -type list -minsize 1 -maxsize -1 -help\ + "List of single codes to merge. + Each code that is an ANSI SGR code should be a string containing exactly one CSI and no other codes + - e.g as produced by punk::ansi::ta::split_codes_single + + - some codes such as modern underline colour codes may not be able to be merged into a single CSI ... m sequence + with older SGR codes and may be tacked on to the end of the merged SGR code rather than merged into the codestate, + but they will still be preserved in the output and have their effect when processed by a terminal that supports them. + + - Non SGR codes will be passed through and preserved in the output by tacking them on to the very end of the result. + This means that if the non-SGR codes are cursor moves or similar, they may end up in a different position relative to + the SGR codes than they were in the input list. + This means sgr_merge_singles should generally be used on 'rendered' strings where movement codes have already been processed + and we are only interested in merging the SGR codes for the final state - rather than on 'raw' strings where we want to + preserve the exact order of all codes including movement codes. + Alternatively the caller should break up the list of input codes into appropriate chunks to avoid movement codes being merged + to the end of SGR codes that were originally before them in the input list. + " + @opts + -filter_fg -type none -help\ + "filter out any foreground colour codes" + -filter_bg -type none -help\ + "filter out any background colour codes" + -filter_reset -type none -help\ + "filter out any reset codes (ie 0 or empty parameter SGR codes)" + -info -type none -help\ + "Return a dict of information about the merge instead of just the merged code. + The dict key 'mergeresult' will have the merged code (or empty string if the merge resulted in a reset or no codes). + Other keys are: + sgr - the merged SGR parameters (without leading CSI or trailing m) - empty if no SGR in result + unmergeable - list of SGR parameters that were not merged (e.g because they were overridden by a reset or another SGR code) + othercodes - list of non-SGR codes in the input list (preserving their original form - i.e with their original CSI and parameters) + codestate - dict of the final SGR state after the merge (including any unmergeable parameters that were not merged but are still effective in the final state) + - subkeys are sgr attributes such as intensity, italic, underline, fg, bg etc - values are the parameters for the final effective attribute + " + @values -min 0 -max 0 - #codes *must* already have been split so that one esc per element in codelist - #e.g codelist [a+ Yellow Red underline] [a+ blue] [a+ red] is ok - #but codelist "[a+ Yellow Red underline][a+ blue]" [a+ red] is not - #(use punk::ansi::ta::split_codes_single) - proc sgr_merge_singles {codelist args} { + } ] + proc sgr_merge_singles {args} { variable codestate_empty variable metastate_empty - variable defaultopts_sgr_merge_singles - set opts $defaultopts_sgr_merge_singles - foreach {k v} $args { - switch -- $k { - -filter_fg - -filter_bg - -filter_reset - + + set codelist [lindex $args 0] + set optlist [lrange $args 1 end] + + #foreach {k v} $args { + # switch -- $k { + # -filter_fg - -filter_bg - -filter_reset - + # -info { + # tcl::dict::set opts $k $v + # } + # default { + # error "sgr_merge unknown option '$k'. Known options [tcl::dict::keys $opts]" + # } + # } + #} + set optdefaults [list -filter_fg 0 -filter_bg 0 -filter_reset 0 -info 0] + set opts [dict create] + foreach v $optlist { + switch -- $v { + -filter_fg - -filter_bg - -filter_reset { + tcl::dict::set opts $v 1 + } -info { - tcl::dict::set opts $k $v + tcl::dict::set opts $v 1 } default { - error "sgr_merge unknown option '$k'. Known options [tcl::dict::keys $opts]" + error "sgr_merge unknown option '$v'. Known options [tcl::dict::keys $optdefaults]" } } } + set opts [tcl::dict::merge $optdefaults $opts] set othercodes [list] set codestate $codestate_empty ;#take copy as we need the empty state for resets @@ -6212,21 +6619,27 @@ tcl::namespace::eval punk::ansi { #we should also handle 8bit CSI here? mixed \x1b\[ and \x9b ? Which should be used in the merged result? #There are arguments to move to 8bit CSI for keyboard protocols (to solve keypress timing issues?) - but does this extend to SGR codes? #we will output 7bit merge of the SGRs even if some or all were 8bit CSi - #As at 2024 - 7bit are widely supported 8bit seem to be often ignored by pseudoterminals - #auto-detecting and emitting 8bit only if any are present in our input doesn't seem like a good idea - as sgr_merge_list is only seeing a subset of the data - so any auto-decision at this level will just introduce indeterminism. + #As at 2024 - 7bit are widely supported 8bit seem to be often ignored by pseudoterminals (or only effective if explicitly enabled). + #auto-detecting and emitting 8bit only if any are present in our input doesn't seem like a good idea + #- as sgr_merge_singles is only seeing a subset of the data - so any auto-decision at this level will just introduce indeterminism. #review - consider a higher-level option for always emitting 8bit or always 7bit #either way - if we get mixed CSI input - it probably makes more sense to merge their parameters than maintain the distinction and pass the mess downstream. #We still output any non SGR codes in the list as they came in - preserving their CSI + set filter_out_reset [tcl::dict::get $opts -filter_reset] foreach c $codelist { + if {$c eq ""} { + continue + } #normalize 8bit to a token of the same length so our string operations on the code are the same and we can maintain a switch statement with literals rather than escapes #.. but preserve original c #set cnorm [tcl::string::map [list \x9b {8[} ] $c] #switch -- [tcl::string::index $cnorm 1][string index $cnorm end] {} # {[m} - #todo - detect multiple \x9b or \x1b and raise error - codes not split? + #validate that there is only one CSI and that it is at the start + #- otherwise we may have multiple codes smooshed together without splitting - which would break our parsing #if we don't - we can silently get 8CSI 7CSI in output! if {[string last \x1b\[ $c] > 0} { error "::punk::ansi::codetype::sgr_merge_singles bad entry in codelist [ansistring VIEW $c] (There is a 7CSI not at start)" @@ -6239,7 +6652,7 @@ tcl::namespace::eval punk::ansi { switch -- [tcl::string::range $cnorm 0 3][tcl::string::index $cnorm end] { 7CSIm - 8CSIm { #set params [tcl::string::range $cnorm 2 end-1] ;#strip leading esc lb and trailing m - set params [tcl::string::range $cnorm 4 end-1] ;#string leading XCSI and trailing m + set params [tcl::string::range $cnorm 4 end-1] ;#get string between leading XCSI and trailing m #some systems use colon for 256 colours or RGB or nonstandard subparameters #- it is therefore probably not ok to map to semicolon within SGR codes and treat the same. @@ -6265,7 +6678,7 @@ tcl::namespace::eval punk::ansi { set codeint [tcl::string::trimleft [lindex $paramsplit 0] 0] switch -- $codeint { "" - 0 { - if {![tcl::dict::get $opts -filter_reset]} { + if {!$filter_out_reset} { set codestate $codestate_empty set metastate $metastate_empty set did_reset 1 @@ -6544,7 +6957,9 @@ tcl::namespace::eval punk::ansi { set codemerge "" set unmergeable "" ;# can merge with each other but not main set (for terminals not supporting extended codes) - if {[tcl::dict::get $opts -filter_fg] || [tcl::dict::get $opts -filter_bg]} { + set filter_out_fg [tcl::dict::get $opts -filter_fg] + set filter_out_bg [tcl::dict::get $opts -filter_bg] + if {$filter_out_fg || $filter_out_bg} { #safe jumptable test #dict for {k v} $codestate {} tcl::dict::for {k v} $codestate { @@ -6554,12 +6969,12 @@ tcl::namespace::eval punk::ansi { default { switch -- $k { bg { - if {![tcl::dict::get $opts -filter_bg]} { + if {!$filter_out_bg} { append codemerge "${v}\;" } } fg { - if {![tcl::dict::get $opts -filter_fg]} { + if {!$filter_out_fg} { append codemerge "${v}\;" } } @@ -6757,14 +7172,15 @@ tcl::namespace::eval punk::ansi::ta { tcl::namespace::path ::punk::ansi namespace export detect detect_in_list detect_sgr extract length split_codes split_at_codes split_codes_single - variable PUNKARGS #handle both 7-bit and 8-bit csi #review - does codepage affect this? e.g ebcdic has 8bit csi in different position #CSI #variable re_csi_open {(?:\033\[|\u009b)[0-9;]+} ;#too specific - doesn't detect \033\[m - variable re_csi_open {(?:\x1b\[|\u009b)} + #variable re_csi_open {(?:\x1b\[|\u009b)} + variable re_csi_open {\x1b\[|\u009b} + #variable re_csi_code {(?:\033\[|\u009b)[0-9;]*[a-zA-Z\\@\^_\{|\}\[\]~`]} variable re_csi_code {(?:\x1b\[|\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]} @@ -6802,13 +7218,54 @@ tcl::namespace::eval punk::ansi::ta { # -- #if we don't split on altgraphics too and separate them out - it's easy to get into a horrible mess - variable re_g0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} - variable re_g0_open {(?:\x1b\(0)} - variable re_g0_close {(?:\x1b\(B)} + #variable re_g0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} + #--------------------- + #1. Remove the redundant group around the opener + + #(?:\x1b\(0) → \x1b\(0 + + #2. Replace the per-character lookahead with bulk consumption + + #(?:(?!\x1b\(B).)* tests the lookahead at every single character. Since \x1b\(B always starts with \x1b, you can consume non-ESC characters in bulk and only pay the lookahead cost on actual ESC bytes: + + #(?:[^\x1b]|\x1b(?!\(B))* + + #--- + #Result: + #\x1b\(0(?:[^\x1b]|\x1b(?!\(B))*\x1b\(B + + #One behavioural note: . doesn't match newlines by default, so the original silently rejects content spanning lines. The optimized [^\x1b] matches newlines, which is arguably more correct for terminal data. If you want to preserve the original behaviour, replace [^\x1b] with + #[^\x1b\n]. + #--------------------- + set re_g0_group {\x1b\(0(?:[^\x1b]|\x1b(?!\(B))*\x1b\(B} + + variable re_g0_open {\x1b\(0} + variable re_g0_close {\x1b\(B} #detect start of ansicode that is closed by ST # DCS "ESC P" or "0x90" is also terminated by ST - set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} + #set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} + #set re_ST_open {\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f} + # ---------------------------- + # from claude-sonnet-4-6 2024-06-17 + #❯ Is there a simpler expression for the regex: \033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f + # + #● Yes. The 7-bit forms all share the \033 prefix, and the 8-bit forms can be grouped into a character class: + # + # \033[PX^_]|[\u0090\u0098\u009e\u009f] + # + # The four 8-bit codes (0x90, 0x98, 0x9E, 0x9F) aren't contiguous so they can't be expressed as a range, but grouping them in a class is still much cleaner than four separate alternatives. + # + #❯ Is there likely to be a performance difference? + # + #● In practice, no — not meaningfully. + # + # Both forms require the engine to check the same number of characters at any given position. The character class lookup is O(1) and modern regex engines optimize short alternations like P|X|\^|_ to a similar dispatch table internally. With only 4 members, any difference would + # be in the noise. + # + # The real performance factor for this pattern is how quickly the engine can skip non-matching input (e.g. advancing past ordinary text), and that's identical between the two forms. + # ---------------------------- + set re_ST_open {\033[PX^_]|[\u0090\u0098\u009e\u009f]} #ST terminators [list \007 \033\\ \u009c] #regex to capture the start of string/privacy message/application command block including the contents and string terminator (ST) @@ -6851,17 +7308,109 @@ tcl::namespace::eval punk::ansi::ta { #regexp expanded syntax = ?x #full detect - checking for closing sequences - variable re_ansi_detect {(?x) + variable re_ansi_detect_v1 {(?x) (?:\x1b(?:\[(?:[\x20-\x3f]*[\x40-\x7e])|a|c|7|8|M|D|E|H|=|>|<|A|B|C|I|J|K|L|M|Z|(?:Y(?:..))|(?:b(?:.))|\((?:0|B)|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007))|(?:\#(?:3|4|5|6|8)))) |(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c) |(?:\u009b)[\x20-\x3f]*[\x40-\x7e] |(?:\u009d)(?:[^\u009c]*)?\u009c } #--- + variable re_ansi_detect_v2 {(?x) + \x1b(?: + \[[\x20-\x3f]*[\x40-\x7e] + |a|c|7|8|M|D|E|H|=|>|<|A|B|C|I|J|K|L|M|Z + |Y.. #vt52 cursor position + |b. #vt52 foreground colour + |\((?:0|B) #vt52 G0 char set + |\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\) #OSC + |(?:P|X|\^|_)(?:(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007)) #PM,SOS,APC,SC + |\#(?:3|4|5|6|8) #DECDHL and DECSWL/DECDWL + ) + |(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c) + |(?:\u009b)[\x20-\x3f]*[\x40-\x7e] + |(?:\u009d)(?:[^\u009c]*)?\u009c + } + + variable re_ansi_detect {(?x) + \x1b(?: + \[[\x20-\x3f]*[\x40-\x7e] + | [ac78MDEH=>|<|A|B|C|I|J|K|L|M|Z) → [ac78MDEH=> [list $re_ansi_detect]] { regexp $text }] @@ -7003,26 +7548,37 @@ tcl::namespace::eval punk::ansi::ta { } #perl: ta_strip - punk::args::set_idalias ::punk::ansi::ta::strip ::punk::ansi::ansistrip - proc strip {text} { - #*** !doctools - #[call [fun strip] [arg text]] - #[para]Return text stripped of Ansi codes - #[para]This is a tailcall to punk::ansi::ansistrip - tailcall punk::ansi::ansistrip $text + #punk::args::set_idalias ::punk::ansi::ta::strip ::punk::ansi::ansistrip + namespace eval argdoc { + lappend PUNKARGS_aliases {::punk::ansi::ta::strip ::punk::ansi::ansistrip} + } + catch { + namespace import ::punk::ansi::ansistrip + rename ::punk::ansi::ta::ansistrip ::punk::ansi::ta::strip + namespace import ::punk::ansi::ansistrip } - lappend PUNKARGS [list { - @id -id ::punk::ansi::ta::extract - @cmd -name punk::ansi::ta::extract\ - -summary\ - "Return only the ANSI codes in text"\ - -help\ - "This is the opposite of strip, - returning only the ANSI codes in text." - @values -min 1 -max 1 - text -type string - } ] + #proc strip {text} { + # #*** !doctools + # #[call [fun strip] [arg text]] + # #[para]Return text stripped of Ansi codes + # #[para]This is a tailcall to punk::ansi::ansistrip + # tailcall punk::ansi::ansistrip $text + #} + + namespace eval argdoc { + lappend PUNKARGS [list { + @id -id ::punk::ansi::ta::extract + @cmd -name punk::ansi::ta::extract\ + -summary\ + "Return only the ANSI codes in text"\ + -help\ + "This is the opposite of strip, + returning only the ANSI codes in text." + @values -min 1 -max 1 + text -type string + }] + } proc extract {text} { set parts [split_codes $text] set out "" @@ -7032,7 +7588,8 @@ tcl::namespace::eval punk::ansi::ta { return $out } - lappend PUNKARGS [list { + namespace eval argdoc { + lappend PUNKARGS [list { @id -id ::punk::ansi::ta::length @cmd -name punk::ansi::ta::length\ -summary\ @@ -7042,7 +7599,8 @@ tcl::namespace::eval punk::ansi::ta { This is not the printing length of the string on screen." @values -min 1 text -type string - } ] + } ] + } #perl: ta_length proc length {text} { #*** !doctools @@ -7551,6 +8109,7 @@ tcl::namespace::eval punk::ansi::class { } } + #Allow reloading of package if {[llength [tcl::info::commands ::punk::ansi::class::class_ansistring]]} { ::punk::ansi::class::class_ansistring destroy } @@ -7688,6 +8247,9 @@ tcl::namespace::eval punk::ansi::class { } #private method + #todo - consider whether we can optionally take an index argument to only split up to a certain point in the string + #- this would allow us to build the split state incrementally as we render through the string, which may be more efficient for long strings that are only rendered partway through. + #(also potentially useful for index-taking methods where the index is relative to the start of the string - so we can split up to that point and then do lookups in the split state without needing to split the whole string) method MakeSplit {} { #The split with each code as it's own element is more generally useful. set o_ansisplits [punk::ansi::ta::split_codes_single $o_string]; @@ -7717,7 +8279,10 @@ tcl::namespace::eval punk::ansi::class { #maintenance warning - dup in append! if {[punk::ansi::codetype::is_sgr_reset $code]} { - set codestack [list "\x1b\[m"] + #review - this effectively normalizes the reset to a single form - we may want to preserve the original code for rendering/debugging purposes + #set codestack [list "\x1b\[m"] + #vs + set codestack [list $code] ;#pass through as is. lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] @@ -8014,7 +8579,9 @@ tcl::namespace::eval punk::ansi::class { lappend o_splitindex $current_split_index #maintenance - dup in MakeSplit! if {[punk::ansi::codetype::is_sgr_reset $code]} { - set codestack [list "\x1b\[m"] + set codestack [list $code] ;#pass through as is. + #vs + #set codestack [list "\x1b\[m"] ;#normalize. lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] @@ -8089,7 +8656,8 @@ tcl::namespace::eval punk::ansi::class { #This allows us to use only list operations to keep the sgr data valid - but we don't yet make it canonical/flat by examining each for resets etc. #ie just call sgr_merge_list once now. set laststack [lindex $o_sgrstacks end] - set mergedtail [punk::ansi::codetype::sgr_merge_list "" {*}$laststack] + #set mergedtail [punk::ansi::codetype::sgr_merge_list "" {*}$laststack] + set mergedtail [punk::ansi::codetype::sgr_merge $laststack] foreach n $new_sgrstacks { lappend o_sgrstacks [list $mergedtail {*}$n] } @@ -8292,7 +8860,8 @@ tcl::namespace::eval punk::ansi::class { if {[llength $args]} { set pt [ansistring VIEW {*}$args $pt] } - append output [punk::ansi::codetype::sgr_merge_list {*}$codestack]$pt + #append output [punk::ansi::codetype::sgr_merge_list {*}$codestack]$pt + append output [punk::ansi::codetype::sgr_merge $codestack]$pt if {$code ne ""} { append output [a][ansistring VIEW -lf 1 $code] if {[punk::ansi::codetype::is_sgr_reset $code]} { @@ -8307,7 +8876,8 @@ tcl::namespace::eval punk::ansi::class { lappend codestack $code } elseif {[regexp {\x1b7|\x1b\[s} $code]} { #cursor_save - set cursor_saved [punk::ansi::codetype::sgr_merge_list {*}$codestack] + #set cursor_saved [punk::ansi::codetype::sgr_merge_list {*}$codestack] + set cursor_saved [punk::ansi::codetype::sgr_merge $codestack] } elseif {[regexp {\x1b8|\x1b\[u} $code]} { #cursor_restore set codestack [list $cursor_saved] @@ -8358,10 +8928,26 @@ tcl::namespace::eval punk::ansi::ansistring { #[para]Working with strings containing ansi in a way that preserves/understands the codes is always going to be significantly slower than working with plain strings #[para]Just as working with other forms of markup such as HTML - you simply need to be aware of the tradeoffs and design accordingly. #[list_begin definitions] + namespace eval argdoc { + # -- --- --- --- --- + #non colour SGR codes + # we can use these directly via ${$I} etc without marking a definition with @dynamic + #This is because they don't need to change when colour switched on and off. + #chicken/egg - need to use literals here + set I "\x1b\[3m" ;# [a+ italic] + set NI "\x1b\[23m" ;# [a+ noitalic] + set B "\x1b\[1m" ;# [a+ bold] + set N "\x1b\[22m" ;# [a+ normal] + set T "\x1b\[1\;4m" ;# [a+ bold underline] + set NT "\x1b\[22\;24m\x1b\[4:0m" ;# [a+ normal nounderline] + set LC \u007b ;#left curly brace + set RC \u007d ;#right curly brace + # -- --- --- --- --- + } tcl::namespace::path [list ::punk::ansi ::punk::ansi::ta] tcl::namespace::ensemble create - tcl::namespace::export length trim trimleft trimright INDEX COUNT VIEW VIEWCODES VIEWSTYLE INDEXABSOLUTE INDEXCOLUMNS COLUMNINDEX NEW + tcl::namespace::export length trim trimleft trimright INDEX INDEXCODE INDEXCHAR RANGE INSERT COUNT COUNT2 COUNT3 VIEW VIEWCODES VIEWSTYLE INDEXABSOLUTE INDEXCOLUMNS COLUMNINDEX NEW #todo - expose _splits_ methods so caller can work efficiently with the splits themselves #we need to consider whether these can be agnostic towards splits from split_codes vs split_codes_single @@ -8809,7 +9395,7 @@ tcl::namespace::eval punk::ansi::ansistring { #todo - change to COUNT to emphasize the difference between this and doing a Tcl string length on the ansistriped string! #review. Tabs/elastic tabstops. Do we want to count a tab as one element? Probably so if we are doing so for \n etc and not counting 2W unicode. #Consider leaving tab manipualation for a width function which determines columns occupied for all such things. - proc COUNT {string} { + proc COUNT3 {string} { #*** !doctools #[call [fun COUNT] [arg string]] #[para]Returns the count of visible graphemes and non-ansi control characters @@ -8831,10 +9417,21 @@ tcl::namespace::eval punk::ansi::ansistring { } #included as a test/verification - slightly slower. #grapheme split version may end up being used once it supports unicode grapheme clusters - proc count2 {string} { + proc COUNT2 {string} { #we want count to return number of glyphs.. not screen width. Has to be consistent with index function return [llength [punk::char::grapheme_split [ansistrip $string]]] } + proc COUNT {string} { + #review - consider caching like we do for overtype::grapheme_width_cached + #we want count to return number of glyphs.. not screen width. Has to be consistent with index function + set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run + set count 0 + foreach {pt code} $ansisplits { + if {$pt eq ""} {continue} + incr count [llength [punk::char::grapheme_split $pt]] + } + return $count + } proc length {string} { tcl::string::length [ansistrip $string] @@ -8959,68 +9556,189 @@ tcl::namespace::eval punk::ansi::ansistring { #[para]Some sort of generator that parses more of the string as required might be more efficient for large chunks. #[para]For end-x operations we have to pre-calculate the content-length by stripping the ansi - which is also potentially sub-optimal + + #we could call ansistrip here, but it converts g0 chars to equivalent box drawing chars. + #This doesn't affect the length - but is unecessary processing here. + #also the string length of the stripped string is not necessarily the same as the number of graphemes + #- so we would still need to do the splitting and looping below to find the right grapheme even if we had the stripped length, for end-x calculations. + + #Todo - work on improving the grapheme splitting. + #note that punk::char::grapheme_split is not currently (2026-04) implemented to handle all the various unicode grapheme cluster rules + #if our index is end based - we have to go through the whole string (with grapheme processing) to find the character length. + + + #todo - improve efficiency. + # we need to split and do grapheme splitting to get the count in order to resolve the index, + # but we also split and do grapheme splitting to find the character at the index - so we end up doing it twice. + + #set num_chars [COUNT $string] ;#one grapheme per character - so this is the index we want to resolve against. + #COUNT does the expensive split_codes_single - so we do it inline here so we do it once. set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run + set num_chars 0 + set pt_index -2 + set pt_graphemes [list] + foreach {pt code} $ansisplits { + incr pt_index 2 + if {$pt eq ""} { + lappend pt_graphemes [list] + continue + } + set graphemes [punk::char::grapheme_split $pt] + lappend pt_graphemes $graphemes + incr num_chars [llength $graphemes] + } - #todo - end-x +/-x+/-x etc - set original_index $index + set index [punk::lib::lindex_resolve_basic $num_chars $index] ;#returns -Inf for out of bounds at either end. + if {$index eq "-Inf"} {return ""} ;#out of bounds - we return empty string for consistency with Tcl string index behaviour - set index [tcl::string::map [list _ ""] $index] - #short-circuit some trivial cases - if {[tcl::string::is integer -strict $index]} { - if {$index < 0} {return ""} - #this only short-circuits an index greater than length including ansi-chars - #we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length - if {$index > [tcl::string::length $string]} {return ""} - } else { - if {[tcl::string::match end* $index]} { - #for end- we will probably have to blow a few cycles stripping first and calculate the length - if {$index ne "end"} { - set op [tcl::string::index $index 3] - set offset [tcl::string::range $index 4 end] - if {$op ni {+ -} || ![tcl::string::is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} - if {$op eq "+" && $offset != 0} { - return "" - } - } else { - set offset 0 - } - #by now, if op = + then offset = 0 so we only need to handle the minus case - set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal - if {$offset == 0} { - set index [expr {$payload_len-1}] - } else { - set index [expr {($payload_len-1) - $offset}] - } - if {$index < 0} { - #don't waste time splitting and looping the string - return "" + + #any pt could be empty if using split_codes_single (or just first and last pt if split_codes) + set low -1 + set high -1 + set pt_index -2 + set pt_found -1 + set char "" + #set grapheme_codestacks [list] ;#stack of codes per grapheme - will be flattened/coalesced + set codestack [list] + #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go + #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) + set idx -1 + foreach {pt code} $ansisplits { + incr idx + incr pt_index 2 + #we want an index per grapheme - whether it is doublewide or single + set graphemes [lindex $pt_graphemes $idx] + + if {$pt ne ""} { + #set graphemes [punk::char::grapheme_split $pt] + set low [expr {$high + 1}] ;#last high + #incr high [tcl::string::length $pt] + incr high [llength $graphemes] + if {$index >= $low && $index <= $high} { + set pt_found $pt_index + #set char [tcl::string::index $pt $index-$low] + set char [lindex $graphemes $index-$low] + break } - } else { - #we are trying to avoid evaluating unbraced expr of potentially insecure origin - regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string - if {[tcl::string::is integer -strict $tail]} { - #plain +- - if {$op eq "-"} { - #return nothing for negative indices as per Tcl's lindex etc - return "" - } - set index $tail + } + + set code_endswith_m [expr {[tcl::string::index $code end] eq "m"}] + if {$code_endswith_m} { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + #we can throw away previous codestack + #set codestack [list] + #vs + set codestack [list $code] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] } else { - if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { - if {[tcl::string::is integer -strict $a] && [tcl::string::is integer -strict $b]} { - if {$op eq "-"} { - set index [expr {$a - $b}] - } else { - set index [expr {$a + $b}] - } - } else { - error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" - } - } else { - error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" + #may have partial resets + #sgr_merge will handle at end + #we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. + if {[punk::ansi::codetype::is_sgr $code]} { + lappend codestack $code } } } + #else - we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. + #Review - consider if any other types of code make sense to retain in the output in this context. + } + if {$pt_found >= 0} { + if {[llength $codestack]} { + #return [punk::ansi::codetype::sgr_merge_list {*}$codestack]$char + return [punk::ansi::codetype::sgr_merge $codestack]$char + } else { + return $char + } + } else { + return "" + } + } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::ansi::ansistring::INDEXCODE + @cmd -name punk::ansi::ansistring::INDEXCODE -summary\ + "Get ANSI SGR code in effect at index or after last index"\ + -help\ + "Returns the ANSI SGR code at the position of the character returned by INDEX, or empty string + if no code at that position. + + Retrieves the ANSI SGR code in effect after the entire string if index is ${$B}end+1${$N} or numerical equivalent. + + If there are multiple codes in effect at that position, the returned code will be a merge of + those codes - so for example if there is a red foreground and then a bold later, the returned + code will be a single code with both red and bold in it. + This is effectively the ANSI code that would need to be applied to the character returned by + INDEX to get the same visual effect as the original string at that position." + @values -min 2 -max 2 + string -type string -help\ + "String to index into, which may contain ANSI codes." + index -type indexexpression -help\ + "Index of character to get code for, or as a special case: + the index ${$B}end+1${$N} or numerical equivalent to get the code in effect after the last character (if any). + For example end will give the code in effect at the last character, and end+1 will give the code in + effect after the last character." + }] + } + proc INDEXCODE {string index_expression} { + #*** !doctools + #[call [fun indexcode] [arg string] [arg index]] + #[para]Returns the ANSI code at the position of the character returned by INDEX, or empty string if no code at that position. + #[para]If there are multiple codes in effect at that position, the returned code will be a merge of those codes - so for example if there is a red foreground and then a bold later, the returned code will be a single code with both red and bold in it. + + if {![punk::ansi::detect $string]} { + return "" + } + + set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run + set num_chars 0 + set pt_index -2 + set pt_graphemes [list] + foreach {pt code} $ansisplits { + incr pt_index 2 + if {$pt eq ""} { + lappend pt_graphemes [list] + continue + } + set graphemes [punk::char::grapheme_split $pt] + lappend pt_graphemes $graphemes + incr num_chars [llength $graphemes] + } + #we also need to special case the situation where the string is pure ANSI ie num_chars = 0 + #- in which case we want end+1 to resolve to 0 not 1 - so that we can get the code in effect at the end of the string. + #- also INDEXCODE 0 should by convention return the same thing as INDEXCODE end and INDEXCODE end+1 - which is the code in effect at the end of the string. + + set index [punk::lib::lindex_resolve $num_chars $index_expression] ;#returns -Inf for out of bounds lower end, Inf for out of bounds upper end. + if {$index eq "-Inf"} {return ""} ;#out of bounds - we return empty string for consistency with Tcl string index behaviour + #special case for end+1 and equivalents. + if {$index_expression eq "end+1"} { + set index $num_chars + } elseif {$index eq "Inf"} { + #pretend our string is longer by 1 for the purposes of index resolution - so that end+1 resolves to num_chars not Inf, but end+2 and above resolve to Inf as normal. + set testindex [punk::lib::lindex_resolve [expr {$num_chars+1}] $index_expression] + if {[string is integer -strict $testindex]} { + set index $num_chars + } else { + #if we can't resolve end+1 to a valid index, we treat it as out of bounds and return empty string - for consistency with Tcl string index behaviour + return "" + } + } + if {$num_chars == 0} { + #todo - tests: + #P% ansistring VIEW [ansistring INDEXCODE [a+ red][a+ Cyan] 0] + #- ␛[31;46m + #P% ansistring VIEW [ansistring INDEXCODE [a+ red][a+ Cyan] 1] + #P% ansistring VIEW [ansistring INDEXCODE [a+ red][a+ Cyan] end] + #- ␛[31;46m + #P% ansistring VIEW [ansistring INDEXCODE [a+ red][a+ Cyan] end+1] + #- ␛[31;46m + #P% ansistring VIEW [ansistring INDEXCODE [a+ red][a+ Cyan] end+2] + if {$index == 0} { + return [punk::ansi::codetype::sgr_merge $string] + } else { + return "" + } } #any pt could be empty if using split_codes_single (or just first and last pt if split_codes) @@ -9033,45 +9751,283 @@ tcl::namespace::eval punk::ansi::ansistring { set codestack [list] #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) + set idx -1 foreach {pt code} $ansisplits { + incr idx incr pt_index 2 #we want an index per grapheme - whether it is doublewide or single + set graphemes [lindex $pt_graphemes $idx] + if {$pt ne ""} { + #set graphemes [punk::char::grapheme_split $pt] + set low [expr {$high + 1}] ;#last high + #incr high [tcl::string::length $pt] + incr high [llength $graphemes] + if {$index >= $low && $index <= $high} { + set pt_found $pt_index + #set char [tcl::string::index $pt $index-$low] + set char [lindex $graphemes $index-$low] + break + } + } + + set code_endswith_m [expr {[tcl::string::index $code end] eq "m"}] + if {$code_endswith_m} { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + #we can throw away previous codestack + #set codestack [list] + #vs + set codestack [list $code] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + #may have partial resets + #sgr_merge will handle at end + #we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. + if {[punk::ansi::codetype::is_sgr $code]} { + lappend codestack $code + } + } + } + #else - we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. + #Review - consider if any other types of code make sense to retain in the output in this context. + } + + #If we didn't find a pt at the index - we must have an index that is 1 past the end of the string - in which case we want to return the code in effect at the end of the string, if any. + #(we have already ruled out indexes that are before the start of the string or more than 1 past the end of the string) + #so we don't need to test pt_found here. + if {[llength $codestack]} { + return [punk::ansi::codetype::sgr_merge $codestack] + } else { + return "" + } + } + proc INDEXCHAR {string index} { + #*** !doctools + #[call [fun INDEXCHAR] [arg string] [arg index]] + #[para]Returns the character at the position of the character returned by INDEX, but with any ANSI codes stripped from it. + #This is effectively the same as calling ansistrip on the result of INDEX. + + #use split_at_codes ? (doesn't seem to be any faster than split_codes_single) + set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run + + #set num_chars [COUNT $string] ;#one grapheme per character - so this is the index we want to resolve against. + set num_chars 0 + set pt_index -2 + set pt_graphemes [list] + foreach {pt code} $ansisplits { + incr pt_index 2 + if {$pt eq ""} { + lappend pt_graphemes [list] + continue + } + set graphemes [punk::char::grapheme_split $pt] + lappend pt_graphemes $graphemes + incr num_chars [llength $graphemes] + } + + set index [punk::lib::lindex_resolve_basic $num_chars $index] ;#returns -Inf for out of bounds at either end. + if {$index eq "-Inf"} {return ""} ;#out of bounds - we return empty string for consistency with Tcl string index behaviour + + #any pt could be empty if using split_codes_single (or just first and last pt if split_codes) + set low -1 + set high -1 + set char "" + + #no need to track ANSI state here. + + foreach {pt code} $ansisplits { if {$pt ne ""} { set graphemes [punk::char::grapheme_split $pt] set low [expr {$high + 1}] ;#last high #incr high [tcl::string::length $pt] incr high [llength $graphemes] + if {$index >= $low && $index <= $high} { + set char [lindex $graphemes $index-$low] + break + } } + } + return $char + } - if {$pt ne "" && ($index >= $low && $index <= $high)} { - set pt_found $pt_index - #set char [tcl::string::index $pt $index-$low] - set char [lindex $graphemes $index-$low] - break + proc RANGE {string startindex endindex} { + #*** !doctools + #[call [fun RANGE] [arg string] [arg startindex] [arg endindex]] + #[para]Returns the substring of string between the character positions startindex and endindex, inclusive, where the character positions are determined by INDEX. The returned substring will include any ANSI codes that are in effect for those characters. + + #TODO - special case processing for end+n and equivalents to return trailing ANSI codes as well. + #(similar to INDEXCODE) + + + #set num_chars [COUNT $string] ;#one grapheme per character - so this is the index we want to resolve against. + set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run + + #set num_chars [COUNT $string] ;#one grapheme per character - so this is the index we want to resolve against. + set num_chars 0 + set pt_index -2 + set pt_graphemes [list] + foreach {pt code} $ansisplits { + incr pt_index 2 + if {$pt eq ""} { + lappend pt_graphemes [list] + continue + } + set graphemes [punk::char::grapheme_split $pt] + lappend pt_graphemes $graphemes + incr num_chars [llength $graphemes] + } + + #for range we may need to return a result even if one or both indices are out of bounds + #if both are out of bounds at the same end - we return empty string for consistency with Tcl string index behaviour + set index_start [punk::lib::lindex_resolve $num_chars $startindex] + set index_end [punk::lib::lindex_resolve $num_chars $endindex] + if {$index_start eq "-Inf" && $index_end eq "-Inf"} {return ""} ;#out of bounds at lower end + if {$index_start eq "Inf" && $index_end eq "Inf"} {return ""} ;#out of bounds at upper end + #if the start is out bounds at the upper end and end is out of bounds at the lower end we also return empty string - as the range is effectively empty + #Also if start index > end index we return empty string - as the range is empty + #the expr intrinsic to if can test with -Inf and Inf against each other and integers - so we can do this with a single condition + if {$index_start > $index_end} {return ""} + #we can now assert that our range covers at least part of the string. + + #any pt could be empty if using split_codes_single (or just first and last pt if split_codes) + set low -1 + set high -1 + set pt_index -2 + set pt_found -1 + set char "" + #set grapheme_codestacks [list] ;#stack of codes per grapheme - will be flattened/coalesced + set codestack [list] + #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go + #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) + set idx -1 + set rangeresult "" + if {$index_start eq "-Inf"} { + set index_start 0 + } + if {$index_end eq "Inf"} { + set index_end [expr {$num_chars - 1}] + } + set START $index_start + set END $index_end + foreach {pt code} $ansisplits { + incr idx + incr pt_index 2 + #we want an index per grapheme - whether it is doublewide or single + set graphemes [lindex $pt_graphemes $idx] + if {$pt ne ""} { + set low [expr {$high + 1}] ;#last high + set high [expr {$low + [llength $graphemes]-1}] + if {$START >= $low && $START <= $high} { + if {$END >= $low && $END <= $high} { + #both start and end in this pt + append rangeresult [punk::ansi::codetype::sgr_merge $codestack][join [lrange $graphemes $START-$low $END-$low] ""] + break + } else { + #start in this pt but end is not - so we want to take the rest of this pt and then keep going until we find the end + append rangeresult [punk::ansi::codetype::sgr_merge $codestack][join [lrange $graphemes $START-$low end] ""] + incr START [expr {[llength $graphemes] - ($START - $low)}] ;#we want to start from the next character after this pt + } + } + } + + set code_endswith_m [expr {[tcl::string::index $code end] eq "m"}] + if {$code_endswith_m} { + if {[punk::ansi::codetype::is_sgr_reset $code]} { + #we can throw away previous codestack + #set codestack [list] + #vs + set codestack [list $code] + } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { + set codestack [list $code] + } else { + #may have partial resets + #sgr_merge will handle at end + #we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. + if {[punk::ansi::codetype::is_sgr $code]} { + lappend codestack $code + } + } + } + #else - we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. + #Review - consider if any other types of code make sense to retain in the output in this context. + } + return $rangeresult + } + + proc INSERT {string index insertstr} { + #*** !doctools + #[call [fun INSERT] [arg string] [arg index] [arg insertstr]] + #[para]Returns a new string with insertstr inserted at the position of the character returned by INDEX, or at the end of the string if index is out of bounds. + #[para]The inserted string will be wrapped in any ANSI codes that are in effect at that position, so for example if there is red foreground in effect at that position, the inserted string will also be red. + + set char_len [COUNT $string] + set index [punk::lib::lindex_resolve $char_len $index] + #returns -Inf for out of bounds at lower end and Inf for out of bounds at upper end + #todo - if index out of bounds at end insertstr is appended. + # if index out of bounds at start - insertstr is prepended. + if {$index eq "-Inf"} { + return $insertstr$string + } elseif {$index eq "Inf"} { + return $string$insertstr + } + + set parts [punk::ansi::ta::split_codes_single $string] ;#odd number of elements - alternating plaintext and codes + #we need to loop through the parts to find the right position to insert at + + set ptchunk_posn -2 ;#position of plain text chunk within the parts list. + set start_ptindex -1 ;#index of first character in the current plain text chunk. Index is how many chars into the entire string we are (not counting ansi codes) + #- so the first char of the first pt chunk is index 0, and the first char of the second pt chunk is index (length of first pt chunk) etc. + set end_ptindex -1 ;#index of last character in the current plain text chunk. + + set out "" + set inserted 0 + foreach {pt code} $parts { + incr ptchunk_posn 2 + if {$inserted} { + #we have already done the insertion - so we just want to copy the rest of the string as is - we don't need to keep track of codes any more. + append out $pt$code + continue + } + #we need to keep track of the codes in effect as we go - so we can apply them to the inserted string when we find the right position + if {$pt ne ""} { + set start_ptindex [expr {$end_ptindex + 1}] + set graphemes [punk::char::grapheme_split $pt] + incr end_ptindex [llength $graphemes] + + if {$index >= $start_ptindex && $index <= $end_ptindex} { + #we have found the right position to insert at + set inserted $ptchunk_posn ;#any non zero value is a token to indicate we have done the insertion. + #we want to insert before the character at index - so we need to split the pt at the right position + set char_index [expr {$index - $start_ptindex}] + set before [join [lrange $graphemes 0 $char_index-1] ""] + set after [join [lrange $graphemes $char_index end] ""] ;#includes char at index + set pt "$before$insertstr$after" + } } if {[punk::ansi::codetype::is_sgr_reset $code]} { - #we can throw away previous codestack - set codestack [list] + set codestack [list $code] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] } else { #may have partial resets - #sgr_merge_list will handle at end + #sgr_merge will handle at end #we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. #Review - consider if any other types of code make sense to retain in the output in this context. if {[punk::ansi::codetype::is_sgr $code]} { lappend codestack $code } } - - } - if {$pt_found >= 0} { - return [punk::ansi::codetype::sgr_merge_list {*}$codestack]$char - } else { - return "" + append out $pt$code } + return $out + } + proc INSERT_WITH_REPLAY {string index insertstr} { + #use INDEXCODE to find the code at the index, and re-apply it after the inserted string + #- so that the inserted string's ANSI applies to it, but any subsequent text is not affected. + puts stderr "INSERT_WITH_REPLAY incomplete" + return $string ;#placeholder } #helper to convert indices (possibly of form x+y end-x etc) to numeric values within the payload range i.e without ansi @@ -9355,6 +10311,9 @@ namespace eval punk::ansi::colour { } punk::assertion::active on + #Terminals should generate the 256-color palette from the user's base16 theme. + #https://gist.github.com/jake-stewart/0a8ea46159a7da2c808e5be2177e1783 + #see also the tk function #winfo rgb . |#XXXXXX|#XXX @@ -9419,6 +10378,25 @@ namespace eval punk::ansi::colour { } + #are these mappings only applicable to the default 256-colour palette? + #Probably - but that is the most common one and the one that most closely matches the RGB colour space. + proc RGB2term256 {R G B} { + set r [expr {$R/255.0}] + set g [expr {$G/255.0}] + set b [expr {$B/255.0}] + set rcode [expr {int($r*5+0.5)}] + set gcode [expr {int($g*5+0.5)}] + set bcode [expr {int($b*5+0.5)}] + return [expr {16 + ($rcode*36) + ($gcode*6) + $bcode}] + } + proc term2562RGB {code} { + set r [expr {int($code/36)}] + set g [expr {int(($code%36)/6)}] + set b [expr {$code%6}] + return [list [expr {int($r*255/5)}] [expr {int($g*255/5)}] [expr {int($b*255/5)}]] + } + + proc hsl2RGB { H S L } { if { $L < 0.5 } { set Q [expr {$L*(1.0+$S)}] @@ -9859,7 +10837,7 @@ interp alias {} ansistring {} ::punk::ansi::ansistring namespace eval ::punk::args::register { #use fully qualified so 8.6 doesn't find existing var in global namespace - lappend ::punk::args::register::NAMESPACES ::punk::ansi ::punk::ansi::colour ::punk::ansi::argdoc ::punk::ansi::class ::punk::ansi::ta + lappend ::punk::args::register::NAMESPACES ::punk::ansi ::punk::ansi::ansistring ::punk::ansi::colour ::punk::ansi::argdoc ::punk::ansi::class ::punk::ansi::ta ::punk::ansi::codetype } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ diff --git a/src/bootsupport/modules/punk/ansi/colourmap-0.1.0.tm b/src/bootsupport/modules/punk/ansi/colourmap-0.1.0.tm index 3f914682..581c6041 100644 --- a/src/bootsupport/modules/punk/ansi/colourmap-0.1.0.tm +++ b/src/bootsupport/modules/punk/ansi/colourmap-0.1.0.tm @@ -103,7 +103,7 @@ tcl::namespace::eval ::punk::ansi::colourmap { name -type string|stringstartswith(#) }] proc get_rgb_using_tk {name} { - package require tk + package require Tk ;#package require tk (lowercase) doesn't always work #assuming 'winfo depth .' is always 32 ? set RGB [winfo rgb . $name] set rgb [lmap n $RGB {expr {$n / 256}}] diff --git a/src/bootsupport/modules/punk/args-0.2.1.tm b/src/bootsupport/modules/punk/args-0.2.1.tm index 8f7d06a8..4a1445bb 100644 --- a/src/bootsupport/modules/punk/args-0.2.1.tm +++ b/src/bootsupport/modules/punk/args-0.2.1.tm @@ -1250,6 +1250,11 @@ tcl::namespace::eval punk::args { } set optionspecs [list] + #REVIEW - whilst this is only done once for each command definition, the -help section processing is sometimes expensive, + #and isn't required for parsing of arguments, so it unnecessarily slows first use of a command that uses punk::args and is heavily documented, + #especially if it has tcl syntax highlighted examples. + #- ideally we would delay expansion of -help sections until needed for display, + #and use a different cache key for the parsing vs display versions of the resolved definition. foreach block $normargs { if {[string first \$\{ $block] >= 0} { if {$defspace ne ""} { @@ -2550,7 +2555,7 @@ tcl::namespace::eval punk::args { tcl::dict::set spec_merged -typesynopsis $specval } -parsekey - -group { - tcl::dict::set spec_merged -typesynopsis $specval + tcl::dict::set spec_merged $spec $specval } -mash { #allow when any alt in argname is a single letter flag such s -a or -Z @@ -8535,7 +8540,7 @@ tcl::namespace::eval punk::args { } #todo - move block below up here. if {!$all_mashable} { - puts stderr "Debug: flagsupplied '$flagsupplied' not a valid flagname and not a valid mash of flags - treating as value" + #puts stderr "Debug: flagsupplied '$flagsupplied' not a valid flagname and not a valid mash of flags - treating as value" #- probably isn't a flag at all - could be a value #treat as value set optionset "" @@ -8668,9 +8673,8 @@ tcl::namespace::eval punk::args { #tcl::dict::set opts $flag_ident $tdflt if {$flag_ident_is_parsekey} { #(shimmer - but required for ordering correctness during override) - puts stderr "Debug: flag '$mashflagname' in mash '$flagsupplied' flag_ident '$flag_ident' is the same as parsekey '$api_opt' tdflt: $tdflt - using lappend to ensure it ends up after any previous flag in the mash that had the same parsekey" + #puts stderr "Debug: flag '$mashflagname' in mash '$flagsupplied' flag_ident '$flag_ident' is the same as parsekey '$api_opt' tdflt: $tdflt - using lappend to ensure it ends up after any previous flag in the mash that had the same parsekey" lappend opts $flag_ident $tdflt - puts stderr "opts after lappend: $opts" } else { tcl::dict::set opts $flag_ident $tdflt } @@ -10241,6 +10245,128 @@ tcl::namespace::eval punk::args { } } + + proc _synopsis_form_arg_display {formdict argname} { + #non-colour SGR such as bold/italic/strike - so we don't need to worry about NOCOLOR settings + set I "\x1b\[3m" ;#[punk::ansi::a+ italic] + set NI "\x1b\[23m" ;# [punk::ansi::a+ noitalic] + #for inner question marks marking optional type + set IS "\x1b\[3\;9m" ;#[punk::ansi::a+ italic strike] + set NIS "\x1b\[23\;29m" ;#[punk::ansi::a+ noitalic nostrike] + set RST "\x1b\[m" ;#[punk::ansi::a] + + set arginfo [dict get $formdict ARG_INFO $argname] + set typelist [dict get $arginfo -type] + set ts [Dict_getdef $arginfo -typesynopsis ""] + + set n [expr {[llength $typelist]-1}] + set name_tail [lrange $argname end-$n end];#if there are enough tail words in the argname to match -types + set clause "" + if {$ts ne ""} { + set tp_displaylist $ts + } else { + set tp_displaylist [lrepeat [llength $typelist] ""] + } + + foreach typespec $typelist td $tp_displaylist elementname $name_tail { + #elementname will commonly be empty + if {[string match {\?*\?} $typespec]} { + set tp [string range $typespec 1 end-1] + set member_optional 1 + } else { + set tp $typespec + set member_optional 0 + } + if {$td ne ""} { + set c $td + } else { + #handle alternate-types e.g literal(text)|literal(binary) + set alternates [list] + set type_alternatives [_split_type_expression $tp] + foreach tp_alternative $type_alternatives { + set tp_alternative_word1 [lindex $tp_alternative 0] + set match [lindex $tp_alternative 1] + switch -exact -- $tp_alternative_word1 { + literal { + lappend alternates [list $match] + } + literalprefix { + #todo - trie styling on prefix calc + lappend alternates [list $match] + } + stringstartswith { + lappend alternates [list $match*] + } + stringendswith { + lappend alternates [list *$match] + } + default { + #we'll only take display hints from the name itself if there was no defined typesynopsis element for this position in the type, + #and if the type-alternatives don't specify a literal or string match that we can use for display + #and if there are enough tail words in the argname to match the position in the type list + #empty strings can be put in -typesynopsis positions to only override the type information for certain elements of the clause + #- e.g for a type list of {string int} we could specify a typesynopsis of {"" "count"} to get display of "FILENAME count" for an argname of "file FILENAME FILECOUNT" + if {[llength $name_tail] >= [llength $typelist]} { + #important to list protect $elementname e.g look at ::apply + #The name may contain spaces e.g "{args body ?namespace?}" + #This must not be split into multiple words - it is a single element name that happens to contain spaces. + lappend alternates $I[list $elementname]$NI + } else { + lappend alternates $I<$tp_alternative>$NI + } + } + } + } + + set alternates [punk::args::lib::lunique $alternates] + set c [join $alternates |] + } + + + if {$member_optional} { + #append clause " " "(?$c?)" + append clause " " "\[$c\]" + } else { + append clause " " $c + } + } + set clause [string trimleft $clause] + + #set ARGD [dict create argname $argname class leader] + if {[dict get $arginfo -optional] || [dict exists $arginfo -default]} { + if {[dict get $arginfo -multiple]} { + #set display "?$I$argname$NI?..." + set display "\[$clause\]..." + } else { + set display "\[$clause\]" + #if {[dict exists $arginfo -choices] && [llength [dict get $arginfo -choices]] == 1} { + # set display "?[lindex [dict get $arginfo -choices] 0]?" + #} elseif {[dict get $arginfo -type] eq "literal"} { + # set display "?$argname?" + #} else { + # set display "?$I$argname$NI?" + #} + } + } else { + if {[dict get $arginfo -multiple]} { + #set display "$I$argname$NI ?$I$argname$NI?..." + set display "$clause \[$clause\]..." + } else { + set display $clause + #if {[dict exists $arginfo -choices] && [llength [dict get $arginfo -choices]] == 1} { + # set display "[lindex [dict get $arginfo -choices] 0]" + #} elseif {[dict get $arginfo -type] eq "literal"} { + # set display $argname + #} else { + # set display "$I$argname$NI" + #} + } + } + return $display + } + + + lappend PUNKARGS [list { @id -id ::punk::args::synopsis @cmd -name punk::args::synopsis\ @@ -10295,7 +10421,19 @@ tcl::namespace::eval punk::args { if {$spec eq ""} { return } - set form_names [dict get $spec form_names] + + set dict_idx_to_name [dict create] + set dict_name_to_idx [dict create] + set all_form_names [dict get $spec form_names] + set idx 0 + #assert: form_names is ordered as defined in the command definition - so idx into it is stable. + foreach fn $all_form_names { + dict set dict_idx_to_name $idx $fn + dict set dict_name_to_idx $fn $idx + incr idx + } + + set form_names $all_form_names if {$form ne "*"} { if {[string is integer -strict $form]} { set f [lindex $form_names $form] @@ -10314,171 +10452,51 @@ tcl::namespace::eval punk::args { } set SYND [dict create] - dict set SYND cmd_info [dict get $spec cmd_info] + set c_info [dict get $spec cmd_info] + set cmd_info [dict create] + dict for {k v} $c_info { + if {[string match -* $k]} { + dict set cmd_info [string range $k 1 end] $v + } + } + dict set SYND COMMAND $cmd_info + #leading "# " required (punk::ns::synopsis will pass through) if {![dict exists $received -noheader]} { set syn "# [Dict_getdef $spec cmd_info -summary ""]\n" + set GRY "\x1b\[38\;5\;8m" + set RST "\x1b\[m" } #todo - -multiple etc foreach f $form_names { - set SYNLIST [list] - dict set SYND FORMS $f [list] - append syn "$id" set forminfo [dict get $spec FORMS $f] - #foreach argname [dict get $forminfo LEADER_NAMES] { - # set arginfo [dict get $forminfo ARG_INFO $argname] - # set ARGD [dict create argname $argname class leader] - # if {[dict exists $arginfo -choices] && [llength [dict get $arginfo -choices]] == 1} { - # set display [lindex [dict get $arginfo -choices] 0] - # } elseif {[dict get $arginfo -type] eq "literal"} { - # set display $argname - # } else { - # set display $I$argname$RST - # } - # if {[dict get $arginfo -optional]} { - # append syn " ?$display?" - # } else { - # append syn " $display" - # } - # dict set ARGD type [dict get $arginfo -type] - # dict set ARGD optional [dict get $arginfo -optional] - # dict set ARGD display $display - # dict lappend SYND $f $ARGD - #} + set idx [dict get $dict_name_to_idx $f] + dict set SYND FORMS $f [dict create] + if {![dict exists $received -noheader]} { + set formsummary "FORM $idx $f" + if {[dict exists $forminfo -summary]} { + append formsummary " - [dict get $forminfo -summary]" + } + append syn "## $GRY$formsummary$RST\n" + } + append syn "$id" set FORMARGS [list] - foreach argname [dict get $forminfo LEADER_NAMES] { - set arginfo [dict get $forminfo ARG_INFO $argname] - set typelist [dict get $arginfo -type] - if {[llength $typelist] == 1} { - set tp [lindex $typelist 0] - set ts [Dict_getdef $arginfo -typesynopsis ""] - if {$ts ne ""} { - #set arg_display [dict get $arginfo -typesynopsis] - set clause $ts - } else { - #set arg_display $argname - set alternates [list];#alternate acceptable types e.g literal(yes)|literal(ok) or indexpression|literal(first) - set type_alternatives [_split_type_expression $tp] - foreach tp_alternative $type_alternatives { - set tp_alternative_word1 [lindex $tp_alternative 0] - switch -exact -- $tp_alternative_word1 { - literal { - set match [lindex $tp_alternative 1] - lappend alternates $match - } - literalprefix { - #todo - trie styling on prefix calc - set match [lindex $tp_alternative 1] - lappend alternates $match - } - stringstartswith { - set match [lindex $tp_alternative 1] - lappend alternates $match* - } - stringendswith { - set match [lindex $tp_alternative 1] - lappend alternates *$match - } - default { - lappend alternates $I$argname$NI - } - } - - #if {$tp_alternative eq "literal"} { - # lappend alternates [lindex $argname end] - #} elseif {[string match literal(*) $tp_alternative]} { - # set match [string range $tp_alternative 8 end-1] - # lappend alternates $match - #} elseif {[string match literalprefix(*) $tp_alternative]} { - # set match [string range $tp_alternative 14 end-1] - # lappend alternates $match - #} else { - # lappend alternates $I$argname$NI - #} - } - #remove dupes - but keep order (e.g of dupes -type string|int when no -typesynopsis was specified) - #todo - trie prefixes display - set alternates [punk::args::lib::lunique $alternates] - set clause [join $alternates |] - } - } else { - set n [expr {[llength $typelist]-1}] - set name_tail [lrange $argname end-$n end];#if there are enough tail words in the argname to match -types - set clause "" - set ts [Dict_getdef $arginfo -typesynopsis ""] - if {$ts ne ""} { - set tp_displaylist $ts - } else { - set tp_displaylist [lrepeat [llength $typelist] ""] - } - foreach typespec $typelist td $tp_displaylist elementname $name_tail { - #elementname will commonly be empty - if {[string match {\?*\?} $typespec]} { - set tp [string range $typespec 1 end-1] - set member_optional 1 - } else { - set tp $typespec - set member_optional 0 - } - if {$tp eq "literal"} { - set c $elementname - } elseif {[string match literal(*) $tp]} { - set match [string range $tp 8 end-1] - set c $match - } else { - if {$td eq ""} { - set c $I$tp$NI - } else { - set c $td - } - } - if {$member_optional} { - append clause " " "(?$c?)" - } else { - append clause " " $c - } - } - set clause [string trimleft $clause] - } + foreach argname [dict get $forminfo LEADER_NAMES] { + set display [_synopsis_form_arg_display $forminfo $argname] + append syn " $display" + set arginfo [dict get $forminfo ARG_INFO $argname] set ARGD [dict create argname $argname class leader] - if {[dict get $arginfo -optional] || [dict exists $arginfo -default]} { - if {[dict get $arginfo -multiple]} { - #set display "?$I$argname$NI?..." - set display "?$clause?..." - } else { - set display "?$clause?" - #if {[dict exists $arginfo -choices] && [llength [dict get $arginfo -choices]] == 1} { - # set display "?[lindex [dict get $arginfo -choices] 0]?" - #} elseif {[dict get $arginfo -type] eq "literal"} { - # set display "?$argname?" - #} else { - # set display "?$I$argname$NI?" - #} - } - } else { - if {[dict get $arginfo -multiple]} { - #set display "$I$argname$NI ?$I$argname$NI?..." - set display "$clause ?$clause?..." - } else { - set display $clause - #if {[dict exists $arginfo -choices] && [llength [dict get $arginfo -choices]] == 1} { - # set display "[lindex [dict get $arginfo -choices] 0]" - #} elseif {[dict get $arginfo -type] eq "literal"} { - # set display $argname - #} else { - # set display "$I$argname$NI" - #} + dict set ARGD type [dict get $arginfo -type] + dict set ARGD optional [dict get $arginfo -optional] + dict set ARGD multiple [dict get $arginfo -multiple] + foreach k {choices choiceprefix choicerestricted choicemultiple} { + if {[dict exists $arginfo -$k]} { + dict set ARGD $k [dict get $arginfo -$k] } } - append syn " $display" - dict set ARGD type [dict get $arginfo -type] - dict set ARGD optional [dict get $arginfo -optional] - dict set ARGD multiple [dict get $arginfo -multiple] dict set ARGD display $display - - #dict lappend SYND $f $ARGD lappend FORMARGS $ARGD } foreach argname [dict get $forminfo OPT_NAMES] { @@ -10490,7 +10508,7 @@ tcl::namespace::eval punk::args { #(disallowed in punk::args::define) set argdisplay $argname } else { - #assert [llength $tp] == 1 (multiple values for flag unspported in punk::args::define) + #assert [llength $tp] == 1 (multiple values for flag unsupported in punk::args::define) if {[string match {\?*\?} $tp]} { set tp [string range $tp 1 end-1] set value_is_optional true @@ -10509,19 +10527,30 @@ tcl::namespace::eval punk::args { } else { set alternates [list];#alternate acceptable types e.g literal(yes)|literal(ok) or indexpression|literal(first) - foreach tp_alternative [split $tp |] { - #-type literal not valid for opt - review - if {[string match literal(*) $tp_alternative]} { - set match [string range $tp_alternative 8 end-1] - lappend alternates $match - } elseif {[string match literalprefix(*) $tp_alternative]} { - set match [string range $tp_alternative 14 end-1] - lappend alternates $match - } else { - lappend alternates <$I$tp_alternative$NI> + set type_alternatives [_split_type_expression $tp] + foreach tp_alternative $type_alternatives { + set match [lindex $tp_alternative 1] + switch -- [lindex $tp_alternative 0] { + literal { + lappend alternates [list $match] + } + literalprefix { + lappend alternates [list $match] + } + stringstartswith { + lappend alternates [list $match*] + } + stringendswith { + lappend alternates [list *$match] + } + default { + lappend alternates $I<$tp_alternative>$NI + } } } - #todo - trie prefixes display? + #trie prefixes display? + #we probably don't want to show prefixes in synopsis. + #AI agents should be encouraged to use full values for clarity, and human users can refer to help for the prefix info if they care. set alternates [punk::args::lib::lunique $alternates] set tp_display [join $alternates |] } @@ -10529,44 +10558,102 @@ tcl::namespace::eval punk::args { #need to bracket alternate-types to distinguish pipes delimiting flag aliases set tp_display "($tp_display)" } + + + #consider optional: -f|--file|--file= -type string|num + #we can't show this as [-f|--file|--file= string|num] + #because the pipes make visually parsing it ambiguous. + #we *could* show this as [-f|--file|--file= (string|num)] + # but it lacks clarity in descripting we can supply --file string or --file=string + #showing it as [-f (string|num)|--file (string|num)|--file=(string|num)] is not as compact as it could be, but is reasonably precise. + #we could merge the first two to avoid repeating the type info - but then we would also need brackets to clarify the pipe applicability: + #e.g + # [(-f|--file (string|num))|--file=(string|num)] + # + #we choose to only merge in the case where there are no trailing= aliases or they are all trailing= aliases. + set aliasflags [split $argname |] + #set has_longopt_inlinevalue_alias [expr {[lsearch -glob $aliasflags *=] >= 0}] + set num_longopt_inlinevalue_aliases [llength [lsearch -all -glob $aliasflags *=]] ;#count list of indices of aliasflags that end with = + set homogenous_aliases [expr {$num_longopt_inlinevalue_aliases == 0 || $num_longopt_inlinevalue_aliases == [llength $aliasflags]}] + set argdisplay "" - foreach aliasflag [split $argname |] { - if {[string match --* $aliasflag]} { - if {[string index $aliasflag end] eq "="} { - set alias [string range $aliasflag 0 end-1] - if {$value_is_optional} { - append argdisplay "$alias$IS?$NIS=$tp_display$IS?$NIS|" + if {!$homogenous_aliases} { + foreach aliasflag $aliasflags { + if {[string match --* $aliasflag]} { + if {[string index $aliasflag end] eq "="} { + set alias [string range $aliasflag 0 end-1] + if {$value_is_optional} { + #append argdisplay "$alias$IS\[$NIS=$tp_display$IS\]$NIS|" + append argdisplay "$alias$I\[$NI=$tp_display$I\]$NI|" + } else { + append argdisplay "$alias=$tp_display|" + } } else { - append argdisplay "$alias=$tp_display|" + if {$value_is_optional} { + #double-dashed flag without trailing = can't accept optional value + #append argdisplay "$aliasflag $IS\[$NIS$tp_display$IS\]$NIS|" + append argdisplay "$aliasflag|" + } else { + append argdisplay "$aliasflag $tp_display|" + } } } else { if {$value_is_optional} { - append argdisplay "$aliasflag $IS?$NIS$tp_display$IS?$NIS|" + #flag can't accept optional value + append argdisplay "$aliasflag|" } else { append argdisplay "$aliasflag $tp_display|" } } + } + set argdisplay [string trimright $argdisplay |] + } else { + if {$num_longopt_inlinevalue_aliases > 0} { + #all aliases are longopt inlinevalue aliases + #review + # --file=|--fname= -type string + # -> (--file|--fname)=type + # or + # -> (--file|--fname)[=type] + + #first transform the argname to remove the trailing = and bracket the aliases if there are multiple + #review - we don't expect any arguments to be defined with inner = in the name. + #todo - enforce no inner = in argname in punk::args::define for options? + # + set argname "[string map {= ""} $argname]" + if {$num_longopt_inlinevalue_aliases > 1} { + set argname "($argname)" + } + + if {$value_is_optional} { + set argdisplay "$argname$I\[$NI=$tp_display$I\]$NI" + } else { + set argdisplay "$argname=$tp_display" + } } else { + #no longopts with trailing = aliases, so we can show the type info without ambiguity as applying to all aliases if {$value_is_optional} { - #single dash flag can't accept optional value - append argdisplay "$aliasflag|" + set argdisplay "$argname $I\[$NI$tp_display$I\]$NI" } else { - append argdisplay "$aliasflag $tp_display|" + set argdisplay "$argname $tp_display" } } } - set argdisplay [string trimright $argdisplay |] } + if {[dict get $arginfo -optional]} { if {[dict get $arginfo -multiple]} { - set display "?$argdisplay?..." + #set display "?$argdisplay?..." + set display "\[$argdisplay\]..." } else { - set display "?$argdisplay?" + #set display "?$argdisplay?" + set display "\[$argdisplay\]" } } else { if {[dict get $arginfo -multiple]} { - set display "$argdisplay ?$argdisplay?..." + #set display "$argdisplay ?$argdisplay?..." + set display "$argdisplay \[$argdisplay\]..." } else { set display $argdisplay } @@ -10606,136 +10693,43 @@ tcl::namespace::eval punk::args { # } # } #} + + #todo -mash append syn " $display" - dict set ARGD type [dict get $arginfo -type] - dict set ARGD optional [dict get $arginfo -optional] - dict set ARGD multiple [dict get $arginfo -multiple] + dict set ARGD type [dict get $arginfo -type] + dict set ARGD optional [dict get $arginfo -optional] + dict set ARGD multiple [dict get $arginfo -multiple] + foreach k {choices choiceprefix choicerestricted choicemultiple} { + if {[dict exists $arginfo -$k]} { + dict set ARGD $k [dict get $arginfo -$k] + } + } dict set ARGD display $display #dict lappend SYND $f $ARGD lappend FORMARGS $ARGD } + foreach argname [dict get $forminfo VAL_NAMES] { set arginfo [dict get $forminfo ARG_INFO $argname] - set typelist [dict get $arginfo -type] - if {[llength $typelist] == 1} { - set tp [lindex $typelist 0] - set ts [Dict_getdef $arginfo -typesynopsis ""] - if {$ts ne ""} { - #set arg_display [dict get $arginfo -typesynopsis] - set clause $ts - } else { - #set arg_display $argname - set alternates [list];#alternate acceptable types e.g literal(yes)|literal(ok) or indexpression|literal(first) - foreach tp_alternative [split $tp |] { - if {$tp_alternative eq "literal"} { - lappend alternates [lindex $argname end] - } elseif {[string match literal(*) $tp_alternative]} { - set match [string range $tp_alternative 8 end-1] - lappend alternates $match - } elseif {[string match literalprefix(*) $tp_alternative]} { - set match [string range $tp_alternative 14 end-1] - lappend alternates $match - } else { - lappend alternates $I$argname$NI - } - } - #remove dupes - but keep order (e.g of dupes -type string|int when no -typesynopsis was specified) - #todo - trie prefixes display - set alternates [punk::args::lib::lunique $alternates] - set clause [join $alternates |] - } - } else { - set n [expr {[llength $typelist]-1}] - set name_tail [lrange $argname end-$n end];#if there are enough tail words in the argname to match -types - set clause "" - set ts [Dict_getdef $arginfo -typesynopsis ""] - if {$ts ne ""} { - set tp_displaylist $ts - } else { - set tp_displaylist [lrepeat [llength $typelist] ""] - } - - foreach typespec $typelist td $tp_displaylist elementname $name_tail { - #elementname will commonly be empty - if {[string match {\?*\?} $typespec]} { - set tp [string range $typespec 1 end-1] - set member_optional 1 - } else { - set tp $typespec - set member_optional 0 - } - #handle alternate-types e.g literal(text)|literal(binary) - set alternates [list] - foreach tp_alternative [split $tp |] { - if {$tp_alternative eq "literal"} { - lappend alternates $elementname - } elseif {[string match literal(*) $tp_alternative]} { - set match [string range $tp_alternative 8 end-1] - lappend alternates $match - } elseif {[string match literalprefix(*) $tp_alternative]} { - set match [string range $tp_alternative 14 end-1] - lappend alternates $match - } else { - if {$td eq ""} { - lappend alternates $I$tp$NI - } else { - lappend alternates $td - } - } - } - set alternates [punk::args::lib::lunique $alternates] - set c [join $alternates |] - if {$member_optional} { - append clause " " "(?$c?)" - } else { - append clause " " $c - } - } - set clause [string trimleft $clause] - } + set display [_synopsis_form_arg_display $forminfo $argname] + append syn " $display" set ARGD [dict create argname $argname class value] - if {[dict get $arginfo -optional] || [dict exists $arginfo -default]} { - if {[dict get $arginfo -multiple]} { - #set display "?$I$argname$NI?..." - set display "?$clause?..." - } else { - set display "?$clause?" - #if {[dict exists $arginfo -choices] && [llength [dict get $arginfo -choices]] == 1} { - # set display "?[lindex [dict get $arginfo -choices] 0]?" - #} elseif {[dict get $arginfo -type] eq "literal"} { - # set display "?$argname?" - #} else { - # set display "?$I$argname$NI?" - #} - } - } else { - if {[dict get $arginfo -multiple]} { - #set display "$I$argname$NI ?$I$argname$NI?..." - set display "$clause ?$clause?..." - } else { - set display $clause - #if {[dict exists $arginfo -choices] && [llength [dict get $arginfo -choices]] == 1} { - # set display "[lindex [dict get $arginfo -choices] 0]" - #} elseif {[dict get $arginfo -type] eq "literal"} { - # set display $argname - #} else { - # set display "$I$argname$NI" - #} + dict set ARGD type [dict get $arginfo -type] + dict set ARGD optional [dict get $arginfo -optional] + dict set ARGD multiple [dict get $arginfo -multiple] + foreach k {choices choiceprefix choicerestricted choicemultiple} { + if {[dict exists $arginfo -$k]} { + dict set ARGD $k [dict get $arginfo -$k] } } - append syn " $display" - dict set ARGD type [dict get $arginfo -type] - dict set ARGD optional [dict get $arginfo -optional] - dict set ARGD multiple [dict get $arginfo -multiple] dict set ARGD display $display - #dict lappend SYND $f $ARGD lappend FORMARGS $ARGD } #accepts unnamed extra arguments e.g toplevel docid for ensembles and ensemble-like commands if {[dict get $forminfo VAL_UNNAMED]} { - set display "?...?" + set display {[...]} append syn " $display" set ARGD [dict create argname "" class value] dict set ARGD type any @@ -10745,7 +10739,7 @@ tcl::namespace::eval punk::args { lappend FORMARGS $ARGD } append syn \n - dict set SYND FORMS $f $FORMARGS + dict set SYND FORMS $f args $FORMARGS } switch -- $opt_return { full { @@ -10757,7 +10751,8 @@ tcl::namespace::eval punk::args { set summary "# [Dict_getdef $spec cmd_info -summary ""]\n" } set FORMS [dict get $SYND FORMS] - dict for {form arglist} $FORMS { + dict for {form arginfo} $FORMS { + set arglist [dict get $arginfo args] append summary $id set class_state leader set option_count 0 @@ -10774,7 +10769,7 @@ tcl::namespace::eval punk::args { incr value_count if {$class_state ne "value"} { if {$option_count > 0} { - append summary " ?options ($option_count defined)?" + append summary " \[OPTIONS ($option_count defined)\]" } set class_state value } @@ -10783,7 +10778,7 @@ tcl::namespace::eval punk::args { } } if {$value_count == 0 && $option_count > 0} { - append summary " ?options ($option_count defined)?" + append summary " \[OPTIONS ($option_count defined)\]" } append summary \n } @@ -10803,6 +10798,7 @@ tcl::namespace::eval punk::args { } + #REVIEW lappend PUNKARGS [list { @id -id ::punk::args::synopsis_summary @cmd -name punk::args::synopsis_summary -help\ @@ -10852,9 +10848,10 @@ tcl::namespace::eval punk::args { } } } - if {$code ne ""} { + if {$code ne "" && [tcl::string::index $code end] eq "m"} { if {[punk::ansi::codetype::is_sgr_reset $code]} { - set codestack [list "\x1b\[m"] + #set codestack [list "\x1b\[m"] + set codestack [list $code] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] } elseif {[punk::ansi::codetype::is_sgr $code]} { @@ -10862,10 +10859,9 @@ tcl::namespace::eval punk::args { set dup_posns [lsearch -all -exact $codestack $code] ;#must be -exact because of square-bracket glob chars set codestack [lremove $codestack {*}$dup_posns] lappend codestack $code - } else { - #? ignore other ANSI codes? } } + #? ignore other ANSI codes? } if {[string match -* $plain_s] || [string match ?- $plain_s]} { } diff --git a/src/bootsupport/modules/punk/args/moduledoc/tclcore-0.1.0.tm b/src/bootsupport/modules/punk/args/moduledoc/tclcore-0.1.0.tm index 817c3ab6..c3dffff6 100644 --- a/src/bootsupport/modules/punk/args/moduledoc/tclcore-0.1.0.tm +++ b/src/bootsupport/modules/punk/args/moduledoc/tclcore-0.1.0.tm @@ -2986,6 +2986,71 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { time -type integer -optional 1 } "@doc -name Manpage: -url [manpage_tcl file]" ] + lappend PUNKARGS [list { + @id -id ::tcl::file::attributes + @cmd -name "Built-in: tcl::file::attributes"\ + -summary\ + "Get/Set platform-specific values associated with a file/directory."\ + -help\ + "This subcommand returns or sets platform-specific values associated with a file. + The first form without specificing option, returns a list of the platform-specific options and their values. + The first form with an option returns the value for the given option. + The last form sets one or more of the values. The values are as follows: + + On Unix, ${$B}-group${$N} gets or sets the group name for the file. A group id can be given to the command, but it + returns a group name. ${$B}-owner${$N} gets or sets the user name of the owner of the file. The command returns the + owner name, but the numerical id can be passed when setting the owner. ${$B}-permissions${$N} retrieves or sets a + file's access permissions, using octal notation by default. This option also provides limited support for + setting permissions using the symbolic notation accepted by the chmod command, following the form + ${$B}[ugo]?[[+-=][rwxst],[...]]${$N}. Multiple permission specifications may be given, separated by commas. + E.g., ${$B}u+s,go-rw${$N} would set the setuid bit for a file's owner as well as remove read and write permission for + the file's group and other users. An ls-style string of the form rwxrwxrwx is also accepted but must always + be 9 characters long. E.g., ${$B}rwxr-xr-t${$N} is equivalent to ${$B}01755${$N}. On versions of Unix supporting file flags, + ${$B}-readonly${$N} returns the value of, or sets, or clears the readonly attribute of a file, i.e., the user + immutable flag (${$B}uchg${$N}) to the ${$B}chflags${$N} command. + + On Windows, ${$B}-archive${$N} gives the value or sets or clears the archive attribute of the file. ${$B}-hidden${$N} gives the + value or sets or clears the hidden attribute of the file. ${$B}-longname${$N} will expand each path element to its long + version. This attribute cannot be set. ${$B}-readonly${$N} gives the value or sets or clears the readonly attribute of + the file. ${$B}-shortname${$N} gives a string where every path element is replaced with its short (8.3) version of the + name if possible. For path elements that cannot be mapped to short names, the long name is retained. This + attribute cannot be set. ${$B}-system${$N} gives or sets or clears the value of the system attribute of the file. + + On macOS and Darwin, ${$B}-creator${$N} gives or sets the Finder creator type of the file. ${$B}-hidden${$N} gives or sets or + clears the hidden attribute of the file. ${$B}-readonly${$N} gives or sets or clears the readonly attribute of the file. + ${$B}-rsrclength${$N} gives the length of the resource fork of the file, this attribute can only be set to the value 0, + which results in the resource fork being stripped off the file. + + On all platforms, files in ${$B}zipfs${$N} mounted archives return the following attributes. + These are all read-only and cannot be directly set. + + ${$B}-archive${$N} + The path of the mounted ZIP archive containing the file. + ${$B}-compsize${$N} + The compressed size of the file within the archive. This is 0 for directories. + ${$B}-crc${$N} + The CRC of the file if present, else 0. + ${$B}-mount${$N} + The path where the containing archive is mounted. + ${$B}-offset${$N} + The offset of the file within the archive. + ${$B}-uncompsize${$N} + The uncompressed size of the file. This is ${$B}0${$N} for directories. + + Other attributes may be present in the returned list. These should be ignored." + @form -form "get" + @values -min 1 -max 2 + name -type string -optional 0 + option -type stringstartswith(-) -typesynopsis {-${$I}option${$NI}} -optional 1 + + @form -form "set" + @values -min 3 -max -1 + name -type string -optional 0 + option_value -type {stringstartswith(-) string} -typesynopsis {-${$I}option${$NI} ${$I}value${$NI}} -optional 0 -multiple 1 + + + } "@doc -name Manpage: -url [manpage_tcl file]" ] + lappend PUNKARGS [list { @id -id ::tcl::file::channels @cmd -name "Built-in: tcl::file::channels"\ @@ -3026,6 +3091,26 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { pathname -optional 1 -type string -multiple 1 } "@doc -name Manpage: -url [manpage_tcl file]" ] + lappend PUNKARGS [list { + @id -id ::tcl::file::dirname + @cmd -name "Built-in: tcl::file::dirname"\ + -summary\ + "Return a path excluding last element."\ + -help\ + "Returns a name comprised of all of the path components in name excluding the last element. + If name is a relative file name and only contains one path element, then returns “.”. If name + refers to a root directory, then the root directory is returned. For example, + + ${[punk::args::helpers::example { + ${$B} file dirname c:/ + }]} + + returns ${$B}c:/${$N}. + " + @values -min 1 -max 1 + name -type string + } "@doc -name Manpage: -url [manpage_tcl file]" ] + lappend PUNKARGS [list { @id -id ::tcl::file::copy @cmd -name "Built-in: tcl::file::copy"\ @@ -3104,7 +3189,10 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { #tcl 9+ lappend PUNKARGS [list { @id -id ::tcl::file::home - @cmd -name "Built-in: tcl::file::home" -help\ + @cmd -name "Built-in: tcl::file::home"\ + -summary\ + "Return the home directory for a user."\ + -help\ "If no argument is specified, the command returns the home directory of the current user. This is generally the value of the ${$B}$HOME${$N} environment variable except that on Windows platforms backslashes in the path are replaced by forward slashes. An error is raised if @@ -3134,7 +3222,29 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { } "@doc -name Manpage: -url [manpage_tcl file]" ] #join - #link + lappend PUNKARGS [list { + @id -id ::tcl::file::join + @cmd -name "Built-in: tcl::file::join"\ + -summary\ + "Join directory/file components into a single path."\ + -help\ + "Takes one or more file names and combines them, using the correct path separator for the current platform. + If a particular name is relative, then it will be joined to the previous file name argument. Otherwise, any + earlier arguments will be discarded, and joining will proceed from the current argument. For example, + + ${[punk::args::helpers::example { + ${$B}file join ${$N} a b /foo bar + }]} + + returns ${$B}/foo/bar${$N}. + + Note that any of the names can contain separators, and that the result is always canonical for the current + platform: ${$B}/${$N} for Unix and Windows. + " + @values -min 1 -max 1 + name -optional 0 -type string + } "@doc -name Manpage: -url [manpage_tcl file]" ] + lappend PUNKARGS [list { @id -id ::tcl::file::link @cmd -name "Built-in: tcl::file::link"\ @@ -3242,8 +3352,33 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { @values -min 1 -max 1 name -optional 0 -type string } "@doc -name Manpage: -url [manpage_tcl file]"] - #owned - #pathtype + + lappend PUNKARGS [list { + @id -id ::tcl::file::owned + @cmd -name "Built-in: tcl::file::owned"\ + -summary\ + "Test file owned by current user."\ + -help\ + "Returns ${$B}1${$N} if the file ${$I}name${$NI} is owned by the current user, ${$B}0${$N} otherwise." + @values -min 1 -max 1 + name -optional 0 -type string + } "@doc -name Manpage: -url [manpage_tcl file]"] + + lappend PUNKARGS [list { + @id -id ::tcl::file::pathtype + @cmd -name "Built-in: tcl::file::pathtype"\ + -summary\ + {Return path type. Either absolute, relative or volumerelative.}\ + -help\ + "Returns one of ${$B}absolute${$N}, ${$B}relative${$N}, ${$B}volumerelative${$N}. If name refers to a specific file on a specific + volume, the path type will be ${$B}absolute${$N}. If name refers to a file relative to the current working + directory, then the path type will be ${$B}relative${$N}. If name refers to a file relative to the current + working directory on a specified volume, or to a specific file on the current working volume, then + the path type is ${$B}volumerelative${$N}." + @values -min 1 -max 1 + name -optional 0 -type string + } "@doc -name Manpage: -url [manpage_tcl file]"] + lappend PUNKARGS [list { @id -id ::tcl::file::readable @cmd -name "Built-in: tcl::file::readable"\ @@ -3299,9 +3434,46 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { @values -min 1 -max 1 name -optional 0 -type string } "@doc -name Manpage: -url [manpage_tcl file]"] - #separator - #size - #split + + lappend PUNKARGS [list { + @id -id ::tcl::file::separator + @cmd -name "Built-in: tcl::file::separator"\ + -summary\ + {File separator character}\ + -help\ + "If no argument is given, returns the character which is used to separate path segments for native + files on this platform. If a path is given, the filesystem responsible for that path is asked to + return its separator character. If no file system accepts name, an error is generated." + @values -min 0 -max 1 + name -optional 1 -type string -help\ + "Path to query for separator character." + } "@doc -name Manpage: -url [manpage_tcl file]"] + + lappend PUNKARGS [list { + @id -id ::tcl::file::size + @cmd -name "Built-in: tcl::file::size"\ + -summary\ + {Size of named file in bytes.}\ + -help\ + "Returns a decimal string giving the size of file ${$I}name${$NI} in bytes. + If the file does not exist or its size cannot be queried then an error is generated." + @values -min 1 -max 1 + name -optional 0 -type string + } "@doc -name Manpage: -url [manpage_tcl file]"] + + lappend PUNKARGS [list { + @id -id ::tcl::file::split + @cmd -name "Built-in: tcl::file::split"\ + -summary\ + {Split a path into list of components.}\ + -help\ + "Returns a list whose elements are the path components in ${$I}name${$NI}. The first element of the list will have + the same path type as ${$I}name${$NI}. All other elements will be relative. Path separators will be discarded unless + they are needed to ensure that an element is unambiguously relative." + @values -min 1 -max 1 + name -optional 0 -type string + } "@doc -name Manpage: -url [manpage_tcl file]"] + lappend PUNKARGS [list { @id -id ::tcl::file::stat @cmd -name "Built-in: tcl::file::stat"\ @@ -3399,8 +3571,20 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { As such, they can be relied upon to be used with operating-system native APIs and external programs that require a filename." @values -min 0 -max 2 - nameVar -type string -optional 1 - template -type string -optional 1 + nameVar -type string -optional 1 -help\ + "Variable to *receive* the name of the created temporary file. + Any existing value in the variable will not be read, and is just overwritten." + template -type string -optional 1 -help\ + "On some platforms, such as windows: + - file extension is ignored. + - any directory components are ignored and + the last segment is used as a prefix for the temporary file name. + - If the TMP or TEMP environment variables are set, they are used + as the directory for the temporary file, otherwise the user's home + directory is used if it can be determined. (may depend on existence + of HOME or USERPROFILE environment variables.) + On other platforms, such as unix, the template may be handled + differently." } "@doc -name Manpage: -url [manpage_tcl file]"] #tildeexpand @@ -4528,11 +4712,16 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { }]} } - @values -min 1 + #{args body ?namespace?} is a single argument that is a list of two or three elements, + #as opposed to a clause of separate arguments. + #we don't have a way to validate the type of each element in a list - we can only check the length of the whole list. + + @values -min 1 -max -1 "{args body ?namespace?}" -optional 0 -type list -minsize 2 -maxsize 3 arg -type any -optional 1 -multiple 1 + } "@doc -name Manpage: -url [manpage_tcl apply]"\ { @examples -help { @@ -7094,7 +7283,7 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { start -type number|expr count -type literalprefix(count) countelements -type number|expr - "by step" -type {literalprefix(by) number|expr} -optional 1 + "by step" -type {?literalprefix(by)? number|expr} -optional 1 @form -form count @leaders -min 0 -max 0 @@ -10621,15 +10810,34 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { #force all on_handlers to be together and all try_handlers to be together, and it would force #one type of handler to be listed always before or always after the other. handler -optional 1 -multiple 1 -type {literal(on)|literal(trap) string list string}\ - -typesynopsis {"" code|pattern variableList script} + -typesynopsis {"" oncode_or_trappattern variableList script} + #in our typesynopsis we deliberately don't put a pipe symbol in oncode_or_trappattern. + # e.g code|pattern would imply either on or trap could be combined with either code or pattern, which is not the case. + #todo? - #a way to define a compound type? - #handler -optional 1 -multiple 1 -type {|} - ## -type {literal(on)