You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							1681 lines
						
					
					
						
							69 KiB
						
					
					
				
			
		
		
	
	
							1681 lines
						
					
					
						
							69 KiB
						
					
					
				| # -*- tcl -*- | |
| # Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt | |
| # | |
| # Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. | |
| # Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. | |
| # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ | |
| # (C) 2023 | |
| # | |
| # @@ Meta Begin | |
| # Application punk::ansi 0.1.1 | |
| # Meta platform     tcl | |
| # Meta license      <unspecified> | |
| # @@ Meta End | |
|  | |
| # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ | |
| # doctools header | |
| # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ | |
| #*** !doctools | |
| #[manpage_begin punkshell_module_punk::ansi 0 0.1.1] | |
| #[copyright "2023"] | |
| #[titledesc {Ansi string functions}]  [comment {-- Name section and table of contents description --}] | |
| #[moddesc {punk Ansi library}]             [comment {-- Description at end of page heading --}]  | |
| #[require punk::ansi] | |
| #[keywords module ansi terminal console string] | |
| #[description] | |
| #[para]Ansi based terminal control string functions  | |
| #[para]See [package punk::ansi::console] for related functions for controlling a console  | |
|  | |
| # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ | |
|  | |
| #*** !doctools | |
| #[section Overview] | |
| #[para] overview of punk::ansi  | |
| #[para]punk::ansi functions return their values - no implicit emission to console/stdout | |
| #[subsection Concepts] | |
| #[para]Ansi codes can be used to control most terminals on most platforms in an 'almost' standard manner  | |
| #[para]There are many differences in terminal implementations - but most should support a core set of features | |
| #[para]punk::ansi does not contain any code for direct terminal manipulation via the local system APIs. | |
| #[para]Sticking to ansi codes where possible may be better for cross-platform and remote operation where such APIs are unlikely to be useable. | |
|  | |
|  | |
| # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ | |
| ##  Requirements | |
| # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ | |
|  | |
| #*** !doctools | |
| #[subsection dependencies] | |
| #[para] packages used by punk::ansi  | |
| #[list_begin itemized] | |
|  | |
| package require Tcl 8.6 | |
| #*** !doctools | |
| #[item] [package {Tcl 8.6}] | |
|  | |
| # #package require frobz | |
| # #*** !doctools | |
| # #[item] [package {frobz}] | |
|  | |
| #*** !doctools | |
| #[list_end] | |
|  | |
| # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ | |
|  | |
| #*** !doctools | |
| #[section API] | |
|  | |
|  | |
|  | |
|  | |
| # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ | |
| namespace eval punk::ansi { | |
|     #*** !doctools | |
|     #[subsection {Namespace punk::ansi}] | |
|     #[para] Core API functions for punk::ansi  | |
|     #[list_begin definitions] | |
|  | |
|  | |
|     #see also ansicolor page on wiki https://wiki.tcl-lang.org/page/ANSI+color+control  | |
|  | |
|     variable test "blah\033\[1;33mETC\033\[0;mOK" | |
|  | |
|  | |
|     #Note that a? is actually a pattern. We can't explicitly match it without also matcing a+ ab etc.  Presumably this won't matter here. | |
|     namespace export\ | |
|         {a?} {a+} a \ | |
|         ansistring\ | |
|         convert*\ | |
|         clear*\ | |
|         cursor_*\ | |
|         detect*\ | |
|         get_*\ | |
|         move*\ | |
|         reset*\ | |
|         strip*\ | |
|         test_decaln\ | |
|         titleset\ | |
|  | |
|  | |
|     variable escape_terminals | |
|     #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). | |
|     dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] | |
|     #dict set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic  | |
|     dict set escape_terminals OSC [list \007 \033\\ \u009c] ;#note mix of 1 and 2-byte terminals  | |
|     dict set escape_terminals DCS [list \007 \033\\ \u009c] | |
|     dict set escape_terminals MISC [list \007 \033\\ \u009c] | |
|     #NOTE - we are assuming an OSC or DCS started with one type of sequence (7 or 8bit) can be terminated by either 7 or 8 bit ST (or BEL e.g wezterm )  | |
|     #This using a different type of ST to that of the opening sequence is presumably unlikely in the wild - but who knows? | |
|  | |
|     variable standalone_codes | |
|     set standalone_codes [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""] | |
|     #review - there doesn't seem to be an \x1b#7  | |
|     # https://espterm.github.io/docs/VT100%20escape%20codes.html | |
|  | |
|     #self-contained 2 byte ansi escape sequences - review more? | |
|     set  ansi_2byte_codes_dict [dict create\ | |
|         "reset_terminal"            "\u001bc"\ | |
|         "save_cursor_posn"          "\u001b7"\ | |
|         "restore_cursor_posn"       "\u001b8"\ | |
|         "cursor_up_one"             "\u001bM"\ | |
|         "NEL - Next Line"           "\u001bE"\ | |
|         "IND - Down one line"       "\u001bD"\ | |
|         "HTS - Set Tab Stop"        "\u001bH"\ | |
|         "DECPAM app keypad"         "\x1b="\ | |
|         "DECPNM norm keypad"        "\x1b>"\ | |
|         ] | |
|  | |
|     #control strings | |
|     #https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf | |
|     #<excerpt> | |
|     #A control string is a string of bit combinations which may occur in the data stream as a logical entity for | |
|     #control purposes. A control string consists of an opening delimiter, a command string or a character string, | |
|     #and a terminating delimiter, the STRING TERMINATOR (ST). | |
|     #A command string is a sequence of bit combinations in the range 00/08 to 00/13 and 02/00 to 07/14. | |
|     #A character string is a sequence of any bit combination, except those representing START OF STRING | |
|     #(SOS) or STRING TERMINATOR (ST). | |
|     #The interpretation of the command string or the character string is not defined by this Standard, but instead | |
|     #requires prior agreement between the sender and the recipient of the data. | |
|     #The opening delimiters defined in this Standard are | |
|     #a) APPLICATION PROGRAM COMMAND (APC) | |
|     #b) DEVICE CONTROL STRING (DCS) | |
|     #c) OPERATING SYSTEM COMMAND (OSC) | |
|     #d) PRIVACY MESSAGE (PM) | |
|     #e) START OF STRING (SOS)  | |
|     #</excerpt> | |
|  | |
|     #debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway. | |
|     #  "PM - Privacy Message"      "\u001b^somethinghidden\033\\"\ | |
|     #The intent is that it's not rendered to the terminal - so on balance it seems best to strip it out.  | |
|     #todo - review - printing_length calculations affected by whether terminal honours PMs or not. detect and accomodate. | |
|     #review - can terminals handle SGR codes within a PM? | |
|     #Wezterm will hide PM,SOS,APC - but not any part following an SGR code - i.e it seems to terminate hiding before the ST (apparently at the ) | |
|     proc controlstring_PM {text} { | |
|         return "\x1b^${text}\033\\" | |
|     } | |
|     proc controlstring_PM8 {text} { | |
|         return "\x9e${text}\x9c" | |
|     } | |
|     proc controlstring_SOS {text} { | |
|         return "\x1bX${text}\033\\" | |
|     } | |
|     proc controlstring_SOS8 {text} { | |
|         return "\x98${text}\x9c" | |
|     } | |
|     proc controlstring_APC {text} { | |
|         return "\x1b_${text}\033\\" | |
|     } | |
|     proc controlstring_APC8 {text} { | |
|         return "\x9f${text}\x9c" | |
|     } | |
|     #there is also the SGR hide code (8) which has intermittent terminal support | |
|     #This doesn't change the output length - so support is tricky to detec. (terminal checksum report?) | |
|  | |
|  | |
|     #candidate for zig/c implementation? | |
|     proc stripansi {text} { | |
|         #*** !doctools | |
|         #[call [fun stripansi] [arg text] ] | |
|         #[para]Return a string with ansi codes stripped out  | |
|  | |
|         #todo - character set selection - SS2 SS3 - how are they terminated?  REVIEW | |
|  | |
|         variable escape_terminals  ;#dict | |
|         variable standalone_codes  ;#map to empty string | |
|  | |
|         set text [convert_g0 $text]  | |
|  | |
|  | |
|         #we should just map away the 2-byte sequences too | |
|         #standalone 3 byte  VT100 sequences - some of these work in wezterm  | |
|         #\x1b#3 double-height letters top half | |
|         #\x1b#4 double-height letters bottom half | |
|         #\x1b#5 single-width line | |
|         #\x1b#6 double-width line | |
|         #\x1b#8  dec test fill screen | |
|  | |
|         set text [string map $standalone_codes $text] | |
|  | |
|         #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. | |
|         #line endings can theoretically occur within an ansi escape sequence payload (review e.g title?) | |
|          | |
|         set inputlist [split $text ""] | |
|         set outputlist [list]  | |
|  | |
|         set in_escapesequence 0 | |
|         #assumption - text already 'rendered' - ie no cursor movement controls . (what about backspace and lone carriage returns - they are horizontal cursor movements) | |
|          | |
|         set i 0 | |
|         foreach u $inputlist { | |
|             set v [lindex $inputlist $i+1] | |
|             set uv ${u}${v} | |
|             if {$in_escapesequence eq "2b"} { | |
|                 #2nd byte - done. | |
|                 set in_escapesequence 0 | |
|             } elseif {$in_escapesequence != 0} { | |
|                 set endseq [dict get $escape_terminals $in_escapesequence] | |
|                 if {$u in $endseq} { | |
|                     set in_escapesequence 0 | |
|                 } elseif {$uv in $endseq} { | |
|                     set in_escapesequence 2b ;#flag next byte as last in sequence | |
|                 } | |
|             } else { | |
|                 #handle both 7-bit and 8-bit CSI and OSC | |
|                 if {[regexp {^(?:\033\[|\u009b)} $uv]} { | |
|                     set in_escapesequence CSI | |
|                 } elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { | |
|                     set in_escapesequence OSC | |
|                 } elseif {[regexp {^(?:\033P|\u0090)} $uv]} { | |
|                     set in_escapesequence DCS | |
|                 } elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { | |
|                     #SOS,PM,APC - all terminated with ST | |
|                     set in_escapesequence MISC | |
|                 } else { | |
|                     lappend outputlist $u | |
|                 } | |
|             } | |
|             incr i | |
|         } | |
|         return [join $outputlist ""] | |
|     } | |
|  | |
|     #review - what happens when no terminator? | |
|     #todo - map other chars  to unicode equivs | |
|     proc convert_g0 {text} { | |
|         #using not \033 inside to stop greediness - review how does it compare to ".*?" | |
|         set re {\033\(0[^\033]*\033\(B}  | |
|         set re2 {\033\(0(.*)\033\(B} ;#capturing | |
|         set parts [::punk::ansi::ta::_perlish_split $re $text] | |
|         set out "" | |
|         foreach {pt g} $parts { | |
|             append out $pt | |
|             if {$g ne ""} { | |
|                 #puts --$g-- | |
|                 #box sample | |
|                 #lqk | |
|                 #x x | |
|                 #mqj | |
|                 #m = boxd_lur | |
|                 #set map [list l \u250f k \u2513] ;#heavy | |
|                 set map [list l \u250c q \u2500 k \u2510 x \u2502 m \u2514 j \u2518] ;#light | |
|  | |
|                 regexp $re2 $g _match contents | |
|                 append out [string map $map $contents] | |
|             } | |
|         } | |
|         return $out | |
|     } | |
|  | |
|     #todo - convert esc(0 graphics sequences to single char unicode equivalents e.g box drawing set | |
|     # esc) ?? | |
|     proc stripansi_gx {text} { | |
|             #e.g "\033(0" - select VT100 graphics for character set G0 | |
|             #e.g "\033(B" - reset  | |
|             #e.g "\033)0" - select VT100 graphics for character set G1 | |
|             #e.g "\033)X" - where X is any char other than 0 to reset ?? | |
|             return [convert_g0 $text] | |
|     } | |
|  | |
|  | |
|     #CSI <n> m = SGR (Select Graphic Rendition) | |
|     variable SGR_setting_map { | |
|         bold        1   dim             2   blink           5   fastblink   6   noblink     25      hide   8    normal      22 | |
|         underline   4   doubleunderline 21  nounderline     24  strike      9   nostrike    29      italic 3    noitalic    23 | |
|         reverse     7   noreverse       27  defaultfg       39  defaultbg   49  nohide      28 | |
|         overline    53  nooverline      55  frame           51  framecircle 52  noframe     54 | |
|     } | |
|     variable SGR_colour_map { | |
|         black       30  red             31  green           32  yellow      33  blue        34       purple 35   cyan        36      white 37 | |
|         Black       40  Red             41  Green           42  Yellow      43  Blue        44      Purple 45   Cyan        46      White 47 | |
|         BLACK       100 RED             101 GREEN           102 YELLOW      103 BLUE        104     PURPLE 105  CYAN        106     WHITE 107 | |
|     } | |
|     variable SGR_map | |
|     set SGR_map [dict merge $SGR_colour_map $SGR_setting_map] | |
|  | |
|      | |
|     proc colourmap1 {{bgname White}} { | |
|         package require textblock | |
|  | |
|         set bg [textblock::block 33 3 "[a+ $bgname] [a]"] | |
|         set colormap "" | |
|         for {set i 0} {$i <= 7} {incr i} { | |
|             append colormap "_[a+ white bold 48\;5\;$i] $i [a]" | |
|         } | |
|         set map1 [overtype::left -transparent _ $bg "\n$colormap"] | |
|         return $map1 | |
|     } | |
|     proc colourmap2 {{bgname White}} { | |
|         package require textblock | |
|         set bg [textblock::block 39 3 "[a+ $bgname] [a]"] | |
|         set colormap "" | |
|         for {set i 8} {$i <= 15} {incr i} { | |
|             append colormap "_[a+ black normal 48\;5\;$i] $i [a]" ;#black normal is blacker than black bold - which often displays as a grey | |
|         } | |
|         set map2 [overtype::left -transparent _ $bg "\n$colormap"] | |
|         return $map2 | |
|     } | |
|     proc a? {args} { | |
|         #*** !doctools | |
|         #[call [fun a?] [opt {ansicode...}]] | |
|         #[para]Return an ansi string representing a table of codes and a panel showing the colours | |
|         variable SGR_setting_map | |
|         variable SGR_colour_map | |
|  | |
|         if {![llength $args]} { | |
|             set out "" | |
|             append out $SGR_setting_map \n | |
|             append out $SGR_colour_map \n | |
|  | |
|             try { | |
|                 package require overtype ;# circular dependency - many components require overtype. Here we only need it for nice layout in the a? query proc - so we'll do a soft-dependency by only loading when needed and also wrapping in a try | |
|                 set bgname "White" | |
|                 set map1 [colourmap1 $bgname] | |
|                 set map1 [overtype::centre -transparent 1 $map1 "[a black $bgname]Standard colours[a]"] | |
|                 set map2 [colourmap2 $bgname] | |
|                 set map2 [overtype::centre -transparent 1 $map2 "[a black $bgname]High-intensity colours[a]"] | |
|                 append out [textblock::join $map1 "   " $map2] \n | |
|                 #append out $map1[a] \n | |
|                 #append out $map2[a] \n | |
|  | |
|  | |
|  | |
|             } on error {result options} { | |
|                 puts stderr "Failed to draw colormap" | |
|                 puts stderr "$result" | |
|             } finally { | |
|                 return $out | |
|             } | |
|         } else { | |
|             set result [list] | |
|             set rmap [lreverse $map] | |
|             foreach i $args { | |
|                 if {[string is integer -strict $i]} { | |
|                     if {[dict exists $rmap $i]} { | |
|                         lappend result $i [dict get $rmap $i] | |
|                     } | |
|                 } else { | |
|                     if {[dict exists $map $i]} { | |
|                         lappend result $i [dict get $map $i] | |
|                     } | |
|                 } | |
|             } | |
|             return $result | |
|         } | |
|     } | |
|     proc a+ {args} { | |
|         #*** !doctools | |
|         #[call [fun a+] [opt {ansicode...}]] | |
|         #[para]Returns the ansi code to apply those from the supplied list - without any reset being performed first | |
|         #[para] e.g to set foreground red and bold  | |
|         #[para]punk::ansi::a red bold | |
|         #[para]to set background red | |
|         #[para]punk::ansi::a Red  | |
|         #[para]see [cmd punk::ansi::a?] to display a list of codes | |
|  | |
|         #don't disable ansi here. | |
|         #we want this to be available to call even if ansi is off | |
|         variable SGR_map | |
|         set t [list]  | |
|         foreach i $args { | |
|             if {[string is integer -strict $i]} { | |
|                 lappend t $i | |
|             } elseif {[string first ";" $i] >=0} { | |
|                 #literal with params | |
|                 lappend t $i | |
|             } else { | |
|                 if {[dict exists $SGR_map $i]} { | |
|                     lappend t [dict get $SGR_map $i] | |
|                 } else { | |
|                     #accept examples for foreground | |
|                     #  256f-# or 256fg-# or 256f# | |
|                     #  rgbf-<r>-<g>-<b>  or rgbfg-<r>-<g>-<b> or rgbf<r>-<g>-<b> | |
|                     if {[string match -nocase "256f*" $i]} { | |
|                         set cc [string trim [string range $i 4 end] -gG] | |
|                         lappend t "38;5;$cc"  | |
|                     } elseif {[string match -nocase 256b* $i]} { | |
|                         set cc [string trim [string range $i 4 end] -gG] | |
|                         lappend t "48;5;$cc"  | |
|                     } elseif {[string match -nocase rgbf* $i]} { | |
|                         set rgb [string trim [string range $i 4 end] -gG] | |
|                         lassign [split $rgb -] r g b | |
|                         lappend t "38;2;$r;$g;$b" | |
|                     } elseif {[string match -nocase rgbb* $i]} { | |
|                         set rgb [string trim [string range $i 4 end] -gG] | |
|                         lassign [split $rgb -] r g b | |
|                         lappend t "48;2;$r;$g;$b" | |
|                     } | |
|                 } | |
|             } | |
|         } | |
|         # \033 - octal.  equivalently \x1b in hex which is more common in documentation | |
|         if {![llength $t]} { | |
|             return "" ;# a+ nonexistent should return nothing rather  than a reset ( \033\[\;m  is a reset even without explicit zero(s)) | |
|         } | |
|         return "\x1b\[[join $t {;}]m" | |
|     } | |
|     proc a {args} { | |
|         #*** !doctools | |
|         #[call [fun a] [opt {ansicode...}]] | |
|         #[para]Returns the ansi code to reset any current settings and apply those from the supplied list | |
|         #[para] by calling punk::ansi::a with no arguments - the result is a reset to plain text  | |
|         #[para] e.g to set foreground red and bold  | |
|         #[para]punk::ansi::a red bold | |
|         #[para]to set background red | |
|         #[para]punk::ansi::a Red  | |
|         #[para]see [cmd punk::ansi::a?] to display a list of codes | |
|  | |
|  | |
|         #don't disable ansi here. | |
|         #we want this to be available to call even if ansi is off | |
|         variable SGR_map | |
|         set t [list] | |
|         foreach i $args { | |
|             if {[string is integer -strict $i]} { | |
|                 lappend t $i | |
|             } elseif {[string first ";" $i] >=0} { | |
|                 #literal with params | |
|                 lappend t $i | |
|             } else { | |
|                 if {[dict exists $SGR_map $i]} { | |
|                     lappend t [dict get $SGR_map $i] | |
|                 } else { | |
|                     #accept examples for foreground | |
|                     #  256f-# or 256fg-# or 256f# | |
|                     #  rgbf-<r>-<g>-<b>  or rgbfg-<r>-<g>-<b> or rgbf<r>-<g>-<b> | |
|                     if {[string match -nocase "256f*" $i]} { | |
|                         set cc [string trim [string range $i 4 end] -gG] | |
|                         lappend t "38;5;$cc"  | |
|                     } elseif {[string match -nocase 256b* $i]} { | |
|                         set cc [string trim [string range $i 4 end] -gG] | |
|                         lappend t "48;5;$cc"  | |
|                     } elseif {[string match -nocase rgbf* $i]} { | |
|                         set rgb [string trim [string range $i 4 end] -gG] | |
|                         lassign [split $rgb -] r g b | |
|                         lappend t "38;2;$r;$g;$b" | |
|                     } elseif {[string match -nocase rgbb* $i]} { | |
|                         set rgb [string trim [string range $i 4 end] -gG] | |
|                         lassign [split $rgb -] r g b | |
|                         lappend t "48;2;$r;$g;$b" | |
|                     } | |
|                 } | |
|             } | |
|         } | |
|         # \033 - octal.  equivalently \x1b in hex which is more common in documentation | |
|         # empty list [a=] should do reset - same for [a= nonexistant] | |
|         # explicit reset at beginning of parameter list for a= (as opposed to a+) | |
|         set t [linsert $t 0 0] | |
|         return "\x1b\[[join $t {;}]m" | |
|     } | |
|  | |
|  | |
|  | |
|  | |
|     proc get_code_name {code} { | |
|         #*** !doctools | |
|         #[call [fun get_code_name] [arg code]] | |
|         #[para]for example | |
|         #[para] get_code_name red will return 31 | |
|         #[para] get_code_name 31 will return red | |
|         variable SGR_map | |
|         set res [list] | |
|         foreach i [split $code ";"] { | |
|             set ix [lsearch -exact $SGR_map $i] | |
|             if {[string is digit -strict $code]} { | |
|                 if {$ix>-1} {lappend res [lindex $SGR_map [incr ix -1]]} | |
|             } else { | |
|                 #reverse lookup code from name | |
|                 if {$ix>-1} {lappend res [lindex $SGR_map [incr ix]]} | |
|             } | |
|         } | |
|         set res | |
|     } | |
|     proc reset {} { | |
|         #*** !doctools | |
|         #[call [fun reset]] | |
|         #[para]reset console | |
|         return "\x1bc"  | |
|     } | |
|     proc reset_soft {} { | |
|         #*** !doctools | |
|         #[call [fun reset_soft]] | |
|         return \x1b\[!p | |
|     } | |
|     proc reset_colour {} { | |
|         #*** !doctools | |
|         #[call [fun reset_colour]] | |
|         #[para]reset colour only | |
|         return "\x1b\[0m"  | |
|     } | |
|  | |
|     # -- ---  --- --- --- | |
|     proc clear {} { | |
|         #*** !doctools | |
|         #[call [fun clear]] | |
|         return "\033\[2J" | |
|     } | |
|     proc clear_above {} { | |
|         #*** !doctools | |
|         #[call [fun clear_above]] | |
|         return \033\[1J | |
|     } | |
|     proc clear_below {} { | |
|         #*** !doctools | |
|         #[call [fun clear_below]] | |
|         return \033\[0J | |
|     } | |
|  | |
|     proc clear_all {} { | |
|         # - doesn't work?? | |
|         return \033\[3J | |
|     } | |
|     #see also erase_ functions | |
|     # -- ---  --- --- --- | |
|  | |
|     proc cursor_on {} { | |
|         #*** !doctools | |
|         #[call [fun cursor_on]] | |
|         return "\033\[?25h" | |
|     } | |
|     proc cursor_off {} { | |
|         #*** !doctools | |
|         #[call [fun cursor_off]] | |
|         return "\033\[?25l" | |
|     } | |
|  | |
|     # -- ---  --- --- --- | |
|     proc move {row col} { | |
|         #*** !doctools | |
|         #[call [fun move] [arg row] [arg col]] | |
|         #[para]Return an ansi sequence to move to row,col | |
|         #[para]aka cursor home | |
|         return \033\[${row}\;${col}H | |
|     } | |
|     proc move_emit {row col data args} { | |
|         #*** !doctools | |
|         #[call [fun move_emit] [arg row] [arg col] [arg data] [opt {row col data...}]] | |
|         #[para]Return an ansi string representing a  move to row col with data appended   | |
|         #[para]row col data can be repeated any number of times to return a string representing the output of the data elements at all those points | |
|         #[para]Compare to punk::console::move_emit which calls this function - but writes it to stdout | |
|         #[para]punk::console::move_emit_return will also return the cursor to the original position | |
|         #[para]There is no punk::ansi::move_emit_return because in a standard console there is no ansi string which can represent a jump back to starting position. | |
|         #[para]There is an ansi code to write the current cursor position to stdin (which will generally display on the console) - this is not quite the same thing. | |
|         #[para]punk::console::move_emit_return does it by emitting that code and starting a loop to read stdin | |
|         #[para]punk::ansi could implement a move_emit_return using the punk::console mechanism - but the resulting string would capture the cursor position at the time the string is built - which is not necessarily when the string is used. | |
|         #[para]The following example shows how to do this manually, emitting the string blah at screen position 10,10 and emitting DONE back at the line we started: | |
|         #[para][example {punk::ansi::move_emit 10 10 blah {*}[punk::console::get_cursor_pos_list] DONE}] | |
|         #[para]A string created by any move_emit_return for punk::ansi would not behave in an intuitive manner compared to other punk::ansi move functions - so is deliberately omitted. | |
|  | |
|         set out "" | |
|         if {$row eq "this"} { | |
|             append out \033\[\;${col}G$data | |
|         } else { | |
|             append out \033\[${row}\;${col}H$data | |
|         } | |
|         foreach {row col data} $args { | |
|             if {$row eq "this"} { | |
|                 append out \033\[\;${col}G$data | |
|             } else { | |
|                 append out \033\[${row}\;${col}H$data | |
|             } | |
|         } | |
|         return $out | |
|     } | |
|     proc move_forward {{n 1}} { | |
|         #*** !doctools | |
|         #[call [fun move_forward] [arg n]] | |
|         return \033\[${n}C | |
|     } | |
|     proc move_back {{n 1}} { | |
|         #*** !doctools | |
|         #[call [fun move_back] [arg n]] | |
|         return \033\[${n}D | |
|     } | |
|     proc move_up {{n 1}} { | |
|         #*** !doctools | |
|         #[call [fun move_up] [arg n]] | |
|         return \033\[${n}A | |
|     } | |
|     proc move_down {{n 1}} { | |
|         #*** !doctools | |
|         #[call [fun move_down] [arg n]] | |
|         return \033\[${n}B | |
|     } | |
|     proc move_column {col} { | |
|         #*** !doctools | |
|         #[call [fun move_column] [arg col]] | |
|         return \x1b\[${col}G | |
|     } | |
|     proc move_row {row} { | |
|         #*** !doctools | |
|         #[call [fun move_row] [arg row]] | |
|         #[para]VPA - Vertical Line Position Absolute | |
|         return \x1b\[${row}d | |
|     } | |
|     # -- ---  --- --- --- | |
|  | |
|     proc save_cursor {} { | |
|         #*** !doctools | |
|         #[call [fun save_cursor]] | |
|         return \x1b\[s | |
|     } | |
|     proc restore_cursor {} { | |
|         #*** !doctools | |
|         #[call [fun restore_cursor]] | |
|         return \x1b\[u | |
|     } | |
|  | |
|     # -- ---  --- --- --- | |
|     proc erase_line {} { | |
|         #*** !doctools | |
|         #[call [fun erase_line]] | |
|         return \033\[2K | |
|     } | |
|     proc erase_sol {} { | |
|         #*** !doctools | |
|         #[call [fun erase_sol]] | |
|         #[para]Erase to start of line, leaving cursor position alone. | |
|         return \033\[1K | |
|     } | |
|     proc erase_eol {} { | |
|         #*** !doctools | |
|         #[call [fun erase_eol]] | |
|         return \033\[K | |
|     } | |
|     #see also clear_above clear_below | |
|     # -- ---  --- --- --- | |
|  | |
|     proc scroll_up {n} { | |
|         #*** !doctools | |
|         #[call [fun scroll_up] [arg n]] | |
|         return \x1b\[${n}S | |
|     } | |
|     proc scroll_down {n} { | |
|         #*** !doctools | |
|         #[call [fun scroll_down] [arg n]] | |
|         return \x1b\[${n}T | |
|     } | |
|  | |
|     proc insert_spaces {count} { | |
|         #*** !doctools | |
|         #[call [fun insert_spaces] [arg count]] | |
|         return \x1b\[${count}@ | |
|     } | |
|     proc delete_characters {count} { | |
|         #*** !doctools | |
|         #[call [fun delete_characters] [arg count]] | |
|         return \x1b\[${count}P | |
|     } | |
|     proc erase_characters {count} { | |
|         #*** !doctools | |
|         #[call [fun erase_characters] [arg count]] | |
|         return \x1b\[${count}X | |
|     } | |
|     proc insert_lines {count} { | |
|         #*** !doctools | |
|         #[call [fun insert_lines] [arg count]] | |
|         return \x1b\[${count}L | |
|     } | |
|     proc delete_lines {count} { | |
|         #*** !doctools | |
|         #[call [fun delete_lines] [arg count]] | |
|         return \x1b\[${count}M | |
|     } | |
|  | |
|     proc cursor_pos {} { | |
|         #*** !doctools | |
|         #[call [fun cursor_pos]] | |
|         #[para]cursor_pos unlikely to be useful on it's own like this as when written to the terminal, this sequence causes the terminal to emit the row;col sequence to stdin | |
|         #[para]The output on screen will look something like ^[lb][lb]47;3R | |
|         #[para]Use punk::console::get_cursor_pos or punk::console::get_cursor_pos_list instead. | |
|         #[para]These functions will emit the code - but read it in from stdin so that it doesn't display, and then return the row and column as a colon-delimited string or list respectively. | |
|         #[para]The punk::ansi::cursor_pos function is used by punk::console::get_cursor_pos and punk::console::get_cursor_pos_list | |
|         return \033\[6n  | |
|     } | |
|  | |
|     proc request_cursor_information {} { | |
|         #*** !doctools | |
|         #[call [fun request_cursor_information]] | |
|         #[para]DECRQPSR (DEC Request Presentation State Report) for DECCCIR Cursor Information report | |
|         #[para]When written to the terminal, this sequence causes the terminal to emit cursor information to stdin | |
|         #[para]A stdin readloop will need to be in place to read this information | |
|         return \x1b\[1\$w | |
|     } | |
|     proc request_tabstops {} { | |
|         #*** !doctools | |
|         #[call [fun request_tabstops]] | |
|         #[para]DECRQPSR (DEC Request Presentation State Report) for DECTABSR Tab stop report | |
|         #[para]When written to the terminal, this sequence causes the terminal to emit tabstop information to stdin | |
|         return \x1b\[2\$w | |
|     } | |
|  | |
|  | |
|     #alternative to string terminator is \007 -  | |
|     proc titleset {windowtitle} { | |
|         #*** !doctools | |
|         #[call [fun titleset] [arg windowtitles]] | |
|         #[para]Returns the code to set the title of the terminal window to windowtitle | |
|         #[para]This may not work on terminals which have multiple panes/windows | |
|         return "\033\]2;$windowtitle\033\\" ;#works for xterm and most derivatives | |
|     } | |
|     #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title | |
|     #no cross-platform ansi-only mechanism ?  | |
|  | |
|     proc test_decaln {} { | |
|         #Screen Alignment Test | |
|         #Reset margins, move cursor to the top left, and fill the screen with 'E' | |
|         #(doesn't work on many terminals - seems to work in FreeBSD 13.2 and wezterm on windows) | |
|         return \x1b#8 | |
|     } | |
|      | |
|     #length of text for printing characters only | |
|     #review - unicode and other non-printing chars and combining sequences? | |
|     #certain unicode chars are full-width (single char 2 columns wide) e.g see "Halfwdith and fullwidth forms" and ascii_fuillwidth blocks in punk::char::charset_names | |
|     #review - is there an existing library or better method? print to a terminal and query cursor position? | |
|     #Note this length calculation is only suitable for lines being appended to other strings if the line is pre-processed to account for backspace and carriage returns first | |
|     #If the raw line is appended to another string without such processing - the backspaces & carriage returns can affect data prior to the start of the string.  | |
|     proc printing_length {line} { | |
|         if {[string first \n $line] >= 0} { | |
|             error "line_print_length must not contain newline characters" | |
|         } | |
|         #what if line has \v (vertical tab) ie more than one logical screen line? | |
|  | |
|         #review -  | |
|         set line [punk::ansi::stripansi $line] | |
|  | |
|         set line [punk::char::strip_nonprinting_ascii $line] ;#only strip nonprinting after stripansi - some like BEL are part of ansi | |
|         #backspace 0x08 only erases* printing characters anyway - so presumably order of processing doesn't matter | |
|         #(* more correctly - moves cursor back) | |
|         #Note some terminals process backspace before \v - which seems quite wrong | |
|         #backspace will not move beyond a preceding newline - but we have disallowed newlines for this function already | |
|         #leading backspaces will eat into any string (even prompt in non-standard tclsh shell) that is prepended to the line | |
|         # - but for the purposes of overtype we wouldn't want that - so we strip it here in the length calculation and should strip leading backspaces in the actual data concatenation operations too. | |
|         #curiously - a backspace sequence at the end of a string also doesn't reduce the printing width - so we can also strip from RHS | |
|  | |
|         #Note that backspace following a \t will only shorten the string by one (ie it doesn't move back the whole tab width like it does interactively in the terminal) | |
|         #for this reason - it would seem best to normalize the tabs to spaces prior to performing the backspace calculation - otherwise we won't account for the 'short' tabs it effectivley produces | |
|         #normalize tabs to an appropriate* width | |
|         #*todo - handle terminal/context where tabwidth != the default 8 spaces | |
|         set line [textutil::tabify::untabify2 $line] | |
|  | |
|         set bs [format %c 0x08] | |
|         #set line [string map [list "\r${bs}" "\r"] $line] ;#backsp following a \r will have no effect | |
|         set line [string trim $line $bs] | |
|         #counterintuitively "x\b" still shows the x ie length is still one. The backspace just moves the position. There must be a char following \b for it to affect the length. | |
|         set n 0 | |
|  | |
|         set chars [split $line ""]  | |
|         #build an output  | |
|         set idx 0 | |
|         set outchars [list] | |
|         set outsizes [list] | |
|         foreach c $chars { | |
|             if {$c eq $bs} { | |
|                 if {$idx > 0} { | |
|                     incr idx -1 | |
|                 } | |
|             } elseif {$c eq "\r"} { | |
|                 set idx 0 | |
|             } else { | |
|                 punk::ansi::internal::printing_length_addchar $idx $c | |
|                 incr idx | |
|             } | |
|         } | |
|         set line2 [join $outchars ""] | |
|         return [punk::char::string_width $line2] | |
|     } | |
|  | |
|  | |
|     #*** !doctools | |
|     #[list_end] [comment {--- end definitions namespace punk::ansi ---}] | |
| } | |
|  | |
|  | |
| namespace eval punk::ansi { | |
|  | |
|  | |
|     # -- --- --- --- --- --- | |
|     #XTGETTCAP | |
|     # xterm responds with | |
|     # DCS 1 + r Pt ST for valid requests, adding to Pt an = , and | |
|     # the value of the corresponding string that xterm would send, | |
|     # or | |
|     # DCS 0 + r ST for invalid requests. | |
|     # The strings are encoded in hexadecimal (2 digits per | |
|     # character).  If more than one name is given, xterm replies | |
|     # with each name/value pair in the same response.  An invalid | |
|     # name (one not found in xterm's tables) ends processing of the | |
|     # list of names. | |
|     proc xtgetcap {keylist} { | |
|         #ESC P = 0x90 = DCS = Device Control String  | |
|         set hexkeys [list] | |
|         foreach k $keylist { | |
|             lappend hexkeys [util::str2hex $k] | |
|         } | |
|         set payload [join $hexkeys ";"] | |
|         return "\x1bP+q$payload\x1b\\" | |
|     } | |
|     proc xtgetcap2 {keylist} { | |
|         #ESC P = 0x90 = DCS = Device Control String  | |
|         set hexkeys [list] | |
|         foreach k $keylist { | |
|             lappend hexkeys [util::str2hex $k] | |
|         } | |
|         set payload [join $hexkeys ";"] | |
|         return "\u0090+q$payload\u009c" | |
|     } | |
|     namespace eval codetype { | |
|         #Functions that operate on a single ansi code sequence - not a sequence, and not codes embedded in another string | |
|         proc is_sgr {code} { | |
|             #SGR (Select Graphic Rendition) - codes ending in 'm' - e.g colour/underline  | |
|             #we will accept and pass through the less common colon separator (ITU Open Document Architecture)  | |
|             #Terminals should generally ignore it if they don't use it | |
|             regexp {\033\[[0-9;:]*m$} $code | |
|         } | |
|         proc is_cursor_move_in_line {code} { | |
|             #review - what about CSI n : m H where row n happens to be current line? | |
|             regexp {\033\[[0-9]*(:?C|D|G)$} $code | |
|         } | |
|         #pure SGR reset with no other functions | |
|         proc is_sgr_reset {code} { | |
|             #todo 8-bit csi | |
|             regexp {\033\[0*m$} $code | |
|         } | |
|         #whether this code has 0 (or equivalently empty) parameter (but may set others) | |
|         #if an SGR code has a reset in it - we don't need to carry forward any previous SGR codes | |
|         #it generally only makes sense for the reset to be the first entry - otherwise the code has ineffective portions | |
|         #However - detecting zero or empty parameter in other positions requires knowing all other codes that may allow zero or empty params. | |
|         #We will only look at initial parameter as this is the well-formed normal case.   | |
|         #Review - consider normalizing sgr codes to remove other redundancies such as setting fg or bg color twice in same code | |
|         proc has_sgr_leadingreset {code} { | |
|             set params "" | |
|             regexp {\033\[(.*)m} $code _match params | |
|             set plist [split $params ";"] | |
|             if {[string trim [lindex $plist 0] 0] eq ""} { | |
|                 #e.g \033\[m  \033\[0\;...m  \033\[0000...m | |
|                 return 1 | |
|             } else { | |
|                 return 0 | |
|             } | |
|         } | |
|  | |
|         #has_sgr_reset  - rather than support this function - create an sgr normalize function that removes dead params and brings reset to front of param list? | |
|  | |
|     } | |
|     namespace eval sequence_type { | |
|         proc is_Fe {code} { | |
|             # C1 control codes | |
|             if {[regexp {^\033\[[\u0040-\u005F]}]} { | |
|                 #7bit - typical case | |
|                 return 1 | |
|             } | |
|             #8bit | |
|             #review - all C1 escapes ? 0x80-0x90F | |
|             #This is possibly problematic as it is affected by encoding. | |
|             #According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit | |
|             #"However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used." | |
|             return 0 | |
|         } | |
|         proc is_Fs {code} { | |
|             puts stderr "is_Fs unimplemented" | |
|         } | |
|     } | |
|     # -- --- --- --- --- --- --- --- --- --- --- | |
|     #todo - implement colour resets like the perl module: | |
|     #https://metacpan.org/pod/Text::ANSI::Util | |
|     #(saves up all ansi color codes since previous color reset and replays the saved codes after our highlighting is done) | |
| } | |
|  | |
|  | |
| namespace eval punk::ansi::ta { | |
|     #*** !doctools | |
|     #[subsection {Namespace punk::ansi::ta}] | |
|     #[para] text ansi functions  | |
|     #[para] based on but not identical to the Perl Text Ansi module: | |
|     #[para] https://github.com/perlancar/perl-Text-ANSI-Util/blob/master/lib/Text/ANSI/BaseUtil.pm | |
|     #[list_begin definitions] | |
|     namespace path ::punk::ansi | |
|  | |
|     #handle both 7-bit and 8-bit csi  | |
|     #review - does codepage affect this? e.g ebcdic has 8bit csi in different position | |
|  | |
|     #CSI | |
|     #variable re_csi_open {(?:\033\[|\u009b)[0-9;]+} ;#too specific - doesn't detect \033\[m | |
|     variable re_csi_open {(?:\033\[|\u009b)} | |
|  | |
|     #colour and style | |
|     variable re_csi_colour {(?:\033\[|\u009b)[0-9;]*m} ;#e.g \033\[31m  \033\[m  \033\[0m \033\[m0000m | |
|     #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). | |
|     variable re_csi_code  {(?:\033\[|\u009b)[0-9;]*[a-zA-Z\\@^_|~`]} | |
|  | |
|     #OSC - termnate with BEL (\a \007) or ST (string terminator \033\\) | |
|     # 8-byte string terminator is \x9c (\u009c) | |
|  | |
|     #non-greedy via "*?" doesn't seem to work like this.. | |
|     #variable re_esc_osc1 {(?:\033\]).*?\007} | |
|     #variable re_esc_osc2 {(?:\033\]).*?\033\\} | |
|     #variable re_esc_osc3 {(?:\u009d).*?\u009c} | |
|  | |
|     #non-greedy by excluding ST terminators | |
|     #TODO - FIX? see re_ST below | |
|     variable re_esc_osc1 {(?:\033\])(?:[^\007]*)\007} | |
|     variable re_esc_osc2 {(?:\033\])(?:[^\033]*)\033\\} | |
|     variable re_esc_osc3 {(?:\u009d)(?:[^\u009c]*)?\u009c} | |
|  | |
|     variable re_osc_open {(?:\033\]|\u009d).*} | |
|  | |
|     #standalone_codes [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""] | |
|     variable re_standalones {(?:\x1bc|\x1b7|\x1b8|\x1bM|\x1bE|\x1bD|\x1bD|\x1bH|\x1b=|\x1b>|\x1b#3|\x1b#4|\x1b#5|\x1b#6|\x1b#8)} | |
|  | |
|     #see stripansi | |
|     set re_start_ST {^(?:\033X|\u0098|\033\^|\u009e|\033_|\u009f)}  | |
|     #ST terminators [list \007 \033\\ \u009c] | |
|  | |
|     #regex to capture the start of string/privacy message/application command block including the contents and string terminator (ST) | |
|     #non-greedy by exclusion of ST terminators in body | |
|     #!!! | |
|     #TODO - fix. we need to match \033\\ not just \033 ! could be colour codes nested in a privacy msg/string | |
|     #This will currently terminate the code too early in this case | |
|     #we also need to track the start of ST terminated code and not add it for replay (in the ansistring functions) | |
|     variable re_ST  {(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)(?:[^\033\007\u009c]*)(?:\033\\|\007|\u009c)} | |
|  | |
|     variable re_ansi_detect "${re_csi_open}|${re_esc_osc1}|${re_esc_osc2}|${re_standalones}|${re_start_ST}" | |
|  | |
|     #detect any ansi escapes | |
|     #review - only detect 'complete' codes - or just use the opening escapes for performance? | |
|     proc detect {text} { | |
|         #*** !doctools | |
|         #[call [fun detect] [arg text]] | |
|         #[para]Return a boolean indicating whether Ansi codes were detected in text | |
|         #[para] | |
|  | |
|         variable re_ansi_detect | |
|         #variable re_csi_open | |
|         #variable re_esc_osc1 | |
|         #variable re_esc_osc2 | |
|         #todo - other escape sequences | |
|         #expr {[regexp $re_csi_open $text] || [regexp $re_esc_osc1 $text] || [regexp $re_esc_osc2 $text]}  | |
|         expr {[regexp $re_ansi_detect $text]} | |
|     } | |
|     #not in perl ta | |
|     proc detect_csi {text} { | |
|         #*** !doctools | |
|         #[call [fun detect_csi] [arg text]] | |
|         #[para]Return a boolean indicating whether an Ansi Control Sequence Introducer (CSI) was detected in text | |
|         #[para]The csi is often represented in code as \x1b or \033 followed by a left bracket [lb] | |
|         #[para]The initial byte or escape is commonly referenced as ESC in Ansi documentation  | |
|         #[para]There is also a multi-byte escape sequence \u009b  | |
|         #[para]This is less commonly used but is also detected here | |
|         #[para](This function is not in perl ta) | |
|         variable re_csi_open | |
|         expr {[regexp $re_csi_open $text]} | |
|     } | |
|     proc detect_sgr {text} { | |
|         #*** !doctools | |
|         #[call [fun detect_sgr] [arg text]] | |
|         #[para]Return a boolean indicating whether an ansi Select Graphics Rendition code was detected. | |
|         #[para]This is the set of CSI sequences ending in 'm' | |
|         #[para]This is most commonly an Ansi colour code - but also things such as underline and italics | |
|         #[para]An SGR with empty or a single zero argument is a reset of the SGR features - this is also detected. | |
|         #[para](This function is not in perl ta) | |
|         variable re_csi_colour | |
|         expr {[regexp $re_csi_colour $text]} | |
|     } | |
|     proc strip {text} { | |
|         #*** !doctools | |
|         #[call [fun strip] [arg text]] | |
|         #[para]Return text stripped of Ansi codes | |
|         #[para]This is a tailcall to punk::ansi::stripansi | |
|         tailcall stripansi $text | |
|     } | |
|     proc length {text} { | |
|         #*** !doctools | |
|         #[call [fun length] [arg text]] | |
|         #[para]Return the character length after stripping ansi codes - not the printing length | |
|         string length [stripansi $text] | |
|     } | |
|     #todo - handle newlines | |
|     #not in perl ta | |
|     #proc printing_length {text} { | |
|     # | |
|     #} | |
|  | |
|     proc trunc {text width args} { | |
|  | |
|     } | |
|  | |
|     #not in perl ta | |
|     #returns just the plaintext portions in a list | |
|     proc split_at_codes {text} { | |
|         variable re_esc_osc1 | |
|         variable re_esc_osc2 | |
|         variable re_csi_code | |
|         variable re_standalones | |
|         variable re_ST | |
|         punk::ansi::internal::splitx $text "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_standalones}|${re_ST}" | |
|     } | |
|      | |
|     # -- --- --- --- --- ---  | |
|     #Split $text to a list containing alternating ANSI color codes and text. | |
|     #ANSI color codes are always on the second element, fourth, and so on. | |
|     #(ie plaintext on odd list-indices ansi on even indices) | |
|     # Example: | |
|     #ta_split_codes ""              # => "" | |
|     #ta_split_codes "a"             # => "a" | |
|     #ta_split_codes "a\e[31m"       # => {"a"   "\e[31m"} | |
|     #ta_split_codes "\e[31ma"       # => {""    "\e[31m"       "a"} | |
|     #ta_split_codes "\e[31ma\e[0m"  # => {""    "\e[31m"       "a"  "\e[0m"} | |
|     #ta_split_codes "\e[31ma\e[0mb" # => {""    "\e[31m"       "a"  "\e[0m", "b"} | |
|     #ta_split_codes "\e[31m\e[0mb"  # => {""    "\e[31m\e[0m"  "b"} | |
|     # | |
|     proc split_codes {text} { | |
|         variable re_esc_osc1 | |
|         variable re_esc_osc2 | |
|         variable re_csi_code | |
|         variable re_standalones | |
|         variable re_ST | |
|         set re "(?:${re_csi_code}|${re_standalones}|${re_ST}|${re_esc_osc1}|${re_esc_osc2})+" | |
|         return [_perlish_split $re $text] | |
|     } | |
|     #like split_codes - but each ansi-escape is split out separately (with empty string of plaintext between codes so odd/even plain ansi still holds) | |
|     proc split_codes_single {text} { | |
|         variable re_esc_osc1 | |
|         variable re_esc_osc2 | |
|         variable re_csi_code | |
|         variable re_standalones | |
|         variable re_ST | |
|         set re "${re_csi_code}|${re_standalones}|${re_ST}|${re_esc_osc1}|${re_esc_osc2}" | |
|         return [_perlish_split $re $text] | |
|     } | |
|  | |
|     #review - tcl greedy expressions may match multiple in one element | |
|     proc _perlish_split {re text} { | |
|         if {[string length $text] == 0} { | |
|             return {} | |
|         } | |
|         set list [list] | |
|         set start 0 | |
|          | |
|         #We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW | |
|         while {[regexp -start $start -indices -- $re $text match]} { | |
|             lassign $match      matchStart matchEnd | |
|             #puts "->start $start ->match $matchStart $matchEnd" | |
|             if {$matchEnd < $matchStart} { | |
|                 lappend list [string range $text $start $matchStart-1] [string index $text $matchStart]  | |
|                 incr start  | |
|                 if {$start >= [string length $text]} { | |
|                     break | |
|                 } | |
|                 continue | |
|             } | |
|             lappend list [string range $text $start $matchStart-1] [string range $text $matchStart $matchEnd]  | |
|             set start [expr {$matchEnd+1}] | |
|  | |
|             #? | |
|             if {$start >= [string length $text]} { | |
|                 break | |
|             } | |
|         } | |
|         lappend list [string range $text $start end] | |
|         return $list | |
|     } | |
|     proc _ws_split {text} { | |
|         regexp -all -inline {(?:\S+)|(?:\s+)} $text | |
|     } | |
|     # -- --- --- --- --- ---  | |
|  | |
|     #*** !doctools | |
|     #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] | |
| } | |
| # -- --- --- --- --- --- --- --- --- --- --- | |
|  | |
| namespace eval punk::ansi::ansistring { | |
|     #*** !doctools | |
|     #[subsection {Namespace punk::ansi::ansistring}] | |
|     #[para]punk::ansi::ansistring ensemble  - ansi-aware string operations | |
|     #[para]Working with strings containing ansi in a way that preserves/understands the codes is always going to be significantly slower than working with plain strings | |
|     #[para]Just as working with other forms of markup such as HTML - you simply need to be aware of the tradeoffs and design accordingly. | |
|     #[list_begin definitions] | |
|  | |
|     namespace path [list ::punk::ansi ::punk::ansi::ta] | |
|     namespace ensemble create | |
|     namespace export length trim trimleft trimright index VIEW | |
|     #todo - expose _splits_ methods so caller can work efficiently with the splits themselves | |
|     #we need to consider whether these can be agnostic towards splits from split_codes vs split_codes_single  | |
|  | |
|     #\UFFFD  - replacement char or \U2426 | |
|  | |
|     #using ISO 2047 graphical representations of control characters - probably obsolete? | |
|     #00	NUL	Null	⎕	U+2395	NU | |
|     #01	TC1, SOH	Start of Heading	⌈	U+2308	SH | |
|     #02	TC2, STX	Start of Text	⊥	U+22A5	SX | |
|     #03	TC3, ETX	End of Text	⌋	U+230B	EX | |
|     #04	TC4, EOT	End of Transmission	⌁	U+2301[9]	ET | |
|     #05	TC5, ENQ	Enquiry	⊠[a]	U+22A0	EQ | |
|     #06	TC6, ACK	Acknowledge	✓	U+2713	AK | |
|     #07	BEL	Bell	⍾	U+237E[9]	BL | |
|     #08	FE0, BS	Backspace	⤺	—[b]	BS | |
|     #09	FE1, HT	Horizontal Tabulation	⪫	U+2AAB	HT | |
|     #0A	FE2, LF	Line Feed	≡	U+2261	LF | |
|     #0B	FE3, VT	Vertical Tabulation	⩛	U+2A5B	VT | |
|     #0C	FE4, FF	Form Feed	↡	U+21A1	FF | |
|     #0D	FE5, CR	Carriage Return	⪪	U+2AAA	CR | |
|     #0E	SO	Shift Out	⊗	U+2297	SO | |
|     #0F	SI	Shift In	⊙	U+2299	SI | |
|     #10	TC7, DLE	Data Link Escape	⊟	U+229F	DL | |
|     #11	DC1, XON, CON[10]	Device Control 1	◷	U+25F7	D1 | |
|     #12	DC2, RPT,[10] TAPE[c]	Device Control 2	◶	U+25F6	D2 | |
|     #13	DC3, XOF, XOFF	Device Control 3	◵	U+25F5	D3 | |
|     #14	DC4, COF, KMC,[10] TAPE[c]	Device Control 4	◴	U+25F4	D4 | |
|     #15	TC8, NAK	Negative Acknowledge	⍻	U+237B[9]	NK | |
|     #16	TC9, SYN	Synchronization	⎍	U+238D	SY | |
|     #17	TC10, ETB	End of Transmission Block	⊣	U+22A3	EB | |
|     #18	CAN	Cancel	⧖	U+29D6	CN | |
|     #19	EM	End of Medium	⍿	U+237F[9]	EM | |
|     #1A	SUB	Substitute Character	␦	U+2426[12]	SB | |
|     #1B	ESC	Escape	⊖	U+2296	EC | |
|     #1C	IS4, FS	File Separator	◰	U+25F0	FS | |
|     #1D	IS3, GS	Group Separator	◱	U+25F1	GS | |
|     #1E	IS2, RS	Record Separator	◲	U+25F2	RS | |
|     #1F	IS1 US	Unit Separator	◳	U+25F3	US | |
|     #20	SP	Space	△	U+25B3	SP | |
|     #7F	DEL	Delete	▨	—[d]	DT | |
|  | |
|     #C0 control code visual representations | |
|     # Code Val Name 2X Description | |
|     # 2400 00 NUL NU Symbol for Null | |
|     # 2401 01 SOH SH Symbol for Start of Heading | |
|     # 2402 02 STX SX Symbol for Start of Text | |
|     # 2403 03 ETX EX Symbol for End of Text | |
|     # 2404 04 EOT ET Symbol for End of Transmission | |
|     # 2405 05 ENQ EQ Symbol for Enquiry | |
|     # 2406 06 ACK AK Symbol for Acknowledge | |
|     # 2407 07 BEL BL Symbol for Bell | |
|     # 2409 09 BS BS Symbol for Backspace | |
|     # 2409 09 HT HT Symbol for Horizontal Tab (1) | |
|     # 240A 0A LF LF Symbol for Line Feed (1) | |
|     # 240B 0B VT VT Symbol for Vertical Tab (1) | |
|     # 240C 0C FF FF Symbol for Form Feed (2) | |
|     # 240D 0D CR CR Symbol for Carriage Return (1) | |
|     # 240E 0E SO SO Symbol for Shift Out | |
|     # 240F 0F SI SI Symbol for Shift In | |
|     # 2410 10 DLE DL Symbol for Data Link Escape | |
|     # 2411 11 DC1 D1 Symbol for Device Control 1 (2) | |
|     # 2412 12 DC2 D2 Symbol for Device Control 2 (2) | |
|     # 2413 13 DC3 D3 Symbol for Device Control 3 (2) | |
|     # 2414 14 DC4 D4 Symbol for Device Control 4 (2) | |
|     # 2415 15 NAK NK Symbol for Negative Acknowledge | |
|     # 2416 16 SYN SY Symbol for Synchronous Idle | |
|     # 2417 17 ETB EB Symbol for End of Transmission Block | |
|     # 2418 18 CAN CN Symbol for Cancel | |
|     # 2419 19 EM EM Symbol for End of Medium | |
|     # 241A 1A SUB SU Symbol for Substitute | |
|     # 241B 1B ESC EC Symbol for Escape | |
|     # 241C 1C FS FS Symbol for Field Separator (3) | |
|     # 241D 1D GS GS Symbol for Group Separator (3) | |
|     # 241E 1E RS RS Symbol for Record Separator (3) | |
|     # 241F 1F US US Symbol for Unit Separator (3) | |
|     # 2420 20 SP SP Symbol for Space (4) | |
|     # 2421 7F DEL DT Symbol for Delete (4) | |
|  | |
|     #C1 control code visual representations | |
|     #Code Val Name 2X Description | |
|     # 80 80 80 (1) | |
|     # 81 81 81 (1) | |
|     # E022 82 BPH 82 Symbol for Break Permitted Here (2) | |
|     # E023 83 NBH 83 Symbol for No Break Here (2) | |
|     # E024 84 IND IN Symbol for Index (3) | |
|     # E025 85 NEL NL Symbol for Next Line (4) | |
|     # E026 86 SSA SS Symbol for Start Selected Area | |
|     # E027 87 ESA ES Symbol for End Selected Area | |
|     # E028 88 HTS HS Symbol for Character Tabulation Set | |
|     # E029 89 HTJ HJ Symbol for Character Tabulation with Justification | |
|     # E02A 8A VTS VS Symbol for Line Tabulation Set | |
|     # E02B 8B PLD PD Symbol for Partial Line Forward | |
|     # E02C 8C PLU PU Symbol for Partial Line Backward | |
|     # E02D 8D RI RI Symbol for Reverse Line Feed | |
|     # E02E 8E SS2 S2 Symbol for Single Shift 2 | |
|     # E02F 8F SS3 S3 Symbol for Single Shift 3 | |
|     # E030 90 DCS DC Symbol for Device Control String | |
|     # E031 91 PU1 P1 Symbol for Private Use 1 | |
|     # E032 92 PU2 P2 Symbol for Private Use 2 | |
|     # E033 93 STS SE Symbol for Set Transmit State | |
|     # E034 94 CCH CC Symbol for Cancel Character | |
|     # E035 95 MW MW Symbol for Message Waiting | |
|     # E036 96 SPA SP Symbol for Start Protected (Guarded) Area | |
|     # E037 97 EPA EP Symbol for End Protected (Guarded) Area | |
|     # E038 98 SOS 98 Symbol for Start of String (2) | |
|     # 99 99 (1) | |
|     # E03A 9A SCI 9A Symbol for Single Character Introducer (2) | |
|     # E03B 9B CSI CS Symbol for Control Sequence Introducer (5) | |
|     # E03C 9C ST ST Symbol for String Terminator | |
|     # E03D 9D OSC OS Symbol for Operating System Command | |
|     # E03E 9E PM PM Symbol for Privacy Message | |
|     # E03F 9F APC AP Symbol for Application Program Command | |
|  | |
|     proc VIEW {args} { | |
|         #*** !doctools | |
|         #[call [fun VIEW] [arg string]] | |
|         #[para]Return a string with specific ANSI control characters substituted with visual equivalents frome the appropriate unicode C0 and C1 visualisation sets | |
|         #[para]For debugging purposes, certain other standard control characters are converted to visual representation, for example backspace (mapped to \\U2408 '\U2408') | |
|         #[para]Horizontal tab is mapped to \\U2409 '\U2409'. For many of the punk terminal text operations, tabs have already been mapped to the appropriate number of spaces using textutil::tabify functions | |
|         #[para]As punkshell uses linefeed where possible in preference to crlf even on windows, cr is mapped to \\U240D '\U240D' - but lf is left as is. | |
|  | |
|         if {![llength $args]} { | |
|             return "" | |
|         } | |
|  | |
|         set string [lindex $args end] | |
|         set defaults [dict create\ | |
|             -esc 1\ | |
|             -cr 1\ | |
|             -lf 0\ | |
|             -vt 0\ | |
|             -ht 1\ | |
|             -bs 1\ | |
|             -sp 1\ | |
|         ] | |
|         set argopts [lrange $args 0 end-1] | |
|         if {[llength $argopts] % 2 != 0} { | |
|             error "ansistring VIEW options must be option-value pairs, received '$argopts'. Known opts [dict keys $defaults]" | |
|         } | |
|         set opts [dict merge $defaults $argopts] | |
|         # -- --- --- --- --- | |
|         set opt_esc     [dict get $opts -esc] | |
|         set opt_cr      [dict get $opts -cr] | |
|         set opt_lf      [dict get $opts -lf] | |
|         set opt_vt      [dict get $opts -vt] | |
|         set opt_ht      [dict get $opts -ht] | |
|         set opt_bs      [dict get $opts -bs] | |
|         set opt_sp      [dict get $opts -sp] | |
|         # -- --- --- --- --- | |
|  | |
|  | |
|         #modern (c0 seem to have more terminal/font support - C1 can show 8bit c1 codes - but also seems to be limited support) | |
|          | |
|         #Goal is not to map every control character? | |
|         #Map of which elements we want to convert - done this way so we can see names of control's that are included: - ease of maintenance compared to just creating the string map directly | |
|         #ETX -ctrl-c | |
|         #EOT  ctrl-d  (EOF?) | |
|         #SYN  ctrl-v | |
|         #SUB  ctrl-z | |
|         #CAN  ctrl-x | |
|         #FS   ctrl-\  (SIGQUIT) | |
|         set visuals_interesting [dict create\ | |
|             NUL [list \x00 \u2400]\ | |
|             ETX [list \x03 \u2403]\ | |
|             EOT [list \x04 \u2404]\ | |
|             BEL [list \x07 \u2407]\ | |
|             SYN [list \x16 \u2416]\ | |
|             CAN [list \x18 \u2418]\ | |
|             SUB [list \x1a \u241a]\ | |
|             FS  [list \x1c \u241c]\ | |
|             SOS [list \x98 \ue038]\ | |
|             CSI [list \x9b \ue03b]\ | |
|             ST  [list \x9c \ue03c]\ | |
|             PM  [list \x9e \ue03e]\ | |
|             APC [list \x9f \ue03f]\ | |
|         ] | |
|         #it turns out we need pretty much everything for debugging | |
|         set visuals_c0 [dict create\ | |
|             NUL [list \x00 \u2400]\ | |
|             SOH [list \x01 \u2401]\ | |
|             STX [list \x02 \u2402]\ | |
|             ETX [list \x03 \u2403]\ | |
|             EOT [list \x04 \u2404]\ | |
|             ENQ [list \x05 \u2405]\ | |
|             ACK [list \x06 \u2406]\ | |
|             BEL [list \x07 \u2407]\ | |
|             FF  [list \x0c \u240c]\ | |
|             SO  [list \x0e \u240e]\ | |
|             SF  [list \x0f \u240f]\ | |
|             DLE [list \x10 \u2410]\ | |
|             DC1 [list \x11 \u2411]\ | |
|             DC2 [list \x12 \u2412]\ | |
|             DC3 [list \x13 \u2413]\ | |
|             DC4 [list \x14 \u2414]\ | |
|             NAK [list \x15 \u2415]\ | |
|             SYN [list \x16 \u2416]\ | |
|             ETB [list \x17 \u2417]\ | |
|             CAN [list \x18 \u2418]\ | |
|             EM  [list \x19 \u2419]\ | |
|             SUB [list \x1a \u241a]\ | |
|             FS  [list \x1c \u241c]\ | |
|             GS  [list \x1d \u241d]\ | |
|             RS  [list \x1e \u241e]\ | |
|             US  [list \x1f \u241f]\ | |
|             DEL [list \x7f \u2421]\ | |
|         ] | |
|         set visuals_c1 [dict create\ | |
|             BPH [list \x82 \ue022]\ | |
|             NBH [list \x83 \ue023]\ | |
|             IND [list \x84 \ue024]\ | |
|             NEL [list \x85 \ue025]\ | |
|             SSA [list \x86 \ue026]\ | |
|             ESA [list \x87 \ue027]\ | |
|             HTS [list \x88 \ue028]\ | |
|             HTJ [list \x89 \ue029]\ | |
|             VTS [list \x8a \ue02a]\ | |
|             PLD [list \x8b \ue02a]\ | |
|             PLU [list \x8c \ue02c]\ | |
|             RI  [list \x8d \ue02d]\ | |
|             SS2 [list \x8e \ue02e]\ | |
|             SS3 [list \x8f \ue02f]\ | |
|             DCS [list \x90 \ue030]\ | |
|             PU1 [list \x91 \ue031]\ | |
|             PU2 [list \x92 \ue032]\ | |
|             STS [list \x93 \ue033]\ | |
|             CCH [list \x94 \ue034]\ | |
|             MW  [list \x95 \ue035]\ | |
|             SPA [list \x96 \ue036]\ | |
|             EPA [list \x97 \ue037]\ | |
|             SOS [list \x98 \ue038]\ | |
|             SCI [list \x9a \ue03a]\ | |
|             CSI [list \x9b \ue03b]\ | |
|             ST  [list \x9c \ue03c]\ | |
|             OSC [list \x9d \ue03d]\ | |
|             PM  [list \x9e \ue03e]\ | |
|             APC [list \x9f \ue03f]\ | |
|         ] | |
|  | |
|         set visuals_opt [dict create] | |
|         if {$opt_esc} { | |
|             dict set visuals_opt ESC [list \x1b \u241b] | |
|         } | |
|         if {$opt_cr} { | |
|             dict set visuals_opt CR [list \x0d \u240d] | |
|         } | |
|         if {$opt_lf} { | |
|             dict set visuals_opt LF [list \x0a \u240a] | |
|         } | |
|         if {$opt_vt} { | |
|             dict set visuals_opt VT [list \x0b \u240b] | |
|         } | |
|         if {$opt_ht} { | |
|             dict set visuals_opt HT [list \x09 \u2409] | |
|         } | |
|         if {$opt_bs} { | |
|             dict set visuals_opt BS [list \x08 \u2408] | |
|         } | |
|         if {$opt_sp} { | |
|             dict set visuals_opt SP [list \x20 \u2420] | |
|         } | |
|  | |
|         set visuals [dict merge $visuals_opt $visuals_c0 $visuals_c1] | |
|         set  charmap [list] | |
|         dict for {nm chars} $visuals { | |
|             lappend charmap {*}$chars | |
|         } | |
|         return [string map $charmap $string] | |
|  | |
|  | |
|         #test of ISO2047 - 7bit - limited set, limited support, somewhat obscure glyphs | |
|         #return [string map [list \033 \U2296 \007 \U237E] $string] | |
|     } | |
|  | |
|     proc length {string} { | |
|         #*** !doctools | |
|         #[call [fun length] [arg string]] | |
|         #[para]Returns the length of the string without ansi codes | |
|         #[para]This will not count strings hidden inside a 'privacy message' or other ansi codes which may have content between their opening escape and their termination sequence. | |
|         #[para]This is equivalent to calling string length on the result of stripansi $string  | |
|         #[para]Note that this returns the number of characters in the payload (after applying combiners), and is not always the same as the width of the string as rendered on a terminal. | |
|         #[para]To get the width, use punk::ansi::printing_length instead, which is also ansi aware. | |
|         string length [stripansi $string] | |
|     } | |
|      | |
|     proc trimleft {string args} { | |
|         set intext 0 | |
|         set out "" | |
|         #for split_codes only first or last pt can be empty string | |
|         foreach {pt ansiblock} [split_codes $string] { | |
|             if {!$intext} { | |
|                 if {$pt eq "" || [regexp {^\s+$} $pt]} { | |
|                     append out $ansiblock | |
|                 } else { | |
|                     append out [string trimleft $pt]$ansiblock | |
|                     set intext 1 | |
|                 } | |
|             } else { | |
|                 append out $pt$ansiblock | |
|             } | |
|         } | |
|         return $out | |
|     } | |
|     proc trimright {string} { | |
|         if {$string eq ""} {return ""} ;#excludes the case where split_codes would return nothing | |
|         set rtrimmed_list [lreverse [_splits_trimleft [lreverse [split_codes $string]]]] | |
|         return [join $rtrimmed_list ""] | |
|     } | |
|     proc trim {string} { | |
|         #make sure we do our ansi-scanning split only once - so use list-based trim operations | |
|         #order of left vs right probably makes zero difference - as any reduction the first operation can do is only in terms of characters at other end of list - not in total list length  | |
|         #we save a single function call by calling both here rather than _splits_trim | |
|         join [_splits_trimright [_splits_trimleft [split_codes $string]]] "" | |
|     } | |
|  | |
|     proc index {string index} { | |
|         #*** !doctools | |
|         #[call [fun index] [arg string] [arg index]] | |
|         #[para]Takes a string that possibly contains ansi codes such as colour,underline etc (SGR codes) | |
|         #[para]Returns the character (with applied ansi effect) at position index | |
|         #[para]The string could contain non SGR ansi codes - and these will (mostly) be ignored, so shouldn't affect the output. | |
|         #[para]Some terminals don't hide 'privacy message' and other strings within an ESC X ESC ^ or ESC _ sequence (terminated by ST)  | |
|         #[para]It's arguable some of these are application specific - but this function takes the view that they are probably non-displaying - so index won't see them. | |
|         #[para]todo: SGR codes within ST-terminated strings not yet ignored properly  | |
|         #[para]If the caller wants just the character - they should use a normal string index after calling stripansi, or call stripansi afterwards. | |
|         #[para]As any operation using end-+<int> will need to strip ansi to precalculate the length anyway; the caller should probably just use stripansi and standard string index if the ansi coded output isn't required and they are using and end-based index. | |
|         #[para]In fact, any operation where the ansi info isn't required in the output would probably be slightly more efficiently obtained by using stripansi and normal string operations on that. | |
|         #[para]The returned character will (possibly) have a leading ansi escape sequence but no trailing escape sequence - even if the string was taken from a position immediately before a reset or other SGR ansi code | |
|         #[para]The ansi-code prefix in the returned string is built up by concatenating previous SGR ansi codes seen - but it is optimised to re-start the process if any full SGR reset is encountered. | |
|         #[para]The code sequence doesn't detect individual properties being turned on and then off again, only full resets; so in some cases the ansi-prefix may not be as short as it could be. | |
|         #[para]This shouldn't make any difference to the visual output - but a possible future enhancement is something to produce the shortest ansi sequence possible | |
|         #[para]Notes: | |
|         #[para]This function has to split the whole string into plaintext & ansi codes even for a very low index | |
|         #[para]Some sort of generator that parses more of the string as required might be more efficient for large chunks. | |
|         #[para]For end-x operations we have to pre-calculate the content-length by stripping the ansi - which is also potentially sub-optimal | |
|  | |
|         set splits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run | |
|  | |
|         #todo - end-x  +/-x+/-x etc | |
|         set original_index $index | |
|  | |
|         set index [string map [list _ ""] $index] | |
|         #short-circuit some trivial cases | |
|         if {[string is integer -strict $index]} { | |
|             if {$index < 0} {return ""} | |
|             #this only short-circuits an index greater than length including ansi-chars | |
|             #we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length | |
|             if {$index > [string length $string]} {return ""} | |
|         } else { | |
|             if {[string match end* $index]} { | |
|                 #for end- we will probably have to blow a few cycles stripping first and calculate the length | |
|                 if {$index ne "end"} { | |
|                     set op [string index $index 3] | |
|                     set offset [string range $index 4 end] | |
|                     if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} | |
|                     if {$op eq "+" && $offset != 0} { | |
|                         return "" | |
|                     } | |
|                 } else { | |
|                     set offset 0 | |
|                 } | |
|                 #by now, if op = + then offset = 0  so we only need to handle the minus case | |
|                 set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal | |
|                 if {$offset == 0} { | |
|                     set index [expr {$payload_len-1}] | |
|                 } else { | |
|                     set index [expr {($payload_len-1) - $offset}] | |
|                 } | |
|                 if {$index < 0} { | |
|                     #don't waste time splitting and looping the string | |
|                     return "" | |
|                 } | |
|             } else { | |
|                 #we are trying to avoid evaluating unbraced expr of potentially insecure origin | |
|                 regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string | |
|                 if {[string is integer -strict $tail]} { | |
|                     #plain +-<int> | |
|                     if {$op eq "-"} { | |
|                         #return nothing for negative indices as per Tcl's lindex etc | |
|                         return "" | |
|                     } | |
|                     set index $tail | |
|                 } else { | |
|                     if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { | |
|                         if {[string is integer -strict $a] && [string is integer -strict $b]} { | |
|                             if {$op eq "-"} { | |
|                                 set index [expr {$a - $b}] | |
|                             } else { | |
|                                 set index [expr {$a + $b}] | |
|                             } | |
|                         } else { | |
|                             error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" | |
|                         } | |
|                     } else { | |
|                         error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" | |
|                     } | |
|                 } | |
|             } | |
|         } | |
|  | |
|         #any pt could be empty if using split_codes_single (or just first and last pt if split_codes) | |
|         set low   -1  | |
|         set high  -1  | |
|         set pt_index -2 | |
|         set pt_found -1 | |
|         set char "" | |
|         set codes_in_effect ""  | |
|         #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go | |
|         #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) | |
|         foreach {pt code} $splits { | |
|             incr pt_index 2 | |
|             if {$pt ne ""} { | |
|                 set low [expr {$high + 1}] ;#last high  | |
|                 incr high [string length $pt]  | |
|             } | |
|  | |
|             if {$pt ne "" && ($index >= $low && $index <= $high)} { | |
|                 set pt_found $pt_index | |
|                 set char [string index $pt $index-$low] | |
|                 break | |
|             } | |
|              | |
|             if {[punk::ansi::codetype::is_sgr_reset $code]} { | |
|                 #we can throw away previous codes_in_effect | |
|                 set codes_in_effect "" | |
|             } else { | |
|                 #may have partial resets - but we don't want to track individual states of SGR features | |
|                 #A possible feature would be some function to optimise an ansi code sequence - which we could then apply at the end. | |
|                 #we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. | |
|                 #Review - consider if any other types of code make sense to retain in the output in this context. | |
|                 if {[punk::ansi::codetype::is_sgr $code]} { | |
|                     append codes_in_effect $code | |
|                 } | |
|             } | |
|  | |
|         } | |
|         if {$pt_found >= 0} { | |
|             return $codes_in_effect$char | |
|         } else { | |
|             return "" | |
|         } | |
|     } | |
|  | |
|     proc _splits_trimleft {sclist} { | |
|         set intext 0 | |
|         set outlist [list] | |
|         foreach {pt ansiblock} $sclist { | |
|             if {!$intext} { | |
|                 if {$pt eq "" || [regexp {^\s+$} $pt]} { | |
|                     lappend outlist "" $ansiblock | |
|                 } else { | |
|                     lappend outlist [string trimleft $pt] $ansiblock | |
|                     set intext 1 | |
|                 } | |
|             } else { | |
|                 lappend outlist $pt $ansiblock | |
|             } | |
|         } | |
|         return $outlist | |
|     } | |
|     proc _splits_trimright {sclist} { | |
|         set intext 0 | |
|         set outlist [list] | |
|         foreach {pt ansiblock} [lreverse $sclist] { | |
|             if {!$intext} { | |
|                 if {$pt eq "" || [regexp {^\s+$} $pt]} { | |
|                     lappend outlist "" $ansiblock | |
|                 } else { | |
|                     lappend outlist [string trimright $pt] $ansiblock | |
|                     set intext 1 | |
|                 } | |
|             } else { | |
|                 lappend outlist $pt $ansiblock | |
|             } | |
|         } | |
|         return [lreverse $outlist] | |
|     } | |
|     proc _splits_trim {sclist} { | |
|         return [_splits_trimright [_splits_trimleft $sclist]] | |
|     } | |
|  | |
|     #*** !doctools | |
|     #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] | |
| } | |
|  | |
| namespace eval punk::ansi::internal { | |
|     proc splitn {str {len 1}} { | |
|         #from textutil::split::splitn | |
|         if {$len <= 0} { | |
|             return -code error "len must be > 0" | |
|         } | |
|         if {$len == 1} { | |
|             return [split $str {}] | |
|         } | |
|         set result [list] | |
|         set max [string length $str] | |
|         set i 0 | |
|         set j [expr {$len -1}] | |
|         while {$i < $max} { | |
|             lappend result [string range $str $i $j] | |
|             incr i $len | |
|             incr j $len | |
|         } | |
|         return $result | |
|     } | |
|     proc splitx {str {regexp {[\t \r\n]+}}} { | |
|         #from textutil::split::splitx | |
|         # Bugfix 476988 | |
|         if {[string length $str] == 0} { | |
|             return {} | |
|         } | |
|         if {[string length $regexp] == 0} { | |
|             return [::split $str ""] | |
|         } | |
|         if {[regexp $regexp {}]} { | |
|             return -code error \ | |
|             "splitting on regexp \"$regexp\" would cause infinite loop" | |
|         } | |
|         set list  {} | |
|         set start 0 | |
|         while {[regexp -start $start -indices -- $regexp $str match submatch]} { | |
|             foreach {subStart subEnd} $submatch break | |
|             foreach {matchStart matchEnd} $match break | |
|             incr matchStart -1 | |
|             incr matchEnd | |
|             lappend list [string range $str $start $matchStart] | |
|             if {$subStart >= $start} { | |
|                 lappend list [string range $str $subStart $subEnd] | |
|             } | |
|             set start $matchEnd | |
|         } | |
|         lappend list [string range $str $start end] | |
|         return $list | |
|     } | |
|  | |
|     proc printing_length_addchar {i c} { | |
|         upvar outchars outc  | |
|         upvar outsizes outs | |
|         set nxt [llength $outc]  | |
|         if {$i < $nxt} { | |
|             lset outc $i $c | |
|         } else { | |
|             lappend outc $c | |
|         } | |
|     } | |
|  | |
|     #string to 2digit hex - e.g used by XTGETTCAP | |
|     proc str2hex {input} { | |
|         set 2hex "" | |
|         foreach ch [split $input ""] { | |
|             append 2hex [format %02X [scan $ch %c]] | |
|         } | |
|         return $2hex | |
|     } | |
|     proc hex2str {2digithexchars} { | |
|         set 2digithexchars [string map [list _ ""] $2digithexchars] ;#compatibility with  tcl tip 551 (compatibility in the sense that users might expect to be able to use underscores and it's nice to support the syntax here too - not that it's required) | |
|         if {$2digithexchars eq ""} { | |
|             return "" | |
|         } | |
|         if {[string length $2digithexchars] % 2 != 0} { | |
|             error "hex2str requires an even number of hex digits (2 per character)" | |
|         } | |
|         set 2str "" | |
|         foreach pair [splitn $2digithexchars 2] { | |
|             append 2str [format %c 0x$pair] | |
|         } | |
|         return $2str | |
|     } | |
| } | |
|  | |
| # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ | |
| ##  Ready    | |
| package provide punk::ansi [namespace eval punk::ansi { | |
|     variable version | |
|     set version 0.1.1  | |
| }] | |
| return | |
|  | |
|  | |
| #*** !doctools | |
| #[manpage_end] | |
| 
 | |
| 
 |