punkshell/src/bootsupport/modules/punk/ansi-0.1.1.tm

# -*- tcl -*-
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt
#
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem.
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository.
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# (C) 2023
#
# @@ Meta Begin
# Application punk::ansi 0.1.1
# Meta platform     tcl
# Meta license      <unspecified>
# @@ Meta End

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# doctools header
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[manpage_begin punkshell_module_punk::ansi 0 0.1.1]
#[copyright "2023"]
#[titledesc {Ansi string functions}]  [comment {-- Name section and table of contents description --}]
#[moddesc {punk Ansi library}]             [comment {-- Description at end of page heading --}]
#[require punk::ansi]
#[keywords module ansi terminal console string]
#[description]
#[para]Ansi based terminal control string functions
#[para]See [package punk::ansi::console] for related functions for controlling a console

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++

#*** !doctools
#[section Overview]
#[para] overview of punk::ansi
#[para]punk::ansi functions return their values - no implicit emission to console/stdout
#[subsection Concepts]
#[para]Ansi codes can be used to control most terminals on most platforms in an 'almost' standard manner
#[para]There are many differences in terminal implementations - but most should support a core set of features
#[para]punk::ansi does not contain any code for direct terminal manipulation via the local system APIs.
#[para]Sticking to ansi codes where possible may be better for cross-platform and remote operation where such APIs are unlikely to be useable.


# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
##  Requirements
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++

#*** !doctools
#[subsection dependencies]
#[para] packages used by punk::ansi
#[list_begin itemized]

package require Tcl 8.6
#*** !doctools
#[item] [package {Tcl 8.6}]

# #package require frobz
# #*** !doctools
# #[item] [package {frobz}]

#*** !doctools
#[list_end]

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++

#*** !doctools
#[section API]


# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval punk::ansi {
    #*** !doctools
    #[subsection {Namespace punk::ansi}]
    #[para] Core API functions for punk::ansi
    #[list_begin definitions]


    #see also ansicolor page on wiki https://wiki.tcl-lang.org/page/ANSI+color+control

    variable test "blah\033\[1;33mETC\033\[0;mOK"


    #Note that a? is actually a pattern. We can't explicitly match it without also matcing a+ ab etc.  Presumably this won't matter here.
    namespace export\
        {a?} {a+} a \
        ansistring\
        convert*\
        clear*\
        cursor_*\
        detect*\
        get_*\
        move*\
        reset*\
        strip*\
        test_decaln\
        titleset\


    variable escape_terminals
    #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~).
    dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"]
    #dict set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic
    dict set escape_terminals OSC [list \007 \033\\ \u009c] ;#note mix of 1 and 2-byte terminals
    dict set escape_terminals DCS [list \007 \033\\ \u009c]
    dict set escape_terminals MISC [list \007 \033\\ \u009c]
    #NOTE - we are assuming an OSC or DCS started with one type of sequence (7 or 8bit) can be terminated by either 7 or 8 bit ST (or BEL e.g wezterm )
    #This using a different type of ST to that of the opening sequence is presumably unlikely in the wild - but who knows?

    variable standalone_codes
    set standalone_codes [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""]
    #review - there doesn't seem to be an \x1b#7
    # https://espterm.github.io/docs/VT100%20escape%20codes.html

    #self-contained 2 byte ansi escape sequences - review more?
    set  ansi_2byte_codes_dict [dict create\
        "reset_terminal"            "\u001bc"\
        "save_cursor_posn"          "\u001b7"\
        "restore_cursor_posn"       "\u001b8"\
        "cursor_up_one"             "\u001bM"\
        "NEL - Next Line"           "\u001bE"\
        "IND - Down one line"       "\u001bD"\
        "HTS - Set Tab Stop"        "\u001bH"\
        "DECPAM app keypad"         "\x1b="\
        "DECPNM norm keypad"        "\x1b>"\
        ]

    #control strings
    #https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf
    #<excerpt>
    #A control string is a string of bit combinations which may occur in the data stream as a logical entity for
    #control purposes. A control string consists of an opening delimiter, a command string or a character string,
    #and a terminating delimiter, the STRING TERMINATOR (ST).
    #A command string is a sequence of bit combinations in the range 00/08 to 00/13 and 02/00 to 07/14.
    #A character string is a sequence of any bit combination, except those representing START OF STRING
    #(SOS) or STRING TERMINATOR (ST).
    #The interpretation of the command string or the character string is not defined by this Standard, but instead
    #requires prior agreement between the sender and the recipient of the data.
    #The opening delimiters defined in this Standard are
    #a) APPLICATION PROGRAM COMMAND (APC)
    #b) DEVICE CONTROL STRING (DCS)
    #c) OPERATING SYSTEM COMMAND (OSC)
    #d) PRIVACY MESSAGE (PM)
    #e) START OF STRING (SOS)
    #</excerpt>

    #debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway.
    #  "PM - Privacy Message"      "\u001b^somethinghidden\033\\"\
    #The intent is that it's not rendered to the terminal - so on balance it seems best to strip it out.
    #todo - review - printing_length calculations affected by whether terminal honours PMs or not. detect and accomodate.
    #review - can terminals handle SGR codes within a PM?
    #Wezterm will hide PM,SOS,APC - but not any part following an SGR code - i.e it seems to terminate hiding before the ST (apparently at the )
    proc controlstring_PM {text} {
        return "\x1b^${text}\033\\"
    }
    proc controlstring_PM8 {text} {
        return "\x9e${text}\x9c"
    }
    proc controlstring_SOS {text} {
        return "\x1bX${text}\033\\"
    }
    proc controlstring_SOS8 {text} {
        return "\x98${text}\x9c"
    }
    proc controlstring_APC {text} {
        return "\x1b_${text}\033\\"
    }
    proc controlstring_APC8 {text} {
        return "\x9f${text}\x9c"
    }
    #there is also the SGR hide code (8) which has intermittent terminal support
    #This doesn't change the output length - so support is tricky to detec. (terminal checksum report?)


    #candidate for zig/c implementation?
    proc stripansi {text} {
        #*** !doctools
        #[call [fun stripansi] [arg text] ]
        #[para]Return a string with ansi codes stripped out

        #todo - character set selection - SS2 SS3 - how are they terminated?  REVIEW

        variable escape_terminals  ;#dict
        variable standalone_codes  ;#map to empty string

        set text [convert_g0 $text]


        #we should just map away the 2-byte sequences too
        #standalone 3 byte  VT100 sequences - some of these work in wezterm
        #\x1b#3 double-height letters top half
        #\x1b#4 double-height letters bottom half
        #\x1b#5 single-width line
        #\x1b#6 double-width line
        #\x1b#8  dec test fill screen

        set text [string map $standalone_codes $text]

        #we process char by char - line-endings whether \r\n or \n should be processed as per any other character.
        #line endings can theoretically occur within an ansi escape sequence payload (review e.g title?)

        set inputlist [split $text ""]
        set outputlist [list]

        set in_escapesequence 0
        #assumption - text already 'rendered' - ie no cursor movement controls . (what about backspace and lone carriage returns - they are horizontal cursor movements)

        set i 0
        foreach u $inputlist {
            set v [lindex $inputlist $i+1]
            set uv ${u}${v}
            if {$in_escapesequence eq "2b"} {
                #2nd byte - done.
                set in_escapesequence 0
            } elseif {$in_escapesequence != 0} {
                set endseq [dict get $escape_terminals $in_escapesequence]
                if {$u in $endseq} {
                    set in_escapesequence 0
                } elseif {$uv in $endseq} {
                    set in_escapesequence 2b ;#flag next byte as last in sequence
                }
            } else {
                #handle both 7-bit and 8-bit CSI and OSC
                if {[regexp {^(?:\033\[|\u009b)} $uv]} {
                    set in_escapesequence CSI
                } elseif {[regexp {^(?:\033\]|\u009d)} $uv]} {
                    set in_escapesequence OSC
                } elseif {[regexp {^(?:\033P|\u0090)} $uv]} {
                    set in_escapesequence DCS
                } elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} {
                    #SOS,PM,APC - all terminated with ST
                    set in_escapesequence MISC
                } else {
                    lappend outputlist $u
                }
            }
            incr i
        }
        return [join $outputlist ""]
    }

    #review - what happens when no terminator?
    #todo - map other chars  to unicode equivs
    proc convert_g0 {text} {
        #using not \033 inside to stop greediness - review how does it compare to ".*?"
        set re {\033\(0[^\033]*\033\(B}
        set re2 {\033\(0(.*)\033\(B} ;#capturing
        set parts [::punk::ansi::ta::_perlish_split $re $text]
        set out ""
        foreach {pt g} $parts {
            append out $pt
            if {$g ne ""} {
                #puts --$g--
                #box sample
                #lqk
                #x x
                #mqj
                #m = boxd_lur
                #set map [list l \u250f k \u2513] ;#heavy
                set map [list l \u250c q \u2500 k \u2510 x \u2502 m \u2514 j \u2518] ;#light

                regexp $re2 $g _match contents
                append out [string map $map $contents]
            }
        }
        return $out
    }

    #todo - convert esc(0 graphics sequences to single char unicode equivalents e.g box drawing set
    # esc) ??
    proc stripansi_gx {text} {
            #e.g "\033(0" - select VT100 graphics for character set G0
            #e.g "\033(B" - reset
            #e.g "\033)0" - select VT100 graphics for character set G1
            #e.g "\033)X" - where X is any char other than 0 to reset ??
            return [convert_g0 $text]
    }


    #CSI <n> m = SGR (Select Graphic Rendition)
    variable SGR_setting_map {
        bold        1   dim             2   blink           5   fastblink   6   noblink     25      hide   8    normal      22
        underline   4   doubleunderline 21  nounderline     24  strike      9   nostrike    29      italic 3    noitalic    23
        reverse     7   noreverse       27  defaultfg       39  defaultbg   49  nohide      28
        overline    53  nooverline      55  frame           51  framecircle 52  noframe     54
    }
    variable SGR_colour_map {
        black       30  red             31  green           32  yellow      33  blue        34       purple 35   cyan        36      white 37
        Black       40  Red             41  Green           42  Yellow      43  Blue        44      Purple 45   Cyan        46      White 47
        BLACK       100 RED             101 GREEN           102 YELLOW      103 BLUE        104     PURPLE 105  CYAN        106     WHITE 107
    }
    variable SGR_map
    set SGR_map [dict merge $SGR_colour_map $SGR_setting_map]


    proc colourmap1 {{bgname White}} {
        package require textblock

        set bg [textblock::block 33 3 "[a+ $bgname] [a]"]
        set colormap ""
        for {set i 0} {$i <= 7} {incr i} {
            append colormap "_[a+ white bold 48\;5\;$i] $i [a]"
        }
        set map1 [overtype::left -transparent _ $bg "\n$colormap"]
        return $map1
    }
    proc colourmap2 {{bgname White}} {
        package require textblock
        set bg [textblock::block 39 3 "[a+ $bgname] [a]"]
        set colormap ""
        for {set i 8} {$i <= 15} {incr i} {
            append colormap "_[a+ black normal 48\;5\;$i] $i [a]" ;#black normal is blacker than black bold - which often displays as a grey
        }
        set map2 [overtype::left -transparent _ $bg "\n$colormap"]
        return $map2
    }
    proc a? {args} {
        #*** !doctools
        #[call [fun a?] [opt {ansicode...}]]
        #[para]Return an ansi string representing a table of codes and a panel showing the colours
        variable SGR_setting_map
        variable SGR_colour_map

        if {![llength $args]} {
            set out ""
            append out $SGR_setting_map \n
            append out $SGR_colour_map \n

            try {
                package require overtype ;# circular dependency - many components require overtype. Here we only need it for nice layout in the a? query proc - so we'll do a soft-dependency by only loading when needed and also wrapping in a try
                set bgname "White"
                set map1 [colourmap1 $bgname]
                set map1 [overtype::centre -transparent 1 $map1 "[a black $bgname]Standard colours[a]"]
                set map2 [colourmap2 $bgname]
                set map2 [overtype::centre -transparent 1 $map2 "[a black $bgname]High-intensity colours[a]"]
                append out [textblock::join $map1 "   " $map2] \n
                #append out $map1[a] \n
                #append out $map2[a] \n


            } on error {result options} {
                puts stderr "Failed to draw colormap"
                puts stderr "$result"
            } finally {
                return $out
            }
        } else {
            set result [list]
            set rmap [lreverse $map]
            foreach i $args {
                if {[string is integer -strict $i]} {
                    if {[dict exists $rmap $i]} {
                        lappend result $i [dict get $rmap $i]
                    }
                } else {
                    if {[dict exists $map $i]} {
                        lappend result $i [dict get $map $i]
                    }
                }
            }
            return $result
        }
    }
    proc a+ {args} {
        #*** !doctools
        #[call [fun a+] [opt {ansicode...}]]
        #[para]Returns the ansi code to apply those from the supplied list - without any reset being performed first
        #[para] e.g to set foreground red and bold
        #[para]punk::ansi::a red bold
        #[para]to set background red
        #[para]punk::ansi::a Red
        #[para]see [cmd punk::ansi::a?] to display a list of codes

        #don't disable ansi here.
        #we want this to be available to call even if ansi is off
        variable SGR_map
        set t [list]
        foreach i $args {
            if {[string is integer -strict $i]} {
                lappend t $i
            } elseif {[string first ";" $i] >=0} {
                #literal with params
                lappend t $i
            } else {
                if {[dict exists $SGR_map $i]} {
                    lappend t [dict get $SGR_map $i]
                } else {
                    #accept examples for foreground
                    #  256f-# or 256fg-# or 256f#
                    #  rgbf-<r>-<g>-<b>  or rgbfg-<r>-<g>-<b> or rgbf<r>-<g>-<b>
                    if {[string match -nocase "256f*" $i]} {
                        set cc [string trim [string range $i 4 end] -gG]
                        lappend t "38;5;$cc"
                    } elseif {[string match -nocase 256b* $i]} {
                        set cc [string trim [string range $i 4 end] -gG]
                        lappend t "48;5;$cc"
                    } elseif {[string match -nocase rgbf* $i]} {
                        set rgb [string trim [string range $i 4 end] -gG]
                        lassign [split $rgb -] r g b
                        lappend t "38;2;$r;$g;$b"
                    } elseif {[string match -nocase rgbb* $i]} {
                        set rgb [string trim [string range $i 4 end] -gG]
                        lassign [split $rgb -] r g b
                        lappend t "48;2;$r;$g;$b"
                    }
                }
            }
        }
        # \033 - octal.  equivalently \x1b in hex which is more common in documentation
        if {![llength $t]} {
            return "" ;# a+ nonexistent should return nothing rather  than a reset ( \033\[\;m  is a reset even without explicit zero(s))
        }
        return "\x1b\[[join $t {;}]m"
    }
    proc a {args} {
        #*** !doctools
        #[call [fun a] [opt {ansicode...}]]
        #[para]Returns the ansi code to reset any current settings and apply those from the supplied list
        #[para] by calling punk::ansi::a with no arguments - the result is a reset to plain text
        #[para] e.g to set foreground red and bold
        #[para]punk::ansi::a red bold
        #[para]to set background red
        #[para]punk::ansi::a Red
        #[para]see [cmd punk::ansi::a?] to display a list of codes


        #don't disable ansi here.
        #we want this to be available to call even if ansi is off
        variable SGR_map
        set t [list]
        foreach i $args {
            if {[string is integer -strict $i]} {
                lappend t $i
            } elseif {[string first ";" $i] >=0} {
                #literal with params
                lappend t $i
            } else {
                if {[dict exists $SGR_map $i]} {
                    lappend t [dict get $SGR_map $i]
                } else {
                    #accept examples for foreground
                    #  256f-# or 256fg-# or 256f#
                    #  rgbf-<r>-<g>-<b>  or rgbfg-<r>-<g>-<b> or rgbf<r>-<g>-<b>
                    if {[string match -nocase "256f*" $i]} {
                        set cc [string trim [string range $i 4 end] -gG]
                        lappend t "38;5;$cc"
                    } elseif {[string match -nocase 256b* $i]} {
                        set cc [string trim [string range $i 4 end] -gG]
                        lappend t "48;5;$cc"
                    } elseif {[string match -nocase rgbf* $i]} {
                        set rgb [string trim [string range $i 4 end] -gG]
                        lassign [split $rgb -] r g b
                        lappend t "38;2;$r;$g;$b"
                    } elseif {[string match -nocase rgbb* $i]} {
                        set rgb [string trim [string range $i 4 end] -gG]
                        lassign [split $rgb -] r g b
                        lappend t "48;2;$r;$g;$b"
                    }
                }
            }
        }
        # \033 - octal.  equivalently \x1b in hex which is more common in documentation
        # empty list [a=] should do reset - same for [a= nonexistant]
        # explicit reset at beginning of parameter list for a= (as opposed to a+)
        set t [linsert $t 0 0]
        return "\x1b\[[join $t {;}]m"
    }


    proc get_code_name {code} {
        #*** !doctools
        #[call [fun get_code_name] [arg code]]
        #[para]for example
        #[para] get_code_name red will return 31
        #[para] get_code_name 31 will return red
        variable SGR_map
        set res [list]
        foreach i [split $code ";"] {
            set ix [lsearch -exact $SGR_map $i]
            if {[string is digit -strict $code]} {
                if {$ix>-1} {lappend res [lindex $SGR_map [incr ix -1]]}
            } else {
                #reverse lookup code from name
                if {$ix>-1} {lappend res [lindex $SGR_map [incr ix]]}
            }
        }
        set res
    }
    proc reset {} {
        #*** !doctools
        #[call [fun reset]]
        #[para]reset console
        return "\x1bc"
    }
    proc reset_soft {} {
        #*** !doctools
        #[call [fun reset_soft]]
        return \x1b\[!p
    }
    proc reset_colour {} {
        #*** !doctools
        #[call [fun reset_colour]]
        #[para]reset colour only
        return "\x1b\[0m"
    }

    # -- ---  --- --- ---
    proc clear {} {
        #*** !doctools
        #[call [fun clear]]
        return "\033\[2J"
    }
    proc clear_above {} {
        #*** !doctools
        #[call [fun clear_above]]
        return \033\[1J
    }
    proc clear_below {} {
        #*** !doctools
        #[call [fun clear_below]]
        return \033\[0J
    }

    proc clear_all {} {
        # - doesn't work??
        return \033\[3J
    }
    #see also erase_ functions
    # -- ---  --- --- ---

    proc cursor_on {} {
        #*** !doctools
        #[call [fun cursor_on]]
        return "\033\[?25h"
    }
    proc cursor_off {} {
        #*** !doctools
        #[call [fun cursor_off]]
        return "\033\[?25l"
    }

    # -- ---  --- --- ---
    proc move {row col} {
        #*** !doctools
        #[call [fun move] [arg row] [arg col]]
        #[para]Return an ansi sequence to move to row,col
        #[para]aka cursor home
        return \033\[${row}\;${col}H
    }
    proc move_emit {row col data args} {
        #*** !doctools
        #[call [fun move_emit] [arg row] [arg col] [arg data] [opt {row col data...}]]
        #[para]Return an ansi string representing a  move to row col with data appended
        #[para]row col data can be repeated any number of times to return a string representing the output of the data elements at all those points
        #[para]Compare to punk::console::move_emit which calls this function - but writes it to stdout
        #[para]punk::console::move_emit_return will also return the cursor to the original position
        #[para]There is no punk::ansi::move_emit_return because in a standard console there is no ansi string which can represent a jump back to starting position.
        #[para]There is an ansi code to write the current cursor position to stdin (which will generally display on the console) - this is not quite the same thing.
        #[para]punk::console::move_emit_return does it by emitting that code and starting a loop to read stdin
        #[para]punk::ansi could implement a move_emit_return using the punk::console mechanism - but the resulting string would capture the cursor position at the time the string is built - which is not necessarily when the string is used.
        #[para]The following example shows how to do this manually, emitting the string blah at screen position 10,10 and emitting DONE back at the line we started:
        #[para][example {punk::ansi::move_emit 10 10 blah {*}[punk::console::get_cursor_pos_list] DONE}]
        #[para]A string created by any move_emit_return for punk::ansi would not behave in an intuitive manner compared to other punk::ansi move functions - so is deliberately omitted.

        set out ""
        if {$row eq "this"} {
            append out \033\[\;${col}G$data
        } else {
            append out \033\[${row}\;${col}H$data
        }
        foreach {row col data} $args {
            if {$row eq "this"} {
                append out \033\[\;${col}G$data
            } else {
                append out \033\[${row}\;${col}H$data
            }
        }
        return $out
    }
    proc move_forward {{n 1}} {
        #*** !doctools
        #[call [fun move_forward] [arg n]]
        return \033\[${n}C
    }
    proc move_back {{n 1}} {
        #*** !doctools
        #[call [fun move_back] [arg n]]
        return \033\[${n}D
    }
    proc move_up {{n 1}} {
        #*** !doctools
        #[call [fun move_up] [arg n]]
        return \033\[${n}A
    }
    proc move_down {{n 1}} {
        #*** !doctools
        #[call [fun move_down] [arg n]]
        return \033\[${n}B
    }
    proc move_column {col} {
        #*** !doctools
        #[call [fun move_column] [arg col]]
        return \x1b\[${col}G
    }
    proc move_row {row} {
        #*** !doctools
        #[call [fun move_row] [arg row]]
        #[para]VPA - Vertical Line Position Absolute
        return \x1b\[${row}d
    }
    # -- ---  --- --- ---

    proc save_cursor {} {
        #*** !doctools
        #[call [fun save_cursor]]
        return \x1b\[s
    }
    proc restore_cursor {} {
        #*** !doctools
        #[call [fun restore_cursor]]
        return \x1b\[u
    }

    # -- ---  --- --- ---
    proc erase_line {} {
        #*** !doctools
        #[call [fun erase_line]]
        return \033\[2K
    }
    proc erase_sol {} {
        #*** !doctools
        #[call [fun erase_sol]]
        #[para]Erase to start of line, leaving cursor position alone.
        return \033\[1K
    }
    proc erase_eol {} {
        #*** !doctools
        #[call [fun erase_eol]]
        return \033\[K
    }
    #see also clear_above clear_below
    # -- ---  --- --- ---

    proc scroll_up {n} {
        #*** !doctools
        #[call [fun scroll_up] [arg n]]
        return \x1b\[${n}S
    }
    proc scroll_down {n} {
        #*** !doctools
        #[call [fun scroll_down] [arg n]]
        return \x1b\[${n}T
    }

    proc insert_spaces {count} {
        #*** !doctools
        #[call [fun insert_spaces] [arg count]]
        return \x1b\[${count}@
    }
    proc delete_characters {count} {
        #*** !doctools
        #[call [fun delete_characters] [arg count]]
        return \x1b\[${count}P
    }
    proc erase_characters {count} {
        #*** !doctools
        #[call [fun erase_characters] [arg count]]
        return \x1b\[${count}X
    }
    proc insert_lines {count} {
        #*** !doctools
        #[call [fun insert_lines] [arg count]]
        return \x1b\[${count}L
    }
    proc delete_lines {count} {
        #*** !doctools
        #[call [fun delete_lines] [arg count]]
        return \x1b\[${count}M
    }

    proc cursor_pos {} {
        #*** !doctools
        #[call [fun cursor_pos]]
        #[para]cursor_pos unlikely to be useful on it's own like this as when written to the terminal, this sequence causes the terminal to emit the row;col sequence to stdin
        #[para]The output on screen will look something like ^[lb][lb]47;3R
        #[para]Use punk::console::get_cursor_pos or punk::console::get_cursor_pos_list instead.
        #[para]These functions will emit the code - but read it in from stdin so that it doesn't display, and then return the row and column as a colon-delimited string or list respectively.
        #[para]The punk::ansi::cursor_pos function is used by punk::console::get_cursor_pos and punk::console::get_cursor_pos_list
        return \033\[6n
    }

    proc request_cursor_information {} {
        #*** !doctools
        #[call [fun request_cursor_information]]
        #[para]DECRQPSR (DEC Request Presentation State Report) for DECCCIR Cursor Information report
        #[para]When written to the terminal, this sequence causes the terminal to emit cursor information to stdin
        #[para]A stdin readloop will need to be in place to read this information
        return \x1b\[1\$w
    }
    proc request_tabstops {} {
        #*** !doctools
        #[call [fun request_tabstops]]
        #[para]DECRQPSR (DEC Request Presentation State Report) for DECTABSR Tab stop report
        #[para]When written to the terminal, this sequence causes the terminal to emit tabstop information to stdin
        return \x1b\[2\$w
    }


    #alternative to string terminator is \007 -
    proc titleset {windowtitle} {
        #*** !doctools
        #[call [fun titleset] [arg windowtitles]]
        #[para]Returns the code to set the title of the terminal window to windowtitle
        #[para]This may not work on terminals which have multiple panes/windows
        return "\033\]2;$windowtitle\033\\" ;#works for xterm and most derivatives
    }
    #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title
    #no cross-platform ansi-only mechanism ?

    proc test_decaln {} {
        #Screen Alignment Test
        #Reset margins, move cursor to the top left, and fill the screen with 'E'
        #(doesn't work on many terminals - seems to work in FreeBSD 13.2 and wezterm on windows)
        return \x1b#8
    }

    #length of text for printing characters only
    #review - unicode and other non-printing chars and combining sequences?
    #certain unicode chars are full-width (single char 2 columns wide) e.g see "Halfwdith and fullwidth forms" and ascii_fuillwidth blocks in punk::char::charset_names
    #review - is there an existing library or better method? print to a terminal and query cursor position?
    #Note this length calculation is only suitable for lines being appended to other strings if the line is pre-processed to account for backspace and carriage returns first
    #If the raw line is appended to another string without such processing - the backspaces & carriage returns can affect data prior to the start of the string.
    proc printing_length {line} {
        if {[string first \n $line] >= 0} {
            error "line_print_length must not contain newline characters"
        }
        #what if line has \v (vertical tab) ie more than one logical screen line?

        #review -
        set line [punk::ansi::stripansi $line]

        set line [punk::char::strip_nonprinting_ascii $line] ;#only strip nonprinting after stripansi - some like BEL are part of ansi
        #backspace 0x08 only erases* printing characters anyway - so presumably order of processing doesn't matter
        #(* more correctly - moves cursor back)
        #Note some terminals process backspace before \v - which seems quite wrong
        #backspace will not move beyond a preceding newline - but we have disallowed newlines for this function already
        #leading backspaces will eat into any string (even prompt in non-standard tclsh shell) that is prepended to the line
        # - but for the purposes of overtype we wouldn't want that - so we strip it here in the length calculation and should strip leading backspaces in the actual data concatenation operations too.
        #curiously - a backspace sequence at the end of a string also doesn't reduce the printing width - so we can also strip from RHS

        #Note that backspace following a \t will only shorten the string by one (ie it doesn't move back the whole tab width like it does interactively in the terminal)
        #for this reason - it would seem best to normalize the tabs to spaces prior to performing the backspace calculation - otherwise we won't account for the 'short' tabs it effectivley produces
        #normalize tabs to an appropriate* width
        #*todo - handle terminal/context where tabwidth != the default 8 spaces
        set line [textutil::tabify::untabify2 $line]

        set bs [format %c 0x08]
        #set line [string map [list "\r${bs}" "\r"] $line] ;#backsp following a \r will have no effect
        set line [string trim $line $bs]
        #counterintuitively "x\b" still shows the x ie length is still one. The backspace just moves the position. There must be a char following \b for it to affect the length.
        set n 0

        set chars [split $line ""]
        #build an output
        set idx 0
        set outchars [list]
        set outsizes [list]
        foreach c $chars {
            if {$c eq $bs} {
                if {$idx > 0} {
                    incr idx -1
                }
            } elseif {$c eq "\r"} {
                set idx 0
            } else {
                punk::ansi::internal::printing_length_addchar $idx $c
                incr idx
            }
        }
        set line2 [join $outchars ""]
        return [punk::char::string_width $line2]
    }


    #*** !doctools
    #[list_end] [comment {--- end definitions namespace punk::ansi ---}]
}


namespace eval punk::ansi {


    # -- --- --- --- --- ---
    #XTGETTCAP
    # xterm responds with
    # DCS 1 + r Pt ST for valid requests, adding to Pt an = , and
    # the value of the corresponding string that xterm would send,
    # or
    # DCS 0 + r ST for invalid requests.
    # The strings are encoded in hexadecimal (2 digits per
    # character).  If more than one name is given, xterm replies
    # with each name/value pair in the same response.  An invalid
    # name (one not found in xterm's tables) ends processing of the
    # list of names.
    proc xtgetcap {keylist} {
        #ESC P = 0x90 = DCS = Device Control String
        set hexkeys [list]
        foreach k $keylist {
            lappend hexkeys [util::str2hex $k]
        }
        set payload [join $hexkeys ";"]
        return "\x1bP+q$payload\x1b\\"
    }
    proc xtgetcap2 {keylist} {
        #ESC P = 0x90 = DCS = Device Control String
        set hexkeys [list]
        foreach k $keylist {
            lappend hexkeys [util::str2hex $k]
        }
        set payload [join $hexkeys ";"]
        return "\u0090+q$payload\u009c"
    }
    namespace eval codetype {
        #Functions that operate on a single ansi code sequence - not a sequence, and not codes embedded in another string
        proc is_sgr {code} {
            #SGR (Select Graphic Rendition) - codes ending in 'm' - e.g colour/underline
            #we will accept and pass through the less common colon separator (ITU Open Document Architecture)
            #Terminals should generally ignore it if they don't use it
            regexp {\033\[[0-9;:]*m$} $code
        }
        proc is_cursor_move_in_line {code} {
            #review - what about CSI n : m H where row n happens to be current line?
            regexp {\033\[[0-9]*(:?C|D|G)$} $code
        }
        #pure SGR reset with no other functions
        proc is_sgr_reset {code} {
            #todo 8-bit csi
            regexp {\033\[0*m$} $code
        }
        #whether this code has 0 (or equivalently empty) parameter (but may set others)
        #if an SGR code has a reset in it - we don't need to carry forward any previous SGR codes
        #it generally only makes sense for the reset to be the first entry - otherwise the code has ineffective portions
        #However - detecting zero or empty parameter in other positions requires knowing all other codes that may allow zero or empty params.
        #We will only look at initial parameter as this is the well-formed normal case.
        #Review - consider normalizing sgr codes to remove other redundancies such as setting fg or bg color twice in same code
        proc has_sgr_leadingreset {code} {
            set params ""
            regexp {\033\[(.*)m} $code _match params
            set plist [split $params ";"]
            if {[string trim [lindex $plist 0] 0] eq ""} {
                #e.g \033\[m  \033\[0\;...m  \033\[0000...m
                return 1
            } else {
                return 0
            }
        }

        #has_sgr_reset  - rather than support this function - create an sgr normalize function that removes dead params and brings reset to front of param list?

    }
    namespace eval sequence_type {
        proc is_Fe {code} {
            # C1 control codes
            if {[regexp {^\033\[[\u0040-\u005F]}]} {
                #7bit - typical case
                return 1
            }
            #8bit
            #review - all C1 escapes ? 0x80-0x90F
            #This is possibly problematic as it is affected by encoding.
            #According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
            #"However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used."
            return 0
        }
        proc is_Fs {code} {
            puts stderr "is_Fs unimplemented"
        }
    }
    # -- --- --- --- --- --- --- --- --- --- ---
    #todo - implement colour resets like the perl module:
    #https://metacpan.org/pod/Text::ANSI::Util
    #(saves up all ansi color codes since previous color reset and replays the saved codes after our highlighting is done)
}


namespace eval punk::ansi::ta {
    #*** !doctools
    #[subsection {Namespace punk::ansi::ta}]
    #[para] text ansi functions
    #[para] based on but not identical to the Perl Text Ansi module:
    #[para] https://github.com/perlancar/perl-Text-ANSI-Util/blob/master/lib/Text/ANSI/BaseUtil.pm
    #[list_begin definitions]
    namespace path ::punk::ansi

    #handle both 7-bit and 8-bit csi
    #review - does codepage affect this? e.g ebcdic has 8bit csi in different position

    #CSI
    #variable re_csi_open {(?:\033\[|\u009b)[0-9;]+} ;#too specific - doesn't detect \033\[m
    variable re_csi_open {(?:\033\[|\u009b)}

    #colour and style
    variable re_csi_colour {(?:\033\[|\u009b)[0-9;]*m} ;#e.g \033\[31m  \033\[m  \033\[0m \033\[m0000m
    #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~).
    variable re_csi_code  {(?:\033\[|\u009b)[0-9;]*[a-zA-Z\\@^_|~`]}

    #OSC - termnate with BEL (\a \007) or ST (string terminator \033\\)
    # 8-byte string terminator is \x9c (\u009c)

    #non-greedy via "*?" doesn't seem to work like this..
    #variable re_esc_osc1 {(?:\033\]).*?\007}
    #variable re_esc_osc2 {(?:\033\]).*?\033\\}
    #variable re_esc_osc3 {(?:\u009d).*?\u009c}

    #non-greedy by excluding ST terminators
    #TODO - FIX? see re_ST below
    variable re_esc_osc1 {(?:\033\])(?:[^\007]*)\007}
    variable re_esc_osc2 {(?:\033\])(?:[^\033]*)\033\\}
    variable re_esc_osc3 {(?:\u009d)(?:[^\u009c]*)?\u009c}

    variable re_osc_open {(?:\033\]|\u009d).*}

    #standalone_codes [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""]
    variable re_standalones {(?:\x1bc|\x1b7|\x1b8|\x1bM|\x1bE|\x1bD|\x1bD|\x1bH|\x1b=|\x1b>|\x1b#3|\x1b#4|\x1b#5|\x1b#6|\x1b#8)}

    #see stripansi
    set re_start_ST {^(?:\033X|\u0098|\033\^|\u009e|\033_|\u009f)}
    #ST terminators [list \007 \033\\ \u009c]

    #regex to capture the start of string/privacy message/application command block including the contents and string terminator (ST)
    #non-greedy by exclusion of ST terminators in body
    #!!!
    #TODO - fix. we need to match \033\\ not just \033 ! could be colour codes nested in a privacy msg/string
    #This will currently terminate the code too early in this case
    #we also need to track the start of ST terminated code and not add it for replay (in the ansistring functions)
    variable re_ST  {(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)(?:[^\033\007\u009c]*)(?:\033\\|\007|\u009c)}

    variable re_ansi_detect "${re_csi_open}|${re_esc_osc1}|${re_esc_osc2}|${re_standalones}|${re_start_ST}"

    #detect any ansi escapes
    #review - only detect 'complete' codes - or just use the opening escapes for performance?
    proc detect {text} {
        #*** !doctools
        #[call [fun detect] [arg text]]
        #[para]Return a boolean indicating whether Ansi codes were detected in text
        #[para]

        variable re_ansi_detect
        #variable re_csi_open
        #variable re_esc_osc1
        #variable re_esc_osc2
        #todo - other escape sequences
        #expr {[regexp $re_csi_open $text] || [regexp $re_esc_osc1 $text] || [regexp $re_esc_osc2 $text]}
        expr {[regexp $re_ansi_detect $text]}
    }
    #not in perl ta
    proc detect_csi {text} {
        #*** !doctools
        #[call [fun detect_csi] [arg text]]
        #[para]Return a boolean indicating whether an Ansi Control Sequence Introducer (CSI) was detected in text
        #[para]The csi is often represented in code as \x1b or \033 followed by a left bracket [lb]
        #[para]The initial byte or escape is commonly referenced as ESC in Ansi documentation
        #[para]There is also a multi-byte escape sequence \u009b
        #[para]This is less commonly used but is also detected here
        #[para](This function is not in perl ta)
        variable re_csi_open
        expr {[regexp $re_csi_open $text]}
    }
    proc detect_sgr {text} {
        #*** !doctools
        #[call [fun detect_sgr] [arg text]]
        #[para]Return a boolean indicating whether an ansi Select Graphics Rendition code was detected.
        #[para]This is the set of CSI sequences ending in 'm'
        #[para]This is most commonly an Ansi colour code - but also things such as underline and italics
        #[para]An SGR with empty or a single zero argument is a reset of the SGR features - this is also detected.
        #[para](This function is not in perl ta)
        variable re_csi_colour
        expr {[regexp $re_csi_colour $text]}
    }
    proc strip {text} {
        #*** !doctools
        #[call [fun strip] [arg text]]
        #[para]Return text stripped of Ansi codes
        #[para]This is a tailcall to punk::ansi::stripansi
        tailcall stripansi $text
    }
    proc length {text} {
        #*** !doctools
        #[call [fun length] [arg text]]
        #[para]Return the character length after stripping ansi codes - not the printing length
        string length [stripansi $text]
    }
    #todo - handle newlines
    #not in perl ta
    #proc printing_length {text} {
    #
    #}

    proc trunc {text width args} {

    }

    #not in perl ta
    #returns just the plaintext portions in a list
    proc split_at_codes {text} {
        variable re_esc_osc1
        variable re_esc_osc2
        variable re_csi_code
        variable re_standalones
        variable re_ST
        punk::ansi::internal::splitx $text "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_standalones}|${re_ST}"
    }

    # -- --- --- --- --- ---
    #Split $text to a list containing alternating ANSI color codes and text.
    #ANSI color codes are always on the second element, fourth, and so on.
    #(ie plaintext on odd list-indices ansi on even indices)
    # Example:
    #ta_split_codes ""              # => ""
    #ta_split_codes "a"             # => "a"
    #ta_split_codes "a\e[31m"       # => {"a"   "\e[31m"}
    #ta_split_codes "\e[31ma"       # => {""    "\e[31m"       "a"}
    #ta_split_codes "\e[31ma\e[0m"  # => {""    "\e[31m"       "a"  "\e[0m"}
    #ta_split_codes "\e[31ma\e[0mb" # => {""    "\e[31m"       "a"  "\e[0m", "b"}
    #ta_split_codes "\e[31m\e[0mb"  # => {""    "\e[31m\e[0m"  "b"}
    #
    proc split_codes {text} {
        variable re_esc_osc1
        variable re_esc_osc2
        variable re_csi_code
        variable re_standalones
        variable re_ST
        set re "(?:${re_csi_code}|${re_standalones}|${re_ST}|${re_esc_osc1}|${re_esc_osc2})+"
        return [_perlish_split $re $text]
    }
    #like split_codes - but each ansi-escape is split out separately (with empty string of plaintext between codes so odd/even plain ansi still holds)
    proc split_codes_single {text} {
        variable re_esc_osc1
        variable re_esc_osc2
        variable re_csi_code
        variable re_standalones
        variable re_ST
        set re "${re_csi_code}|${re_standalones}|${re_ST}|${re_esc_osc1}|${re_esc_osc2}"
        return [_perlish_split $re $text]
    }

    #review - tcl greedy expressions may match multiple in one element
    proc _perlish_split {re text} {
        if {[string length $text] == 0} {
            return {}
        }
        set list [list]
        set start 0

        #We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW
        while {[regexp -start $start -indices -- $re $text match]} {
            lassign $match      matchStart matchEnd
            #puts "->start $start ->match $matchStart $matchEnd"
            if {$matchEnd < $matchStart} {
                lappend list [string range $text $start $matchStart-1] [string index $text $matchStart]
                incr start
                if {$start >= [string length $text]} {
                    break
                }
                continue
            }
            lappend list [string range $text $start $matchStart-1] [string range $text $matchStart $matchEnd]
            set start [expr {$matchEnd+1}]

            #?
            if {$start >= [string length $text]} {
                break
            }
        }
        lappend list [string range $text $start end]
        return $list
    }
    proc _ws_split {text} {
        regexp -all -inline {(?:\S+)|(?:\s+)} $text
    }
    # -- --- --- --- --- ---

    #*** !doctools
    #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}]
}
# -- --- --- --- --- --- --- --- --- --- ---

namespace eval punk::ansi::ansistring {
    #*** !doctools
    #[subsection {Namespace punk::ansi::ansistring}]
    #[para]punk::ansi::ansistring ensemble  - ansi-aware string operations
    #[para]Working with strings containing ansi in a way that preserves/understands the codes is always going to be significantly slower than working with plain strings
    #[para]Just as working with other forms of markup such as HTML - you simply need to be aware of the tradeoffs and design accordingly.
    #[list_begin definitions]

    namespace path [list ::punk::ansi ::punk::ansi::ta]
    namespace ensemble create
    namespace export length trim trimleft trimright index VIEW
    #todo - expose _splits_ methods so caller can work efficiently with the splits themselves
    #we need to consider whether these can be agnostic towards splits from split_codes vs split_codes_single

    #\UFFFD  - replacement char or \U2426

    #using ISO 2047 graphical representations of control characters - probably obsolete?
    #00	NUL	Null	⎕	U+2395	NU
    #01	TC1, SOH	Start of Heading	⌈	U+2308	SH
    #02	TC2, STX	Start of Text	⊥	U+22A5	SX
    #03	TC3, ETX	End of Text	⌋	U+230B	EX
    #04	TC4, EOT	End of Transmission	⌁	U+2301[9]	ET
    #05	TC5, ENQ	Enquiry	⊠[a]	U+22A0	EQ
    #06	TC6, ACK	Acknowledge	✓	U+2713	AK
    #07	BEL	Bell	⍾	U+237E[9]	BL
    #08	FE0, BS	Backspace	⤺	—[b]	BS
    #09	FE1, HT	Horizontal Tabulation	⪫	U+2AAB	HT
    #0A	FE2, LF	Line Feed	≡	U+2261	LF
    #0B	FE3, VT	Vertical Tabulation	⩛	U+2A5B	VT
    #0C	FE4, FF	Form Feed	↡	U+21A1	FF
    #0D	FE5, CR	Carriage Return	⪪	U+2AAA	CR
    #0E	SO	Shift Out	⊗	U+2297	SO
    #0F	SI	Shift In	⊙	U+2299	SI
    #10	TC7, DLE	Data Link Escape	⊟	U+229F	DL
    #11	DC1, XON, CON[10]	Device Control 1	◷	U+25F7	D1
    #12	DC2, RPT,[10] TAPE[c]	Device Control 2	◶	U+25F6	D2
    #13	DC3, XOF, XOFF	Device Control 3	◵	U+25F5	D3
    #14	DC4, COF, KMC,[10] TAPE[c]	Device Control 4	◴	U+25F4	D4
    #15	TC8, NAK	Negative Acknowledge	⍻	U+237B[9]	NK
    #16	TC9, SYN	Synchronization	⎍	U+238D	SY
    #17	TC10, ETB	End of Transmission Block	⊣	U+22A3	EB
    #18	CAN	Cancel	⧖	U+29D6	CN
    #19	EM	End of Medium	⍿	U+237F[9]	EM
    #1A	SUB	Substitute Character	␦	U+2426[12]	SB
    #1B	ESC	Escape	⊖	U+2296	EC
    #1C	IS4, FS	File Separator	◰	U+25F0	FS
    #1D	IS3, GS	Group Separator	◱	U+25F1	GS
    #1E	IS2, RS	Record Separator	◲	U+25F2	RS
    #1F	IS1 US	Unit Separator	◳	U+25F3	US
    #20	SP	Space	△	U+25B3	SP
    #7F	DEL	Delete	▨	—[d]	DT

    #C0 control code visual representations
    # Code Val Name 2X Description
    # 2400 00 NUL NU Symbol for Null
    # 2401 01 SOH SH Symbol for Start of Heading
    # 2402 02 STX SX Symbol for Start of Text
    # 2403 03 ETX EX Symbol for End of Text
    # 2404 04 EOT ET Symbol for End of Transmission
    # 2405 05 ENQ EQ Symbol for Enquiry
    # 2406 06 ACK AK Symbol for Acknowledge
    # 2407 07 BEL BL Symbol for Bell
    # 2409 09 BS BS Symbol for Backspace
    # 2409 09 HT HT Symbol for Horizontal Tab (1)
    # 240A 0A LF LF Symbol for Line Feed (1)
    # 240B 0B VT VT Symbol for Vertical Tab (1)
    # 240C 0C FF FF Symbol for Form Feed (2)
    # 240D 0D CR CR Symbol for Carriage Return (1)
    # 240E 0E SO SO Symbol for Shift Out
    # 240F 0F SI SI Symbol for Shift In
    # 2410 10 DLE DL Symbol for Data Link Escape
    # 2411 11 DC1 D1 Symbol for Device Control 1 (2)
    # 2412 12 DC2 D2 Symbol for Device Control 2 (2)
    # 2413 13 DC3 D3 Symbol for Device Control 3 (2)
    # 2414 14 DC4 D4 Symbol for Device Control 4 (2)
    # 2415 15 NAK NK Symbol for Negative Acknowledge
    # 2416 16 SYN SY Symbol for Synchronous Idle
    # 2417 17 ETB EB Symbol for End of Transmission Block
    # 2418 18 CAN CN Symbol for Cancel
    # 2419 19 EM EM Symbol for End of Medium
    # 241A 1A SUB SU Symbol for Substitute
    # 241B 1B ESC EC Symbol for Escape
    # 241C 1C FS FS Symbol for Field Separator (3)
    # 241D 1D GS GS Symbol for Group Separator (3)
    # 241E 1E RS RS Symbol for Record Separator (3)
    # 241F 1F US US Symbol for Unit Separator (3)
    # 2420 20 SP SP Symbol for Space (4)
    # 2421 7F DEL DT Symbol for Delete (4)

    #C1 control code visual representations
    #Code Val Name 2X Description
    # 80 80 80 (1)
    # 81 81 81 (1)
    # E022 82 BPH 82 Symbol for Break Permitted Here (2)
    # E023 83 NBH 83 Symbol for No Break Here (2)
    # E024 84 IND IN Symbol for Index (3)
    # E025 85 NEL NL Symbol for Next Line (4)
    # E026 86 SSA SS Symbol for Start Selected Area
    # E027 87 ESA ES Symbol for End Selected Area
    # E028 88 HTS HS Symbol for Character Tabulation Set
    # E029 89 HTJ HJ Symbol for Character Tabulation with Justification
    # E02A 8A VTS VS Symbol for Line Tabulation Set
    # E02B 8B PLD PD Symbol for Partial Line Forward
    # E02C 8C PLU PU Symbol for Partial Line Backward
    # E02D 8D RI RI Symbol for Reverse Line Feed
    # E02E 8E SS2 S2 Symbol for Single Shift 2
    # E02F 8F SS3 S3 Symbol for Single Shift 3
    # E030 90 DCS DC Symbol for Device Control String
    # E031 91 PU1 P1 Symbol for Private Use 1
    # E032 92 PU2 P2 Symbol for Private Use 2
    # E033 93 STS SE Symbol for Set Transmit State
    # E034 94 CCH CC Symbol for Cancel Character
    # E035 95 MW MW Symbol for Message Waiting
    # E036 96 SPA SP Symbol for Start Protected (Guarded) Area
    # E037 97 EPA EP Symbol for End Protected (Guarded) Area
    # E038 98 SOS 98 Symbol for Start of String (2)
    # 99 99 (1)
    # E03A 9A SCI 9A Symbol for Single Character Introducer (2)
    # E03B 9B CSI CS Symbol for Control Sequence Introducer (5)
    # E03C 9C ST ST Symbol for String Terminator
    # E03D 9D OSC OS Symbol for Operating System Command
    # E03E 9E PM PM Symbol for Privacy Message
    # E03F 9F APC AP Symbol for Application Program Command

    proc VIEW {args} {
        #*** !doctools
        #[call [fun VIEW] [arg string]]
        #[para]Return a string with specific ANSI control characters substituted with visual equivalents frome the appropriate unicode C0 and C1 visualisation sets
        #[para]For debugging purposes, certain other standard control characters are converted to visual representation, for example backspace (mapped to \\U2408 '\U2408')
        #[para]Horizontal tab is mapped to \\U2409 '\U2409'. For many of the punk terminal text operations, tabs have already been mapped to the appropriate number of spaces using textutil::tabify functions
        #[para]As punkshell uses linefeed where possible in preference to crlf even on windows, cr is mapped to \\U240D '\U240D' - but lf is left as is.

        if {![llength $args]} {
            return ""
        }

        set string [lindex $args end]
        set defaults [dict create\
            -esc 1\
            -cr 1\
            -lf 0\
            -vt 0\
            -ht 1\
            -bs 1\
            -sp 1\
        ]
        set argopts [lrange $args 0 end-1]
        if {[llength $argopts] % 2 != 0} {
            error "ansistring VIEW options must be option-value pairs, received '$argopts'. Known opts [dict keys $defaults]"
        }
        set opts [dict merge $defaults $argopts]
        # -- --- --- --- ---
        set opt_esc     [dict get $opts -esc]
        set opt_cr      [dict get $opts -cr]
        set opt_lf      [dict get $opts -lf]
        set opt_vt      [dict get $opts -vt]
        set opt_ht      [dict get $opts -ht]
        set opt_bs      [dict get $opts -bs]
        set opt_sp      [dict get $opts -sp]
        # -- --- --- --- ---


        #modern (c0 seem to have more terminal/font support - C1 can show 8bit c1 codes - but also seems to be limited support)

        #Goal is not to map every control character?
        #Map of which elements we want to convert - done this way so we can see names of control's that are included: - ease of maintenance compared to just creating the string map directly
        #ETX -ctrl-c
        #EOT  ctrl-d  (EOF?)
        #SYN  ctrl-v
        #SUB  ctrl-z
        #CAN  ctrl-x
        #FS   ctrl-\  (SIGQUIT)
        set visuals_interesting [dict create\
            NUL [list \x00 \u2400]\
            ETX [list \x03 \u2403]\
            EOT [list \x04 \u2404]\
            BEL [list \x07 \u2407]\
            SYN [list \x16 \u2416]\
            CAN [list \x18 \u2418]\
            SUB [list \x1a \u241a]\
            FS  [list \x1c \u241c]\
            SOS [list \x98 \ue038]\
            CSI [list \x9b \ue03b]\
            ST  [list \x9c \ue03c]\
            PM  [list \x9e \ue03e]\
            APC [list \x9f \ue03f]\
        ]
        #it turns out we need pretty much everything for debugging
        set visuals_c0 [dict create\
            NUL [list \x00 \u2400]\
            SOH [list \x01 \u2401]\
            STX [list \x02 \u2402]\
            ETX [list \x03 \u2403]\
            EOT [list \x04 \u2404]\
            ENQ [list \x05 \u2405]\
            ACK [list \x06 \u2406]\
            BEL [list \x07 \u2407]\
            FF  [list \x0c \u240c]\
            SO  [list \x0e \u240e]\
            SF  [list \x0f \u240f]\
            DLE [list \x10 \u2410]\
            DC1 [list \x11 \u2411]\
            DC2 [list \x12 \u2412]\
            DC3 [list \x13 \u2413]\
            DC4 [list \x14 \u2414]\
            NAK [list \x15 \u2415]\
            SYN [list \x16 \u2416]\
            ETB [list \x17 \u2417]\
            CAN [list \x18 \u2418]\
            EM  [list \x19 \u2419]\
            SUB [list \x1a \u241a]\
            FS  [list \x1c \u241c]\
            GS  [list \x1d \u241d]\
            RS  [list \x1e \u241e]\
            US  [list \x1f \u241f]\
            DEL [list \x7f \u2421]\
        ]
        set visuals_c1 [dict create\
            BPH [list \x82 \ue022]\
            NBH [list \x83 \ue023]\
            IND [list \x84 \ue024]\
            NEL [list \x85 \ue025]\
            SSA [list \x86 \ue026]\
            ESA [list \x87 \ue027]\
            HTS [list \x88 \ue028]\
            HTJ [list \x89 \ue029]\
            VTS [list \x8a \ue02a]\
            PLD [list \x8b \ue02a]\
            PLU [list \x8c \ue02c]\
            RI  [list \x8d \ue02d]\
            SS2 [list \x8e \ue02e]\
            SS3 [list \x8f \ue02f]\
            DCS [list \x90 \ue030]\
            PU1 [list \x91 \ue031]\
            PU2 [list \x92 \ue032]\
            STS [list \x93 \ue033]\
            CCH [list \x94 \ue034]\
            MW  [list \x95 \ue035]\
            SPA [list \x96 \ue036]\
            EPA [list \x97 \ue037]\
            SOS [list \x98 \ue038]\
            SCI [list \x9a \ue03a]\
            CSI [list \x9b \ue03b]\
            ST  [list \x9c \ue03c]\
            OSC [list \x9d \ue03d]\
            PM  [list \x9e \ue03e]\
            APC [list \x9f \ue03f]\
        ]

        set visuals_opt [dict create]
        if {$opt_esc} {
            dict set visuals_opt ESC [list \x1b \u241b]
        }
        if {$opt_cr} {
            dict set visuals_opt CR [list \x0d \u240d]
        }
        if {$opt_lf} {
            dict set visuals_opt LF [list \x0a \u240a]
        }
        if {$opt_vt} {
            dict set visuals_opt VT [list \x0b \u240b]
        }
        if {$opt_ht} {
            dict set visuals_opt HT [list \x09 \u2409]
        }
        if {$opt_bs} {
            dict set visuals_opt BS [list \x08 \u2408]
        }
        if {$opt_sp} {
            dict set visuals_opt SP [list \x20 \u2420]
        }

        set visuals [dict merge $visuals_opt $visuals_c0 $visuals_c1]
        set  charmap [list]
        dict for {nm chars} $visuals {
            lappend charmap {*}$chars
        }
        return [string map $charmap $string]


        #test of ISO2047 - 7bit - limited set, limited support, somewhat obscure glyphs
        #return [string map [list \033 \U2296 \007 \U237E] $string]
    }

    proc length {string} {
        #*** !doctools
        #[call [fun length] [arg string]]
        #[para]Returns the length of the string without ansi codes
        #[para]This will not count strings hidden inside a 'privacy message' or other ansi codes which may have content between their opening escape and their termination sequence.
        #[para]This is equivalent to calling string length on the result of stripansi $string
        #[para]Note that this returns the number of characters in the payload (after applying combiners), and is not always the same as the width of the string as rendered on a terminal.
        #[para]To get the width, use punk::ansi::printing_length instead, which is also ansi aware.
        string length [stripansi $string]
    }

    proc trimleft {string args} {
        set intext 0
        set out ""
        #for split_codes only first or last pt can be empty string
        foreach {pt ansiblock} [split_codes $string] {
            if {!$intext} {
                if {$pt eq "" || [regexp {^\s+$} $pt]} {
                    append out $ansiblock
                } else {
                    append out [string trimleft $pt]$ansiblock
                    set intext 1
                }
            } else {
                append out $pt$ansiblock
            }
        }
        return $out
    }
    proc trimright {string} {
        if {$string eq ""} {return ""} ;#excludes the case where split_codes would return nothing
        set rtrimmed_list [lreverse [_splits_trimleft [lreverse [split_codes $string]]]]
        return [join $rtrimmed_list ""]
    }
    proc trim {string} {
        #make sure we do our ansi-scanning split only once - so use list-based trim operations
        #order of left vs right probably makes zero difference - as any reduction the first operation can do is only in terms of characters at other end of list - not in total list length
        #we save a single function call by calling both here rather than _splits_trim
        join [_splits_trimright [_splits_trimleft [split_codes $string]]] ""
    }

    proc index {string index} {
        #*** !doctools
        #[call [fun index] [arg string] [arg index]]
        #[para]Takes a string that possibly contains ansi codes such as colour,underline etc (SGR codes)
        #[para]Returns the character (with applied ansi effect) at position index
        #[para]The string could contain non SGR ansi codes - and these will (mostly) be ignored, so shouldn't affect the output.
        #[para]Some terminals don't hide 'privacy message' and other strings within an ESC X ESC ^ or ESC _ sequence (terminated by ST)
        #[para]It's arguable some of these are application specific - but this function takes the view that they are probably non-displaying - so index won't see them.
        #[para]todo: SGR codes within ST-terminated strings not yet ignored properly
        #[para]If the caller wants just the character - they should use a normal string index after calling stripansi, or call stripansi afterwards.
        #[para]As any operation using end-+<int> will need to strip ansi to precalculate the length anyway; the caller should probably just use stripansi and standard string index if the ansi coded output isn't required and they are using and end-based index.
        #[para]In fact, any operation where the ansi info isn't required in the output would probably be slightly more efficiently obtained by using stripansi and normal string operations on that.
        #[para]The returned character will (possibly) have a leading ansi escape sequence but no trailing escape sequence - even if the string was taken from a position immediately before a reset or other SGR ansi code
        #[para]The ansi-code prefix in the returned string is built up by concatenating previous SGR ansi codes seen - but it is optimised to re-start the process if any full SGR reset is encountered.
        #[para]The code sequence doesn't detect individual properties being turned on and then off again, only full resets; so in some cases the ansi-prefix may not be as short as it could be.
        #[para]This shouldn't make any difference to the visual output - but a possible future enhancement is something to produce the shortest ansi sequence possible
        #[para]Notes:
        #[para]This function has to split the whole string into plaintext & ansi codes even for a very low index
        #[para]Some sort of generator that parses more of the string as required might be more efficient for large chunks.
        #[para]For end-x operations we have to pre-calculate the content-length by stripping the ansi - which is also potentially sub-optimal

        set splits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run

        #todo - end-x  +/-x+/-x etc
        set original_index $index

        set index [string map [list _ ""] $index]
        #short-circuit some trivial cases
        if {[string is integer -strict $index]} {
            if {$index < 0} {return ""}
            #this only short-circuits an index greater than length including ansi-chars
            #we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length
            if {$index > [string length $string]} {return ""}
        } else {
            if {[string match end* $index]} {
                #for end- we will probably have to blow a few cycles stripping first and calculate the length
                if {$index ne "end"} {
                    set op [string index $index 3]
                    set offset [string range $index 4 end]
                    if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"}
                    if {$op eq "+" && $offset != 0} {
                        return ""
                    }
                } else {
                    set offset 0
                }
                #by now, if op = + then offset = 0  so we only need to handle the minus case
                set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal
                if {$offset == 0} {
                    set index [expr {$payload_len-1}]
                } else {
                    set index [expr {($payload_len-1) - $offset}]
                }
                if {$index < 0} {
                    #don't waste time splitting and looping the string
                    return ""
                }
            } else {
                #we are trying to avoid evaluating unbraced expr of potentially insecure origin
                regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string
                if {[string is integer -strict $tail]} {
                    #plain +-<int>
                    if {$op eq "-"} {
                        #return nothing for negative indices as per Tcl's lindex etc
                        return ""
                    }
                    set index $tail
                } else {
                    if {[regexp {(.*)([+-])(.*)} $index _match a op b]} {
                        if {[string is integer -strict $a] && [string is integer -strict $b]} {
                            if {$op eq "-"} {
                                set index [expr {$a - $b}]
                            } else {
                                set index [expr {$a + $b}]
                            }
                        } else {
                            error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"
                        }
                    } else {
                        error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"
                    }
                }
            }
        }

        #any pt could be empty if using split_codes_single (or just first and last pt if split_codes)
        set low   -1
        set high  -1
        set pt_index -2
        set pt_found -1
        set char ""
        set codes_in_effect ""
        #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go
        #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway)
        foreach {pt code} $splits {
            incr pt_index 2
            if {$pt ne ""} {
                set low [expr {$high + 1}] ;#last high
                incr high [string length $pt]
            }

            if {$pt ne "" && ($index >= $low && $index <= $high)} {
                set pt_found $pt_index
                set char [string index $pt $index-$low]
                break
            }

            if {[punk::ansi::codetype::is_sgr_reset $code]} {
                #we can throw away previous codes_in_effect
                set codes_in_effect ""
            } else {
                #may have partial resets - but we don't want to track individual states of SGR features
                #A possible feature would be some function to optimise an ansi code sequence - which we could then apply at the end.
                #we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed.
                #Review - consider if any other types of code make sense to retain in the output in this context.
                if {[punk::ansi::codetype::is_sgr $code]} {
                    append codes_in_effect $code
                }
            }

        }
        if {$pt_found >= 0} {
            return $codes_in_effect$char
        } else {
            return ""
        }
    }

    proc _splits_trimleft {sclist} {
        set intext 0
        set outlist [list]
        foreach {pt ansiblock} $sclist {
            if {!$intext} {
                if {$pt eq "" || [regexp {^\s+$} $pt]} {
                    lappend outlist "" $ansiblock
                } else {
                    lappend outlist [string trimleft $pt] $ansiblock
                    set intext 1
                }
            } else {
                lappend outlist $pt $ansiblock
            }
        }
        return $outlist
    }
    proc _splits_trimright {sclist} {
        set intext 0
        set outlist [list]
        foreach {pt ansiblock} [lreverse $sclist] {
            if {!$intext} {
                if {$pt eq "" || [regexp {^\s+$} $pt]} {
                    lappend outlist "" $ansiblock
                } else {
                    lappend outlist [string trimright $pt] $ansiblock
                    set intext 1
                }
            } else {
                lappend outlist $pt $ansiblock
            }
        }
        return [lreverse $outlist]
    }
    proc _splits_trim {sclist} {
        return [_splits_trimright [_splits_trimleft $sclist]]
    }

    #*** !doctools
    #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}]
}

namespace eval punk::ansi::internal {
    proc splitn {str {len 1}} {
        #from textutil::split::splitn
        if {$len <= 0} {
            return -code error "len must be > 0"
        }
        if {$len == 1} {
            return [split $str {}]
        }
        set result [list]
        set max [string length $str]
        set i 0
        set j [expr {$len -1}]
        while {$i < $max} {
            lappend result [string range $str $i $j]
            incr i $len
            incr j $len
        }
        return $result
    }
    proc splitx {str {regexp {[\t \r\n]+}}} {
        #from textutil::split::splitx
        # Bugfix 476988
        if {[string length $str] == 0} {
            return {}
        }
        if {[string length $regexp] == 0} {
            return [::split $str ""]
        }
        if {[regexp $regexp {}]} {
            return -code error \
            "splitting on regexp \"$regexp\" would cause infinite loop"
        }
        set list  {}
        set start 0
        while {[regexp -start $start -indices -- $regexp $str match submatch]} {
            foreach {subStart subEnd} $submatch break
            foreach {matchStart matchEnd} $match break
            incr matchStart -1
            incr matchEnd
            lappend list [string range $str $start $matchStart]
            if {$subStart >= $start} {
                lappend list [string range $str $subStart $subEnd]
            }
            set start $matchEnd
        }
        lappend list [string range $str $start end]
        return $list
    }

    proc printing_length_addchar {i c} {
        upvar outchars outc
        upvar outsizes outs
        set nxt [llength $outc]
        if {$i < $nxt} {
            lset outc $i $c
        } else {
            lappend outc $c
        }
    }

    #string to 2digit hex - e.g used by XTGETTCAP
    proc str2hex {input} {
        set 2hex ""
        foreach ch [split $input ""] {
            append 2hex [format %02X [scan $ch %c]]
        }
        return $2hex
    }
    proc hex2str {2digithexchars} {
        set 2digithexchars [string map [list _ ""] $2digithexchars] ;#compatibility with  tcl tip 551 (compatibility in the sense that users might expect to be able to use underscores and it's nice to support the syntax here too - not that it's required)
        if {$2digithexchars eq ""} {
            return ""
        }
        if {[string length $2digithexchars] % 2 != 0} {
            error "hex2str requires an even number of hex digits (2 per character)"
        }
        set 2str ""
        foreach pair [splitn $2digithexchars 2] {
            append 2str [format %c 0x$pair]
        }
        return $2str
    }
}

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
##  Ready
package provide punk::ansi [namespace eval punk::ansi {
    variable version
    set version 0.1.1
}]
return


#*** !doctools
#[manpage_end]