# where line and column are ascii codes whose values are +31
# vt52 can be entered/exited via escapes
# This means we probably need to wrap enter/exit vt52 and keep this state - as we don't have a standard way to query for terminal type
# (vt52 supports ESC Z - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
# (vt52 supports ESC Z (obs DECID) - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
#ESC\[c - is more modern equiv of DECID
lappend PUNKARGS [list {
@id -id ::punk::ansi::vt52move
@ -4946,6 +4947,8 @@ to 223 (=255 - 32)
}
if {[string length $text] < 2} {return $text}
set parts [punk::ansi::ta::split_codes $text]
#review - if we have only one element of a paired codeset such as PM,SOS - it will not be found by split_codes
#The output technically then still contains ansi (which may for example be hidden by terminal despite lack of closing ST)
if {[llength $parts] == 1} {return [lindex $parts 0]}
#[list_end] [comment {--- end definitions namespace punk::ansi::codetype ---}]
}
tcl::namespace::eval sequence_type {
proc is_Fe {code} {
#first byte after ESC identifies code type
#NOTE - we are looking for valid start of a single sequence here
#- not whether it is complete or where it ends, unless it's a fixed number of bytes
#\u0020-\u002F
# ESC <sp>!"#$%&'()*+,-./
#\u0030-\u003F
#ESC 0-9:;<=>?
#\u0040-\u005F
# ESC @A-Z[\]^
#\u0060-\u007E
proc is_Fe7 {code} {
    # Report whether $code begins with a 7-bit Fe escape sequence (C1 control code):
    #   ESC followed by a character in \u0040-\u005F   i.e ESC @A-Z[\]^_
    # Only the start of the sequence is examined - not whether it is complete or where it ends.
    # Returns 1 on match, 0 otherwise.
    #
    # No separate test for ESC [ (CSI) is needed: [ is \u005B and so is already inside the class below.
    # (an earlier pre-check for it invoked regexp with a pattern but no string, which raised 'wrong # args' on every call)
    return [regexp {^\033[\u0040-\u005F]} $code]
}
proc is_Fe {code} {
    # Report whether $code starts an Fe sequence in either its 8-bit or 7-bit form.
    # The 8-bit check is consulted first (as the original ordering did); the 7-bit check
    # only runs when the 8-bit check reports no match.
    # Returns 1 or 0.
    if {[is_Fe8 $code]} {
        return 1
    }
    return [expr {[is_Fe7 $code] ? 1 : 0}]
}
proc is_Fe8 {code} {
    # Report whether $code begins with an 8-bit Fe code (a single C1 control character, 0x80-0x9F).
    #
    # 8-bit detection appears to be intentionally switched off: this proc always returns 0.
    # Matching raw C1 characters is possibly problematic as it is affected by encoding.
    # According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
    # "However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used."
    #
    # review - if this is ever enabled, the C1 range to match at the start of $code is \u0080-\u009F
    # (the unreachable regexp that used to follow the return wrote the upper bound as \u09F - one hex digit short)
    return 0
}
#ESC 0-9,:,;,<,=,>,?
proc is_Fp {code} {
    # Report whether $code is exactly an Fp escape sequence:
    #   ESC followed by one character in \u0030-\u003F   i.e ESC 0-9:;<=>?
    # The pattern is anchored at both ends, so anything after that one character means no match.
    # Returns 1 on match, 0 otherwise.
    set fp_pattern {^\033[\u0030-\u003F]$}
    return [regexp -- $fp_pattern $code]
}
#https://en.wikipedia.org/wiki/ISO/IEC_2022
#e.g
# ESC a (INT) interrupts the current process
# ESC c (RIS) reset terminal to initial state
#ESC `a-z{|}~
proc is_Fs {code} {
    # Report whether $code is exactly an Fs escape sequence:
    #   ESC followed by one character in \u0060-\u007E   i.e ESC ` a-z and the punctuation up to ~
    # e.g ESC c (RIS) reset terminal to initial state
    # The pattern is anchored at both ends, so anything after that one character means no match.
    # Returns 1 on match, 0 otherwise.
    #
    # The check is implemented, so nothing is written to stderr
    # (a leftover 'unimplemented' notice used to be emitted on every call).
    return [regexp {^\033[\u0060-\u007E]$} $code]
}
proc is_nF {code} {
#2 bytes
#subcategorised by the low two bits of the first byte (n)
#further by whether the final byte is in \u0030-u003f (p) or not (t)
variable invalid "???" ;# ideally this would be 0xFFFD - which should display as black diamond/rhombus with question mark. As at 2023 - this tends to display indistinguishably from other missing glyph boxes - so we use a longer sequence we can detect by length and to display obviously
variable invalid_display_char \u25ab; #todo - change to 0xFFFD once diamond glyph more common?
#more useful for referring to ANSI documentation would be a proper 7-bit and 8-bit 'Code Table' layout
#as described in ECMA-35 5.2
# where the positions of the table are in one-to-one correspondence with the bit combinations of the code.
#- for 7-bit: 8 columns 16 rows
#- for 8-bit 16 columns 16 rows
proc codetable {which} {
set bits 8
switch -- $which {
ascii8 {
set which default
}
ascii {
set bits 7
}
default {
if {$which ni [encoding names]} {
error "codetable unsupported - use 'ascii' or an entry from the result of the 'encoding names' command."
}
}
}
package require punk::ansi
set hibit_count [expr {$bits-4}]
set bitcolumns [expr {2**$hibit_count}] ;#always 4 bits for the rows - remaining bits for the columns
#Note the vt52 rough equivalent \x1bZ - commonly supported but probably best considered obsolete as it collides with ECMA 48 SCI Single Character Introducer
#DA1
variable last_da1_result
#first element in result is the terminal's architectural class 61,62,63,64.. ?
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} else {
if {$a >= 0 && $a <= $numitems-1} {
#only a is in the range
if {$b < 0} {
set b 0
} else {
set b [expr {$numitems-1}]
}
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} elseif {$b >=0 && $b <= $numitems-1} {
#only b is in the range
if {$a < 0} {
set a 0
} else {
set a [expr {$numitems-1}]
}
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} else {
#both outside the range
if {$a < 0 && $b > 0} {
#spans the range in forward order
set a 0
set b [expr {$numitems-1}]
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} elseif {$a > 0 && $b < 0} {
#spans the range in reverse order
set a [expr {$numitems-1}]
set b 0
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
}
#both outside of range on same side
}
}
} else {
set idx [punk::lib::lindex_resolve_basic $numitems $ipart]
if {$idx >= 0} {
lappend index_list $idx
}
}
}
return $index_list
}
# showdict uses lindex_resolve results -2 & -3 to determine whether index is out of bounds on upper vs lower side
#REVIEW: This shouldn't really need the list itself - just the length would suffice
punk::args::define {
@ -2305,7 +2420,8 @@ namespace eval punk::lib {
#<0 ?
error "lindex_resolve len must be an integer"
}
set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000
set index [tcl::string::map {_ {}} $index] ;#basic forward compatibility with integers such as 1_000 for 8.6
#todo - be stricter about malformations such as 1000_
if {[string is integer -strict $index]} {
#can match +i -i
if {$index < 0} {
@ -3345,8 +3461,12 @@ namespace eval punk::lib {
#NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour.
#This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled)
#ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable
#detect_in_list will check at first level. (not intended for detecting ansi in deeper structures)
if {![punk::ansi::ta::detect_in_list $linelist]} {
#detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures)
#we use detectcode_in_list instead of detect_in_list
#detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message)
# - but the main reason is it is slightly faster.
if {![punk::ansi::ta::detectcode_in_list $linelist]} {
# where line and column are ascii codes whose values are +31
# vt52 can be entered/exited via escapes
# This means we probably need to wrap enter/exit vt52 and keep this state - as we don't have a standard way to query for terminal type
# (vt52 supports ESC Z - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
# (vt52 supports ESC Z (obs DECID) - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
#ESC\[c - is more modern equiv of DECID
lappend PUNKARGS [list {
@id -id ::punk::ansi::vt52move
@ -4946,6 +4947,8 @@ to 223 (=255 - 32)
}
if {[string length $text] < 2} {return $text}
set parts [punk::ansi::ta::split_codes $text]
#review - if we have only one element of a paired codeset such as PM,SOS - it will not be found by split_codes
#The output technically then still contains ansi (which may for example be hidden by terminal despite lack of closing ST)
if {[llength $parts] == 1} {return [lindex $parts 0]}
#[list_end] [comment {--- end definitions namespace punk::ansi::codetype ---}]
}
tcl::namespace::eval sequence_type {
proc is_Fe {code} {
#first byte after ESC identifies code type
#NOTE - we are looking for valid start of a single sequence here
#- not whether it is complete or where it ends, unless it's a fixed number of bytes
#\u0020-\u002F
# ESC <sp>!"#$%&'()*+,-./
#\u0030-\u003F
#ESC 0-9:;<=>?
#\u0040-\u005F
# ESC @A-Z[\]^
#\u0060-\u007E
proc is_Fe7 {code} {
    # Report whether $code begins with a 7-bit Fe escape sequence (C1 control code):
    #   ESC followed by a character in \u0040-\u005F   i.e ESC @A-Z[\]^_
    # Only the start of the sequence is examined - not whether it is complete or where it ends.
    # Returns 1 on match, 0 otherwise.
    #
    # No separate test for ESC [ (CSI) is needed: [ is \u005B and so is already inside the class below.
    # (an earlier pre-check for it invoked regexp with a pattern but no string, which raised 'wrong # args' on every call)
    return [regexp {^\033[\u0040-\u005F]} $code]
}
proc is_Fe {code} {
    # Report whether $code starts an Fe sequence in either its 8-bit or 7-bit form.
    # The 8-bit check is consulted first (as the original ordering did); the 7-bit check
    # only runs when the 8-bit check reports no match.
    # Returns 1 or 0.
    if {[is_Fe8 $code]} {
        return 1
    }
    return [expr {[is_Fe7 $code] ? 1 : 0}]
}
proc is_Fe8 {code} {
    # Report whether $code begins with an 8-bit Fe code (a single C1 control character, 0x80-0x9F).
    #
    # 8-bit detection appears to be intentionally switched off: this proc always returns 0.
    # Matching raw C1 characters is possibly problematic as it is affected by encoding.
    # According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
    # "However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used."
    #
    # review - if this is ever enabled, the C1 range to match at the start of $code is \u0080-\u009F
    # (the unreachable regexp that used to follow the return wrote the upper bound as \u09F - one hex digit short)
    return 0
}
#ESC 0-9,:,;,<,=,>,?
proc is_Fp {code} {
    # Report whether $code is exactly an Fp escape sequence:
    #   ESC followed by one character in \u0030-\u003F   i.e ESC 0-9:;<=>?
    # The pattern is anchored at both ends, so anything after that one character means no match.
    # Returns 1 on match, 0 otherwise.
    set fp_pattern {^\033[\u0030-\u003F]$}
    return [regexp -- $fp_pattern $code]
}
#https://en.wikipedia.org/wiki/ISO/IEC_2022
#e.g
# ESC a (INT) interrupts the current process
# ESC c (RIS) reset terminal to initial state
#ESC `a-z{|}~
proc is_Fs {code} {
    # Report whether $code is exactly an Fs escape sequence:
    #   ESC followed by one character in \u0060-\u007E   i.e ESC ` a-z and the punctuation up to ~
    # e.g ESC c (RIS) reset terminal to initial state
    # The pattern is anchored at both ends, so anything after that one character means no match.
    # Returns 1 on match, 0 otherwise.
    #
    # The check is implemented, so nothing is written to stderr
    # (a leftover 'unimplemented' notice used to be emitted on every call).
    return [regexp {^\033[\u0060-\u007E]$} $code]
}
proc is_nF {code} {
#2 bytes
#subcategorised by the low two bits of the first byte (n)
#further by whether the final byte is in \u0030-u003f (p) or not (t)
variable invalid "???" ;# ideally this would be 0xFFFD - which should display as black diamond/rhombus with question mark. As at 2023 - this tends to display indistinguishably from other missing glyph boxes - so we use a longer sequence we can detect by length and to display obviously
variable invalid_display_char \u25ab; #todo - change to 0xFFFD once diamond glyph more common?
#more useful for referring to ANSI documentation would be a proper 7-bit and 8-bit 'Code Table' layout
#as described in ECMA-35 5.2
# where the positions of the table are in one-to-one correspondence with the bit combinations of the code.
#- for 7-bit: 8 columns 16 rows
#- for 8-bit 16 columns 16 rows
proc codetable {which} {
set bits 8
switch -- $which {
ascii8 {
set which default
}
ascii {
set bits 7
}
default {
if {$which ni [encoding names]} {
error "codetable unsupported - use 'ascii' or an entry from the result of the 'encoding names' command."
}
}
}
package require punk::ansi
set hibit_count [expr {$bits-4}]
set bitcolumns [expr {2**$hibit_count}] ;#always 4 bits for the rows - remaining bits for the columns
#Note the vt52 rough equivalent \x1bZ - commonly supported but probably best considered obsolete as it collides with ECMA 48 SCI Single Character Introducer
#DA1
variable last_da1_result
#first element in result is the terminal's architectural class 61,62,63,64.. ?
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} else {
if {$a >= 0 && $a <= $numitems-1} {
#only a is in the range
if {$b < 0} {
set b 0
} else {
set b [expr {$numitems-1}]
}
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} elseif {$b >=0 && $b <= $numitems-1} {
#only b is in the range
if {$a < 0} {
set a 0
} else {
set a [expr {$numitems-1}]
}
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} else {
#both outside the range
if {$a < 0 && $b > 0} {
#spans the range in forward order
set a 0
set b [expr {$numitems-1}]
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} elseif {$a > 0 && $b < 0} {
#spans the range in reverse order
set a [expr {$numitems-1}]
set b 0
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
}
#both outside of range on same side
}
}
} else {
set idx [punk::lib::lindex_resolve_basic $numitems $ipart]
if {$idx >= 0} {
lappend index_list $idx
}
}
}
return $index_list
}
# showdict uses lindex_resolve results -2 & -3 to determine whether index is out of bounds on upper vs lower side
#REVIEW: This shouldn't really need the list itself - just the length would suffice
punk::args::define {
@ -2305,7 +2420,8 @@ namespace eval punk::lib {
#<0 ?
error "lindex_resolve len must be an integer"
}
set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000
set index [tcl::string::map {_ {}} $index] ;#basic forward compatibility with integers such as 1_000 for 8.6
#todo - be stricter about malformations such as 1000_
if {[string is integer -strict $index]} {
#can match +i -i
if {$index < 0} {
@ -3345,8 +3461,12 @@ namespace eval punk::lib {
#NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour.
#This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled)
#ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable
#detect_in_list will check at first level. (not intended for detecting ansi in deeper structures)
if {![punk::ansi::ta::detect_in_list $linelist]} {
#detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures)
#we use detectcode_in_list instead of detect_in_list
#detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message)
# - but the main reason is it is slightly faster.
if {![punk::ansi::ta::detectcode_in_list $linelist]} {
# where line and column are ascii codes whose values are +31
# vt52 can be entered/exited via escapes
# This means we probably need to wrap enter/exit vt52 and keep this state - as we don't have a standard way to query for terminal type
# (vt52 supports ESC Z - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
# (vt52 supports ESC Z (obs DECID) - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
#ESC\[c - is more modern equiv of DECID
lappend PUNKARGS [list {
@id -id ::punk::ansi::vt52move
@ -4946,6 +4947,8 @@ to 223 (=255 - 32)
}
if {[string length $text] < 2} {return $text}
set parts [punk::ansi::ta::split_codes $text]
#review - if we have only one element of a paired codeset such as PM,SOS - it will not be found by split_codes
#The output technically then still contains ansi (which may for example be hidden by terminal despite lack of closing ST)
if {[llength $parts] == 1} {return [lindex $parts 0]}
#[list_end] [comment {--- end definitions namespace punk::ansi::codetype ---}]
}
tcl::namespace::eval sequence_type {
proc is_Fe {code} {
#first byte after ESC identifies code type
#NOTE - we are looking for valid start of a single sequence here
#- not whether it is complete or where it ends, unless it's a fixed number of bytes
#\u0020-\u002F
# ESC <sp>!"#$%&'()*+,-./
#\u0030-\u003F
#ESC 0-9:;<=>?
#\u0040-\u005F
# ESC @A-Z[\]^
#\u0060-\u007E
proc is_Fe7 {code} {
    # Report whether $code begins with a 7-bit Fe escape sequence (C1 control code):
    #   ESC followed by a character in \u0040-\u005F   i.e ESC @A-Z[\]^_
    # Only the start of the sequence is examined - not whether it is complete or where it ends.
    # Returns 1 on match, 0 otherwise.
    #
    # No separate test for ESC [ (CSI) is needed: [ is \u005B and so is already inside the class below.
    # (an earlier pre-check for it invoked regexp with a pattern but no string, which raised 'wrong # args' on every call)
    return [regexp {^\033[\u0040-\u005F]} $code]
}
proc is_Fe {code} {
    # Report whether $code starts an Fe sequence in either its 8-bit or 7-bit form.
    # The 8-bit check is consulted first (as the original ordering did); the 7-bit check
    # only runs when the 8-bit check reports no match.
    # Returns 1 or 0.
    if {[is_Fe8 $code]} {
        return 1
    }
    return [expr {[is_Fe7 $code] ? 1 : 0}]
}
proc is_Fe8 {code} {
    # Report whether $code begins with an 8-bit Fe code (a single C1 control character, 0x80-0x9F).
    #
    # 8-bit detection appears to be intentionally switched off: this proc always returns 0.
    # Matching raw C1 characters is possibly problematic as it is affected by encoding.
    # According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
    # "However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used."
    #
    # review - if this is ever enabled, the C1 range to match at the start of $code is \u0080-\u009F
    # (the unreachable regexp that used to follow the return wrote the upper bound as \u09F - one hex digit short)
    return 0
}
#ESC 0-9,:,;,<,=,>,?
proc is_Fp {code} {
    # Report whether $code is exactly an Fp escape sequence:
    #   ESC followed by one character in \u0030-\u003F   i.e ESC 0-9:;<=>?
    # The pattern is anchored at both ends, so anything after that one character means no match.
    # Returns 1 on match, 0 otherwise.
    set fp_pattern {^\033[\u0030-\u003F]$}
    return [regexp -- $fp_pattern $code]
}
#https://en.wikipedia.org/wiki/ISO/IEC_2022
#e.g
# ESC a (INT) interrupts the current process
# ESC c (RIS) reset terminal to initial state
#ESC `a-z{|}~
proc is_Fs {code} {
    # Report whether $code is exactly an Fs escape sequence:
    #   ESC followed by one character in \u0060-\u007E   i.e ESC ` a-z and the punctuation up to ~
    # e.g ESC c (RIS) reset terminal to initial state
    # The pattern is anchored at both ends, so anything after that one character means no match.
    # Returns 1 on match, 0 otherwise.
    #
    # The check is implemented, so nothing is written to stderr
    # (a leftover 'unimplemented' notice used to be emitted on every call).
    return [regexp {^\033[\u0060-\u007E]$} $code]
}
proc is_nF {code} {
#2 bytes
#subcategorised by the low two bits of the first byte (n)
#further by whether the final byte is in \u0030-u003f (p) or not (t)
variable invalid "???" ;# ideally this would be 0xFFFD - which should display as black diamond/rhombus with question mark. As at 2023 - this tends to display indistinguishably from other missing glyph boxes - so we use a longer sequence we can detect by length and to display obviously
variable invalid_display_char \u25ab; #todo - change to 0xFFFD once diamond glyph more common?
#more useful for referring to ANSI documentation would be a proper 7-bit and 8-bit 'Code Table' layout
#as described in ECMA-35 5.2
# where the positions of the table are in one-to-one correspondence with the bit combinations of the code.
#- for 7-bit: 8 columns 16 rows
#- for 8-bit 16 columns 16 rows
proc codetable {which} {
set bits 8
switch -- $which {
ascii8 {
set which default
}
ascii {
set bits 7
}
default {
if {$which ni [encoding names]} {
error "codetable unsupported - use 'ascii' or an entry from the result of the 'encoding names' command."
}
}
}
package require punk::ansi
set hibit_count [expr {$bits-4}]
set bitcolumns [expr {2**$hibit_count}] ;#always 4 bits for the rows - remaining bits for the columns
#Note the vt52 rough equivalent \x1bZ - commonly supported but probably best considered obsolete as it collides with ECMA 48 SCI Single Character Introducer
#DA1
variable last_da1_result
#first element in result is the terminal's architectural class 61,62,63,64.. ?
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} else {
if {$a >= 0 && $a <= $numitems-1} {
#only a is in the range
if {$b < 0} {
set b 0
} else {
set b [expr {$numitems-1}]
}
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} elseif {$b >=0 && $b <= $numitems-1} {
#only b is in the range
if {$a < 0} {
set a 0
} else {
set a [expr {$numitems-1}]
}
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} else {
#both outside the range
if {$a < 0 && $b > 0} {
#spans the range in forward order
set a 0
set b [expr {$numitems-1}]
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
} elseif {$a > 0 && $b < 0} {
#spans the range in reverse order
set a [expr {$numitems-1}]
set b 0
lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
}
#both outside of range on same side
}
}
} else {
set idx [punk::lib::lindex_resolve_basic $numitems $ipart]
if {$idx >= 0} {
lappend index_list $idx
}
}
}
return $index_list
}
# showdict uses lindex_resolve results -2 & -3 to determine whether index is out of bounds on upper vs lower side
#REVIEW: This shouldn't really need the list itself - just the length would suffice
punk::args::define {
@ -2305,7 +2420,8 @@ namespace eval punk::lib {
#<0 ?
error "lindex_resolve len must be an integer"
}
set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000
set index [tcl::string::map {_ {}} $index] ;#basic forward compatibility with integers such as 1_000 for 8.6
#todo - be stricter about malformations such as 1000_
if {[string is integer -strict $index]} {
#can match +i -i
if {$index < 0} {
@ -3345,8 +3461,12 @@ namespace eval punk::lib {
#NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour.
#This commonly happens if there is an unbalanced brace (which is a normal occurrence and needs to be handled)
#ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable
#detect_in_list will check at first level. (not intended for detecting ansi in deeper structures)
if {![punk::ansi::ta::detect_in_list $linelist]} {
#detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures)
#we use detectcode_in_list instead of detect_in_list
#detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message)
# - but the main reason is it is slightly faster.
if {![punk::ansi::ta::detectcode_in_list $linelist]} {