From 2dd24a7ba9bb2a5f3bee47ff896e9f9cb4aefcba Mon Sep 17 00:00:00 2001
From: Julian Noble <julian@precisium.com.au>
Date: Wed, 13 Aug 2025 04:10:17 +1000
Subject: [PATCH] punk::args add -defaultdisplaytype dict|list; punk::pdf
 improvements

---
 src/bootsupport/modules/flagfilter-0.3.tm     |   5 +-
 src/bootsupport/modules/punk-0.1.tm           |  16 +-
 src/bootsupport/modules/punk/ansi-0.1.1.tm    | 237 +++++-
 src/bootsupport/modules/punk/args-0.2.tm      |  39 +-
 src/bootsupport/modules/punk/char-0.1.0.tm    | 101 +++
 src/bootsupport/modules/punk/console-0.1.1.tm |   1 +
 src/bootsupport/modules/punk/lib-0.1.2.tm     | 126 ++-
 src/bootsupport/modules/punk/ns-0.1.0.tm      |  16 +-
 src/bootsupport/modules/shellfilter-0.2.tm    |  40 +-
 src/bootsupport/modules/textblock-0.1.3.tm    |  28 +-
 src/modules/punk/args-999999.0a1.0.tm         |  39 +-
 src/modules/punk/pdf-999999.0a1.0.tm          | 799 ++++++++++++++----
 .../custom/_project/punk.basic/src/make.tcl   |   2 +-
 .../src/bootsupport/modules/flagfilter-0.3.tm |   5 +-
 .../src/bootsupport/modules/punk-0.1.tm       |  16 +-
 .../bootsupport/modules/punk/ansi-0.1.1.tm    | 237 +++++-
 .../src/bootsupport/modules/punk/args-0.2.tm  |  39 +-
 .../bootsupport/modules/punk/char-0.1.0.tm    | 101 +++
 .../bootsupport/modules/punk/console-0.1.1.tm |   1 +
 .../src/bootsupport/modules/punk/lib-0.1.2.tm | 126 ++-
 .../src/bootsupport/modules/punk/ns-0.1.0.tm  |  16 +-
 .../bootsupport/modules/shellfilter-0.2.tm    |  40 +-
 .../bootsupport/modules/textblock-0.1.3.tm    |  28 +-
 .../_project/punk.project-0.1/src/make.tcl    |   2 +-
 .../src/bootsupport/modules/flagfilter-0.3.tm |   5 +-
 .../src/bootsupport/modules/punk-0.1.tm       |  16 +-
 .../bootsupport/modules/punk/ansi-0.1.1.tm    | 237 +++++-
 .../src/bootsupport/modules/punk/args-0.2.tm  |  39 +-
 .../bootsupport/modules/punk/char-0.1.0.tm    | 101 +++
 .../bootsupport/modules/punk/console-0.1.1.tm |   1 +
 .../src/bootsupport/modules/punk/lib-0.1.2.tm | 126 ++-
 .../src/bootsupport/modules/punk/ns-0.1.0.tm  |  16 +-
 .../bootsupport/modules/shellfilter-0.2.tm    |  40 +-
 .../bootsupport/modules/textblock-0.1.3.tm    |  28 +-
 .../_project/punk.shell-0.1/src/make.tcl      |   2 +-
 .../_vfscommon.vfs/modules/punk/args-0.2.tm   |  39 +-
 .../_vfscommon.vfs/modules/punk/pdf-0.1.0.tm  | 799 ++++++++++++++----
 37 files changed, 2949 insertions(+), 560 deletions(-)

diff --git a/src/bootsupport/modules/flagfilter-0.3.tm b/src/bootsupport/modules/flagfilter-0.3.tm
index 1d37e215..00f58e82 100644
--- a/src/bootsupport/modules/flagfilter-0.3.tm
+++ b/src/bootsupport/modules/flagfilter-0.3.tm
@@ -1538,6 +1538,7 @@ namespace eval flagfilter {
         }
     }
 
+    #todo - rename 'cprocessor' is misleading
     oo::class create cprocessor {
         variable o_runid
         variable o_name
@@ -1577,7 +1578,9 @@ namespace eval flagfilter {
             if {[dict exists $o_pinfo match]} {
                 set o_matchspec [dict get $o_pinfo match]
             } else {
-                set o_matchspec {^[^-^/].*}  ;#match anything that isn't flaglike
+                #review - unix paths? conflict with windows style flag such as /w
+                #must accept empty string
+                set o_matchspec {^[^-^/].*|^$}  ;#match anything that isn't flaglike
             }
             set o_found_match 0
             set o_matched_argument "" ;#need o_found_match to differentiate match of empty string
diff --git a/src/bootsupport/modules/punk-0.1.tm b/src/bootsupport/modules/punk-0.1.tm
index 6fb185a9..83dad2bf 100644
--- a/src/bootsupport/modules/punk-0.1.tm
+++ b/src/bootsupport/modules/punk-0.1.tm
@@ -6947,7 +6947,8 @@ namespace eval punk {
             set newrow {}
             foreach oldrow $list_rows {
                 if {$j >= [llength $oldrow]} {
-                    continue
+                    #continue 
+                    lappend newrow ""
                 } else {
                     lappend newrow [lindex $oldrow $j]
                 }
@@ -6956,6 +6957,19 @@ namespace eval punk {
         }
         return $res
     }
+    proc transpose_equal_lists {list_rows} {
+        set columns [list]
+        set rowidx -1
+        foreach l $list_rows {
+            set colidx -1
+            incr rowidx
+            foreach val $l {
+                incr colidx
+                lset columns $colidx $rowidx $val
+            }
+        }
+        return $columns
+    }
     proc transpose_strings {list_of_strings} {
         set charlists [lmap v $list_of_strings {split $v ""}]
         set tchars [transpose_lists $charlists]
diff --git a/src/bootsupport/modules/punk/ansi-0.1.1.tm b/src/bootsupport/modules/punk/ansi-0.1.1.tm
index 255715ad..ad2d58f4 100644
--- a/src/bootsupport/modules/punk/ansi-0.1.1.tm
+++ b/src/bootsupport/modules/punk/ansi-0.1.1.tm
@@ -3785,7 +3785,7 @@ Brightblack 100   Brightred 101   Brightgreen 102   Brightyellow 103   Brightblu
         }
 
         set codestack [list]
-        if {[punk::ansi::ta::detect $text]} {
+        if {[punk::ansi::ta::detectcode $text]} {
             set emit ""
             #set parts [punk::ansi::ta::split_codes $text]
             set parts [punk::ansi::ta::split_codes_single $text]
@@ -3892,7 +3892,7 @@ Brightblack 100   Brightred 101   Brightgreen 102   Brightyellow 103   Brightblu
         }
 
         set codestack [list]
-        if {[punk::ansi::ta::detect $text]} {
+        if {[punk::ansi::ta::detectcode $text]} {
             set emit ""
             #set parts [punk::ansi::ta::split_codes $text]
             set parts [punk::ansi::ta::split_codes_single $text]
@@ -4158,7 +4158,8 @@ Brightblack 100   Brightred 101   Brightgreen 102   Brightyellow 103   Brightblu
     # where line and column are ascii codes whose values are +31
     # vt52 can be entered/exited via escapes
     # This means we probably need to to wrap enter/exit vt52 and keep this state - as we don't have a standard way to query for terminal type
-    # (vt52 supports ESC Z - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
+    # (vt52 supports ESC Z (obs DECID) - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
+    #ESC\[c - is more modern equiv of DECID
 
     lappend PUNKARGS [list {
         @id     -id     ::punk::ansi::vt52move
@@ -4946,6 +4947,8 @@ to 223 (=255 - 32)
         }
         if {[string length $text] < 2} {return $text}
         set parts [punk::ansi::ta::split_codes $text]
+        #review - if we have only one element of a paired codeset such as PM,SOS - it will not be found by split_codes
+        #The output technically then still contains ansi (which may for example be hidden by terminal despite lack of closing ST)
         if {[llength $parts] == 1} {return [lindex $parts 0]}
         set out ""
         #todo - try: join [lsearch -stride 2 -index 0 -subindices -all -inline  $parts *] ""
@@ -5686,21 +5689,106 @@ tcl::namespace::eval punk::ansi {
         #[list_end] [comment {--- end definitions namespace punk::ansi::codetype ---}]
     }
     tcl::namespace::eval sequence_type {
-        proc is_Fe {code} {
+        #first byte after ESC identifies code type
+        #NOTE - we are looking for valid start of a single sequence here
+        #- not whether it is complete or where it ends, unless it's a fixed number of bytes
+
+        #\u0020-\u002F
+        # ESC <sp>!"#$%&'()*+,-./
+
+        #\u0030-\u003F
+        #ESC 0-9:;<=>?
+
+        #\u0040-\u005F
+        # ESC  @A-Z[\]^
+
+        #\u0060-\u007E
+
+        proc is_Fe7 {code} {
             # C1 control codes
-            if {[regexp {^\033\[[\u0040-\u005F]}]} {
-                #7bit - typical case
-                return 1
-            }
+            #7bit - typical case
+            # ESC  @A-Z[\]^
+            return [regexp {^\033[\u0040-\u005F]} $code] 
+        }
+        proc is_Fe {code} {
+            #although Fe7 more common - we'll put the simpler regex for 8 first
+            return [expr {[is_Fe8 $code] || [is_Fe7 $code]}]
+        }
+        proc is_Fe8 {code} {
             #8bit
-            #review - all C1 escapes ? 0x80-0x90F
+            #review - all C1 escapes ? 0x80-0x9F
             #This is possibly problematic as it is affected by encoding.
             #According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
             #"However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used."
-            return 0
+            return [regexp {^[\u0080-\u09F]} $code]
+        }
+        #ESC 0-9,:,;,<,=,>,?
+        proc is_Fp {code} {
+            #single byte following ESC
+            return [regexp {^\033[\u0030-\u003F]$} $code] 
         }
+
+        #https://en.wikipedia.org/wiki/ISO/IEC_2022
+        #e.g
+        # ESC a (INT) interrupts the current process
+        # ESC c (RIS) reset terminal to initial state
+        #ESC `a-z{|}~
         proc is_Fs {code} {
-            puts stderr "is_Fs unimplemented"
+            #single byte following ESC
+            return [regexp {^\033[\u0060-\u007E]$} $code] 
+        }
+
+
+        proc is_nF {code} {
+            #2 bytes
+            #subcategorised by the low two bits of the first byte (n)
+            #further by whether the final byte is in \u0030-u003f (p) or not (t)
+            return [regexp {^\033[\u0020-\u002F]+[\u0030-\u007E]$} $code]
+        }
+
+        #review - test
+        #3Fp - private use 
+        #e.g vt100
+        # ESC#3 DECDHL double-height letters top half
+        # ESC#4 DECDHL double-height letters bottom half
+        # ESC#5 DECSWL single-width line
+        # ESC#6 DECDWL double-width line
+        proc is_3Fp {code} {
+            return [regexp {^\033#[\u0020-\u002F]*[\u0030-\u003F]$} $code] ;#check regexp
+        }
+
+        proc is_code7 {code} {
+            #Fe | Fs | Fp | nF | Fe
+            return [regexp {^\033[\u0040-\u005F]|^\033[\u0060-\u007e]$|^\033[\u0030-\u003F]$|^\033[\u0020-\u002F]+[\u0030-\u007E]$} $code]
+        }
+        proc is_code8 {code} {
+            return [regexp {^[\u0080-\u09F]} $code]
+        }
+        proc is_code {code} {
+            return [expr {[is_code8 $code] || [is_code7 $code]}]
+        }
+
+        proc classify {code} {
+            return [switch -regexp -- $code {
+                {^\033[\u0030-\u003F]$}                  {string cat Fp}
+                {^[\u0080-\u009F]|^\033[\u0040-\u005F]}  {string cat Fe}
+                {^\033[\u0060-\u007E]$}                  {string cat Fs}
+                {^\033[\u0020-\u002F]+[\u0030-\u007E]$} {
+                    #nF sequences
+                    set firstbytenum [scan [string index $code 1] %c]
+                    set lastbyte [string index $code end]
+
+                    set n [expr {$firstbytenum & 3}]
+                    if {[regexp {[\u0030-\u003F]} $lastbyte]} {
+                        set tp p
+                    } else {
+                        set tp t
+                    }
+                    string cat ${n}F$tp
+                }
+                {^\033#[\u0020-\u002F]*[\u0030-\u003F]$} {string cat 3Fp}
+                default                                  {string cat unknown}
+            }]
         }
     }
     # -- --- --- --- --- --- --- --- --- --- ---
@@ -5718,6 +5806,7 @@ tcl::namespace::eval punk::ansi::ta {
     #[para] https://github.com/perlancar/perl-Text-ANSI-Util/blob/master/lib/Text/ANSI/BaseUtil.pm
     #[list_begin definitions]
     tcl::namespace::path ::punk::ansi
+    namespace export detect detect_in_list detect_sgr extract length split_codes split_at_codes split_codes_single
 
     variable PUNKARGS
 
@@ -5748,20 +5837,27 @@ tcl::namespace::eval punk::ansi::ta {
     variable re_osc_open {(?:\x1b\]|\u009d).*}
 
 
+    #review - distinguishing standalone codes vs those that are paired with contents considered part of the code
+    #e.g PM,SOS are 'paired' ended by ST
+
     #variable standalone_code_map [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""]
     variable re_standalones {(?:\x1bc|\x1b7|\x1b8|\x1bM|\x1bD|\x1bE|\x1bH|\x1b=|\x1b>|\x1b#3|\x1b#4|\x1b#5|\x1b#6|\x1b#8)}
     variable re_standalones_vt52 {(?:\x1bZ)}
 
-    #ESC Y move, ESC b foreground colour
+    # -- 
+    #ESC Y move                 - \x1bY<byte><byte> ie 2 bytes to close
+    #ESC b foreground colour    - \x1bb<byte>  1 byte to close
+    variable re_vt52_incomplete {(?:\x1bY(.){0,1}$|\x1bb$)}
+    #\x1bc vt52 bgcolour conflict ?
     #ESC F - gr-on ESC G - gr-off
-    variable re_vt52_open {(?:\x1bY|\x1bb|\x1bF)}
-    #\x1bc vt52 bgcolour conflict ??
+    # --
 
     #if we don't split on altgraphics too and separate them out - it's easy to get into a horrible mess
     variable re_g0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B}
     variable re_g0_open  {(?:\x1b\(0)}
     variable re_g0_close {(?:\x1b\(B)}
 
+    #detect start of ansicode that is closed by ST
     # DCS "ESC P" or "0x90" is also terminated by ST
     set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)}
     #ST terminators [list \007 \033\\ \u009c]
@@ -5777,12 +5873,15 @@ tcl::namespace::eval punk::ansi::ta {
     variable re_ST      {(?:\x1bP|\x1bX|\x1b\^|\x1b_)(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007)|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)}
 
 
+    #variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_standalones}|${re_standalones_vt52}|${re_ST_open}|${re_g0_open}|${re_vt52_open}"
+    #variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_ST_open}|${re_g0_open}|${re_vt52_open}"
+    variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_ST_open}|${re_vt52_incomplete}"
 
     #consider standalones as self-opening/self-closing - therefore included in both ansi_detect and ansi_detect_open
 
     #default for regexes is non-newline-sensitive matching - ie matches can span lines
     # -- --- --- ---
-    variable re_ansi_detect1 "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}"
+    #variable re_ansi_detect1 "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}"
     # -- --- --- ---
     #handrafted TRIE version of above. Somewhat difficult to construct and maintain. TODO - find a regexp TRIE generator that works with Tcl regexes
     #This does make things quicker - but it's too early to finalise the detect/split regexes (e.g missing \U0090 ) - will need to be redone.
@@ -5802,20 +5901,24 @@ tcl::namespace::eval punk::ansi::ta {
     # plus more for auxiliary keypad codes in keypad application mode (and some in numeric mode)
 
     #regexp expanded syntax = ?x
+    #full detect - checking for closing sequences
     variable re_ansi_detect {(?x)
-            (?:\x1b(?:\[(?:[\x20-\x2f\x30-\x3f]*[\x40-\x7e])|c|7|8|M|D|E|H|=|>|<|A|B|C|I|J|K|L|M|Z|(?:Y(?:..))|(?:b(?:.))|\((?:0|B)|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007))|(?:\#(?:3|4|5|6|8))))
+            (?:\x1b(?:\[(?:[\x20-\x3f]*[\x40-\x7e])|a|c|7|8|M|D|E|H|=|>|<|A|B|C|I|J|K|L|M|Z|(?:Y(?:..))|(?:b(?:.))|\((?:0|B)|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007))|(?:\#(?:3|4|5|6|8))))
             |(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)
-            |(?:\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]
+            |(?:\u009b)[\x20-\x3f]*[\x40-\x7e]
             |(?:\u009d)(?:[^\u009c]*)?\u009c
     }
     #---
+    #todo
+    #variable re_ansi_detectcode $re_ansi_detect
+    #variable re_ansi_detectcode {\x1b[\u0040-\u005F]|\x1b[\u0060-\u007e]|\x1b[\u0030-\u003F]|\x1b[\u0020-\u002F]+[\u0030-\u007E]}
+    variable re_ansi_detectcode {(?:\x1b(?:[\u0030-\u007E]|[\u0020-\u002F]+[\u0030-\u007E]))|[\u0090-\u009F]}
 
     # -- --- --- ---
     #variable re_csi_code  {(?:\x1b\[|\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]}
 
 
 
-    variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_standalones}|${re_standalones_vt52}|${re_ST_open}|${re_g0_open}|${re_vt52_open}"
 
     #may be same as detect - kept in case detect needs to diverge
     #variable re_ansi_split "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}"
@@ -5827,20 +5930,6 @@ tcl::namespace::eval punk::ansi::ta {
         set re_ansi_split_multi "(?:${re_ansi_split})+"
     }
 
-    lappend PUNKARGS [list -dynamic 0 {
-            @id     -id ::punk::ansi::ta::detect
-            @cmd    -name punk::ansi::ta::detect -help\
-                "Return a boolean indicating whether Ansi codes were detected in text.
-                Important caveat:
-                When text is a tcl list made from splitting (or lappending) some ansi string
-                - individual elements may be braced or have certain chars escaped.
-                (one example is if a list element contains an unbalanced  brace)
-                This can cause square brackets that form part of the ansi to be backslash escaped
-                - and the function can fail to match it as an Ansi code.
-                "
-            @values -min 1
-            text -type string
-    } ]
 
     #*** !doctools
     #[call [fun detect] [arg text]]
@@ -5851,10 +5940,35 @@ tcl::namespace::eval punk::ansi::ta {
 
     #detect any ansi escapes
     #review - only detect 'complete' codes - or just use the opening escapes for performance?
+    lappend PUNKARGS [list {
+            @id     -id ::punk::ansi::ta::detect
+            @cmd    -name punk::ansi::ta::detect\
+                -summary\
+                "Test if text has completed ANSI codes"\
+                -help\
+                "Return a boolean indicating whether *complete* Ansi codes were detected in text.
+
+                By complete, it means that paired squences such as PM (privacy message) must be
+                closed. It also means that a truncated sequence such as \\x1b\\\[ or a lone escape
+                will not be detected as ANSI.
+
+                Use punk::ansi::ta::detectcode as a slightly faster detector for ANSI codes, that does
+                not require paired sequences to have both starting and end sequences to be detected.
+
+                Important caveat:
+                When text is a tcl list made from splitting (or lappending) some ansi string
+                - individual elements may be braced or have certain chars escaped.
+                (one example is if a list element contains an unbalanced  brace)
+                This can cause square brackets that form part of the ansi to be backslash escaped
+                - and the function can fail to match it as an Ansi code.
+                "
+            @values -min 1
+            text -type string -help\
+                "Block of text. See caveat above about lists."
+    } ]
     proc detect {text} [string map [list <re> [list $re_ansi_detect]] {
         regexp <re> $text
     }]
-
     #can be used on dicts - but will check keys too. keys could also contain ansi and have escapes
     proc detect_in_list {list} {
         #loop is commonly faster than using join. (certain ansi codes triggering list quoting? review)
@@ -5865,6 +5979,22 @@ tcl::namespace::eval punk::ansi::ta {
         }
         return 0
     }
+
+    #will detect for example lone opening or closing PM
+    proc detectcode {text} [string map [list <re> [list $re_ansi_detectcode]] {
+        regexp <re> $text
+    }]
+    proc detectcode_in_list {list} {
+        #loop is commonly faster than using join. (certain ansi codes triggering list quoting? review)
+        foreach item $list {
+            if {[detectcode $item]} {
+                return 1
+            }
+        }
+        return 0
+    }
+
+
     #surprisingly - the ::join operation can be (relatively) slow on lists containing ansi
     proc detect_in_list2 {list} {
         detect [join $list " "]
@@ -5915,6 +6045,8 @@ tcl::namespace::eval punk::ansi::ta {
         variable re_sgr
         expr {[regexp $re_sgr $text]}
     }
+
+    #perl: ta_strip
     proc strip {text} {
         #*** !doctools
         #[call [fun strip] [arg text]]
@@ -5922,6 +6054,39 @@ tcl::namespace::eval punk::ansi::ta {
         #[para]This is a tailcall to punk::ansi::ansistrip
         tailcall ansistrip $text
     }
+
+    lappend PUNKARGS [list {
+        @id     -id ::punk::ansi::ta::extract
+        @cmd    -name punk::ansi::ta::extract\
+            -summary\
+            "Return only the ANSI codes in text"\
+            -help\
+            "This is the opposite of strip,
+              returning only the ANSI codes in text."
+        @values -min 1 -max 1
+        text -type string 
+    } ]
+    proc extract {text} {
+        set parts [split_codes $text]
+        set out ""
+        foreach {pt code} $parts {
+            append out $code
+        }
+        return $out
+    }
+
+    lappend PUNKARGS [list {
+            @id     -id ::punk::ansi::ta::length
+            @cmd    -name punk::ansi::ta::length\
+                -summary\
+                "Calculate length of text (excluding the ANSI codes)"\
+                -help\
+                "Calculate length of text (excluding the ANSI codes)
+                This is not the printing length of the string on screen."
+            @values -min 1
+            text -type string 
+    } ]
+    #perl: ta_length
     proc length {text} {
         #*** !doctools
         #[call [fun length] [arg text]]
@@ -5936,6 +6101,8 @@ tcl::namespace::eval punk::ansi::ta {
     #
     #}
 
+    #perl: ta_trunc
+    #truncate $text to $width columns while still including all the ANSI colour codes.
     proc trunc {text width args} {
 
     }
@@ -8504,6 +8671,10 @@ tcl::namespace::eval punk::ansi::internal {
     }
 }
 
+tcl::namespace::eval punk::ansi {
+    namespace import ::punk::ansi::ta::detect
+}
+
 #inserting into global namespace like this should be kept to a minimum.. but this is considered a core aspect of punk::ansi
 #todo - document
 interp alias {} ansistring {} ::punk::ansi::ansistring
diff --git a/src/bootsupport/modules/punk/args-0.2.tm b/src/bootsupport/modules/punk/args-0.2.tm
index fc438d57..a6224c0d 100644
--- a/src/bootsupport/modules/punk/args-0.2.tm
+++ b/src/bootsupport/modules/punk/args-0.2.tm
@@ -1364,6 +1364,11 @@ tcl::namespace::eval punk::args {
                                         #}
                                         tcl::dict::set tmp_optspec_defaults -type $v
                                     }
+                                    -defaultdisplaytype {
+                                        #how the -default is displayed
+                                        #-default doesn't have to be the same type as -type which validates user input that is not defaulted.
+                                        tcl::dict::set tmp_optspec_defaults -defaultdisplaytype $v
+                                    }
                                     -parsekey {
                                         tcl::dict::set tmp_optspec_defaults -parsekey $v
 
@@ -1441,7 +1446,7 @@ tcl::namespace::eval punk::args {
                                     default {
                                         set known   {   -parsekey -group -grouphelp\
                                                         -any -anyopts -arbitrary -form -minsize -maxsize -choices -choicegroups -choicemultiple -choicecolumns -choicelabels -choiceinfo\
-                                                        -type -range -typeranges -default -typedefaults
+                                                        -type -range -typeranges -default -defaultdisplaytype -typedefaults
                                                         -choiceprefix -choiceprefixdenylist -choiceprefixreservelist -choicerestricted -nocase\
                                                         -nominsize -nomaxsize -norange -nochoices -nochoicelabels\
                                                         -type -optional -allow_ansi -validate_ansistripped -strip_ansi -multiple -prefix\
@@ -2052,6 +2057,9 @@ tcl::namespace::eval punk::args {
                                 tcl::dict::set spec_merged -optional 1
                             }
                         }
+                        -defaultdisplaytype {
+                            tcl::dict::set spec_merged -defaultdisplaytype $specval
+                        }
                         -typedefaults {
                             set typecount [llength [tcl::dict::get $spec_merged -type]]
                             if {$typecount != [llength $specval]} {
@@ -2114,7 +2122,7 @@ tcl::namespace::eval punk::args {
                                                     -form -type\
                                                     -parsekey -group\
                                                     -range -typeranges\
-                                                    -default -typedefaults\
+                                                    -default -defaultdisplaytype -typedefaults\
                                                     -minsize -maxsize -choices -choicegroups\
                                                     -choicemultiple -choicecolumns -choiceprefix -choiceprefixdenylist -choiceprefixreservelist -choicerestricted\
                                                     -nocase -optional -multiple -validate_ansistripped -allow_ansi -strip_ansi -help\
@@ -3784,7 +3792,32 @@ tcl::namespace::eval punk::args {
                         }
 
                         if {[dict exists $arginfo -default]} {
-                            set default "'$A_DEFAULT[dict get $arginfo -default]$RST'"
+                            #default isn't necessarily of same type as -type required for validation
+                            #Guessing at the type from the data is likely to be unsatisfactory.
+
+                            set defaultdisplaytype [Dict_getdef $arginfo -defaultdisplaytype string]
+                            switch -- $defaultdisplaytype {
+                                dict {
+                                    #single level
+                                    set rawdefault [dict get $arginfo -default]
+                                    set default     "{\n"
+                                    dict for {k v} $rawdefault {
+                                        append default "  \"$k\" \"$v\"\n"
+                                    }
+                                    append default  "}"
+                                }
+                                list {
+                                    set default     "{\n"
+                                    foreach v $rawdefault {
+                                        append default "  \"$v\"\n"
+                                    }
+                                    append default "}"
+                                }
+                                default {
+                                    #set default "'$A_DEFAULT[dict get $arginfo -default]$RST'"
+                                    set default "'[dict get $arginfo -default]'"
+                                }
+                            }
                         } else {
                             set default ""
                         }
diff --git a/src/bootsupport/modules/punk/char-0.1.0.tm b/src/bootsupport/modules/punk/char-0.1.0.tm
index f8123b94..69df08b9 100644
--- a/src/bootsupport/modules/punk/char-0.1.0.tm
+++ b/src/bootsupport/modules/punk/char-0.1.0.tm
@@ -89,6 +89,107 @@ tcl::namespace::eval punk::char {
     variable invalid "???" ;# ideally this would be 0xFFFD - which should display as black diamond/rhombus with question mark. As at 2023 - this tends to display indistinguishably from other missing glyph boxes - so we use a longer sequence we can detect by length and to display obviously
     variable invalid_display_char \u25ab; #todo - change to 0xFFFD once diamond glyph more common?
 
+
+    #more useful for referring to ANSI documentation would be a proper 7-bit and 8-bit 'Code Table' layout
+    #as described in ECMA-35 5.2
+    # where the positions of the table are in one-to-one correspondence with the bit combinations of the code.
+    #-  for 7-bit: 8 columns 16 rows
+    #-  for 8-bit 16 columns 16 rows
+    proc codetable {which} {
+        set bits 8
+        switch -- $which {
+            ascii8 {
+                set which default
+            }
+            ascii {
+                set bits 7
+            }
+            default {
+                if {$which ni [encoding names]} {
+                    error "codetable unsupported - use 'ascii' or an entry from the result of the 'encoding names' command."
+                }
+            }
+        }
+        package require punk::ansi
+
+        set hibit_count [expr {$bits-4}]
+        set bitcolumns [expr {2**$hibit_count}] ;#always 4 bits for the rows - remaining bits for the columns
+        set columncount [expr {$bitcolumns + 6}]
+
+
+        #set header1 [list "" "b7\nb6\nb5" "0\n0\n0" "0\n0\n1" "0\n1\n0" "0\n1\n1" "1\n0\n0" "1\n0\n1" "1\n1\n0" "1\n1\n1"]
+        set header1 [list]
+        set hibits_label ""
+        set indent ""
+        for {set hb $bits} {$hb > 4} {incr hb -1} {
+            append hibits_label ${indent}b$hb\n
+            append indent "  "
+        }
+        set hibits_label [string range $hibits_label 0 end-1]
+        lappend header1 $hibits_label "" "" "" "" ""
+
+        for {set colidx 0} {$colidx < $bitcolumns} {incr colidx} {
+            set binval [format %0${hibit_count}b $colidx]
+            set binvalbits [split $binval ""]
+            set indent  ""
+            set display_hibits ""
+            foreach bb $binvalbits {
+                append display_hibits $indent$bb\n
+                append indent " "
+            }
+            set display_hibits [string range $display_hibits 0 end-1]
+            lappend header1 $display_hibits
+        }
+        #\u2193 down arrow
+        #right-down arrows
+        #\u2ba7
+        #\u21b4
+        #\u2b0e
+        set header2 [list "   Bits" b4 b3 b2 b1 "column \u2192\nrow \u2b0e" {*}[punk::lib::range 0 $bitcolumns-1]]
+        set headers [list $header1 $header2]
+
+        #set t [textblock::table -return tableobject -rows $rows]
+        set t [textblock::table -return tableobject]
+        #todo - fix textblock::table to allow configure -columncount
+        for {set c 0} {$c < $columncount} {incr c} {
+            $t add_column
+        }
+
+        set vheaders [punk::transpose_equal_lists $headers]
+        set hidx -1
+        foreach vh $vheaders {
+            incr hidx
+            $t configure_column $hidx -headers $vh
+        }
+        $t configure_header 0 -colspans [list 6 0 0 0 0 0 {*}[lrepeat $bitcolumns 1]]
+        $t configure_column 0 -blockalign left
+
+        #always 16 rows - remaining bits form the columns
+        for {set ridx 0} {$ridx <= 15} {incr ridx} {
+            set charlist [list]
+            set lowbits [format %04b $ridx]
+            for {set i 0} {$i < $bitcolumns} {incr i} {
+                set hibits [format %0${hibit_count}b $i]
+                set ch [format %c [scan ${hibits}${lowbits} %b]]
+                #puts "-->${hibits}${lowbits} ch:$ch"
+                if {$which ne "default"} {
+                    if {[catch {encoding convertfrom $which $ch} ch]} {
+                        set ch [punk::ansi::a red bold]-[punk::ansi::a]
+                        lappend charlist $ch
+                    } else {
+                        lappend charlist [punk::ansi::ansistring VIEW -lf 1 -vt 1 $ch]
+                    } 
+                } else {
+                    lappend charlist [punk::ansi::ansistring VIEW -lf 1 -vt 1 $ch]
+                }
+            }
+            set r [list "" {*}[split $lowbits ""] $ridx {*}$charlist]
+            $t add_row $r
+        }
+        puts stderr $t
+        $t print
+    }
+
     #just the 7-bit ascii. use [page ascii] for the 8-bit layout
     proc ascii {} {return {
         00 NUL  01 SOH  02 STX  03 ETX  04 EOT  05 ENQ  06 ACK  07 BEL
diff --git a/src/bootsupport/modules/punk/console-0.1.1.tm b/src/bootsupport/modules/punk/console-0.1.1.tm
index 4322ceaa..ea8d3f77 100644
--- a/src/bootsupport/modules/punk/console-0.1.1.tm
+++ b/src/bootsupport/modules/punk/console-0.1.1.tm
@@ -1336,6 +1336,7 @@ namespace eval punk::console {
     #https://vt100.net/docs/vt510-rm/DA1.html
     #
     proc get_device_attributes {{inoutchannels {stdin stdout}}} {
+        #Note the vt52 rough equivalen \x1bZ - commonly supported but probably best considered obsolete as it collides with ECMA 48 SCI Single Character Introducer
         #DA1
         variable last_da1_result
         #first element in result is the terminal's architectural class 61,62,63,64.. ?
diff --git a/src/bootsupport/modules/punk/lib-0.1.2.tm b/src/bootsupport/modules/punk/lib-0.1.2.tm
index 6ce76618..46cd5668 100644
--- a/src/bootsupport/modules/punk/lib-0.1.2.tm
+++ b/src/bootsupport/modules/punk/lib-0.1.2.tm
@@ -2242,6 +2242,121 @@ namespace eval punk::lib {
         }
     }
 
+
+    #review - compare to IMAP4 methods of specifying ranges?
+    punk::args::define {
+        @id     -id     ::punk::lib::indexset_resolve
+        @cmd    -name     punk::lib::indexset_resolve\
+            -summary\
+            "Resolve an indexset to a list of integers based on supplied list or string length."\
+            -help\
+            "Resolve an 'indexset' to a list of actual indices within the range of the provided numitems value.
+            An indexset consists of a comma delimited list of indexes or index-ranges.
+            The indexes are 0-based.
+            Ranges must be specified with .. as the separator.
+            Whitespace is ignored.
+            Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands,
+            e.g end-2 or 2+2.
+
+            end means the last page.
+            end-1 means the second last page.
+            0.. is the same as 0..end.
+            examples:
+            1,3..
+                output the page index 1 (2nd page) followed by all from index 3 to the end.
+            0-2,end
+                output the first 3 pages, and the last page.
+            end-1..0
+            output the indexes in reverse order from 2nd last page to first page."
+        @values -min 2 -max 2
+        numitems -type integer
+        indexset -type string
+    }
+    proc indexset_resolve {numitems indexset} {
+        if {![string is integer -strict $numitems] || ![regexp {^[\-\+_end,\.0-9]*$} $indexset]} {
+            #use parser on unhappy path only
+            set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve]
+            uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg]
+        } 
+        set index_list [list] ;#list of actual indexes within the range 
+        set iparts [split $indexset ,]
+        set index_list [list]
+        foreach ipart $iparts {
+            set ipart [string trim $ipart]
+            set rposn [string first .. $ipart]
+            if {$rposn>=0} {
+                #range
+                lassign           [punk::lib::string_splitbefore_indices $ipart $rposn $rposn+2] rawa _ rawb 
+                set rawa [string trim $rawa]
+                set rawb [string trim $rawb]
+                if {$rawa eq ""} {set rawa 0}
+                set a             [punk::lib::lindex_resolve $numitems $rawa]
+                if {$a == -3} {
+                    #undershot - leave negative
+                } elseif {$a == -2 && $rawa ne "-2"} {
+                    #overshot
+                    set a [expr {$numitems}] ;#put it outside the range on the upper side
+                }
+
+                if {$rawb eq ""} {
+                    if {$a > $numitems-1} {
+                        set rawb $a ;#make sure <overshot>.. doesn't return last item - should return nothing
+                    } else {
+                        set rawb end
+                    }
+                }
+                set b             [punk::lib::lindex_resolve $numitems $rawb]
+                if {$b == -3} {
+                    #undershot  - leave negative
+                } elseif {$b == -2 && $rawb ne "-2"} {
+                    set b [expr {$numitems}] ;#overshot - put it outside the range on the upper side
+                }
+
+                #e.g make sure <overshot>.. doesn't return last item - should return nothing as both are above the range.
+                if {$a >= 0 && $a <= $numitems-1 && $b >=0 && $b <= $numitems-1} {
+                    lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                } else {
+                    if {$a >= 0 && $a <= $numitems-1} {
+                        #only a is in the range
+                        if {$b < 0} {
+                            set b 0
+                        } else {
+                            set b [expr {$numitems-1}]
+                        }
+                        lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                    } elseif {$b >=0 && $b <= $numitems-1} {
+                        #only b is in the range
+                        if {$a < 0} {
+                            set a 0
+                        } else {
+                            set a [expr {$numitems-1}]
+                        }
+                        lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                    } else {
+                        #both outside the range
+                        if {$a < 0 && $b > 0} {
+                            #spans the range in forward order
+                            set a 0 
+                            set b [expr {$numitems-1}]
+                            lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                        } elseif {$a > 0 && $b < 0} {
+                            #spans the range in reverse order
+                            set a [expr {$numitems-1}]
+                            set b 0
+                            lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                        }
+                        #both outside of range on same side
+                    }
+                }
+            } else {
+                set idx [punk::lib::lindex_resolve_basic $numitems $ipart]
+                if {$idx >= 0} {
+                    lappend index_list $idx
+                }
+            }
+        }
+        return $index_list
+    }
     # showdict uses lindex_resolve results -2 & -3 to determine whether index is out of bounds on upper vs lower side
     #REVIEW: This shouldn't really need the list itself - just the length would suffice
     punk::args::define {
@@ -2305,7 +2420,8 @@ namespace eval punk::lib {
             #<0 ?
             error "lindex_resolve len must be an integer"
         }
-        set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000
+        set index [tcl::string::map {_ {}} $index] ;#basic forward compatibility with integers such as 1_000 for 8.6
+        #todo - be stricter about malformations such as 1000_
         if {[string is integer -strict $index]} {
             #can match +i -i
             if {$index < 0} {
@@ -3345,8 +3461,12 @@ namespace eval punk::lib {
             #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour.
             #This commonly happens if there is an unbalanced brace  (which is a normal occurrence and needs to be handled)
             #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable
-            #detect_in_list will check at first level. (not intended for detecting ansi in deeper structures)
-            if {![punk::ansi::ta::detect_in_list $linelist]} {
+            #detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures)
+
+            #we use detectcode_in_list instead of detect_in_list
+            #detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message)
+            # - but the main reason is it is slightly faster.
+            if {![punk::ansi::ta::detectcode_in_list $linelist]} {
                 if {$opt_ansiresets} {
                     foreach ln $linelist {
                         lappend transformed $RST$ln$RST
diff --git a/src/bootsupport/modules/punk/ns-0.1.0.tm b/src/bootsupport/modules/punk/ns-0.1.0.tm
index f8e55b02..10fda84e 100644
--- a/src/bootsupport/modules/punk/ns-0.1.0.tm
+++ b/src/bootsupport/modules/punk/ns-0.1.0.tm
@@ -13,6 +13,9 @@
 # @@ Meta End
 
 
+#BUGS
+# 2025-08
+# n// and n/// won't output info about 'namespace path' if there are no commands in the namespace
 
 # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
 ##  Requirements
@@ -1235,7 +1238,7 @@ tcl::namespace::eval punk::ns {
             if {[dict size [dict get $nsdict namespacepath]]} {
                 set path_text ""
                 if {!$opt_nspathcommands} {
-                    append path_text \n "  Also resolving cmds in namespace paths: [dict keys [dict get $nsdict namespacepath]]"
+                    append path_text \n "  Also resolving cmds in namespace paths: [dict keys [dict get $nsdict namespacepath]] (use n/// to display)"
                 } else {
                     append path_text \n "  Also resolving cmds in namespace paths:"
                     set nspathdict [dict get $nsdict namespacepath]
@@ -1247,8 +1250,17 @@ tcl::namespace::eval punk::ns {
                         }
                     } else {
                         #todo - change to display in column order to be same as main command listing
+                        set parentcommands [dict get $nsdict commands]
                         dict for {k v} $nspathdict {
-                            set pathcommands [dict get $v commands]
+                            set rawpathcommands [dict get $v commands]
+                            set pathcommands [list]
+                            foreach c $rawpathcommands {
+                                if {$c in $parentcommands} {
+                                    lappend pathcommands [punk::ansi::a strike]$c[a]
+                                } else {
+                                    lappend pathcommands $c
+                                }
+                            }
                             set columns 6
                             if {[llength $pathcommands] < 6} {
                                 set columns [llength $v]
diff --git a/src/bootsupport/modules/shellfilter-0.2.tm b/src/bootsupport/modules/shellfilter-0.2.tm
index e04e5107..8017d3f5 100644
--- a/src/bootsupport/modules/shellfilter-0.2.tm
+++ b/src/bootsupport/modules/shellfilter-0.2.tm
@@ -579,13 +579,15 @@ namespace eval shellfilter::chan {
     #punk::ansi::ansistrip converts at least some of the box drawing G0 chars to unicode - todo - more complete conversion
     #assumes line-buffering. a more advanced filter required if ansicodes can arrive split across separate read or write operations!
     oo::class create ansistrip {
-        variable o_trecord 
+        variable o_trecord
         variable o_enc
+        variable o_encbuf
         variable o_is_junction
         constructor {tf} {
             package require punk::ansi
             set o_trecord $tf
             set o_enc [dict get $tf -encoding]
+            set o_encbuf ""
             if {[dict exists $tf -junction]} {
                 set o_is_junction [dict get $tf -junction]
             } else {
@@ -614,10 +616,36 @@ namespace eval shellfilter::chan {
         method flush {transform_handle} {
             return ""
         }
+        #method write {transform_handle bytes} {
+        #    #broken due to occasional unexpected byte sequence
+        #    set instring [encoding convertfrom $o_enc $bytes] 
+        #    set outstring [punk::ansi::ansistrip $instring]
+        #    return [encoding convertto $o_enc $outstring]
+        #}
         method write {transform_handle bytes} {
-            set instring [encoding convertfrom $o_enc $bytes] 
-            set outstring [punk::ansi::ansistrip $instring]
-            return [encoding convertto $o_enc $outstring]
+            #set instring [tcl::encoding::convertfrom $o_enc $bytes] ;naive approach will break due to unexpected byte sequence - occasionally
+            #bytes can break at arbitrary points making encoding conversions invalid.
+
+            set inputbytes $o_encbuf$bytes
+            set o_encbuf ""
+            set tail_offset 0
+            while {$tail_offset < [string length $inputbytes] && [catch {tcl::encoding::convertfrom $o_enc [string range $inputbytes 0 end-$tail_offset]} stringdata]} {
+                incr tail_offset
+            }
+            if {$tail_offset > 0} {
+                if {$tail_offset < [string length $inputbytes]} {
+                    #stringdata from catch statement must be a valid result
+                    set t [expr {$tail_offset - 1}]
+                    set o_encbuf [string range $inputbytes end-$t end]
+                } else {
+                    set stringdata ""
+                    set o_encbuf $inputbytes
+                    return ""
+                }
+            }
+
+            set outstring [punk::ansi::ansistrip $stringdata]
+            return [tcl::encoding::convertto $o_enc $outstring]
         }
         method meta_is_redirection {} {
             return $o_is_junction
@@ -724,6 +752,8 @@ namespace eval shellfilter::chan {
             set emit ""
             if {[string last \x1b $buf] >= 0} {
                 #detect will detect ansi SGR and gron groff and other codes
+                #REVIEW - ta::detect won't detect SOS without paired ST for things like PM
+                # ta::detectcode will - but then split_codes_single will treat unpaired SOS as text?
                 if {[punk::ansi::ta::detect $buf]} {
                     #split_codes_single regex faster than split_codes - but more resulting parts
                     #'single' refers to number of escapes - but can still contain e.g multiple SGR codes (or mode set operations etc)
@@ -1035,7 +1065,7 @@ namespace eval shellfilter::chan {
     }
     #todo - something 
     oo::class create rebuffer {
-        variable o_trecord 
+        variable o_trecord
         variable o_enc
         constructor {tf} {
             set o_trecord $tf
diff --git a/src/bootsupport/modules/textblock-0.1.3.tm b/src/bootsupport/modules/textblock-0.1.3.tm
index de0164cd..97969463 100644
--- a/src/bootsupport/modules/textblock-0.1.3.tm
+++ b/src/bootsupport/modules/textblock-0.1.3.tm
@@ -2528,7 +2528,7 @@ tcl::namespace::eval textblock {
                         set ansiborder_final $ansiborder_body_col_row
                         #$c will always have ansi resets due to overtype behaviour ?
                         #todo - review overtype
-                        if {[punk::ansi::ta::detect $c]} {
+                        if {[punk::ansi::ta::detectcode $c]} {
                             #use only the last ansi sequence in the cell value
                             #Filter out foreground and use background for ansiborder override
                             set parts [punk::ansi::ta::split_codes_single $c]
@@ -4377,6 +4377,7 @@ tcl::namespace::eval textblock {
             if {![punk::ansi::ta::detect $block]} {
                 return $block
             }
+            #could be newine in SOS/PM? review - terminal should theoretically ignore all chars til close of string
             foreach ln [split $block \n] {
                 set parts [punk::ansi::ta::split_codes $ln]
                 if {[lindex $parts 0] eq ""} {
@@ -4894,7 +4895,9 @@ tcl::namespace::eval textblock {
             }
             set textblock [textutil::tabify::untabify2 $textblock $tw]
         }
-        if {[string length $textblock] > 1 && [punk::ansi::ta::detect $textblock]} {
+        #review detectcode vs detect. detectcode won't look for completness of all codes to give a positive result
+        #ansistripraw splits on complete codes..
+        if {[string length $textblock] > 1 && [punk::ansi::ta::detectcode $textblock]} {
             #ansistripraw slightly faster than ansistrip - and won't affect width (avoid detect_g0/conversions)
             set textblock [punk::ansi::ansistripraw $textblock]
         }
@@ -4917,7 +4920,9 @@ tcl::namespace::eval textblock {
         } else {
             set tl $textblock
         }
-        if {[punk::ansi::ta::detect $tl]} {
+        #review detectcode vs detect. detectcode won't look for completness of all codes to give a positive result
+        #ansistripraw splits on complete codes..
+        if {[punk::ansi::ta::detectcode $tl]} {
             set tl [punk::ansi::ansistripraw $tl]
         }
         return [punk::char::ansifreestring_width $tl]
@@ -4964,7 +4969,7 @@ tcl::namespace::eval textblock {
             set textblock [textutil::tabify::untabify2 $textblock $tw]
         }
         #ansistripraw on entire block in one go rather than line by line - result should be the same - review - make tests
-        if {[string length $textblock] > 1 && [punk::ansi::ta::detect $textblock]} {
+        if {[string length $textblock] > 1 && [punk::ansi::ta::detectcode $textblock]} {
             set textblock [punk::ansi::ansistripraw $textblock]
         }
         if {[tcl::string::last \n $textblock] >= 0} {
@@ -5211,7 +5216,7 @@ tcl::namespace::eval textblock {
 
         set known_hasansi [tcl::dict::get $opts -known_hasansi]
         if {$known_hasansi eq ""} {
-            set block_has_ansi [punk::ansi::ta::detect $block]
+            set block_has_ansi [punk::ansi::ta::detectcode $block]
         } else {
             set block_has_ansi $known_hasansi
         }
@@ -5648,7 +5653,7 @@ tcl::namespace::eval textblock {
         set rowcount 0
         set blocklists [list]
         foreach b $blocks {
-            if {[punk::ansi::ta::detect $b]} {
+            if {[punk::ansi::ta::detectcode $b]} {
                 #-ansireplays 1 quite expensive e.g 7ms in 2024
                 set bl [punk::lib::lines_as_list -ansireplays 1 -ansiresets $ansiresets -- $b]
             } else {
@@ -5676,7 +5681,7 @@ tcl::namespace::eval textblock {
         set i -1
         foreach b $args {
             incr i
-            if {[punk::ansi::ta::detect $b]} {
+            if {[punk::ansi::ta::detectcode $b]} {
                 #-ansireplays 1 quite expensive e.g 7ms in 2024
                 set blines [punk::lib::lines_as_list -ansireplays 1 -ansiresets auto -- $b]
             } else {
@@ -5799,7 +5804,7 @@ tcl::namespace::eval textblock {
             #testing of any decent size block line by line - even with max_string_length_line is slower than ta::detect anyway.
 
             #blocks passed to join can be ragged - so we can't pass -known_samewidth to pad
-            if {[punk::ansi::ta::detect $b]} {
+            if {[punk::ansi::ta::detectcode $b]} {
                 # - we need to join to use pad - even though we then need to immediately resplit REVIEW (make line list version of pad?)
                 set replay_block [::join [punk::lib::lines_as_list -ansireplays 1 -ansiresets $ansiresets -- $b] \n]
                 #set blines [split [textblock::pad $replay_block -known_hasansi 1 -known_samewidth $is_samewidth -known_blockwidth $bwidth -width $bwidth -which right -padchar " "] \n]
@@ -8650,7 +8655,7 @@ tcl::namespace::eval textblock {
                 set cache_inner [overtype::block -blockalign $opt_blockalign -ellipsis $opt_ellipsis $underlay $cache_contentpattern]
                 #puts "frame--->ansiwrap -rawansi [ansistring VIEW $opt_ansibase] $cache_inner"
                 if {$opt_ansibase ne ""} {
-                    if {[punk::ansi::ta::detect $cache_inner]} {
+                    if {[punk::ansi::ta::detectcode $cache_inner]} {
                         #set cache_inner [punk::ansi::ansiwrap -rawansi $opt_ansibase $cache_inner]
                         #set cache_inner [punk::ansi::ansiwrap_raw $opt_ansibase "" "" $cache_inner]
                         #jjj ??? review
@@ -8745,7 +8750,7 @@ tcl::namespace::eval textblock {
 
             #set cwidth [textblock::width $contents]
             #JJJ
-            set contents_has_ansi [punk::ansi::ta::detect $contents]
+            set contents_has_ansi [punk::ansi::ta::detectcode $contents]
             if {$opt_ansibase ne ""} {
                 if {$contents_has_ansi} {
                     #set contents [punk::ansi::ansiwrap -rawansi $opt_ansibase $contents]
@@ -8799,8 +8804,9 @@ tcl::namespace::eval textblock {
                 if {$subposn >= 0} {
                     set content_line [lindex $clines $contentindex]
                     #review - different forms of reset e.g \x1b\[m ??
-                    if {[string range $content_line 0 3] eq "\x1b\[0m"} {
+                    if {[tcl::string::range $content_line 0 3] eq "\x1b\[0m"} {
                         set content_line [tcl::string::range $content_line 4 end]
+                        #::tcl::string::replace content_line 0 3
                     }
                     append content_line $opt_ansibase
                     append fs [tcl::string::replace $tline $subposn $subposn+$pattern_offset $content_line] \n
diff --git a/src/modules/punk/args-999999.0a1.0.tm b/src/modules/punk/args-999999.0a1.0.tm
index b35a0094..8e55fdd5 100644
--- a/src/modules/punk/args-999999.0a1.0.tm
+++ b/src/modules/punk/args-999999.0a1.0.tm
@@ -1364,6 +1364,11 @@ tcl::namespace::eval punk::args {
                                         #}
                                         tcl::dict::set tmp_optspec_defaults -type $v
                                     }
+                                    -defaultdisplaytype {
+                                        #how the -default is displayed
+                                        #-default doesn't have to be the same type as -type which validates user input that is not defaulted.
+                                        tcl::dict::set tmp_optspec_defaults -defaultdisplaytype $v
+                                    }
                                     -parsekey {
                                         tcl::dict::set tmp_optspec_defaults -parsekey $v
 
@@ -1441,7 +1446,7 @@ tcl::namespace::eval punk::args {
                                     default {
                                         set known   {   -parsekey -group -grouphelp\
                                                         -any -anyopts -arbitrary -form -minsize -maxsize -choices -choicegroups -choicemultiple -choicecolumns -choicelabels -choiceinfo\
-                                                        -type -range -typeranges -default -typedefaults
+                                                        -type -range -typeranges -default -defaultdisplaytype -typedefaults
                                                         -choiceprefix -choiceprefixdenylist -choiceprefixreservelist -choicerestricted -nocase\
                                                         -nominsize -nomaxsize -norange -nochoices -nochoicelabels\
                                                         -type -optional -allow_ansi -validate_ansistripped -strip_ansi -multiple -prefix\
@@ -2052,6 +2057,9 @@ tcl::namespace::eval punk::args {
                                 tcl::dict::set spec_merged -optional 1
                             }
                         }
+                        -defaultdisplaytype {
+                            tcl::dict::set spec_merged -defaultdisplaytype $specval
+                        }
                         -typedefaults {
                             set typecount [llength [tcl::dict::get $spec_merged -type]]
                             if {$typecount != [llength $specval]} {
@@ -2114,7 +2122,7 @@ tcl::namespace::eval punk::args {
                                                     -form -type\
                                                     -parsekey -group\
                                                     -range -typeranges\
-                                                    -default -typedefaults\
+                                                    -default -defaultdisplaytype -typedefaults\
                                                     -minsize -maxsize -choices -choicegroups\
                                                     -choicemultiple -choicecolumns -choiceprefix -choiceprefixdenylist -choiceprefixreservelist -choicerestricted\
                                                     -nocase -optional -multiple -validate_ansistripped -allow_ansi -strip_ansi -help\
@@ -3784,7 +3792,32 @@ tcl::namespace::eval punk::args {
                         }
 
                         if {[dict exists $arginfo -default]} {
-                            set default "'$A_DEFAULT[dict get $arginfo -default]$RST'"
+                            #default isn't necessarily of same type as -type required for validation
+                            #Guessing at the type from the data is likely to be unsatisfactory.
+
+                            set defaultdisplaytype [Dict_getdef $arginfo -defaultdisplaytype string]
+                            switch -- $defaultdisplaytype {
+                                dict {
+                                    #single level
+                                    set rawdefault [dict get $arginfo -default]
+                                    set default     "{\n"
+                                    dict for {k v} $rawdefault {
+                                        append default "  \"$k\" \"$v\"\n"
+                                    }
+                                    append default  "}"
+                                }
+                                list {
+                                    set default     "{\n"
+                                    foreach v $rawdefault {
+                                        append default "  \"$v\"\n"
+                                    }
+                                    append default "}"
+                                }
+                                default {
+                                    #set default "'$A_DEFAULT[dict get $arginfo -default]$RST'"
+                                    set default "'[dict get $arginfo -default]'"
+                                }
+                            }
                         } else {
                             set default ""
                         }
diff --git a/src/modules/punk/pdf-999999.0a1.0.tm b/src/modules/punk/pdf-999999.0a1.0.tm
index 17e2f8b7..df74d26c 100644
--- a/src/modules/punk/pdf-999999.0a1.0.tm
+++ b/src/modules/punk/pdf-999999.0a1.0.tm
@@ -82,70 +82,126 @@ namespace eval ::punk::pdf {
     variable base_x_per_c 4 ;#fixed width output font's x_per_c.
     variable cfg_blocksep_default "#pdf::text:blockstart type %type% pageindex %pageindex% blockindex %blockindex% y-index %y-index%  tlc {%x0% %y0%} brc {%brc%} warnings %warnings% : %marker%"
     variable cfg_pagesep_default  "#pdf::text:pagestart index %index% pageblocks %pageblocks% textblocks %textblocks% imageblocks %imageblocks% : [a bold green]PAGE[a]"
+    variable cfg_header_default
+    set cfg_header_default \
+"#pdf::text::header doc %doc% pages %pages% size %size% sha1 %sha1% punk::pdf %parserversion%
+#  args
+#    %args%"
+
+
     variable callid 0
     variable results
+    variable test
     array set results {}
 
     punk::args::define {
         @id         -id     ::punk::pdf::text
-        @cmd        -name   punk::pdf::text -help\
+        @cmd        -name   punk::pdf::text\
+            -summary\
+            "Extract text from non-image parts of a PDF"\
+            -help\
             "Extract text lines from a pdf
-            (No OCR - will only retrieve actual text contents)"
+            (No OCR - will only retrieve actual text contents)
+            The text function can only operate on the text spans provided by the underlying
+            engine (MuPDF) - which in some cases (for example spaces adjacent to brackets in
+            PDF32000_2008.pdf p24 eg4) does not interpret the number of whitespace characters
+            as a human reader would. The text data supplied for formulas such as in 7.10.1 of
+            the above document can be missing symbols such as the square-root symbol. This would
+            have to be converted to a unicode square root symbol with additional bracketing to
+            maintain meaning. The positioning of subscript numerals would also need to be
+            processed - and without font size information from tclMuPDF - this is currently
+            not practical to process reasonably. Such formulas will come out mangled.
+            "
         @leaders -min 0 -max 0
         @opts -min 0 -max 2
         -p|-page_indexes -parsekey -page_indexes -type string -default "0.." -help\
             "Comma delimited list of indexes and/or ranges specifying which pages to output.
-            The indexes are 0-based.
-            Ranges must be specified with .. as the separator.
-            end means the last page.
-            end-1 means the second last page.
-            0.. is the same as 0..end.
-            examples:
-                1,3..
-                output the page index 1 (2nd page) followed by all from index 3 to the end.
-                0-2,end
-                output the first 3 pages, and the last page.
-                end-1..0
-                output the pages in reverse order from 2nd last page to first page."
+              The indexes are 0-based.
+              Ranges must be specified with .. as the separator.
+              end means the last page.
+              end-1 means the second last page.
+              0.. is the same as .. and is the same as 0..end
+              examples:
+                  1,3..
+                  output the page index 1 (2nd page) followed by all from index 3 to the end.
+                  0..2,end
+                  output the first 3 pages, and the last page.
+                  end-1..0
+                  output the pages in reverse order from 2nd last page to first page."
         -b|-block_indexes -parsekey -block_indexes -type string -default "0.." -help\
             "Comma delimited list of indexes and/or ranges specifying which blocks to output.
-            Format is as per -page_indexes"
-        -merge_yblocks -default true -help\
-            "Whether to merge blocks vertically when they have the same starting y-index.
-            This usually makes the output more in line with the source document - but 
-            in some scenarios it might be desired to keep the blocks listed vertically
-            even though they appear side by side in the source."
+              Format is as per -page_indexes"
+        -merge_yblocks -default false -help\
+            "Whether to merge blocks vertically when they interleave in the y direction.
+              This usually makes the output more in line with the source document - but
+              in some scenarios it might be desired to keep the blocks listed vertically
+              even though they appear side by side in the source. Such vertical listing as
+              given by default when -merge_yblocks is false, does not necessarily match the
+              left to right order.
+              Note that especially when there is small-font text with other text to the right,
+              The small-font text that is not rightmost may be truncated (with trailing ellipsis)
+              to fit it into the vertically merged layout. Such truncation produces warnings on
+              the -errchannel and increments the warning count visible in the -sep-block line
+              preceding the merged blocks.
+              Smaller print that doesn't fit in the layout is sometimes not required to extract
+              the main data from a document such as an invoice - and the merged data can make
+              other elements more amenable to parsing. In practice, 2 passes with -merge_yblocks
+              configured on and off, combined with -sep_block data and a set of custom rules
+              based on the specific document set may be required for information extraction."
+        #------------------------------------------------------------------------------------
         -compact -type boolean -default false -help\
             "If compact is false - blank lines approximating the vertical spacing in the
               source document will be emitted between each block.
               If -merged_yblocks is true - there will still be potential vertical whitespace
               within any merged blocks, as the point of -merge_yblocks is to align elements
-              closely to that within the source document, but there will be no vertical layout
-              spacing between such blocks, or other non-merging blocks."
-        -image_place_ansi -type dict -default {border Term-grey78 inner Term-grey58 small Term-grey15} -help\
-            "3-element list of ANSI colour codes for the textual image placeholders.
+              closely to that within the source document, but there will be no vertical
+              layout spacing between such merged blocks, or other non-merging blocks.
+              This setting is redundant if -postcompact is true."
+        -postcompact -type boolean -default false -help\
+            "Strip out vertical spacing (empty or whitespace-only lines) from all blocks.
+              This sets -compact to true, and removes vertical gaps from any merged blocks.
+              The most vertically compact representation of the text will result from setting
+              -merge true, -postcompact true and -image_place_ansi to the empty string, as
+              well as omitting (or setting to empty string) -sep_page and -sep_block.
+              Note that if the source PDF actually positioned blank lines as strings of
+              whitespace, these too would be removed.
+              If -image_place_ansi is left as the default or has other ansi codes, the
+              image placeholders will not be stripped, even if they consist of the default
+              space characters.
+              If -sep_block or -sep_page are set to one or more spaces - these blank lines
+              will still be emitted."
+        -shrink_textfree_blocks -type boolean -default true -help\
+            "If a block is image only - don't output the placeholders.
+              These placeholders will still be emitted if -merge_yblocks is true and they
+              are in a merged block that has also merged text blocks. Set -image_place_ansi
+              to the empty string to suppress ANSI background for all image placeholders."
+        -image_place_ansi -type dict -defaultdisplaytype dict -default {border Term-grey78 inner Term-grey58 small Term-grey15} -help\
+            "Dict of ANSI colour codes for the textual image placeholders.
               Colour codes are those as listed by 'punk::ansi::a?'
-              If the default -image_place_chars values of the space character are being used, the colour
-              names should begin with a capital letter (indicating background as opposed to foreground colour).
+              If the default -image_place_chars values of the space character are being used,
+              the colour names should begin with a capital letter (indicating background as
+              opposed to foreground colour).
               The placeholders are by default space characters with the ANSI applied.
-              This means that just by stripping ANSI from the output, layout is maintained, and the
-              text can be processed simply, or the ANSI can be retained if your text parser wishes
-              to use it to make decisions.
-              The colour at element 'border' is for the border colour, 'inner' for the area within the border,
-              and the colour at element 'small' is for images too small for border representation. The border
-              bears no relation to the underlying image - and is merely to aid in seeing when images overlap
-              or are cropped etc. The dimensions of the placeholder are approximates only to the source
-              images, as they are in multiples of the fixed-width output font character width and
-              line height.
-              Setting -image_place_ansi to an empty value, or -image_place_chars to an empty value
-              will stop ANSI from being output at the image locations."
-        -image_place_chars -type dict -default {tlc " " hlt " " trc " " vlr " " brc " " hlb " " blc " " vll " " inner " " small " "} -help\
-            "Characters to use for image placeholders. The default list of empty spaces is recommended
-              for ease of visualisation and output parsing assuming an function such as punk::ansi::ansistrip
-              is available. An alternative set of characters such as a unicode block character might be useful
-              alongside turning ANSI off by setting -image_place_ansi to empty; if the unicode character is known
-              to be outside the domain of characters in the source document, and it is desired to use such chars
-              to detect approximate image position in relation to text.
+              This means that just by stripping ANSI from the output, layout is maintained,
+              and the text can be processed simply, or the ANSI can be retained if your text
+              parser wishes to use it to make decisions.
+              The colour at element 'border' is for the border colour, 'inner' for the area
+              within the border, and the colour at element 'small' is for images too small for
+              border representation. The border bears no relation to the underlying image - and
+              is merely to aid in seeing when images overlap or are cropped etc. The dimensions
+              of the placeholder are approximates only to the source images, as they are in
+              multiples of the fixed-width output font character width and line height.
+              Setting -image_place_ansi to an empty value, or -image_place_chars to an empty
+              value will stop ANSI from being output at the image locations."
+        #------------------------------------------------------------------------------------
+        -image_place_chars -type dict -defaultdisplaytype dict -default {tlc " " hlt " " trc " " vlr " " brc " " hlb " " blc " " vll " " inner " " small " "} -help\
+            "Characters to use for image placeholders. The default list of empty spaces is
+              recommended for ease of visualisation and output parsing assuming an function
+              such as punk::ansi::ansistrip is available. An alternative set of characters
+              such as a unicode block character might be useful alongside turning ANSI off by
+              setting -image_place_ansi to empty; if the unicode character is known to be
+              outside the domain of characters in the source document, and it is desired to
+              use such chars to detect approximate image position in relation to text.
                 tlc - border top left corner
                 hlt - border top horizontal lines
                 trc - border top right corner
@@ -157,40 +213,51 @@ namespace eval ::punk::pdf {
                 inner - image area within border
                 small - for images too small for borders
               "
+        -header    -type string -default "" -help\
+            "Header line to record details of the conversion run.
+              Set to 'default' to use the provided default template.
+              Substitutions available:
+                %doc%     - full path of source pdf
+                %pages%   - number of pages in the pdf
+                %size%    - size in bytes of the input pdf file
+                %sha1%    - sha1 checksum of the input pdf file
+                %parserversion% - version of punk::pdf used.
+                %args%    - arguments supplied to text command.
+                %default% - the default header template - for extending."
         -sep_page  -type string -default "" -help\
             "Send a line of output to stdout at the beginning of each
-            page in the PDF.
-            If set to the value: default
-            a default separator with some added info will be used.
-            line begins with: #pdf::text:pagestart
-            Substitutions available:
-                %index%       0-based index of the page (not the page number as in the PDF)
-                %pageblocks%  total number of blocks in the page, includes both
-                text and image blocks.
-                %textblocks%  number of text blocks in the page
-                %imageblocks% number of image blocks in the page
-                %nl%          newline
-                %default%     The default page sep - to allow extending."
+              page in the PDF.
+              If set to the value: default
+              a default separator with some added info will be used.
+              line begins with: #pdf::text:pagestart
+              Substitutions available:
+                  %index%       0-based index of the page (not the page number as in the PDF)
+                  %pageblocks%  total number of blocks in the page, includes both
+                  text and image blocks.
+                  %textblocks%  number of text blocks in the page
+                  %imageblocks% number of image blocks in the page
+                  %nl%          newline
+                  %default%     The default page sep - to allow extending."
         -sep_block -type string -default "" -help\
             "Send a line of output to stdout at the beginning of each
-            textblock in the PDF. This can aid in parsing.
-            If set to the the value: default
-            a default separator with some added info will be used.
-            line begins with: #pdf::text:blockstart
-            Substitutions available:
-                %type%      type of block text or image
-                %pageindex% 0-based index of the page (not the page number as in the PDF)
-                %y-index%   0-based index of block after the textblock list has
-                been sorted by y0
-                %x0%        first x coordinate of the block
-                %y0%        first y coordinate of the block
-                %x1%        second x coordinate of the block
-                %y1%        second y coordinate of the block
-                %nl%        newline
-                %warnings%  Number of warnings emitted to stderr for the block.
-                            These are normally for text overlay attempts, which should 
-                            only affect whitespace, but should be checked.
-                %default%   The default blocksep - to allow extending."
+              textblock in the PDF. This can aid in parsing.
+              If set to the the value: default
+              a default separator with some added info will be used.
+              line begins with: #pdf::text:blockstart
+              Substitutions available:
+                  %type%      type of block text or image
+                  %pageindex% 0-based index of the page (not the page number as in the PDF)
+                  %y-index%   0-based index of block after the textblock list has
+                  been sorted by y0
+                  %x0%        first x coordinate of the block
+                  %y0%        first y coordinate of the block
+                  %x1%        second x coordinate of the block
+                  %y1%        second y coordinate of the block
+                  %nl%        newline
+                  %warnings%  Number of warnings emitted to stderr for the block.
+                              These are normally for text overlay attempts, which should 
+                              only affect whitespace, but should be checked.
+                  %default%   The default blocksep - to allow extending."
         -outchannel -type string -default stdout -choicerestricted 0 -choices {return null stderr stdout}\
             -choicelabels {
                 return\
@@ -215,33 +282,33 @@ namespace eval ::punk::pdf {
               any open Tcl channel can be used."
         -debug_pageblock -default "" -type list -minsize 2 -maxsize 2 -help\
             "A 2-element list of the page then block index(s) for which to output
-            extra information on stderr.
-            Supplied value must contain 2 elements {indexset indexset}
-            The membership selection is performed the same way as -page_indexes.
-            e.g {.. end-1} means second-last block on every page."
+              extra information on stderr.
+              Supplied value if not empty, must contain 2 elements {indexset indexset}
+              The membership selection is performed the same way as -page_indexes.
+              e.g {.. end-1} means second-last block on every page."
         -debug_highlight -default "red bold" -help\
             "ANSI codes as understood by punk::ansi (see punk::ansi::a?)
 
-            These are used to colourise output if -debug_pageblock has been set"
+              These are used to colourise output if -debug_pageblock has been set"
         -highlight_overwrites -default "bold" -help\
             "Ansi codes as understood by punk::ansi (see punk::ansi::a?)
 
-            These are used to mark text elements which have been overlayed with
-            the same text data. This may occur for example in table rows in the source
-            document that have alternating colour or emphasis.
-            Having this ANSI applied can be useful when parsing the output to aid in
-            grouping lines. e.g by using [punk::ansi::ta::detect <str>] or similar.
-            
-            By applying ansi, the layout is not disturbed when viewing in a terminal,
-            and may be more visually similar to the source document.
-            Saved output can always be passed through an ansistrip function if needed,
-            or the default -highlight_overwrites can be overridden by supplying an empty
-            string."
-            
+              These are used to mark text elements which have been overlayed with
+              the same text data. This may occur for example in table rows in the source
+              document that have alternating colour or emphasis.
+              Having this ANSI applied can be useful when parsing the output to aid in
+              grouping lines. e.g by using [punk::ansi::ta::detect <str>] or similar.
+
+              By applying ansi, the layout is not disturbed when viewing in a terminal,
+              and may be more visually similar to the source document.
+              Saved output can always be passed through an ansistrip function if needed,
+              or the default -highlight_overwrites can be overridden by supplying an empty
+              string."
+
         -engine -type any -default MuPDF -choices {MuPDF} -help\
             "Only MuPDF is currently implemented. Be aware that while this script and tclMuPDF
-            are MIT/BSD licensed, the underlying MuPDF library is AGPL - which could place
-            some restrictions on commercial use."
+              are MIT/BSD licensed, the underlying MuPDF library is AGPL - which could place
+              some restrictions on commercial use."
         -warnings_engine -default 0 -choices {0 1 2}\
             -choicelabels {
                 0\
@@ -253,7 +320,8 @@ namespace eval ::punk::pdf {
             }\
             -help\
             "Whether to display internal warnings regarding the state of the PDF document.
-            The underlying MuPDF library can in many cases work with bad/corrupted PDFS."
+              The underlying MuPDF library can in many cases work with bad/corrupted PDFS,
+              and if so, may emit warnings."
         -warnings_textblock -default 2 -type integer -range {0 9} -help\
             "0 to disable.
               1 for least info, 9 for most.
@@ -263,12 +331,20 @@ namespace eval ::punk::pdf {
             "Path to the .pdf file to parse"
     }
     proc text {args} {
+
         variable callid
         incr callid
+
+        package require tcl::chan::variable
+        #variable testchan
+        #set testchan [::tcl::chan::variable ::punk::pdf::test($callid)]
+        #chan configure $testchan -translation lf
+
         variable results
         variable base_x_per_c
         variable cfg_blocksep_default
         variable cfg_pagesep_default
+        variable cfg_header_default
         set x_per_line [expr {$base_x_per_c * 2}] ;#hack for now
 
         set argd [punk::args::parse $args withid ::punk::pdf::text]
@@ -282,6 +358,10 @@ namespace eval ::punk::pdf {
         if {$opt_blocksep eq "default"} {
             set opt_blocksep $cfg_blocksep_default
         }
+        set opt_header              [dict get $opts -header]
+        if {$opt_header eq "default"} {
+            set opt_header $cfg_header_default
+        }
         set debug_pageblock         [dict get $opts -debug_pageblock]
         set debug_highlight         [string trim [dict get $opts -debug_highlight]]
         set highlight_overwrites    [string trim [dict get $opts -highlight_overwrites]]
@@ -289,8 +369,14 @@ namespace eval ::punk::pdf {
         set block_indexes           [string trim [dict get $opts -block_indexes]]
         set warnings_engine         [dict get $opts -warnings_engine]
         set opt_compact             [dict get $opts -compact]
+        set opt_postcompact         [dict get $opts -postcompact]
+        if {$opt_postcompact} {
+            set opt_compact 1 ;#don't emit inter-block spacing in the first place
+            #will also strip vertical space merged blocks
+        }
         set opt_image_place_ansi    [dict get $opts -image_place_ansi]
         set opt_image_place_chars   [dict get $opts -image_place_chars]
+        set opt_shrink_textfree_blocks [dict get $opts -shrink_textfree_blocks]
         if {$warnings_engine == 2} {
             #turn on printwarnings before we start
             ::mupdf::printwarnings 1
@@ -302,6 +388,8 @@ namespace eval ::punk::pdf {
             return {
                 package require tcl::chan::variable
                 set outchan [::tcl::chan::variable ::punk::pdf::results($callid)]
+                #chan configure $outchan -translation binary -encoding utf-8
+                chan configure $outchan -translation lf -encoding utf-8
             }
             null {
                 package require tcl::chan::null
@@ -320,7 +408,8 @@ namespace eval ::punk::pdf {
                 } else {
                     set errchan [::tcl::chan::variable ::punk::pdf::results($callid)]
                 }
-                chan configure $errchan -buffering none
+                #chan configure $errchan -buffering none -translation binary
+                chan configure $errchan -buffering none -translation lf -encoding utf-8
             }
             null {
                 package require tcl::chan::null
@@ -347,6 +436,13 @@ namespace eval ::punk::pdf {
 
         set d [::mupdf::open $fname]
         set npages [$d npages]
+        if {$opt_header ne ""} {
+                set opt_header [string map [list %default% $cfg_header_default] $opt_header]
+                package require sha1
+                set hash [sha1::sha1 -hex -file $fname]
+                set map [list %nl% \n %crlf% \r\n %doc% $fname %pages% $npages %size% [file size $fname] %sha1% $hash %args% $args %parserversion% [package present punk::pdf]]
+                puts $outchan [string map $map $opt_header]
+        }
 
         set page_index_list        [punk::lib::indexset_resolve $npages $page_indexes]
         set debug_pageblock_pages  [punk::lib::indexset_resolve $npages [lindex $debug_pageblock 0]]
@@ -495,21 +591,33 @@ namespace eval ::punk::pdf {
                         #    }
                         #}
                         #------------
-                        set cur_merge_base_fields [dict get [lindex $MERGE_BUFFER 0 0] header fields] 
-                        if {$bny0 >= [dict get $cur_merge_base_fields y1]} {
-                            #this block begins below the entire MERGE_BUFFER - time to render it
-                            _process_merge_buffer MERGE_BUFFER $pageindex $cfg_blocksep_default $opt_blocksep $opt_merge_yblocks $warnings_textblock $outchan $errchan
-                            if {!$opt_compact} {
-                                #default: -compact false
-                                set vdist [expr {$bny0 - [dict get $cur_merge_base_fields y1]}]
-                                set vdistlines [expr {int(ceil($vdist / $x_per_line))}]
-                                if {$vdistlines > 0} {
-                                    set vseparator  [string repeat \n $vdistlines]
-                                    puts -nonewline $outchan $vseparator
+                        #puts "MB dimensions: [punk::pdf::system::merge_buffer_dimensions MERGE_BUFFER]"
+                        set mbdimensions [punk::pdf::system::merge_buffer_dimensions MERGE_BUFFER] ;#list x0 y0 x1 y1
+                        #set cur_merge_base_fields [dict get [lindex $MERGE_BUFFER 0 0] header fields]
+                        #if {$bny0 >= [dict get $cur_merge_base_fields y1]} {}
+                        if {$bny0 >= [lindex $mbdimensions 3]} {
+
+                            #this block begins below the entire MERGE_BUFFER - time to render previous block 
+                            _process_merge_buffer MERGE_BUFFER $pageindex $cfg_blocksep_default $opt_blocksep $opt_merge_yblocks $opt_shrink_textfree_blocks $opt_postcompact $warnings_textblock $outchan $errchan
+
+                            if {$tbnum in $block_index_list} {
+                                #now this block's offset from the top
+                                if {!$opt_compact} {
+                                    #default: -compact false
+                                    #set vdist [expr {$bny0 - [dict get $cur_merge_base_fields y1]}]
+                                    set vdist [expr {$bny0 - [lindex $mbdimensions 3]}]
+                                    #set vdistlines [expr {int(ceil($vdist / $x_per_line))}]
+                                    set vdistlines [expr {int(floor($vdist / $x_per_line))}]
+                                    if {$vdistlines > 0} {
+                                        set vseparator  [string repeat \n $vdistlines]
+                                        puts -nonewline $outchan $vseparator
+                                    }
                                 }
                             }
                         } else {
-                            lappend MERGE_BUFFER [list]
+                            if {$tbnum in $block_index_list} {
+                                lappend MERGE_BUFFER [list]
+                            }
                         }
                     }
 
@@ -577,9 +685,17 @@ namespace eval ::punk::pdf {
 
                     set vertx [expr {$bny1 - $bny0}]
                     set img_char_height [expr {int(ceil($vertx / $x_per_line))}] ;#linecount
-                    #puts "--  imgstart_char_pos:$imgstart_char_pos img_char_width:$img_char_width"
+                    #puts "-- img_char_height:$img_char_height img_char:width: $img_char_width"
+                    if {$img_char_height == 0} {
+                        puts $errchan "WARNING type debug msg {zero height image}" ;#todo
+                        incr block_warning_count ;#0 based
+                        #nothing to render for a zero height img
+                        #MERGE_BUFFER will have header
+                        lset MERGE_BUFFER end end+1 [list warnings $block_warning_count]
+                        continue
+                    }
 
-                    #put a border around the image if it's big enough 
+                    #put a border around the image if it's big enough
                     #if it can't be bordered use a different colour
                     #this gives us 3 greys applied to space elements, to aid in seeing image overlaps and positioning.
                     #all can be easily stripped out with ansistrip to leave only spaces for text processing
@@ -629,14 +745,24 @@ namespace eval ::punk::pdf {
                             set a_inner  [dict get $opt_image_place_ansi inner]
                             set b [textblock::frame -type [list tlc $c_tlc hlt $c_hlt trc $c_trc vlr $c_vlr brc $c_brc hlb $c_hlb blc $c_blc vll $c_vll] -ansiborder [a $a_border] [punk::ansi::ansiwrap $a_inner $inner]]
                         }
-                    } 
+                    }
                     #set b [textblock::block $img_char_width $img_char_height " "]
                     #set b [punk::ansi::ansiwrap Term-102 $b]
                     set img_indent [textblock::block [expr {max(0,$imgstart_char_pos)}] $img_char_height \uFFFD]
                     if {$imgstart_char_pos < 0} {
                         #img representation is effectively cropped at left
                         #img_indent will just be a list of newlines to match height
+
+                        #even if img_indent and b have no ansi - result of renderspace will have ansi resets (todo review overtype::renderspace)
+                        set had_ansi 0
+                        if {[punk::ansi::ta::detect $b]} {
+                            set had_ansi 1
+                        }
                         set positioned_block [overtype::renderspace -insert_mode 0 -expand_right 1 -startcolumn $imgstart_char_pos $img_indent $b]
+                        if {!$had_ansi} {
+                            #if user disabled ansi on image_placeholders - we need to ensure it still has none after renderspace
+                            set positioned_block [punk::ansi::ansistrip $positioned_block]
+                        }
                     } else {
                         set positioned_block [textblock::join -- $img_indent $b]
                     }
@@ -663,7 +789,7 @@ namespace eval ::punk::pdf {
                         #(common to encounter line data where only some text elements are within the block we are currently at - e.g tables)
                         #todo - review treatment of 
                         #assume if it just extends out of the blocks x range - it's part of a larger block (?) review - xrangefudge?
-                        set has_element_in_range 0 
+                        set has_element_in_range 0
                         foreach xxt $ydata {
                             lassign $xxt dx0 dx1
                             set xrangefudge 0 ;#shouldn't be needed? 
@@ -682,7 +808,7 @@ namespace eval ::punk::pdf {
                             lassign $debug_pageblock dp db
 
                             if {$pageindex in $debug_pageblock_pages && $tbnum in $debug_pageblock_blocks} {
-                                set debuginfo "b:$tbnum yinfo: $yinfo ydata: $ydata"
+                                set debuginfo "b:$tbnum yinfo: $yinfo ydata: [punk::ansi::ansistring VIEW -lf 1 $ydata]"
                                 if {$debug_highlight ne ""} {
                                     set debuginfo "[punk::ansi::a {*}$debug_highlight]$debuginfo\x1b\[m"
                                 }
@@ -717,23 +843,44 @@ namespace eval ::punk::pdf {
                         set existing_y0s [lsearch -all -inline -index 0 -subindices $vfudged_lines *]
                         set existing_y1s [lsearch -all -inline -index 1 -subindices $vfudged_lines *]
                         set vfudged_line_added 0
+                        #set vfidx -1
+                        #foreach ey0 $existing_y0s {
+                        #    incr vfidx
+                        #    set vdiff [expr {abs($line_y0 - $ey0)}]
+                        #    #x units per char is normally narrower than height
+                        #    set linefudge 1.5 ;#choose linefudge less than assumed x_per_line of 2*base_x_per_c - or small font lines will not be separated #REVIEW
+                        #    if {$vdiff < ($linefudge * $base_x_per_c)} {
+                        #        #normalize to existing value
+                        #        #line_y1 unnormalised??
+                        #        #if y1 vdiff is about the same - normalise it too
+                        #        set ey1 [lindex $existing_y1s $vfidx]
+                        #        set vdiff2 [expr {abs($line_y1 -$ey1)}]
+                        #        if {abs($vdiff2 - $vdiff) <= 8} {
+                        #            set line_y1 $ey1
+                        #        }
+                        #        set vfudged_line_added 1
+                        #        lappend vfudged_lines [list $ey0 $line_y1 [lindex $fl 2]]
+                        #        break
+                        #    }
+                        #}
+
+                        #when mixing fonts in same line - aligning the bases is more common.
                         set vfidx -1
-                        foreach ey0 $existing_y0s {
+                        foreach ey1 $existing_y1s {
                             incr vfidx
-                            set vdiff [expr {abs($line_y0 - $ey0)}]
+                            set vdiff [expr {abs($line_y1 - $ey1)}]
                             #x units per char is normally narrower than height
                             set linefudge 1.5 ;#choose linefudge less than assumed x_per_line of 2*base_x_per_c - or small font lines will not be separated #REVIEW
                             if {$vdiff < ($linefudge * $base_x_per_c)} {
                                 #normalize to existing value
-                                #line_y1 unnormalised??
-                                #if y1 vdiff is about the same - normalise it too
-                                set ey1 [lindex $existing_y1s $vfidx]
-                                set vdiff2 [expr {abs($line_y1 -$ey1)}]
+                                #if y0 vdiff is about the same - normalise it too
+                                set ey0 [lindex $existing_y0s $vfidx]
+                                set vdiff2 [expr {abs($line_y0 -$ey0)}]
                                 if {abs($vdiff2 - $vdiff) <= 8} {
-                                    set line_y1 $ey1
+                                    set line_y0 $ey0
                                 }
                                 set vfudged_line_added 1
-                                lappend vfudged_lines [list $ey0 $line_y1 [lindex $fl 2]]
+                                lappend vfudged_lines [list $line_y0 $ey1 [lindex $fl 2]]
                                 break
                             }
                         }
@@ -774,7 +921,7 @@ namespace eval ::punk::pdf {
                         incr block_linenum
                         #puts "fline: $fl"
                         lassign $fl fy0 fy1 spans_by_x0  ;#don't pull out spans_by_x0_adjusted - it's adjusted within this loop by referring to its index in working_lines
-                        
+
                         set line_chunks [list]
                         #calculating x units per char for each string is only really possible if we know the exact font and size
                         set xidx -1 ;#index of spans in line, which are sorted by x0 (lhs)
@@ -783,6 +930,8 @@ namespace eval ::punk::pdf {
                             incr xidx
                             lassign $xxt tx0 tx1 text
 
+                            #puts -nonewline $testchan $text
+
 
                             #any span, including first may have rhs alignments with spans from previous lines in the same block
                             #look at original rhs alignment of spans in previous lines that match this one's x1
@@ -850,6 +999,7 @@ namespace eval ::punk::pdf {
 
                             #1st shot - use rhs of element immediately to the left (either existing TESTBLOCK or earliear spans on same line)
                             if {$xidx == 0} {
+                                set prevspan_x0 ""
                                 set prevspan_x1 ""
                                 set prevspan_x1_adjusted ""
                                 if {$TESTBLOCK ne ""} {
@@ -888,6 +1038,7 @@ namespace eval ::punk::pdf {
                             } else {
 
                                 #set prevspan_x1_adjusted [lindex $spans_by_x0_adjusted $xidx-1 1]
+                                set prevspan_x0 [lindex $spans_by_x0 $xidx-1 0]
                                 set prevspan_x1_adjusted [lindex $working_lines $block_linenum 3 $xidx-1 1]
                                 set prevspan_x1 [lindex $spans_by_x0 $xidx-1 1]
                                 #set effective_prev_x1 [expr {max($prevspan_x1,$prevspan_x1_adjusted)}]
@@ -983,7 +1134,7 @@ namespace eval ::punk::pdf {
                                 if {$pageindex in $debug_pageblock_pages && $tbnum in $debug_pageblock_blocks} {
                                     set debuginfo "span x0 xx1 text: [lindex $xxt 0] [lindex $xxt 1] [punk::ansi::a normal]'[lindex $xxt 2]'\x1b\[m"
                                     if {$debug_highlight ne ""} {
-                                        set debuginfo "[punk::ansi::a underline {*}$debug_highlight]$debuginfo\x1b\[m"
+                                        set debuginfo "[punk::ansi::a overline {*}$debug_highlight]$debuginfo\x1b\[m"
                                     }
                                     puts $errchan $debuginfo
                                     if {$prevspan_x1 ne $prevspan_x1_adjusted} {
@@ -992,9 +1143,9 @@ namespace eval ::punk::pdf {
                                     } else {
                                         set C "" ; set RST ""
                                     }
-                                    set debuginfo  "prevspan_x1:$prevspan_x1 prevspan_x1_adjusted: $C$prevspan_x1_adjusted$RST effective_prev_x1: $effective_prev_x1"
+                                    set debuginfo  "prev x0  x1     : $prevspan_x0 $prevspan_x1 prevspan_x1_adjusted: $C$prevspan_x1_adjusted$RST effective_prev_x1: $effective_prev_x1"
                                     if {$debug_highlight ne ""} {
-                                        set debuginfo "[punk::ansi::a overline {*}$debug_highlight]$debuginfo\x1b\[m"
+                                        set debuginfo "[punk::ansi::ansiwrap underline {*}$debug_highlight $debuginfo]"
                                     }
                                     puts $errchan $debuginfo
                                 }
@@ -1014,7 +1165,9 @@ namespace eval ::punk::pdf {
                                 #set buffer [string repeat " " $start_char_pos]
                                 #set x_gap_chars 0
                             } else {
-                                set x_gap [expr {$tx0 - $prevspan_x1}] ;#should be positive (but could it be negative, ie overlapped?)
+                                set x_gap [expr {$tx0 - $prevspan_x1}] ;#usually  positive but can easily be negative. e.g the word TAX in some fonts can have the left of the A slightly under the top bar of the T
+                                #if the font is large - this negative offset could be more than an entire width of our output character size.
+                                #we really need to know the source font to do this even close to properly - REVIEW
                                 set x_gap_chars [x_to_c $x_gap] ;#used to expand buffer below during collision detection.
                                 #set buffer [string repeat "^" $x_gap_chars]
                             }
@@ -1022,14 +1175,19 @@ namespace eval ::punk::pdf {
                             #if {$xidx > 0} {
                             #    set buffer [string repeat " " [expr {[x_to_c $effective_prev_x1] + $x_gap_chars}]]
                             #}
-
-                            set text_points [c_to_x [string length $text]]
-                            set tend        [expr {$tx0 + $text_points}]
-                            set remspace    [expr {$tx1 - $tend}]
-                            #set tailbuffer  [string repeat "-" [x_to_c $remspace]]
-                            set tailbuffer ""
-
-                            if {$xidx > 0 && $effective_prev_x1 ne "" && $tx0 <= $effective_prev_x1} {
+                            if {$xidx > 0 && $x_gap >= -3 && $x_gap < 0} {
+                                #treat small negative gap (overlap) in source (prev_x1 > tx0) (typically for character overhang/underhang) as adjacency
+                                dict set page_seen_chunks $text 1
+                                set buffer [string repeat " " [expr {[x_to_c $effective_prev_x1]}]]
+                                lappend line_chunks "$buffer$text"
+                            } elseif {$xidx > 0 && $x_gap >= 0 && $x_gap < ($base_x_per_c * 0.75)} {
+                                #treat no, or small positive gap in source (prev_x1 < tx0) as intended adjacency
+                                #.75 is yet another fudge for lack of font info - todo - rework to another engine?
+                                #this branch could be merged with above, but kept separate for clarity and for specific comments.
+                                dict set page_seen_chunks $text 1
+                                set buffer [string repeat " " [expr {[x_to_c $effective_prev_x1]}]]
+                                lappend line_chunks "$buffer$text"
+                            } elseif {$xidx > 0 && $effective_prev_x1 ne "" && $tx0 <= $effective_prev_x1} {
                                 #possible overlap/overlay - left x of this data is before right x of previous
 
                                 #first use output font's x_per_c and see if only overlaying whitespace
@@ -1075,6 +1233,8 @@ namespace eval ::punk::pdf {
                                             #set xcursor_prev [string length [punk::ansi::ansistrip [join [lrange $line_chunks 0 end-1] ""]]]
                                             #set buffer_prev [string repeat " " [expr {$startpos - $xcursor_prev}]]
                                             #lset line_chunks end "$buffer_prev[punk::ansi::a {*}$highlight_overwrites]$text[punk::ansi::a]"
+
+
                                             lset line_chunks end "$buffer[punk::ansi::a {*}$highlight_overwrites]$text[punk::ansi::a]"
                                         }
                                         #else: no need to overwrite same text if no ansi
@@ -1110,7 +1270,7 @@ namespace eval ::punk::pdf {
                                         set expand [string repeat " " $expandchars]
                                         append buffer $expand
                                         # REVIEW - seems to work reasonably (e.g in FTP-1.1.1.pdf mentioned) - but we'll still output a warning
-                                        lappend line_chunks $buffer$text$tailbuffer
+                                        lappend line_chunks $buffer$text
 
                                         if {$char_overlap > 0 && $warnings_textblock >= 4} {
                                             puts $errchan "Warning  pageindex [format %4s $pageindex] blockindex [format %4s $tbnum] blockwarning [format %4s $block_warning_count] type {text overlay}"
@@ -1137,7 +1297,7 @@ namespace eval ::punk::pdf {
                                     if {$xidx > 0 && $x_gap_chars == 0} {
                                         set buffer [string repeat " " [expr {[x_to_c $effective_prev_x1]}]]
                                     }
-                                    lappend line_chunks "$buffer$text$tailbuffer"
+                                    lappend line_chunks "$buffer$text"
                                 }
                             } else {
                                 dict set page_seen_chunks $text 1
@@ -1147,7 +1307,7 @@ namespace eval ::punk::pdf {
                                         set buffer [string repeat " " [expr {[x_to_c $effective_prev_x1]}]]
                                     }
                                 }
-                                lappend line_chunks "$buffer$text$tailbuffer"
+                                lappend line_chunks "$buffer$text"
                             }
                             #position 3 is spans_by_x0_adjusted
                             set chunk [lindex $line_chunks end]
@@ -1208,10 +1368,10 @@ namespace eval ::punk::pdf {
                     puts $outchan "____________________________$bx1,$by1"
                 }
             }
-            _process_merge_buffer MERGE_BUFFER $pageindex $cfg_blocksep_default $opt_blocksep $opt_merge_yblocks $warnings_textblock $outchan $errchan
             if {[llength $MERGE_BUFFER]} {
-                puts $errchan "WARNING: MERGE_BUFFER not empty, but should be empty at end of page"
-                puts $outchan [punk::lib::showdict -roottype list $MERGE_BUFFER]
+                _process_merge_buffer MERGE_BUFFER $pageindex $cfg_blocksep_default $opt_blocksep $opt_merge_yblocks $opt_shrink_textfree_blocks $opt_postcompact $warnings_textblock $outchan $errchan
+                #puts $errchan "WARNING: MERGE_BUFFER not empty, but should be empty at end of page"
+                #puts $outchan [punk::lib::showdict -roottype list $MERGE_BUFFER]
             }
         }
 
@@ -1219,12 +1379,15 @@ namespace eval ::punk::pdf {
             puts $errchan "MuPDF wasrepaired: [$d wasrepaired]"
             puts $errchan "MuPDF warnings   : [$d warnings]"
         }
+        flush $outchan
+        flush $errchan
         if {$opt_outchan eq "return" || $opt_errchan eq "return"} {
             #if both are 'return' at the same time - the same channel is used for both
-            flush $outchan
-            flush $errchan
-            set result [set results($callid)]
+            set result [encoding convertfrom utf-8 [set results($callid)]]
+            #set result [set results($callid)]
+            return $result
         }
+        return
     }
 
 }
@@ -1264,6 +1427,43 @@ tcl::namespace::eval punk::pdf::lib {
         return $result
     }
 
+    #subset of possible boms - just for experimenting
+    proc bom_ident {binstr} {
+        if {[string match \uFFEF* $binstr]} {
+            return bom
+        }
+        set first32 [string range $binstr 0 3]
+        set bytes [binary scan $binstr H2H2H2H2 a b c d]
+        switch -- $a {
+            fe {
+                if {"$b" eq "ff"} {
+                    return utf-16be
+                }
+            }
+            ef {
+                if {"$b$c" eq "bbbf"} {
+                    return utf-8
+                }
+            }
+            ff {
+                if {$b eq "fe"} {
+                    if {"$c$d" eq "0000"} {
+                        return utf-32le
+                    } else {
+                        return utf-16le
+                    }
+                }
+            }
+            default {
+                if {"$a$b$c$d" eq "0000feff"} {
+                    return utf-32be
+                }
+            }
+        }
+        return unknown
+    }
+
+
     #points to number of chars - in terms of fixed-width output font
     proc x_to_c {x} {
         upvar ::punk::pdf::base_x_per_c base_x_per_c
@@ -1277,20 +1477,50 @@ tcl::namespace::eval punk::pdf::lib {
         return [expr {$charcount * $base_x_per_c}]
     }
 
+    proc merge_buffer_dimensions {bufname} {
+        upvar $bufname MB
+        if {![llength $MB]} {
+            return [list 0 0 0 0] ;#x0 y0 x1 y1
+        }
+        set firstheader [lindex $MB 0 0]
+        set flds [dict get [lindex $firstheader 1] fields]
+        set dimensions [list [dict get $flds x0] [dict get $flds y0] [dict get $flds x1] [dict get $flds y1]]
+        #we can get negative values e.g image offset to left or top
+        foreach sametopblocks $MB {
+            foreach B $sametopblocks {
+                if {[lindex $B 0] eq "header"} {
+                    set flds [dict get [lindex $B 1] fields]
+                    set this_x0 [dict get $flds x0]
+                    set this_y0 [dict get $flds y0]
+                    set this_x1 [dict get $flds x1]
+                    set this_y1 [dict get $flds y1]
+                    lset dimensions 0 [expr {min([lindex $dimensions 0],$this_x0)}]
+                    lset dimensions 1 [expr {min([lindex $dimensions 1],$this_y0)}]
+                    lset dimensions 2 [expr {max([lindex $dimensions 2],$this_x1)}]
+                    lset dimensions 3 [expr {max([lindex $dimensions 3],$this_y1)}]
+                }
+            }
+        }
+        return $dimensions
+    }
     #whether merging on or off - we keep a MERGE_BUFFER
     #At every y-index change, and at end of page, process last entry(s)
-    proc _process_merge_buffer {bufname pageindex blocksep_default opt_blocksep domerge opt_warnings_textblock outc errc} {
+    proc _process_merge_buffer {bufname pageindex blocksep_default opt_blocksep domerge opt_shrink_textfree_blocks opt_postcompact opt_warnings_textblock outc errc} {
         upvar ::punk::pdf::base_x_per_c base_x_per_c
+        upvar ::punk::pdf::testchan testchan
         set x_per_line [expr {$base_x_per_c * 2}] ;#hack for now
         upvar $bufname MB
 
         if {!$domerge} {
             set block_warnings 0
+            #puts "MB:'$MB'"
             set warningrecords [lsearch -all -inline -index 0 $MB warnings]
             foreach wr $warningrecords {
                 incr block_warnings [lindex $wr 1]
             }
             #just emit the header & line blocks we have in the merge buffer one after the other
+            set blockresult "" ;#needed for postcompact
+            set emissions [list] ;#need to enable blank sep_block even when -postcompact is true
             foreach yset $MB {
                 foreach B $yset {
                     switch -- [lindex $B 0] {
@@ -1298,22 +1528,72 @@ tcl::namespace::eval punk::pdf::lib {
                             if {$opt_blocksep ne ""} {
                                 set sep [dict get [lindex $B 1] sep]
                                 set sep [string map [list <w> $block_warnings] $sep]
-                                puts $outc $sep
+                                #puts $outc $sep
+                                lappend emissions [list data $blockresult]
+                                lappend emissions [list sep $sep]
+                                set blockresult ""
+                                #append blockresult $sep \n
                             }
                         }
                         lines {
                             set lines [lindex $B 1]
+                            #puts $errc "--->[punk::ansi::ansistring VIEW $lines]"
+                            set idx -1
                             foreach l $lines {
-                                puts $outc $l
+                                incr idx
+                                if {[catch {
+                                    #puts $outc $l
+                                    append blockresult $l \n
+                                    } errMsg]} {
+                                        set prevline [lindex $lines $idx-1]
+                                        set nextline [lindex $lines $idx+1]
+                                        puts stderr "error writing output channel $outc\nerrMsg:$errMsg\n"
+                                        puts stderr "  prevline:'[punk::ansi::ansistring VIEW -lf 1 $prevline]'"
+                                        puts stderr "  lineview '[punk::ansi::ansistring VIEW -lf 1 $l]'"
+                                        puts stderr "  nextline:'[punk::ansi::ansistring VIEW -lf 1 $nextline]'"
+                                        #puts $testchan "=----------------------------------------="
+                                        #puts  $testchan $l
+                                        #puts $testchan "=----------------------------------------="
+                                    }
                             }
                         }
                         block {
-                            puts $outc [string map [list \uFFFD " "] [lindex $B 1]]
+                            if {!$opt_shrink_textfree_blocks} {
+                                #puts $outc [string map [list \uFFFD " "] [lindex $B 1]]
+                                append blockresult [string map [list \uFFFD " "] [lindex $B 1]] \n
+                                #puts $errc "--->[punk::ansi::ansistring VIEW [lindex $B 1]]"
+                            }
                         }
                         warnings {}
                     }
                 }
             }
+            lappend emissions [list data $blockresult]
+
+            set output ""
+            if {$opt_postcompact} {
+                foreach e $emissions {
+                    lassign $e etype data_or_header
+                    switch -- $etype {
+                        sep {
+                            append output $data_or_header \n
+                        }
+                        data {
+                            foreach ln [split $data_or_header \n] {
+                                if {[string trim $ln] ne ""} {
+                                    append output $ln \n
+                                }
+                            }
+                        }
+                    }
+                }
+                puts -nonewline $outc $output
+            } else {
+                foreach e $emissions {
+                    lassign $e _ data_or_header
+                    puts -nonewline $outc $data_or_header\n
+                }
+            }
             set MB [list]
             return
         }
@@ -1322,38 +1602,71 @@ tcl::namespace::eval punk::pdf::lib {
         set headers_structure [list]
         set mergedblock ""
         set warnings [list]
-        #puts "MB: $MB"
         if {[llength $MB]} {
-            set rootheader [lindex $MB 0 0 1]
-            #puts $errc "rootheader: $rootheader"
-            set base_y [dict get $rootheader fields y0]
+            #rootheader doesn't 'contain' subsequent blocks in x or y direction - it's just the y0 that triggers the new merge because it's below previous blocks
+            #set rootheader [lindex $MB 0 0 1]
+            ##puts $errc "rootheader: $rootheader"
+            #set base_y [dict get $rootheader fields y0]
+            set mbdimensions [merge_buffer_dimensions MB] ;# x0 y0 x1 y1
+            set base_y [lindex $mbdimensions 1]
+        }
+        #loop through and determine which blocks of those to be merged are RHS e.g largest x1
+        set max_x1 0
+        #todo - further refine to find blocks in the mergeset with no textblocks to right?
+        set rhs_block_indexes [list]
+        set textblock_x0_list [list]
+        foreach sametopblocks $MB {
+            foreach B $sametopblocks {
+                if {[lindex $B 0] eq "header"} {
+                    set flds [dict get [lindex $B 1] fields]
+                    #puts "flds: $flds"
+                    if {[dict get $flds type] eq "text"} {
+                        set this_x1 [dict get $flds x1]
+                        lappend textblock_x0_list [dict get $flds x0]
+                        if {abs($this_x1 - $max_x1) < 1} {
+                            lappend rhs_block_indexes [dict get $flds blockindex]
+                            set max_x1 [expr {max($this_x1,$max_x1)}]
+                        } elseif {$this_x1 > $max_x1} {
+                            set max_x1 $this_x1
+                            set rhs_block_indexes [list [dict get $flds blockindex]]
+                        }
+                    }
+                }
+            }
         }
-        foreach ybuf $MB {
+        foreach sametop $MB {
             set yheaders [list]
             set y_index ""
-            if {[llength $ybuf]} {
-                set firstheader [lindex $ybuf 0 1]
-                set blockrow_y [dict get $firstheader fields y0]
-                set blockrow_offset [expr {$blockrow_y - $base_y}]
-                set blockrow_x0 [dict get $firstheader fields x0]
-                set blockrow_x1 [dict get $firstheader fields x1]
+            if {[llength $sametop]} {
+                set firstheader [lindex $sametop 0 1]
+                set blockrow_y0 [dict get $firstheader fields y0]
+                set blockrow_y1 [dict get $firstheader fields y1]
+                set blockrow_top_offset [expr {$blockrow_y0 - $base_y}]
+                #set blockrow_x0 [dict get $firstheader fields x0]
+                #set blockrow_x1 [dict get $firstheader fields x1]
                 if {$opt_warnings_textblock >= 9} {
                     puts $errc "Warning type debug operation {merge blockrow} firstheader $firstheader"
                 }
             }
 
-            set leftright_idx -1
-            foreach B $ybuf {
-                incr leftright_idx
-                #if {$leftright_idx == 0} {
-                #    set firstheader [lindex $B 1]
-                #}
-                #while {[llength $ybuf]} {}
-                #set B [lpop ybuf 0]
+            set header_idx -1
+            #set leftright_max [expr {[llength $sametop] -1}]
+            set hdrs [lsearch -all -inline -index 0 $sametop header]
+            #puts "--- > $hdrs"
+            set header_max [expr {[llength $hdrs] -1}]
+            foreach B $sametop {
                 #puts "====>$B"
                 switch -- [lindex $B 0] {
                     header {
-                        set this_y_index [dict get [lindex $B 1] fields y-index]
+                        incr header_idx  ;#not necessarily in left-right order!
+                        set this_y_index        [dict get [lindex $B 1] fields y-index]
+                        set this_x0             [dict get [lindex $B 1] fields x0]
+                        set this_x1             [dict get [lindex $B 1] fields x1]
+                        set this_y0             [dict get [lindex $B 1] fields y0]
+                        set this_y1             [dict get [lindex $B 1] fields y1]
+                        set this_top_offset     [expr {$this_y0 - $base_y}]
+                        set this_bottom_offset  [expr {$this_y1 - $base_y}]
+                        set this_block_index    [dict get [lindex $B 1] fields blockindex]
                         if {$y_index eq ""} {
                             set y_index $this_y_index
                         } else {
@@ -1372,10 +1685,11 @@ tcl::namespace::eval punk::pdf::lib {
                         set lines [lindex $B 1]
                         #review - vertical oriented text at left can stomp on data during merge
                         #we are only given the bbox info by tclMuPDF
-                        if {abs($blockrow_x1 - $blockrow_x0) < 1} {
+                        if {abs($this_x1 - $this_x0) < 1} {
                             #non displayable
                             #todo - emit at document tail as a non-layed out block?
                             if {$opt_warnings_textblock > 0} {
+                                #review - firstheader vs this_block_index?
                                 #puts $errc "Warning - no x space. Undisplayed text on page:[dict get $firstheader fields pageindex] block:[dict get $firstheader fields blockindex] text:[join $lines \n]"
                                 puts $errc "Warning pageindex [dict get $firstheader fields pageindex] blockindex [dict get $firstheader fields blockindex] type {no x space} msg {Undisplayed text}  text \"[join $lines \n]\""
                             }
@@ -1386,38 +1700,95 @@ tcl::namespace::eval punk::pdf::lib {
                             #e.g overtype::textblock::ellipsis $space_available $block
 
                             set block [join $lines \n]
-                            set bwidth [textblock::widthtopline $block] ;#assume non-ragged block
+                            set numlines [llength $lines]
+                            set bwidth [textblock::width $block] ;#assume ragged block (differing line lengths)
                             set margintop ""
-                            if {$blockrow_offset > 0} {
-                                set offsetlines [expr {int(ceil($blockrow_offset / $x_per_line))}]
-                                set margintop [textblock::block $bwidth $offsetlines " "]
+                            set bottom_offsetlines [expr {int(round($this_bottom_offset / $x_per_line))}]
+                            if {[llength $MB] > 1 && $bottom_offsetlines >= $numlines} {
+                                #can use bottom of textblock dist from top (better line alignment for scenario: "smallprint fieldname:" "bigprint value" aligned at base  ???)
+                                set topmarginlines [expr {$bottom_offsetlines - $numlines}]
+                                #puts "====> offset from bottom bottom_offsetlines:$bottom_offsetlines topmarginlines:$topmarginlines this_y1:$this_y1"
+                                if {$topmarginlines > 0} {
+                                    set margintop [textblock::block $bwidth $topmarginlines " "]
+                                }
+                            } else {
+                                if {$this_top_offset > 0} {
+                                    #puts "====> offset from top"
+                                    #set offsetlines [expr {int(ceil($blockrow_top_offset / $x_per_line))}]
+                                    set offsetlines [expr {int(round($blockrow_top_offset / $x_per_line))}] ;#
+                                    #set offsetlines [expr {int(floor($this_top_offset / $x_per_line))}] ;#
+                                    if {$offsetlines > 0} {
+                                        set margintop [textblock::block $bwidth $offsetlines " "]
+                                    }
+                                }
+                            }
+                            if {$this_block_index ni $rhs_block_indexes} {
+                                #truncate overwidth blocks except for rhs
+                                #overwidth if can collide (or merge) with x0 of any other textblocks in the merge set
+                                #only interested in blocks where x0 >= this x1
+                                set next_x0_list [list]
+                                foreach x0 $textblock_x0_list {
+                                    if {$x0 >= ($this_x1 - $base_x_per_c)} {
+                                        lappend next_x0_list $x0
+                                    }
+                                }
+                                set next_x0_list [lsort -real $next_x0_list]
+                                set closest_x0 [lindex $next_x0_list 0]
+                                if {$closest_x0 ne ""} {
+                                    #set maxwidthchars [x_to_c $this_x1]
+                                    set maxwidthchars [x_to_c $closest_x0]
+                                    incr maxwidthchars -1 ;#vertically merging text blocks results in unintended text adjacencies - REVIEW
+                                    if {$maxwidthchars < $bwidth} {
+                                        #warn of rhs truncation
+                                        #text will have 'ellipsis'
+                                        #review - source text could already have ellipsis - this makes detecting truncation harder
+                                        #for now we will produce a warning if ellipsis is in the result and is on the right hand side
+                                        #todo - use a function other than textblock::frame which can report truncation
+                                        set block [textblock::frame -type {} -boxlimits {} -width [expr {$maxwidthchars +2}] $block] ;# +2 for vertical frame borders - which are removed but needed for block -width
+                                        set is_truncated 0
+                                        foreach ln [split [punk::ansi::ansistrip $block] \n] {
+                                            if {[regexp {[\u2026]$} $ln]} {
+                                                set is_truncated 1
+                                                break
+                                            }
+                                        }
+                                        if {$is_truncated} {
+                                            if {$opt_warnings_textblock > 0} {
+                                                puts $errc "Warning pageindex [dict get $firstheader fields pageindex] blockindex [dict get $firstheader fields blockindex] type {truncation} msg {merged textblock truncated on rhs}  text \"[join $lines \n]\""
+                                            }
+                                            lappend warnings 1
+                                        }
+                                    }
+                                }
+                            }
+                            #apply margintop after possible rhs truncation above (no need for ellipsis above content)
+                            if {$margintop ne ""} {
+                                set movedblock $margintop\n$block
+                            } else {
+                                set movedblock $block
                             }
+
                             if {$mergedblock eq ""} {
                                 #could be image - as we are using overtype - should result in blank area
                                 #todo - consider option to set bg colour, then create a block of spaces of the right size  - but don't add any chars other than ANSI codes which can be stripped easily.
-                                if {$margintop ne ""} {
-                                    set mergedblock $margintop\n$block
-                                } else {
-                                    set mergedblock $block
-                                }
+                                set mergedblock $movedblock
                             } else {
-                                if {$margintop ne ""} {
-                                    set movedblock $margintop\n$block
-                                } else {
-                                    set movedblock $block
-                                }
                                 set mergedblock [overtype::block -transparent 1 -overflow 1 $mergedblock $movedblock]
                             }
+
                         }
                     }
                     block {
                         set block [lindex $B 1]
                         set bwidth [textblock::widthtopline $block] ;#assume non-ragged block
                         set margintop ""
-                        if {$blockrow_offset > 0} {
-                            set offsetlines [expr {int(ceil($blockrow_offset / $x_per_line))}]
-                            set margintop [textblock::block $bwidth $offsetlines "\uFFFD"]
-                            #puts "=====> offset:$blockrow_offset"
+                        if {$this_top_offset > 0} {
+                            #set offsetlines [expr {int(ceil($this_top_offset / $x_per_line))}]
+                            set offsetlines [expr {int(round($this_top_offset / $x_per_line))}]
+                            if {$offsetlines > 0} {
+                                set margintop [textblock::block $bwidth $offsetlines "\uFFFD"]
+                            }
+                            #puts "=====> offset:$this_top_offset"
                         }
                         if {$mergedblock eq ""} {
                             if {$margintop ne ""} {
@@ -1431,6 +1802,7 @@ tcl::namespace::eval punk::pdf::lib {
                             } else {
                                 set movedblock $block
                             }
+                            #review - img placeholder can obscure text if above it - we don't have transparency info - so probably not what's wanted
                             set mergedblock [overtype::block -transparent \uFFFD -overflow 1 $mergedblock $movedblock]
                         }
                     }
@@ -1443,6 +1815,7 @@ tcl::namespace::eval punk::pdf::lib {
             lappend headers_structure $yheaders
         }
         if {[llength $all_headers] == 1} {
+            #puts stdout "---> [lindex $all_headers 0 1]"
             #only one block was in the MERGE_BUFFER
             set stored_filled_sep [dict get [lindex $all_headers 0] sep] 
             if {$opt_blocksep ne ""} {
@@ -1453,9 +1826,41 @@ tcl::namespace::eval punk::pdf::lib {
                 }
                 #all % placeholders filled, but <w> isn't
                 set sep [string map [list <w> $block_warnings] $stored_filled_sep]
+                #still want to allow emitting empty line when opt_postcompact is true, if opt_blocksep is set to space
                 puts $outc $sep
             }
-            puts $outc $mergedblock
+            #set is_text_free 1 ;#default to disprove
+            #set headerinfo [lindex $all_headers 0 1]
+            #set list_merges [dict get $headerinfo type]
+            #foreach blockrow_merge $list_merges {
+            #    foreach tp $blockrow_merge {
+            #        if {$tp eq "text"} {
+            #            #todo - look for whitespace only and treat as text free
+            #            set is_text_free 0
+            #        }
+            #    }
+            #}
+            set blockresult ""
+            if {$opt_shrink_textfree_blocks} {
+                set teststripped [punk::ansi::ansistrip $mergedblock]
+                if {[string trim $teststripped] ne ""} {
+                    set blockresult $mergedblock
+                }
+            }  else {
+                set blockresult $mergedblock
+            }
+            set output ""
+            if {$opt_postcompact} {
+                foreach ln [split $blockresult \n] {
+                    if {[string trim $ln] ne ""} {
+                        append output $ln \n
+                    }
+                }
+                puts -nonewline $outc $output
+            } else {
+                puts -nonewline $outc $blockresult\n
+            }
+
         } elseif {[llength $all_headers] > 1} {
             #more than one block was in the MERGE_BUFFER
             set merged_minx0 Inf
@@ -1489,7 +1894,27 @@ tcl::namespace::eval punk::pdf::lib {
                 dict set map %marker% "[punk::ansi::a bold cyan]MERGEDBLOCK[punk::ansi::a]"
                 puts $outc [string map $map $opt_blocksep]
             }
-            puts $outc $mergedblock
+            if {$opt_shrink_textfree_blocks} {
+                set teststripped [punk::ansi::ansistrip $mergedblock]
+                if {[string trim $teststripped] ne ""} {
+                    #puts $outc $mergedblock
+                    set blockresult $mergedblock
+                }
+            }  else {
+                #puts $outc $mergedblock
+                set blockresult $mergedblock
+            }
+            set output ""
+            if {$opt_postcompact} {
+                foreach ln [split $blockresult \n] {
+                    if {[string trim $ln] ne ""} {
+                        append output $ln \n
+                    }
+                }
+                puts -nonewline $outc $output
+            } else {
+                puts -nonewline $outc $blockresult\n
+            }
         }
         set MB [list]
     }
diff --git a/src/project_layouts/custom/_project/punk.basic/src/make.tcl b/src/project_layouts/custom/_project/punk.basic/src/make.tcl
index 8cb8153c..c3cbc9aa 100644
--- a/src/project_layouts/custom/_project/punk.basic/src/make.tcl
+++ b/src/project_layouts/custom/_project/punk.basic/src/make.tcl
@@ -3075,7 +3075,7 @@ foreach vfstail $vfs_tails {
                                 #copy the version that is mounted in this runtime to vfsname.new 
                                 if {[catch {
                                     file copy -force $building_runtime $buildfolder/$vfsname.new
-                                    catch {exec chmod +x $buildfolder/$vfsname.new}
+                                    catch {exec chmod +w $buildfolder/$vfsname.new}
                                 } errM]} {
                                     puts stderr "$kit_type 'file copy -force $building_runtime $buildfolder/$vfsname.new' failed\n$errM"
                                     error $errM
diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/flagfilter-0.3.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/flagfilter-0.3.tm
index 1d37e215..00f58e82 100644
--- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/flagfilter-0.3.tm
+++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/flagfilter-0.3.tm
@@ -1538,6 +1538,7 @@ namespace eval flagfilter {
         }
     }
 
+    #todo - rename 'cprocessor' is misleading
     oo::class create cprocessor {
         variable o_runid
         variable o_name
@@ -1577,7 +1578,9 @@ namespace eval flagfilter {
             if {[dict exists $o_pinfo match]} {
                 set o_matchspec [dict get $o_pinfo match]
             } else {
-                set o_matchspec {^[^-^/].*}  ;#match anything that isn't flaglike
+                #review - unix paths? conflict with windows style flag such as /w
+                #must accept empty string
+                set o_matchspec {^[^-^/].*|^$}  ;#match anything that isn't flaglike
             }
             set o_found_match 0
             set o_matched_argument "" ;#need o_found_match to differentiate match of empty string
diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm
index 6fb185a9..83dad2bf 100644
--- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm
+++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm
@@ -6947,7 +6947,8 @@ namespace eval punk {
             set newrow {}
             foreach oldrow $list_rows {
                 if {$j >= [llength $oldrow]} {
-                    continue
+                    #continue 
+                    lappend newrow ""
                 } else {
                     lappend newrow [lindex $oldrow $j]
                 }
@@ -6956,6 +6957,19 @@ namespace eval punk {
         }
         return $res
     }
+    proc transpose_equal_lists {list_rows} {
+        set columns [list]
+        set rowidx -1
+        foreach l $list_rows {
+            set colidx -1
+            incr rowidx
+            foreach val $l {
+                incr colidx
+                lset columns $colidx $rowidx $val
+            }
+        }
+        return $columns
+    }
     proc transpose_strings {list_of_strings} {
         set charlists [lmap v $list_of_strings {split $v ""}]
         set tchars [transpose_lists $charlists]
diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm
index 255715ad..ad2d58f4 100644
--- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm
+++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm
@@ -3785,7 +3785,7 @@ Brightblack 100   Brightred 101   Brightgreen 102   Brightyellow 103   Brightblu
         }
 
         set codestack [list]
-        if {[punk::ansi::ta::detect $text]} {
+        if {[punk::ansi::ta::detectcode $text]} {
             set emit ""
             #set parts [punk::ansi::ta::split_codes $text]
             set parts [punk::ansi::ta::split_codes_single $text]
@@ -3892,7 +3892,7 @@ Brightblack 100   Brightred 101   Brightgreen 102   Brightyellow 103   Brightblu
         }
 
         set codestack [list]
-        if {[punk::ansi::ta::detect $text]} {
+        if {[punk::ansi::ta::detectcode $text]} {
             set emit ""
             #set parts [punk::ansi::ta::split_codes $text]
             set parts [punk::ansi::ta::split_codes_single $text]
@@ -4158,7 +4158,8 @@ Brightblack 100   Brightred 101   Brightgreen 102   Brightyellow 103   Brightblu
     # where line and column are ascii codes whose values are +31
     # vt52 can be entered/exited via escapes
     # This means we probably need to to wrap enter/exit vt52 and keep this state - as we don't have a standard way to query for terminal type
-    # (vt52 supports ESC Z - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
+    # (vt52 supports ESC Z (obs DECID) - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
+    #ESC\[c - is more modern equiv of DECID
 
     lappend PUNKARGS [list {
         @id     -id     ::punk::ansi::vt52move
@@ -4946,6 +4947,8 @@ to 223 (=255 - 32)
         }
         if {[string length $text] < 2} {return $text}
         set parts [punk::ansi::ta::split_codes $text]
+        #review - if we have only one element of a paired codeset such as PM,SOS - it will not be found by split_codes
+        #The output technically then still contains ansi (which may for example be hidden by terminal despite lack of closing ST)
         if {[llength $parts] == 1} {return [lindex $parts 0]}
         set out ""
         #todo - try: join [lsearch -stride 2 -index 0 -subindices -all -inline  $parts *] ""
@@ -5686,21 +5689,106 @@ tcl::namespace::eval punk::ansi {
         #[list_end] [comment {--- end definitions namespace punk::ansi::codetype ---}]
     }
     tcl::namespace::eval sequence_type {
-        proc is_Fe {code} {
+        #first byte after ESC identifies code type
+        #NOTE - we are looking for valid start of a single sequence here
+        #- not whether it is complete or where it ends, unless it's a fixed number of bytes
+
+        #\u0020-\u002F
+        # ESC <sp>!"#$%&'()*+,-./
+
+        #\u0030-\u003F
+        #ESC 0-9:;<=>?
+
+        #\u0040-\u005F
+        # ESC  @A-Z[\]^
+
+        #\u0060-\u007E
+
+        proc is_Fe7 {code} {
             # C1 control codes
-            if {[regexp {^\033\[[\u0040-\u005F]}]} {
-                #7bit - typical case
-                return 1
-            }
+            #7bit - typical case
+            # ESC  @A-Z[\]^
+            return [regexp {^\033[\u0040-\u005F]} $code] 
+        }
+        proc is_Fe {code} {
+            #although Fe7 more common - we'll put the simpler regex for 8 first
+            return [expr {[is_Fe8 $code] || [is_Fe7 $code]}]
+        }
+        proc is_Fe8 {code} {
             #8bit
-            #review - all C1 escapes ? 0x80-0x90F
+            #review - all C1 escapes ? 0x80-0x9F
             #This is possibly problematic as it is affected by encoding.
             #According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
             #"However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used."
-            return 0
+            return [regexp {^[\u0080-\u09F]} $code]
+        }
+        #ESC 0-9,:,;,<,=,>,?
+        proc is_Fp {code} {
+            #single byte following ESC
+            return [regexp {^\033[\u0030-\u003F]$} $code] 
         }
+
+        #https://en.wikipedia.org/wiki/ISO/IEC_2022
+        #e.g
+        # ESC a (INT) interrupts the current process
+        # ESC c (RIS) reset terminal to initial state
+        #ESC `a-z{|}~
         proc is_Fs {code} {
-            puts stderr "is_Fs unimplemented"
+            #single byte following ESC
+            return [regexp {^\033[\u0060-\u007E]$} $code] 
+        }
+
+
+        proc is_nF {code} {
+            #2 bytes
+            #subcategorised by the low two bits of the first byte (n)
+            #further by whether the final byte is in \u0030-u003f (p) or not (t)
+            return [regexp {^\033[\u0020-\u002F]+[\u0030-\u007E]$} $code]
+        }
+
+        #review - test
+        #3Fp - private use 
+        #e.g vt100
+        # ESC#3 DECDHL double-height letters top half
+        # ESC#4 DECDHL double-height letters bottom half
+        # ESC#5 DECSWL single-width line
+        # ESC#6 DECDWL double-width line
+        proc is_3Fp {code} {
+            return [regexp {^\033#[\u0020-\u002F]*[\u0030-\u003F]$} $code] ;#check regexp
+        }
+
+        proc is_code7 {code} {
+            #Fe | Fs | Fp | nF | Fe
+            return [regexp {^\033[\u0040-\u005F]|^\033[\u0060-\u007e]$|^\033[\u0030-\u003F]$|^\033[\u0020-\u002F]+[\u0030-\u007E]$} $code]
+        }
+        proc is_code8 {code} {
+            return [regexp {^[\u0080-\u09F]} $code]
+        }
+        proc is_code {code} {
+            return [expr {[is_code8 $code] || [is_code7 $code]}]
+        }
+
+        proc classify {code} {
+            return [switch -regexp -- $code {
+                {^\033[\u0030-\u003F]$}                  {string cat Fp}
+                {^[\u0080-\u009F]|^\033[\u0040-\u005F]}  {string cat Fe}
+                {^\033[\u0060-\u007E]$}                  {string cat Fs}
+                {^\033[\u0020-\u002F]+[\u0030-\u007E]$} {
+                    #nF sequences
+                    set firstbytenum [scan [string index $code 1] %c]
+                    set lastbyte [string index $code end]
+
+                    set n [expr {$firstbytenum & 3}]
+                    if {[regexp {[\u0030-\u003F]} $lastbyte]} {
+                        set tp p
+                    } else {
+                        set tp t
+                    }
+                    string cat ${n}F$tp
+                }
+                {^\033#[\u0020-\u002F]*[\u0030-\u003F]$} {string cat 3Fp}
+                default                                  {string cat unknown}
+            }]
         }
     }
     # -- --- --- --- --- --- --- --- --- --- ---
@@ -5718,6 +5806,7 @@ tcl::namespace::eval punk::ansi::ta {
     #[para] https://github.com/perlancar/perl-Text-ANSI-Util/blob/master/lib/Text/ANSI/BaseUtil.pm
     #[list_begin definitions]
     tcl::namespace::path ::punk::ansi
+    namespace export detect detect_in_list detect_sgr extract length split_codes split_at_codes split_codes_single
 
     variable PUNKARGS
 
@@ -5748,20 +5837,27 @@ tcl::namespace::eval punk::ansi::ta {
     variable re_osc_open {(?:\x1b\]|\u009d).*}
 
 
+    #review - distinguishing standalone codes vs those that are paired with contents considered part of the code
+    #e.g PM,SOS are 'paired' ended by ST
+
     #variable standalone_code_map [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""]
     variable re_standalones {(?:\x1bc|\x1b7|\x1b8|\x1bM|\x1bD|\x1bE|\x1bH|\x1b=|\x1b>|\x1b#3|\x1b#4|\x1b#5|\x1b#6|\x1b#8)}
     variable re_standalones_vt52 {(?:\x1bZ)}
 
-    #ESC Y move, ESC b foreground colour
+    # -- 
+    #ESC Y move                 - \x1bY<byte><byte> ie 2 bytes to close
+    #ESC b foreground colour    - \x1bb<byte>  1 byte to close
+    variable re_vt52_incomplete {(?:\x1bY(.){0,1}$|\x1bb$)}
+    #\x1bc vt52 bgcolour conflict ?
     #ESC F - gr-on ESC G - gr-off
-    variable re_vt52_open {(?:\x1bY|\x1bb|\x1bF)}
-    #\x1bc vt52 bgcolour conflict ??
+    # --
 
     #if we don't split on altgraphics too and separate them out - it's easy to get into a horrible mess
     variable re_g0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B}
     variable re_g0_open  {(?:\x1b\(0)}
     variable re_g0_close {(?:\x1b\(B)}
 
+    #detect start of ansicode that is closed by ST
     # DCS "ESC P" or "0x90" is also terminated by ST
     set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)}
     #ST terminators [list \007 \033\\ \u009c]
@@ -5777,12 +5873,15 @@ tcl::namespace::eval punk::ansi::ta {
     variable re_ST      {(?:\x1bP|\x1bX|\x1b\^|\x1b_)(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007)|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)}
 
 
+    #variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_standalones}|${re_standalones_vt52}|${re_ST_open}|${re_g0_open}|${re_vt52_open}"
+    #variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_ST_open}|${re_g0_open}|${re_vt52_open}"
+    variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_ST_open}|${re_vt52_incomplete}"
 
     #consider standalones as self-opening/self-closing - therefore included in both ansi_detect and ansi_detect_open
 
     #default for regexes is non-newline-sensitive matching - ie matches can span lines
     # -- --- --- ---
-    variable re_ansi_detect1 "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}"
+    #variable re_ansi_detect1 "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}"
     # -- --- --- ---
     #handrafted TRIE version of above. Somewhat difficult to construct and maintain. TODO - find a regexp TRIE generator that works with Tcl regexes
     #This does make things quicker - but it's too early to finalise the detect/split regexes (e.g missing \U0090 ) - will need to be redone.
@@ -5802,20 +5901,24 @@ tcl::namespace::eval punk::ansi::ta {
     # plus more for auxiliary keypad codes in keypad application mode (and some in numeric mode)
 
     #regexp expanded syntax = ?x
+    #full detect - checking for closing sequences
     variable re_ansi_detect {(?x)
-            (?:\x1b(?:\[(?:[\x20-\x2f\x30-\x3f]*[\x40-\x7e])|c|7|8|M|D|E|H|=|>|<|A|B|C|I|J|K|L|M|Z|(?:Y(?:..))|(?:b(?:.))|\((?:0|B)|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007))|(?:\#(?:3|4|5|6|8))))
+            (?:\x1b(?:\[(?:[\x20-\x3f]*[\x40-\x7e])|a|c|7|8|M|D|E|H|=|>|<|A|B|C|I|J|K|L|M|Z|(?:Y(?:..))|(?:b(?:.))|\((?:0|B)|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007))|(?:\#(?:3|4|5|6|8))))
             |(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)
-            |(?:\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]
+            |(?:\u009b)[\x20-\x3f]*[\x40-\x7e]
             |(?:\u009d)(?:[^\u009c]*)?\u009c
     }
     #---
+    #todo
+    #variable re_ansi_detectcode $re_ansi_detect
+    #variable re_ansi_detectcode {\x1b[\u0040-\u005F]|\x1b[\u0060-\u007e]|\x1b[\u0030-\u003F]|\x1b[\u0020-\u002F]+[\u0030-\u007E]}
+    variable re_ansi_detectcode {(?:\x1b(?:[\u0030-\u007E]|[\u0020-\u002F]+[\u0030-\u007E]))|[\u0090-\u009F]}
 
     # -- --- --- ---
     #variable re_csi_code  {(?:\x1b\[|\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]}
 
 
 
-    variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_standalones}|${re_standalones_vt52}|${re_ST_open}|${re_g0_open}|${re_vt52_open}"
 
     #may be same as detect - kept in case detect needs to diverge
     #variable re_ansi_split "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}"
@@ -5827,20 +5930,6 @@ tcl::namespace::eval punk::ansi::ta {
         set re_ansi_split_multi "(?:${re_ansi_split})+"
     }
 
-    lappend PUNKARGS [list -dynamic 0 {
-            @id     -id ::punk::ansi::ta::detect
-            @cmd    -name punk::ansi::ta::detect -help\
-                "Return a boolean indicating whether Ansi codes were detected in text.
-                Important caveat:
-                When text is a tcl list made from splitting (or lappending) some ansi string
-                - individual elements may be braced or have certain chars escaped.
-                (one example is if a list element contains an unbalanced  brace)
-                This can cause square brackets that form part of the ansi to be backslash escaped
-                - and the function can fail to match it as an Ansi code.
-                "
-            @values -min 1
-            text -type string
-    } ]
 
     #*** !doctools
     #[call [fun detect] [arg text]]
@@ -5851,10 +5940,35 @@ tcl::namespace::eval punk::ansi::ta {
 
     #detect any ansi escapes
     #review - only detect 'complete' codes - or just use the opening escapes for performance?
+    lappend PUNKARGS [list {
+            @id     -id ::punk::ansi::ta::detect
+            @cmd    -name punk::ansi::ta::detect\
+                -summary\
+                "Test if text has completed ANSI codes"\
+                -help\
+                "Return a boolean indicating whether *complete* Ansi codes were detected in text.
+
+                By complete, it means that paired squences such as PM (privacy message) must be
+                closed. It also means that a truncated sequence such as \\x1b\\\[ or a lone escape
+                will not be detected as ANSI.
+
+                Use punk::ansi::ta::detectcode as a slightly faster detector for ANSI codes, that does
+                not require paired sequences to have both starting and end sequences to be detected.
+
+                Important caveat:
+                When text is a tcl list made from splitting (or lappending) some ansi string
+                - individual elements may be braced or have certain chars escaped.
+                (one example is if a list element contains an unbalanced  brace)
+                This can cause square brackets that form part of the ansi to be backslash escaped
+                - and the function can fail to match it as an Ansi code.
+                "
+            @values -min 1
+            text -type string -help\
+                "Block of text. See caveat above about lists."
+    } ]
     proc detect {text} [string map [list <re> [list $re_ansi_detect]] {
         regexp <re> $text
     }]
-
     #can be used on dicts - but will check keys too. keys could also contain ansi and have escapes
     proc detect_in_list {list} {
         #loop is commonly faster than using join. (certain ansi codes triggering list quoting? review)
@@ -5865,6 +5979,22 @@ tcl::namespace::eval punk::ansi::ta {
         }
         return 0
     }
+
+    #will detect for example lone opening or closing PM
+    proc detectcode {text} [string map [list <re> [list $re_ansi_detectcode]] {
+        regexp <re> $text
+    }]
+    proc detectcode_in_list {list} {
+        #loop is commonly faster than using join. (certain ansi codes triggering list quoting? review)
+        foreach item $list {
+            if {[detectcode $item]} {
+                return 1
+            }
+        }
+        return 0
+    }
+
+
     #surprisingly - the ::join operation can be (relatively) slow on lists containing ansi
     proc detect_in_list2 {list} {
         detect [join $list " "]
@@ -5915,6 +6045,8 @@ tcl::namespace::eval punk::ansi::ta {
         variable re_sgr
         expr {[regexp $re_sgr $text]}
     }
+
+    #perl: ta_strip
     proc strip {text} {
         #*** !doctools
         #[call [fun strip] [arg text]]
@@ -5922,6 +6054,39 @@ tcl::namespace::eval punk::ansi::ta {
         #[para]This is a tailcall to punk::ansi::ansistrip
         tailcall ansistrip $text
     }
+
+    lappend PUNKARGS [list {
+        @id     -id ::punk::ansi::ta::extract
+        @cmd    -name punk::ansi::ta::extract\
+            -summary\
+            "Return only the ANSI codes in text"\
+            -help\
+            "This is the opposite of strip,
+              returning only the ANSI codes in text."
+        @values -min 1 -max 1
+        text -type string 
+    } ]
+    proc extract {text} {
+        set parts [split_codes $text]
+        set out ""
+        foreach {pt code} $parts {
+            append out $code
+        }
+        return $out
+    }
+
+    lappend PUNKARGS [list {
+            @id     -id ::punk::ansi::ta::length
+            @cmd    -name punk::ansi::ta::length\
+                -summary\
+                "Calculate length of text (excluding the ANSI codes)"\
+                -help\
+                "Calculate length of text (excluding the ANSI codes)
+                This is not the printing length of the string on screen."
+            @values -min 1
+            text -type string 
+    } ]
+    #perl: ta_length
     proc length {text} {
         #*** !doctools
         #[call [fun length] [arg text]]
@@ -5936,6 +6101,8 @@ tcl::namespace::eval punk::ansi::ta {
     #
     #}
 
+    #perl: ta_trunc
+    #truncate $text to $width columns while still including all the ANSI colour codes.
     proc trunc {text width args} {
 
     }
@@ -8504,6 +8671,10 @@ tcl::namespace::eval punk::ansi::internal {
     }
 }
 
+tcl::namespace::eval punk::ansi {
+    namespace import ::punk::ansi::ta::detect
+}
+
 #inserting into global namespace like this should be kept to a minimum.. but this is considered a core aspect of punk::ansi
 #todo - document
 interp alias {} ansistring {} ::punk::ansi::ansistring
diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/args-0.2.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/args-0.2.tm
index fc438d57..a6224c0d 100644
--- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/args-0.2.tm
+++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/args-0.2.tm
@@ -1364,6 +1364,11 @@ tcl::namespace::eval punk::args {
                                         #}
                                         tcl::dict::set tmp_optspec_defaults -type $v
                                     }
+                                    -defaultdisplaytype {
+                                        #how the -default is displayed
+                                        #-default doesn't have to be the same type as -type which validates user input that is not defaulted.
+                                        tcl::dict::set tmp_optspec_defaults -defaultdisplaytype $v
+                                    }
                                     -parsekey {
                                         tcl::dict::set tmp_optspec_defaults -parsekey $v
 
@@ -1441,7 +1446,7 @@ tcl::namespace::eval punk::args {
                                     default {
                                         set known   {   -parsekey -group -grouphelp\
                                                         -any -anyopts -arbitrary -form -minsize -maxsize -choices -choicegroups -choicemultiple -choicecolumns -choicelabels -choiceinfo\
-                                                        -type -range -typeranges -default -typedefaults
+                                                        -type -range -typeranges -default -defaultdisplaytype -typedefaults
                                                         -choiceprefix -choiceprefixdenylist -choiceprefixreservelist -choicerestricted -nocase\
                                                         -nominsize -nomaxsize -norange -nochoices -nochoicelabels\
                                                         -type -optional -allow_ansi -validate_ansistripped -strip_ansi -multiple -prefix\
@@ -2052,6 +2057,9 @@ tcl::namespace::eval punk::args {
                                 tcl::dict::set spec_merged -optional 1
                             }
                         }
+                        -defaultdisplaytype {
+                            tcl::dict::set spec_merged -defaultdisplaytype $specval
+                        }
                         -typedefaults {
                             set typecount [llength [tcl::dict::get $spec_merged -type]]
                             if {$typecount != [llength $specval]} {
@@ -2114,7 +2122,7 @@ tcl::namespace::eval punk::args {
                                                     -form -type\
                                                     -parsekey -group\
                                                     -range -typeranges\
-                                                    -default -typedefaults\
+                                                    -default -defaultdisplaytype -typedefaults\
                                                     -minsize -maxsize -choices -choicegroups\
                                                     -choicemultiple -choicecolumns -choiceprefix -choiceprefixdenylist -choiceprefixreservelist -choicerestricted\
                                                     -nocase -optional -multiple -validate_ansistripped -allow_ansi -strip_ansi -help\
@@ -3784,7 +3792,32 @@ tcl::namespace::eval punk::args {
                         }
 
                         if {[dict exists $arginfo -default]} {
-                            set default "'$A_DEFAULT[dict get $arginfo -default]$RST'"
+                            #default isn't necessarily of same type as -type required for validation
+                            #Guessing at the type from the data is likely to be unsatisfactory.
+
+                            set defaultdisplaytype [Dict_getdef $arginfo -defaultdisplaytype string]
+                            switch -- $defaultdisplaytype {
+                                dict {
+                                    #single level
+                                    set rawdefault [dict get $arginfo -default]
+                                    set default     "{\n"
+                                    dict for {k v} $rawdefault {
+                                        append default "  \"$k\" \"$v\"\n"
+                                    }
+                                    append default  "}"
+                                }
+                                list {
+                                    set default     "{\n"
+                                    foreach v $rawdefault {
+                                        append default "  \"$v\"\n"
+                                    }
+                                    append default "}"
+                                }
+                                default {
+                                    #set default "'$A_DEFAULT[dict get $arginfo -default]$RST'"
+                                    set default "'[dict get $arginfo -default]'"
+                                }
+                            }
                         } else {
                             set default ""
                         }
diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm
index f8123b94..69df08b9 100644
--- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm
+++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/char-0.1.0.tm
@@ -89,6 +89,107 @@ tcl::namespace::eval punk::char {
     variable invalid "???" ;# ideally this would be 0xFFFD - which should display as black diamond/rhombus with question mark. As at 2023 - this tends to display indistinguishably from other missing glyph boxes - so we use a longer sequence we can detect by length and to display obviously
     variable invalid_display_char \u25ab; #todo - change to 0xFFFD once diamond glyph more common?
 
+
+    #more useful for referring to ANSI documentation would be a proper 7-bit and 8-bit 'Code Table' layout
+    #as described in ECMA-35 5.2
+    # where the positions of the table are in one-to-one correspondence with the bit combinations of the code.
+    #-  for 7-bit: 8 columns 16 rows
+    #-  for 8-bit 16 columns 16 rows
+    proc codetable {which} {
+        set bits 8
+        switch -- $which {
+            ascii8 {
+                set which default
+            }
+            ascii {
+                set bits 7
+            }
+            default {
+                if {$which ni [encoding names]} {
+                    error "codetable unsupported - use 'ascii' or an entry from the result of the 'encoding names' command."
+                }
+            }
+        }
+        package require punk::ansi
+
+        set hibit_count [expr {$bits-4}]
+        set bitcolumns [expr {2**$hibit_count}] ;#always 4 bits for the rows - remaining bits for the columns
+        set columncount [expr {$bitcolumns + 6}]
+
+
+        #set header1 [list "" "b7\nb6\nb5" "0\n0\n0" "0\n0\n1" "0\n1\n0" "0\n1\n1" "1\n0\n0" "1\n0\n1" "1\n1\n0" "1\n1\n1"]
+        set header1 [list]
+        set hibits_label ""
+        set indent ""
+        for {set hb $bits} {$hb > 4} {incr hb -1} {
+            append hibits_label ${indent}b$hb\n
+            append indent "  "
+        }
+        set hibits_label [string range $hibits_label 0 end-1]
+        lappend header1 $hibits_label "" "" "" "" ""
+
+        for {set colidx 0} {$colidx < $bitcolumns} {incr colidx} {
+            set binval [format %0${hibit_count}b $colidx]
+            set binvalbits [split $binval ""]
+            set indent  ""
+            set display_hibits ""
+            foreach bb $binvalbits {
+                append display_hibits $indent$bb\n
+                append indent " "
+            }
+            set display_hibits [string range $display_hibits 0 end-1]
+            lappend header1 $display_hibits
+        }
+        #\u2193 down arrow
+        #right-down arrows
+        #\u2ba7
+        #\u21b4
+        #\u2b0e
+        set header2 [list "   Bits" b4 b3 b2 b1 "column \u2192\nrow \u2b0e" {*}[punk::lib::range 0 $bitcolumns-1]]
+        set headers [list $header1 $header2]
+
+        #set t [textblock::table -return tableobject -rows $rows]
+        set t [textblock::table -return tableobject]
+        #todo - fix textblock::table to allow configure -columncount
+        for {set c 0} {$c < $columncount} {incr c} {
+            $t add_column
+        }
+
+        set vheaders [punk::transpose_equal_lists $headers]
+        set hidx -1
+        foreach vh $vheaders {
+            incr hidx
+            $t configure_column $hidx -headers $vh
+        }
+        $t configure_header 0 -colspans [list 6 0 0 0 0 0 {*}[lrepeat $bitcolumns 1]]
+        $t configure_column 0 -blockalign left
+
+        #always 16 rows - remaining bits form the columns
+        for {set ridx 0} {$ridx <= 15} {incr ridx} {
+            set charlist [list]
+            set lowbits [format %04b $ridx]
+            for {set i 0} {$i < $bitcolumns} {incr i} {
+                set hibits [format %0${hibit_count}b $i]
+                set ch [format %c [scan ${hibits}${lowbits} %b]]
+                #puts "-->${hibits}${lowbits} ch:$ch"
+                if {$which ne "default"} {
+                    if {[catch {encoding convertfrom $which $ch} ch]} {
+                        set ch [punk::ansi::a red bold]-[punk::ansi::a]
+                        lappend charlist $ch
+                    } else {
+                        lappend charlist [punk::ansi::ansistring VIEW -lf 1 -vt 1 $ch]
+                    } 
+                } else {
+                    lappend charlist [punk::ansi::ansistring VIEW -lf 1 -vt 1 $ch]
+                }
+            }
+            set r [list "" {*}[split $lowbits ""] $ridx {*}$charlist]
+            $t add_row $r
+        }
+        puts stderr $t
+        $t print
+    }
+
     #just the 7-bit ascii. use [page ascii] for the 8-bit layout
     proc ascii {} {return {
         00 NUL  01 SOH  02 STX  03 ETX  04 EOT  05 ENQ  06 ACK  07 BEL
diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm
index 4322ceaa..ea8d3f77 100644
--- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm
+++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/console-0.1.1.tm
@@ -1336,6 +1336,7 @@ namespace eval punk::console {
     #https://vt100.net/docs/vt510-rm/DA1.html
     #
     proc get_device_attributes {{inoutchannels {stdin stdout}}} {
+        #Note the vt52 rough equivalen \x1bZ - commonly supported but probably best considered obsolete as it collides with ECMA 48 SCI Single Character Introducer
         #DA1
         variable last_da1_result
         #first element in result is the terminal's architectural class 61,62,63,64.. ?
diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.2.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.2.tm
index 6ce76618..46cd5668 100644
--- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.2.tm
+++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.2.tm
@@ -2242,6 +2242,121 @@ namespace eval punk::lib {
         }
     }
 
+
+    #review - compare to IMAP4 methods of specifying ranges?
+    punk::args::define {
+        @id     -id     ::punk::lib::indexset_resolve
+        @cmd    -name     punk::lib::indexset_resolve\
+            -summary\
+            "Resolve an indexset to a list of integers based on supplied list or string length."\
+            -help\
+            "Resolve an 'indexset' to a list of actual indices within the range of the provided numitems value.
+            An indexset consists of a comma delimited list of indexes or index-ranges.
+            The indexes are 0-based.
+            Ranges must be specified with .. as the separator.
+            Whitespace is ignored.
+            Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands,
+            e.g end-2 or 2+2.
+
+            end means the last page.
+            end-1 means the second last page.
+            0.. is the same as 0..end.
+            examples:
+            1,3..
+                output the page index 1 (2nd page) followed by all from index 3 to the end.
+            0-2,end
+                output the first 3 pages, and the last page.
+            end-1..0
+            output the indexes in reverse order from 2nd last page to first page."
+        @values -min 2 -max 2
+        numitems -type integer
+        indexset -type string
+    }
+    proc indexset_resolve {numitems indexset} {
+        if {![string is integer -strict $numitems] || ![regexp {^[\-\+_end,\.0-9]*$} $indexset]} {
+            #use parser on unhappy path only
+            set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve]
+            uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg]
+        } 
+        set index_list [list] ;#list of actual indexes within the range 
+        set iparts [split $indexset ,]
+        set index_list [list]
+        foreach ipart $iparts {
+            set ipart [string trim $ipart]
+            set rposn [string first .. $ipart]
+            if {$rposn>=0} {
+                #range
+                lassign           [punk::lib::string_splitbefore_indices $ipart $rposn $rposn+2] rawa _ rawb 
+                set rawa [string trim $rawa]
+                set rawb [string trim $rawb]
+                if {$rawa eq ""} {set rawa 0}
+                set a             [punk::lib::lindex_resolve $numitems $rawa]
+                if {$a == -3} {
+                    #undershot - leave negative
+                } elseif {$a == -2 && $rawa ne "-2"} {
+                    #overshot
+                    set a [expr {$numitems}] ;#put it outside the range on the upper side
+                }
+
+                if {$rawb eq ""} {
+                    if {$a > $numitems-1} {
+                        set rawb $a ;#make sure <overshot>.. doesn't return last item - should return nothing
+                    } else {
+                        set rawb end
+                    }
+                }
+                set b             [punk::lib::lindex_resolve $numitems $rawb]
+                if {$b == -3} {
+                    #undershot  - leave negative
+                } elseif {$b == -2 && $rawb ne "-2"} {
+                    set b [expr {$numitems}] ;#overshot - put it outside the range on the upper side
+                }
+
+                #e.g make sure <overshot>.. doesn't return last item - should return nothing as both are above the range.
+                if {$a >= 0 && $a <= $numitems-1 && $b >=0 && $b <= $numitems-1} {
+                    lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                } else {
+                    if {$a >= 0 && $a <= $numitems-1} {
+                        #only a is in the range
+                        if {$b < 0} {
+                            set b 0
+                        } else {
+                            set b [expr {$numitems-1}]
+                        }
+                        lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                    } elseif {$b >=0 && $b <= $numitems-1} {
+                        #only b is in the range
+                        if {$a < 0} {
+                            set a 0
+                        } else {
+                            set a [expr {$numitems-1}]
+                        }
+                        lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                    } else {
+                        #both outside the range
+                        if {$a < 0 && $b > 0} {
+                            #spans the range in forward order
+                            set a 0 
+                            set b [expr {$numitems-1}]
+                            lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                        } elseif {$a > 0 && $b < 0} {
+                            #spans the range in reverse order
+                            set a [expr {$numitems-1}]
+                            set b 0
+                            lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                        }
+                        #both outside of range on same side
+                    }
+                }
+            } else {
+                set idx [punk::lib::lindex_resolve_basic $numitems $ipart]
+                if {$idx >= 0} {
+                    lappend index_list $idx
+                }
+            }
+        }
+        return $index_list
+    }
     # showdict uses lindex_resolve results -2 & -3 to determine whether index is out of bounds on upper vs lower side
     #REVIEW: This shouldn't really need the list itself - just the length would suffice
     punk::args::define {
@@ -2305,7 +2420,8 @@ namespace eval punk::lib {
             #<0 ?
             error "lindex_resolve len must be an integer"
         }
-        set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000
+        set index [tcl::string::map {_ {}} $index] ;#basic forward compatibility with integers such as 1_000 for 8.6
+        #todo - be stricter about malformations such as 1000_
         if {[string is integer -strict $index]} {
             #can match +i -i
             if {$index < 0} {
@@ -3345,8 +3461,12 @@ namespace eval punk::lib {
             #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour.
             #This commonly happens if there is an unbalanced brace  (which is a normal occurrence and needs to be handled)
             #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable
-            #detect_in_list will check at first level. (not intended for detecting ansi in deeper structures)
-            if {![punk::ansi::ta::detect_in_list $linelist]} {
+            #detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures)
+
+            #we use detectcode_in_list instead of detect_in_list
+            #detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message)
+            # - but the main reason is it is slightly faster.
+            if {![punk::ansi::ta::detectcode_in_list $linelist]} {
                 if {$opt_ansiresets} {
                     foreach ln $linelist {
                         lappend transformed $RST$ln$RST
diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm
index f8e55b02..10fda84e 100644
--- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm
+++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm
@@ -13,6 +13,9 @@
 # @@ Meta End
 
 
+#BUGS
+# 2025-08
+# n// and n/// won't output info about 'namespace path' if there are no commands in the namespace
 
 # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
 ##  Requirements
@@ -1235,7 +1238,7 @@ tcl::namespace::eval punk::ns {
             if {[dict size [dict get $nsdict namespacepath]]} {
                 set path_text ""
                 if {!$opt_nspathcommands} {
-                    append path_text \n "  Also resolving cmds in namespace paths: [dict keys [dict get $nsdict namespacepath]]"
+                    append path_text \n "  Also resolving cmds in namespace paths: [dict keys [dict get $nsdict namespacepath]] (use n/// to display)"
                 } else {
                     append path_text \n "  Also resolving cmds in namespace paths:"
                     set nspathdict [dict get $nsdict namespacepath]
@@ -1247,8 +1250,17 @@ tcl::namespace::eval punk::ns {
                         }
                     } else {
                         #todo - change to display in column order to be same as main command listing
+                        set parentcommands [dict get $nsdict commands]
                         dict for {k v} $nspathdict {
-                            set pathcommands [dict get $v commands]
+                            set rawpathcommands [dict get $v commands]
+                            set pathcommands [list]
+                            foreach c $rawpathcommands {
+                                if {$c in $parentcommands} {
+                                    lappend pathcommands [punk::ansi::a strike]$c[a]
+                                } else {
+                                    lappend pathcommands $c
+                                }
+                            }
                             set columns 6
                             if {[llength $pathcommands] < 6} {
                                 set columns [llength $v]
diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/shellfilter-0.2.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/shellfilter-0.2.tm
index e04e5107..8017d3f5 100644
--- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/shellfilter-0.2.tm
+++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/shellfilter-0.2.tm
@@ -579,13 +579,15 @@ namespace eval shellfilter::chan {
     #punk::ansi::ansistrip converts at least some of the box drawing G0 chars to unicode - todo - more complete conversion
     #assumes line-buffering. a more advanced filter required if ansicodes can arrive split across separate read or write operations!
     oo::class create ansistrip {
-        variable o_trecord 
+        variable o_trecord
         variable o_enc
+        variable o_encbuf
         variable o_is_junction
         constructor {tf} {
             package require punk::ansi
             set o_trecord $tf
             set o_enc [dict get $tf -encoding]
+            set o_encbuf ""
             if {[dict exists $tf -junction]} {
                 set o_is_junction [dict get $tf -junction]
             } else {
@@ -614,10 +616,36 @@ namespace eval shellfilter::chan {
         method flush {transform_handle} {
             return ""
         }
+        #method write {transform_handle bytes} {
+        #    #broken due to occasional unexpected byte sequence
+        #    set instring [encoding convertfrom $o_enc $bytes] 
+        #    set outstring [punk::ansi::ansistrip $instring]
+        #    return [encoding convertto $o_enc $outstring]
+        #}
         method write {transform_handle bytes} {
-            set instring [encoding convertfrom $o_enc $bytes] 
-            set outstring [punk::ansi::ansistrip $instring]
-            return [encoding convertto $o_enc $outstring]
+            #set instring [tcl::encoding::convertfrom $o_enc $bytes] ;naive approach will break due to unexpected byte sequence - occasionally
+            #bytes can break at arbitrary points making encoding conversions invalid.
+
+            set inputbytes $o_encbuf$bytes
+            set o_encbuf ""
+            set tail_offset 0
+            while {$tail_offset < [string length $inputbytes] && [catch {tcl::encoding::convertfrom $o_enc [string range $inputbytes 0 end-$tail_offset]} stringdata]} {
+                incr tail_offset
+            }
+            if {$tail_offset > 0} {
+                if {$tail_offset < [string length $inputbytes]} {
+                    #stringdata from catch statement must be a valid result
+                    set t [expr {$tail_offset - 1}]
+                    set o_encbuf [string range $inputbytes end-$t end]
+                } else {
+                    set stringdata ""
+                    set o_encbuf $inputbytes
+                    return ""
+                }
+            }
+
+            set outstring [punk::ansi::ansistrip $stringdata]
+            return [tcl::encoding::convertto $o_enc $outstring]
         }
         method meta_is_redirection {} {
             return $o_is_junction
@@ -724,6 +752,8 @@ namespace eval shellfilter::chan {
             set emit ""
             if {[string last \x1b $buf] >= 0} {
                 #detect will detect ansi SGR and gron groff and other codes
+                #REVIEW - ta::detect won't detect SOS without paired ST for things like PM
+                # ta::detectcode will - but then split_codes_single will treat unpaired SOS as text?
                 if {[punk::ansi::ta::detect $buf]} {
                     #split_codes_single regex faster than split_codes - but more resulting parts
                     #'single' refers to number of escapes - but can still contain e.g multiple SGR codes (or mode set operations etc)
@@ -1035,7 +1065,7 @@ namespace eval shellfilter::chan {
     }
     #todo - something 
     oo::class create rebuffer {
-        variable o_trecord 
+        variable o_trecord
         variable o_enc
         constructor {tf} {
             set o_trecord $tf
diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm
index de0164cd..97969463 100644
--- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm
+++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/textblock-0.1.3.tm
@@ -2528,7 +2528,7 @@ tcl::namespace::eval textblock {
                         set ansiborder_final $ansiborder_body_col_row
                         #$c will always have ansi resets due to overtype behaviour ?
                         #todo - review overtype
-                        if {[punk::ansi::ta::detect $c]} {
+                        if {[punk::ansi::ta::detectcode $c]} {
                             #use only the last ansi sequence in the cell value
                             #Filter out foreground and use background for ansiborder override
                             set parts [punk::ansi::ta::split_codes_single $c]
@@ -4377,6 +4377,7 @@ tcl::namespace::eval textblock {
             if {![punk::ansi::ta::detect $block]} {
                 return $block
             }
+            #could be newine in SOS/PM? review - terminal should theoretically ignore all chars til close of string
             foreach ln [split $block \n] {
                 set parts [punk::ansi::ta::split_codes $ln]
                 if {[lindex $parts 0] eq ""} {
@@ -4894,7 +4895,9 @@ tcl::namespace::eval textblock {
             }
             set textblock [textutil::tabify::untabify2 $textblock $tw]
         }
-        if {[string length $textblock] > 1 && [punk::ansi::ta::detect $textblock]} {
+        #review detectcode vs detect. detectcode won't look for completness of all codes to give a positive result
+        #ansistripraw splits on complete codes..
+        if {[string length $textblock] > 1 && [punk::ansi::ta::detectcode $textblock]} {
             #ansistripraw slightly faster than ansistrip - and won't affect width (avoid detect_g0/conversions)
             set textblock [punk::ansi::ansistripraw $textblock]
         }
@@ -4917,7 +4920,9 @@ tcl::namespace::eval textblock {
         } else {
             set tl $textblock
         }
-        if {[punk::ansi::ta::detect $tl]} {
+        #review detectcode vs detect. detectcode won't look for completness of all codes to give a positive result
+        #ansistripraw splits on complete codes..
+        if {[punk::ansi::ta::detectcode $tl]} {
             set tl [punk::ansi::ansistripraw $tl]
         }
         return [punk::char::ansifreestring_width $tl]
@@ -4964,7 +4969,7 @@ tcl::namespace::eval textblock {
             set textblock [textutil::tabify::untabify2 $textblock $tw]
         }
         #ansistripraw on entire block in one go rather than line by line - result should be the same - review - make tests
-        if {[string length $textblock] > 1 && [punk::ansi::ta::detect $textblock]} {
+        if {[string length $textblock] > 1 && [punk::ansi::ta::detectcode $textblock]} {
             set textblock [punk::ansi::ansistripraw $textblock]
         }
         if {[tcl::string::last \n $textblock] >= 0} {
@@ -5211,7 +5216,7 @@ tcl::namespace::eval textblock {
 
         set known_hasansi [tcl::dict::get $opts -known_hasansi]
         if {$known_hasansi eq ""} {
-            set block_has_ansi [punk::ansi::ta::detect $block]
+            set block_has_ansi [punk::ansi::ta::detectcode $block]
         } else {
             set block_has_ansi $known_hasansi
         }
@@ -5648,7 +5653,7 @@ tcl::namespace::eval textblock {
         set rowcount 0
         set blocklists [list]
         foreach b $blocks {
-            if {[punk::ansi::ta::detect $b]} {
+            if {[punk::ansi::ta::detectcode $b]} {
                 #-ansireplays 1 quite expensive e.g 7ms in 2024
                 set bl [punk::lib::lines_as_list -ansireplays 1 -ansiresets $ansiresets -- $b]
             } else {
@@ -5676,7 +5681,7 @@ tcl::namespace::eval textblock {
         set i -1
         foreach b $args {
             incr i
-            if {[punk::ansi::ta::detect $b]} {
+            if {[punk::ansi::ta::detectcode $b]} {
                 #-ansireplays 1 quite expensive e.g 7ms in 2024
                 set blines [punk::lib::lines_as_list -ansireplays 1 -ansiresets auto -- $b]
             } else {
@@ -5799,7 +5804,7 @@ tcl::namespace::eval textblock {
             #testing of any decent size block line by line - even with max_string_length_line is slower than ta::detect anyway.
 
             #blocks passed to join can be ragged - so we can't pass -known_samewidth to pad
-            if {[punk::ansi::ta::detect $b]} {
+            if {[punk::ansi::ta::detectcode $b]} {
                 # - we need to join to use pad - even though we then need to immediately resplit REVIEW (make line list version of pad?)
                 set replay_block [::join [punk::lib::lines_as_list -ansireplays 1 -ansiresets $ansiresets -- $b] \n]
                 #set blines [split [textblock::pad $replay_block -known_hasansi 1 -known_samewidth $is_samewidth -known_blockwidth $bwidth -width $bwidth -which right -padchar " "] \n]
@@ -8650,7 +8655,7 @@ tcl::namespace::eval textblock {
                 set cache_inner [overtype::block -blockalign $opt_blockalign -ellipsis $opt_ellipsis $underlay $cache_contentpattern]
                 #puts "frame--->ansiwrap -rawansi [ansistring VIEW $opt_ansibase] $cache_inner"
                 if {$opt_ansibase ne ""} {
-                    if {[punk::ansi::ta::detect $cache_inner]} {
+                    if {[punk::ansi::ta::detectcode $cache_inner]} {
                         #set cache_inner [punk::ansi::ansiwrap -rawansi $opt_ansibase $cache_inner]
                         #set cache_inner [punk::ansi::ansiwrap_raw $opt_ansibase "" "" $cache_inner]
                         #jjj ??? review
@@ -8745,7 +8750,7 @@ tcl::namespace::eval textblock {
 
             #set cwidth [textblock::width $contents]
             #JJJ
-            set contents_has_ansi [punk::ansi::ta::detect $contents]
+            set contents_has_ansi [punk::ansi::ta::detectcode $contents]
             if {$opt_ansibase ne ""} {
                 if {$contents_has_ansi} {
                     #set contents [punk::ansi::ansiwrap -rawansi $opt_ansibase $contents]
@@ -8799,8 +8804,9 @@ tcl::namespace::eval textblock {
                 if {$subposn >= 0} {
                     set content_line [lindex $clines $contentindex]
                     #review - different forms of reset e.g \x1b\[m ??
-                    if {[string range $content_line 0 3] eq "\x1b\[0m"} {
+                    if {[tcl::string::range $content_line 0 3] eq "\x1b\[0m"} {
                         set content_line [tcl::string::range $content_line 4 end]
+                        #::tcl::string::replace content_line 0 3
                     }
                     append content_line $opt_ansibase
                     append fs [tcl::string::replace $tline $subposn $subposn+$pattern_offset $content_line] \n
diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/make.tcl b/src/project_layouts/custom/_project/punk.project-0.1/src/make.tcl
index 8cb8153c..c3cbc9aa 100644
--- a/src/project_layouts/custom/_project/punk.project-0.1/src/make.tcl
+++ b/src/project_layouts/custom/_project/punk.project-0.1/src/make.tcl
@@ -3075,7 +3075,7 @@ foreach vfstail $vfs_tails {
                                 #copy the version that is mounted in this runtime to vfsname.new 
                                 if {[catch {
                                     file copy -force $building_runtime $buildfolder/$vfsname.new
-                                    catch {exec chmod +x $buildfolder/$vfsname.new}
+                                    catch {exec chmod +w $buildfolder/$vfsname.new}
                                 } errM]} {
                                     puts stderr "$kit_type 'file copy -force $building_runtime $buildfolder/$vfsname.new' failed\n$errM"
                                     error $errM
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/flagfilter-0.3.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/flagfilter-0.3.tm
index 1d37e215..00f58e82 100644
--- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/flagfilter-0.3.tm
+++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/flagfilter-0.3.tm
@@ -1538,6 +1538,7 @@ namespace eval flagfilter {
         }
     }
 
+    #todo - rename 'cprocessor' is misleading
     oo::class create cprocessor {
         variable o_runid
         variable o_name
@@ -1577,7 +1578,9 @@ namespace eval flagfilter {
             if {[dict exists $o_pinfo match]} {
                 set o_matchspec [dict get $o_pinfo match]
             } else {
-                set o_matchspec {^[^-^/].*}  ;#match anything that isn't flaglike
+                #review - unix paths? conflict with windows style flag such as /w
+                #must accept empty string
+                set o_matchspec {^[^-^/].*|^$}  ;#match anything that isn't flaglike
             }
             set o_found_match 0
             set o_matched_argument "" ;#need o_found_match to differentiate match of empty string
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm
index 6fb185a9..83dad2bf 100644
--- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm
+++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm
@@ -6947,7 +6947,8 @@ namespace eval punk {
             set newrow {}
             foreach oldrow $list_rows {
                 if {$j >= [llength $oldrow]} {
-                    continue
+                    #continue 
+                    lappend newrow ""
                 } else {
                     lappend newrow [lindex $oldrow $j]
                 }
@@ -6956,6 +6957,19 @@ namespace eval punk {
         }
         return $res
     }
+    proc transpose_equal_lists {list_rows} {
+        set columns [list]
+        set rowidx -1
+        foreach l $list_rows {
+            set colidx -1
+            incr rowidx
+            foreach val $l {
+                incr colidx
+                lset columns $colidx $rowidx $val
+            }
+        }
+        return $columns
+    }
     proc transpose_strings {list_of_strings} {
         set charlists [lmap v $list_of_strings {split $v ""}]
         set tchars [transpose_lists $charlists]
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm
index 255715ad..ad2d58f4 100644
--- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm
+++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ansi-0.1.1.tm
@@ -3785,7 +3785,7 @@ Brightblack 100   Brightred 101   Brightgreen 102   Brightyellow 103   Brightblu
         }
 
         set codestack [list]
-        if {[punk::ansi::ta::detect $text]} {
+        if {[punk::ansi::ta::detectcode $text]} {
             set emit ""
             #set parts [punk::ansi::ta::split_codes $text]
             set parts [punk::ansi::ta::split_codes_single $text]
@@ -3892,7 +3892,7 @@ Brightblack 100   Brightred 101   Brightgreen 102   Brightyellow 103   Brightblu
         }
 
         set codestack [list]
-        if {[punk::ansi::ta::detect $text]} {
+        if {[punk::ansi::ta::detectcode $text]} {
             set emit ""
             #set parts [punk::ansi::ta::split_codes $text]
             set parts [punk::ansi::ta::split_codes_single $text]
@@ -4158,7 +4158,8 @@ Brightblack 100   Brightred 101   Brightgreen 102   Brightyellow 103   Brightblu
     # where line and column are ascii codes whose values are +31
     # vt52 can be entered/exited via escapes
     # This means we probably need to to wrap enter/exit vt52 and keep this state - as we don't have a standard way to query for terminal type
-    # (vt52 supports ESC Z - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
+    # (vt52 supports ESC Z (obs DECID) - but vt100 sometimes? doesn't - and querying at each output would be slow anyway, even if there was a common query :/ )
+    #ESC\[c - is more modern equiv of DECID
 
     lappend PUNKARGS [list {
         @id     -id     ::punk::ansi::vt52move
@@ -4946,6 +4947,8 @@ to 223 (=255 - 32)
         }
         if {[string length $text] < 2} {return $text}
         set parts [punk::ansi::ta::split_codes $text]
+        #review - if we have only one element of a paired codeset such as PM,SOS - it will not be found by split_codes
+        #The output technically then still contains ansi (which may for example be hidden by terminal despite lack of closing ST)
         if {[llength $parts] == 1} {return [lindex $parts 0]}
         set out ""
         #todo - try: join [lsearch -stride 2 -index 0 -subindices -all -inline  $parts *] ""
@@ -5686,21 +5689,106 @@ tcl::namespace::eval punk::ansi {
         #[list_end] [comment {--- end definitions namespace punk::ansi::codetype ---}]
     }
     tcl::namespace::eval sequence_type {
-        proc is_Fe {code} {
+        #first byte after ESC identifies code type
+        #NOTE - we are looking for valid start of a single sequence here
+        #- not whether it is complete or where it ends, unless it's a fixed number of bytes
+
+        #\u0020-\u002F
+        # ESC <sp>!"#$%&'()*+,-./
+
+        #\u0030-\u003F
+        #ESC 0-9:;<=>?
+
+        #\u0040-\u005F
+        # ESC  @A-Z[\]^
+
+        #\u0060-\u007E
+
+        proc is_Fe7 {code} {
             # C1 control codes
-            if {[regexp {^\033\[[\u0040-\u005F]}]} {
-                #7bit - typical case
-                return 1
-            }
+            #7bit - typical case
+            # ESC  @A-Z[\]^
+            return [regexp {^\033[\u0040-\u005F]} $code] 
+        }
+        proc is_Fe {code} {
+            #although Fe7 more common - we'll put the simpler regex for 8 first
+            return [expr {[is_Fe8 $code] || [is_Fe7 $code]}]
+        }
+        proc is_Fe8 {code} {
             #8bit
-            #review - all C1 escapes ? 0x80-0x90F
+            #review - all C1 escapes ? 0x80-0x9F
             #This is possibly problematic as it is affected by encoding.
             #According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
             #"However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used."
-            return 0
+            return [regexp {^[\u0080-\u09F]} $code]
+        }
+        #ESC 0-9,:,;,<,=,>,?
+        proc is_Fp {code} {
+            #single byte following ESC
+            return [regexp {^\033[\u0030-\u003F]$} $code] 
         }
+
+        #https://en.wikipedia.org/wiki/ISO/IEC_2022
+        #e.g
+        # ESC a (INT) interrupts the current process
+        # ESC c (RIS) reset terminal to initial state
+        #ESC `a-z{|}~
         proc is_Fs {code} {
-            puts stderr "is_Fs unimplemented"
+            #single byte following ESC
+            return [regexp {^\033[\u0060-\u007E]$} $code] 
+        }
+
+
+        proc is_nF {code} {
+            #2 bytes
+            #subcategorised by the low two bits of the first byte (n)
+            #further by whether the final byte is in \u0030-u003f (p) or not (t)
+            return [regexp {^\033[\u0020-\u002F]+[\u0030-\u007E]$} $code]
+        }
+
+        #review - test
+        #3Fp - private use 
+        #e.g vt100
+        # ESC#3 DECDHL double-height letters top half
+        # ESC#4 DECDHL double-height letters bottom half
+        # ESC#5 DECSWL single-width line
+        # ESC#6 DECDWL double-width line
+        proc is_3Fp {code} {
+            return [regexp {^\033#[\u0020-\u002F]*[\u0030-\u003F]$} $code] ;#check regexp
+        }
+
+        proc is_code7 {code} {
+            #Fe | Fs | Fp | nF | Fe
+            return [regexp {^\033[\u0040-\u005F]|^\033[\u0060-\u007e]$|^\033[\u0030-\u003F]$|^\033[\u0020-\u002F]+[\u0030-\u007E]$} $code]
+        }
+        proc is_code8 {code} {
+            return [regexp {^[\u0080-\u09F]} $code]
+        }
+        proc is_code {code} {
+            return [expr {[is_code8 $code] || [is_code7 $code]}]
+        }
+
+        proc classify {code} {
+            return [switch -regexp -- $code {
+                {^\033[\u0030-\u003F]$}                  {string cat Fp}
+                {^[\u0080-\u009F]|^\033[\u0040-\u005F]}  {string cat Fe}
+                {^\033[\u0060-\u007E]$}                  {string cat Fs}
+                {^\033[\u0020-\u002F]+[\u0030-\u007E]$} {
+                    #nF sequences
+                    set firstbytenum [scan [string index $code 1] %c]
+                    set lastbyte [string index $code end]
+
+                    set n [expr {$firstbytenum & 3}]
+                    if {[regexp {[\u0030-\u003F]} $lastbyte]} {
+                        set tp p
+                    } else {
+                        set tp t
+                    }
+                    string cat ${n}F$tp
+                }
+                {^\033#[\u0020-\u002F]*[\u0030-\u003F]$} {string cat 3Fp}
+                default                                  {string cat unknown}
+            }]
         }
     }
     # -- --- --- --- --- --- --- --- --- --- ---
@@ -5718,6 +5806,7 @@ tcl::namespace::eval punk::ansi::ta {
     #[para] https://github.com/perlancar/perl-Text-ANSI-Util/blob/master/lib/Text/ANSI/BaseUtil.pm
     #[list_begin definitions]
     tcl::namespace::path ::punk::ansi
+    namespace export detect detect_in_list detect_sgr extract length split_codes split_at_codes split_codes_single
 
     variable PUNKARGS
 
@@ -5748,20 +5837,27 @@ tcl::namespace::eval punk::ansi::ta {
     variable re_osc_open {(?:\x1b\]|\u009d).*}
 
 
+    #review - distinguishing standalone codes vs those that are paired with contents considered part of the code
+    #e.g PM,SOS are 'paired' ended by ST
+
     #variable standalone_code_map [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""]
     variable re_standalones {(?:\x1bc|\x1b7|\x1b8|\x1bM|\x1bD|\x1bE|\x1bH|\x1b=|\x1b>|\x1b#3|\x1b#4|\x1b#5|\x1b#6|\x1b#8)}
     variable re_standalones_vt52 {(?:\x1bZ)}
 
-    #ESC Y move, ESC b foreground colour
+    # -- 
+    #ESC Y move                 - \x1bY<byte><byte> ie 2 bytes to close
+    #ESC b foreground colour    - \x1bb<byte>  1 byte to close
+    variable re_vt52_incomplete {(?:\x1bY(.){0,1}$|\x1bb$)}
+    #\x1bc vt52 bgcolour conflict ?
     #ESC F - gr-on ESC G - gr-off
-    variable re_vt52_open {(?:\x1bY|\x1bb|\x1bF)}
-    #\x1bc vt52 bgcolour conflict ??
+    # --
 
     #if we don't split on altgraphics too and separate them out - it's easy to get into a horrible mess
     variable re_g0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B}
     variable re_g0_open  {(?:\x1b\(0)}
     variable re_g0_close {(?:\x1b\(B)}
 
+    #detect start of ansicode that is closed by ST
     # DCS "ESC P" or "0x90" is also terminated by ST
     set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)}
     #ST terminators [list \007 \033\\ \u009c]
@@ -5777,12 +5873,15 @@ tcl::namespace::eval punk::ansi::ta {
     variable re_ST      {(?:\x1bP|\x1bX|\x1b\^|\x1b_)(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007)|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)}
 
 
+    #variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_standalones}|${re_standalones_vt52}|${re_ST_open}|${re_g0_open}|${re_vt52_open}"
+    #variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_ST_open}|${re_g0_open}|${re_vt52_open}"
+    variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_ST_open}|${re_vt52_incomplete}"
 
     #consider standalones as self-opening/self-closing - therefore included in both ansi_detect and ansi_detect_open
 
     #default for regexes is non-newline-sensitive matching - ie matches can span lines
     # -- --- --- ---
-    variable re_ansi_detect1 "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}"
+    #variable re_ansi_detect1 "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}"
     # -- --- --- ---
     #handrafted TRIE version of above. Somewhat difficult to construct and maintain. TODO - find a regexp TRIE generator that works with Tcl regexes
     #This does make things quicker - but it's too early to finalise the detect/split regexes (e.g missing \U0090 ) - will need to be redone.
@@ -5802,20 +5901,24 @@ tcl::namespace::eval punk::ansi::ta {
     # plus more for auxiliary keypad codes in keypad application mode (and some in numeric mode)
 
     #regexp expanded syntax = ?x
+    #full detect - checking for closing sequences
     variable re_ansi_detect {(?x)
-            (?:\x1b(?:\[(?:[\x20-\x2f\x30-\x3f]*[\x40-\x7e])|c|7|8|M|D|E|H|=|>|<|A|B|C|I|J|K|L|M|Z|(?:Y(?:..))|(?:b(?:.))|\((?:0|B)|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007))|(?:\#(?:3|4|5|6|8))))
+            (?:\x1b(?:\[(?:[\x20-\x3f]*[\x40-\x7e])|a|c|7|8|M|D|E|H|=|>|<|A|B|C|I|J|K|L|M|Z|(?:Y(?:..))|(?:b(?:.))|\((?:0|B)|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007))|(?:\#(?:3|4|5|6|8))))
             |(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)
-            |(?:\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]
+            |(?:\u009b)[\x20-\x3f]*[\x40-\x7e]
             |(?:\u009d)(?:[^\u009c]*)?\u009c
     }
     #---
+    #todo
+    #variable re_ansi_detectcode $re_ansi_detect
+    #variable re_ansi_detectcode {\x1b[\u0040-\u005F]|\x1b[\u0060-\u007e]|\x1b[\u0030-\u003F]|\x1b[\u0020-\u002F]+[\u0030-\u007E]}
+    variable re_ansi_detectcode {(?:\x1b(?:[\u0030-\u007E]|[\u0020-\u002F]+[\u0030-\u007E]))|[\u0090-\u009F]}
 
     # -- --- --- ---
     #variable re_csi_code  {(?:\x1b\[|\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]}
 
 
 
-    variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_standalones}|${re_standalones_vt52}|${re_ST_open}|${re_g0_open}|${re_vt52_open}"
 
     #may be same as detect - kept in case detect needs to diverge
     #variable re_ansi_split "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}"
@@ -5827,20 +5930,6 @@ tcl::namespace::eval punk::ansi::ta {
         set re_ansi_split_multi "(?:${re_ansi_split})+"
     }
 
-    lappend PUNKARGS [list -dynamic 0 {
-            @id     -id ::punk::ansi::ta::detect
-            @cmd    -name punk::ansi::ta::detect -help\
-                "Return a boolean indicating whether Ansi codes were detected in text.
-                Important caveat:
-                When text is a tcl list made from splitting (or lappending) some ansi string
-                - individual elements may be braced or have certain chars escaped.
-                (one example is if a list element contains an unbalanced  brace)
-                This can cause square brackets that form part of the ansi to be backslash escaped
-                - and the function can fail to match it as an Ansi code.
-                "
-            @values -min 1
-            text -type string
-    } ]
 
     #*** !doctools
     #[call [fun detect] [arg text]]
@@ -5851,10 +5940,35 @@ tcl::namespace::eval punk::ansi::ta {
 
     #detect any ansi escapes
     #review - only detect 'complete' codes - or just use the opening escapes for performance?
+    lappend PUNKARGS [list {
+            @id     -id ::punk::ansi::ta::detect
+            @cmd    -name punk::ansi::ta::detect\
+                -summary\
+                "Test if text has completed ANSI codes"\
+                -help\
+                "Return a boolean indicating whether *complete* Ansi codes were detected in text.
+
+                By complete, it means that paired squences such as PM (privacy message) must be
+                closed. It also means that a truncated sequence such as \\x1b\\\[ or a lone escape
+                will not be detected as ANSI.
+
+                Use punk::ansi::ta::detectcode as a slightly faster detector for ANSI codes, that does
+                not require paired sequences to have both starting and end sequences to be detected.
+
+                Important caveat:
+                When text is a tcl list made from splitting (or lappending) some ansi string
+                - individual elements may be braced or have certain chars escaped.
+                (one example is if a list element contains an unbalanced  brace)
+                This can cause square brackets that form part of the ansi to be backslash escaped
+                - and the function can fail to match it as an Ansi code.
+                "
+            @values -min 1
+            text -type string -help\
+                "Block of text. See caveat above about lists."
+    } ]
     proc detect {text} [string map [list <re> [list $re_ansi_detect]] {
         regexp <re> $text
     }]
-
     #can be used on dicts - but will check keys too. keys could also contain ansi and have escapes
     proc detect_in_list {list} {
         #loop is commonly faster than using join. (certain ansi codes triggering list quoting? review)
@@ -5865,6 +5979,22 @@ tcl::namespace::eval punk::ansi::ta {
         }
         return 0
     }
+
+    #will detect for example lone opening or closing PM
+    proc detectcode {text} [string map [list <re> [list $re_ansi_detectcode]] {
+        regexp <re> $text
+    }]
+    proc detectcode_in_list {list} {
+        #loop is commonly faster than using join. (certain ansi codes triggering list quoting? review)
+        foreach item $list {
+            if {[detectcode $item]} {
+                return 1
+            }
+        }
+        return 0
+    }
+
+
     #surprisingly - the ::join operation can be (relatively) slow on lists containing ansi
     proc detect_in_list2 {list} {
         detect [join $list " "]
@@ -5915,6 +6045,8 @@ tcl::namespace::eval punk::ansi::ta {
         variable re_sgr
         expr {[regexp $re_sgr $text]}
     }
+
+    #perl: ta_strip
     proc strip {text} {
         #*** !doctools
         #[call [fun strip] [arg text]]
@@ -5922,6 +6054,39 @@ tcl::namespace::eval punk::ansi::ta {
         #[para]This is a tailcall to punk::ansi::ansistrip
         tailcall ansistrip $text
     }
+
+    lappend PUNKARGS [list {
+        @id     -id ::punk::ansi::ta::extract
+        @cmd    -name punk::ansi::ta::extract\
+            -summary\
+            "Return only the ANSI codes in text"\
+            -help\
+            "This is the opposite of strip,
+              returning only the ANSI codes in text."
+        @values -min 1 -max 1
+        text -type string 
+    } ]
+    proc extract {text} {
+        set parts [split_codes $text]
+        set out ""
+        foreach {pt code} $parts {
+            append out $code
+        }
+        return $out
+    }
+
+    lappend PUNKARGS [list {
+            @id     -id ::punk::ansi::ta::length
+            @cmd    -name punk::ansi::ta::length\
+                -summary\
+                "Calculate length of text (excluding the ANSI codes)"\
+                -help\
+                "Calculate length of text (excluding the ANSI codes)
+                This is not the printing length of the string on screen."
+            @values -min 1
+            text -type string 
+    } ]
+    #perl: ta_length
     proc length {text} {
         #*** !doctools
         #[call [fun length] [arg text]]
@@ -5936,6 +6101,8 @@ tcl::namespace::eval punk::ansi::ta {
     #
     #}
 
+    #perl: ta_trunc
+    #truncate $text to $width columns while still including all the ANSI colour codes.
     proc trunc {text width args} {
 
     }
@@ -8504,6 +8671,10 @@ tcl::namespace::eval punk::ansi::internal {
     }
 }
 
+tcl::namespace::eval punk::ansi {
+    namespace import ::punk::ansi::ta::detect
+}
+
 #inserting into global namespace like this should be kept to a minimum.. but this is considered a core aspect of punk::ansi
 #todo - document
 interp alias {} ansistring {} ::punk::ansi::ansistring
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/args-0.2.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/args-0.2.tm
index fc438d57..a6224c0d 100644
--- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/args-0.2.tm
+++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/args-0.2.tm
@@ -1364,6 +1364,11 @@ tcl::namespace::eval punk::args {
                                         #}
                                         tcl::dict::set tmp_optspec_defaults -type $v
                                     }
+                                    -defaultdisplaytype {
+                                        #how the -default is displayed
+                                        #-default doesn't have to be the same type as -type which validates user input that is not defaulted.
+                                        tcl::dict::set tmp_optspec_defaults -defaultdisplaytype $v
+                                    }
                                     -parsekey {
                                         tcl::dict::set tmp_optspec_defaults -parsekey $v
 
@@ -1441,7 +1446,7 @@ tcl::namespace::eval punk::args {
                                     default {
                                         set known   {   -parsekey -group -grouphelp\
                                                         -any -anyopts -arbitrary -form -minsize -maxsize -choices -choicegroups -choicemultiple -choicecolumns -choicelabels -choiceinfo\
-                                                        -type -range -typeranges -default -typedefaults
+                                                        -type -range -typeranges -default -defaultdisplaytype -typedefaults
                                                         -choiceprefix -choiceprefixdenylist -choiceprefixreservelist -choicerestricted -nocase\
                                                         -nominsize -nomaxsize -norange -nochoices -nochoicelabels\
                                                         -type -optional -allow_ansi -validate_ansistripped -strip_ansi -multiple -prefix\
@@ -2052,6 +2057,9 @@ tcl::namespace::eval punk::args {
                                 tcl::dict::set spec_merged -optional 1
                             }
                         }
+                        -defaultdisplaytype {
+                            tcl::dict::set spec_merged -defaultdisplaytype $specval
+                        }
                         -typedefaults {
                             set typecount [llength [tcl::dict::get $spec_merged -type]]
                             if {$typecount != [llength $specval]} {
@@ -2114,7 +2122,7 @@ tcl::namespace::eval punk::args {
                                                     -form -type\
                                                     -parsekey -group\
                                                     -range -typeranges\
-                                                    -default -typedefaults\
+                                                    -default -defaultdisplaytype -typedefaults\
                                                     -minsize -maxsize -choices -choicegroups\
                                                     -choicemultiple -choicecolumns -choiceprefix -choiceprefixdenylist -choiceprefixreservelist -choicerestricted\
                                                     -nocase -optional -multiple -validate_ansistripped -allow_ansi -strip_ansi -help\
@@ -3784,7 +3792,32 @@ tcl::namespace::eval punk::args {
                         }
 
                         if {[dict exists $arginfo -default]} {
-                            set default "'$A_DEFAULT[dict get $arginfo -default]$RST'"
+                            #default isn't necessarily of same type as -type required for validation
+                            #Guessing at the type from the data is likely to be unsatisfactory.
+
+                            set defaultdisplaytype [Dict_getdef $arginfo -defaultdisplaytype string]
+                            switch -- $defaultdisplaytype {
+                                dict {
+                                    #single level
+                                    set rawdefault [dict get $arginfo -default]
+                                    set default     "{\n"
+                                    dict for {k v} $rawdefault {
+                                        append default "  \"$k\" \"$v\"\n"
+                                    }
+                                    append default  "}"
+                                }
+                                list {
+                                    set default     "{\n"
+                                    foreach v $rawdefault {
+                                        append default "  \"$v\"\n"
+                                    }
+                                    append default "}"
+                                }
+                                default {
+                                    #set default "'$A_DEFAULT[dict get $arginfo -default]$RST'"
+                                    set default "'[dict get $arginfo -default]'"
+                                }
+                            }
                         } else {
                             set default ""
                         }
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/char-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/char-0.1.0.tm
index f8123b94..69df08b9 100644
--- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/char-0.1.0.tm
+++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/char-0.1.0.tm
@@ -89,6 +89,107 @@ tcl::namespace::eval punk::char {
     variable invalid "???" ;# ideally this would be 0xFFFD - which should display as black diamond/rhombus with question mark. As at 2023 - this tends to display indistinguishably from other missing glyph boxes - so we use a longer sequence we can detect by length and to display obviously
     variable invalid_display_char \u25ab; #todo - change to 0xFFFD once diamond glyph more common?
 
+
+    #more useful for referring to ANSI documentation would be a proper 7-bit and 8-bit 'Code Table' layout
+    #as described in ECMA-35 5.2
+    # where the positions of the table are in one-to-one correspondence with the bit combinations of the code.
+    #-  for 7-bit: 8 columns 16 rows
+    #-  for 8-bit 16 columns 16 rows
+    proc codetable {which} {
+        set bits 8
+        switch -- $which {
+            ascii8 {
+                set which default
+            }
+            ascii {
+                set bits 7
+            }
+            default {
+                if {$which ni [encoding names]} {
+                    error "codetable unsupported - use 'ascii' or an entry from the result of the 'encoding names' command."
+                }
+            }
+        }
+        package require punk::ansi
+
+        set hibit_count [expr {$bits-4}]
+        set bitcolumns [expr {2**$hibit_count}] ;#always 4 bits for the rows - remaining bits for the columns
+        set columncount [expr {$bitcolumns + 6}]
+
+
+        #set header1 [list "" "b7\nb6\nb5" "0\n0\n0" "0\n0\n1" "0\n1\n0" "0\n1\n1" "1\n0\n0" "1\n0\n1" "1\n1\n0" "1\n1\n1"]
+        set header1 [list]
+        set hibits_label ""
+        set indent ""
+        for {set hb $bits} {$hb > 4} {incr hb -1} {
+            append hibits_label ${indent}b$hb\n
+            append indent "  "
+        }
+        set hibits_label [string range $hibits_label 0 end-1]
+        lappend header1 $hibits_label "" "" "" "" ""
+
+        for {set colidx 0} {$colidx < $bitcolumns} {incr colidx} {
+            set binval [format %0${hibit_count}b $colidx]
+            set binvalbits [split $binval ""]
+            set indent  ""
+            set display_hibits ""
+            foreach bb $binvalbits {
+                append display_hibits $indent$bb\n
+                append indent " "
+            }
+            set display_hibits [string range $display_hibits 0 end-1]
+            lappend header1 $display_hibits
+        }
+        #\u2193 down arrow
+        #right-down arrows
+        #\u2ba7
+        #\u21b4
+        #\u2b0e
+        set header2 [list "   Bits" b4 b3 b2 b1 "column \u2192\nrow \u2b0e" {*}[punk::lib::range 0 $bitcolumns-1]]
+        set headers [list $header1 $header2]
+
+        #set t [textblock::table -return tableobject -rows $rows]
+        set t [textblock::table -return tableobject]
+        #todo - fix textblock::table to allow configure -columncount
+        for {set c 0} {$c < $columncount} {incr c} {
+            $t add_column
+        }
+
+        set vheaders [punk::transpose_equal_lists $headers]
+        set hidx -1
+        foreach vh $vheaders {
+            incr hidx
+            $t configure_column $hidx -headers $vh
+        }
+        $t configure_header 0 -colspans [list 6 0 0 0 0 0 {*}[lrepeat $bitcolumns 1]]
+        $t configure_column 0 -blockalign left
+
+        #always 16 rows - remaining bits form the columns
+        for {set ridx 0} {$ridx <= 15} {incr ridx} {
+            set charlist [list]
+            set lowbits [format %04b $ridx]
+            for {set i 0} {$i < $bitcolumns} {incr i} {
+                set hibits [format %0${hibit_count}b $i]
+                set ch [format %c [scan ${hibits}${lowbits} %b]]
+                #puts "-->${hibits}${lowbits} ch:$ch"
+                if {$which ne "default"} {
+                    if {[catch {encoding convertfrom $which $ch} ch]} {
+                        set ch [punk::ansi::a red bold]-[punk::ansi::a]
+                        lappend charlist $ch
+                    } else {
+                        lappend charlist [punk::ansi::ansistring VIEW -lf 1 -vt 1 $ch]
+                    } 
+                } else {
+                    lappend charlist [punk::ansi::ansistring VIEW -lf 1 -vt 1 $ch]
+                }
+            }
+            set r [list "" {*}[split $lowbits ""] $ridx {*}$charlist]
+            $t add_row $r
+        }
+        puts stderr $t
+        $t print
+    }
+
     #just the 7-bit ascii. use [page ascii] for the 8-bit layout
     proc ascii {} {return {
         00 NUL  01 SOH  02 STX  03 ETX  04 EOT  05 ENQ  06 ACK  07 BEL
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm
index 4322ceaa..ea8d3f77 100644
--- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm
+++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/console-0.1.1.tm
@@ -1336,6 +1336,7 @@ namespace eval punk::console {
     #https://vt100.net/docs/vt510-rm/DA1.html
     #
     proc get_device_attributes {{inoutchannels {stdin stdout}}} {
+        #Note the vt52 rough equivalen \x1bZ - commonly supported but probably best considered obsolete as it collides with ECMA 48 SCI Single Character Introducer
         #DA1
         variable last_da1_result
         #first element in result is the terminal's architectural class 61,62,63,64.. ?
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.2.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.2.tm
index 6ce76618..46cd5668 100644
--- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.2.tm
+++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.2.tm
@@ -2242,6 +2242,121 @@ namespace eval punk::lib {
         }
     }
 
+
+    #review - compare to IMAP4 methods of specifying ranges?
+    punk::args::define {
+        @id     -id     ::punk::lib::indexset_resolve
+        @cmd    -name     punk::lib::indexset_resolve\
+            -summary\
+            "Resolve an indexset to a list of integers based on supplied list or string length."\
+            -help\
+            "Resolve an 'indexset' to a list of actual indices within the range of the provided numitems value.
+            An indexset consists of a comma delimited list of indexes or index-ranges.
+            The indexes are 0-based.
+            Ranges must be specified with .. as the separator.
+            Whitespace is ignored.
+            Each index (or endpoint of an index-range) can be of the forms accepted by Tcl list or string commands,
+            e.g end-2 or 2+2.
+
+            end means the last page.
+            end-1 means the second last page.
+            0.. is the same as 0..end.
+            examples:
+            1,3..
+                output the page index 1 (2nd page) followed by all from index 3 to the end.
+            0-2,end
+                output the first 3 pages, and the last page.
+            end-1..0
+            output the indexes in reverse order from 2nd last page to first page."
+        @values -min 2 -max 2
+        numitems -type integer
+        indexset -type string
+    }
+    proc indexset_resolve {numitems indexset} {
+        if {![string is integer -strict $numitems] || ![regexp {^[\-\+_end,\.0-9]*$} $indexset]} {
+            #use parser on unhappy path only
+            set errmsg [punk::args::usage -scheme error ::punk::lib::indexset_resolve]
+            uplevel 1 [list return -code error -errorcode {TCL WRONGARGS PUNK} $errmsg]
+        } 
+        set index_list [list] ;#list of actual indexes within the range 
+        set iparts [split $indexset ,]
+        set index_list [list]
+        foreach ipart $iparts {
+            set ipart [string trim $ipart]
+            set rposn [string first .. $ipart]
+            if {$rposn>=0} {
+                #range
+                lassign           [punk::lib::string_splitbefore_indices $ipart $rposn $rposn+2] rawa _ rawb 
+                set rawa [string trim $rawa]
+                set rawb [string trim $rawb]
+                if {$rawa eq ""} {set rawa 0}
+                set a             [punk::lib::lindex_resolve $numitems $rawa]
+                if {$a == -3} {
+                    #undershot - leave negative
+                } elseif {$a == -2 && $rawa ne "-2"} {
+                    #overshot
+                    set a [expr {$numitems}] ;#put it outside the range on the upper side
+                }
+
+                if {$rawb eq ""} {
+                    if {$a > $numitems-1} {
+                        set rawb $a ;#make sure <overshot>.. doesn't return last item - should return nothing
+                    } else {
+                        set rawb end
+                    }
+                }
+                set b             [punk::lib::lindex_resolve $numitems $rawb]
+                if {$b == -3} {
+                    #undershot  - leave negative
+                } elseif {$b == -2 && $rawb ne "-2"} {
+                    set b [expr {$numitems}] ;#overshot - put it outside the range on the upper side
+                }
+
+                #e.g make sure <overshot>.. doesn't return last item - should return nothing as both are above the range.
+                if {$a >= 0 && $a <= $numitems-1 && $b >=0 && $b <= $numitems-1} {
+                    lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                } else {
+                    if {$a >= 0 && $a <= $numitems-1} {
+                        #only a is in the range
+                        if {$b < 0} {
+                            set b 0
+                        } else {
+                            set b [expr {$numitems-1}]
+                        }
+                        lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                    } elseif {$b >=0 && $b <= $numitems-1} {
+                        #only b is in the range
+                        if {$a < 0} {
+                            set a 0
+                        } else {
+                            set a [expr {$numitems-1}]
+                        }
+                        lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                    } else {
+                        #both outside the range
+                        if {$a < 0 && $b > 0} {
+                            #spans the range in forward order
+                            set a 0 
+                            set b [expr {$numitems-1}]
+                            lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                        } elseif {$a > 0 && $b < 0} {
+                            #spans the range in reverse order
+                            set a [expr {$numitems-1}]
+                            set b 0
+                            lappend index_list {*}[punk::lib::range $a $b] ;#required for tcl8.6, on tcl9 this will call lseq internally.
+                        }
+                        #both outside of range on same side
+                    }
+                }
+            } else {
+                set idx [punk::lib::lindex_resolve_basic $numitems $ipart]
+                if {$idx >= 0} {
+                    lappend index_list $idx
+                }
+            }
+        }
+        return $index_list
+    }
     # showdict uses lindex_resolve results -2 & -3 to determine whether index is out of bounds on upper vs lower side
     #REVIEW: This shouldn't really need the list itself - just the length would suffice
     punk::args::define {
@@ -2305,7 +2420,8 @@ namespace eval punk::lib {
             #<0 ?
             error "lindex_resolve len must be an integer"
         }
-        set index [tcl::string::map {_ {}} $index] ;#forward compatibility with integers such as 1_000
+        set index [tcl::string::map {_ {}} $index] ;#basic forward compatibility with integers such as 1_000 for 8.6
+        #todo - be stricter about malformations such as 1000_
         if {[string is integer -strict $index]} {
             #can match +i -i
             if {$index < 0} {
@@ -3345,8 +3461,12 @@ namespace eval punk::lib {
             #NOTE: running ta::detect on a list (or dict) as a whole can be problematic if items in the list have backslash escapes due to Tcl list quoting and escaping behaviour.
             #This commonly happens if there is an unbalanced brace  (which is a normal occurrence and needs to be handled)
             #ta::detect on a list of ansi-containing string may appear to work for some simple inputs but is not reliable
-            #detect_in_list will check at first level. (not intended for detecting ansi in deeper structures)
-            if {![punk::ansi::ta::detect_in_list $linelist]} {
+            #detect_in_list/detectcode_in_list will check at first level. (not intended for detecting ansi in deeper structures)
+
+            #we use detectcode_in_list instead of detect_in_list
+            #detectcode_in_list will detect unclosed (or unopened) paired sequences such as PM (privacy message)
+            # - but the main reason is it is slightly faster.
+            if {![punk::ansi::ta::detectcode_in_list $linelist]} {
                 if {$opt_ansiresets} {
                     foreach ln $linelist {
                         lappend transformed $RST$ln$RST
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm
index f8e55b02..10fda84e 100644
--- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm
+++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/ns-0.1.0.tm
@@ -13,6 +13,9 @@
 # @@ Meta End
 
 
+#BUGS
+# 2025-08
+# n// and n/// won't output info about 'namespace path' if there are no commands in the namespace
 
 # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
 ##  Requirements
@@ -1235,7 +1238,7 @@ tcl::namespace::eval punk::ns {
             if {[dict size [dict get $nsdict namespacepath]]} {
                 set path_text ""
                 if {!$opt_nspathcommands} {
-                    append path_text \n "  Also resolving cmds in namespace paths: [dict keys [dict get $nsdict namespacepath]]"
+                    append path_text \n "  Also resolving cmds in namespace paths: [dict keys [dict get $nsdict namespacepath]] (use n/// to display)"
                 } else {
                     append path_text \n "  Also resolving cmds in namespace paths:"
                     set nspathdict [dict get $nsdict namespacepath]
@@ -1247,8 +1250,17 @@ tcl::namespace::eval punk::ns {
                         }
                     } else {
                         #todo - change to display in column order to be same as main command listing
+                        set parentcommands [dict get $nsdict commands]
                         dict for {k v} $nspathdict {
-                            set pathcommands [dict get $v commands]
+                            set rawpathcommands [dict get $v commands]
+                            set pathcommands [list]
+                            foreach c $rawpathcommands {
+                                if {$c in $parentcommands} {
+                                    lappend pathcommands [punk::ansi::a strike]$c[a]
+                                } else {
+                                    lappend pathcommands $c
+                                }
+                            }
                             set columns 6
                             if {[llength $pathcommands] < 6} {
                                 set columns [llength $v]
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/shellfilter-0.2.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/shellfilter-0.2.tm
index e04e5107..8017d3f5 100644
--- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/shellfilter-0.2.tm
+++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/shellfilter-0.2.tm
@@ -579,13 +579,15 @@ namespace eval shellfilter::chan {
     #punk::ansi::ansistrip converts at least some of the box drawing G0 chars to unicode - todo - more complete conversion
     #assumes line-buffering. a more advanced filter required if ansicodes can arrive split across separate read or write operations!
     oo::class create ansistrip {
-        variable o_trecord 
+        variable o_trecord
         variable o_enc
+        variable o_encbuf
         variable o_is_junction
         constructor {tf} {
             package require punk::ansi
             set o_trecord $tf
             set o_enc [dict get $tf -encoding]
+            set o_encbuf ""
             if {[dict exists $tf -junction]} {
                 set o_is_junction [dict get $tf -junction]
             } else {
@@ -614,10 +616,36 @@ namespace eval shellfilter::chan {
         method flush {transform_handle} {
             return ""
         }
+        #method write {transform_handle bytes} {
+        #    #broken due to occasional unexpected byte sequence
+        #    set instring [encoding convertfrom $o_enc $bytes] 
+        #    set outstring [punk::ansi::ansistrip $instring]
+        #    return [encoding convertto $o_enc $outstring]
+        #}
         method write {transform_handle bytes} {
-            set instring [encoding convertfrom $o_enc $bytes] 
-            set outstring [punk::ansi::ansistrip $instring]
-            return [encoding convertto $o_enc $outstring]
+            #set instring [tcl::encoding::convertfrom $o_enc $bytes] ;naive approach will break due to unexpected byte sequence - occasionally
+            #bytes can break at arbitrary points making encoding conversions invalid.
+
+            set inputbytes $o_encbuf$bytes
+            set o_encbuf ""
+            set tail_offset 0
+            while {$tail_offset < [string length $inputbytes] && [catch {tcl::encoding::convertfrom $o_enc [string range $inputbytes 0 end-$tail_offset]} stringdata]} {
+                incr tail_offset
+            }
+            if {$tail_offset > 0} {
+                if {$tail_offset < [string length $inputbytes]} {
+                    #stringdata from catch statement must be a valid result
+                    set t [expr {$tail_offset - 1}]
+                    set o_encbuf [string range $inputbytes end-$t end]
+                } else {
+                    set stringdata ""
+                    set o_encbuf $inputbytes
+                    return ""
+                }
+            }
+
+            set outstring [punk::ansi::ansistrip $stringdata]
+            return [tcl::encoding::convertto $o_enc $outstring]
         }
         method meta_is_redirection {} {
             return $o_is_junction
@@ -724,6 +752,8 @@ namespace eval shellfilter::chan {
             set emit ""
             if {[string last \x1b $buf] >= 0} {
                 #detect will detect ansi SGR and gron groff and other codes
+                #REVIEW - ta::detect won't detect SOS without paired ST for things like PM
+                # ta::detectcode will - but then split_codes_single will treat unpaired SOS as text?
                 if {[punk::ansi::ta::detect $buf]} {
                     #split_codes_single regex faster than split_codes - but more resulting parts
                     #'single' refers to number of escapes - but can still contain e.g multiple SGR codes (or mode set operations etc)
@@ -1035,7 +1065,7 @@ namespace eval shellfilter::chan {
     }
     #todo - something 
     oo::class create rebuffer {
-        variable o_trecord 
+        variable o_trecord
         variable o_enc
         constructor {tf} {
             set o_trecord $tf
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm
index de0164cd..97969463 100644
--- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm
+++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/textblock-0.1.3.tm
@@ -2528,7 +2528,7 @@ tcl::namespace::eval textblock {
                         set ansiborder_final $ansiborder_body_col_row
                         #$c will always have ansi resets due to overtype behaviour ?
                         #todo - review overtype
-                        if {[punk::ansi::ta::detect $c]} {
+                        if {[punk::ansi::ta::detectcode $c]} {
                             #use only the last ansi sequence in the cell value
                             #Filter out foreground and use background for ansiborder override
                             set parts [punk::ansi::ta::split_codes_single $c]
@@ -4377,6 +4377,7 @@ tcl::namespace::eval textblock {
             if {![punk::ansi::ta::detect $block]} {
                 return $block
             }
+            #could be newine in SOS/PM? review - terminal should theoretically ignore all chars til close of string
             foreach ln [split $block \n] {
                 set parts [punk::ansi::ta::split_codes $ln]
                 if {[lindex $parts 0] eq ""} {
@@ -4894,7 +4895,9 @@ tcl::namespace::eval textblock {
             }
             set textblock [textutil::tabify::untabify2 $textblock $tw]
         }
-        if {[string length $textblock] > 1 && [punk::ansi::ta::detect $textblock]} {
+        #review detectcode vs detect. detectcode won't look for completness of all codes to give a positive result
+        #ansistripraw splits on complete codes..
+        if {[string length $textblock] > 1 && [punk::ansi::ta::detectcode $textblock]} {
             #ansistripraw slightly faster than ansistrip - and won't affect width (avoid detect_g0/conversions)
             set textblock [punk::ansi::ansistripraw $textblock]
         }
@@ -4917,7 +4920,9 @@ tcl::namespace::eval textblock {
         } else {
             set tl $textblock
         }
-        if {[punk::ansi::ta::detect $tl]} {
+        #review detectcode vs detect. detectcode won't look for completness of all codes to give a positive result
+        #ansistripraw splits on complete codes..
+        if {[punk::ansi::ta::detectcode $tl]} {
             set tl [punk::ansi::ansistripraw $tl]
         }
         return [punk::char::ansifreestring_width $tl]
@@ -4964,7 +4969,7 @@ tcl::namespace::eval textblock {
             set textblock [textutil::tabify::untabify2 $textblock $tw]
         }
         #ansistripraw on entire block in one go rather than line by line - result should be the same - review - make tests
-        if {[string length $textblock] > 1 && [punk::ansi::ta::detect $textblock]} {
+        if {[string length $textblock] > 1 && [punk::ansi::ta::detectcode $textblock]} {
             set textblock [punk::ansi::ansistripraw $textblock]
         }
         if {[tcl::string::last \n $textblock] >= 0} {
@@ -5211,7 +5216,7 @@ tcl::namespace::eval textblock {
 
         set known_hasansi [tcl::dict::get $opts -known_hasansi]
         if {$known_hasansi eq ""} {
-            set block_has_ansi [punk::ansi::ta::detect $block]
+            set block_has_ansi [punk::ansi::ta::detectcode $block]
         } else {
             set block_has_ansi $known_hasansi
         }
@@ -5648,7 +5653,7 @@ tcl::namespace::eval textblock {
         set rowcount 0
         set blocklists [list]
         foreach b $blocks {
-            if {[punk::ansi::ta::detect $b]} {
+            if {[punk::ansi::ta::detectcode $b]} {
                 #-ansireplays 1 quite expensive e.g 7ms in 2024
                 set bl [punk::lib::lines_as_list -ansireplays 1 -ansiresets $ansiresets -- $b]
             } else {
@@ -5676,7 +5681,7 @@ tcl::namespace::eval textblock {
         set i -1
         foreach b $args {
             incr i
-            if {[punk::ansi::ta::detect $b]} {
+            if {[punk::ansi::ta::detectcode $b]} {
                 #-ansireplays 1 quite expensive e.g 7ms in 2024
                 set blines [punk::lib::lines_as_list -ansireplays 1 -ansiresets auto -- $b]
             } else {
@@ -5799,7 +5804,7 @@ tcl::namespace::eval textblock {
             #testing of any decent size block line by line - even with max_string_length_line is slower than ta::detect anyway.
 
             #blocks passed to join can be ragged - so we can't pass -known_samewidth to pad
-            if {[punk::ansi::ta::detect $b]} {
+            if {[punk::ansi::ta::detectcode $b]} {
                 # - we need to join to use pad - even though we then need to immediately resplit REVIEW (make line list version of pad?)
                 set replay_block [::join [punk::lib::lines_as_list -ansireplays 1 -ansiresets $ansiresets -- $b] \n]
                 #set blines [split [textblock::pad $replay_block -known_hasansi 1 -known_samewidth $is_samewidth -known_blockwidth $bwidth -width $bwidth -which right -padchar " "] \n]
@@ -8650,7 +8655,7 @@ tcl::namespace::eval textblock {
                 set cache_inner [overtype::block -blockalign $opt_blockalign -ellipsis $opt_ellipsis $underlay $cache_contentpattern]
                 #puts "frame--->ansiwrap -rawansi [ansistring VIEW $opt_ansibase] $cache_inner"
                 if {$opt_ansibase ne ""} {
-                    if {[punk::ansi::ta::detect $cache_inner]} {
+                    if {[punk::ansi::ta::detectcode $cache_inner]} {
                         #set cache_inner [punk::ansi::ansiwrap -rawansi $opt_ansibase $cache_inner]
                         #set cache_inner [punk::ansi::ansiwrap_raw $opt_ansibase "" "" $cache_inner]
                         #jjj ??? review
@@ -8745,7 +8750,7 @@ tcl::namespace::eval textblock {
 
             #set cwidth [textblock::width $contents]
             #JJJ
-            set contents_has_ansi [punk::ansi::ta::detect $contents]
+            set contents_has_ansi [punk::ansi::ta::detectcode $contents]
             if {$opt_ansibase ne ""} {
                 if {$contents_has_ansi} {
                     #set contents [punk::ansi::ansiwrap -rawansi $opt_ansibase $contents]
@@ -8799,8 +8804,9 @@ tcl::namespace::eval textblock {
                 if {$subposn >= 0} {
                     set content_line [lindex $clines $contentindex]
                     #review - different forms of reset e.g \x1b\[m ??
-                    if {[string range $content_line 0 3] eq "\x1b\[0m"} {
+                    if {[tcl::string::range $content_line 0 3] eq "\x1b\[0m"} {
                         set content_line [tcl::string::range $content_line 4 end]
+                        #::tcl::string::replace content_line 0 3
                     }
                     append content_line $opt_ansibase
                     append fs [tcl::string::replace $tline $subposn $subposn+$pattern_offset $content_line] \n
diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/make.tcl b/src/project_layouts/custom/_project/punk.shell-0.1/src/make.tcl
index 8cb8153c..c3cbc9aa 100644
--- a/src/project_layouts/custom/_project/punk.shell-0.1/src/make.tcl
+++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/make.tcl
@@ -3075,7 +3075,7 @@ foreach vfstail $vfs_tails {
                                 #copy the version that is mounted in this runtime to vfsname.new 
                                 if {[catch {
                                     file copy -force $building_runtime $buildfolder/$vfsname.new
-                                    catch {exec chmod +x $buildfolder/$vfsname.new}
+                                    catch {exec chmod +w $buildfolder/$vfsname.new}
                                 } errM]} {
                                     puts stderr "$kit_type 'file copy -force $building_runtime $buildfolder/$vfsname.new' failed\n$errM"
                                     error $errM
diff --git a/src/vfs/_vfscommon.vfs/modules/punk/args-0.2.tm b/src/vfs/_vfscommon.vfs/modules/punk/args-0.2.tm
index fc438d57..a6224c0d 100644
--- a/src/vfs/_vfscommon.vfs/modules/punk/args-0.2.tm
+++ b/src/vfs/_vfscommon.vfs/modules/punk/args-0.2.tm
@@ -1364,6 +1364,11 @@ tcl::namespace::eval punk::args {
                                         #}
                                         tcl::dict::set tmp_optspec_defaults -type $v
                                     }
+                                    -defaultdisplaytype {
+                                        #how the -default is displayed
+                                        #-default doesn't have to be the same type as -type which validates user input that is not defaulted.
+                                        tcl::dict::set tmp_optspec_defaults -defaultdisplaytype $v
+                                    }
                                     -parsekey {
                                         tcl::dict::set tmp_optspec_defaults -parsekey $v
 
@@ -1441,7 +1446,7 @@ tcl::namespace::eval punk::args {
                                     default {
                                         set known   {   -parsekey -group -grouphelp\
                                                         -any -anyopts -arbitrary -form -minsize -maxsize -choices -choicegroups -choicemultiple -choicecolumns -choicelabels -choiceinfo\
-                                                        -type -range -typeranges -default -typedefaults
+                                                        -type -range -typeranges -default -defaultdisplaytype -typedefaults
                                                         -choiceprefix -choiceprefixdenylist -choiceprefixreservelist -choicerestricted -nocase\
                                                         -nominsize -nomaxsize -norange -nochoices -nochoicelabels\
                                                         -type -optional -allow_ansi -validate_ansistripped -strip_ansi -multiple -prefix\
@@ -2052,6 +2057,9 @@ tcl::namespace::eval punk::args {
                                 tcl::dict::set spec_merged -optional 1
                             }
                         }
+                        -defaultdisplaytype {
+                            tcl::dict::set spec_merged -defaultdisplaytype $specval
+                        }
                         -typedefaults {
                             set typecount [llength [tcl::dict::get $spec_merged -type]]
                             if {$typecount != [llength $specval]} {
@@ -2114,7 +2122,7 @@ tcl::namespace::eval punk::args {
                                                     -form -type\
                                                     -parsekey -group\
                                                     -range -typeranges\
-                                                    -default -typedefaults\
+                                                    -default -defaultdisplaytype -typedefaults\
                                                     -minsize -maxsize -choices -choicegroups\
                                                     -choicemultiple -choicecolumns -choiceprefix -choiceprefixdenylist -choiceprefixreservelist -choicerestricted\
                                                     -nocase -optional -multiple -validate_ansistripped -allow_ansi -strip_ansi -help\
@@ -3784,7 +3792,32 @@ tcl::namespace::eval punk::args {
                         }
 
                         if {[dict exists $arginfo -default]} {
-                            set default "'$A_DEFAULT[dict get $arginfo -default]$RST'"
+                            #default isn't necessarily of same type as -type required for validation
+                            #Guessing at the type from the data is likely to be unsatisfactory.
+
+                            set defaultdisplaytype [Dict_getdef $arginfo -defaultdisplaytype string]
+                            switch -- $defaultdisplaytype {
+                                dict {
+                                    #single level
+                                    set rawdefault [dict get $arginfo -default]
+                                    set default     "{\n"
+                                    dict for {k v} $rawdefault {
+                                        append default "  \"$k\" \"$v\"\n"
+                                    }
+                                    append default  "}"
+                                }
+                                list {
+                                    set default     "{\n"
+                                    foreach v $rawdefault {
+                                        append default "  \"$v\"\n"
+                                    }
+                                    append default "}"
+                                }
+                                default {
+                                    #set default "'$A_DEFAULT[dict get $arginfo -default]$RST'"
+                                    set default "'[dict get $arginfo -default]'"
+                                }
+                            }
                         } else {
                             set default ""
                         }
diff --git a/src/vfs/_vfscommon.vfs/modules/punk/pdf-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/pdf-0.1.0.tm
index 4fde460e..15767ef5 100644
--- a/src/vfs/_vfscommon.vfs/modules/punk/pdf-0.1.0.tm
+++ b/src/vfs/_vfscommon.vfs/modules/punk/pdf-0.1.0.tm
@@ -82,70 +82,126 @@ namespace eval ::punk::pdf {
     variable base_x_per_c 4 ;#fixed width output font's x_per_c.
     variable cfg_blocksep_default "#pdf::text:blockstart type %type% pageindex %pageindex% blockindex %blockindex% y-index %y-index%  tlc {%x0% %y0%} brc {%brc%} warnings %warnings% : %marker%"
     variable cfg_pagesep_default  "#pdf::text:pagestart index %index% pageblocks %pageblocks% textblocks %textblocks% imageblocks %imageblocks% : [a bold green]PAGE[a]"
+    variable cfg_header_default
+    set cfg_header_default \
+"#pdf::text::header doc %doc% pages %pages% size %size% sha1 %sha1% punk::pdf %parserversion%
+#  args
+#    %args%"
+
+
     variable callid 0
     variable results
+    variable test
     array set results {}
 
     punk::args::define {
         @id         -id     ::punk::pdf::text
-        @cmd        -name   punk::pdf::text -help\
+        @cmd        -name   punk::pdf::text\
+            -summary\
+            "Extract text from non-image parts of a PDF"\
+            -help\
             "Extract text lines from a pdf
-            (No OCR - will only retrieve actual text contents)"
+            (No OCR - will only retrieve actual text contents)
+            The text function can only operate on the text spans provided by the underlying
+            engine (MuPDF) - which in some cases (for example spaces adjacent to brackets in
+            PDF32000_2008.pdf p24 eg4) does not interpret the number of whitespace characters
+            as a human reader would. The text data supplied for formulas such as in 7.10.1 of
+            the above document can be missing symbols such as the square-root symbol. This would
+            have to be converted to a unicode square root symbol with additional bracketing to
+            maintain meaning. The positioning of subscript numerals would also need to be
+            processed - and without font size information from tclMuPDF - this is currently
+            not practical to process reasonably. Such formulas will come out mangled.
+            "
         @leaders -min 0 -max 0
         @opts -min 0 -max 2
         -p|-page_indexes -parsekey -page_indexes -type string -default "0.." -help\
             "Comma delimited list of indexes and/or ranges specifying which pages to output.
-            The indexes are 0-based.
-            Ranges must be specified with .. as the separator.
-            end means the last page.
-            end-1 means the second last page.
-            0.. is the same as 0..end.
-            examples:
-                1,3..
-                output the page index 1 (2nd page) followed by all from index 3 to the end.
-                0-2,end
-                output the first 3 pages, and the last page.
-                end-1..0
-                output the pages in reverse order from 2nd last page to first page."
+              The indexes are 0-based.
+              Ranges must be specified with .. as the separator.
+              end means the last page.
+              end-1 means the second last page.
+              0.. is the same as .. and is the same as 0..end
+              examples:
+                  1,3..
+                  output the page index 1 (2nd page) followed by all from index 3 to the end.
+                  0..2,end
+                  output the first 3 pages, and the last page.
+                  end-1..0
+                  output the pages in reverse order from 2nd last page to first page."
         -b|-block_indexes -parsekey -block_indexes -type string -default "0.." -help\
             "Comma delimited list of indexes and/or ranges specifying which blocks to output.
-            Format is as per -page_indexes"
-        -merge_yblocks -default true -help\
-            "Whether to merge blocks vertically when they have the same starting y-index.
-            This usually makes the output more in line with the source document - but 
-            in some scenarios it might be desired to keep the blocks listed vertically
-            even though they appear side by side in the source."
+              Format is as per -page_indexes"
+        -merge_yblocks -default false -help\
+            "Whether to merge blocks vertically when they interleave in the y direction.
+              This usually makes the output more in line with the source document - but
+              in some scenarios it might be desired to keep the blocks listed vertically
+              even though they appear side by side in the source. Such vertical listing as
+              given by default when -merge_yblocks is false, does not necessarily match the
+              left to right order.
+              Note that especially when there is small-font text with other text to the right,
+              The small-font text that is not rightmost may be truncated (with trailing ellipsis)
+              to fit it into the vertically merged layout. Such truncation produces warnings on
+              the -errchannel and increments the warning count visible in the -sep-block line
+              preceding the merged blocks.
+              Smaller print that doesn't fit in the layout is sometimes not required to extract
+              the main data from a document such as an invoice - and the merged data can make
+              other elements more amenable to parsing. In practice, 2 passes with -merge_yblocks
+              configured on and off, combined with -sep_block data and a set of custom rules
+              based on the specific document set may be required for information extraction."
+        #------------------------------------------------------------------------------------
         -compact -type boolean -default false -help\
             "If compact is false - blank lines approximating the vertical spacing in the
               source document will be emitted between each block.
               If -merged_yblocks is true - there will still be potential vertical whitespace
               within any merged blocks, as the point of -merge_yblocks is to align elements
-              closely to that within the source document, but there will be no vertical layout
-              spacing between such blocks, or other non-merging blocks."
-        -image_place_ansi -type dict -default {border Term-grey78 inner Term-grey58 small Term-grey15} -help\
-            "3-element list of ANSI colour codes for the textual image placeholders.
+              closely to that within the source document, but there will be no vertical
+              layout spacing between such merged blocks, or other non-merging blocks.
+              This setting is redundant if -postcompact is true."
+        -postcompact -type boolean -default false -help\
+            "Strip out vertical spacing (empty or whitespace-only lines) from all blocks.
+              This sets -compact to true, and removes vertical gaps from any merged blocks.
+              The most vertically compact representation of the text will result from setting
+              -merge true, -postcompact true and -image_place_ansi to the empty string, as
+              well as omitting (or setting to empty string) -sep_page and -sep_block.
+              Note that if the source PDF actually positioned blank lines as strings of
+              whitespace, these too would be removed.
+              If -image_place_ansi is left as the default or has other ansi codes, the
+              image placeholders will not be stripped, even if they consist of the default
+              space characters.
+              If -sep_block or -sep_page are set to one or more spaces - these blank lines
+              will still be emitted."
+        -shrink_textfree_blocks -type boolean -default true -help\
+            "If a block is image only - don't output the placeholders.
+              These placeholders will still be emitted if -merge_yblocks is true and they
+              are in a merged block that has also merged text blocks. Set -image_place_ansi
+              to the empty string to suppress ANSI background for all image placeholders."
+        -image_place_ansi -type dict -defaultdisplaytype dict -default {border Term-grey78 inner Term-grey58 small Term-grey15} -help\
+            "Dict of ANSI colour codes for the textual image placeholders.
               Colour codes are those as listed by 'punk::ansi::a?'
-              If the default -image_place_chars values of the space character are being used, the colour
-              names should begin with a capital letter (indicating background as opposed to foreground colour).
+              If the default -image_place_chars values of the space character are being used,
+              the colour names should begin with a capital letter (indicating background as
+              opposed to foreground colour).
               The placeholders are by default space characters with the ANSI applied.
-              This means that just by stripping ANSI from the output, layout is maintained, and the
-              text can be processed simply, or the ANSI can be retained if your text parser wishes
-              to use it to make decisions.
-              The colour at element 'border' is for the border colour, 'inner' for the area within the border,
-              and the colour at element 'small' is for images too small for border representation. The border
-              bears no relation to the underlying image - and is merely to aid in seeing when images overlap
-              or are cropped etc. The dimensions of the placeholder are approximates only to the source
-              images, as they are in multiples of the fixed-width output font character width and
-              line height.
-              Setting -image_place_ansi to an empty value, or -image_place_chars to an empty value
-              will stop ANSI from being output at the image locations."
-        -image_place_chars -type dict -default {tlc " " hlt " " trc " " vlr " " brc " " hlb " " blc " " vll " " inner " " small " "} -help\
-            "Characters to use for image placeholders. The default list of empty spaces is recommended
-              for ease of visualisation and output parsing assuming an function such as punk::ansi::ansistrip
-              is available. An alternative set of characters such as a unicode block character might be useful
-              alongside turning ANSI off by setting -image_place_ansi to empty; if the unicode character is known
-              to be outside the domain of characters in the source document, and it is desired to use such chars
-              to detect approximate image position in relation to text.
+              This means that just by stripping ANSI from the output, layout is maintained,
+              and the text can be processed simply, or the ANSI can be retained if your text
+              parser wishes to use it to make decisions.
+              The colour at element 'border' is for the border colour, 'inner' for the area
+              within the border, and the colour at element 'small' is for images too small for
+              border representation. The border bears no relation to the underlying image - and
+              is merely to aid in seeing when images overlap or are cropped etc. The dimensions
+              of the placeholder are approximates only to the source images, as they are in
+              multiples of the fixed-width output font character width and line height.
+              Setting -image_place_ansi to an empty value, or -image_place_chars to an empty
+              value will stop ANSI from being output at the image locations."
+        #------------------------------------------------------------------------------------
+        -image_place_chars -type dict -defaultdisplaytype dict -default {tlc " " hlt " " trc " " vlr " " brc " " hlb " " blc " " vll " " inner " " small " "} -help\
+            "Characters to use for image placeholders. The default list of empty spaces is
+              recommended for ease of visualisation and output parsing assuming an function
+              such as punk::ansi::ansistrip is available. An alternative set of characters
+              such as a unicode block character might be useful alongside turning ANSI off by
+              setting -image_place_ansi to empty; if the unicode character is known to be
+              outside the domain of characters in the source document, and it is desired to
+              use such chars to detect approximate image position in relation to text.
                 tlc - border top left corner
                 hlt - border top horizontal lines
                 trc - border top right corner
@@ -157,40 +213,51 @@ namespace eval ::punk::pdf {
                 inner - image area within border
                 small - for images too small for borders
               "
+        -header    -type string -default "" -help\
+            "Header line to record details of the conversion run.
+              Set to 'default' to use the provided default template.
+              Substitutions available:
+                %doc%     - full path of source pdf
+                %pages%   - number of pages in the pdf
+                %size%    - size in bytes of the input pdf file
+                %sha1%    - sha1 checksum of the input pdf file
+                %parserversion% - version of punk::pdf used.
+                %args%    - arguments supplied to text command.
+                %default% - the default header template - for extending."
         -sep_page  -type string -default "" -help\
             "Send a line of output to stdout at the beginning of each
-            page in the PDF.
-            If set to the value: default
-            a default separator with some added info will be used.
-            line begins with: #pdf::text:pagestart
-            Substitutions available:
-                %index%       0-based index of the page (not the page number as in the PDF)
-                %pageblocks%  total number of blocks in the page, includes both
-                text and image blocks.
-                %textblocks%  number of text blocks in the page
-                %imageblocks% number of image blocks in the page
-                %nl%          newline
-                %default%     The default page sep - to allow extending."
+              page in the PDF.
+              If set to the value: default
+              a default separator with some added info will be used.
+              line begins with: #pdf::text:pagestart
+              Substitutions available:
+                  %index%       0-based index of the page (not the page number as in the PDF)
+                  %pageblocks%  total number of blocks in the page, includes both
+                  text and image blocks.
+                  %textblocks%  number of text blocks in the page
+                  %imageblocks% number of image blocks in the page
+                  %nl%          newline
+                  %default%     The default page sep - to allow extending."
         -sep_block -type string -default "" -help\
             "Send a line of output to stdout at the beginning of each
-            textblock in the PDF. This can aid in parsing.
-            If set to the the value: default
-            a default separator with some added info will be used.
-            line begins with: #pdf::text:blockstart
-            Substitutions available:
-                %type%      type of block text or image
-                %pageindex% 0-based index of the page (not the page number as in the PDF)
-                %y-index%   0-based index of block after the textblock list has
-                been sorted by y0
-                %x0%        first x coordinate of the block
-                %y0%        first y coordinate of the block
-                %x1%        second x coordinate of the block
-                %y1%        second y coordinate of the block
-                %nl%        newline
-                %warnings%  Number of warnings emitted to stderr for the block.
-                            These are normally for text overlay attempts, which should 
-                            only affect whitespace, but should be checked.
-                %default%   The default blocksep - to allow extending."
+              textblock in the PDF. This can aid in parsing.
+              If set to the the value: default
+              a default separator with some added info will be used.
+              line begins with: #pdf::text:blockstart
+              Substitutions available:
+                  %type%      type of block text or image
+                  %pageindex% 0-based index of the page (not the page number as in the PDF)
+                  %y-index%   0-based index of block after the textblock list has
+                  been sorted by y0
+                  %x0%        first x coordinate of the block
+                  %y0%        first y coordinate of the block
+                  %x1%        second x coordinate of the block
+                  %y1%        second y coordinate of the block
+                  %nl%        newline
+                  %warnings%  Number of warnings emitted to stderr for the block.
+                              These are normally for text overlay attempts, which should 
+                              only affect whitespace, but should be checked.
+                  %default%   The default blocksep - to allow extending."
         -outchannel -type string -default stdout -choicerestricted 0 -choices {return null stderr stdout}\
             -choicelabels {
                 return\
@@ -215,33 +282,33 @@ namespace eval ::punk::pdf {
               any open Tcl channel can be used."
         -debug_pageblock -default "" -type list -minsize 2 -maxsize 2 -help\
             "A 2-element list of the page then block index(s) for which to output
-            extra information on stderr.
-            Supplied value must contain 2 elements {indexset indexset}
-            The membership selection is performed the same way as -page_indexes.
-            e.g {.. end-1} means second-last block on every page."
+              extra information on stderr.
+              Supplied value if not empty, must contain 2 elements {indexset indexset}
+              The membership selection is performed the same way as -page_indexes.
+              e.g {.. end-1} means second-last block on every page."
         -debug_highlight -default "red bold" -help\
             "ANSI codes as understood by punk::ansi (see punk::ansi::a?)
 
-            These are used to colourise output if -debug_pageblock has been set"
+              These are used to colourise output if -debug_pageblock has been set"
         -highlight_overwrites -default "bold" -help\
             "Ansi codes as understood by punk::ansi (see punk::ansi::a?)
 
-            These are used to mark text elements which have been overlayed with
-            the same text data. This may occur for example in table rows in the source
-            document that have alternating colour or emphasis.
-            Having this ANSI applied can be useful when parsing the output to aid in
-            grouping lines. e.g by using [punk::ansi::ta::detect <str>] or similar.
-            
-            By applying ansi, the layout is not disturbed when viewing in a terminal,
-            and may be more visually similar to the source document.
-            Saved output can always be passed through an ansistrip function if needed,
-            or the default -highlight_overwrites can be overridden by supplying an empty
-            string."
-            
+              These are used to mark text elements which have been overlayed with
+              the same text data. This may occur for example in table rows in the source
+              document that have alternating colour or emphasis.
+              Having this ANSI applied can be useful when parsing the output to aid in
+              grouping lines. e.g by using [punk::ansi::ta::detect <str>] or similar.
+
+              By applying ansi, the layout is not disturbed when viewing in a terminal,
+              and may be more visually similar to the source document.
+              Saved output can always be passed through an ansistrip function if needed,
+              or the default -highlight_overwrites can be overridden by supplying an empty
+              string."
+
         -engine -type any -default MuPDF -choices {MuPDF} -help\
             "Only MuPDF is currently implemented. Be aware that while this script and tclMuPDF
-            are MIT/BSD licensed, the underlying MuPDF library is AGPL - which could place
-            some restrictions on commercial use."
+              are MIT/BSD licensed, the underlying MuPDF library is AGPL - which could place
+              some restrictions on commercial use."
         -warnings_engine -default 0 -choices {0 1 2}\
             -choicelabels {
                 0\
@@ -253,7 +320,8 @@ namespace eval ::punk::pdf {
             }\
             -help\
             "Whether to display internal warnings regarding the state of the PDF document.
-            The underlying MuPDF library can in many cases work with bad/corrupted PDFS."
+              The underlying MuPDF library can in many cases work with bad/corrupted PDFS,
+              and if so, may emit warnings."
         -warnings_textblock -default 2 -type integer -range {0 9} -help\
             "0 to disable.
               1 for least info, 9 for most.
@@ -263,12 +331,20 @@ namespace eval ::punk::pdf {
             "Path to the .pdf file to parse"
     }
     proc text {args} {
+
         variable callid
         incr callid
+
+        package require tcl::chan::variable
+        #variable testchan
+        #set testchan [::tcl::chan::variable ::punk::pdf::test($callid)]
+        #chan configure $testchan -translation lf
+
         variable results
         variable base_x_per_c
         variable cfg_blocksep_default
         variable cfg_pagesep_default
+        variable cfg_header_default
         set x_per_line [expr {$base_x_per_c * 2}] ;#hack for now
 
         set argd [punk::args::parse $args withid ::punk::pdf::text]
@@ -282,6 +358,10 @@ namespace eval ::punk::pdf {
         if {$opt_blocksep eq "default"} {
             set opt_blocksep $cfg_blocksep_default
         }
+        set opt_header              [dict get $opts -header]
+        if {$opt_header eq "default"} {
+            set opt_header $cfg_header_default
+        }
         set debug_pageblock         [dict get $opts -debug_pageblock]
         set debug_highlight         [string trim [dict get $opts -debug_highlight]]
         set highlight_overwrites    [string trim [dict get $opts -highlight_overwrites]]
@@ -289,8 +369,14 @@ namespace eval ::punk::pdf {
         set block_indexes           [string trim [dict get $opts -block_indexes]]
         set warnings_engine         [dict get $opts -warnings_engine]
         set opt_compact             [dict get $opts -compact]
+        set opt_postcompact         [dict get $opts -postcompact]
+        if {$opt_postcompact} {
+            set opt_compact 1 ;#don't emit inter-block spacing in the first place
+            #will also strip vertical space merged blocks
+        }
         set opt_image_place_ansi    [dict get $opts -image_place_ansi]
         set opt_image_place_chars   [dict get $opts -image_place_chars]
+        set opt_shrink_textfree_blocks [dict get $opts -shrink_textfree_blocks]
         if {$warnings_engine == 2} {
             #turn on printwarnings before we start
             ::mupdf::printwarnings 1
@@ -302,6 +388,8 @@ namespace eval ::punk::pdf {
             return {
                 package require tcl::chan::variable
                 set outchan [::tcl::chan::variable ::punk::pdf::results($callid)]
+                #chan configure $outchan -translation binary -encoding utf-8
+                chan configure $outchan -translation lf -encoding utf-8
             }
             null {
                 package require tcl::chan::null
@@ -320,7 +408,8 @@ namespace eval ::punk::pdf {
                 } else {
                     set errchan [::tcl::chan::variable ::punk::pdf::results($callid)]
                 }
-                chan configure $errchan -buffering none
+                #chan configure $errchan -buffering none -translation binary
+                chan configure $errchan -buffering none -translation lf -encoding utf-8
             }
             null {
                 package require tcl::chan::null
@@ -347,6 +436,13 @@ namespace eval ::punk::pdf {
 
         set d [::mupdf::open $fname]
         set npages [$d npages]
+        if {$opt_header ne ""} {
+                set opt_header [string map [list %default% $cfg_header_default] $opt_header]
+                package require sha1
+                set hash [sha1::sha1 -hex -file $fname]
+                set map [list %nl% \n %crlf% \r\n %doc% $fname %pages% $npages %size% [file size $fname] %sha1% $hash %args% $args %parserversion% [package present punk::pdf]]
+                puts $outchan [string map $map $opt_header]
+        }
 
         set page_index_list        [punk::lib::indexset_resolve $npages $page_indexes]
         set debug_pageblock_pages  [punk::lib::indexset_resolve $npages [lindex $debug_pageblock 0]]
@@ -495,21 +591,33 @@ namespace eval ::punk::pdf {
                         #    }
                         #}
                         #------------
-                        set cur_merge_base_fields [dict get [lindex $MERGE_BUFFER 0 0] header fields] 
-                        if {$bny0 >= [dict get $cur_merge_base_fields y1]} {
-                            #this block begins below the entire MERGE_BUFFER - time to render it
-                            _process_merge_buffer MERGE_BUFFER $pageindex $cfg_blocksep_default $opt_blocksep $opt_merge_yblocks $warnings_textblock $outchan $errchan
-                            if {!$opt_compact} {
-                                #default: -compact false
-                                set vdist [expr {$bny0 - [dict get $cur_merge_base_fields y1]}]
-                                set vdistlines [expr {int(ceil($vdist / $x_per_line))}]
-                                if {$vdistlines > 0} {
-                                    set vseparator  [string repeat \n $vdistlines]
-                                    puts -nonewline $outchan $vseparator
+                        #puts "MB dimensions: [punk::pdf::system::merge_buffer_dimensions MERGE_BUFFER]"
+                        set mbdimensions [punk::pdf::system::merge_buffer_dimensions MERGE_BUFFER] ;#list x0 y0 x1 y1
+                        #set cur_merge_base_fields [dict get [lindex $MERGE_BUFFER 0 0] header fields]
+                        #if {$bny0 >= [dict get $cur_merge_base_fields y1]} {}
+                        if {$bny0 >= [lindex $mbdimensions 3]} {
+
+                            #this block begins below the entire MERGE_BUFFER - time to render previous block 
+                            _process_merge_buffer MERGE_BUFFER $pageindex $cfg_blocksep_default $opt_blocksep $opt_merge_yblocks $opt_shrink_textfree_blocks $opt_postcompact $warnings_textblock $outchan $errchan
+
+                            if {$tbnum in $block_index_list} {
+                                #now this block's offset from the top
+                                if {!$opt_compact} {
+                                    #default: -compact false
+                                    #set vdist [expr {$bny0 - [dict get $cur_merge_base_fields y1]}]
+                                    set vdist [expr {$bny0 - [lindex $mbdimensions 3]}]
+                                    #set vdistlines [expr {int(ceil($vdist / $x_per_line))}]
+                                    set vdistlines [expr {int(floor($vdist / $x_per_line))}]
+                                    if {$vdistlines > 0} {
+                                        set vseparator  [string repeat \n $vdistlines]
+                                        puts -nonewline $outchan $vseparator
+                                    }
                                 }
                             }
                         } else {
-                            lappend MERGE_BUFFER [list]
+                            if {$tbnum in $block_index_list} {
+                                lappend MERGE_BUFFER [list]
+                            }
                         }
                     }
 
@@ -577,9 +685,17 @@ namespace eval ::punk::pdf {
 
                     set vertx [expr {$bny1 - $bny0}]
                     set img_char_height [expr {int(ceil($vertx / $x_per_line))}] ;#linecount
-                    #puts "--  imgstart_char_pos:$imgstart_char_pos img_char_width:$img_char_width"
+                    #puts "-- img_char_height:$img_char_height img_char:width: $img_char_width"
+                    if {$img_char_height == 0} {
+                        puts $errchan "WARNING type debug msg {zero height image}" ;#todo
+                        incr block_warning_count ;#0 based
+                        #nothing to render for a zero height img
+                        #MERGE_BUFFER will have header
+                        lset MERGE_BUFFER end end+1 [list warnings $block_warning_count]
+                        continue
+                    }
 
-                    #put a border around the image if it's big enough 
+                    #put a border around the image if it's big enough
                     #if it can't be bordered use a different colour
                     #this gives us 3 greys applied to space elements, to aid in seeing image overlaps and positioning.
                     #all can be easily stripped out with ansistrip to leave only spaces for text processing
@@ -629,14 +745,24 @@ namespace eval ::punk::pdf {
                             set a_inner  [dict get $opt_image_place_ansi inner]
                             set b [textblock::frame -type [list tlc $c_tlc hlt $c_hlt trc $c_trc vlr $c_vlr brc $c_brc hlb $c_hlb blc $c_blc vll $c_vll] -ansiborder [a $a_border] [punk::ansi::ansiwrap $a_inner $inner]]
                         }
-                    } 
+                    }
                     #set b [textblock::block $img_char_width $img_char_height " "]
                     #set b [punk::ansi::ansiwrap Term-102 $b]
                     set img_indent [textblock::block [expr {max(0,$imgstart_char_pos)}] $img_char_height \uFFFD]
                     if {$imgstart_char_pos < 0} {
                         #img representation is effectively cropped at left
                         #img_indent will just be a list of newlines to match height
+
+                        #even if img_indent and b have no ansi - result of renderspace will have ansi resets (todo review overtype::renderspace)
+                        set had_ansi 0
+                        if {[punk::ansi::ta::detect $b]} {
+                            set had_ansi 1
+                        }
                         set positioned_block [overtype::renderspace -insert_mode 0 -expand_right 1 -startcolumn $imgstart_char_pos $img_indent $b]
+                        if {!$had_ansi} {
+                            #if user disabled ansi on image_placeholders - we need to ensure it still has none after renderspace
+                            set positioned_block [punk::ansi::ansistrip $positioned_block]
+                        }
                     } else {
                         set positioned_block [textblock::join -- $img_indent $b]
                     }
@@ -663,7 +789,7 @@ namespace eval ::punk::pdf {
                         #(common to encounter line data where only some text elements are within the block we are currently at - e.g tables)
                         #todo - review treatment of 
                         #assume if it just extends out of the blocks x range - it's part of a larger block (?) review - xrangefudge?
-                        set has_element_in_range 0 
+                        set has_element_in_range 0
                         foreach xxt $ydata {
                             lassign $xxt dx0 dx1
                             set xrangefudge 0 ;#shouldn't be needed? 
@@ -682,7 +808,7 @@ namespace eval ::punk::pdf {
                             lassign $debug_pageblock dp db
 
                             if {$pageindex in $debug_pageblock_pages && $tbnum in $debug_pageblock_blocks} {
-                                set debuginfo "b:$tbnum yinfo: $yinfo ydata: $ydata"
+                                set debuginfo "b:$tbnum yinfo: $yinfo ydata: [punk::ansi::ansistring VIEW -lf 1 $ydata]"
                                 if {$debug_highlight ne ""} {
                                     set debuginfo "[punk::ansi::a {*}$debug_highlight]$debuginfo\x1b\[m"
                                 }
@@ -717,23 +843,44 @@ namespace eval ::punk::pdf {
                         set existing_y0s [lsearch -all -inline -index 0 -subindices $vfudged_lines *]
                         set existing_y1s [lsearch -all -inline -index 1 -subindices $vfudged_lines *]
                         set vfudged_line_added 0
+                        #set vfidx -1
+                        #foreach ey0 $existing_y0s {
+                        #    incr vfidx
+                        #    set vdiff [expr {abs($line_y0 - $ey0)}]
+                        #    #x units per char is normally narrower than height
+                        #    set linefudge 1.5 ;#choose linefudge less than assumed x_per_line of 2*base_x_per_c - or small font lines will not be separated #REVIEW
+                        #    if {$vdiff < ($linefudge * $base_x_per_c)} {
+                        #        #normalize to existing value
+                        #        #line_y1 unnormalised??
+                        #        #if y1 vdiff is about the same - normalise it too
+                        #        set ey1 [lindex $existing_y1s $vfidx]
+                        #        set vdiff2 [expr {abs($line_y1 -$ey1)}]
+                        #        if {abs($vdiff2 - $vdiff) <= 8} {
+                        #            set line_y1 $ey1
+                        #        }
+                        #        set vfudged_line_added 1
+                        #        lappend vfudged_lines [list $ey0 $line_y1 [lindex $fl 2]]
+                        #        break
+                        #    }
+                        #}
+
+                        #when mixing fonts in same line - aligning the bases is more common.
                         set vfidx -1
-                        foreach ey0 $existing_y0s {
+                        foreach ey1 $existing_y1s {
                             incr vfidx
-                            set vdiff [expr {abs($line_y0 - $ey0)}]
+                            set vdiff [expr {abs($line_y1 - $ey1)}]
                             #x units per char is normally narrower than height
                             set linefudge 1.5 ;#choose linefudge less than assumed x_per_line of 2*base_x_per_c - or small font lines will not be separated #REVIEW
                             if {$vdiff < ($linefudge * $base_x_per_c)} {
                                 #normalize to existing value
-                                #line_y1 unnormalised??
-                                #if y1 vdiff is about the same - normalise it too
-                                set ey1 [lindex $existing_y1s $vfidx]
-                                set vdiff2 [expr {abs($line_y1 -$ey1)}]
+                                #if y0 vdiff is about the same - normalise it too
+                                set ey0 [lindex $existing_y0s $vfidx]
+                                set vdiff2 [expr {abs($line_y0 -$ey0)}]
                                 if {abs($vdiff2 - $vdiff) <= 8} {
-                                    set line_y1 $ey1
+                                    set line_y0 $ey0
                                 }
                                 set vfudged_line_added 1
-                                lappend vfudged_lines [list $ey0 $line_y1 [lindex $fl 2]]
+                                lappend vfudged_lines [list $line_y0 $ey1 [lindex $fl 2]]
                                 break
                             }
                         }
@@ -774,7 +921,7 @@ namespace eval ::punk::pdf {
                         incr block_linenum
                         #puts "fline: $fl"
                         lassign $fl fy0 fy1 spans_by_x0  ;#don't pull out spans_by_x0_adjusted - it's adjusted within this loop by referring to its index in working_lines
-                        
+
                         set line_chunks [list]
                         #calculating x units per char for each string is only really possible if we know the exact font and size
                         set xidx -1 ;#index of spans in line, which are sorted by x0 (lhs)
@@ -783,6 +930,8 @@ namespace eval ::punk::pdf {
                             incr xidx
                             lassign $xxt tx0 tx1 text
 
+                            #puts -nonewline $testchan $text
+
 
                             #any span, including first may have rhs alignments with spans from previous lines in the same block
                             #look at original rhs alignment of spans in previous lines that match this one's x1
@@ -850,6 +999,7 @@ namespace eval ::punk::pdf {
 
                             #1st shot - use rhs of element immediately to the left (either existing TESTBLOCK or earliear spans on same line)
                             if {$xidx == 0} {
+                                set prevspan_x0 ""
                                 set prevspan_x1 ""
                                 set prevspan_x1_adjusted ""
                                 if {$TESTBLOCK ne ""} {
@@ -888,6 +1038,7 @@ namespace eval ::punk::pdf {
                             } else {
 
                                 #set prevspan_x1_adjusted [lindex $spans_by_x0_adjusted $xidx-1 1]
+                                set prevspan_x0 [lindex $spans_by_x0 $xidx-1 0]
                                 set prevspan_x1_adjusted [lindex $working_lines $block_linenum 3 $xidx-1 1]
                                 set prevspan_x1 [lindex $spans_by_x0 $xidx-1 1]
                                 #set effective_prev_x1 [expr {max($prevspan_x1,$prevspan_x1_adjusted)}]
@@ -983,7 +1134,7 @@ namespace eval ::punk::pdf {
                                 if {$pageindex in $debug_pageblock_pages && $tbnum in $debug_pageblock_blocks} {
                                     set debuginfo "span x0 xx1 text: [lindex $xxt 0] [lindex $xxt 1] [punk::ansi::a normal]'[lindex $xxt 2]'\x1b\[m"
                                     if {$debug_highlight ne ""} {
-                                        set debuginfo "[punk::ansi::a underline {*}$debug_highlight]$debuginfo\x1b\[m"
+                                        set debuginfo "[punk::ansi::a overline {*}$debug_highlight]$debuginfo\x1b\[m"
                                     }
                                     puts $errchan $debuginfo
                                     if {$prevspan_x1 ne $prevspan_x1_adjusted} {
@@ -992,9 +1143,9 @@ namespace eval ::punk::pdf {
                                     } else {
                                         set C "" ; set RST ""
                                     }
-                                    set debuginfo  "prevspan_x1:$prevspan_x1 prevspan_x1_adjusted: $C$prevspan_x1_adjusted$RST effective_prev_x1: $effective_prev_x1"
+                                    set debuginfo  "prev x0  x1     : $prevspan_x0 $prevspan_x1 prevspan_x1_adjusted: $C$prevspan_x1_adjusted$RST effective_prev_x1: $effective_prev_x1"
                                     if {$debug_highlight ne ""} {
-                                        set debuginfo "[punk::ansi::a overline {*}$debug_highlight]$debuginfo\x1b\[m"
+                                        set debuginfo "[punk::ansi::ansiwrap underline {*}$debug_highlight $debuginfo]"
                                     }
                                     puts $errchan $debuginfo
                                 }
@@ -1014,7 +1165,9 @@ namespace eval ::punk::pdf {
                                 #set buffer [string repeat " " $start_char_pos]
                                 #set x_gap_chars 0
                             } else {
-                                set x_gap [expr {$tx0 - $prevspan_x1}] ;#should be positive (but could it be negative, ie overlapped?)
+                                set x_gap [expr {$tx0 - $prevspan_x1}] ;#usually  positive but can easily be negative. e.g the word TAX in some fonts can have the left of the A slightly under the top bar of the T
+                                #if the font is large - this negative offset could be more than an entire width of our output character size.
+                                #we really need to know the source font to do this even close to properly - REVIEW
                                 set x_gap_chars [x_to_c $x_gap] ;#used to expand buffer below during collision detection.
                                 #set buffer [string repeat "^" $x_gap_chars]
                             }
@@ -1022,14 +1175,19 @@ namespace eval ::punk::pdf {
                             #if {$xidx > 0} {
                             #    set buffer [string repeat " " [expr {[x_to_c $effective_prev_x1] + $x_gap_chars}]]
                             #}
-
-                            set text_points [c_to_x [string length $text]]
-                            set tend        [expr {$tx0 + $text_points}]
-                            set remspace    [expr {$tx1 - $tend}]
-                            #set tailbuffer  [string repeat "-" [x_to_c $remspace]]
-                            set tailbuffer ""
-
-                            if {$xidx > 0 && $effective_prev_x1 ne "" && $tx0 <= $effective_prev_x1} {
+                            if {$xidx > 0 && $x_gap >= -3 && $x_gap < 0} {
+                                #treat small negative gap (overlap) in source (prev_x1 > tx0) (typically for character overhang/underhang) as adjacency
+                                dict set page_seen_chunks $text 1
+                                set buffer [string repeat " " [expr {[x_to_c $effective_prev_x1]}]]
+                                lappend line_chunks "$buffer$text"
+                            } elseif {$xidx > 0 && $x_gap >= 0 && $x_gap < ($base_x_per_c * 0.75)} {
+                                #treat no, or small positive gap in source (prev_x1 < tx0) as intended adjacency
+                                #.75 is yet another fudge for lack of font info - todo - rework to another engine?
+                                #this branch could be merged with above, but kept separate for clarity and for specific comments.
+                                dict set page_seen_chunks $text 1
+                                set buffer [string repeat " " [expr {[x_to_c $effective_prev_x1]}]]
+                                lappend line_chunks "$buffer$text"
+                            } elseif {$xidx > 0 && $effective_prev_x1 ne "" && $tx0 <= $effective_prev_x1} {
                                 #possible overlap/overlay - left x of this data is before right x of previous
 
                                 #first use output font's x_per_c and see if only overlaying whitespace
@@ -1075,6 +1233,8 @@ namespace eval ::punk::pdf {
                                             #set xcursor_prev [string length [punk::ansi::ansistrip [join [lrange $line_chunks 0 end-1] ""]]]
                                             #set buffer_prev [string repeat " " [expr {$startpos - $xcursor_prev}]]
                                             #lset line_chunks end "$buffer_prev[punk::ansi::a {*}$highlight_overwrites]$text[punk::ansi::a]"
+
+
                                             lset line_chunks end "$buffer[punk::ansi::a {*}$highlight_overwrites]$text[punk::ansi::a]"
                                         }
                                         #else: no need to overwrite same text if no ansi
@@ -1110,7 +1270,7 @@ namespace eval ::punk::pdf {
                                         set expand [string repeat " " $expandchars]
                                         append buffer $expand
                                         # REVIEW - seems to work reasonably (e.g in FTP-1.1.1.pdf mentioned) - but we'll still output a warning
-                                        lappend line_chunks $buffer$text$tailbuffer
+                                        lappend line_chunks $buffer$text
 
                                         if {$char_overlap > 0 && $warnings_textblock >= 4} {
                                             puts $errchan "Warning  pageindex [format %4s $pageindex] blockindex [format %4s $tbnum] blockwarning [format %4s $block_warning_count] type {text overlay}"
@@ -1137,7 +1297,7 @@ namespace eval ::punk::pdf {
                                     if {$xidx > 0 && $x_gap_chars == 0} {
                                         set buffer [string repeat " " [expr {[x_to_c $effective_prev_x1]}]]
                                     }
-                                    lappend line_chunks "$buffer$text$tailbuffer"
+                                    lappend line_chunks "$buffer$text"
                                 }
                             } else {
                                 dict set page_seen_chunks $text 1
@@ -1147,7 +1307,7 @@ namespace eval ::punk::pdf {
                                         set buffer [string repeat " " [expr {[x_to_c $effective_prev_x1]}]]
                                     }
                                 }
-                                lappend line_chunks "$buffer$text$tailbuffer"
+                                lappend line_chunks "$buffer$text"
                             }
                             #position 3 is spans_by_x0_adjusted
                             set chunk [lindex $line_chunks end]
@@ -1208,10 +1368,10 @@ namespace eval ::punk::pdf {
                     puts $outchan "____________________________$bx1,$by1"
                 }
             }
-            _process_merge_buffer MERGE_BUFFER $pageindex $cfg_blocksep_default $opt_blocksep $opt_merge_yblocks $warnings_textblock $outchan $errchan
             if {[llength $MERGE_BUFFER]} {
-                puts $errchan "WARNING: MERGE_BUFFER not empty, but should be empty at end of page"
-                puts $outchan [punk::lib::showdict -roottype list $MERGE_BUFFER]
+                _process_merge_buffer MERGE_BUFFER $pageindex $cfg_blocksep_default $opt_blocksep $opt_merge_yblocks $opt_shrink_textfree_blocks $opt_postcompact $warnings_textblock $outchan $errchan
+                #puts $errchan "WARNING: MERGE_BUFFER not empty, but should be empty at end of page"
+                #puts $outchan [punk::lib::showdict -roottype list $MERGE_BUFFER]
             }
         }
 
@@ -1219,12 +1379,15 @@ namespace eval ::punk::pdf {
             puts $errchan "MuPDF wasrepaired: [$d wasrepaired]"
             puts $errchan "MuPDF warnings   : [$d warnings]"
         }
+        flush $outchan
+        flush $errchan
         if {$opt_outchan eq "return" || $opt_errchan eq "return"} {
             #if both are 'return' at the same time - the same channel is used for both
-            flush $outchan
-            flush $errchan
-            set result [set results($callid)]
+            set result [encoding convertfrom utf-8 [set results($callid)]]
+            #set result [set results($callid)]
+            return $result
         }
+        return
     }
 
 }
@@ -1264,6 +1427,43 @@ tcl::namespace::eval punk::pdf::lib {
         return $result
     }
 
+    #subset of possible boms - just for experimenting
+    proc bom_ident {binstr} {
+        if {[string match \uFFEF* $binstr]} {
+            return bom
+        }
+        set first32 [string range $binstr 0 3]
+        set bytes [binary scan $binstr H2H2H2H2 a b c d]
+        switch -- $a {
+            fe {
+                if {"$b" eq "ff"} {
+                    return utf-16be
+                }
+            }
+            ef {
+                if {"$b$c" eq "bbbf"} {
+                    return utf-8
+                }
+            }
+            ff {
+                if {$b eq "fe"} {
+                    if {"$c$d" eq "0000"} {
+                        return utf-32le
+                    } else {
+                        return utf-16le
+                    }
+                }
+            }
+            default {
+                if {"$a$b$c$d" eq "0000feff"} {
+                    return utf-32be
+                }
+            }
+        }
+        return unknown
+    }
+
+
     #points to number of chars - in terms of fixed-width output font
     proc x_to_c {x} {
         upvar ::punk::pdf::base_x_per_c base_x_per_c
@@ -1277,20 +1477,50 @@ tcl::namespace::eval punk::pdf::lib {
         return [expr {$charcount * $base_x_per_c}]
     }
 
+    proc merge_buffer_dimensions {bufname} {
+        upvar $bufname MB
+        if {![llength $MB]} {
+            return [list 0 0 0 0] ;#x0 y0 x1 y1
+        }
+        set firstheader [lindex $MB 0 0]
+        set flds [dict get [lindex $firstheader 1] fields]
+        set dimensions [list [dict get $flds x0] [dict get $flds y0] [dict get $flds x1] [dict get $flds y1]]
+        #we can get negative values e.g image offset to left or top
+        foreach sametopblocks $MB {
+            foreach B $sametopblocks {
+                if {[lindex $B 0] eq "header"} {
+                    set flds [dict get [lindex $B 1] fields]
+                    set this_x0 [dict get $flds x0]
+                    set this_y0 [dict get $flds y0]
+                    set this_x1 [dict get $flds x1]
+                    set this_y1 [dict get $flds y1]
+                    lset dimensions 0 [expr {min([lindex $dimensions 0],$this_x0)}]
+                    lset dimensions 1 [expr {min([lindex $dimensions 1],$this_y0)}]
+                    lset dimensions 2 [expr {max([lindex $dimensions 2],$this_x1)}]
+                    lset dimensions 3 [expr {max([lindex $dimensions 3],$this_y1)}]
+                }
+            }
+        }
+        return $dimensions
+    }
     #whether merging on or off - we keep a MERGE_BUFFER
     #At every y-index change, and at end of page, process last entry(s)
-    proc _process_merge_buffer {bufname pageindex blocksep_default opt_blocksep domerge opt_warnings_textblock outc errc} {
+    proc _process_merge_buffer {bufname pageindex blocksep_default opt_blocksep domerge opt_shrink_textfree_blocks opt_postcompact opt_warnings_textblock outc errc} {
         upvar ::punk::pdf::base_x_per_c base_x_per_c
+        upvar ::punk::pdf::testchan testchan
         set x_per_line [expr {$base_x_per_c * 2}] ;#hack for now
         upvar $bufname MB
 
         if {!$domerge} {
             set block_warnings 0
+            #puts "MB:'$MB'"
             set warningrecords [lsearch -all -inline -index 0 $MB warnings]
             foreach wr $warningrecords {
                 incr block_warnings [lindex $wr 1]
             }
             #just emit the header & line blocks we have in the merge buffer one after the other
+            set blockresult "" ;#needed for postcompact
+            set emissions [list] ;#need to enable blank sep_block even when -postcompact is true
             foreach yset $MB {
                 foreach B $yset {
                     switch -- [lindex $B 0] {
@@ -1298,22 +1528,72 @@ tcl::namespace::eval punk::pdf::lib {
                             if {$opt_blocksep ne ""} {
                                 set sep [dict get [lindex $B 1] sep]
                                 set sep [string map [list <w> $block_warnings] $sep]
-                                puts $outc $sep
+                                #puts $outc $sep
+                                lappend emissions [list data $blockresult]
+                                lappend emissions [list sep $sep]
+                                set blockresult ""
+                                #append blockresult $sep \n
                             }
                         }
                         lines {
                             set lines [lindex $B 1]
+                            #puts $errc "--->[punk::ansi::ansistring VIEW $lines]"
+                            set idx -1
                             foreach l $lines {
-                                puts $outc $l
+                                incr idx
+                                if {[catch {
+                                    #puts $outc $l
+                                    append blockresult $l \n
+                                    } errMsg]} {
+                                        set prevline [lindex $lines $idx-1]
+                                        set nextline [lindex $lines $idx+1]
+                                        puts stderr "error writing output channel $outc\nerrMsg:$errMsg\n"
+                                        puts stderr "  prevline:'[punk::ansi::ansistring VIEW -lf 1 $prevline]'"
+                                        puts stderr "  lineview '[punk::ansi::ansistring VIEW -lf 1 $l]'"
+                                        puts stderr "  nextline:'[punk::ansi::ansistring VIEW -lf 1 $nextline]'"
+                                        #puts $testchan "=----------------------------------------="
+                                        #puts  $testchan $l
+                                        #puts $testchan "=----------------------------------------="
+                                    }
                             }
                         }
                         block {
-                            puts $outc [string map [list \uFFFD " "] [lindex $B 1]]
+                            if {!$opt_shrink_textfree_blocks} {
+                                #puts $outc [string map [list \uFFFD " "] [lindex $B 1]]
+                                append blockresult [string map [list \uFFFD " "] [lindex $B 1]] \n
+                                #puts $errc "--->[punk::ansi::ansistring VIEW [lindex $B 1]]"
+                            }
                         }
                         warnings {}
                     }
                 }
             }
+            lappend emissions [list data $blockresult]
+
+            set output ""
+            if {$opt_postcompact} {
+                foreach e $emissions {
+                    lassign $e etype data_or_header
+                    switch -- $etype {
+                        sep {
+                            append output $data_or_header \n
+                        }
+                        data {
+                            foreach ln [split $data_or_header \n] {
+                                if {[string trim $ln] ne ""} {
+                                    append output $ln \n
+                                }
+                            }
+                        }
+                    }
+                }
+                puts -nonewline $outc $output
+            } else {
+                foreach e $emissions {
+                    lassign $e _ data_or_header
+                    puts -nonewline $outc $data_or_header\n
+                }
+            }
             set MB [list]
             return
         }
@@ -1322,38 +1602,71 @@ tcl::namespace::eval punk::pdf::lib {
         set headers_structure [list]
         set mergedblock ""
         set warnings [list]
-        #puts "MB: $MB"
         if {[llength $MB]} {
-            set rootheader [lindex $MB 0 0 1]
-            #puts $errc "rootheader: $rootheader"
-            set base_y [dict get $rootheader fields y0]
+            #rootheader doesn't 'contain' subsequent blocks in x or y direction - it's just the y0 that triggers the new merge because it's below previous blocks
+            #set rootheader [lindex $MB 0 0 1]
+            ##puts $errc "rootheader: $rootheader"
+            #set base_y [dict get $rootheader fields y0]
+            set mbdimensions [merge_buffer_dimensions MB] ;# x0 y0 x1 y1
+            set base_y [lindex $mbdimensions 1]
+        }
+        #loop through and determine which blocks of those to be merged are RHS e.g largest x1
+        set max_x1 0
+        #todo - further refine to find blocks in the mergeset with no textblocks to right?
+        set rhs_block_indexes [list]
+        set textblock_x0_list [list]
+        foreach sametopblocks $MB {
+            foreach B $sametopblocks {
+                if {[lindex $B 0] eq "header"} {
+                    set flds [dict get [lindex $B 1] fields]
+                    #puts "flds: $flds"
+                    if {[dict get $flds type] eq "text"} {
+                        set this_x1 [dict get $flds x1]
+                        lappend textblock_x0_list [dict get $flds x0]
+                        if {abs($this_x1 - $max_x1) < 1} {
+                            lappend rhs_block_indexes [dict get $flds blockindex]
+                            set max_x1 [expr {max($this_x1,$max_x1)}]
+                        } elseif {$this_x1 > $max_x1} {
+                            set max_x1 $this_x1
+                            set rhs_block_indexes [list [dict get $flds blockindex]]
+                        }
+                    }
+                }
+            }
         }
-        foreach ybuf $MB {
+        foreach sametop $MB {
             set yheaders [list]
             set y_index ""
-            if {[llength $ybuf]} {
-                set firstheader [lindex $ybuf 0 1]
-                set blockrow_y [dict get $firstheader fields y0]
-                set blockrow_offset [expr {$blockrow_y - $base_y}]
-                set blockrow_x0 [dict get $firstheader fields x0]
-                set blockrow_x1 [dict get $firstheader fields x1]
+            if {[llength $sametop]} {
+                set firstheader [lindex $sametop 0 1]
+                set blockrow_y0 [dict get $firstheader fields y0]
+                set blockrow_y1 [dict get $firstheader fields y1]
+                set blockrow_top_offset [expr {$blockrow_y0 - $base_y}]
+                #set blockrow_x0 [dict get $firstheader fields x0]
+                #set blockrow_x1 [dict get $firstheader fields x1]
                 if {$opt_warnings_textblock >= 9} {
                     puts $errc "Warning type debug operation {merge blockrow} firstheader $firstheader"
                 }
             }
 
-            set leftright_idx -1
-            foreach B $ybuf {
-                incr leftright_idx
-                #if {$leftright_idx == 0} {
-                #    set firstheader [lindex $B 1]
-                #}
-                #while {[llength $ybuf]} {}
-                #set B [lpop ybuf 0]
+            set header_idx -1
+            #set leftright_max [expr {[llength $sametop] -1}]
+            set hdrs [lsearch -all -inline -index 0 $sametop header]
+            #puts "--- > $hdrs"
+            set header_max [expr {[llength $hdrs] -1}]
+            foreach B $sametop {
                 #puts "====>$B"
                 switch -- [lindex $B 0] {
                     header {
-                        set this_y_index [dict get [lindex $B 1] fields y-index]
+                        incr header_idx  ;#not necessarily in left-right order!
+                        set this_y_index        [dict get [lindex $B 1] fields y-index]
+                        set this_x0             [dict get [lindex $B 1] fields x0]
+                        set this_x1             [dict get [lindex $B 1] fields x1]
+                        set this_y0             [dict get [lindex $B 1] fields y0]
+                        set this_y1             [dict get [lindex $B 1] fields y1]
+                        set this_top_offset     [expr {$this_y0 - $base_y}]
+                        set this_bottom_offset  [expr {$this_y1 - $base_y}]
+                        set this_block_index    [dict get [lindex $B 1] fields blockindex]
                         if {$y_index eq ""} {
                             set y_index $this_y_index
                         } else {
@@ -1372,10 +1685,11 @@ tcl::namespace::eval punk::pdf::lib {
                         set lines [lindex $B 1]
                         #review - vertical oriented text at left can stomp on data during merge
                         #we are only given the bbox info by tclMuPDF
-                        if {abs($blockrow_x1 - $blockrow_x0) < 1} {
+                        if {abs($this_x1 - $this_x0) < 1} {
                             #non displayable
                             #todo - emit at document tail as a non-layed out block?
                             if {$opt_warnings_textblock > 0} {
+                                #review - firstheader vs this_block_index?
                                 #puts $errc "Warning - no x space. Undisplayed text on page:[dict get $firstheader fields pageindex] block:[dict get $firstheader fields blockindex] text:[join $lines \n]"
                                 puts $errc "Warning pageindex [dict get $firstheader fields pageindex] blockindex [dict get $firstheader fields blockindex] type {no x space} msg {Undisplayed text}  text \"[join $lines \n]\""
                             }
@@ -1386,38 +1700,95 @@ tcl::namespace::eval punk::pdf::lib {
                             #e.g overtype::textblock::ellipsis $space_available $block
 
                             set block [join $lines \n]
-                            set bwidth [textblock::widthtopline $block] ;#assume non-ragged block
+                            set numlines [llength $lines]
+                            set bwidth [textblock::width $block] ;#assume ragged block (differing line lengths)
                             set margintop ""
-                            if {$blockrow_offset > 0} {
-                                set offsetlines [expr {int(ceil($blockrow_offset / $x_per_line))}]
-                                set margintop [textblock::block $bwidth $offsetlines " "]
+                            set bottom_offsetlines [expr {int(round($this_bottom_offset / $x_per_line))}]
+                            if {[llength $MB] > 1 && $bottom_offsetlines >= $numlines} {
+                                #can use bottom of textblock dist from top (better line alignment for scenario: "smallprint fieldname:" "bigprint value" aligned at base  ???)
+                                set topmarginlines [expr {$bottom_offsetlines - $numlines}]
+                                #puts "====> offset from bottom bottom_offsetlines:$bottom_offsetlines topmarginlines:$topmarginlines this_y1:$this_y1"
+                                if {$topmarginlines > 0} {
+                                    set margintop [textblock::block $bwidth $topmarginlines " "]
+                                }
+                            } else {
+                                if {$this_top_offset > 0} {
+                                    #puts "====> offset from top"
+                                    #set offsetlines [expr {int(ceil($blockrow_top_offset / $x_per_line))}]
+                                    set offsetlines [expr {int(round($blockrow_top_offset / $x_per_line))}] ;#
+                                    #set offsetlines [expr {int(floor($this_top_offset / $x_per_line))}] ;#
+                                    if {$offsetlines > 0} {
+                                        set margintop [textblock::block $bwidth $offsetlines " "]
+                                    }
+                                }
+                            }
+                            if {$this_block_index ni $rhs_block_indexes} {
+                                #truncate overwidth blocks except for rhs
+                                #overwidth if can collide (or merge) with x0 of any other textblocks in the merge set
+                                #only interested in blocks where x0 >= this x1
+                                set next_x0_list [list]
+                                foreach x0 $textblock_x0_list {
+                                    if {$x0 >= ($this_x1 - $base_x_per_c)} {
+                                        lappend next_x0_list $x0
+                                    }
+                                }
+                                set next_x0_list [lsort -real $next_x0_list]
+                                set closest_x0 [lindex $next_x0_list 0]
+                                if {$closest_x0 ne ""} {
+                                    #set maxwidthchars [x_to_c $this_x1]
+                                    set maxwidthchars [x_to_c $closest_x0]
+                                    incr maxwidthchars -1 ;#vertically merging text blocks results in unintended text adjacencies - REVIEW
+                                    if {$maxwidthchars < $bwidth} {
+                                        #warn of rhs truncation
+                                        #text will have 'ellipsis'
+                                        #review - source text could already have ellipsis - this makes detecting truncation harder
+                                        #for now we will produce a warning if ellipsis is in the result and is on the right hand side
+                                        #todo - use a function other than textblock::frame which can report truncation
+                                        set block [textblock::frame -type {} -boxlimits {} -width [expr {$maxwidthchars +2}] $block] ;# +2 for vertical frame borders - which are removed but needed for block -width
+                                        set is_truncated 0
+                                        foreach ln [split [punk::ansi::ansistrip $block] \n] {
+                                            if {[regexp {[\u2026]$} $ln]} {
+                                                set is_truncated 1
+                                                break
+                                            }
+                                        }
+                                        if {$is_truncated} {
+                                            if {$opt_warnings_textblock > 0} {
+                                                puts $errc "Warning pageindex [dict get $firstheader fields pageindex] blockindex [dict get $firstheader fields blockindex] type {truncation} msg {merged textblock truncated on rhs}  text \"[join $lines \n]\""
+                                            }
+                                            lappend warnings 1
+                                        }
+                                    }
+                                }
+                            }
+                            #apply margintop after possible rhs truncation above (no need for ellipsis above content)
+                            if {$margintop ne ""} {
+                                set movedblock $margintop\n$block
+                            } else {
+                                set movedblock $block
                             }
+
                             if {$mergedblock eq ""} {
                                 #could be image - as we are using overtype - should result in blank area
                                 #todo - consider option to set bg colour, then create a block of spaces of the right size  - but don't add any chars other than ANSI codes which can be stripped easily.
-                                if {$margintop ne ""} {
-                                    set mergedblock $margintop\n$block
-                                } else {
-                                    set mergedblock $block
-                                }
+                                set mergedblock $movedblock
                             } else {
-                                if {$margintop ne ""} {
-                                    set movedblock $margintop\n$block
-                                } else {
-                                    set movedblock $block
-                                }
                                 set mergedblock [overtype::block -transparent 1 -overflow 1 $mergedblock $movedblock]
                             }
+
                         }
                     }
                     block {
                         set block [lindex $B 1]
                         set bwidth [textblock::widthtopline $block] ;#assume non-ragged block
                         set margintop ""
-                        if {$blockrow_offset > 0} {
-                            set offsetlines [expr {int(ceil($blockrow_offset / $x_per_line))}]
-                            set margintop [textblock::block $bwidth $offsetlines "\uFFFD"]
-                            #puts "=====> offset:$blockrow_offset"
+                        if {$this_top_offset > 0} {
+                            #set offsetlines [expr {int(ceil($this_top_offset / $x_per_line))}]
+                            set offsetlines [expr {int(round($this_top_offset / $x_per_line))}]
+                            if {$offsetlines > 0} {
+                                set margintop [textblock::block $bwidth $offsetlines "\uFFFD"]
+                            }
+                            #puts "=====> offset:$this_top_offset"
                         }
                         if {$mergedblock eq ""} {
                             if {$margintop ne ""} {
@@ -1431,6 +1802,7 @@ tcl::namespace::eval punk::pdf::lib {
                             } else {
                                 set movedblock $block
                             }
+                            #review - img placeholder can obscure text if above it - we don't have transparency info - so probably not what's wanted
                             set mergedblock [overtype::block -transparent \uFFFD -overflow 1 $mergedblock $movedblock]
                         }
                     }
@@ -1443,6 +1815,7 @@ tcl::namespace::eval punk::pdf::lib {
             lappend headers_structure $yheaders
         }
         if {[llength $all_headers] == 1} {
+            #puts stdout "---> [lindex $all_headers 0 1]"
             #only one block was in the MERGE_BUFFER
             set stored_filled_sep [dict get [lindex $all_headers 0] sep] 
             if {$opt_blocksep ne ""} {
@@ -1453,9 +1826,41 @@ tcl::namespace::eval punk::pdf::lib {
                 }
                 #all % placeholders filled, but <w> isn't
                 set sep [string map [list <w> $block_warnings] $stored_filled_sep]
+                #still want to allow emitting empty line when opt_postcompact is true, if opt_blocksep is set to space
                 puts $outc $sep
             }
-            puts $outc $mergedblock
+            #set is_text_free 1 ;#default to disprove
+            #set headerinfo [lindex $all_headers 0 1]
+            #set list_merges [dict get $headerinfo type]
+            #foreach blockrow_merge $list_merges {
+            #    foreach tp $blockrow_merge {
+            #        if {$tp eq "text"} {
+            #            #todo - look for whitespace only and treat as text free
+            #            set is_text_free 0
+            #        }
+            #    }
+            #}
+            set blockresult ""
+            if {$opt_shrink_textfree_blocks} {
+                set teststripped [punk::ansi::ansistrip $mergedblock]
+                if {[string trim $teststripped] ne ""} {
+                    set blockresult $mergedblock
+                }
+            }  else {
+                set blockresult $mergedblock
+            }
+            set output ""
+            if {$opt_postcompact} {
+                foreach ln [split $blockresult \n] {
+                    if {[string trim $ln] ne ""} {
+                        append output $ln \n
+                    }
+                }
+                puts -nonewline $outc $output
+            } else {
+                puts -nonewline $outc $blockresult\n
+            }
+
         } elseif {[llength $all_headers] > 1} {
             #more than one block was in the MERGE_BUFFER
             set merged_minx0 Inf
@@ -1489,7 +1894,27 @@ tcl::namespace::eval punk::pdf::lib {
                 dict set map %marker% "[punk::ansi::a bold cyan]MERGEDBLOCK[punk::ansi::a]"
                 puts $outc [string map $map $opt_blocksep]
             }
-            puts $outc $mergedblock
+            if {$opt_shrink_textfree_blocks} {
+                set teststripped [punk::ansi::ansistrip $mergedblock]
+                if {[string trim $teststripped] ne ""} {
+                    #puts $outc $mergedblock
+                    set blockresult $mergedblock
+                }
+            }  else {
+                #puts $outc $mergedblock
+                set blockresult $mergedblock
+            }
+            set output ""
+            if {$opt_postcompact} {
+                foreach ln [split $blockresult \n] {
+                    if {[string trim $ln] ne ""} {
+                        append output $ln \n
+                    }
+                }
+                puts -nonewline $outc $output
+            } else {
+                puts -nonewline $outc $blockresult\n
+            }
         }
         set MB [list]
     }