diff --git a/src/doc/punk/_module_fileline-0.1.0.tm.man b/src/doc/punk/_module_fileline-0.1.0.tm.man index be45f826..525bc7b3 100644 --- a/src/doc/punk/_module_fileline-0.1.0.tm.man +++ b/src/doc/punk/_module_fileline-0.1.0.tm.man @@ -29,10 +29,20 @@ [para]No support for lone carriage-returns being interpreted as line-endings. [para]CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module. [subsection dependencies] -[para] packages used by punk::fileline +[para] packages needed by punk::fileline [list_begin itemized] [item] [package {Tcl 8.6}] -[list_end] +[list_end] [comment {- end dependencies list -}] +[subsection {optional dependencies}] +[para] packages that add functionality but aren't strictly required +[list_begin itemized] +[item] [package {punk::ansi}] +[para] - recommended for class::textinfo [method chunk_boundary_display] +[item] [package {punk::char}] +[para] - recommended for class::textinfo [method chunk_boundary_display] +[item] [package {overtype}] +[para] - recommended for class::textinfo [method chunk_boundary_display] +[list_end] [comment {- end optional dependencies list -}] [section API] [subsection {Namespace punk::fileline::class}] [para] class definitions @@ -56,10 +66,11 @@ or [para] objName chunk 0 end [call class::textinfo [method chunklen]] [para] Number of bytes/characters in the raw data of the file +[call class::textinfo [method chunk_boundary_display]] +[para]Returns a string displaying the boundaries at chunksize bytes between chunkstart and chunkend +[para]Defaults to using ansi colour if punk::ansi module is available. Use -ansi 0 to disable colour [call class::textinfo [method linecount]] [para] Number of lines in the raw data of the file, counted as per the policy in effect -[call class::textinfo [method regenerate_lines]] -[para]generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex [call class::textinfo [method line] [arg lineindex]] [para]Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata [para]A 'line' may be returned without a line-ending if the unerlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting) @@ -91,6 +102,8 @@ or [para]Return a dict of the metadata and text for the line indicated by the zero-based lineindex [para]This returns the same info as the [method linemeta] with an added key of 'payload' which is the text of the line without line-ending. [para]The 'payload' value is the same as is returned from the [method linepayload] method. +[call class::textinfo [method lineinfolist] [arg startidx] [arg endidx]] +[para]Returns list of lineinfo dicts for each line in line index range startidx to endidx [call class::textinfo [method linerange_to_chunkrange] [arg startidx] [arg endidx]] [call class::textinfo [method linerange_to_chunk] [arg startidx] [arg endidx]] [call class::textinfo [method lines] [arg startidx] [arg endidx]] @@ -111,6 +124,11 @@ or [para]Basic addition and subtraction expressions such as 4-1 5+2 are accepted [para]startidx higher than endidx is allowed [para]Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max +[call class::textinfo [method regenerate_lines]] +[para]generate a list of lines from the current state of the stored raw data chunk and keep a map of line-endings indexed by lineindex +[para]This is called automatically by the Constructor during object creation +[para]It is exposed in the API experimentally - as chunk and line manipulation functions are considered. +[para]TODO - review whether such manual control will be necessary/desirable [list_end] [list_end] [comment {--- end class enumeration ---}] [subsection {Namespace punk::fileline}] @@ -143,4 +161,12 @@ or [section Internal] [subsection {Namespace punk::fileline::system}] [para] Internal functions that are not part of the API +[subsection {Namespace punk::fileline::ansi}] +[para]These are ansi functions imported from punk::ansi - or no-ops if that package is unavailable +[para]See [package punk::ansi] for documentation +[list_begin definitions] +[call [fun ansi::a]] +[call [fun ansi::a+]] +[call [fun ansi::stripansi]] +[list_end] [comment {--- end definitions namespace punk::fileline::ansi ---}] [manpage_end] diff --git a/src/embedded/man/files/punk/_module_fileline-0.1.0.tm.n b/src/embedded/man/files/punk/_module_fileline-0.1.0.tm.n index 49fdd12d..9fe9afc7 100644 --- a/src/embedded/man/files/punk/_module_fileline-0.1.0.tm.n +++ b/src/embedded/man/files/punk/_module_fileline-0.1.0.tm.n @@ -282,9 +282,9 @@ class::textinfo \fBchunk\fR \fIchunkstart\fR \fIchunkend\fR .sp class::textinfo \fBchunklen\fR .sp -class::textinfo \fBlinecount\fR +class::textinfo \fBchunk_boundary_display\fR .sp -class::textinfo \fBregenerate_lines\fR +class::textinfo \fBlinecount\fR .sp class::textinfo \fBline\fR \fIlineindex\fR .sp @@ -294,6 +294,8 @@ class::textinfo \fBlinemeta\fR \fIlineindex\fR .sp class::textinfo \fBlineinfo\fR \fIlineindex\fR .sp +class::textinfo \fBlineinfolist\fR \fIstartidx\fR \fIendidx\fR +.sp class::textinfo \fBlinerange_to_chunkrange\fR \fIstartidx\fR \fIendidx\fR .sp class::textinfo \fBlinerange_to_chunk\fR \fIstartidx\fR \fIendidx\fR @@ -312,8 +314,16 @@ class::textinfo \fBnumeric_chunkrange\fR \fIstartidx\fR \fIendidx\fR .sp class::textinfo \fBnormalize_indices\fR \fIstartidx\fR \fIendidx\fR \fImax\fR .sp +class::textinfo \fBregenerate_lines\fR +.sp \fBlib::range_spans_chunk_boundaries\fR \fIstart\fR \fIend\fR \fIchunksize\fR .sp +\fBansi::a\fR +.sp +\fBansi::a+\fR +.sp +\fBansi::stripansi\fR +.sp .BE .SH DESCRIPTION .PP @@ -351,10 +361,26 @@ No support for lone carriage-returns being interpreted as line-endings\&. CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module\&. .SS DEPENDENCIES .PP -packages used by punk::fileline +packages needed by punk::fileline .IP \(bu \fBTcl 8\&.6\fR .PP +.SS "OPTIONAL DEPENDENCIES" +.PP +packages that add functionality but aren't strictly required +.IP \(bu +\fBpunk::ansi\fR +.sp +- recommended for class::textinfo \fBchunk_boundary_display\fR +.IP \(bu +\fBpunk::char\fR +.sp +- recommended for class::textinfo \fBchunk_boundary_display\fR +.IP \(bu +\fBovertype\fR +.sp +- recommended for class::textinfo \fBchunk_boundary_display\fR +.PP .SH API .SS "NAMESPACE PUNK::FILELINE::CLASS" .PP @@ -394,13 +420,15 @@ class::textinfo \fBchunklen\fR .sp Number of bytes/characters in the raw data of the file .TP -class::textinfo \fBlinecount\fR +class::textinfo \fBchunk_boundary_display\fR .sp -Number of lines in the raw data of the file, counted as per the policy in effect +Returns a string displaying the boundaries at chunksize bytes between chunkstart and chunkend +.sp +Defaults to using ansi colour if punk::ansi module is available\&. Use -ansi 0 to disable colour .TP -class::textinfo \fBregenerate_lines\fR +class::textinfo \fBlinecount\fR .sp -generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex +Number of lines in the raw data of the file, counted as per the policy in effect .TP class::textinfo \fBline\fR \fIlineindex\fR .sp @@ -462,6 +490,10 @@ This returns the same info as the \fBlinemeta\fR with an added key of 'payload' .sp The 'payload' value is the same as is returned from the \fBlinepayload\fR method\&. .TP +class::textinfo \fBlineinfolist\fR \fIstartidx\fR \fIendidx\fR +.sp +Returns list of lineinfo dicts for each line in line index range startidx to endidx +.TP class::textinfo \fBlinerange_to_chunkrange\fR \fIstartidx\fR \fIendidx\fR .TP class::textinfo \fBlinerange_to_chunk\fR \fIstartidx\fR \fIendidx\fR @@ -501,6 +533,16 @@ Basic addition and subtraction expressions such as 4-1 5+2 are accepted startidx higher than endidx is allowed .sp Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max +.TP +class::textinfo \fBregenerate_lines\fR +.sp +generate a list of lines from the current state of the stored raw data chunk and keep a map of line-endings indexed by lineindex +.sp +This is called automatically by the Constructor during object creation +.sp +It is exposed in the API experimentally - as chunk and line manipulation functions are considered\&. +.sp +TODO - review whether such manual control will be necessary/desirable .RE .PP .SS "NAMESPACE PUNK::FILELINE" @@ -550,6 +592,18 @@ This function automatically uses lseq (if Tcl >= 8\&.7) when number of boundarie .SS "NAMESPACE PUNK::FILELINE::SYSTEM" .PP Internal functions that are not part of the API +.SS "NAMESPACE PUNK::FILELINE::ANSI" +.PP +These are ansi functions imported from punk::ansi - or no-ops if that package is unavailable +.PP +See \fBpunk::ansi\fR for documentation +.TP +\fBansi::a\fR +.TP +\fBansi::a+\fR +.TP +\fBansi::stripansi\fR +.PP .SH KEYWORDS file, module, parse, text .SH COPYRIGHT diff --git a/src/embedded/md/doc/files/punk/_module_fileline-0.1.0.tm.md b/src/embedded/md/doc/files/punk/_module_fileline-0.1.0.tm.md index 9ed788e3..c6e4c6d2 100644 --- a/src/embedded/md/doc/files/punk/_module_fileline-0.1.0.tm.md +++ b/src/embedded/md/doc/files/punk/_module_fileline-0.1.0.tm.md @@ -28,17 +28,21 @@ punkshell\_module\_punk::fileline \- file line\-handling utilities - [dependencies](#subsection3) + - [optional dependencies](#subsection4) + - [API](#section3) - - [Namespace punk::fileline::class](#subsection4) + - [Namespace punk::fileline::class](#subsection5) - - [Namespace punk::fileline](#subsection5) + - [Namespace punk::fileline](#subsection6) - - [Namespace punk::fileline::lib](#subsection6) + - [Namespace punk::fileline::lib](#subsection7) - [Internal](#section4) - - [Namespace punk::fileline::system](#subsection7) + - [Namespace punk::fileline::system](#subsection8) + + - [Namespace punk::fileline::ansi](#subsection9) - [Keywords](#keywords) @@ -51,22 +55,27 @@ package require punk::fileline [class::textinfo __constructor__ *datachunk* ?option value\.\.\.?](#1) [class::textinfo __chunk__ *chunkstart* *chunkend*](#2) [class::textinfo __chunklen__](#3) -[class::textinfo __linecount__](#4) -[class::textinfo __regenerate\_lines__](#5) +[class::textinfo __chunk\_boundary\_display__](#4) +[class::textinfo __linecount__](#5) [class::textinfo __line__ *lineindex*](#6) [class::textinfo __linepayload__ *lineindex*](#7) [class::textinfo __linemeta__ *lineindex*](#8) [class::textinfo __lineinfo__ *lineindex*](#9) -[class::textinfo __linerange\_to\_chunkrange__ *startidx* *endidx*](#10) -[class::textinfo __linerange\_to\_chunk__ *startidx* *endidx*](#11) -[class::textinfo __lines__ *startidx* *endidx*](#12) -[class::textinfo __linepayloads__ *startidx* *endidx*](#13) -[class::textinfo __chunkrange\_to\_linerange__ *chunkstart* *chunkend*](#14) -[class::textinfo __chunkrange\_to\_lineinfolist__ *chunkstart* *chunkend* ?option value\.\.\.?](#15) -[class::textinfo __numeric\_linerange__ *startidx* *endidx*](#16) -[class::textinfo __numeric\_chunkrange__ *startidx* *endidx*](#17) -[class::textinfo __normalize\_indices__ *startidx* *endidx* *max*](#18) -[__lib::range\_spans\_chunk\_boundaries__ *start* *end* *chunksize*](#19) +[class::textinfo __lineinfolist__ *startidx* *endidx*](#10) +[class::textinfo __linerange\_to\_chunkrange__ *startidx* *endidx*](#11) +[class::textinfo __linerange\_to\_chunk__ *startidx* *endidx*](#12) +[class::textinfo __lines__ *startidx* *endidx*](#13) +[class::textinfo __linepayloads__ *startidx* *endidx*](#14) +[class::textinfo __chunkrange\_to\_linerange__ *chunkstart* *chunkend*](#15) +[class::textinfo __chunkrange\_to\_lineinfolist__ *chunkstart* *chunkend* ?option value\.\.\.?](#16) +[class::textinfo __numeric\_linerange__ *startidx* *endidx*](#17) +[class::textinfo __numeric\_chunkrange__ *startidx* *endidx*](#18) +[class::textinfo __normalize\_indices__ *startidx* *endidx* *max*](#19) +[class::textinfo __regenerate\_lines__](#20) +[__lib::range\_spans\_chunk\_boundaries__ *start* *end* *chunksize*](#21) +[__ansi::a__](#22) +[__ansi::a\+__](#23) +[__ansi::stripansi__](#24) # DESCRIPTION @@ -114,13 +123,29 @@ something else before the data is supplied to this module\. ## dependencies -packages used by punk::fileline +packages needed by punk::fileline - __Tcl 8\.6__ +## optional dependencies + +packages that add functionality but aren't strictly required + + - __punk::ansi__ + + \- recommended for class::textinfo __chunk\_boundary\_display__ + + - __punk::char__ + + \- recommended for class::textinfo __chunk\_boundary\_display__ + + - __overtype__ + + \- recommended for class::textinfo __chunk\_boundary\_display__ + # API -## Namespace punk::fileline::class +## Namespace punk::fileline::class class definitions @@ -155,15 +180,18 @@ class definitions Number of bytes/characters in the raw data of the file - - class::textinfo __linecount__ + - class::textinfo __chunk\_boundary\_display__ - Number of lines in the raw data of the file, counted as per the policy - in effect + Returns a string displaying the boundaries at chunksize bytes between + chunkstart and chunkend + + Defaults to using ansi colour if punk::ansi module is available\. Use + \-ansi 0 to disable colour - - class::textinfo __regenerate\_lines__ + - class::textinfo __linecount__ - generate a list of lines from the stored raw data chunk and keep a map - of line\-endings indexed by lineindex + Number of lines in the raw data of the file, counted as per the policy + in effect - class::textinfo __line__ *lineindex* @@ -243,17 +271,22 @@ class definitions The 'payload' value is the same as is returned from the __linepayload__ method\. - - class::textinfo __linerange\_to\_chunkrange__ *startidx* *endidx* + - class::textinfo __lineinfolist__ *startidx* *endidx* + + Returns list of lineinfo dicts for each line in line index range + startidx to endidx - - class::textinfo __linerange\_to\_chunk__ *startidx* *endidx* + - class::textinfo __linerange\_to\_chunkrange__ *startidx* *endidx* - - class::textinfo __lines__ *startidx* *endidx* + - class::textinfo __linerange\_to\_chunk__ *startidx* *endidx* - - class::textinfo __linepayloads__ *startidx* *endidx* + - class::textinfo __lines__ *startidx* *endidx* - - class::textinfo __chunkrange\_to\_linerange__ *chunkstart* *chunkend* + - class::textinfo __linepayloads__ *startidx* *endidx* - - class::textinfo __chunkrange\_to\_lineinfolist__ *chunkstart* *chunkend* ?option value\.\.\.? + - class::textinfo __chunkrange\_to\_linerange__ *chunkstart* *chunkend* + + - class::textinfo __chunkrange\_to\_lineinfolist__ *chunkstart* *chunkend* ?option value\.\.\.? Return a list of dicts each with structure like the result of the __lineinfo__ method \- but possibly with extra keys for truncation @@ -270,7 +303,7 @@ class definitions method \- and will not be reflected in __lineinfo__ queries to the main chunk\. - - class::textinfo __numeric\_linerange__ *startidx* *endidx* + - class::textinfo __numeric\_linerange__ *startidx* *endidx* A helper to return any Tcl\-style end end\-x values given to startidx or endidx; converted to their specific values based on the current state @@ -279,13 +312,13 @@ class definitions This is used internally by API functions such as __line__ to enable it to accept more expressive indices - - class::textinfo __numeric\_chunkrange__ *startidx* *endidx* + - class::textinfo __numeric\_chunkrange__ *startidx* *endidx* A helper to return any Tcl\-style end end\-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data - - class::textinfo __normalize\_indices__ *startidx* *endidx* *max* + - class::textinfo __normalize\_indices__ *startidx* *endidx* *max* A utility to convert some of the of Tcl\-style list\-index expressions such as end, end\-1 etc to valid indices in the range 0 to the supplied @@ -298,13 +331,25 @@ class definitions Unlike Tcl's index expressions \- we raise an error if the calculated index is out of bounds 0 to max -## Namespace punk::fileline + - class::textinfo __regenerate\_lines__ + + generate a list of lines from the current state of the stored raw data + chunk and keep a map of line\-endings indexed by lineindex + + This is called automatically by the Constructor during object creation + + It is exposed in the API experimentally \- as chunk and line + manipulation functions are considered\. + + TODO \- review whether such manual control will be necessary/desirable + +## Namespace punk::fileline Core API functions for punk::fileline -## Namespace punk::fileline::lib +## Namespace punk::fileline::lib - - __lib::range\_spans\_chunk\_boundaries__ *start* *end* *chunksize* + - __lib::range\_spans\_chunk\_boundaries__ *start* *end* *chunksize* Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize @@ -339,10 +384,23 @@ Core API functions for punk::fileline # Internal -## Namespace punk::fileline::system +## Namespace punk::fileline::system Internal functions that are not part of the API +## Namespace punk::fileline::ansi + +These are ansi functions imported from punk::ansi \- or no\-ops if that package is +unavailable + +See __punk::ansi__ for documentation + + - __ansi::a__ + + - __ansi::a\+__ + + - __ansi::stripansi__ + # KEYWORDS [file](\.\./\.\./\.\./index\.md\#file), [module](\.\./\.\./\.\./index\.md\#module), diff --git a/src/embedded/www/doc/files/punk/_module_fileline-0.1.0.tm.html b/src/embedded/www/doc/files/punk/_module_fileline-0.1.0.tm.html index 5519d64f..590bcd09 100644 --- a/src/embedded/www/doc/files/punk/_module_fileline-0.1.0.tm.html +++ b/src/embedded/www/doc/files/punk/_module_fileline-0.1.0.tm.html @@ -117,18 +117,20 @@
  • Concepts
  • Notes
  • dependencies
  • +
  • optional dependencies
  • API
  • Internal
  • Keywords
  • @@ -144,22 +146,27 @@
  • class::textinfo constructor datachunk ?option value...?
  • class::textinfo chunk chunkstart chunkend
  • class::textinfo chunklen
  • -
  • class::textinfo linecount
  • -
  • class::textinfo regenerate_lines
  • +
  • class::textinfo chunk_boundary_display
  • +
  • class::textinfo linecount
  • class::textinfo line lineindex
  • class::textinfo linepayload lineindex
  • class::textinfo linemeta lineindex
  • class::textinfo lineinfo lineindex
  • -
  • class::textinfo linerange_to_chunkrange startidx endidx
  • -
  • class::textinfo linerange_to_chunk startidx endidx
  • -
  • class::textinfo lines startidx endidx
  • -
  • class::textinfo linepayloads startidx endidx
  • -
  • class::textinfo chunkrange_to_linerange chunkstart chunkend
  • -
  • class::textinfo chunkrange_to_lineinfolist chunkstart chunkend ?option value...?
  • -
  • class::textinfo numeric_linerange startidx endidx
  • -
  • class::textinfo numeric_chunkrange startidx endidx
  • -
  • class::textinfo normalize_indices startidx endidx max
  • -
  • lib::range_spans_chunk_boundaries start end chunksize
  • +
  • class::textinfo lineinfolist startidx endidx
  • +
  • class::textinfo linerange_to_chunkrange startidx endidx
  • +
  • class::textinfo linerange_to_chunk startidx endidx
  • +
  • class::textinfo lines startidx endidx
  • +
  • class::textinfo linepayloads startidx endidx
  • +
  • class::textinfo chunkrange_to_linerange chunkstart chunkend
  • +
  • class::textinfo chunkrange_to_lineinfolist chunkstart chunkend ?option value...?
  • +
  • class::textinfo numeric_linerange startidx endidx
  • +
  • class::textinfo numeric_chunkrange startidx endidx
  • +
  • class::textinfo normalize_indices startidx endidx max
  • +
  • class::textinfo regenerate_lines
  • +
  • lib::range_spans_chunk_boundaries start end chunksize
  • +
  • ansi::a
  • +
  • ansi::a+
  • +
  • ansi::stripansi
  • @@ -188,14 +195,25 @@

    CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module.

    dependencies

    -

    packages used by punk::fileline

    +

    packages needed by punk::fileline

    +

    optional dependencies

    +

    packages that add functionality but aren't strictly required

    + +

    API

    -

    Namespace punk::fileline::class

    +

    Namespace punk::fileline::class

    class definitions

    1. CLASS textinfo

      @@ -217,10 +235,11 @@ or

      objName chunk 0 end

      class::textinfo chunklen

      Number of bytes/characters in the raw data of the file

      -
      class::textinfo linecount
      +
      class::textinfo chunk_boundary_display
      +

      Returns a string displaying the boundaries at chunksize bytes between chunkstart and chunkend

      +

      Defaults to using ansi colour if punk::ansi module is available. Use -ansi 0 to disable colour

      +
      class::textinfo linecount

      Number of lines in the raw data of the file, counted as per the policy in effect

      -
      class::textinfo regenerate_lines
      -

      generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex

      class::textinfo line lineindex

      Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata

      A 'line' may be returned without a line-ending if the unerlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting)

      @@ -252,44 +271,51 @@ or

      Return a dict of the metadata and text for the line indicated by the zero-based lineindex

      This returns the same info as the linemeta with an added key of 'payload' which is the text of the line without line-ending.

      The 'payload' value is the same as is returned from the linepayload method.

      -
      class::textinfo linerange_to_chunkrange startidx endidx
      +
      class::textinfo lineinfolist startidx endidx
      +

      Returns list of lineinfo dicts for each line in line index range startidx to endidx

      +
      class::textinfo linerange_to_chunkrange startidx endidx
      -
      class::textinfo linerange_to_chunk startidx endidx
      +
      class::textinfo linerange_to_chunk startidx endidx
      -
      class::textinfo lines startidx endidx
      +
      class::textinfo lines startidx endidx
      -
      class::textinfo linepayloads startidx endidx
      +
      class::textinfo linepayloads startidx endidx
      -
      class::textinfo chunkrange_to_linerange chunkstart chunkend
      +
      class::textinfo chunkrange_to_linerange chunkstart chunkend
      -
      class::textinfo chunkrange_to_lineinfolist chunkstart chunkend ?option value...?
      +
      class::textinfo chunkrange_to_lineinfolist chunkstart chunkend ?option value...?

      Return a list of dicts each with structure like the result of the lineinfo method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied

      The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list.

      truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending)

      Note that this truncation info is only in the return value of this method - and will not be reflected in lineinfo queries to the main chunk.

      -
      class::textinfo numeric_linerange startidx endidx
      +
      class::textinfo numeric_linerange startidx endidx

      A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data

      This is used internally by API functions such as line to enable it to accept more expressive indices

      -
      class::textinfo numeric_chunkrange startidx endidx
      +
      class::textinfo numeric_chunkrange startidx endidx

      A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data

      -
      class::textinfo normalize_indices startidx endidx max
      +
      class::textinfo normalize_indices startidx endidx max

      A utility to convert some of the of Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max

      Basic addition and subtraction expressions such as 4-1 5+2 are accepted

      startidx higher than endidx is allowed

      Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max

      +
      class::textinfo regenerate_lines
      +

      generate a list of lines from the current state of the stored raw data chunk and keep a map of line-endings indexed by lineindex

      +

      This is called automatically by the Constructor during object creation

      +

      It is exposed in the API experimentally - as chunk and line manipulation functions are considered.

      +

      TODO - review whether such manual control will be necessary/desirable

    -

    Namespace punk::fileline

    +

    Namespace punk::fileline

    Core API functions for punk::fileline

    -

    Namespace punk::fileline::lib

    +

    Namespace punk::fileline::lib

    Secondary functions that are part of the API

    -
    lib::range_spans_chunk_boundaries start end chunksize
    +
    lib::range_spans_chunk_boundaries start end chunksize

    Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range.

    @@ -313,9 +339,21 @@ or

    Internal

    -

    Namespace punk::fileline::system

    +

    Namespace punk::fileline::system

    Internal functions that are not part of the API

    +

    Namespace punk::fileline::ansi

    +

    These are ansi functions imported from punk::ansi - or no-ops if that package is unavailable

    +

    See punk::ansi for documentation

    +
    +
    ansi::a
    +
    +
    ansi::a+
    +
    +
    ansi::stripansi
    +
    +
    +

    Keywords

    file, module, parse, text

    diff --git a/src/modules/punk/ansi-999999.0a1.0.tm b/src/modules/punk/ansi-999999.0a1.0.tm index 9d6ddf97..6664af03 100644 --- a/src/modules/punk/ansi-999999.0a1.0.tm +++ b/src/modules/punk/ansi-999999.0a1.0.tm @@ -325,7 +325,12 @@ namespace eval punk::ansi { set res [list] foreach i [split $code ";"] { set ix [lsearch -exact $SGR_map $i] - if {$ix>-1} {lappend res [lindex $SGR_map [incr ix -1]]} + if {[string is digit -strict $code]} { + if {$ix>-1} {lappend res [lindex $SGR_map [incr ix -1]]} + } else { + #reverse lookup code from name + if {$ix>-1} {lappend res [lindex $SGR_map [incr ix]]} + } } set res } diff --git a/src/modules/punk/fileline-999999.0a1.0.tm b/src/modules/punk/fileline-999999.0a1.0.tm index b1f4eb8b..dd638bd5 100644 --- a/src/modules/punk/fileline-999999.0a1.0.tm +++ b/src/modules/punk/fileline-999999.0a1.0.tm @@ -55,19 +55,38 @@ #*** !doctools #[subsection dependencies] -#[para] packages used by punk::fileline +#[para] packages needed by punk::fileline #[list_begin itemized] -package require Tcl 8.6 + package require Tcl 8.6 + #*** !doctools + #[item] [package {Tcl 8.6}] + + + # #package require frobz + # #*** !doctools + # #[item] [package {frobz}] + #*** !doctools -#[item] [package {Tcl 8.6}] +#[list_end] [comment {- end dependencies list -}] + +#*** !doctools +#[subsection {optional dependencies}] +#[para] packages that add functionality but aren't strictly required +#[list_begin itemized] + + #*** !doctools + #[item] [package {punk::ansi}] + #[para] - recommended for class::textinfo [method chunk_boundary_display] + #[item] [package {punk::char}] + #[para] - recommended for class::textinfo [method chunk_boundary_display] + #[item] [package {overtype}] + #[para] - recommended for class::textinfo [method chunk_boundary_display] -# #package require frobz -# #*** !doctools -# #[item] [package {frobz}] #*** !doctools -#[list_end] +#[list_end] [comment {- end optional dependencies list -}] + # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ @@ -130,6 +149,15 @@ namespace eval punk::fileline::class { # set chunkdata [lb]fileutil::cat -translation binary[rb] #[example_end] #[para] when loading the data + namespace eval [namespace current] { + set nspath [namespace path] + foreach p [list ::punk::fileline ::punk::fileline::ansi] { + if {$p ni $nspath} { + lappend nspath $p + } + } + namespace path $nspath + } set o_chunk $datachunk set crlf_lf_placeholders [list \uFFFF \uFFFE] ;#defaults - if already exist in file - error out with message set defaults [dict create\ @@ -193,6 +221,307 @@ namespace eval punk::fileline::class { #[para] Number of bytes/characters in the raw data of the file return [string length $o_chunk] } + method chunk_boundary_display {chunkstart chunkend chunksize args} { + #*** !doctools + #[call class::textinfo [method chunk_boundary_display]] + #[para]Returns a string displaying the boundaries at chunksize bytes between chunkstart and chunkend + #[para]Defaults to using ansi colour if punk::ansi module is available. Use -ansi 0 to disable colour + set defaults [dict create\ + -ansi $::punk::fileline::ansi::enabled\ + -offset 0\ + -displaybytes 200\ + -truncatedmark "..."\ + -completemark "---"\ + -moremark " + "\ + -continuemark " > "\ + -linemaxwidth 100\ + -linebase 0\ + -limit -1\ + -boundaries {}\ + -showconfig 0\ + -boundaryheader {Boundary %i% at %b%}\ + ] + set known_opts [dict keys $defaults] + foreach {k v} $args { + if {$k ni $known_opts} { + error "[self]::chunk_boundary error: unknown option '$k'. Known options: $known_opts" + } + } + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- + set opt_ansi [dict get $opts -ansi] + set opt_offset [dict get $opts -offset] + set opt_displaybytes [dict get $opts -displaybytes] + set opt_tmark [dict get $opts -truncatedmark] + set opt_cmark [dict get $opts -completemark] + set opt_linemax [dict get $opts -linemaxwidth] + set opt_linebase [dict get $opts -linebase] + set opt_linebase [string map [list _ ""] $opt_linebase] + set opt_limit [dict get $opts -limit] ;#limit number of boundaries to display + set opt_boundaries [dict get $opts -boundaries] ;#use pre-calculated boundaries if supplied + set opt_showconfig [dict get $opts -showconfig] + set opt_boundaryheader [dict get $opts -boundaryheader] + # -- --- --- --- --- --- + package require overtype + # will require punk::char and punk::ansi + + if {"::punk::fileline::ansi::stripansi" ne [info commands ::punk::fileline::ansi::stripansi]} { + namespace eval ::punk::fileline::ansi { + namespace import ::punk::ansi::* + } + } + + #This mechanism for enabling/disabling ansi is a bit clumsy - prone to errors with regard to keeping in sync with any api changes in punk ansi + #It's done here to allow this to be used without the full set of punk modules and/or shell - REVIEW + + #risk of failing to reset on error + set pre_ansi_enabled $::punk::fileline::ansi::enabled + if {$opt_ansi} { + set ::punk::fileline::ansi::enabled 1 + } else { + set ::punk::fileline::ansi::enabled 0 + } + if {"::punk::fileline::stripansi" ne [info commands ::punk::fileline::stripansi]} { + proc ::punk::fileline::a {args} { + if {$::punk::fileline::ansi::enabled} { + tailcall ::punk::fileline::ansi::a {*}$args + } else { + return "" + } + } + proc ::punk::fileline::a+ {args} { + if {$::punk::fileline::ansi::enabled} { + tailcall ::punk::fileline::ansi::a+ {*}$args + } else { + return "" + } + } + proc ::punk::fileline::stripansi {str} { + if {$::punk::fileline::ansi::enabled} { + tailcall ::punk::fileline::ansi::stripansi $str + } else { + return $str + } + } + } + set maxline [lindex [my chunkrange_to_linerange $chunkend $chunkend] 0] + set minline [lindex [my chunkrange_to_linerange $chunkstart $chunkstart] 0] + + #suport simple end+-int (+-)start(+-)int to set linebase to line corresponding to chunkstart or chunkend + #also simple int+int and int-int - nothing more complicated (similar to Tcl lrange etc in that regard) + #commonly this will be something like -start or -end + if {![string is integer -strict $opt_linebase]} { + set sign "" + set errunrecognised "unrecognised -linebase value '$opt_linebase'. Expected positive or negative integer or -start -start-int -start+int -end -end-int -end+int or -eof (where leading - is optional but probably desirable) " + if {[string index $opt_linebase 0] eq "-"} { + set sign - + set tail [string range $opt_linebase 1 end] + } else { + set tail [string trimleft $opt_linebase +];#ignore + + } + if {[string match eof* $tail]} { + set endmath [string range $tail 3 end] + #todo endmath? + if {$tail eq "eof"} { + set lastline [lindex [my chunkrange_to_linerange end end] 0] + set linebase ${sign}$lastline + } else { + error $errunrecognised + } + } elseif {[string match end* $tail]} { + set endmath [string range $tail 3 end] + if {[string length $endmath]} { + set op [string index $endmath 0] + if {$op in {+ -}} { + set operand [string range $endmath 1 end] + if {[string is integer -strict $operand]} { + if {$op eq "+"} { + set linebase [expr {$maxline + $operand}] + } else { + set linebase [expr {$maxline - $operand}] + } + } else { + error $errunrecognised + } + } else { + error $errunrecognised + } + } else { + set linebase $maxline + } + set linebase ${sign}$linebase + } elseif {[string match start* $tail]} { + set endmath [string range $tail 5 end] + if {[string length $endmath]} { + set op [string index $endmath 0] + if {$op in {+ -}} { + set operand [string range $endmath 1 end] + if {[string is integer -strict $operand]} { + if {$op eq "+"} { + set linebase [expr {$minline + $operand}] + } else { + set linebase [expr {$minline - $operand}] + } + } else { + error $errunrecognised + } + } else { + error $errunrecognised + } + } else { + set linebase $minline + } + set linebase ${sign}$linebase + } elseif {[string match *-* $tail]} { + set extras [lassign [split $tail -] int1 int2] + if {[llength $extras]} { + error $errunrecognised + } + if {![string is integer -strict $int1] || ![string is integer -strict $int2]} { + error $errunrecognised + } + set linebase [expr {$int1 - $int2}] + set linebase ${sign}$linebase + } elseif {[string match *+* $tail]} { + set extras [lassign [split $tail +] int1 int2] + if {[llength $extras]} { + error $errunrecognised + } + if {![string is integer -strict $int1] || ![string is integer -strict $int2]} { + error $errunrecognised + } + set linebase [expr {$int1 + $int2}] + set linebase ${sign}$linebase + } else { + error $errunrecognised + } + + } else { + set linebase $opt_linebase + } + + lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend + + if {![llength $opt_boundaries]} { + set binfo [lib::range_spans_chunk_boundaries $chunkstart $chunkend $chunksize -offset $opt_offset] + set boundaries [dict get $binfo boundaries] + } else { + set boundaries [list] + foreach b $opt_boundaries { + if {$chunkstart <= $b && $chunkend >= $b} { + lappend boundaries [expr {$b + $opt_offset}] + } + } + } + + + if {![llength $boundaries]} { + return "No boundaries found between $chunkstart and $chunkend for chunksize $chunksize (when offset $opt_offset)" + } + if {$opt_showconfig} { + set result "chunk range $chunkstart $chunkend line range $minline $maxline linebase $linebase limit $opt_limit\n" + } else { + set result "" + } + set pre_bytes [expr {$opt_displaybytes /2}] + set post_bytes $pre_bytes + set max_bytes [expr {[my chunklen] -1}] + if {$opt_limit > 0} { + set boundaries [lrange $boundaries[unset boundaries] 0 $opt_limit-1] + } + + set i 0 + foreach b $boundaries { + if {$opt_boundaryheader ne ""} { + set j [expr {$i+1}] + append result [string map [list %b% $b %i% $i %j% $j] $opt_boundaryheader] \n + } + set low [expr {max(($b - $pre_bytes),0)}] + set high [expr {min(($b + $post_bytes),$max_bytes)}] + + set lineinfolist [my chunkrange_to_lineinfolist $low $high -show_truncated 1] + set le_map [list \r\n \r \n ] + set result_list [list] + foreach lineinfo $lineinfolist { + set lineidx [dict get $lineinfo lineindex] + + set linenum [expr {$lineidx + $linebase}] + set s [dict get $lineinfo start] + set e [dict get $lineinfo end] + + set boundarymarker "" + set displayidx "" + set linenum_display $linenum + if {$s <= $b && $e >= $b} { + set idx [expr {$b - $s}] ;#index into whole position in whole line - not so useful if we're viewing a small section of a line + set char [string index [my line $lineidx] $idx] + set char_display [string map [list \r \n ] $char] + if {[dict get $lineinfo is_truncated]} { + set tside [dict get $lineinfo truncatedside] + set truncated [dict get $lineinfo truncated] + set tlen [string length $truncated] + if {"left" in $tside} { + set tleft [dict get $lineinfo truncatedleft] + set tleftlen [string length $tleft] + set displayidx [expr {$idx - $tleftlen}] + } elseif {"right" in $tside} { + set displayidx $idx + } + } else { + set displayidx $idx + } + set boundarymarker "'[a+ green bold]$char_display[a]'@$displayidx" + set linenum_display ${linenum_display},$idx + } + + set lhs_status $opt_cmark ;#default + set rhs_status $opt_cmark ;#default + if {[dict get $lineinfo is_truncated]} { + set line [dict get $lineinfo truncated] + set tside [dict get $lineinfo truncatedside] + if {"left" in $tside && "right" in $tside } { + set lhs_status $opt_tmark + set rhs_status $opt_tmark + } elseif {"left" in $tside} { + set lhs_status $opt_tmark + } elseif {"right" in $tside} { + set rhs_status $opt_tmark + } + + + } else { + set line [my line $lineidx] + } + if {$displayidx ne ""} { + set line [string replace $line $displayidx $displayidx [a+ White green bold]$char_display[a]] + } + set displayline [string map $le_map $line] + lappend result_list [list $linenum_display $boundarymarker $lhs_status $displayline $rhs_status] + } + set title_linenum "LNUM" + set linenums [lsearch -index 0 -all -inline -subindices $result_list *] + set markers [lsearch -index 1 -all -inline -subindices $result_list *] + set lines [lsearch -index 3 -all -inline -subindices $result_list *] + set title_marker "" + set title_line "Line" + #todo - use punk::char for unicode support of wide chars etc? + set widest_linenum [tcl::mathfunc::max {*}[lmap v [concat [list $title_linenum] $linenums] {string length $v}]] + set widest_marker [tcl::mathfunc::max {*}[lmap v [concat [list $title_marker] $markers] {string length [stripansi $v]}]] + set widest_status [expr {max([string length $opt_cmark], [string length $opt_tmark])}] + set widest_line [tcl::mathfunc::max {*}[lmap v [concat [list $title_line] $lines] {string length $v}]] + foreach row $result_list { + lassign $row linenum marker lhs_status line rhs_status + append result [format " %-*s " $widest_linenum $linenum] + append result [format " %-*s " $widest_marker $marker] + append result [format " %-*s " $widest_status $lhs_status] + append result [format " %-*s " $widest_line $line] + append result [format " %-*s " $widest_status $rhs_status] \n + } + incr i + } + set ::punk::fileline::ansi::enabled $pre_ansi_enabled + return $result + } method linecount {} { #*** !doctools #[call class::textinfo [method linecount]] @@ -369,6 +698,15 @@ namespace eval punk::fileline::class { ########################### set first [dict create lineindex $start_lineindex {*}[dict get $o_linemap $start_lineindex] payload [lindex $o_payloadlist $start_lineindex]] set start_info [dict get $o_linemap $start_lineindex] + + + if {$chunkstart > [dict get $start_info start]} { + dict set first is_truncated 1 + dict set first truncatedside [list left] ;#truncatedside is a list which may have 'right' added if last line is same as first line + } else { + dict set first is_truncated 0 + } + if {$opt_show_truncated} { #line1 if {$chunkstart > [dict get $start_info start]} { @@ -382,13 +720,9 @@ namespace eval punk::fileline::class { set lhs [string range $payload_and_le 0 $split-1] dict set first truncated $truncated - dict set first truncatedside [list left] ;#truncatedside is a list which may have 'right' added if last line is same as first line dict set first truncatedleft $lhs - dict set first is_truncated 1 - } else { - dict set first is_truncated 0 } - } + } ########################### ########################### @@ -410,35 +744,45 @@ namespace eval punk::fileline::class { if {$end_lineindex == $start_lineindex} { #same record set end_info $start_info + + + if {$chunkend < [dict get $end_info end]} { + #there is rhs truncation + if {[dict get $first is_truncated]} { + dict set first truncatedside [list left right] + } else { + dict set first is_truncated 1 + dict set first truncatedside [list right] + } + } + if {$opt_show_truncated} { if {$chunkend < [dict get $end_info end]} { - #lhere is rhs truncation - if {[dict get $first is_truncated]} { - dict set first truncatedside [list left right] - } else { - dict set first is_truncated 1 - dict set first truncatedside [list right] - } + #there is rhs truncation and we need to return the splits #do rhs truncation - possibly in addition to existing lhs truncation # ... + set payload [lindex $o_payloadlist $end_lineindex] + set line_start [dict get $end_info start] + set le_chars [dict get [dict create lf \n crlf \r\n none ""] [dict get $end_info le]] + set payload_and_le "${payload}${le_chars}" + set split [expr {$chunkend - $line_start}] + set truncated [string range $payload_and_le 0 $split] + set rhs [string range $payload_and_le $split+1 end] + dict set first truncatedright $rhs if {"left" ni [dict get $first truncatedside]} { #rhs truncation only - set payload [lindex $o_payloadlist $end_lineindex] - set line_start [dict get $end_info start] - set le_chars [dict get [dict create lf \n crlf \r\n none ""] [dict get $end_info le]] - set payload_and_le "${payload}${le_chars}" puts "payload_and_le: $payload_and_le" - puts "LENGHT: [string length $payload_and_le]" + puts "LENGTH: [string length $payload_and_le]" #--- - set split [expr {$chunkend - $line_start}] - set truncated [string range $payload_and_le 0 $split] - set rhs [string range $payload_and_le $split+1 end] #--- dict set first truncated $truncated dict set first truncatedside [list right] - dict set first truncatedright $rhs } else { #truncated on both sides + set lhslen [string length [dict get $first truncatedleft]] + #re-truncate the truncation to reapply the original lhs truncation + set truncated [string range $truncated $lhslen end] + dict set first truncated $truncated } } } @@ -447,10 +791,18 @@ namespace eval punk::fileline::class { } else { set last [dict create lineindex $end_lineindex {*}[dict get $o_linemap $end_lineindex] payload [lindex $o_payloadlist $end_lineindex]] set end_info [dict get $o_linemap $end_lineindex] + + + if {$chunkend < [dict get $end_info end]} { + dict set last is_truncated 1 + dict set last truncatedside [list right] + } else { + dict set last is_truncated 0 + } + if {$opt_show_truncated} { if {$chunkend < [dict get $end_info end]} { #there is rhs truncation - and last line in range is a different line to first one - dict set last is_truncated 1 set payload [lindex $o_payloadlist $end_lineindex] set line_start [dict get $end_info start] set line_end [dict get $end_info end] @@ -464,7 +816,6 @@ namespace eval punk::fileline::class { set rhs [string range $payload_and_le $split+1 end] dict set last truncated $truncated - dict set last truncatedside [list right] dict set last truncatedright $rhs #this has the effect that truncating the rhs by 1 can result in truncated being larger than original payload for crlf lines - as payload now sees the cr #this is a bit unintuitive - but probably best reflects the reality. The truncated value is the truncated 'line' rather than the truncated 'payload' @@ -479,12 +830,15 @@ namespace eval punk::fileline::class { lappend infolist $last } ########################### - + #assert all records have is_truncated key. + #assert if is_truncated == 1 truncatedside should contain a list of either left, right or both left and right + #assert If not opt_show_truncated - then truncated records will not have truncated,truncatedleft,truncatedright keys. return $infolist } + #need to check truncations so that any split \r\n is counted precisely todo method chunk_le_counts {chunkstart chunkend} { - set infolines [my chunkrange_to_lineinfolist $chunkstart $chunkend] + set infolines [my chunkrange_to_lineinfolist $chunkstart $chunkend -show_truncated 1] set lf_count 0 set crlf_count 0 set none_count 0 @@ -498,7 +852,33 @@ namespace eval punk::fileline::class { incr none_count } } - return [dict create lf $lf_count crlf $crlf_count unterminated $none_count] + #even without split crlf - this can overcount by counting the lf or crlf in a line which had an ending not in the chunk range specified + + #check first and last infoline for truncations + #Also check if the truncation is directly between an crlf + #both an lhs split and an rhs split could land between cr and lf + #to be precise - we should presumably count the part within our chunk as either a none for cr or an lf + #This means a caller counting chunk by chunk using this method will sometimes get the wrong answer depending on where crlfs lie relative to their chosen chunk size + #This is presumably ok - as it should be a well known thing to watch out for. + #If we're only receiving chunk by chunk we can't reliably detect splits vs lone s in the data + #There are surely more efficient ways for a caller to count line-endings in the way that makes sense for them + #but we should makes things as easy as possible for users of this line/chunk structure anyway. + + set first [lindex $infolines 0] + if {[dict get $first is_truncated]} { + #could be the only line - and truncated at one or both ends. + #both a left and a right truncation could split a crlf + + } + set last [lindex $infolines end] + if {[dict get $first lineindex] != [dict get $last lineindex]} { + #only need to process last if it is a different line + #if so - then split can only be left side + + } + + + return [dict create lf $lf_count crlf $crlf_count unterminated $none_count warning line_ending_splits_unimplemented] } #todo - test last line and merge as necessary with first line from new chunk - generate line data only for appended chunk @@ -571,7 +951,7 @@ namespace eval punk::fileline::class { error "Bad start index '$original_startidx'. $startidx out of bounds 0 - $max" } if {$endidx < 0 || $endidx > $max} { - error "Bad end index '$original_endidx'. $endidx out of bounds 0 - $max" + error "Bad end index '$original_endidx'. $endidx out of bounds 0 - $max (try $max or end)" } return [list $startidx $endidx] } @@ -704,8 +1084,31 @@ namespace eval punk::fileline { #[para] Core API functions for punk::fileline #[list_begin definitions] + proc file_textinfo {filename} { + set fd [open $filename r] ;#open gives a good enough error message if file not readable + fconfigure $fd -translation binary + set rawfiledata [read $fd] + close $fd + set textobj [class::textinfo new $rawfiledata] + puts stdout "Bytes loaded : [$textobj chunklen]" + puts stdout "Lines recognised : [$textobj linecount]" + set leinfo [$textobj chunk_le_counts 0 end] + puts stdout "crlf endings (windows) : [dict get $leinfo crlf]" + puts stdout "lf endings (unix) : [dict get $leinfo crlf]" + puts stdout "unterminated lines : [dict get $leinfo unterminated]" + return $textobj + } - + proc file_boundary_display {filename startbyte endbyte chunksize args} { + set fd [open $filename r] ;#use default error if file not readable + fconfigure $fd -translation binary + set rawfiledata [read $fd] + close $fd + set textobj [class::textinfo new $rawfiledata] + set result [$textobj chunk_boundary_display $startbyte $endbyte $chunksize {*}$args] + $textobj destroy + return $result + } @@ -728,7 +1131,7 @@ namespace eval punk::fileline::lib { - proc range_spans_chunk_boundaries {start end chunksize} { + proc range_spans_chunk_boundaries {start end chunksize args} { #*** !doctools #[call [fun lib::range_spans_chunk_boundaries] [arg start] [arg end] [arg chunksize]] #[para]Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range. @@ -751,12 +1154,12 @@ namespace eval punk::fileline::lib { #[para] This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75 if {[catch {package require Tcl 8.7}]} { #only one implementation available for older Tcl - tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize + tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize {*}$args } if {(($end - $start) / $chunksize) < 75} { - tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize + tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize {*}$args } else { - tailcall punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize + tailcall punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize {*}$args } } @@ -779,8 +1182,22 @@ namespace eval punk::fileline::system { #for 8.7+ using lseq - #much faster when resultant boundary size is large - proc _range_spans_chunk_boundaries_lseq {start end chunksize} { + #much faster when resultant boundary size is large (at least when offset 0) + proc _range_spans_chunk_boundaries_lseq {start end chunksize args} { + set defaults [dict create\ + -offset 0\ + ] + set known_opts [dict keys $defaults] + foreach {k v} $args { + if {$k ni $known_opts} { + error "unknown option '$k'. Known options: $known_opts" + } + } + set opts [dict merge $defaults $args] + # -- --- --- --- + set opt_offset [dict get $opts -offset] + # -- --- --- --- + set smod [expr {$start % $chunksize}] if {$smod != 0} { set start [expr {$start + ($chunksize - $smod)}] @@ -789,12 +1206,30 @@ namespace eval punk::fileline::system { } } set boundaries [lseq $start to $end $chunksize] + if {$opt_offset} { + set boundaries [lap v $boundaries[unset boundaries] {expr {$v + $opt_offset}}] + } + return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries] } #faster than lseq for small number of resultant boundaries (~< 75) (which is a common use case) #gets very slow (comparitively) with large resultsets - proc _range_spans_chunk_boundaries_tcl {start end chunksize} { + proc _range_spans_chunk_boundaries_tcl {start end chunksize args} { + set defaults [dict create\ + -offset 0\ + ] + set known_opts [dict keys $defaults] + foreach {k v} $args { + if {$k ni $known_opts} { + error "unknown option '$k'. Known options: $known_opts" + } + } + set opts [dict merge $defaults $args] + # -- --- --- --- + set opt_offset [dict get $opts -offset] + # -- --- --- --- + set is_span 0 set smod [expr {$start % $chunksize}] if {$smod != 0} { @@ -802,9 +1237,9 @@ namespace eval punk::fileline::system { } set boundaries [list] for {set b $start} {$b <= $end} {incr b $chunksize} { - lappend boundaries $b + lappend boundaries [expr {$b + $opt_offset}] } - return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries] + return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries offset $opt_offset] } proc _range_spans_chunk_boundaries_TIMEIT {start end chunksize {repeat 1}} { @@ -815,6 +1250,22 @@ namespace eval punk::fileline::system { } } } +namespace eval punk::fileline::ansi { + #*** !doctools + #[subsection {Namespace punk::fileline::ansi}] + #[para]These are ansi functions imported from punk::ansi - or no-ops if that package is unavailable + #[para]See [package punk::ansi] for documentation + #[list_begin definitions] + variable enabled 1 + #*** !doctools + #[call [fun ansi::a]] + #[call [fun ansi::a+]] + #[call [fun ansi::stripansi]] + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::fileline::ansi ---}] +} + # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Ready package provide punk::fileline [namespace eval punk::fileline { diff --git a/src/modules/punk/mix/commandset/loadedlib-999999.0a1.0.tm b/src/modules/punk/mix/commandset/loadedlib-999999.0a1.0.tm index 4a6d1100..97602c7f 100644 --- a/src/modules/punk/mix/commandset/loadedlib-999999.0a1.0.tm +++ b/src/modules/punk/mix/commandset/loadedlib-999999.0a1.0.tm @@ -419,7 +419,7 @@ namespace eval punk::mix::commandset::loadedlib { if {![file exists $source_file]} { error "Unable to verify source file existence at: $source_file" } - set source_data [fcat $source_file -translation binary] + set source_data [fcat -translation binary $source_file] if {![string match "*package provide*" $source_data]} { puts stderr "Sorry - unable to verify source file contains 'package provide' statement of some sort - copy manually" return false diff --git a/src/modules/punk/mix/commandset/scriptwrap-999999.0a1.0.tm b/src/modules/punk/mix/commandset/scriptwrap-999999.0a1.0.tm index 0865a09a..918a3b21 100644 --- a/src/modules/punk/mix/commandset/scriptwrap-999999.0a1.0.tm +++ b/src/modules/punk/mix/commandset/scriptwrap-999999.0a1.0.tm @@ -116,7 +116,11 @@ namespace eval punk::mix::commandset::scriptwrap { #It is more likely to catch issues if adjustments are made to the initial batch-script code in a template. # #cmd allows labels at call sites to span lines with line continuation character ^ - #target labels can't span lines with ^ - cmd doesn't recognise them. + #target labels can't span lines with ^ - cmd doesn't recognise them (They possibly do span in a way - but but the newlines may be included in the label - so they may be hard/impossible to call). + #Note that we can't filter obviously non-batch-script lines before processing - as the way batch label-scanning works it scans in chunks of 512 bytes so all lines are relevant. + #This means label-like things could be incorrectly found in other script data - that's partly the point of this check + #Note that we can't filter obviously non-batch-script lines before processing - as the way batch label-scanning works it scans in chunks of 512 bytes so all lines are relevant. + #This means label-like things could be incorrectly found in other script data - that's partly the point of this check. proc checkoutput {filepath args} { if {![file exists $filepath]} { error "punk::mix::commandset:scriptwrap error cannot find file '$filepath'" @@ -142,8 +146,8 @@ namespace eval punk::mix::commandset::scriptwrap { # -- --- --- --- --- --- --- # #### load file #### - ##set raw_filedata [fcat $filepath -translation binary] - #don't use fcat/fileutil::cat - as we may need to look at data beyond a ctrl-z (\x1A) section + ##set raw_filedata [fcat -translation binary $filepath] + # - as we may need to look at data beyond a ctrl-z (\x1A) section set fd [open $filepath r] fconfigure $fd -translation binary set raw_filedata [read $fd] @@ -198,124 +202,304 @@ namespace eval punk::mix::commandset::scriptwrap { puts stdout "[a+ yellow bold]WARNING: More than one unterminated line reported - seems fishy[a]" } puts "Checking line based labels and 512 byte boundaries from call sites for possible labels and code execution points." - set result "" set line_count [$objFile linecount] set callid 0 ;#id for callsite and objects created set file_offset 0 set error_labels [list] set warning_labels [list] - for {set lineindex 0} {$lineindex < $line_count} {incr lineindex} { - set lineinfo [$objFile lineinfo $lineindex] - set ln [dict get $lineinfo payload] - set linenum [expr {$lineindex + 1}] + set call_labels_found [dict create] + set target_labels_found [dict create] + set possible_target_labels_found [dict create] + set warning_target_labels_found [dict create] + for {set callingline_index 0} {$callingline_index < $line_count} {incr callingline_index} { + set callingline_info [$objFile lineinfo $callingline_index] + set callingline_payload [dict get $callingline_info payload] + set callingline_len [dict get $callingline_info linelen] + set callingline_num [expr {$callingline_index + 1}] set callposn -1 - set trimln [string trim $ln] + set trimln [string trim $callingline_payload] if {[string match "rem *" $trimln] || [string match "@rem *" $trimln] || [string match "REM *" $trimln] || [string match "@REM *" $trimln]} { #ignore things that look like a call that are beind a REM } else { - foreach search_regex [list {(.*\s+|^)(@*call\s*:)(\S.*)} {(.*\s+|^)(@*CALL\s*:)(\S.*)} {(.*\s+|^)(@*goto\s*:)(\S.*)} {(.*\s+|^)(@*GOTO\s*:)(\S.*)}] { - if {[regexp $search_regex $ln _m precall call labelplus]} { - set callposn [expr {$file_offset + [string length $ln]}] ;#take callposn as end of line .. review - multiline statements? + + #todo - better callsite analysis. There can be data between @GOTO or @CALL and : other than just whitespace! + + #todo - allow analysis of colon-less call. May need to check list of internal commands - but what about external ones? + #foreach search_regex [list {(.*\s+|^)(@*call\s*:*)(\S.*)} {(.*\s+|^)(@*CALL\s*:*)(\S.*)} {(.*\s+|^)(@*goto\s*:*)(\S.*)} {(.*\s+|^)(@*GOTO\s*:*)(\S.*)}] {} + foreach search_regex [list {(.*\s+|^)(@*call\s*:)(\S.*)} {(.*\s+|^)(@*CALL\s*:)(\S.*)} {(.*\s+|^)(@*goto\s*:)(\S.*)} {(.*\s*|.*\s+|^)(@*GOTO\s*:)(\S.*)} {(.*\|\|.*)(@*GOTO\s*:)(\S.*)}] { + if {[regexp $search_regex $callingline_payload _m precall call labelplus]} { + #todo further checks to see if it's actually a batch script line + # - - - - work out what cmd.exe considers start of 512B boundaries when scanning from a callsite #callposn affected by newlines? + #set callposn [expr {$file_offset + [string length $callingline_payload]}] ;#take callposn as end of line .. review - multiline statements? + set callposn [expr {$file_offset + $callingline_len}] + + #Note there are anomalies around target labels in bracketed sections such as IF blocks + #this is bad practice - as it can lead to unbalanced braces - but batch files can still work under cmd.exe with them in some cases + #e.g unbalanced trailing bracket may be ignored. + #A working script with target-labels in braces can fail due to boundary issues we don't detect (callsite for boundary counting may need to be at end of entire multiline if block??) + #For now - just make sure punk templates don't do this - but it would be nice to be able to detect. + + #set callposn $file_offset + #set callposn [expr {$file_offset + [string length $precall]}] + # - - - - break } } - #todo - multiple calls on one line. - also - determine what cmd considers the starting point for forward scanning when call is in a structure such as an if statement. + set callsite_labelfound 0 ;#until proven if {$callposn != -1} { - puts stdout "[a+ bold cyan]CALLSITE on line $linenum ending at byte $callposn[a]" + set callposn_lineindex [lindex [$objFile chunkrange_to_linerange $callposn $callposn] 0] + #the line represented by callposn may actually be beyond the calling_line_index + set labelinfo [batchlib::get_callsite_label $labelplus] + if {[dict get $labelinfo labelfound]} { + set callsite_labelfound 1 + set label [dict get $labelinfo label] + set call_label_record [list label $label line $callingline_num] + dict lappend call_labels_found $label $call_label_record + } else { + puts stderr "[a+ yellow bold]WARNING - apparent callsite $callposn but couldn't verify label[a]" + puts stderr "Line:\n$trimln" + } + } + + #todo - multiple calls on one line. - also - determine what cmd considers the starting point for forward scanning when call is in a structure such as an if statement. + if {$callsite_labelfound} { + puts stdout "[a+ bold cyan]CALLSITE on line $callingline_num ending at byte $callposn[a]" set callsummary [string range "${call}${labelplus}" 0 100] if {[string length $callsummary] < [string length ${call}${labelplus}]} { puts stdout " CALLSITE: $callsummary (truncated to 100 bytes)" } else { puts stdout " CALLSITE: '${call}${labelplus}'" } - puts stdout " [a+ cyan]FULLINE: $ln[a]" + puts stdout " [a+ cyan]FULLINE: $callingline_payload[a]" + + ################################## - set labelpluswords [regexp -inline -all {\S+} $labelplus] ;#don't assume labelplus can be treated as Tcl list - use regexp to split + #set labelpluswords [regexp -inline -all {\S+} $labelplus] ;#don't assume labelplus can be treated as Tcl list - use regexp to split #NOTE it is invalid to assume label always terminated by space - pair of % characters (variable substitution) can contain a space without terminating label + #set word1 [lindex $labelpluswords 0] - set word1 [lindex $labelpluswords 0] - set word1len [string length $word1] - set labeltail [string range $labelplus $word1len end] - if {[string index $word1 end] eq "^"} { - if {![string length $labeltail]} { - #label - } - } else { - } - #todo batchlib::get_callsite_label $labelplus ################################## - set label $word1 + set labelsize [string length $label] #scan forward for labels at boundaries set forward_chunk [$objFile chunk $callposn end] + set forward_chunk_base $callposn ;#name for clarity + incr callid - set callvar "call-${callid}_fromline-${linenum}" + set callvar "call-${callid}_fromline-${callingline_num}" upvar 0 $callvar objForwardScan set objForwardScan [fileline::textinfo new $forward_chunk] + + + ################################################################################################################################## #Forward scan 1 - check at normal line boundaries - and see if collides with a chunk boundary - and if the label is obscured or ok set dsize [$objForwardScan chunklen] set num_boundaries [expr {$dsize / 512} ] - puts "scanning $dsize forward bytes in file for labels - num_boundaries: $num_boundaries" - set scan_offset 0 + puts "scanning $dsize forward bytes in file starting at $forward_chunk_base for label '$label' - num_boundaries: $num_boundaries" set total_offset $file_offset set found_forward_label 0 foreach scanlineinfo [$objForwardScan lineinfolist 0 end] { - set line_bytes [dict get $scanlineinfo linelen] + set scanline_start [dict get $scanlineinfo start] + set scanline_bytes [dict get $scanlineinfo linelen] set scanline [dict get $scanlineinfo payload] - set scanline_relstart [dict get $scanlineinfo start] - set line_global_start $total_offset - set line_global_end [expr {$total_offset + $line_bytes}] + set line_start_global [expr {$forward_chunk_base + $scanline_start}] + set line_index_global [lindex [$objFile chunkrange_to_linerange $line_start_global $line_start_global] 0] + set line_num_global [expr {$line_index_global + 1}] set trimscanline [string trim $scanline] - if {[string match ":$label*" $trimscanline]} { - incr found_forward_label - set label_posn_in_line [string first : $scanline] - set labelposn [expr {$scan_offset + $label_posn_in_line}] - #we only really care about exactly landing on a boundary or else the next 512 byte boundaries above the labelposn - if {($labelposn % 512) == 0} { - set ubound [expr {($labelposn / 512) * 512}] - } else { - set ubound [expr {(($labelposn / 512)+1) * 512}] + + set found_targetlabel_at_line 0 ;# until disproven + if {[string first : $scanline] >= 0} { + set labelinfo [batchlib::get_target_label_from_line $scanline] + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + #add to target_labels_found record below + set scan_target_label_record [list label $label line $line_num_global] + set found_targetlabel_at_line 1 } - set lbound [expr {$ubound - $labelsize}] - if {($labelposn >= $lbound) && ($labelposn <= $ubound)} { - lappend error_labels [list label $label call_offset_bytes $labelposn callsite [list call ${call}${labelplus} call_linenum $linenum]] - puts stdout "[a+ bold red]ERROR: label $trimscanline at offset from callsite: $labelposn total offset: $total_offset[a]" - puts stdout "[a+ bold red] This label appears to span the 512byte boundary at byte $ubound[a] [a+ yellow bold]from callsite.[a]" + } + + if {$found_targetlabel_at_line} { + set scan_target_label_same_line_seen false + if {[dict exists $target_labels_found $label]} { + set thislabel_records [dict get $target_labels_found $label] + foreach previous $thislabel_records { + if {[dict get $previous line] eq $line_num_global} { + set scan_target_label_same_line_seen true + } + } + } + incr found_forward_label + if {!$scan_target_label_same_line_seen} { + set label_posn_in_line [string first : $scanline] + set labelposn [expr {$scanline_start + $label_posn_in_line}] + #we only really care about exactly landing on a boundary or else the next 512 byte boundaries above the labelposn + if {($labelposn % 512) == 0} { + set ubound [expr {($labelposn / 512) * 512}] + } else { + set ubound [expr {(($labelposn / 512)+1) * 512}] + } + set lbound [expr {$ubound - $labelsize}] + if {($labelposn >= $lbound) && ($labelposn <= $ubound)} { + dict set scan_target_label_record error linestart_and_call_offset_bytes + lappend error_labels [list label $label linestart_and_call_offset_bytes $labelposn callsite [list call ${call}${labelplus} call_linenum $callingline_num] bad_target_line $line_num_global] + puts stdout "[a+ bold red]ERROR: file line: $line_num_global target-label $trimscanline at line-beginning and with byte offset from callsite: $labelposn offset in file: $line_start_global[a]" + puts stdout "[a+ bold red] This target-label appears to span the 512byte boundary at byte $ubound[a] [a+ yellow bold]from callsite.[a]" + puts [$objForwardScan chunk_boundary_display [dict get $scanlineinfo start] [dict get $scanlineinfo end] 512 -linebase $callposn_lineindex+1 -limit 1] ;#+1 on callposn_linindex to do editor-style linenums + } else { + dict set scan_target_label_record ok 1 + puts stdout "[a+ bold green]OK: file line: $line_num_global target-label $trimscanline at line-beginning and with byte offset from callsite: $labelposn offset in file: $line_start_global[a]" + } + dict lappend target_labels_found $label $scan_target_label_record } else { - puts stdout "[a+ bold green]OK: label $trimscanline at offset from callsite: $labelposn total offset: $total_offset[a]" + puts stdout "OK - seen label $label on $line_num_global before" } } - incr total_offset $line_bytes - incr scan_offset $line_bytes + incr total_offset $scanline_bytes } + ################################################################################################################################## + #todo #forward scan 2 - check any boundaries missed above because the label isn't at the begining of a line #these are potentially hidden labels that could activate without requiring the label be at the beginning of a line #check boundary spans relative to start of this objForwardScan chunk - set forward_spaninfo [fileline::range_spans_chunk_boundaries {*}[$objForwardScan numeric_chunkrange 0 end] 512] - if {[dict get $forward_spaninfo is_span]} { - set boundaries [dict get $forward_spaninfo boundaries] - if {[llength $boundaries] > 1} { - puts stdout "line $linenum scan from call label $label at $callposn. Callsite-relative boundaries crossed: [lrange $boundaries 1 end]" + + #adjust boundary-search by resetting counter each time crlf encountered + set forward_lines [$objForwardScan chunkrange_to_lineinfolist 0 end] + set boundary_positions [list 0] + set scanner_offset 0 + set scanner_position 0 + foreach forwardbline_info $forward_lines { + #review - do we need to check the payload in case we have configured the textinfo object to split the file only on lf - (not true by default) + set forwardbline_len [dict get $forwardbline_info linelen] + set forwardbline_spaninfo [fileline::range_spans_chunk_boundaries [expr {$scanner_position + $scanner_offset}] [expr {$scanner_position + $scanner_offset + $forwardbline_len}] 512] + set forwardbline_boundaries [dict get $forwardbline_spaninfo boundaries] + + foreach b $forwardbline_boundaries { + set relb [expr $b + $scanner_offset] + if {$relb <= [dict get $forwardbline_info end]} { + lappend boundary_positions $relb + } else { + #leave it for the next line - as we may need to adjust offset anyway + break + } } + if {[dict get $forwardbline_info le] eq "crlf"} { + set scanner_offset [expr {[dict get $forwardbline_info end] - [lindex $boundary_positions end]}] ;#reset on crlf + #puts "+++++ set scanner_offset $scanner_offset" + } + set scanner_position [dict get $forwardbline_info end] + } + set boundary_positions [lsearch -all -not -inline $boundary_positions 0] + if {[llength $boundary_positions]} { + puts stdout "[a+ blue bold]----> [llength $forward_lines] forward lines, boundaries (possibly with offsets) to check $boundary_positions[a]" + } else { + puts stdout "[a+ blue bold]----> [llength $forward_lines] forward lines, No boundaries to check (generally expected for files with crlf line endings and no extremely long lines)[a]" } + + if {[llength $boundary_positions]} { + puts stdout "line $callingline_num scan from call label $label ending at position $callposn. Next Callsite-relative boundary [lindex $boundary_positions 0]" + + for {set i 0} {$i < [llength $boundary_positions]} {incr i} { + set b [lindex $boundary_positions $i] + if {$i < [llength $boundary_positions]-1} { + set nextb [lindex $boundary_positions $i+1] + set top $nextb + } else { + set top end + } + + set forwardbline_infolist [$objForwardScan chunkrange_to_lineinfolist $b $top -show_truncated 1] + set forwardbline_info [lindex $forwardbline_infolist 0] + if {[dict get $forwardbline_info is_truncated]} { + set payload_from_boundary [dict get $forwardbline_info truncated] + } else { + set payload_from_boundary [dict get $forwardbline_info payload] + } + set forwardbline_len [dict get $forwardbline_info linelen] + set forwardbline_index [dict get $forwardbline_info lineindex] + set forwardbline_start [dict get $forwardbline_info start] + set forwardbline_start_global [expr {$forward_chunk_base + $forwardbline_start}] + set forwardbline_index_global [lindex [$objFile chunkrange_to_linerange $forwardbline_start_global $forwardbline_start_global] 0] + set forwardbline_num_global [expr {$forwardbline_index_global + 1}] + + set found_targetlabel_at_boundary 0 + if {[string first : $payload_from_boundary] >= 0} { + #puts stdout "Possible label at boundary $b - testing" + set labelinfo [batchlib::get_target_label_from_line $payload_from_boundary] + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + incr found_forward_label + set found_targetlabel_at_boundary 1 + } elseif {[dict get $labelinfo labelfound]} { + set unsearched_label [dict get $labelinfo label] + puts stderr "[a+ cyan]Line $forwardbline_num_global: Found an item that cmd may interpret as a target label because of its location at a boundary $b - but it doesn't seem to be the one we are looking for. Looking for '$label' Found: '[dict get $labelinfo label]'[a]" + puts stderr "[a+ yellow]Warning - if the label '$unsearched_label' on line $forwardbline_num_global isn't meant to be a target - it may be safest to make sure batch script isn't using CALL or GOTO with target :$unsearched_label" + puts stdout "linedata:\n" + #puts stdout "'$payload_from_boundary'" + puts [$objForwardScan chunk_boundary_display [dict get $forwardbline_info start] [dict get $forwardbline_info end] 0 -boundaries $b -linebase $callposn_lineindex+1 -limit 1] + + #dubious value to check call_labels_found - as we didn't run through and find all call labels first! + if {$unsearched_label in [dict keys $call_labels_found]} { + set boundary_target_label_record [list label $unsearched_label line $forwardbline_num_global error found_via_boundary_check_on_a_different_call_label] + dict lappend warning_target_labels_found $unsearched_label $boundary_target_label_record + } else { + set possible_target_label_record [list label $unsearched_label line $forwardbline_num_global] + dict lappend possible_target_labels_found $unsearched_label $possible_target_label_record + } + } else { + set note "" + if {[dict exists $labelinfo note]} { + set note [dict get $labelinfo note] + } + if {$note ne "prefix_fail"} { + puts stdout "no label detected at boundary $b - probably ok. Note from target-label scanner: $note" + } + } + if {$found_targetlabel_at_boundary} { + set target_label_record [list label $label line $forwardbline_num_global error call_offset_bytes] + dict lappend target_labels_found $label $target_label_record + set note "possibly unreliable or dangerous target-label at line $forwardbline_num_global may execute line [expr {$forwardbline_num_global +1}].\n" + append note "Target label not at line start but was found by scanning 512byte chunks from callsite with count resets at any crlf encountered\n" + append note "Adjust spacing between line $callingline_num and $forwardbline_num_global to avoid the 512 boundary - and re-test for other boundary problems" + lappend error_labels [list label $label call_offset_bytes $b callsite [list call ${call}${labelplus} call_linenum $callingline_num] note $note] + puts stdout "[a+ bold red]ERROR: line $forwardbline_num_global target-label [dict get $labelinfo rawlabel] found at boundary and with byte offset from callsite: $b [a]" + puts stdout "[a+ bold red] This target-label appears to fall at or just after the 512byte boundary at byte $b[a] [a+ yellow bold]from callsite.[a]" + puts stdout "[a+ bold yellow]Code may execute at line [expr {$forwardbline_num_global + 1}] (or at next 512Byte boundary in some circumstances)[a]" + puts stdout "[a+ bold yellow]Recommend adjusting spacing between line $callingline_num and $forwardbline_num_global[a]" + puts stdout [$objForwardScan chunk_boundary_display [dict get $forwardbline_info start] [dict get $forwardbline_info end] 0 -boundaries $b -linebase $callposn_lineindex+1 -limit 1] + } + #if found any label - peek at next boundary + if {[dict get $labelinfo labelfound] && $i+1 < [llength $boundary_positions]} { + set next_lineinfolist [$objForwardScan chunkrange_to_lineinfolist $nextb end -show_truncated 1] + set next_lineinfo [lindex $next_lineinfolist 0] + puts "peek next boundary data - line [expr {$forwardbline_num_global + 1}]:" + #if {[dict get $next_lineinfo is_truncated]} { + # puts [dict get $next_lineinfo truncated] + #} else { + # puts [dict get $next_lineinfo payload] + #} + puts [$objForwardScan chunk_boundary_display [dict get $next_lineinfo start] [dict get $next_lineinfo end] 0 -boundaries $nextb -linebase $callposn_lineindex+1 -limit 1] + } + } + } + } + $objForwardScan destroy + #scan behind for labels at boundaries - using offset from start of file #we do a backward scan even if a forward label has been found, so that we can warn of duplicate labels. set prior_start 0 - set prior_end $lineindex ;#only scan from file start to call-site + set prior_end $callingline_index ;#only scan from file start to call-site - set prior_total_offset 0 + set pline_begin 0 set found_backward_label 0 set p_linenum 0 for {set pidx $prior_start} {$pidx <= $prior_end} {incr pidx} { @@ -323,36 +507,71 @@ namespace eval punk::mix::commandset::scriptwrap { set pline [dict get $plineinfo payload] incr p_linenum set pline_bytes [dict get $plineinfo linelen] ;#includes lf or crlf ending bytes - set pline_start $prior_total_offset + set pline_start $pline_begin if {$pline_start != [dict get $plineinfo start]} { error "checkoutput error: line $p_linenum - calculated start $pline_start not equal to stored start [dict get $plineinfo start]" } - set pline_end [expr {$prior_total_offset + $pline_bytes -1}] + set pline_end [expr {$pline_begin + $pline_bytes -1}] if {$pline_end != [dict get $plineinfo end]} { error "checkoutput error: line $p_linenum - calculated end $pline_end not equal to stored end [dict get $plineinfo end]" } set trimpline [string trim $pline] - #callsite labels appear to be literal - not subject to % expansion and escaping for example. - if {[string match ":$label*" $trimpline]} { - incr found_backward_label - set prior_label_posn_in_line [string first : $pline] - set prior_label_posn [expr {$prior_total_offset + $prior_label_posn_in_line}] - if {($prior_label_posn % 512) == 0} { - set p_ubound [expr {($prior_label_posn / 512) * 512}] - } else { - set p_ubound [expr {(($prior_label_posn /512) +1) * 512}] - } - set p_lbound [expr {$p_ubound - $labelsize}] - if {($prior_label_posn >= $p_lbound) && ($prior_label_posn <= $p_ubound)} { - lappend error_labels [list label $label file_offset_bytes $prior_label_posn callsite [list call ${call}${labelplus} call_linenum $linenum]] - puts stdout "[a+ bold red]ERROR: label '$trimpline' at line $p_linenum and offset from file start: $prior_label_posn total offset: $prior_total_offset[a]" - puts stdout "[a+ bold red] This label appears to span the 512byte boundary at byte $p_ubound[a] [a+ yellow bold]from file start[a]" - } else { - puts stdout "[a+ bold green]OK: prior label '$trimpline' at offset from file start: $prior_label_posn total offset: $prior_total_offset[a]" - + #todo - process leading part of line before : + #e.g the following are valid (leading # is not part of the examples) + # ====== : label + # also + #%=== == : label + # also + #%= ,,,, ;;; = : label + + #these token delimiters (; , = 0x0B ox0C 0xFF ) + #can also occur after the colon e.g + #: ;label + + #the following is a valid target for @GOTO :#something + #: ;#something + + #It is possible for closing bracket ) to also be invisible if there is no open ( active + #This only seems to work for a single ) at beggining of the line multiple ) even separated by spaces or ; etc seem to stop the target being found. + #The lone unbalanced ) can act like a comment in other contexts - and can appear multiple times, but only if first ) on the line is followed by a delimiter + #Essentially all characters following the first ) are ignored - but if the first is something like )) then cmd tries to interpret that as a command and fails + # e.g + #) ignored + #);)))) ignored + #)) causes error as cmd tries to run "))" as a command. + #This is a reason why *target* labels shouldn't appear in bracketed blocks - as code jumps to a point where ( ) will be unbalanced + + #target labels are literal with regards to % ie not subject to % expansion - but ^ must still be processed + if {[string first : $pline] >= 0} { + #space (and some other chars) allowed between colon and label at target - (but not at callsite) + set labelinfo [batchlib::get_target_label_from_line $pline] + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + set target_label_record [list label $label line $p_linenum] + puts stdout "$labelinfo" + incr found_backward_label + set prior_label_posn_in_line [string first : $pline] + set prior_label_posn [expr {$pline_begin + $prior_label_posn_in_line}] + if {($prior_label_posn % 512) == 0} { + set p_ubound [expr {($prior_label_posn / 512) * 512}] + } else { + set p_ubound [expr {(($prior_label_posn /512) +1) * 512}] + } + set p_lbound [expr {$p_ubound - $labelsize}] + if {($prior_label_posn >= $p_lbound) && ($prior_label_posn <= $p_ubound)} { + dict set target_label_record error linestart_and_overlap + lappend error_labels [list label $label linestart_and_overlap $prior_label_posn callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + puts stdout "[a+ bold red]ERROR: target-label '$trimpline' at line $p_linenum and offset from file start: $prior_label_posn line start: $pline_begin[a]" + puts stdout "[a+ bold red] This target-label appears to span the 512byte boundary at byte $p_ubound[a] [a+ yellow bold]from file start[a]" + puts [$objFile chunk_boundary_display [dict get $plineinfo start] [dict get $plineinfo end] 512 -linebase 1 -limit 1] + } else { + dict set target_label_record ok 1 + puts stdout "[a+ bold green]OK: file line: $p_linenum target-label '$trimpline' before call from line $callingline_num. Target is at offset from file start: $prior_label_posn line start: $pline_begin[a]" + } + dict lappend call_labels_found $label $target_label_record } + #else - label we weren't searching for - even if at file boundary it should be picked up when actually searched? review } set spaninfo [fileline::range_spans_chunk_boundaries $pline_start $pline_end 512] if {[dict get $spaninfo is_span]} { @@ -365,44 +584,78 @@ namespace eval punk::mix::commandset::scriptwrap { continue } #overlap test is just a warning - we have a label-like thing overlapping the boundary + #todo - take account of fact that target label can be ": labelname" - so using just labelsize won't detect all overlaps + #The label could even be at the end of a long line that appears at first to be a comment e.g something like + # : whatever : sneakylabel + # or + #@REM ============================================================================================================================================================ : sneakylabel + + #The fact that it overlaps - means it's probably not being found with lf line-endings - and only the label :whatever should be found with crlf endings + #- but we won't always catch that something's fishy + #review set overlaptail [string range $pline [expr {$b - $labelsize}] [expr {($b + $labelsize) -1}]] ;#subtracting labelsize gives earliest possible overlap if {[string match "*:$label *" $overlaptail] } { - lappend warning_labels [list label $label warning label_spanning callsite [list call ${call}${labelplus} call_linenum $linenum]] + lappend warning_labels [list label $label warning label_spanning callsite [list call ${call}${labelplus} call_linenum $callingline_num]] puts stdout "[a+ bold yellow] WARNING: possible label $label spans boundary $b from start of file" } set pline_tail [string range $pline $b end] - #if {[string match ":$label *" $pline_tail]} {} - set re1 {\s*:%lbl%[\s|^|=].*} - set re1 [string map [list %lbl% $label] $re1] - set re2 {\s*:%lbl%$} - set re2 [string map [list %lbl% $label] $re2] - if {[regexp $re1 $pline_tail] || [regexp $re2 $pline_tail]} { - lappend error_labels [list label $label file_offset_bytes $b note "label at boundary but no preceeding newline - cmd may interpret as label and execute following line or code at next boundary" callsite [list call ${call}${labelplus} call_linenum $linenum]] - puts stdout "[a+ bold red]ERROR: *possible* label '$label' at line $p_linenum and offset from file start: $b total offset: $prior_total_offset[a]" - puts stdout "[a+ bold red] This label with no preceeding newline appears to span the 512byte boundary at byte $b[a] [a+ yellow bold]from file start[a]" - puts stdout "[a+ bold yellow] label starting at $b : $pline_tail[a]" - set tail_start $b - set tail_end [expr {$b + [string length $pline_tail]}] - set tail_spaninfo [fileline::range_spans_chunk_boundaries $tail_start $tail_end 512] - if {[dict get $tail_spaninfo is_span]} { - set tail_boundaries [dict get $tail_spaninfo boundaries] - set extra_tail_boundaries [lsearch -all -inline -not $tail_boundaries $b] - if {[llength $extra_tail_boundaries]} { - puts "Line $p_linenum also spans additional boundaries: $extra_tail_boundaries" - set next_boundary [lindex $extra_tail_boundaries 0] - set next_boundary_data [string range $pline [expr {$prior_total_offset + $next_boundary}] end] - puts "Line $p_linenum data at next boundary: [a+ yellow bold]$next_boundary_data[a]" - puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + + if {[string first : $pline_tail] >= 0} { + set labelinfo [batchlib::get_target_label_from_line $pline_tail] + set labelfound 0 + if {[dict get $labelinfo labelfound] && [dict get $labelinfo label] eq $label} { + set labelfound 1 + } elseif {[dict get $labelinfo labelfound]} { + puts stdout "Note: detected target label [dict get $labelinfo label] at file offset $b at boundary with no preceeding newline - but it's not the one we're currently scanning for" + } + if {$labelfound} { + set label_found_name [dict get $labelinfo label] + incr found_backward_label + + lappend error_labels [list label $label_found_name file_offset_bytes $b note "label at boundary but no preceeding newline - cmd may interpret as label and execute following line or code at next boundary" callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + + puts stdout "[a+ bold red]ERROR: *possible* label '$label_found_name' at line $p_linenum and offset from file start: $b line start: $pline_begin[a]" + puts stdout "[a+ bold red] This label with no preceeding newline appears to span the 512byte boundary at byte $b[a] [a+ yellow bold]from file start[a]" + puts stdout "[a+ bold red] cmd.exe may find this label - but it probably shouldn't be relied upon[a]" + puts stdout "[a+ bold yellow] label starting at $b : $pline_tail[a]" + + set target_label_record [list label $label_found_name line $p_linenum] + if {$label_found_name in [dict keys $call_labels_found]} { + dict set target_label_record error "called_label_at_file_offset_boundary" + dict lappend target_labels_found $label_found_name $target_label_record + } else { + #review - we need to get better at finding all calls! + dict set target_label_record error "uncalled_label_at_file_offset_boundary" + dict lappend possible_target_labels_found $label_found_name $target_label_record } - } else { - if {$pidx+1 < [$objFile linecount]} { - set nextlineinfo [$objFile lineinfo $pidx+1] - set nextpayload [dict get $nextlineinfo payload] - puts "Line $p_linenum + 1 has data: [a+ yellow bold]$nextpayload[a]" - puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + + + set tail_start $b + set tail_end [expr {$b + [string length $pline_tail]}] + set tail_spaninfo [fileline::range_spans_chunk_boundaries $tail_start $tail_end 512] + if {[dict get $tail_spaninfo is_span]} { + set tail_boundaries [dict get $tail_spaninfo boundaries] + set extra_tail_boundaries [lsearch -all -inline -not $tail_boundaries $b] + if {[llength $extra_tail_boundaries]} { + puts "Line $p_linenum also spans additional boundaries: $extra_tail_boundaries" + set next_boundary [lindex $extra_tail_boundaries 0] + #boundary doesn't reset if no crlf - we are still within the line - so can calc from line beginning + set next_boundary_data [string range $pline [expr {$pline_begin + $next_boundary}] end] + puts "Line $p_linenum data at next boundary: [a+ yellow bold]$next_boundary_data[a]" + puts [$objFile chunk_boundary_display [dict get $plineinfo start] [dict get $plineinfo end] 0 -boundaries $next_boundary -linebase 1 -limit 1] + + puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + } } else { - #EOF reached + if {$pidx+1 < [$objFile linecount]} { + set nextlineinfo [$objFile lineinfo $pidx+1] + set nextpayload [dict get $nextlineinfo payload] + puts "Line $p_linenum + 1 has data: [a+ yellow bold]$nextpayload[a]" + puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + } else { + #EOF reached + } } } } @@ -410,29 +663,74 @@ namespace eval punk::mix::commandset::scriptwrap { } } - incr prior_total_offset $pline_bytes + incr pline_begin $pline_bytes } if {$found_forward_label == 0} { if {[string toupper $label] eq "EOF"} { - #EOF/eof label is special - it doesn't have to exist - but if it does - it probably shouldn't be on a boundary - puts stdout "[a+ bold green]OK: label :$label doesn't exist - but it's not required. callsite: [list call ${call}${labelplus} call_linenum $linenum] [a]" + #EOF/eof label is special - it doesn't have to exist - but if it does - it probably shouldn't be spanning a boundary + puts stdout "[a+ bold green]OK: label :$label doesn't exist - but it's usually not meant to. callsite: [list call ${call}${labelplus} call_linenum $callingline_num] [a]" } else { if {$found_backward_label == 0} { - lappend warning_labels [list label $label warning label_not_found callsite [list call ${call}${labelplus} call_linenum $linenum]] + lappend warning_labels [list label $label warning label_not_found callsite [list call ${call}${labelplus} call_linenum $callingline_num]] puts stdout "[a+ bold yellow]WARNING: label :$label not found (in forward or backward scan)[a]" } } } if {($found_forward_label + $found_backward_label) > 1} { - lappend warning_labels [list label $label warning multiple_labels_found callsite [list call ${call}${labelplus} call_linenum $linenum]] - puts stdout "[a+ bold yellow]WARNING: label :$label seems to appear multiple times[a]" + #puts "target_labels_found: $target_labels_found" + dict for {targetkey targethits} $target_labels_found { + set targetlines [list] + foreach record $targethits { + lappend targetlines [dict get $record line] + } + set remaining [list] + set previous "" ; + foreach lnum [lsort -integer -increasing $targetlines] { + if {$previous eq ""} { + lappend remaining $lnum + } else { + if {$lnum-1 == $previous} { + puts stdout "[a+ green bold]OK[a] - target-label $targetkey appears on immediately adjacent lines $previous and $lnum - assuming it is a boundary-avoidance tactic rather than an inadvertent duplicate" + set remaining [lrange $remaining 0 end-1];#retain latest - we will allow a run of targets on subsequent lines + } + lappend remaining $lnum + } + set previous [lindex $remaining end] + } + if {[llength $remaining] > 1} { + lappend warning_labels [list label $label warning multiple_target_labels_found callsite [list call ${call}${labelplus} call_linenum $callingline_num]] + puts stdout "[a+ bold yellow]WARNING: label :$label seems to appear multiple times[a]" + } + } } } } - incr file_offset [string length $ln] - incr file_offset ;# for unix nl + incr file_offset $callingline_len ;#including per-line stored line-ending } + if {[dict size $possible_target_labels_found] > 0} { + #puts stdout "Possibly bogus target-labels: [dict keys $possible_target_labels_found]" + set bogus_summary [list] + foreach pb [dict keys $possible_target_labels_found] { + if {$pb in [dict keys $call_labels_found]} { + puts stdout "[a+ yellow bold]Warning - target for label $pb was found with a record as being possibly bogus. record: [dict get $possible_target_labels_found $pb][a]" + puts stdout "[a+ yellow bold]Consider moving this target-label and re-checking[a]" + puts stdout "[a+ yellow bold]It may be a call label line that was found by boundary scanning - which shouldn't really happen[a]" + puts stdout "Call record [dict get $call_labels_found $pb]" + lappend warning_labels [list label $pb warning possibly_bogus_target list_of_target_hits [dict get $possible_target_labels_found $pb]] + } + set records [dict get $possible_target_labels_found $pb] + set blines [list] + foreach rec $records { + lappend blines [dict get $rec line] + } + lappend bogus_summary [list label $pb found_on_lines $blines] + } + puts stdout "[a+ cyan]Possibly bogus target-labels: $bogus_summary[a]" + puts stdout "These are usually nothing to be concerned about. Some will almost always turn up in a polyglot script that contains batch script." + puts stdout "If some of the label names appear to contain newlines, or are prefixes of or exact matches with legitimate labels - you might consider adjusting the boundary spacing with whitespace or comments to get a different result." + } + set result "" if {[llength $warning_labels]} { append result "WARNING:" \n append result "The following labels had warnings" \n @@ -450,6 +748,15 @@ namespace eval punk::mix::commandset::scriptwrap { append result " [a+ bold red]$err[a]" \n } } + if {[dict size $warning_target_labels_found] > 0} { + puts stdout "target-labels with minor warnings: [dict keys $warning_target_labels_found]" + } + append result "call-labels-found: [dict keys $call_labels_found]" \n + append result "target-labels-found: [dict keys $target_labels_found]" \n + if {![llength $warning_labels] && ![llength $error_labels]} { + puts stderr \n + puts stderr "[a+ green bold]OK No warnings or errors considered major enough to return in result.[a]" + } return $result } #specific filepath to just wrap one script at the tcl-payload or xxx-payload-pre-tcl site @@ -459,6 +766,7 @@ namespace eval punk::mix::commandset::scriptwrap { -askme 1\ -outputfolder "\uFFFF"\ -template "\uFFFF"\ + -returnextra 0\ ] set known_opts [dict keys $defaults] dict for {k v} $args { @@ -481,6 +789,7 @@ namespace eval punk::mix::commandset::scriptwrap { set opt_askme [dict get $opts -askme] set opt_template [dict get $opts -template] set opt_outputfolder [dict get $opts -outputfolder] + set opt_returnextra [dict get $opts -returnextra] # -- --- --- --- --- --- --- --- --- --- --- --- @@ -794,6 +1103,10 @@ namespace eval punk::mix::commandset::scriptwrap { set check_result [checkoutput $output_file] set with_errors "" set with_warnings "" + set call_labels [list] + set target_labels [list] + set errorlist [list] + set warninglist [list] if {$check_result ne ""} { puts stdout $check_result set check_lines [split $check_result \n] @@ -801,18 +1114,40 @@ namespace eval punk::mix::commandset::scriptwrap { set trimcl [string trim $cl] if {[string match "ERROR:*" $trimcl]} { set with_errors "[a+ bold red]with errors[a]" + lappend errorlist $trimcl } if {[string match "WARNING:*" $trimcl]} { set with_warnings "[a+ bold yellow] with warnings[a]" + lappend errorlist $trimcl + } + if {[string match "call-labels-found:*" $trimcl]} { + set call_labels [string trim [string range $trimcl [string length "call-labels-found:"] end]] + } + if {[string match "target-labels-found:*" $trimcl]} { + set target_labels [string trim [string range $trimcl [string length "target-labels-found:"] end]] } } + } else { + puts stderr "Expected output from checkoutput - but got none" } #even though chmod might exist on windows - we will leave permissions alone if {$::tcl_platform(platform) ne "windows"} { catch {exec chmod +x $output_file} } puts stdout "-done- $with_errors $with_warnings" - return $output_file + if {$opt_returnextra} { + set result [list filename $output_file batch_call_labels $call_labels batch_target_labels $target_labels] + if {[llength $warninglist]} { + dict set result warnings $warninglist + } + if {[llength $errorlist]} { + dict set result errors $errorlist + } + } else { + set result [list filename $output_file] + } + + return $result } namespace eval lib { @@ -1027,10 +1362,31 @@ namespace eval punk::mix::commandset::scriptwrap { } namespace eval batchlib { - proc get_callsite_label {labelplus} { + # + #see also: https://www.dostips.com/forum/viewtopic.php?t=3803 'Rules for label names vs GOTO and CALL + # review - we may need different get_callsite_label functions? + + proc get_callsite_label {labelstart} { + #labelstart is the character immediately following the colon (which is optional at callsite) - a label such as ::label doesn't seem valid at call or target sites + #e.g for @goto %= possible comment=% :mylabe%%l etc + #we would expect to be passed only "mylabe%%1 etc" + #It is up to the caller to determine where a callsite label begins. + #note that: + #@REM ----- + #@goto ^ + #:label + #@REM----- + # is a valid callsite - but doesn't appear to be found by the label scanner as it's own target label even though :label is on it's own line from non-batch perspective + # so the caller will have to do some batch-style line processing to find all call sites + #Also, for the following 2 lines + #@REM ^ + #:label + # the label will be found - yet if the :label was a command such as @GOTO - it would not be run as a callsite + + #a quick'n'dirty fix for some ways various escapes are handled within labels at callsite. #There seem to be very different rules for labels at target site - presumably because they are not part of a command - # Mostly it seems target labels are more literal + # Mostly it seems target labels are more literal with regards to % chars - but ^ are processed the same way at target label #some rules.. #callsite labels can't have space between : and label - but target labels can #label terminated by =,: even if prefixed by ^ and even if in squotes or dquotes @@ -1038,46 +1394,203 @@ namespace eval punk::mix::commandset::scriptwrap { #may resolve variables within the label - but characters from variable value can terminate. #as we don't have access to the variable values - we should normalize %varname% to empty string at callsite - but perhaps emit warning somewhere # The target labels don't seem to - #a single % resolves to empty + #a single % resolves to empty - depending. (starts invar processing - and decides if it was a var depending on whether it was closed?) #sequences of % don't begin a var - number of % in labelname = number of %s divided by 2 and rounded down. ie 1->0 2->1 3-> 1 4->2 5->2 6->3 etc #spaces in % wrapped var names don't terminate label #spaces aren't escaped by ^ or quoting #sequences of ^ seem to follow same counting rule as % #e.g @goto :la%path%bel where path begins with C:\Program Files.. becomes label :laC + if {[string index $labelstart 0] in [list : " " \t = {;}]} { + #return everything as tail - nothing was consumed + return [list labelfound 0 note "invalid first character for callsite label" tail $labelstart] + } #The due to whitespace and most chars except : and % being alowed inside vars - it seems the best first step # -------------- start % handling % - set chars [split $labelplus ""] - set percentrun 0 + set inputchars [split $labelstart ""] + set percentrun 0 ;#0|1 because we use invar-toggling rather than running total of number of percents in a sequence set invar 0 - set output "" - - foreach c $chars { + set labelout "" + set varsfound [list] + set varterminals [list :] + set labelterminals [list + , {;} = " " \t] + set varname "" + set caretseq 0 + set inputconsumed 0 + foreach c $inputchars { if {!$invar} { - if {$c ne "%"} { - append output [string repeat % [expr {$percentrun / 2}]] $c - set percentrun 0 - } else { - + if {$c eq "%"} { + set caretseq 0 + set lookahead [lrange $inputchars $inputconsumed+1 end] + if {"%" in $lookahead} { + set invar 1 + incr percentrun + } else { + incr percentrun + } + } elseif {$c eq "^"} { + if {$caretseq} { + set caretseq 0 + append labelout "^" ;#for every pair encountered in direct sequence - second gets included in label + } else { + set caretseq 1 + } + } else { + set caretseq 0 + if {$percentrun && ($c in [list 0 1 2 3 4 5 6 7 8 9])} { + #subst %i with value - here we have no way of getting that - so use blank - user will get warning that target not found + set percentrun 0 + } else { + append labelout [string repeat % [expr {$percentrun / 2}]] + set percentrun 0 + if {$c in $labelterminals} { + break + } + append labelout $c + } } } else { - #in var - if {$c eq "%" && $percentrun == 0} { - set invar 1 - } elseif {$c eq "%"} - + #in var - don't do anything with carets(?) + if {$c eq "%" && $percentrun == 1} { + #double percent - rather than just an empty var - emit one % + append labelout % + set invar 0 + set percentrun 0 + } elseif {$c eq "%"} { + #presume percentrun is 0 + set invar 0 + lappend varsfound $varname; set varname "" + } elseif {$c in $varterminals} { + set invar 0 + lappend varsfound $varname; set varname "" } else { - append varname $c + if {$percentrun && ($c in [list 0 1 2 3 4 5 6 7 8 9])} { + #review - seems to terminate var - and substitute? + #this branch untested - case where we have %i and further % - what if it was %1var% ? does %1 get substituted ? or %1var% - test + set invar 0 + append varname $c + } else { + append varname $c + } + set percentrun 0 } } + incr inputconsumed } # -------------- end % handling % - + set tail [string range $labelstart $inputconsumed end] #caret -- etc + if {$labelout eq ""} { + set resultdict [dict create labelfound 0] + if {[llength $varsfound]} { + dict set resultdict vars $varsfound + dict set resultdict note "empty label but vars exist - may be legit" + } else { + dict set resultdict note "empty label - no vars" + } + dict set resultdict tail $tail + return $resultdict + } + + return [list labelfound 1 label $labelout tail $tail] + } + proc get_target_label_from_line {labelline} { + #scan a whole line - or a 'line' starting at some chunk boundary we found for a label + #caller should resolve any trailing caret and subsequent line and include them in the call + #note that we may be scanning all sorts of things in a polyglot file - but we're interested in seeing if cmd.exe might interpret it as a label + #target labels don't have %var% processing - they will be literal + set firstcolon [string first : $labelline] + if {$firstcolon == -1} { + return [list labelfound 0 note "no_colon"] + } + set prefixpart [string range $labelline 0 $firstcolon-1] + set targetpart [string range $labelline $firstcolon+1 end] + + set prefixok 1;#default assumption + set invisible_prefix_chars [list {;} , = " " \t] + set prefixchars [split $prefixpart ""] + # % and ^ in the prefix - whether doubled etc or not - will stop label being found + #ANY first char seems to be allowed in prefixpart (it won't be colon, because we already split on that) + #perhaps this is done by cmd.exe to reduce off-by-one errors?? weird... + # but it does allow labels to be found in certain # tcl/bashsh comment lines, which could be both dangerous and ...useful. + #start prefix check at char 1 instead of 0 + foreach pchar [lrange $prefixchars 1 end] { + if {$pchar ni $invisible_prefix_chars} { + set prefixok 0 + break + } + } + if {!$prefixok} { + return [list labelfound 0 note "prefix_fail"] + } + #no problems before colon - now see if targetpart can be interpreted as a label + #we again have some potential invisible chars before label begins. + set charindex [expr {$firstcolon +1}] ;#track position so we can return index of where we believe label begins + set targetchars [split $targetpart ""] + set inlabel 0 + set labelposn -1 + # --- + set inlabel_terminals [list : + " " \t \r \n] ;# , ; = don't seem to terminate a target label, but do terminate a calling label + # + and whitespace terminate caller and target + # --- + # consider: + #@goto :14^ + # :14^ + #caller is searching for label "14" but won't match - presumably target scanner has escaped the trailing space + set label "" + set rawlabel "" + set caretseq 0 ;# 0|1 + foreach tchar $targetchars { + if {$tchar in [list + :]} { + break + } + if {!$inlabel} { + if {$tchar ni $invisible_prefix_chars} { + #beginning of target label + set labelposn $charindex + set inlabel 1 + append rawlabel $tchar + if {$tchar eq "^"} { + set caretseq 1 + } else { + append label $tchar + } + } + } else { + if {$tchar in $inlabel_terminals} { + #caret stops them from terminating + if {$caretseq} { + set caretseq 0 + append label $tchar + append rawlabel $tchar + } else { + break + } + } else { + append rawlabel $tchar + if {$tchar eq "^"} { + if {$caretseq} { + set caretseq 0 + append label "^" ;#for every pair encountered in direct sequence - second gets included in label + } else { + set caretseq 1 + } + } else { + set caretseq 0 + append label $tchar ;#for target labels - all including %var% is directly part of the label target + } + } + } + incr charindex + } + if {$labelposn == -1} { + return [list labelfound 0 note "no_label_found_after_colon"] + } - return [list label $label tail $tail] + #return rawlabel so we can see it as it appears in the data - as opposed to how it is interpreted as a label by cmd.exe + return [list labelfound 1 label $label rawlabel $rawlabel] } } diff --git a/src/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell.cmd b/src/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell.cmd index 462d0c9f..e73b193c 100644 --- a/src/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell.cmd +++ b/src/modules/punk/mix/templates/utility/scriptappwrappers/punk-multishell.cmd @@ -18,26 +18,30 @@ set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershe @REM On unix-like systems, call with sh, bash or tclsh. (powershell untested on unix - and requires wscript if security elevation is used) @REM Due to lack of shebang (#! line) Unix-like systems will probably (hopefully) default to sh if the script is called without an interpreter - but it may depend on the shell in use when called. @REM If you find yourself really wanting/needing to add a shebang line - do so on the basis that the script will exist on unix-like systems only. -@SET "validshells=pwsh,sh,bash,tclsh" -@SET shells[10]="pwsh" -@SET shells[11]="sh" -@set shells[12]="bash" -@SET shells[13]="tclsh" +@SETLOCAL EnableExtensions EnableDelayedExpansion +@SET "validshells= ^(10^) 'pwsh' ^(11^) 'sh' (^12^) 'bash' (^13^) 'tclsh'" +@SET "shells[10]=pwsh" +@SET "shells[11]=sh" +@set "shells[12]=bash" +@SET "shells[13]=tclsh" : -@SET "nextshell=tclsh" +@SET "nextshell=13" : @rem asadmin is for automatic elevation to administrator. Separate window will be created (seems unavoidable with current elevation mechanism) and user will still get security prompt (probably reasonable). : @SET "asadmin=0" : -@REM nextshell set to pwsh,sh,bash or tclsh +@REM nextshell set to index for validshells .eg 10 for pwsh @REM @ECHO nextshell is %nextshell% -@CALL SET keyRemoved=%%validshells:%nextshell%=%% +@SET "selected=!shells[%nextshell%]!" +@REM @ECHO selected %selected% +@CALL SET "keyRemoved=%%validshells:'!selected!'=%%" +@REM @ECHO keyremoved %keyRemoved% @REM Note that 'powershell' e.g v5 is just a fallback for when pwsh is not available @REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### -@REM -- cmd/batch file section (ignored on unix) +@REM -- cmd/batch file section (ignored on unix but should be left in place) @REM -- This section intended mainly to launch the next shell (and to escalate privileges if necessary) -@REM -- Avoid customising this if you are not familiar with batch scripting. cmd/batch script is useful, but is probably the least expressive language and most error prone. +@REM -- Avoid customising this if you are not familiar with batch scripting. cmd/batch script can be useful, but is probably the least expressive language and most error prone. @REM -- For example - as this file needs to use unix-style lf line-endings - the label scanner is susceptible to the 512Byte boundary issue: https://www.dostips.com/forum/viewtopic.php?t=8988#p58888 @REM -- This label issue can be triggered/abused in files with crlf line endings too - but it is less likely to happen accidentaly. @REm -- See also: https://stackoverflow.com/questions/4094699/how-does-the-windows-command-interpreter-cmd-exe-parse-scripts/4095133#4095133 @@ -59,7 +63,6 @@ set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershe @REM ############################################################################################################################ @REM -- custom windows payloads should be in powershell,tclsh (or sh/bash if available) code sections @REM ## ### ### ### ### ### ### ### ### ### ### ### ### ### -@SETLOCAL EnableExtensions EnableDelayedExpansion @SET "winpath=%~dp0" @SET "fname=%~nx0" @REM @ECHO fname %fname% @@ -67,36 +70,35 @@ set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershe @REM @ECHO commandlineascalled %0 @REM @ECHO commandlineresolved %~f0 @CALL :getNormalizedScriptTail nftail -@ECHO normalizedscripttail %nftail% +@REM @ECHO normalizedscripttail %nftail% @CALL :getFileTail %0 clinetail -@ECHO clinetail %clinetail% +@REM @ECHO clinetail %clinetail% @CALL :stringToUpper %~nx0 capscripttail -@ECHO capscriptname: %capscripttail% -@CALL :isNumeric "blah" -@CALL :isNumeric etc -@CALL :isNumeric 3 -@CALL :isNumeric 6 +@REM @ECHO capscriptname: %capscripttail% -@IF %nftail%==%capscripttail% ( +@IF "%nftail%"=="%capscripttail%" ( @ECHO forcing asadmin=1 due to file name on filesystem being uppercase @SET "asadmin=1" ) else ( @CALL :stringToUpper %clinetail% capcmdlinetail - @ECHO capcmdlinetail %capcmdlintetail% - IF %clinetail%==%capcmdlinetail% ( + @REM @ECHO capcmdlinetail !capcmdlinetail! + IF "%clinetail%"=="!capcmdlinetail!" ( @ECHO forcing asadmin=1 due to cmdline scriptname in uppercase @set "asadmin=1" ) ) @SET "vbsGetPrivileges=%temp%\punk_bat_elevate_%fname%.vbs" @SET arglist=%* +@IF "%1"=="PUNK-ELEVATED" ( + GOTO :gotPrivileges +) @IF !asadmin!==1 ( net file 1>NUL 2>NUL - @IF '!errorlevel!'=='0' ( GOTO gotPrivileges ) else ( GOTO getPrivileges ) + @IF '!errorlevel!'=='0' ( GOTO :gotPrivileges ) else ( GOTO :getPrivileges ) ) @GOTO skip_privileges :getPrivileges -@IF '%1'=='PUNK-ELEVATED' (echo PUNK-ELEVATED & shift /1 & goto gotPrivileges) +@IF '%1'=='PUNK-ELEVATED' (echo PUNK-ELEVATED & shift /1 & goto :gotPrivileges ) @ECHO Set UAC = CreateObject^("Shell.Application"^) > "%vbsGetPrivileges%" @ECHO args = "PUNK-ELEVATED " >> "%vbsGetPrivileges%" @ECHO For Each strArg in WScript.Arguments >> "%vbsGetPrivileges%" @@ -111,7 +113,7 @@ set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershe @REM setlocal & pushd . @PUSHD . @cd /d %~dp0 -@IF '%1'=='PUNK-ELEVATED' ( +@IF "%1"=="PUNK-ELEVATED" ( @DEL "%vbsGetPrivileges%" 1>nul 2>nul @SET arglist=%arglist:~14% ) @@ -125,17 +127,17 @@ set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershe fc "%~dp0%~n0.cmd" "%~dp0%~n0.ps1" >nul || goto different @REM @ECHO "files same" @SET need_ps1=0 - @GOTO :pscontinue - :different - @REM @ECHO "files differ" - @SET need_ps1=1 ) +@GOTO :pscontinue +:different +@REM @ECHO "files differ" +@SET need_ps1=1 :pscontinue @IF !need_ps1!==1 ( COPY "%~dp0%~n0.cmd" "%~dp0%~n0.ps1" >NUL ) @REM avoid using CALL to launch pwsh,tclsh etc - it will intercept some args such as /? -@IF %nextshell%==pwsh ( +@IF "!shells[%nextshell%]!"=="pwsh" ( REM pws vs powershell hasn't been tested because we didn't need to copy cmd to ps1 this time REM test availability of preferred option of powershell7+ pwsh pwsh -nop -nol -c set-executionpolicy -Scope Process Unrestricted; write-host "statusmessage: pwsh-found" >NUL @@ -151,18 +153,18 @@ set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershe SET task_exitcode=!errorlevel! ) ) ELSE ( - IF %nextshell%==bash ( + IF "!shells[%nextshell%]!"=="bash" ( CALL :getWslPath %winpath% wslpath REM ECHO wslfullpath "!wslpath!%fname%" - %nextshell% "!wslpath!%fname%" %arglist% & SET task_exitcode=!errorlevel! + !shells[%nextshell%]! "!wslpath!%fname%" %arglist% & SET task_exitcode=!errorlevel! ) ELSE ( REM probably tclsh or sh IF NOT "x%keyRemoved%"=="x%validshells%" ( REM sh on windows uses /c/ instead of /mnt/c - at least if using msys. Todo, review what is the norm on windows with and without msys2,cygwin,wsl REM and what logic if any may be needed. For now sh with /c/xxx seems to work the same as sh with c:/xxx - %nextshell% "%~dp0%fname%" %arglist% & SET task_exitcode=!errorlevel! + !shells[%nextshell%]! "%~dp0%fname%" %arglist% & SET task_exitcode=!errorlevel! ) ELSE ( - ECHO %fname% has invalid nextshell value %nextshell% valid options are %validshells% + ECHO %fname% has invalid nextshell value ^(%nextshell%^) !shells[%nextshell%]! valid options are %validshells% SET task_exitcode=66 GOTO :exit_multishell ) @@ -170,6 +172,7 @@ set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershe ) @REM batch file library functions @GOTO :endlib + :getWslPath @SETLOCAL @SET "_path=%~p1" @@ -223,7 +226,7 @@ set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershe ) ) @EXIT /B - +@REM boundary padding :getNormalizedScriptTail @SETLOCAL @SET "result=%~nx0" @@ -240,6 +243,8 @@ set -- "$@" "a=[list shebangless punk MULTISHELL tclsh sh bash cmd pwsh powershe :getNormalizedFileTailFromPath @REM warn via echo, and do not set return variable if path not found @REM note that %~nx1 does not preserve case of provided path - hence the name 'normalized' +@REM boundary padding +@REM boundary padding @SETLOCAL @CALL :stringContains %~1 "\" hasBackSlash @CALL :stringContains %~1 "/" hasForwardSlash @@ -403,7 +408,7 @@ shift && set -- "${@:1:$#-1}" # -- This if block only needed if Tcl didn't exit or return above. if false==false # else { then - : + : # # ## ### ### ### ### ### ### ### ### ### ### ### ### ### # -- sh/bash script section # -- leave as is if all that is required is launching the Tcl payload" @@ -487,12 +492,13 @@ $1 = @' ' : comment end hide powershell-block from Tcl \ # This comment with closing brace should stay in place whether 'if' commented or not } -: multishell cmd exit label - return exitcode +: multishell doubled-up cmd exit label - return exitcode +:exit_multishell :exit_multishell : \ @REM @ECHO exitcode: !task_exitcode! : \ -@IF '%asadmin%'=='1' (echo. & @cmd /k echo elevated prompt: type exit to quit) +@IF "%1"=="PUNK-ELEVATED" (echo. & @cmd /k echo elevated prompt: type exit to quit) : \ @EXIT /B !task_exitcode! # cmd has exited diff --git a/src/modules/punk/mix/util-999999.0a1.0.tm b/src/modules/punk/mix/util-999999.0a1.0.tm index f421b928..8b369758 100644 --- a/src/modules/punk/mix/util-999999.0a1.0.tm +++ b/src/modules/punk/mix/util-999999.0a1.0.tm @@ -35,13 +35,10 @@ namespace eval punk::mix::util { namespace export * - + #NOTE fileutil::cat seems to silently ignore options if passed at end instead of before file! proc fcat {args} { variable has_winpath - if {$::tcl_platform(platform) ne "windows"} { - return [fileutil::cat {*}$args] - } set knownopts [list -eofchar -translation -encoding --] set last_opt 0 @@ -73,7 +70,21 @@ namespace eval punk::mix::util { if {![llength $paths]} { error "Unable to find file in the supplied arguments: $args. Ensure options are all -opt val pairs and that file name(s) follow" } + #puts stderr "opts: $opts paths: $paths" + + #let's proceed, but warn the user if an apparent option is in paths + foreach opt [list -encoding -eofchar -translation] { + if {$opt in $paths} { + puts stderr "fcat WARNING: apparent option $opt found after file argument(s) (expected them before filenames). Passing to fileutil::cat anyway - but for at least some versions, these options may be ignored. commandline 'fcat $args'" + } + } + + if {$::tcl_platform(platform) ne "windows"} { + return [fileutil::cat {*}$args] + } + + set finalpaths [list] foreach p $paths { if {$has_winpath && [punk::winpath::illegalname_test $p]} { diff --git a/src/scriptapps/punk.tcl b/src/scriptapps/punk.tcl new file mode 100644 index 00000000..757ae1a8 --- /dev/null +++ b/src/scriptapps/punk.tcl @@ -0,0 +1,15 @@ +#puts stdout "launching punk87" + +set dirname [file dirname [file normalize [info script]]] +if {[file tail $dirname] eq "bin"} { + if {[file exists [file join $dirname ../src/punk86.vfs/main.tcl]]} { + #tclsh [file join $dirname ../src/punk86.vfs/main.tcl] {*}$::argv + source [file join $dirname ../src/punk86.vfs/main.tcl] + } else { + puts stderr "Unable to locate punk87 entry-point main.tcl" + } +} else { + puts stderr "punk87 launch script must be run from the punk bin folder" +} +#puts stdout "-done-" +