From c69b9090a4b160a6e0700213863eefcab033b451 Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Mon, 8 Jan 2024 13:07:10 +1100 Subject: [PATCH] scriptwrap fixes, fileline module and documentation updates --- src/doc/punk/_module_fileline-0.1.0.tm.man | 146 +++ src/doc/punk/_module_path-0.1.0.tm.man | 30 +- .../files/punk/_module_fileline-0.1.0.tm.n | 559 ++++++++++++ .../man/files/punk/_module_path-0.1.0.tm.n | 42 +- src/embedded/man/index.n | 21 + src/embedded/man/toc.n | 3 + src/embedded/md/.doc/tocdoc | 1 + src/embedded/md/.idx | 2 +- src/embedded/md/.toc | 2 +- src/embedded/md/.xrf | 2 +- .../files/punk/_module_fileline-0.1.0.tm.md | 353 ++++++++ .../doc/files/punk/_module_path-0.1.0.tm.md | 29 +- src/embedded/md/doc/toc.md | 2 + src/embedded/md/index.md | 13 +- src/embedded/md/toc.md | 2 + src/embedded/www/.doc/tocdoc | 1 + src/embedded/www/.idx | 2 +- src/embedded/www/.toc | 2 +- src/embedded/www/.xrf | 2 +- .../files/punk/_module_fileline-0.1.0.tm.html | 326 +++++++ .../doc/files/punk/_module_path-0.1.0.tm.html | 33 +- src/embedded/www/doc/toc.html | 6 +- src/embedded/www/index.html | 24 +- src/embedded/www/toc.html | 6 +- src/modules/punk/cap-999999.0a1.0.tm | 4 +- src/modules/punk/fileline-999999.0a1.0.tm | 829 ++++++++++++++++++ src/modules/punk/fileline-buildversion.txt | 3 + .../punk/mix/commandset/doc-999999.0a1.0.tm | 5 +- .../mix/commandset/module-999999.0a1.0.tm | 2 +- .../mix/commandset/scriptwrap-999999.0a1.0.tm | 307 +++++-- .../modules/template_module-0.0.1.tm | 36 +- src/modules/punk/path-999999.0a1.0.tm | 30 +- 32 files changed, 2676 insertions(+), 149 deletions(-) create mode 100644 src/doc/punk/_module_fileline-0.1.0.tm.man create mode 100644 src/embedded/man/files/punk/_module_fileline-0.1.0.tm.n create mode 100644 src/embedded/md/doc/files/punk/_module_fileline-0.1.0.tm.md create mode 100644 src/embedded/www/doc/files/punk/_module_fileline-0.1.0.tm.html create mode 100644 src/modules/punk/fileline-999999.0a1.0.tm create mode 100644 
src/modules/punk/fileline-buildversion.txt diff --git a/src/doc/punk/_module_fileline-0.1.0.tm.man b/src/doc/punk/_module_fileline-0.1.0.tm.man new file mode 100644 index 00000000..be45f826 --- /dev/null +++ b/src/doc/punk/_module_fileline-0.1.0.tm.man @@ -0,0 +1,146 @@ +[comment {--- punk::docgen generated from inline doctools comments ---}] +[comment {--- punk::docgen DO NOT EDIT DOCS HERE UNLESS YOU REMOVE THESE COMMENT LINES ---}] +[comment {--- punk::docgen overwrites this file ---}] +[manpage_begin punkshell_module_punk::fileline 0 0.1.0] +[copyright "2024"] +[titledesc {file line-handling utilities}] [comment {-- Name section and table of contents description --}] +[moddesc {punk fileline}] [comment {-- Description at end of page heading --}] +[require punk::fileline] +[keywords module text parse file] +[description] +[para] - +[section Overview] +[para]Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed) +[para]This is important for certain text files where examining the number of chars/bytes is important +[para]For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved. +[para]Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem. It is for operating on text-file like data. +[subsection Concepts] +[para]A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation. 
+[example_begin] + package require punk::fileline + package require fileutil + set rawdata [lb]fileutil::cat -translation binary data.txt[rb] + punk::fileline::class::textinfo create obj_data $rawdata + puts stdout [lb]obj_data linecount[rb] +[example_end] +[subsection Notes] +[para]Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files. +[para]This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired. +[para]No support for lone carriage-returns being interpreted as line-endings. +[para]CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module. +[subsection dependencies] +[para] packages used by punk::fileline +[list_begin itemized] +[item] [package {Tcl 8.6}] +[list_end] +[section API] +[subsection {Namespace punk::fileline::class}] +[para] class definitions +[list_begin enumerated] +[enum] CLASS [class textinfo] +[list_begin definitions] + [para] [emph METHODS] +[call class::textinfo [method constructor] [arg datachunk] [opt {option value...}]] +[para] Constructor for textinfo object which represents a chunk or all of a file +[para] datachunk should be passed with the file data including line-endings as-is for full functionality. ie use something like: +[example_begin] + fconfigure $fd -translation binary + set chunkdata [lb]read $fd[rb] +or + set chunkdata [lb]fileutil::cat -translation binary data.txt[rb] +[example_end] +[para] when loading the data +[call class::textinfo [method chunk] [arg chunkstart] [arg chunkend]] +[para]Return a range of bytes from the underlying raw chunk data. 
+[para] e.g The following retrieves the entire chunk +[para] objName chunk 0 end +[call class::textinfo [method chunklen]] +[para] Number of bytes/characters in the raw data of the file +[call class::textinfo [method linecount]] +[para] Number of lines in the raw data of the file, counted as per the policy in effect +[call class::textinfo [method regenerate_lines]] +[para]generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex +[call class::textinfo [method line] [arg lineindex]] +[para]Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata +[para]A 'line' may be returned without a line-ending if the underlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting) +[para]Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none" +[para]To return just the data which might more commonly be needed for dealing with lines, use the [method linepayload] method - which returns the line data minus line-ending +[call class::textinfo [method linepayload] [arg lineindex]] +[para]Return the text of the line indicated by the zero-based lineindex +[para]The line-ending is not returned in the data - but is still stored against this lineindex +[para]Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the [method linemeta] method +[para]To retrieve both the line text and metadata in a single call the [method lineinfo] method can be used +[para]To retrieve an entire line including line-ending use the [method line] method. 
+[call class::textinfo [method linemeta] [arg lineindex]] +[para]Return a dict of the metadata for the line indicated by the zero-based lineindex +[para]Keys returned include +[list_begin itemized] +[item] le +[para] A string representing the type of line-ending: crlf|lf|none +[item] linelen +[para] The number of characters/bytes in the whole line including line-ending if any +[item] payloadlen +[para] The number of character/bytes in the line excluding line-ending +[item] start +[para] The zero-based index into the associated raw file data indicating at which byte/character index this line begins +[item] end +[para] The zero-based index into the associated raw file data indicating at which byte/character index this line ends +[para] This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload +[list_end] +[call class::textinfo [method lineinfo] [arg lineindex]] +[para]Return a dict of the metadata and text for the line indicated by the zero-based lineindex +[para]This returns the same info as the [method linemeta] with an added key of 'payload' which is the text of the line without line-ending. +[para]The 'payload' value is the same as is returned from the [method linepayload] method. 
+[call class::textinfo [method linerange_to_chunkrange] [arg startidx] [arg endidx]] +[call class::textinfo [method linerange_to_chunk] [arg startidx] [arg endidx]] +[call class::textinfo [method lines] [arg startidx] [arg endidx]] +[call class::textinfo [method linepayloads] [arg startidx] [arg endidx]] +[call class::textinfo [method chunkrange_to_linerange] [arg chunkstart] [arg chunkend]] +[call class::textinfo [method chunkrange_to_lineinfolist] [arg chunkstart] [arg chunkend] [opt {option value...}]] +[para]Return a list of dicts each with structure like the result of the [method lineinfo] method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied +[para]The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list. +[para]truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending) +[para]Note that this truncation info is only in the return value of this method - and will not be reflected in [method lineinfo] queries to the main chunk. 
+[call class::textinfo [method numeric_linerange] [arg startidx] [arg endidx]] +[para]A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data +[para]This is used internally by API functions such as [method line] to enable it to accept more expressive indices +[call class::textinfo [method numeric_chunkrange] [arg startidx] [arg endidx]] +[para]A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data +[call class::textinfo [method normalize_indices] [arg startidx] [arg endidx] [arg max]] +[para]A utility to convert some of the Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max +[para]Basic addition and subtraction expressions such as 4-1 5+2 are accepted +[para]startidx higher than endidx is allowed +[para]Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max +[list_end] +[list_end] [comment {--- end class enumeration ---}] +[subsection {Namespace punk::fileline}] +[para] Core API functions for punk::fileline +[list_begin definitions] +[list_end] [comment {--- end definitions namespace punk::fileline ---}] +[subsection {Namespace punk::fileline::lib}] +[para] Secondary functions that are part of the API +[list_begin definitions] +[call [fun lib::range_spans_chunk_boundaries] [arg start] [arg end] [arg chunksize]] +[para]Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range. 
+[list_begin arguments] + [arg_def integer start] + [para] zero-based start index of range + [arg_def integer end] + [para] zero-based end index of range + [arg_def integer chunksize] + [para] Number of bytes/characters in chunk +[list_end] +[para]returns a dict with the keys is_span and boundaries +[para]is_span 0|1 indicates if the range specified spans a boundary of chunksize +[para]boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize +[para]e.g +[example_begin] + range_spans_chunk_boundaries 10 1750 512 + is_span 1 boundaries {512 1024 1536} +[example_end] +[para] This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75 +[list_end] [comment {--- end definitions namespace punk::fileline::lib ---}] +[section Internal] +[subsection {Namespace punk::fileline::system}] +[para] Internal functions that are not part of the API +[manpage_end] diff --git a/src/doc/punk/_module_path-0.1.0.tm.man b/src/doc/punk/_module_path-0.1.0.tm.man index 9e448786..e9154899 100644 --- a/src/doc/punk/_module_path-0.1.0.tm.man +++ b/src/doc/punk/_module_path-0.1.0.tm.man @@ -46,8 +46,8 @@ [para] ie - the driveletter alone in paths such as c:/etc will still be case insensitive. (ie c:/ETC/* will match C:/ETC/blah but not C:/etc/blah) [para] Explicitly specifying -nocase 0 will require the entire case to match including the driveletter. [call [fun treefilenames] [arg basepath] [arg tailglob] [opt {option value...}]] -basic (glob based) list of filenames matching tailglob - recursive -no natsorting - so order is dependent on filesystem +[para]basic (glob based) list of filenames matching tailglob - recursive +[para]no natsorting - so order is dependent on filesystem [call [fun relative] [arg reference] [arg location]] [para] Taking two directory paths, a reference and a location, computes the path of the location relative to the reference. 
@@ -65,14 +65,30 @@ no natsorting - so order is dependent on filesystem [item] [para] Notes: [para] Both paths must be the same type - ie both absolute or both relative -[para] Case sensitive. ie relative /etc /etC +[para] Case sensitive. ie punk::path::relative /etc /etC will return ../etC [para] On windows, the drive-letter component (only) is not case sensitive -[para] ie relative c:/etc C:/etc returns . -[para] but relative c:/etc C:/Etc returns ../Etc +[example_begin] + P% punk::path::relative c:/etc C:/etc + - . +[example_end] +[para] The part following the driveletter is case sensitive so in the following cases it recognises the driveletter matches but not the tail +[example_begin] + P% punk::path::relative c:/etc C:/Etc + - ../Etc +[example_end] [para] On windows, if the paths are absolute and specifiy different volumes, only the location will be returned. - ie relative c:/etc d:/etc/blah - returns d:/etc/blah +[example_begin] + P% punk::path::relative c:/etc d:/etc/blah + - d:/etc/blah +[example_end] +[para] Unix-like examples: +[example_begin] + P% punk::path::relative /usr/local/etc/ /usr/local/etc/somewhere/below + - somewhere/below + P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here + - ../../lib/here +[example_end] [list_end] [list_end] [comment {--- end definitions namespace punk::path ---}] [subsection {Namespace punk::path::lib}] diff --git a/src/embedded/man/files/punk/_module_fileline-0.1.0.tm.n b/src/embedded/man/files/punk/_module_fileline-0.1.0.tm.n new file mode 100644 index 00000000..49fdd12d --- /dev/null +++ b/src/embedded/man/files/punk/_module_fileline-0.1.0.tm.n @@ -0,0 +1,559 @@ +'\" +'\" Generated from file '_module_fileline-0\&.1\&.0\&.tm\&.man' by tcllib/doctools with format 'nroff' +'\" Copyright (c) 2024 +'\" +.TH "punkshell_module_punk::fileline" 0 0\&.1\&.0 doc "punk fileline" +.\" The -*- nroff -*- definitions below are for supplemental macros used +.\" in Tcl/Tk manual entries. 
+.\" +.\" .AP type name in/out ?indent? +.\" Start paragraph describing an argument to a library procedure. +.\" type is type of argument (int, etc.), in/out is either "in", "out", +.\" or "in/out" to describe whether procedure reads or modifies arg, +.\" and indent is equivalent to second arg of .IP (shouldn't ever be +.\" needed; use .AS below instead) +.\" +.\" .AS ?type? ?name? +.\" Give maximum sizes of arguments for setting tab stops. Type and +.\" name are examples of largest possible arguments that will be passed +.\" to .AP later. If args are omitted, default tab stops are used. +.\" +.\" .BS +.\" Start box enclosure. From here until next .BE, everything will be +.\" enclosed in one large box. +.\" +.\" .BE +.\" End of box enclosure. +.\" +.\" .CS +.\" Begin code excerpt. +.\" +.\" .CE +.\" End code excerpt. +.\" +.\" .VS ?version? ?br? +.\" Begin vertical sidebar, for use in marking newly-changed parts +.\" of man pages. The first argument is ignored and used for recording +.\" the version when the .VS was added, so that the sidebars can be +.\" found and removed when they reach a certain age. If another argument +.\" is present, then a line break is forced before starting the sidebar. +.\" +.\" .VE +.\" End of vertical sidebar. +.\" +.\" .DS +.\" Begin an indented unfilled display. +.\" +.\" .DE +.\" End of indented unfilled display. +.\" +.\" .SO ?manpage? +.\" Start of list of standard options for a Tk widget. The manpage +.\" argument defines where to look up the standard options; if +.\" omitted, defaults to "options". The options follow on successive +.\" lines, in three columns separated by tabs. +.\" +.\" .SE +.\" End of list of standard options for a Tk widget. +.\" +.\" .OP cmdName dbName dbClass +.\" Start of description of a specific option. cmdName gives the +.\" option's name as specified in the class command, dbName gives +.\" the option's name in the option database, and dbClass gives +.\" the option's class in the option database. 
+.\" +.\" .UL arg1 arg2 +.\" Print arg1 underlined, then print arg2 normally. +.\" +.\" .QW arg1 ?arg2? +.\" Print arg1 in quotes, then arg2 normally (for trailing punctuation). +.\" +.\" .PQ arg1 ?arg2? +.\" Print an open parenthesis, arg1 in quotes, then arg2 normally +.\" (for trailing punctuation) and then a closing parenthesis. +.\" +.\" # Set up traps and other miscellaneous stuff for Tcl/Tk man pages. +.if t .wh -1.3i ^B +.nr ^l \n(.l +.ad b +.\" # Start an argument description +.de AP +.ie !"\\$4"" .TP \\$4 +.el \{\ +. ie !"\\$2"" .TP \\n()Cu +. el .TP 15 +.\} +.ta \\n()Au \\n()Bu +.ie !"\\$3"" \{\ +\&\\$1 \\fI\\$2\\fP (\\$3) +.\".b +.\} +.el \{\ +.br +.ie !"\\$2"" \{\ +\&\\$1 \\fI\\$2\\fP +.\} +.el \{\ +\&\\fI\\$1\\fP +.\} +.\} +.. +.\" # define tabbing values for .AP +.de AS +.nr )A 10n +.if !"\\$1"" .nr )A \\w'\\$1'u+3n +.nr )B \\n()Au+15n +.\" +.if !"\\$2"" .nr )B \\w'\\$2'u+\\n()Au+3n +.nr )C \\n()Bu+\\w'(in/out)'u+2n +.. +.AS Tcl_Interp Tcl_CreateInterp in/out +.\" # BS - start boxed text +.\" # ^y = starting y location +.\" # ^b = 1 +.de BS +.br +.mk ^y +.nr ^b 1u +.if n .nf +.if n .ti 0 +.if n \l'\\n(.lu\(ul' +.if n .fi +.. +.\" # BE - end boxed text (draw box now) +.de BE +.nf +.ti 0 +.mk ^t +.ie n \l'\\n(^lu\(ul' +.el \{\ +.\" Draw four-sided box normally, but don't draw top of +.\" box if the box started on an earlier page. +.ie !\\n(^b-1 \{\ +\h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' +.\} +.el \}\ +\h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\l'|0u-1.5n\(ul' +.\} +.\} +.fi +.br +.nr ^b 0 +.. +.\" # VS - start vertical sidebar +.\" # ^Y = starting y location +.\" # ^v = 1 (for troff; for nroff this doesn't matter) +.de VS +.if !"\\$2"" .br +.mk ^Y +.ie n 'mc \s12\(br\s0 +.el .nr ^v 1u +.. +.\" # VE - end of vertical sidebar +.de VE +.ie n 'mc +.el \{\ +.ev 2 +.nf +.ti 0 +.mk ^t +\h'|\\n(^lu+3n'\L'|\\n(^Yu-1v\(bv'\v'\\n(^tu+1v-\\n(^Yu'\h'-|\\n(^lu+3n' +.sp -1 +.fi +.ev +.\} +.nr ^v 0 +.. 
+.\" # Special macro to handle page bottom: finish off current +.\" # box/sidebar if in box/sidebar mode, then invoked standard +.\" # page bottom macro. +.de ^B +.ev 2 +'ti 0 +'nf +.mk ^t +.if \\n(^b \{\ +.\" Draw three-sided box if this is the box's first page, +.\" draw two sides but no top otherwise. +.ie !\\n(^b-1 \h'-1.5n'\L'|\\n(^yu-1v'\l'\\n(^lu+3n\(ul'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c +.el \h'-1.5n'\L'|\\n(^yu-1v'\h'\\n(^lu+3n'\L'\\n(^tu+1v-\\n(^yu'\h'|0u'\c +.\} +.if \\n(^v \{\ +.nr ^x \\n(^tu+1v-\\n(^Yu +\kx\h'-\\nxu'\h'|\\n(^lu+3n'\ky\L'-\\n(^xu'\v'\\n(^xu'\h'|0u'\c +.\} +.bp +'fi +.ev +.if \\n(^b \{\ +.mk ^y +.nr ^b 2 +.\} +.if \\n(^v \{\ +.mk ^Y +.\} +.. +.\" # DS - begin display +.de DS +.RS +.nf +.sp +.. +.\" # DE - end display +.de DE +.fi +.RE +.sp +.. +.\" # SO - start of list of standard options +.de SO +'ie '\\$1'' .ds So \\fBoptions\\fR +'el .ds So \\fB\\$1\\fR +.SH "STANDARD OPTIONS" +.LP +.nf +.ta 5.5c 11c +.ft B +.. +.\" # SE - end of list of standard options +.de SE +.fi +.ft R +.LP +See the \\*(So manual entry for details on the standard options. +.. +.\" # OP - start of full description for a single option +.de OP +.LP +.nf +.ta 4c +Command-Line Name: \\fB\\$1\\fR +Database Name: \\fB\\$2\\fR +Database Class: \\fB\\$3\\fR +.fi +.IP +.. +.\" # CS - begin code excerpt +.de CS +.RS +.nf +.ta .25i .5i .75i 1i +.. +.\" # CE - end code excerpt +.de CE +.fi +.RE +.. +.\" # UL - underline word +.de UL +\\$1\l'|0\(ul'\\$2 +.. +.\" # QW - apply quotation marks to word +.de QW +.ie '\\*(lq'"' ``\\$1''\\$2 +.\"" fix emacs highlighting +.el \\*(lq\\$1\\*(rq\\$2 +.. +.\" # PQ - apply parens and quotation marks to word +.de PQ +.ie '\\*(lq'"' (``\\$1''\\$2)\\$3 +.\"" fix emacs highlighting +.el (\\*(lq\\$1\\*(rq\\$2)\\$3 +.. +.\" # QR - quoted range +.de QR +.ie '\\*(lq'"' ``\\$1''\\-``\\$2''\\$3 +.\"" fix emacs highlighting +.el \\*(lq\\$1\\*(rq\\-\\*(lq\\$2\\*(rq\\$3 +.. +.\" # MT - "empty" string +.de MT +.QW "" +.. 
+.BS +.SH NAME +punkshell_module_punk::fileline \- file line-handling utilities +.SH SYNOPSIS +package require \fBpunk::fileline \fR +.sp +class::textinfo \fBconstructor\fR \fIdatachunk\fR ?option value\&.\&.\&.? +.sp +class::textinfo \fBchunk\fR \fIchunkstart\fR \fIchunkend\fR +.sp +class::textinfo \fBchunklen\fR +.sp +class::textinfo \fBlinecount\fR +.sp +class::textinfo \fBregenerate_lines\fR +.sp +class::textinfo \fBline\fR \fIlineindex\fR +.sp +class::textinfo \fBlinepayload\fR \fIlineindex\fR +.sp +class::textinfo \fBlinemeta\fR \fIlineindex\fR +.sp +class::textinfo \fBlineinfo\fR \fIlineindex\fR +.sp +class::textinfo \fBlinerange_to_chunkrange\fR \fIstartidx\fR \fIendidx\fR +.sp +class::textinfo \fBlinerange_to_chunk\fR \fIstartidx\fR \fIendidx\fR +.sp +class::textinfo \fBlines\fR \fIstartidx\fR \fIendidx\fR +.sp +class::textinfo \fBlinepayloads\fR \fIstartidx\fR \fIendidx\fR +.sp +class::textinfo \fBchunkrange_to_linerange\fR \fIchunkstart\fR \fIchunkend\fR +.sp +class::textinfo \fBchunkrange_to_lineinfolist\fR \fIchunkstart\fR \fIchunkend\fR ?option value\&.\&.\&.? +.sp +class::textinfo \fBnumeric_linerange\fR \fIstartidx\fR \fIendidx\fR +.sp +class::textinfo \fBnumeric_chunkrange\fR \fIstartidx\fR \fIendidx\fR +.sp +class::textinfo \fBnormalize_indices\fR \fIstartidx\fR \fIendidx\fR \fImax\fR +.sp +\fBlib::range_spans_chunk_boundaries\fR \fIstart\fR \fIend\fR \fIchunksize\fR +.sp +.BE +.SH DESCRIPTION +.PP +- +.SH OVERVIEW +.PP +Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed) +.PP +This is important for certain text files where examining the number of chars/bytes is important +.PP +For example - windows \&.cmd/\&.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved\&. +.PP +Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem\&. 
It is for operating on text-file like data\&. +.SS CONCEPTS +.PP +A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation\&. +.CS + + + package require punk::fileline + package require fileutil + set rawdata [fileutil::cat -translation binary data\&.txt] + punk::fileline::class::textinfo create obj_data $rawdata + puts stdout [obj_data linecount] + +.CE +.SS NOTES +.PP +Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files\&. +.PP +This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired\&. +.PP +No support for lone carriage-returns being interpreted as line-endings\&. +.PP +CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module\&. +.SS DEPENDENCIES +.PP +packages used by punk::fileline +.IP \(bu +\fBTcl 8\&.6\fR +.PP +.SH API +.SS "NAMESPACE PUNK::FILELINE::CLASS" +.PP +class definitions +.IP [1] +CLASS \fBtextinfo\fR +.RS +.sp +\fIMETHODS\fR +.TP +class::textinfo \fBconstructor\fR \fIdatachunk\fR ?option value\&.\&.\&.? +.sp +Constructor for textinfo object which represents a chunk or all of a file +.sp +datachunk should be passed with the file data including line-endings as-is for full functionality\&. ie use something like: +.CS + + + fconfigure $fd -translation binary + set chunkdata [read $fd] +or + set chunkdata [fileutil::cat -translation binary data\&.txt] + +.CE +.sp +when loading the data +.TP +class::textinfo \fBchunk\fR \fIchunkstart\fR \fIchunkend\fR +.sp +Return a range of bytes from the underlying raw chunk data\&.
+.sp +e\&.g The following retrieves the entire chunk +.sp +objName chunk 0 end +.TP +class::textinfo \fBchunklen\fR +.sp +Number of bytes/characters in the raw data of the file +.TP +class::textinfo \fBlinecount\fR +.sp +Number of lines in the raw data of the file, counted as per the policy in effect +.TP +class::textinfo \fBregenerate_lines\fR +.sp +generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex +.TP +class::textinfo \fBline\fR \fIlineindex\fR +.sp +Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata +.sp +A 'line' may be returned without a line-ending if the underlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting) +.sp +Whilst such data may not conform to definitions (e\&.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none" +.sp +To return just the data which might more commonly be needed for dealing with lines, use the \fBlinepayload\fR method - which returns the line data minus line-ending +.TP +class::textinfo \fBlinepayload\fR \fIlineindex\fR +.sp +Return the text of the line indicated by the zero-based lineindex +.sp +The line-ending is not returned in the data - but is still stored against this lineindex +.sp +Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the \fBlinemeta\fR method +.sp +To retrieve both the line text and metadata in a single call the \fBlineinfo\fR method can be used +.sp +To retrieve an entire line including line-ending use the \fBline\fR method\&.
+.TP +class::textinfo \fBlinemeta\fR \fIlineindex\fR +.sp +Return a dict of the metadata for the line indicated by the zero-based lineindex +.sp +Keys returned include +.RS +.IP \(bu +le +.sp +A string representing the type of line-ending: crlf|lf|none +.IP \(bu +linelen +.sp +The number of characters/bytes in the whole line including line-ending if any +.IP \(bu +payloadlen +.sp +The number of character/bytes in the line excluding line-ending +.IP \(bu +start +.sp +The zero-based index into the associated raw file data indicating at which byte/character index this line begins +.IP \(bu +end +.sp +The zero-based index into the associated raw file data indicating at which byte/character index this line ends +.sp +This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload +.RE +.TP +class::textinfo \fBlineinfo\fR \fIlineindex\fR +.sp +Return a dict of the metadata and text for the line indicated by the zero-based lineindex +.sp +This returns the same info as the \fBlinemeta\fR with an added key of 'payload' which is the text of the line without line-ending\&. +.sp +The 'payload' value is the same as is returned from the \fBlinepayload\fR method\&. +.TP +class::textinfo \fBlinerange_to_chunkrange\fR \fIstartidx\fR \fIendidx\fR +.TP +class::textinfo \fBlinerange_to_chunk\fR \fIstartidx\fR \fIendidx\fR +.TP +class::textinfo \fBlines\fR \fIstartidx\fR \fIendidx\fR +.TP +class::textinfo \fBlinepayloads\fR \fIstartidx\fR \fIendidx\fR +.TP +class::textinfo \fBchunkrange_to_linerange\fR \fIchunkstart\fR \fIchunkend\fR +.TP +class::textinfo \fBchunkrange_to_lineinfolist\fR \fIchunkstart\fR \fIchunkend\fR ?option value\&.\&.\&.? 
+.sp +Return a list of dicts each with structure like the result of the \fBlineinfo\fR method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied +.sp +The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list\&. +.sp +truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending) +.sp +Note that this truncation info is only in the return value of this method - and will not be reflected in \fBlineinfo\fR queries to the main chunk\&. +.TP +class::textinfo \fBnumeric_linerange\fR \fIstartidx\fR \fIendidx\fR +.sp +A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data +.sp +This is used internally by API functions such as \fBline\fR to enable it to accept more expressive indices +.TP +class::textinfo \fBnumeric_chunkrange\fR \fIstartidx\fR \fIendidx\fR +.sp +A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data +.TP +class::textinfo \fBnormalize_indices\fR \fIstartidx\fR \fIendidx\fR \fImax\fR +.sp +A utility to convert some of the Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max +.sp +Basic addition and subtraction expressions such as 4-1 5+2 are accepted +.sp +startidx higher than endidx is allowed +.sp +Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max +.RE +.PP +.SS "NAMESPACE PUNK::FILELINE" +.PP +Core API functions for punk::fileline +.PP +.SS "NAMESPACE PUNK::FILELINE::LIB" +.PP +Secondary functions that are part of the API +.TP +\fBlib::range_spans_chunk_boundaries\fR \fIstart\fR \fIend\fR \fIchunksize\fR +.sp +Takes start and end offset,
generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range\&. +.RS +.TP +integer \fIstart\fR +.sp +zero-based start index of range +.TP +integer \fIend\fR +.sp +zero-based end index of range +.TP +integer \fIchunksize\fR +.sp +Number of bytes/characters in chunk +.RE +.sp +returns a dict with the keys is_span and boundaries +.sp +is_span 0|1 indicates if the range specified spans a boundary of chunksize +.sp +boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize +.sp +e\&.g +.CS + + + range_spans_chunk_boundaries 10 1750 512 + is_span 1 boundaries {512 1024 1536} + +.CE +.sp +This function automatically uses lseq (if Tcl >= 8\&.7) when number of boundaries spanned is approximately greater than 75 +.PP +.SH INTERNAL +.SS "NAMESPACE PUNK::FILELINE::SYSTEM" +.PP +Internal functions that are not part of the API +.SH KEYWORDS +file, module, parse, text +.SH COPYRIGHT +.nf +Copyright (c) 2024 + +.fi diff --git a/src/embedded/man/files/punk/_module_path-0.1.0.tm.n b/src/embedded/man/files/punk/_module_path-0.1.0.tm.n index c8a5ff72..a5be8569 100644 --- a/src/embedded/man/files/punk/_module_path-0.1.0.tm.n +++ b/src/embedded/man/files/punk/_module_path-0.1.0.tm.n @@ -347,7 +347,9 @@ ie - the driveletter alone in paths such as c:/etc will still be case insensitiv Explicitly specifying -nocase 0 will require the entire case to match including the driveletter\&. .TP \fBtreefilenames\fR \fIbasepath\fR \fItailglob\fR ?option value\&.\&.\&.? +.sp basic (glob based) list of filenames matching tailglob - recursive +.sp no natsorting - so order is dependent on filesystem .TP \fBrelative\fR \fIreference\fR \fIlocation\fR @@ -379,18 +381,46 @@ Notes: .sp Both paths must be the same type - ie both absolute or both relative .sp -Case sensitive\&. ie relative /etc /etC +Case sensitive\&. 
ie punk::path::relative /etc /etC will return \&.\&./etC .sp On windows, the drive-letter component (only) is not case sensitive +.CS + + + P% punk::path::relative c:/etc C:/etc + - \&. + +.CE .sp -ie relative c:/etc C:/etc returns \&. -.sp -but relative c:/etc C:/Etc returns \&.\&./Etc +The part following the driveletter is case sensitive so in the following cases it recognises the driveletter matches but not the tail +.CS + + + P% punk::path::relative c:/etc C:/Etc + - \&.\&./Etc + +.CE .sp On windows, if the paths are absolute and specifiy different volumes, only the location will be returned\&. -ie relative c:/etc d:/etc/blah -returns d:/etc/blah +.CS + + + P% punk::path::relative c:/etc d:/etc/blah + - d:/etc/blah + +.CE +.sp +Unix-like examples: +.CS + + + P% punk::path::relative /usr/local/etc/ /usr/local/etc/somewhere/below + - somewhere/below + P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here + - \&.\&./\&.\&./lib/here + +.CE .RE .PP .SS "NAMESPACE PUNK::PATH::LIB" diff --git a/src/embedded/man/index.n b/src/embedded/man/index.n index 80c874e9..41f44d9a 100644 --- a/src/embedded/man/index.n +++ b/src/embedded/man/index.n @@ -284,6 +284,12 @@ changelog \fBfiles/project_changes\&.n\fR punkshell__project_changes .RE +file +.RS +.TP +\fBfiles/punk/_module_fileline-0\&.1\&.0\&.tm\&.n\fR +punkshell_module_punk::fileline +.RE filesystem .RS .TP @@ -296,9 +302,18 @@ module \fBfiles/punk/_module_cap-0\&.1\&.0\&.tm\&.n\fR punkshell_module_punk::cap .TP +\fBfiles/punk/_module_fileline-0\&.1\&.0\&.tm\&.n\fR +punkshell_module_punk::fileline +.TP \fBfiles/punk/_module_path-0\&.1\&.0\&.tm\&.n\fR punkshell_module_punk::path .RE +parse +.RS +.TP +\fBfiles/punk/_module_fileline-0\&.1\&.0\&.tm\&.n\fR +punkshell_module_punk::fileline +.RE path .RS .TP @@ -347,4 +362,10 @@ punkshell__project_changes \fBfiles/project_intro\&.n\fR punkshell__project_intro .RE +text +.RS +.TP +\fBfiles/punk/_module_fileline-0\&.1\&.0\&.tm\&.n\fR 
+punkshell_module_punk::fileline +.RE .RE diff --git a/src/embedded/man/toc.n b/src/embedded/man/toc.n index f459e74d..1ec27a2d 100644 --- a/src/embedded/man/toc.n +++ b/src/embedded/man/toc.n @@ -285,6 +285,9 @@ doc \fBpunkshell_module_punk::cap\fR \fIfiles/punk/_module_cap-0\&.1\&.0\&.tm\&.n\fR: capability provider and handler plugin system .TP +\fBpunkshell_module_punk::fileline\fR +\fIfiles/punk/_module_fileline-0\&.1\&.0\&.tm\&.n\fR: file line-handling utilities +.TP \fBpunkshell_module_punk::mix::commandset::project\fR \fIfiles/punk/mix/commandset/_module_project-0\&.1\&.0\&.tm\&.n\fR: pmix commandset - project .TP diff --git a/src/embedded/md/.doc/tocdoc b/src/embedded/md/.doc/tocdoc index e3cd5c57..1e536399 100644 --- a/src/embedded/md/.doc/tocdoc +++ b/src/embedded/md/.doc/tocdoc @@ -3,6 +3,7 @@ [item doc/files/project_changes.md punkshell__project_changes {punkshell Changes}] [item doc/files/project_intro.md punkshell__project_intro {Introduction to punkshell}] [item doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap {capability provider and handler plugin system}] +[item doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline {file line-handling utilities}] [item doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md punkshell_module_punk::mix::commandset::project {pmix commandset - project}] [item doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path {Filesystem path utilities}] [toc_end] diff --git a/src/embedded/md/.idx b/src/embedded/md/.idx index 6186d3ff..c892561d 100644 --- a/src/embedded/md/.idx +++ b/src/embedded/md/.idx @@ -1 +1 @@ -{shell {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} changelog {{doc/files/project_changes.md punkshell__project_changes}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.md 
punkshell_module_punk::path}} capability {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}} module {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} punk {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}} repl {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}}} {{changelog doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/main.md punkshell} . {repl doc/files/project_intro.md punkshell__project_intro} . {module doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . {plugin doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {path doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {module doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {shell doc/files/project_intro.md punkshell__project_intro} . {punk doc/files/project_changes.md punkshell__project_changes} . {punk doc/files/main.md punkshell} . {repl doc/files/project_changes.md punkshell__project_changes} . {punk doc/files/project_intro.md punkshell__project_intro} . {repl doc/files/main.md punkshell} . 
{capability doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} .} 9 {shell shell changelog changelog filesystem filesystem path path capability capability module module punk punk plugin plugin repl repl} \ No newline at end of file +{file {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline}} repl {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} text {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline}} shell {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} changelog {{doc/files/project_changes.md punkshell__project_changes}} capability {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}} parse {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} module {{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} {doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path}} punk {{doc/files/project_intro.md punkshell__project_intro} {doc/files/project_changes.md punkshell__project_changes} {doc/files/main.md punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap}}} {{shell doc/files/project_changes.md punkshell__project_changes} . {changelog doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/main.md punkshell} . {text doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {repl doc/files/project_intro.md punkshell__project_intro} . {module doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . 
{plugin doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {path doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {module doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path} . {punk doc/files/project_changes.md punkshell__project_changes} . {shell doc/files/project_intro.md punkshell__project_intro} . {parse doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {punk doc/files/main.md punkshell} . {module doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {repl doc/files/project_changes.md punkshell__project_changes} . {punk doc/files/project_intro.md punkshell__project_intro} . {file doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline} . {repl doc/files/main.md punkshell} . {capability doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap} .} 12 {file file repl repl text text shell shell changelog changelog capability capability parse parse filesystem filesystem path path module module punk punk plugin plugin} \ No newline at end of file diff --git a/src/embedded/md/.toc b/src/embedded/md/.toc index 7c55cb42..18e4bf21 100644 --- a/src/embedded/md/.toc +++ b/src/embedded/md/.toc @@ -1 +1 @@ -doc {doc/toc {{doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.md punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.md punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.md punkshell {punkshell - Core}}}} \ No newline at end of file +doc {doc/toc 
{{doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::fileline {file line-handling utilities}} {doc/files/punk/_module_cap-0.1.0.tm.md punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.md punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.md punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.md punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.md punkshell {punkshell - Core}}}} \ No newline at end of file diff --git a/src/embedded/md/.xrf b/src/embedded/md/.xrf index ee6b22ac..5dc9fb66 100644 --- a/src/embedded/md/.xrf +++ b/src/embedded/md/.xrf @@ -1 +1 @@ -kw,capability {index.md capability} punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell Changes} doc/files/project_changes.md {Introduction to punkshell} doc/files/project_intro.md punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md sa,punkshell(n) doc/files/main.md filesystem {index.md filesystem} sa,punkshell doc/files/main.md kw,shell {index.md shell} sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md sa,punkshell__project_changes(n) doc/files/project_changes.md kw,path {index.md path} kw,module {index.md module} punkshell(n) doc/files/main.md kw,plugin {index.md plugin} punkshell doc/files/main.md punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md changelog {index.md changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md punkshell__project_changes(n) doc/files/project_changes.md 
sa,punkshell__project_changes doc/files/project_changes.md path {index.md path} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_changes doc/files/project_changes.md kw,filesystem {index.md filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md shell {index.md shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md kw,repl {index.md repl} capability {index.md capability} punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell - Core} doc/files/main.md {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {capability provider and handler plugin system} doc/files/punk/_module_cap-0.1.0.tm.md repl {index.md repl} kw,punk {index.md punk} sa,punkshell__project_intro(n) doc/files/project_intro.md sa,punkshell__project_intro doc/files/project_intro.md {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.md sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_intro(n) doc/files/project_intro.md punkshell__project_intro doc/files/project_intro.md kw,changelog {index.md changelog} punk {index.md punk} module {index.md module} plugin {index.md plugin} \ No newline at end of file +kw,capability {index.md capability} punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell Changes} doc/files/project_changes.md {Introduction to punkshell} doc/files/project_intro.md sa,punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md sa,punkshell(n) doc/files/main.md filesystem {index.md filesystem} sa,punkshell doc/files/main.md kw,shell {index.md shell} 
sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md kw,parse {index.md parse} sa,punkshell__project_changes(n) doc/files/project_changes.md kw,path {index.md path} kw,module {index.md module} punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.md punkshell(n) doc/files/main.md kw,plugin {index.md plugin} punkshell doc/files/main.md kw,file {index.md file} punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.md changelog {index.md changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.md punkshell__project_changes(n) doc/files/project_changes.md sa,punkshell__project_changes doc/files/project_changes.md path {index.md path} file {index.md file} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_changes doc/files/project_changes.md kw,filesystem {index.md filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md shell {index.md shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.md kw,repl {index.md repl} capability {index.md capability} kw,text {index.md text} parse {index.md parse} sa,punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.md punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {punkshell - Core} doc/files/main.md {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.md {capability provider and handler plugin system} doc/files/punk/_module_cap-0.1.0.tm.md repl {index.md repl} punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.md kw,punk {index.md punk} sa,punkshell__project_intro(n) doc/files/project_intro.md text {index.md text} sa,punkshell__project_intro doc/files/project_intro.md {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.md 
sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.md punkshell__project_intro(n) doc/files/project_intro.md {file line-handling utilities} doc/files/punk/_module_fileline-0.1.0.tm.md punkshell__project_intro doc/files/project_intro.md kw,changelog {index.md changelog} module {index.md module} punk {index.md punk} plugin {index.md plugin} \ No newline at end of file diff --git a/src/embedded/md/doc/files/punk/_module_fileline-0.1.0.tm.md b/src/embedded/md/doc/files/punk/_module_fileline-0.1.0.tm.md new file mode 100644 index 00000000..9ed788e3 --- /dev/null +++ b/src/embedded/md/doc/files/punk/_module_fileline-0.1.0.tm.md @@ -0,0 +1,353 @@ + +[//000000001]: # (punkshell\_module\_punk::fileline \- punk fileline) +[//000000002]: # (Generated from file '\_module\_fileline\-0\.1\.0\.tm\.man' by tcllib/doctools with format 'markdown') +[//000000003]: # (Copyright © 2024) +[//000000004]: # (punkshell\_module\_punk::fileline\(0\) 0\.1\.0 doc "punk fileline") + +
[ Main Table Of Contents | Table Of Contents | Keyword Index ]
+ +# NAME + +punkshell\_module\_punk::fileline \- file line\-handling utilities + +# Table Of Contents + + - [Table Of Contents](#toc) + + - [Synopsis](#synopsis) + + - [Description](#section1) + + - [Overview](#section2) + + - [Concepts](#subsection1) + + - [Notes](#subsection2) + + - [dependencies](#subsection3) + + - [API](#section3) + + - [Namespace punk::fileline::class](#subsection4) + + - [Namespace punk::fileline](#subsection5) + + - [Namespace punk::fileline::lib](#subsection6) + + - [Internal](#section4) + + - [Namespace punk::fileline::system](#subsection7) + + - [Keywords](#keywords) + + - [Copyright](#copyright) + +# SYNOPSIS + +package require punk::fileline + +[class::textinfo __constructor__ *datachunk* ?option value\.\.\.?](#1) +[class::textinfo __chunk__ *chunkstart* *chunkend*](#2) +[class::textinfo __chunklen__](#3) +[class::textinfo __linecount__](#4) +[class::textinfo __regenerate\_lines__](#5) +[class::textinfo __line__ *lineindex*](#6) +[class::textinfo __linepayload__ *lineindex*](#7) +[class::textinfo __linemeta__ *lineindex*](#8) +[class::textinfo __lineinfo__ *lineindex*](#9) +[class::textinfo __linerange\_to\_chunkrange__ *startidx* *endidx*](#10) +[class::textinfo __linerange\_to\_chunk__ *startidx* *endidx*](#11) +[class::textinfo __lines__ *startidx* *endidx*](#12) +[class::textinfo __linepayloads__ *startidx* *endidx*](#13) +[class::textinfo __chunkrange\_to\_linerange__ *chunkstart* *chunkend*](#14) +[class::textinfo __chunkrange\_to\_lineinfolist__ *chunkstart* *chunkend* ?option value\.\.\.?](#15) +[class::textinfo __numeric\_linerange__ *startidx* *endidx*](#16) +[class::textinfo __numeric\_chunkrange__ *startidx* *endidx*](#17) +[class::textinfo __normalize\_indices__ *startidx* *endidx* *max*](#18) +[__lib::range\_spans\_chunk\_boundaries__ *start* *end* *chunksize*](#19) + +# DESCRIPTION + +\- + +# Overview + +Utilities for in\-memory analysis of text file data as both line data and +byte/char\-counted data whilst preserving 
the line\-endings \(even if mixed\) + +This is important for certain text files where examining the number of +chars/bytes is important + +For example \- windows \.cmd/\.bat files need some byte counting to determine if +labels lie on chunk boundaries and need to be moved\. + +Despite including the word 'file', the library doesn't deal with reading/writing +to the filesystem\. It is for operating on text\-file like data\. + +## Concepts + +A chunk of textfile data \(possibly representing a whole file \- but usually at +least a complete set of lines\) is loaded into a punk::fileline::class::textinfo +instance at object creation\. + + package require punk::fileline + package require fileutil + set rawdata [fileutil::cat -translation binary data.txt] + punk::fileline::class::textinfo create obj_data $rawdata + puts stdout [obj_data linecount] + +## Notes + +Line records are referred to by a zero\-based index instead of a one\-based index +as is commonly used when displaying files\. + +This is for programming consistency and convenience, and the module user should +do their own conversion to one\-based indexing for line display or messaging if +desired\. + +No support for lone carriage\-returns being interpreted as line\-endings\. + +CR line\-endings that are intended to be interpreted as such should be mapped to +something else before the data is supplied to this module\. + +## dependencies + +packages used by punk::fileline + + - __Tcl 8\.6__ + +# API + +## Namespace punk::fileline::class + +class definitions + + 1. CLASS __textinfo__ + + - class::textinfo __constructor__ *datachunk* ?option value\.\.\.? + + *METHODS* + + Constructor for textinfo object which represents a chunk or all of a + file + + datachunk should be passed with the file data including line\-endings + as\-is for full functionality\. 
ie use something like: + + fconfigure $fd -translation binary + set chunkdata [read $fd] + or + set chunkdata [fileutil::cat -translation binary <filename>] + + when loading the data + + - class::textinfo __chunk__ *chunkstart* *chunkend* + + Return a range of bytes from the underlying raw chunk data\. + + e\.g The following retrieves the entire chunk + + objName chunk 0 end + + - class::textinfo __chunklen__ + + Number of bytes/characters in the raw data of the file + + - class::textinfo __linecount__ + + Number of lines in the raw data of the file, counted as per the policy + in effect + + - class::textinfo __regenerate\_lines__ + + generate a list of lines from the stored raw data chunk and keep a map + of line\-endings indexed by lineindex + + - class::textinfo __line__ *lineindex* + + Reconstructs and returns the raw line using the payload and per\-line + stored line\-ending metadata + + A 'line' may be returned without a line\-ending if the underlying chunk + had trailing data without a line\-ending \(or the chunk was loaded under + a non\-standard \-policy setting\) + + Whilst such data may not conform to definitions \(e\.g POSIX\) of the + terms 'textfile' and 'line' \- it is useful here to represent it as a + line with metadata le set to "none" + + To return just the data which might more commonly be needed for dealing + with lines, use the __linepayload__ method \- which returns the line + data minus line\-ending + + - class::textinfo __linepayload__ *lineindex* + + Return the text of the line indicated by the zero\-based lineindex + + The line\-ending is not returned in the data \- but is still stored + against this lineindex + + Line Metadata such as the line\-ending for a particular line and the + byte/character range it occupies within the chunk can be retrieved with + the __linemeta__ method + + To retrieve both the line text and metadata in a single call the + __lineinfo__ method can be used + + To retrieve an entire line including line\-ending use the 
__line__ + method\. + + - class::textinfo __linemeta__ *lineindex* + + Return a dict of the metadata for the line indicated by the zero\-based + lineindex + + Keys returned include + + * le + + A string representing the type of line\-ending: crlf|lf|none + + * linelen + + The number of characters/bytes in the whole line including + line\-ending if any + + * payloadlen + + The number of character/bytes in the line excluding line\-ending + + * start + + The zero\-based index into the associated raw file data indicating + at which byte/character index this line begins + + * end + + The zero\-based index into the associated raw file data indicating + at which byte/character index this line ends + + This end\-point corresponds to the last character of the line\-ending + if any \- not necessarily the last character of the line's payload + + - class::textinfo __lineinfo__ *lineindex* + + Return a dict of the metadata and text for the line indicated by the + zero\-based lineindex + + This returns the same info as the __linemeta__ with an added key of + 'payload' which is the text of the line without line\-ending\. + + The 'payload' value is the same as is returned from the + __linepayload__ method\. + + - class::textinfo __linerange\_to\_chunkrange__ *startidx* *endidx* + + - class::textinfo __linerange\_to\_chunk__ *startidx* *endidx* + + - class::textinfo __lines__ *startidx* *endidx* + + - class::textinfo __linepayloads__ *startidx* *endidx* + + - class::textinfo __chunkrange\_to\_linerange__ *chunkstart* *chunkend* + + - class::textinfo __chunkrange\_to\_lineinfolist__ *chunkstart* *chunkend* ?option value\.\.\.? + + Return a list of dicts each with structure like the result of the + __lineinfo__ method \- but possibly with extra keys for truncation + information if \-show\_truncated 1 is supplied + + The truncation key in a lineinfo dict may be returned for first and/or + last line in the resulting list\. 
+ + truncation shows the shortened \(missing bytes on left and/or right + side\) part of the entire line \(potentially including line\-ending or + even partial line\-ending\) + + Note that this truncation info is only in the return value of this + method \- and will not be reflected in __lineinfo__ queries to the + main chunk\. + + - class::textinfo __numeric\_linerange__ *startidx* *endidx* + + A helper to return any Tcl\-style end end\-x values given to startidx or + endidx; converted to their specific values based on the current state + of the underlying line data + + This is used internally by API functions such as __line__ to enable + it to accept more expressive indices + + - class::textinfo __numeric\_chunkrange__ *startidx* *endidx* + + A helper to return any Tcl\-style end end\-x entries supplied to startidx + or endidx; converted to their specific values based on the current + state of the underlying chunk data + + - class::textinfo __normalize\_indices__ *startidx* *endidx* *max* + + A utility to convert some of the Tcl\-style list\-index expressions + such as end, end\-1 etc to valid indices in the range 0 to the supplied + max + + Basic addition and subtraction expressions such as 4\-1 5\+2 are accepted + + startidx higher than endidx is allowed + + Unlike Tcl's index expressions \- we raise an error if the calculated + index is out of bounds 0 to max + +## Namespace punk::fileline + +Core API functions for punk::fileline + +## Namespace punk::fileline::lib + + - __lib::range\_spans\_chunk\_boundaries__ *start* *end* *chunksize* + + Takes start and end offset, generally representing bytes or character + indices, and computes a list of boundaries at multiples of the chunksize + that are spanned by the start and end range\. 
+ + * integer *start* + + zero\-based start index of range + + * integer *end* + + zero\-based end index of range + + * integer *chunksize* + + Number of bytes/characters in chunk + + returns a dict with the keys is\_span and boundaries + + is\_span 0|1 indicates if the range specified spans a boundary of chunksize + + boundaries contains a list of the spanned boundaries \- which are always + multiples of the chunksize + + e\.g + + range_spans_chunk_boundaries 10 1750 512 + is_span 1 boundaries {512 1024 1536} + + This function automatically uses lseq \(if Tcl >= 8\.7\) when number of + boundaries spanned is approximately greater than 75 + +# Internal + +## Namespace punk::fileline::system + +Internal functions that are not part of the API + +# KEYWORDS + +[file](\.\./\.\./\.\./index\.md\#file), [module](\.\./\.\./\.\./index\.md\#module), +[parse](\.\./\.\./\.\./index\.md\#parse), [text](\.\./\.\./\.\./index\.md\#text) + +# COPYRIGHT + +Copyright © 2024 diff --git a/src/embedded/md/doc/files/punk/_module_path-0.1.0.tm.md b/src/embedded/md/doc/files/punk/_module_path-0.1.0.tm.md index 9192184b..3a9398a6 100644 --- a/src/embedded/md/doc/files/punk/_module_path-0.1.0.tm.md +++ b/src/embedded/md/doc/files/punk/_module_path-0.1.0.tm.md @@ -125,8 +125,9 @@ class definitions - __treefilenames__ *basepath* *tailglob* ?option value\.\.\.? - basic \(glob based\) list of filenames matching tailglob \- recursive no - natsorting \- so order is dependent on filesystem + basic \(glob based\) list of filenames matching tailglob \- recursive + + no natsorting \- so order is dependent on filesystem - __relative__ *reference* *location* @@ -153,17 +154,31 @@ class definitions Both paths must be the same type \- ie both absolute or both relative - Case sensitive\. ie relative /etc /etC will return \.\./etC + Case sensitive\. 
ie punk::path::relative /etc /etC will return \.\./etC On windows, the drive\-letter component \(only\) is not case sensitive - ie relative c:/etc C:/etc returns \. + P% punk::path::relative c:/etc C:/etc + - . + + The part following the driveletter is case sensitive so in the following + cases it recognises the driveletter matches but not the tail - but relative c:/etc C:/Etc returns \.\./Etc + P% punk::path::relative c:/etc C:/Etc + - ../Etc On windows, if the paths are absolute and specifiy different volumes, - only the location will be returned\. ie relative c:/etc d:/etc/blah - returns d:/etc/blah + only the location will be returned\. + + P% punk::path::relative c:/etc d:/etc/blah + - d:/etc/blah + + Unix\-like examples: + + P% punk::path::relative /usr/local/etc/ /usr/local/etc/somewhere/below + - somewhere/below + P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here + - ../../lib/here ## Namespace punk::path::lib diff --git a/src/embedded/md/doc/toc.md b/src/embedded/md/doc/toc.md index fd79bcd6..63ab3c56 100644 --- a/src/embedded/md/doc/toc.md +++ b/src/embedded/md/doc/toc.md @@ -11,6 +11,8 @@ - [punkshell\_module\_punk::cap](doc/files/punk/\_module\_cap\-0\.1\.0\.tm\.md) capability provider and handler plugin system + - [punkshell\_module\_punk::fileline](doc/files/punk/\_module\_fileline\-0\.1\.0\.tm\.md) file line\-handling utilities + - [punkshell\_module\_punk::mix::commandset::project](doc/files/punk/mix/commandset/\_module\_project\-0\.1\.0\.tm\.md) pmix commandset \- project - [punkshell\_module\_punk::path](doc/files/punk/\_module\_path\-0\.1\.0\.tm\.md) Filesystem path utilities diff --git a/src/embedded/md/index.md b/src/embedded/md/index.md index 69146770..591eff4c 100644 --- a/src/embedded/md/index.md +++ b/src/embedded/md/index.md @@ -5,7 +5,7 @@ ---- -[C](#cC) · [F](#cF) · [M](#cM) · [P](#cP) · [R](#cR) · [S](#cS) +[C](#cC) · [F](#cF) · [M](#cM) · [P](#cP) · [R](#cR) · [S](#cS) · [T](#cT) ---- @@ -21,6 +21,7 @@ ||| |---|---| 
+|file|[punkshell\_module\_punk::fileline](doc/files/punk/\_module\_fileline\-0\.1\.0\.tm\.md)| |filesystem|[punkshell\_module\_punk::path](doc/files/punk/\_module\_path\-0\.1\.0\.tm\.md)| @@ -28,13 +29,14 @@ ||| |---|---| -|module|[punkshell\_module\_punk::cap](doc/files/punk/\_module\_cap\-0\.1\.0\.tm\.md) · [punkshell\_module\_punk::path](doc/files/punk/\_module\_path\-0\.1\.0\.tm\.md)| +|module|[punkshell\_module\_punk::cap](doc/files/punk/\_module\_cap\-0\.1\.0\.tm\.md) · [punkshell\_module\_punk::fileline](doc/files/punk/\_module\_fileline\-0\.1\.0\.tm\.md) · [punkshell\_module\_punk::path](doc/files/punk/\_module\_path\-0\.1\.0\.tm\.md)| #### Keywords: P ||| |---|---| +|parse|[punkshell\_module\_punk::fileline](doc/files/punk/\_module\_fileline\-0\.1\.0\.tm\.md)| |path|[punkshell\_module\_punk::path](doc/files/punk/\_module\_path\-0\.1\.0\.tm\.md)| |plugin|[punkshell\_module\_punk::cap](doc/files/punk/\_module\_cap\-0\.1\.0\.tm\.md)| |punk|[punkshell](doc/files/main\.md) · [punkshell\_\_project\_changes](doc/files/project\_changes\.md) · [punkshell\_\_project\_intro](doc/files/project\_intro\.md)| @@ -52,3 +54,10 @@ ||| |---|---| |shell|[punkshell](doc/files/main\.md) · [punkshell\_\_project\_changes](doc/files/project\_changes\.md) · [punkshell\_\_project\_intro](doc/files/project\_intro\.md)| + + +#### Keywords: T + +||| +|---|---| +|text|[punkshell\_module\_punk::fileline](doc/files/punk/\_module\_fileline\-0\.1\.0\.tm\.md)| diff --git a/src/embedded/md/toc.md b/src/embedded/md/toc.md index fd79bcd6..63ab3c56 100644 --- a/src/embedded/md/toc.md +++ b/src/embedded/md/toc.md @@ -11,6 +11,8 @@ - [punkshell\_module\_punk::cap](doc/files/punk/\_module\_cap\-0\.1\.0\.tm\.md) capability provider and handler plugin system + - [punkshell\_module\_punk::fileline](doc/files/punk/\_module\_fileline\-0\.1\.0\.tm\.md) file line\-handling utilities + - [punkshell\_module\_punk::mix::commandset::project](doc/files/punk/mix/commandset/\_module\_project\-0\.1\.0\.tm\.md) 
pmix commandset \- project - [punkshell\_module\_punk::path](doc/files/punk/\_module\_path\-0\.1\.0\.tm\.md) Filesystem path utilities diff --git a/src/embedded/www/.doc/tocdoc b/src/embedded/www/.doc/tocdoc index 2639f7d7..1ee30657 100644 --- a/src/embedded/www/.doc/tocdoc +++ b/src/embedded/www/.doc/tocdoc @@ -3,6 +3,7 @@ [item doc/files/project_changes.html punkshell__project_changes {punkshell Changes}] [item doc/files/project_intro.html punkshell__project_intro {Introduction to punkshell}] [item doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap {capability provider and handler plugin system}] +[item doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline {file line-handling utilities}] [item doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html punkshell_module_punk::mix::commandset::project {pmix commandset - project}] [item doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path {Filesystem path utilities}] [toc_end] diff --git a/src/embedded/www/.idx b/src/embedded/www/.idx index 96e73879..60d2db40 100644 --- a/src/embedded/www/.idx +++ b/src/embedded/www/.idx @@ -1 +1 @@ -{shell {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} changelog {{doc/files/project_changes.html punkshell__project_changes}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} capability {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap}} module {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} punk {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.html 
punkshell_module_punk::cap}} repl {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}}} {{repl doc/files/main.html punkshell} . {punk doc/files/project_intro.html punkshell__project_intro} . {capability doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {changelog doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/main.html punkshell} . {repl doc/files/project_intro.html punkshell__project_intro} . {module doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {plugin doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {path doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {module doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {punk doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/project_intro.html punkshell__project_intro} . {punk doc/files/main.html punkshell} . 
{repl doc/files/project_changes.html punkshell__project_changes} .} 9 {shell shell changelog changelog filesystem filesystem path path capability capability module module punk punk plugin plugin repl repl} \ No newline at end of file +{file {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline}} repl {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} text {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline}} shell {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} changelog {{doc/files/project_changes.html punkshell__project_changes}} capability {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap}} parse {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline}} filesystem {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} path {{doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} module {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} {doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path}} punk {{doc/files/project_intro.html punkshell__project_intro} {doc/files/project_changes.html punkshell__project_changes} {doc/files/main.html punkshell}} plugin {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap}}} {{repl doc/files/main.html punkshell} . {file doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . {punk doc/files/project_intro.html punkshell__project_intro} . {capability doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {shell doc/files/project_changes.html punkshell__project_changes} . 
{changelog doc/files/project_changes.html punkshell__project_changes} . {shell doc/files/main.html punkshell} . {text doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . {repl doc/files/project_intro.html punkshell__project_intro} . {module doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {path doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {plugin doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap} . {filesystem doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {module doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path} . {shell doc/files/project_intro.html punkshell__project_intro} . {punk doc/files/project_changes.html punkshell__project_changes} . {parse doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . {punk doc/files/main.html punkshell} . {module doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline} . 
{repl doc/files/project_changes.html punkshell__project_changes} .} 12 {file file repl repl text text shell shell changelog changelog capability capability parse parse filesystem filesystem path path module module punk punk plugin plugin} \ No newline at end of file diff --git a/src/embedded/www/.toc b/src/embedded/www/.toc index bb33be84..4fa3f016 100644 --- a/src/embedded/www/.toc +++ b/src/embedded/www/.toc @@ -1 +1 @@ -doc {doc/toc {{doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.html punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.html punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.html punkshell {punkshell - Core}}}} \ No newline at end of file +doc {doc/toc {{doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::fileline {file line-handling utilities}} {doc/files/punk/_module_cap-0.1.0.tm.html punkshell_module_punk::cap {capability provider and handler plugin system}} {doc/files/project_intro.html punkshell__project_intro {Introduction to punkshell}} {doc/files/punk/_module_path-0.1.0.tm.html punkshell_module_punk::path {Filesystem path utilities}} {doc/files/project_changes.html punkshell__project_changes {punkshell Changes}} {doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html punkshell_module_punk::mix::commandset::project {pmix commandset - project}} {doc/files/main.html punkshell {punkshell - Core}}}} \ No newline at end of file diff --git a/src/embedded/www/.xrf b/src/embedded/www/.xrf index ea711d2a..623ccbfd 100644 --- a/src/embedded/www/.xrf +++ b/src/embedded/www/.xrf @@ -1 +1 @@ -kw,capability {index.html capability} punkshell_module_punk::path(0) 
doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell Changes} doc/files/project_changes.html {Introduction to punkshell} doc/files/project_intro.html punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html sa,punkshell(n) doc/files/main.html filesystem {index.html filesystem} sa,punkshell doc/files/main.html kw,shell {index.html shell} sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html sa,punkshell__project_changes(n) doc/files/project_changes.html kw,path {index.html path} kw,module {index.html module} punkshell(n) doc/files/main.html kw,plugin {index.html plugin} punkshell doc/files/main.html punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html changelog {index.html changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html punkshell__project_changes(n) doc/files/project_changes.html sa,punkshell__project_changes doc/files/project_changes.html path {index.html path} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_changes doc/files/project_changes.html kw,filesystem {index.html filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html shell {index.html shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html kw,repl {index.html repl} capability {index.html capability} punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell - Core} doc/files/main.html {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {capability provider and handler plugin system} doc/files/punk/_module_cap-0.1.0.tm.html repl {index.html repl} kw,punk {index.html punk} 
sa,punkshell__project_intro(n) doc/files/project_intro.html sa,punkshell__project_intro doc/files/project_intro.html {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_intro(n) doc/files/project_intro.html punkshell__project_intro doc/files/project_intro.html kw,changelog {index.html changelog} punk {index.html punk} module {index.html module} plugin {index.html plugin} \ No newline at end of file +kw,capability {index.html capability} punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell Changes} doc/files/project_changes.html {Introduction to punkshell} doc/files/project_intro.html sa,punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::mix::commandset::project(0) doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html sa,punkshell(n) doc/files/main.html filesystem {index.html filesystem} sa,punkshell doc/files/main.html kw,shell {index.html shell} sa,punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html sa,punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html kw,parse {index.html parse} sa,punkshell__project_changes(n) doc/files/project_changes.html kw,path {index.html path} kw,module {index.html module} punkshell_module_punk::fileline(0) doc/files/punk/_module_fileline-0.1.0.tm.html punkshell(n) doc/files/main.html kw,plugin {index.html plugin} punkshell doc/files/main.html kw,file {index.html file} punkshell_module_punk::cap doc/files/punk/_module_cap-0.1.0.tm.html changelog {index.html changelog} punkshell_module_punk::cap(0) doc/files/punk/_module_cap-0.1.0.tm.html punkshell__project_changes(n) doc/files/project_changes.html sa,punkshell__project_changes doc/files/project_changes.html path {index.html path} file 
{index.html file} sa,punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_changes doc/files/project_changes.html kw,filesystem {index.html filesystem} sa,punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html shell {index.html shell} punkshell_module_punk::path doc/files/punk/_module_path-0.1.0.tm.html kw,repl {index.html repl} capability {index.html capability} kw,text {index.html text} parse {index.html parse} sa,punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.html punkshell_module_punk::mix::commandset::project doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {punkshell - Core} doc/files/main.html {pmix commandset - project} doc/files/punk/mix/commandset/_module_project-0.1.0.tm.html {capability provider and handler plugin system} doc/files/punk/_module_cap-0.1.0.tm.html repl {index.html repl} punkshell_module_punk::fileline doc/files/punk/_module_fileline-0.1.0.tm.html kw,punk {index.html punk} sa,punkshell__project_intro(n) doc/files/project_intro.html text {index.html text} sa,punkshell__project_intro doc/files/project_intro.html {Filesystem path utilities} doc/files/punk/_module_path-0.1.0.tm.html sa,punkshell_module_punk::path(0) doc/files/punk/_module_path-0.1.0.tm.html punkshell__project_intro(n) doc/files/project_intro.html {file line-handling utilities} doc/files/punk/_module_fileline-0.1.0.tm.html punkshell__project_intro doc/files/project_intro.html kw,changelog {index.html changelog} module {index.html module} punk {index.html punk} plugin {index.html plugin} \ No newline at end of file diff --git a/src/embedded/www/doc/files/punk/_module_fileline-0.1.0.tm.html b/src/embedded/www/doc/files/punk/_module_fileline-0.1.0.tm.html new file mode 100644 index 00000000..5519d64f --- /dev/null +++ b/src/embedded/www/doc/files/punk/_module_fileline-0.1.0.tm.html @@ -0,0 +1,326 @@ + +punkshell_module_punk::fileline - punk fileline + + + + 
+ +
[ + Main Table Of Contents +| Table Of Contents +| Keyword Index + ]
+
+

punkshell_module_punk::fileline(0) 0.1.0 doc "punk fileline"

+

Name

+

punkshell_module_punk::fileline - file line-handling utilities

+
+ + + +

Overview

+

Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed)

+

This is important for certain text files where examining the number of chars/bytes is important

+

For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be moved.

+

Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem. It is for operating on text-file like data.

+

Concepts

+

A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation.

+
+    package require punk::fileline
+    package require fileutil
+    set rawdata [fileutil::cat data.txt -translation binary]
+    punk::fileline::class::textinfo create obj_data  $rawdata
+    puts stdout [obj_data linecount]
+
+
+

Notes

+

Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files.

+

This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired.

+

No support for lone carriage-returns being interpreted as line-endings.

+

CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module.

+
+

dependencies

+

packages used by punk::fileline

+
    +
  • Tcl 8.6

  • +
+
+
+

API

+

Namespace punk::fileline::class

+

class definitions

+
    +
  1. CLASS textinfo

    +
    +

    METHODS

    +
    class::textinfo constructor datachunk ?option value...?
    +

    Constructor for textinfo object which represents a chunk or all of a file

    +

    datachunk should be passed with the file data including line-endings as-is for full functionality. ie use something like:

    +
    +    fconfigure $fd -translation binary
    +    set chunkdata [read $fd]
    +or
    +    set chunkdata [fileutil::cat <filename> -translation binary]
    +
    +

    when loading the data

    +
    class::textinfo chunk chunkstart chunkend
    +

    Return a range of bytes from the underlying raw chunk data.

    +

    e.g The following retrieves the entire chunk

    +

    objName chunk 0 end

    +
    class::textinfo chunklen
    +

    Number of bytes/characters in the raw data of the file

    +
    class::textinfo linecount
    +

    Number of lines in the raw data of the file, counted as per the policy in effect

    +
    class::textinfo regenerate_lines
    +

    generate a list of lines from the stored raw data chunk and keep a map of line-endings indexed by lineindex

    +
    class::textinfo line lineindex
    +

    Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata

    +

    A 'line' may be returned without a line-ending if the underlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting)

    +

    Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none"

    +

    To return just the data which might more commonly be needed for dealing with lines, use the linepayload method - which returns the line data minus line-ending

    +
    class::textinfo linepayload lineindex
    +

    Return the text of the line indicated by the zero-based lineindex

    +

    The line-ending is not returned in the data - but is still stored against this lineindex

    +

    Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the linemeta method

    +

    To retrieve both the line text and metadata in a single call the lineinfo method can be used

    +

    To retrieve an entire line including line-ending use the line method.

    +
    class::textinfo linemeta lineindex
    +

    Return a dict of the metadata for the line indicated by the zero-based lineindex

    +

    Keys returned include

    +
      +
    • le

      +

      A string representing the type of line-ending: crlf|lf|none

    • +
    • linelen

      +

      The number of characters/bytes in the whole line including line-ending if any

    • +
    • payloadlen

      +

      The number of characters/bytes in the line excluding line-ending

    • +
    • start

      +

      The zero-based index into the associated raw file data indicating at which byte/character index this line begins

    • +
    • end

      +

      The zero-based index into the associated raw file data indicating at which byte/character index this line ends

      +

      This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload

    • +
    +
    class::textinfo lineinfo lineindex
    +

    Return a dict of the metadata and text for the line indicated by the zero-based lineindex

    +

    This returns the same info as the linemeta with an added key of 'payload' which is the text of the line without line-ending.

    +

    The 'payload' value is the same as is returned from the linepayload method.

    +
    class::textinfo linerange_to_chunkrange startidx endidx
    +
    +
    class::textinfo linerange_to_chunk startidx endidx
    +
    +
    class::textinfo lines startidx endidx
    +
    +
    class::textinfo linepayloads startidx endidx
    +
    +
    class::textinfo chunkrange_to_linerange chunkstart chunkend
    +
    +
    class::textinfo chunkrange_to_lineinfolist chunkstart chunkend ?option value...?
    +

    Return a list of dicts each with structure like the result of the lineinfo method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied

    +

    The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list.

    +

    truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending)

    +

    Note that this truncation info is only in the return value of this method - and will not be reflected in lineinfo queries to the main chunk.

    +
    class::textinfo numeric_linerange startidx endidx
    +

    A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data

    +

    This is used internally by API functions such as line to enable it to accept more expressive indices

    +
    class::textinfo numeric_chunkrange startidx endidx
    +

    A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data

    +
    class::textinfo normalize_indices startidx endidx max
    +

    A utility to convert some of the Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max

    +

    Basic addition and subtraction expressions such as 4-1 5+2 are accepted

    +

    startidx higher than endidx is allowed

    +

    Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max

    +
    +
  2. +
+
+

Namespace punk::fileline

+

Core API functions for punk::fileline

+
+
+
+

Namespace punk::fileline::lib

+

Secondary functions that are part of the API

+
+
lib::range_spans_chunk_boundaries start end chunksize
+

Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range.

+
+ +
integer start
+

zero-based start index of range

+
integer end
+

zero-based end index of range

+
integer chunksize
+

Number of bytes/characters in chunk

+
+

returns a dict with the keys is_span and boundaries

+

is_span 0|1 indicates if the range specified spans a boundary of chunksize

+

boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize

+

e.g

+
+    range_spans_chunk_boundaries 10 1750 512
+    is_span 1 boundaries {512 1024 1536}
+
+

This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75

+
+
+
+

Internal

+

Namespace punk::fileline::system

+

Internal functions that are not part of the API

+
+
+ + +
diff --git a/src/embedded/www/doc/files/punk/_module_path-0.1.0.tm.html b/src/embedded/www/doc/files/punk/_module_path-0.1.0.tm.html index 1f9e868e..24a2a427 100644 --- a/src/embedded/www/doc/files/punk/_module_path-0.1.0.tm.html +++ b/src/embedded/www/doc/files/punk/_module_path-0.1.0.tm.html @@ -190,8 +190,8 @@

ie - the driveletter alone in paths such as c:/etc will still be case insensitive. (ie c:/ETC/* will match C:/ETC/blah but not C:/etc/blah)

Explicitly specifying -nocase 0 will require the entire case to match including the driveletter.

treefilenames basepath tailglob ?option value...?
-

basic (glob based) list of filenames matching tailglob - recursive -no natsorting - so order is dependent on filesystem

+

basic (glob based) list of filenames matching tailglob - recursive

+

no natsorting - so order is dependent on filesystem

relative reference location

Taking two directory paths, a reference and a location, computes the path of the location relative to the reference.

@@ -210,14 +210,31 @@ no natsorting - so order is dependent on filesystem

Will return a single dot "." if the paths are the same

  • Notes:

    Both paths must be the same type - ie both absolute or both relative

    -

    Case sensitive. ie relative /etc /etC +

    Case sensitive. ie punk::path::relative /etc /etC will return ../etC

    On windows, the drive-letter component (only) is not case sensitive

    -

    ie relative c:/etc C:/etc returns .

    -

    but relative c:/etc C:/Etc returns ../Etc

    -

    On windows, if the paths are absolute and specifiy different volumes, only the location will be returned. - ie relative c:/etc d:/etc/blah - returns d:/etc/blah

  • +
    +    P% punk::path::relative c:/etc  C:/etc
    +    -  .
    +
    +

    The part following the driveletter is case sensitive so in the following cases it recognises the driveletter matches but not the tail

    +
    +    P% punk::path::relative c:/etc C:/Etc
    +    -  ../Etc
    +
    +

    On windows, if the paths are absolute and specify different volumes, only the location will be returned.

    +
    +    P% punk::path::relative c:/etc d:/etc/blah
    +    -  d:/etc/blah
    +
    +

    Unix-like examples:

    +
    +   P% punk::path::relative /usr/local/etc/ /usr/local/etc/somewhere/below
    +   - somewhere/below
    +   P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here
    +   - ../../lib/here
    +
    + diff --git a/src/embedded/www/doc/toc.html b/src/embedded/www/doc/toc.html index 0bc5e90c..55deb295 100644 --- a/src/embedded/www/doc/toc.html +++ b/src/embedded/www/doc/toc.html @@ -29,10 +29,14 @@ capability provider and handler plugin system +punkshell_module_punk::fileline +file line-handling utilities + + punkshell_module_punk::mix::commandset::project pmix commandset - project - + punkshell_module_punk::path Filesystem path utilities diff --git a/src/embedded/www/index.html b/src/embedded/www/index.html index 57292163..7bd7a53e 100644 --- a/src/embedded/www/index.html +++ b/src/embedded/www/index.html @@ -13,7 +13,7 @@ ]

    Keyword Index



    + + + - + + + + + + + +
    @@ -33,6 +33,11 @@ Keywords: F
    file + punkshell_module_punk::fileline +
    filesystem punkshell_module_punk::path @@ -40,14 +45,19 @@
    Keywords: M
    module - punkshell_module_punk::cap · punkshell_module_punk::path + punkshell_module_punk::cap · punkshell_module_punk::fileline · punkshell_module_punk::path
    Keywords: P
    parse + punkshell_module_punk::fileline +
    path @@ -79,5 +89,13 @@ punkshell · punkshell__project_changes · punkshell__project_intro
    +Keywords: T +
    text + punkshell_module_punk::fileline +
    \ No newline at end of file diff --git a/src/embedded/www/toc.html b/src/embedded/www/toc.html index 108b5dbc..1544975e 100644 --- a/src/embedded/www/toc.html +++ b/src/embedded/www/toc.html @@ -29,10 +29,14 @@ capability provider and handler plugin system +punkshell_module_punk::fileline +file line-handling utilities + + punkshell_module_punk::mix::commandset::project pmix commandset - project - + punkshell_module_punk::path Filesystem path utilities diff --git a/src/modules/punk/cap-999999.0a1.0.tm b/src/modules/punk/cap-999999.0a1.0.tm index fbd2df13..1f2b71a0 100644 --- a/src/modules/punk/cap-999999.0a1.0.tm +++ b/src/modules/punk/cap-999999.0a1.0.tm @@ -232,6 +232,7 @@ namespace eval punk::cap { #such unregistered capabilitynames may be used just to flag something, or have datamembers significant to callers cooperatively interested in that capname. #we allow registering a capability with an empty handler (capnamespace) - but this means another handler could be registered later. proc register_capabilityname {capname capnamespace} { + puts stderr "REGISTER_CAPABILITYNAME $capname $capnamespace" variable caps variable pkgcapsdeclared variable pkgcapsaccepted @@ -245,7 +246,8 @@ namespace eval punk::cap { #as handlers can be used to validate during provider registration - ideally handlers should be registered before any pkgs call register_package #we allow loading a handler later though - but will need to validate existing data from pkgs that have already registered as providers if {[set hdlr [capability_get_handler $capname]] ne ""} { - error "register_capabilityname cannot register capability:$capname with handler:$capnamespace. There is already a registered handler:$hdlr" + puts stderr "register_capabilityname cannot register capability:$capname with handler:$capnamespace. 
There is already a registered handler:$hdlr" + return } #assert: capnamespace may or may not be empty string, capname may or may not already exist in caps dict, caps $capname providers may have existing entries. dict set caps $capname handler $capnamespace diff --git a/src/modules/punk/fileline-999999.0a1.0.tm b/src/modules/punk/fileline-999999.0a1.0.tm new file mode 100644 index 00000000..b1f4eb8b --- /dev/null +++ b/src/modules/punk/fileline-999999.0a1.0.tm @@ -0,0 +1,829 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2024 +# +# @@ Meta Begin +# Application punk::fileline 999999.0a1.0 +# Meta platform tcl +# Meta license BSD +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin punkshell_module_punk::fileline 0 999999.0a1.0] +#[copyright "2024"] +#[titledesc {file line-handling utilities}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk fileline}] [comment {-- Description at end of page heading --}] +#[require punk::fileline] +#[keywords module text parse file] +#[description] +#[para] - + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para]Utilities for in-memory analysis of text file data as both line data and byte/char-counted data whilst preserving the line-endings (even if mixed) +#[para]This is important for certain text files where examining the number of chars/bytes is important +#[para]For example - windows .cmd/.bat files need some byte counting to determine if labels lie on chunk boundaries and need to be 
moved. +#[para]Despite including the word 'file', the library doesn't deal with reading/writing to the filesystem. It is for operating on text-file like data. +#[subsection Concepts] +#[para]A chunk of textfile data (possibly representing a whole file - but usually at least a complete set of lines) is loaded into a punk::fileline::class::textinfo instance at object creation. +#[example_begin] +# package require punk::fileline +# package require fileutil +# set rawdata [lb]fileutil::cat data.txt -translation binary[rb] +# punk::fileline::class::textinfo create obj_data $rawdata +# puts stdout [lb]obj_data linecount[rb] +#[example_end] +#[subsection Notes] +#[para]Line records are referred to by a zero-based index instead of a one-based index as is commonly used when displaying files. +#[para]This is for programming consistency and convenience, and the module user should do their own conversion to one-based indexing for line display or messaging if desired. +#[para]No support for lone carriage-returns being interpreted as line-endings. +#[para]CR line-endings that are intended to be interpreted as such should be mapped to something else before the data is supplied to this module. 
+ +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::fileline +#[list_begin itemized] + +package require Tcl 8.6 +#*** !doctools +#[item] [package {Tcl 8.6}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +namespace eval punk::fileline::class { + namespace export * + #*** !doctools + #[subsection {Namespace punk::fileline::class}] + #[para] class definitions + if {[info commands [namespace current]::textinfo] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + + #uses zero based indexing. Caller can add 1 for line numbers + oo::class create [namespace current]::textinfo { + #*** !doctools + #[enum] CLASS [class textinfo] + #[list_begin definitions] + # [para] [emph METHODS] + + variable o_chunk + variable o_chunk_epoch + variable o_payloadlist + variable o_linemap + variable o_line_epoch + variable o_LF_C + variable o_CRLF_C + + constructor {datachunk args} { + #*** !doctools + #[call class::textinfo [method constructor] [arg datachunk] [opt {option value...}]] + #[para] Constructor for textinfo object which represents a chunk or all of a file + #[para] datachunk should be passed with the file data including line-endings as-is for full functionality. 
ie use something like: + #[example_begin] + # fconfigure $fd -translation binary + # set chunkdata [lb]read $fd[rb]] + #or + # set chunkdata [lb]fileutil::cat -translation binary[rb] + #[example_end] + #[para] when loading the data + set o_chunk $datachunk + set crlf_lf_placeholders [list \uFFFF \uFFFE] ;#defaults - if already exist in file - error out with message + set defaults [dict create\ + -substitutionmap {}\ + -crlf_lf_placeholders $crlf_lf_placeholders\ + ] + set known_opts [dict keys $defaults] + foreach {k v} $args { + if {$k ni $known_opts} { + error "textinfo::constructor error: unknown option '$k'. Known options: $known_opts" + } + } + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- + set opt_substitutionmap [dict get $opts -substitutionmap] ;#review - can be done by caller - or a loadable -policy + set opt_crlf_lf_placeholders [dict get $opts -crlf_lf_placeholders] + # -- --- --- --- --- --- --- + + if {[llength $opt_crlf_lf_placeholders] != 2 || [string length [lindex $opt_crlf_lf_placeholders 0]] !=1 || [string length [lindex $opt_crlf_lf_placeholders 1]] !=1} { + error "textinfo::constructor error: -crlf_lf_placeholders requires a list of exactly 2 chars" + } + lassign $opt_crlf_lf_placeholders o_LF_C o_CRLF_C + if {[string first $o_LF_C $o_chunk] >=0} { + set decval [scan $o_LF_C %c] + if {$decval < 32 || $decval > 127} { + set char_desc "(decimal value $decval)" + } else { + set char_desc "'$o_LF_C' (decimal value $decval)" + } + error "textinfo::constructor error: rawfiledata already contains linefeed substitution character $char_desc specified as first element of -crlf_lf_placeholders" + } + if {[string first $o_CRLF_C $o_chunk] >=0} { + set decval [scan $o_CRLF_C %c] + if {$decval < 32 || $decval > 127} { + set char_desc "(decimal value $decval)" + } else { + set char_desc "'$o_CRLF_C' (decimal value $decval)" + } + error "textinfo::constructor error: rawfiledata already contains carriagereturn-linefeed substitution 
character $char_desc specified as second element of -crlf_lf_placeholders" + } + if {$o_LF_C eq $o_CRLF_C} { + puts stderr "WARNING: same substitution character used for both elements of -crlf_lf_placeholders - byte counting may be off if file contains mixed line-endings" + } + set o_chunk_epoch "initial" + set o_line_epoch "" + my regenerate_lines + + } + + method chunk {chunkstart chunkend} { + #*** !doctools + #[call class::textinfo [method chunk] [arg chunkstart] [arg chunkend]] + #[para]Return a range of bytes from the underlying raw chunk data. + #[para] e.g The following retrieves the entire chunk + #[para] objName chunk 0 end + return [string range $o_chunk $chunkstart $chunkend] + } + method chunklen {} { + #*** !doctools + #[call class::textinfo [method chunklen]] + #[para] Number of bytes/characters in the raw data of the file + return [string length $o_chunk] + } + method linecount {} { + #*** !doctools + #[call class::textinfo [method linecount]] + #[para] Number of lines in the raw data of the file, counted as per the policy in effect + return [llength $o_payloadlist] + } + + + method line {lineindex} { + #*** !doctools + #[call class::textinfo [method line] [arg lineindex]] + #[para]Reconstructs and returns the raw line using the payload and per-line stored line-ending metadata + #[para]A 'line' may be returned without a line-ending if the unerlying chunk had trailing data without a line-ending (or the chunk was loaded under a non-standard -policy setting) + #[para]Whilst such data may not conform to definitions (e.g POSIX) of the terms 'textfile' and 'line' - it is useful here to represent it as a line with metadata le set to "none" + #[para]To return just the data which might more commonly be needed for dealing with lines, use the [method linepayload] method - which returns the line data minus line-ending + + lassign [my numeric_linerange $lineindex 0] lineindex + + set le [dict get $o_linemap $lineindex le] + set le_chars [dict get [dict create lf 
\n crlf \r\n none ""] $le] + return [lindex $o_payloadlist $lineindex]$le_chars + } + method linepayload {lineindex} { + #*** !doctools + #[call class::textinfo [method linepayload] [arg lineindex]] + #[para]Return the text of the line indicated by the zero-based lineindex + #[para]The line-ending is not returned in the data - but is still stored against this lineindex + #[para]Line Metadata such as the line-ending for a particular line and the byte/character range it occupies within the chunk can be retrieved with the [method linemeta] method + #[para]To retrieve both the line text and metadata in a single call the [method lineinfo] method can be used + #[para]To retrieve an entire line including line-ending use the [method line] method. + lassign [my numeric_linerange $lineindex 0] lineindex + return [lindex $o_payloadlist $lineindex] + } + method linemeta {lineindex} { + #*** !doctools + #[call class::textinfo [method linemeta] [arg lineindex]] + #[para]Return a dict of the metadata for the line indicated by the zero-based lineindex + #[para]Keys returned include + #[list_begin itemized] + #[item] le + #[para] A string representing the type of line-ending: crlf|lf|none + #[item] linelen + #[para] The number of characters/bytes in the whole line including line-ending if any + #[item] payloadlen + #[para] The number of character/bytes in the line excluding line-ending + #[item] start + #[para] The zero-based index into the associated raw file data indicating at which byte/character index this line begins + #[item] end + #[para] The zero-based index into the associated raw file data indicating at which byte/character index this line ends + #[para] This end-point corresponds to the last character of the line-ending if any - not necessarily the last character of the line's payload + #[list_end] + lassign [my numeric_linerange $lineindex 0] lineindex + dict get $o_linemap $lineindex + } + method lineinfo {lineindex} { + #*** !doctools + #[call class::textinfo [method 
lineinfo] [arg lineindex]] + #[para]Return a dict of the metadata and text for the line indicated by the zero-based lineindex + #[para]This returns the same info as the [method linemeta] with an added key of 'payload' which is the text of the line without line-ending. + #[para]The 'payload' value is the same as is returned from the [method linepayload] method. + lassign [my numeric_linerange $lineindex 0] lineindex ;#convert lineindex to canonical number e.g 1_000 -> 1000 end -> highest index + return [dict create lineindex $lineindex {*}[dict get $o_linemap $lineindex] payload [lindex $o_payloadlist $lineindex]] + } + method lineinfolist {startidx endidx} { + #*** !doctools + #[call class::textinfo [method lineinfolist] [arg startidx] [arg endidx]] + #[para]Returns list of lineinfo dicts for each line in line index range startidx to endidx + lassign [my numeric_linerange $startidx $endidx] startidx endidx + set chunkstart [dict get $o_linemap $startidx start] + set chunkend [dict get $o_linemap $endidx end] + set line_list [my chunkrange_to_lineinfolist $chunkstart $chunkend] ;# assert - no need to view truncations as we've picked start and end of complete lines + #verify sanity + set l_start [lindex $line_list 0] + if {[set idx_start [dict get $l_start lineindex]] ne $startidx} { + error "lineinfolist first lineindex $idx_start doesn't match startidx $startidx" + } + set l_end [lindex $line_list end] + if {[set idx_end [dict get $l_end lineindex]] ne $endidx} { + error "lineinfolist last lineindex $idx_end doesn't match endidx $endidx" + } + return $line_list + } + + method linerange_to_chunkrange {startidx endidx} { + #*** !doctools + #[call class::textinfo [method linerange_to_chunkrange] [arg startidx] [arg endidx]] + + lassign [my numeric_linerange $startidx $endidx] startidx endidx + #inclusive range + return [list [dict get $o_linemap $startidx start] [dict get $o_linemap $endidx end]] + } + method linerange_to_chunk {startidx endidx} { + #*** !doctools + 
#[call class::textinfo [method linerange_to_chunk] [arg startidx] [arg endidx]] + set chunkrange [my linerange_to_chunkrange $startidx $endidx] + return [string range $o_chunk [lindex $chunkrange 0] [lindex $chunkrange 1]] + } + method lines {startidx endidx} { + #*** !doctools + #[call class::textinfo [method lines] [arg startidx] [arg endidx]] + lassign [my numeric_linerange $startidx $endidx] startidx endidx + set linelist [list] + set le_map [dict create lf \n crlf \r\n none ""] + for {set i $startidx} {$i <= $endidx} {incr i} { + lappend linelist "[lindex $o_payloadlist $i][dict get $le_map [dict get $o_linemap $i le]]" + } + return $linelist + } + method linepayloads {startidx endidx} { + #*** !doctools + #[call class::textinfo [method linepayloads] [arg startidx] [arg endidx]] + return [lrange $o_payloadlist $startidx $endidx] + } + method chunkrange_to_linerange {chunkstart chunkend} { + #*** !doctools + #[call class::textinfo [method chunkrange_to_linerange] [arg chunkstart] [arg chunkend]] + lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend + + set linestart -1 + for {set i 0} {$i < [llength $o_payloadlist]} {incr i} { + if {($chunkstart >= [dict get $o_linemap $i start]) && ($chunkstart <= [dict get $o_linemap $i end])} { + set linestart $i + break + } + } + if {$linestart == -1} { + error "Line with range in chunk spanning start index $chunkstart not found" + } + set lineend -1 + for {set i [expr {[llength $o_payloadlist] -1}]} {$i >=0} {incr i -1} { + if {($chunkend >= [dict get $o_linemap $i start]) && ($chunkend <= [dict get $o_linemap $i end])} { + set lineend $i + break + } + } + if {$lineend == -1} { + error "Line with range spanning end index $chunkend not found" + } + return [list $linestart $lineend] + } + method chunkrange_to_lineinfolist {chunkstart chunkend args} { + #*** !doctools + #[call class::textinfo [method chunkrange_to_lineinfolist] [arg chunkstart] [arg chunkend] [opt {option value...}]] + #[para]Return a 
list of dicts each with structure like the result of the [method lineinfo] method - but possibly with extra keys for truncation information if -show_truncated 1 is supplied + #[para]The truncation key in a lineinfo dict may be returned for first and/or last line in the resulting list. + #[para]truncation shows the shortened (missing bytes on left and/or right side) part of the entire line (potentially including line-ending or even partial line-ending) + #[para]Note that this truncation info is only in the return value of this method - and will not be reflected in [method lineinfo] queries to the main chunk. + + lassign [my numeric_chunkrange $chunkstart $chunkend] chunkstart chunkend + set defaults [dict create\ + -show_truncated 0\ + ] + set known_opts [dict keys $defaults] + foreach {k v} $args { + if {$k ni $known_opts} { + error "chunkrange_to_lines error: unknown option '$k'. Known options: $known_opts" + } + } + set opts [dict merge $defaults $args] + # -- --- --- --- --- --- --- --- + set opt_show_truncated [dict get $opts -show_truncated] + # -- --- --- --- --- --- --- --- + + set infolist [list] + set linerange [my chunkrange_to_linerange $chunkstart $chunkend] + lassign $linerange start_lineindex end_lineindex + + #if -show_truncated + #return extra keys for first and last items (which may be the same item if chunkrange is entirely within a line) + #add is_truncated 0|1 to all lines + #Even if the start/end line is not fully within the chunkrange ie truncated - the 'payload' key will contain the original untruncated data + ########################### + # first line may have payload tail truncated - or just linefeed, or even a split linefeed + ########################### + set first [dict create lineindex $start_lineindex {*}[dict get $o_linemap $start_lineindex] payload [lindex $o_payloadlist $start_lineindex]] + set start_info [dict get $o_linemap $start_lineindex] + if {$opt_show_truncated} { + #line1 + if {$chunkstart > [dict get $start_info start]} { 
+ #there is lhs truncation + set payload [lindex $o_payloadlist $start_lineindex] + set line_start [dict get $start_info start] + set le_chars [dict get [dict create lf \n crlf \r\n none ""] [dict get $start_info le]] + set payload_and_le "${payload}${le_chars}" + set split [expr {$chunkstart - $line_start}] + set truncated [string range $payload_and_le $split end] + set lhs [string range $payload_and_le 0 $split-1] + + dict set first truncated $truncated + dict set first truncatedside [list left] ;#truncatedside is a list which may have 'right' added if last line is same as first line + dict set first truncatedleft $lhs + dict set first is_truncated 1 + } else { + dict set first is_truncated 0 + } + } + ########################### + + ########################### + # middle lines if any - no truncation + ########################### + #difference in indexes of 1 would only mean 2 items to return + set middle_list [list] + if {($end_lineindex - $start_lineindex) > 1} { + for {set i [expr {$start_lineindex +1}]} {$i <= [expr {$end_lineindex -1}] } {incr i} { + #lineindex is key into main list + lappend middle_list [dict create lineindex $i {*}[dict get $o_linemap $i] payload [lindex $o_payloadlist $i] is_truncated 0] + } + } + ########################### + + ########################### + # tail line may have beginning or all of payload truncated - linefeed may be split if crlf + # may be same line as first line - in which case truncation at beginning as well + if {$end_lineindex == $start_lineindex} { + #same record + set end_info $start_info + if {$opt_show_truncated} { + if {$chunkend < [dict get $end_info end]} { + #lhere is rhs truncation + if {[dict get $first is_truncated]} { + dict set first truncatedside [list left right] + } else { + dict set first is_truncated 1 + dict set first truncatedside [list right] + } + #do rhs truncation - possibly in addition to existing lhs truncation + # ... 
+ if {"left" ni [dict get $first truncatedside]} { + #rhs truncation only + set payload [lindex $o_payloadlist $end_lineindex] + set line_start [dict get $end_info start] + set le_chars [dict get [dict create lf \n crlf \r\n none ""] [dict get $end_info le]] + set payload_and_le "${payload}${le_chars}" + puts "payload_and_le: $payload_and_le" + puts "LENGHT: [string length $payload_and_le]" + #--- + set split [expr {$chunkend - $line_start}] + set truncated [string range $payload_and_le 0 $split] + set rhs [string range $payload_and_le $split+1 end] + #--- + dict set first truncated $truncated + dict set first truncatedside [list right] + dict set first truncatedright $rhs + } else { + #truncated on both sides + } + } + } + #no middle or last to append + lappend infolist $first + } else { + set last [dict create lineindex $end_lineindex {*}[dict get $o_linemap $end_lineindex] payload [lindex $o_payloadlist $end_lineindex]] + set end_info [dict get $o_linemap $end_lineindex] + if {$opt_show_truncated} { + if {$chunkend < [dict get $end_info end]} { + #there is rhs truncation - and last line in range is a different line to first one + dict set last is_truncated 1 + set payload [lindex $o_payloadlist $end_lineindex] + set line_start [dict get $end_info start] + set line_end [dict get $end_info end] + set le [dict get $end_info le] + set le_size [dict get {lf 1 crlf 2 none 0} $le] + set le_chars [dict get [dict create lf \n crlf \r\n none ""] $le] + set payload_and_le "${payload}${le_chars}" + + set split [expr {$chunkend - $line_start}] + set truncated [string range $payload_and_le 0 $split] + set rhs [string range $payload_and_le $split+1 end] + + dict set last truncated $truncated + dict set last truncatedside [list right] + dict set last truncatedright $rhs + #this has the effect that truncating the rhs by 1 can result in truncated being larger than original payload for crlf lines - as payload now sees the cr + #this is a bit unintuitive - but probably best 
reflects the reality. The truncated value is the truncated 'line' rather than the truncated 'payload'
+                        }
+                    }
+
+
+                    lappend infolist $first
+                    if {[llength $middle_list]} {
+                        lappend infolist {*}$middle_list
+                    }
+                    lappend infolist $last
+                }
+                ###########################
+
+                return $infolist
+            }
+
+            method chunk_le_counts {chunkstart chunkend} {
+                #Tally the line-ending names (le key) of the lineinfo dicts returned by chunkrange_to_lineinfolist for the supplied chunk range.
+                #Returns a dict with keys lf, crlf and unterminated. Any le value other than lf or crlf is counted as unterminated.
+                set infolines [my chunkrange_to_lineinfolist $chunkstart $chunkend]
+                set lf_count 0
+                set crlf_count 0
+                set none_count 0
+                foreach d $infolines {
+                    set le [dict get $d le]
+                    if {$le eq "lf"} {
+                        incr lf_count
+                    } elseif {$le eq "crlf"} {
+                        incr crlf_count
+                    } else {
+                        incr none_count
+                    }
+                }
+                return [dict create lf $lf_count crlf $crlf_count unterminated $none_count]
+            }
+
+            #todo - test last line and merge as necessary with first line from new chunk - generate line data only for appended chunk
+            method append_chunk {rawchunk} {
+                error "sorry - unimplemented"
+            }
+
+            method numeric_linerange {startidx endidx} {
+                #*** !doctools
+                #[call class::textinfo [method numeric_linerange] [arg startidx] [arg endidx]]
+                #[para]A helper to return any Tcl-style end end-x values given to startidx or endidx; converted to their specific values based on the current state of the underlying line data
+                #[para]This is used internally by API functions such as [method line] to enable it to accept more expressive indices
+                return [my normalize_indices $startidx $endidx [expr {[dict size $o_linemap]-1}]]
+            }
+            method numeric_chunkrange {startidx endidx} {
+                #*** !doctools
+                #[call class::textinfo [method numeric_chunkrange] [arg startidx] [arg endidx]]
+                #[para]A helper to return any Tcl-style end end-x entries supplied to startidx or endidx; converted to their specific values based on the current state of the underlying chunk data
+                return [my normalize_indices $startidx $endidx [expr {[string length $o_chunk]-1}]]
+            }
+            method normalize_indices {startidx endidx max} {
+                #*** !doctools
+                #[call class::textinfo [method normalize_indices] [arg startidx] [arg endidx] [arg max]]
+                #[para]A utility to convert some of the Tcl-style list-index expressions such as end, end-1 etc to valid indices in the range 0 to the supplied max
+                #[para]Basic addition and subtraction expressions such as 4-1 5+2 are accepted
+                #[para]Signed integers such as +2 or -0 are accepted and evaluated before the bounds check
+                #[para]startidx higher than endidx is allowed
+                #[para]Unlike Tcl's index expressions - we raise an error if the calculated index is out of bounds 0 to max
+                #[para]An index that is not an integer, end, or a simple int-int int+int end-int end+int expression raises an error
+                set original_startidx $startidx
+                set original_endidx $endidx
+                set startidx [string map [list _ ""] $startidx] ;#don't barf on Tcl 8.7+ underscores in numbers - we can't just use expr because it will not handle end-x
+                set endidx [string map [list _ ""] $endidx]
+                if {![string is digit -strict "$startidx$endidx"]} {
+                    foreach whichvar [list start end] {
+                        #alias index to startidx or endidx so the result is written back to the right variable
+                        upvar 0 ${whichvar}idx index
+                        if {[string is digit -strict $index]} {
+                            continue
+                        }
+                        if {"end" eq $index} {
+                            set index $max
+                        } elseif {[regexp {^\s*(end|[0-9]+)\s*([-+])\s*([0-9]+)\s*$} $index -> A op B]} {
+                            #end-int end+int int-int or int+int - like lrange etc we don't accept arbitrarily complex expressions
+                            #The operands are validated by the regexp - so they are safe to hand to a braced expr.
+                            #review - end+x will just result in out of bounds error in final test - as desired
+                            if {$A eq "end"} {
+                                set A $max
+                            }
+                            if {$op eq "-"} {
+                                set index [expr {$A - $B}]
+                            } else {
+                                set index [expr {$A + $B}]
+                            }
+                        } elseif {[string is entier -strict $index]} {
+                            #Signed literal such as +2 or -0.
+                            #This must be tested after the binary-expression pattern and without splitting on the sign,
+                            #otherwise the left operand is an empty string and expr raises an error.
+                            set index [expr {$index}]
+                        } else {
+                            #We avoid unbraced expr here - as we're potentially dealing with ranges that may come from external sources.
+                            #Anything unrecognised is rejected outright rather than being allowed through to the bounds comparison as a string.
+                            error "Bad $whichvar index '[set original_${whichvar}idx]'. Expected an integer, 'end', or a simple expression such as end-1 or 4+2"
+                        }
+                    }
+                }
+                #Unlike Tcl lrange,lindex etc - we don't want to support out of bound indices.
+                #show the supplied index and what it was mapped to in the error message.
+                if {$startidx < 0 || $startidx > $max} {
+                    error "Bad start index '$original_startidx'. $startidx out of bounds 0 - $max"
+                }
+                if {$endidx < 0 || $endidx > $max} {
+                    error "Bad end index '$original_endidx'. $endidx out of bounds 0 - $max"
+                }
+                return [list $startidx $endidx]
+            }
+
+            method regenerate_lines {} {
+                #*** !doctools
+                #[call class::textinfo [method regenerate_lines]]
+                #[para]generate a list of lines from the current state of the stored raw data chunk and keep a map of line-endings indexed by lineindex
+                #[para]This is called automatically by the Constructor during object creation
+                #[para]It is exposed in the API experimentally - as chunk and line manipulation functions are considered.
+                #[para]TODO - review whether such manual control will be necessary/desirable
+
+                #we don't store the actual line-endings as characters (for better layout of debug/display of data) - instead we store names lf|crlf|none
+
+                # first split on lf - then crlf. As we've replaced with single substution chars - the order doesn't matter.
+                set o_payloadlist [list]
+                set o_linemap [dict create]
+                #Replace each line-ending with its single placeholder character.
+                #\r\n is listed first in the map so a crlf is consumed as a unit - only a bare \n becomes the lf placeholder.
+                set crlf_replace [list \r\n $o_CRLF_C \n $o_LF_C]
+                set normalised_data [string map $crlf_replace $o_chunk]
+
+                set lf_lines [split $normalised_data $o_LF_C]
+
+                set idx 0
+                set lf_count 0 ;#counts lf-terminated lines and unterminated lines
+                set crlf_count 0
+                set filedata_offset 0
+                set i 0
+                set imax [expr {[llength $lf_lines]-1}]
+                foreach lfln $lf_lines {
+                    #lfln is the text between two lf placeholders (or the start/end of the chunk).
+                    #After splitting it on the crlf placeholder, every part except the last was terminated by a crlf.
+                    set crlf_parts [split $lfln $o_CRLF_C]
+                    set pending [list] ;#flat list of payload,le,le_size triples found in this segment
+                    foreach crlfpart [lrange $crlf_parts 0 end-1] {
+                        lappend pending $crlfpart crlf 2
+                    }
+                    set lfpart [lindex $crlf_parts end]
+                    if {$i < $imax} {
+                        #more lf segments to come - so this part was terminated by the lf we split on.
+                        #That holds whether or not it has a payload, and whether or not crlf lines preceded it in the same segment.
+                        #(recording a non-empty part here as unterminated would put every following start/end offset out by one)
+                        lappend pending $lfpart lf 1
+                    } elseif {[string length $lfpart] > 0} {
+                        #last segment has chars - therefore there was no trailing line-ending
+                        lappend pending $lfpart none 0
+                    }
+                    #otherwise: empty space after the last line-ending
+                    #not really a line - we get here from splitting on our replacement chars
+                    #An editor might display this pseudo-line with a line number - but we won't treat it as one here
+
+                    foreach {payload le le_size} $pending {
+                        lappend o_payloadlist $payload
+                        set payloadlen [string length $payload]
+                        #we include line-ending in byte count for a line.
+                        set linelen [expr {$payloadlen + $le_size}]
+                        dict set o_linemap $idx [list le $le linelen $linelen payloadlen $payloadlen start $filedata_offset end [expr {$filedata_offset + $linelen -1}]]
+                        incr filedata_offset $linelen
+                        if {$le eq "crlf"} {
+                            incr crlf_count
+                        } else {
+                            incr lf_count
+                        }
+                        incr idx
+                    }
+                    incr i
+                }
+                #sanity check - every stored line should have been counted exactly once
+                set le_count [expr {$lf_count + $crlf_count}]
+                if {$le_count != [llength $o_payloadlist]} {
+                    puts stderr "fileline::class::textinfo warning. regenerate_lines lf_count: $lf_count + crlf_count: $crlf_count does not equal length of lines stored: [llength $o_payloadlist]"
+                }
+
+            }
+            method regenerate_chunk {} {
+                #placeholder - the body is currently empty
+
+            }
+
+
+            #*** !doctools
+            #[list_end]
+        }
+        #*** !doctools
+        #[list_end] [comment {--- end class enumeration ---}]
+    }
+}
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+# Base namespace
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+namespace eval punk::fileline {
+    namespace export *
+    #variable xyz
+
+    #*** !doctools
+    #[subsection {Namespace punk::fileline}]
+    #[para] Core API functions for punk::fileline
+    #[list_begin definitions]
+
+
+
+
+
+
+    #*** !doctools
+    #[list_end] [comment {--- end definitions namespace punk::fileline ---}]
+}
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+# Secondary API namespace
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+namespace eval punk::fileline::lib {
+    namespace export *
+    namespace path [namespace parent]
+    #*** !doctools
+    #[subsection {Namespace punk::fileline::lib}]
+    #[para] Secondary functions that are part of the API
+    #[list_begin definitions]
+
+
+
+    proc range_spans_chunk_boundaries {start end chunksize} {
+        #*** !doctools
+        #[call [fun lib::range_spans_chunk_boundaries] [arg start] [arg end] [arg chunksize]]
+        #[para]Takes start and end offset, generally representing bytes or character indices, and computes a list of boundaries at multiples of the chunksize that are spanned by the start and end range.
+        #[list_begin arguments]
+        # [arg_def integer start]
+        # [para] zero-based start index of range
+        # [arg_def integer end]
+        # [para] zero-based end index of range
+        # [arg_def integer chunksize]
+        # [para] Number of bytes/characters in chunk - must be a positive integer
+        #[list_end]
+        #[para]returns a dict with the keys is_span and boundaries
+        #[para]is_span 0|1 indicates if the range specified spans a boundary of chunksize
+        #[para]boundaries contains a list of the spanned boundaries - which are always multiples of the chunksize
+        #[para]e.g
+        #[example_begin]
+        # range_spans_chunk_boundaries 10 1750 512
+        # is_span 1 boundaries {512 1024 1536}
+        #[example_end]
+        #[para] This function automatically uses lseq (if Tcl >= 8.7) when number of boundaries spanned is approximately greater than 75
+
+        #Reject a zero or negative chunksize up front.
+        #Zero would otherwise surface as a bare divide-by-zero from expr, and a negative step would stop the for loop in the pure-Tcl implementation from terminating.
+        if {![string is entier -strict $chunksize] || $chunksize < 1} {
+            error "range_spans_chunk_boundaries error: chunksize must be a positive integer. Received '$chunksize'"
+        }
+        if {[catch {package require Tcl 8.7}]} {
+            #only one implementation available for older Tcl
+            tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize
+        }
+        #integer division gives an estimate of how many boundaries the range can contain
+        if {(($end - $start) / $chunksize) < 75} {
+            tailcall punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize
+        } else {
+            tailcall punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize
+        }
+    }
+
+
+
+    #*** !doctools
+    #[list_end] [comment {--- end definitions namespace punk::fileline::lib ---}]
+}
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+#*** !doctools
+#[section Internal]
+namespace eval punk::fileline::system {
+    #*** !doctools
+    #[subsection {Namespace punk::fileline::system}]
+    #[para] Internal functions that are not part of the API
+
+
+    #for 8.7+ using lseq
+    #much faster when resultant boundary size is large
+    #Returns the same is_span/boundaries structure as _range_spans_chunk_boundaries_tcl
+    proc _range_spans_chunk_boundaries_lseq {start end chunksize} {
+        #round start up to the next multiple of chunksize (unless it already lies on one)
+        set smod [expr {$start % $chunksize}]
+        if {$smod != 0} {
+            set start [expr {$start + ($chunksize - $smod)}]
+        }
+        #Test for an empty range whether or not start needed aligning - so the empty result is produced here explicitly rather than left to lseq
+        if {$start > $end} {
+            return [list is_span 0 boundaries {}]
+        }
+        set boundaries [lseq $start to $end $chunksize]
+        return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries]
+    }
+
+    #faster than lseq for small number of resultant boundaries (~< 75) (which is a common use case)
+    #gets very slow (comparitively) with large resultsets
+    proc _range_spans_chunk_boundaries_tcl {start end chunksize} {
+        #round start up to the next multiple of chunksize (unless it already lies on one)
+        set smod [expr {$start % $chunksize}]
+        if {$smod != 0} {
+            set start [expr {$start + ($chunksize - $smod)}]
+        }
+        #collect each multiple of chunksize from the aligned start up to and including end
+        set boundaries [list]
+        for {set b $start} {$b <= $end} {incr b $chunksize} {
+            lappend boundaries $b
+        }
+        return [list is_span [expr {[llength $boundaries]>0}] boundaries $boundaries]
+    }
+
+    #developer aid - prints timings for the dispatching proc and each implementation
+    proc _range_spans_chunk_boundaries_TIMEIT {start end chunksize {repeat 1}} {
+        puts "main : [time {punk::fileline::lib::range_spans_chunk_boundaries $start $end $chunksize} $repeat]"
+        puts "tcl : [time {punk::fileline::system::_range_spans_chunk_boundaries_tcl $start $end $chunksize} $repeat]"
+        if {![catch {package require Tcl 8.7}]} {
+            puts "lseq : [time {punk::fileline::system::_range_spans_chunk_boundaries_lseq $start $end $chunksize} $repeat]"
+        }
+    }
+}
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+## Ready
+package provide punk::fileline [namespace eval punk::fileline {
+    variable pkg punk::fileline
+    variable version
+    set version 999999.0a1.0
+}]
+return
+
+#*** !doctools
+#[manpage_end]
+
diff --git a/src/modules/punk/fileline-buildversion.txt b/src/modules/punk/fileline-buildversion.txt
new file mode 100644
index 00000000..f47d01c8
--- /dev/null
+++ b/src/modules/punk/fileline-buildversion.txt
@@ -0,0 +1,3 @@
+0.1.0
+#First line must be a semantic version number
+#all other lines are ignored.
diff --git a/src/modules/punk/mix/commandset/doc-999999.0a1.0.tm b/src/modules/punk/mix/commandset/doc-999999.0a1.0.tm index a08d10f4..b5da9cd0 100644 --- a/src/modules/punk/mix/commandset/doc-999999.0a1.0.tm +++ b/src/modules/punk/mix/commandset/doc-999999.0a1.0.tm @@ -93,7 +93,10 @@ namespace eval punk::mix::commandset::doc { # -- --- --- --- --- --- puts stdout "BUILDING DOCS at $projectdir/src/embedded from src/doc" if {[catch { - + if {"::meta" eq [info commands ::meta]} { + puts stderr "There appears to be a leftover ::meta command which is presumed to be from doctools. Destroying object" + ::meta destroy + } punk::mix::cli::lib::kettle_call lib doc #Kettle doc diff --git a/src/modules/punk/mix/commandset/module-999999.0a1.0.tm b/src/modules/punk/mix/commandset/module-999999.0a1.0.tm index e2196f89..2d220e51 100644 --- a/src/modules/punk/mix/commandset/module-999999.0a1.0.tm +++ b/src/modules/punk/mix/commandset/module-999999.0a1.0.tm @@ -9,7 +9,7 @@ # @@ Meta Begin # Application punk::mix::commandset::module 999999.0a1.0 # Meta platform tcl -# Meta license +# Meta license BSD # @@ Meta End diff --git a/src/modules/punk/mix/commandset/scriptwrap-999999.0a1.0.tm b/src/modules/punk/mix/commandset/scriptwrap-999999.0a1.0.tm index fecf684b..0865a09a 100644 --- a/src/modules/punk/mix/commandset/scriptwrap-999999.0a1.0.tm +++ b/src/modules/punk/mix/commandset/scriptwrap-999999.0a1.0.tm @@ -20,6 +20,7 @@ package require punk::mix package require punk::mix::base +package require punk::fileline @@ -27,6 +28,10 @@ package require punk::mix::base namespace eval punk::mix::commandset::scriptwrap { namespace export * + namespace eval fileline { + namespace import ::punk::fileline::lib::* + namespace import ::punk::fileline::class::* + } #scriptpath allows templates command to use same custom template set as when multishell pointed to a filepath #it may or may not be within a project @@ -97,45 +102,8 @@ namespace eval punk::mix::commandset::scriptwrap { return $table } - 
proc range_spans_512_boundaries {start end} { - #todo - something more elegant - set is_span 0 - set lowmult [expr {$start / 512}] - set highmult [expr {$end / 512}] - - set test_boundaries [list] - for {set bindex $lowmult} {$bindex <= ($highmult + 1)} {incr bindex} { - lappend test_boundaries [expr {$bindex * 512}] - } - set lowboundary unset - set highboundary unset - foreach t $test_boundaries { - if {$lowboundary eq "unset"} { - if {$start <= $t } { - set lowboundary $t - } - } - if {$end >= $t} { - set highboundary $t - } - } - set boundaries [list] - foreach b $test_boundaries { - if {$lowboundary <= $b && $highboundary >= $b} { - lappend boundaries $b - } - } - - if {[llength $boundaries]} { - set is_span 1 - } - - if {$is_span} { - return [list is_span 1 boundaries $boundaries] - } else { - return [list is_span 0] - } - } + + #A batch file with unix line-endings is sensitive to label positioning. #batch file with windows crlf line endings can exhibit this problem - but probably only if specifically crafted with long lines deliberately designed to trigger it. #see: https://www.dostips.com/forum/viewtopic.php?t=8988#p58888 (Call and goto may fail when the batch file has Unix line endings) @@ -146,49 +114,102 @@ namespace eval punk::mix::commandset::scriptwrap { #The script should then be adjusted with comments and/or whitespace and checkoutput should be re-run to confirm there are no new boundary-spanning labels. #checkoutput needs to be run even on previously tested scriptwrapper templates because the final :exit label is beyond the payloads and so could span a 512 Byte block #It is more likely to catch issues if adjustments are made to the initial batch-script code in a template. + # + #cmd allows labels at call sites to span lines with line continuation character ^ + #target labels can't span lines with ^ - cmd doesn't recognise them. 
proc checkoutput {filepath args} { if {![file exists $filepath]} { error "punk::mix::commandset:scriptwrap error cannot find file '$filepath'" } + set crlf_lf_replacements [list \uFFFF \uFFFE] ;#defaults - if already exist in file - error out with message + # -ignore_rems 1 allows testing of alignment state if rems were stripped - todo - lf/crlf-preserving rem strip function set defaults [dict create\ -ignore_rems 0\ + -substitutionmap {}\ + -crlf_lf_replacements $crlf_lf_replacements\ ] + set known_opts [dict keys $defaults] + foreach {k v} $args { + if {$k ni $known_opts} { + error "checkoutput error - unknown option '$k'. Known options: $known_opts" + } + } set opts [dict merge $defaults $args] - set opt_ignore_rems [dict get $opts -ignore_rems] + # -- --- --- --- --- --- --- + set opt_ignore_rems [dict get $opts -ignore_rems] + set opt_substitutionmap [dict get $opts -substitutionmap] + set opt_crlf_lf_replacements [dict get $opts -crlf_lf_replacements] + # -- --- --- --- --- --- --- + + # #### load file #### + ##set raw_filedata [fcat $filepath -translation binary] + #don't use fcat/fileutil::cat - as we may need to look at data beyond a ctrl-z (\x1A) section + set fd [open $filepath r] + fconfigure $fd -translation binary + set raw_filedata [read $fd] + close $fd + # ################### + + + set objFile [fileline::textinfo new $raw_filedata] - set filedata [fcat $filepath] if {$opt_ignore_rems} { - set data "" - set skipped_rems 0 - foreach ln [split $filedata \n] { - set ln [string trim $ln] - if {[string match @REM* $ln] || [string match REM* $ln] || [string match @rem* $ln] || [string match rem* $ln]} { - #ignore - incr skipped_rems - } else { - append data $ln \n + #! todo + error "-ignore_rems unimplemented" + if 0 { + #todo - rebuild a raw_filedata value from the resultant lines + #review. @REM can appear after other commands and an ampersand for example. 
+ # - we are interested in stripping lines with leading REMs + # - need to work out if a REM line with dos line-continuation should + + set data "" + set skipped_rems 0 + foreach ln [split $filedata \n] { + set ln [string trim $ln] + if {[string match @REM* $ln] || [string match REM* $ln] || [string match @rem* $ln] || [string match rem* $ln]} { + #ignore + incr skipped_rems + } else { + append data $ln \n ;#!! + } } + puts stderr "Skipped $skipped_rems rem lines" + set dsize [string length $data] } - puts stderr "Skipped $skipped_rems rem lines" } else { - set data $filedata + set dsize [string length $raw_filedata] } - set lines_before_after 3 - set dsize [string length $data] - puts stdout "examining $dsize bytes of file $filepath" - puts "checking 512 byte boundaries from call sites - displaying $lines_before_after lines before and after" + puts stdout "Examining [$objFile chunklen] bytes of file $filepath for cmd script issues." + set le_info [$objFile chunk_le_counts 0 end] + set lf_count [dict get $le_info lf] + set crlf_count [dict get $le_info crlf] + set unterminated_count [dict get $le_info unterminated] + set total_count [expr {$lf_count + $crlf_count + $unterminated_count}] + puts stdout "lines ending in lf : $lf_count" + puts stdout "lines ending in crlf : $crlf_count" + puts stdout "unterminated lines : $unterminated_count" ;#commonly 1 for trailing data at end of file. More than one is likely to be an error - or perhaps a policy plugin with different concept of lines? 
+ puts stdout "crlf + lf + unterminated: $total_count" + puts stdout "line count : [$objFile linecount]" + if {$total_count != [$objFile linecount]} { + puts stdout "[a+ yellow bold]WARNING: Linecount mismatch with line-endings - seems fishy[a]" + } + if {$unterminated_count > 1} { + puts stdout "[a+ yellow bold]WARNING: More than one unterminated line reported - seems fishy[a]" + } + puts "Checking line based labels and 512 byte boundaries from call sites for possible labels and code execution points." set result "" - - + set line_count [$objFile linecount] + set callid 0 ;#id for callsite and objects created set file_offset 0 - set linenum 0 set error_labels [list] set warning_labels [list] - set file_lines [split $data \n] - foreach ln $file_lines { + for {set lineindex 0} {$lineindex < $line_count} {incr lineindex} { + set lineinfo [$objFile lineinfo $lineindex] + set ln [dict get $lineinfo payload] + set linenum [expr {$lineindex + 1}] + set callposn -1 - incr linenum set trimln [string trim $ln] if {[string match "rem *" $trimln] || [string match "@rem *" $trimln] || [string match "REM *" $trimln] || [string match "@REM *" $trimln]} { #ignore things that look like a call that are beind a REM @@ -196,26 +217,62 @@ namespace eval punk::mix::commandset::scriptwrap { foreach search_regex [list {(.*\s+|^)(@*call\s*:)(\S.*)} {(.*\s+|^)(@*CALL\s*:)(\S.*)} {(.*\s+|^)(@*goto\s*:)(\S.*)} {(.*\s+|^)(@*GOTO\s*:)(\S.*)}] { if {[regexp $search_regex $ln _m precall call labelplus]} { set callposn [expr {$file_offset + [string length $ln]}] ;#take callposn as end of line .. review - multiline statements? + #callposn affected by newlines? break } } + #todo - multiple calls on one line. - also - determine what cmd considers the starting point for forward scanning when call is in a structure such as an if statement. 
if {$callposn != -1} { - puts stdout "[a+ bold cyan]Found callsite '${call}${labelplus}' at byte $callposn line_num:$linenum line: $ln[a]" + puts stdout "[a+ bold cyan]CALLSITE on line $linenum ending at byte $callposn[a]" + set callsummary [string range "${call}${labelplus}" 0 100] + if {[string length $callsummary] < [string length ${call}${labelplus}]} { + puts stdout " CALLSITE: $callsummary (truncated to 100 bytes)" + } else { + puts stdout " CALLSITE: '${call}${labelplus}'" + } + puts stdout " [a+ cyan]FULLINE: $ln[a]" + + ################################## set labelpluswords [regexp -inline -all {\S+} $labelplus] ;#don't assume labelplus can be treated as Tcl list - use regexp to split - set label [lindex $labelpluswords 0] + #NOTE it is invalid to assume label always terminated by space - pair of % characters (variable substitution) can contain a space without terminating label + + set word1 [lindex $labelpluswords 0] + set word1len [string length $word1] + set labeltail [string range $labelplus $word1len end] + if {[string index $word1 end] eq "^"} { + if {![string length $labeltail]} { + #label + } + } else { + } + #todo batchlib::get_callsite_label $labelplus + + ################################## + + set label $word1 set labelsize [string length $label] #scan forward for labels at boundaries - set forward_data [string range $data $callposn end] - set dsize [string length $forward_data] + set forward_chunk [$objFile chunk $callposn end] + incr callid + set callvar "call-${callid}_fromline-${linenum}" + upvar 0 $callvar objForwardScan + set objForwardScan [fileline::textinfo new $forward_chunk] + + #Forward scan 1 - check at normal line boundaries - and see if collides with a chunk boundary - and if the label is obscured or ok + set dsize [$objForwardScan chunklen] set num_boundaries [expr {$dsize / 512} ] puts "scanning $dsize forward bytes in file for labels - num_boundaries: $num_boundaries" set scan_offset 0 set total_offset $file_offset set 
found_forward_label 0 - foreach scanline [split $forward_data \n] { - set line_bytes [expr {[string length $scanline] +1}] ;#+1 for unix lf - set line_start $total_offset - set line_end [expr {$total_offset + $line_bytes}] + foreach scanlineinfo [$objForwardScan lineinfolist 0 end] { + set line_bytes [dict get $scanlineinfo linelen] + set scanline [dict get $scanlineinfo payload] + set scanline_relstart [dict get $scanlineinfo start] + + set line_global_start $total_offset + set line_global_end [expr {$total_offset + $line_bytes}] + set trimscanline [string trim $scanline] if {[string match ":$label*" $trimscanline]} { incr found_forward_label @@ -231,36 +288,53 @@ namespace eval punk::mix::commandset::scriptwrap { if {($labelposn >= $lbound) && ($labelposn <= $ubound)} { lappend error_labels [list label $label call_offset_bytes $labelposn callsite [list call ${call}${labelplus} call_linenum $linenum]] puts stdout "[a+ bold red]ERROR: label $trimscanline at offset from callsite: $labelposn total offset: $total_offset[a]" - puts stdout "[a+ bold red] This label appears to span the 512byte boundary at byte $ubound[a] [a+ yellow bold]from callsite[a]" + puts stdout "[a+ bold red] This label appears to span the 512byte boundary at byte $ubound[a] [a+ yellow bold]from callsite.[a]" } else { puts stdout "[a+ bold green]OK: label $trimscanline at offset from callsite: $labelposn total offset: $total_offset[a]" } } - set forward_spaninfo [range_spans_512_boundaries $line_start $line_end] - if {[dict get $forward_spaninfo is_span]} { - set boundaries [dict get $forward_spaninfo boundaries] - puts stdout "line $linenum scan from call label $label at $callposn boundaries crossed: $boundaries" - } incr total_offset $line_bytes incr scan_offset $line_bytes } + #todo + #forward scan 2 - check any boundaries missed above because the label isn't at the begining of a line + #these are potentially hidden labels that could activate without requiring the label be at the beginning of a 
line + #check boundary spans relative to start of this objForwardScan chunk + set forward_spaninfo [fileline::range_spans_chunk_boundaries {*}[$objForwardScan numeric_chunkrange 0 end] 512] + if {[dict get $forward_spaninfo is_span]} { + set boundaries [dict get $forward_spaninfo boundaries] + if {[llength $boundaries] > 1} { + puts stdout "line $linenum scan from call label $label at $callposn. Callsite-relative boundaries crossed: [lrange $boundaries 1 end]" + } + } + #scan behind for labels at boundaries - using offset from start of file #we do a backward scan even if a forward label has been found, so that we can warn of duplicate labels. - set prior_lines [lrange $file_lines 0 $linenum] ;#only scan from file start to call-site + set prior_start 0 + set prior_end $lineindex ;#only scan from file start to call-site + set prior_total_offset 0 set found_backward_label 0 set p_linenum 0 - foreach pline $prior_lines { + for {set pidx $prior_start} {$pidx <= $prior_end} {incr pidx} { + set plineinfo [$objFile lineinfo $pidx] + set pline [dict get $plineinfo payload] incr p_linenum - set pline_bytes [expr {[string length $pline] +1}] ;#+1 for unix lf + set pline_bytes [dict get $plineinfo linelen] ;#includes lf or crlf ending bytes set pline_start $prior_total_offset - set pline_end [expr {$prior_total_offset + $pline_bytes}] - set spaninfo [range_spans_512_boundaries $pline_start $pline_end] - puts stdout "line $p_linenum byte range $pline_start -> $pline_end [a+ bold purple]$spaninfo[a]" + if {$pline_start != [dict get $plineinfo start]} { + error "checkoutput error: line $p_linenum - calculated start $pline_start not equal to stored start [dict get $plineinfo start]" + } + set pline_end [expr {$prior_total_offset + $pline_bytes -1}] + if {$pline_end != [dict get $plineinfo end]} { + error "checkoutput error: line $p_linenum - calculated end $pline_end not equal to stored end [dict get $plineinfo end]" + } + set trimpline [string trim $pline] + #callsite labels 
appear to be literal - not subject to % expansion and escaping for example. if {[string match ":$label*" $trimpline]} { incr found_backward_label set prior_label_posn_in_line [string first : $pline] @@ -280,7 +354,9 @@ namespace eval punk::mix::commandset::scriptwrap { } } + set spaninfo [fileline::range_spans_chunk_boundaries $pline_start $pline_end 512] if {[dict get $spaninfo is_span]} { + #puts stdout "boundary spanning line $p_linenum byte range $pline_start -> $pline_end [a+ bold purple]$spaninfo[a]" #check boundaries within the line set boundaries [dict get $spaninfo boundaries] foreach b $boundaries { @@ -308,7 +384,7 @@ namespace eval punk::mix::commandset::scriptwrap { puts stdout "[a+ bold yellow] label starting at $b : $pline_tail[a]" set tail_start $b set tail_end [expr {$b + [string length $pline_tail]}] - set tail_spaninfo [range_spans_512_boundaries $tail_start $tail_end] + set tail_spaninfo [fileline::range_spans_chunk_boundaries $tail_start $tail_end 512] if {[dict get $tail_spaninfo is_span]} { set tail_boundaries [dict get $tail_spaninfo boundaries] set extra_tail_boundaries [lsearch -all -inline -not $tail_boundaries $b] @@ -320,9 +396,14 @@ namespace eval punk::mix::commandset::scriptwrap { puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" } } else { - set nextline [lindex $file_lines $pline_tail+1] - puts "Line $p_linenum + 1 has data: [a+ yellow bold]$nextline[a]" - puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + if {$pidx+1 < [$objFile linecount]} { + set nextlineinfo [$objFile lineinfo $pidx+1] + set nextpayload [dict get $nextlineinfo payload] + puts "Line $p_linenum + 1 has data: [a+ yellow bold]$nextpayload[a]" + puts "[a+ yellow bold]NOTE: cmd may attempt to treat this data as code[a]" + } else { + #EOF reached + } } }
@@ -945,6 +1026,72 @@ namespace eval punk::mix::commandset::scriptwrap {
     }
+
+    namespace eval batchlib {
+        proc get_callsite_label {labelplus} {
+            #Split the text found at a goto/call callsite into the label and the unparsed remainder.
+            # labelplus - callsite text starting at the label (a single leading : is accepted and dropped)
+            # returns   - dict with keys: label (resolved label name) and tail (text from the terminating char onward - may be empty)
+            #NOTE: the resolution rules below are observations only - presumably incomplete. verify against cmd.exe before relying on edge cases
+            #a quick'n'dirty fix for some ways various escapes are handled within labels at callsite.
+            #There seem to be very different rules for labels at target site - presumably because they are not part of a command
+            # Mostly it seems target labels are more literal
+            #some rules..
+            #callsite labels can't have space between : and label - but target labels can
+            #label terminated by =,: even if prefixed by ^ and even if in squotes or dquotes
+            #squotes and dquotes otherwise pass through as part of label
+            #may resolve variables within the label - but characters from variable value can terminate.
+            #as we don't have access to the variable values - %varname% is normalized to empty string at callsite (todo - perhaps emit warning somewhere)
+            # The target labels don't seem to
+            #a single % resolves to empty
+            #sequences of % don't begin a var - number of % in labelname = number of %s divided by 2 and rounded down. ie 1->0 2->1 3-> 1 4->2 5->2 6->3 etc
+            #spaces in % wrapped var names don't terminate label
+            #spaces aren't escaped by ^ or quoting
+            #sequences of ^ seem to follow same counting rule as %
+            #e.g @goto :la%path%bel where path begins with C:\Program Files.. becomes label :laC
+
+            #Due to whitespace and most chars except : and % being allowed inside vars - resolving % first seems the best first step
+            # -------------- start % handling %
+            set chars [split $labelplus ""]
+            lappend chars "" ;#empty sentinel - lets the loop body resolve a run of % that ends the string
+            set percentrun 0
+            set invar 0
+            set varname ""
+            set output ""
+            foreach c $chars {
+                if {$c eq "%"} {
+                    #within a var the next % closes it (value unknown so nothing is emitted) - otherwise extend the current run
+                    if {$invar} {set invar 0} else {incr percentrun}
+                    continue
+                }
+                if {$percentrun == 1} {
+                    #a lone % opens a var
+                    set invar 1
+                    set varname ""
+                } elseif {$percentrun > 1} {
+                    append output [string repeat % [expr {$percentrun / 2}]]
+                }
+                set percentrun 0
+                if {$invar} {append varname $c} else {append output $c}
+            }
+            #a var that was never closed: its opening % resolved to empty and the text after it is literal
+            if {$invar} {append output $varname}
+            # -------------- end % handling %
+
+            if {[string index $output 0] eq ":"} {set output [string range $output 1 end]}
+            #assumption: space and tab terminate the label as well as = , :  (carets and quotes never do)
+            if {[regexp -indices {[ \t=,:]} $output termrange]} {
+                set termposn [lindex $termrange 0]
+                set label [string range $output 0 [expr {$termposn - 1}]]
+                set tail [string range $output $termposn end]
+            } else {
+                set label $output
+                set tail ""
+            }
+            #caret -- a run of n carets leaves n/2 (rounded down) carets. ie ^^ -> ^ and a lone ^ vanishes
+            set label [regsub -all {\^(\^?)} $label {\1}]
+            return [list label $label tail $tail]
+        }
+    }
 }
diff --git a/src/modules/punk/mix/templates/modules/template_module-0.0.1.tm b/src/modules/punk/mix/templates/modules/template_module-0.0.1.tm index 9bae6521..3e44c9ff 100644 --- a/src/modules/punk/mix/templates/modules/template_module-0.0.1.tm +++ b/src/modules/punk/mix/templates/modules/template_module-0.0.1.tm @@ -22,6 +22,7 @@ #[titledesc {Module API}] [comment {-- Name section and table of contents description --}] #[moddesc {-}] [comment {-- Description at end of page heading --}] #[require %pkg%] +#[keywords module] #[description] #[para] - @@ -106,6 +107,12 @@ namespace eval %pkg% { + #proc sample1 {p1 args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [opt {?option value...?}]] + # #[para]Description of sample1 + # return "ok" + #} @@ -120,19 +127,24 @@ namespace eval %pkg% { # Secondary API namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval %pkg%::lib { - namespace export * - namespace path [namespace parent] - #*** !doctools - #[subsection {Namespace %pkg%::lib}] - #[para] Secondary functions that are part of the API - #[list_begin definitions] - + namespace export * + namespace path [namespace parent] + #*** !doctools + #[subsection {Namespace %pkg%::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + #proc utility1 {p1 args} { + # #*** 
!doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} - #*** !doctools - #[list_end] [comment {--- end definitions namespace %pkg%::lib ---}] + #*** !doctools + #[list_end] [comment {--- end definitions namespace %pkg%::lib ---}] } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ @@ -142,9 +154,9 @@ namespace eval %pkg%::lib { #*** !doctools #[section Internal] namespace eval %pkg%::system { - #*** !doctools - #[subsection {Namespace %pkg%::system}] - #[para] Internal functions that are not part of the API + #*** !doctools + #[subsection {Namespace %pkg%::system}] + #[para] Internal functions that are not part of the API diff --git a/src/modules/punk/path-999999.0a1.0.tm b/src/modules/punk/path-999999.0a1.0.tm index 3d797bcb..6408fd27 100644 --- a/src/modules/punk/path-999999.0a1.0.tm +++ b/src/modules/punk/path-999999.0a1.0.tm @@ -201,8 +201,8 @@ namespace eval punk::path { proc treefilenames {basepath tailglob args} { #*** !doctools #[call [fun treefilenames] [arg basepath] [arg tailglob] [opt {option value...}]] - #basic (glob based) list of filenames matching tailglob - recursive - #no natsorting - so order is dependent on filesystem + #[para]basic (glob based) list of filenames matching tailglob - recursive + #[para]no natsorting - so order is dependent on filesystem set defaults [dict create\ -call-depth-internal 0\ -antiglob_paths {}\ @@ -270,14 +270,30 @@ namespace eval punk::path { #[item] #[para] Notes: #[para] Both paths must be the same type - ie both absolute or both relative - #[para] Case sensitive. ie relative /etc /etC + #[para] Case sensitive. ie punk::path::relative /etc /etC # will return ../etC #[para] On windows, the drive-letter component (only) is not case sensitive - #[para] ie relative c:/etc C:/etc returns . - #[para] but relative c:/etc C:/Etc returns ../Etc + #[example_begin] + # P% punk::path::relative c:/etc C:/etc + # - . 
+ #[example_end] + #[para] The part following the driveletter is case sensitive so in the following cases it recognises the driveletter matches but not the tail + #[example_begin] + # P% punk::path::relative c:/etc C:/Etc + # - ../Etc + #[example_end] #[para] On windows, if the paths are absolute and specifiy different volumes, only the location will be returned. - # ie relative c:/etc d:/etc/blah - # returns d:/etc/blah + #[example_begin] + # P% punk::path::relative c:/etc d:/etc/blah + # - d:/etc/blah + #[example_end] + #[para] Unix-like examples: + #[example_begin] + # P% punk::path::relative /usr/local/etc/ /usr/local/etc/somewhere/below + # - somewhere/below + # P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here + # - ../../lib/here + #[example_end] #[list_end] #see also kettle