From e1d0130b7a40bfc3535390075d2c9f696d4699e1 Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Thu, 26 Mar 2026 10:19:54 +1100 Subject: [PATCH] auto_exec, dir listing and .lnk processing improvements - primarily on windows --- .../modules/include_modules.config | 1 + src/bootsupport/modules/punk-0.1.tm | 247 ++++- src/bootsupport/modules/punk/ansi-0.1.1.tm | 24 +- src/bootsupport/modules/punk/args-0.2.1.tm | 35 +- .../punk/args/moduledoc/tclcore-0.1.0.tm | 33 +- .../modules/punk/auto_exec-0.1.0.tm | 786 +++++++++++++++ src/bootsupport/modules/punk/char-0.1.0.tm | 353 ++++++- src/bootsupport/modules/punk/du-0.1.0.tm | 909 ++++++++++++++---- .../punk/mix/commandset/module-0.1.0.tm | 7 +- src/bootsupport/modules/punk/nav/fs-0.1.0.tm | 399 +++++++- src/bootsupport/modules/punk/path-0.1.0.tm | 5 + src/bootsupport/modules/punk/winlnk-0.1.1.tm | 290 ++++-- src/bootsupport/modules/punk/winpath-0.1.0.tm | 41 +- src/modules/punk-0.1.tm | 247 ++++- src/modules/punk/ansi-999999.0a1.0.tm | 2 +- src/modules/punk/args-999999.0a1.0.tm | 35 +- .../args/moduledoc/tclcore-999999.0a1.0.tm | 19 +- src/modules/punk/auto_exec-999999.0a1.0.tm | 786 +++++++++++++++ src/modules/punk/auto_exec-buildversion.txt | 3 + src/modules/punk/char-999999.0a1.0.tm | 353 ++++++- src/modules/punk/du-999999.0a1.0.tm | 909 ++++++++++++++---- .../mix/commandset/module-999999.0a1.0.tm | 7 +- src/modules/punk/nav/fs-999999.0a1.0.tm | 399 +++++++- src/modules/punk/path-999999.0a1.0.tm | 5 + src/modules/punk/winlnk-999999.0a1.0.tm | 290 ++++-- src/modules/punk/winpath-999999.0a1.0.tm | 41 +- .../modules/include_modules.config | 1 + .../src/bootsupport/modules/punk-0.1.tm | 247 ++++- .../bootsupport/modules/punk/ansi-0.1.1.tm | 24 +- .../bootsupport/modules/punk/args-0.2.1.tm | 35 +- .../punk/args/moduledoc/tclcore-0.1.0.tm | 33 +- .../modules/punk/auto_exec-0.1.0.tm | 786 +++++++++++++++ .../bootsupport/modules/punk/char-0.1.0.tm | 353 ++++++- .../src/bootsupport/modules/punk/du-0.1.0.tm | 909 ++++++++++++++---- .../punk/mix/commandset/module-0.1.0.tm | 7 +- .../bootsupport/modules/punk/nav/fs-0.1.0.tm | 399 +++++++- .../bootsupport/modules/punk/path-0.1.0.tm | 5 + .../bootsupport/modules/punk/winlnk-0.1.1.tm | 290 ++++-- .../bootsupport/modules/punk/winpath-0.1.0.tm | 41 +- .../modules/include_modules.config | 1 + .../src/bootsupport/modules/punk-0.1.tm | 247 ++++- .../bootsupport/modules/punk/ansi-0.1.1.tm | 24 +- .../bootsupport/modules/punk/args-0.2.1.tm | 35 +- .../punk/args/moduledoc/tclcore-0.1.0.tm | 33 +- .../modules/punk/auto_exec-0.1.0.tm | 786 +++++++++++++++ .../bootsupport/modules/punk/char-0.1.0.tm | 353 ++++++- .../src/bootsupport/modules/punk/du-0.1.0.tm | 909 ++++++++++++++---- .../punk/mix/commandset/module-0.1.0.tm | 7 +- .../bootsupport/modules/punk/nav/fs-0.1.0.tm | 399 +++++++- .../bootsupport/modules/punk/path-0.1.0.tm | 5 + .../bootsupport/modules/punk/winlnk-0.1.1.tm | 290 ++++-- .../bootsupport/modules/punk/winpath-0.1.0.tm | 41 +- src/vfs/_vfscommon.vfs/modules/punk-0.1.tm | 247 ++++- .../_vfscommon.vfs/modules/punk/ansi-0.1.1.tm | 2 +- .../_vfscommon.vfs/modules/punk/args-0.2.1.tm | 35 +- .../punk/args/moduledoc/tclcore-0.1.0.tm | 19 +- .../modules/punk/auto_exec-0.1.0.tm | 786 +++++++++++++++ .../_vfscommon.vfs/modules/punk/char-0.1.0.tm | 353 ++++++- .../_vfscommon.vfs/modules/punk/du-0.1.0.tm | 909 ++++++++++++++---- .../punk/mix/commandset/module-0.1.0.tm | 7 +- .../modules/punk/nav/fs-0.1.0.tm | 399 +++++++- .../_vfscommon.vfs/modules/punk/path-0.1.0.tm | 5 + .../modules/punk/winlnk-0.1.1.tm | 290 ++++-- .../modules/punk/winpath-0.1.0.tm | 41 +- 64 files changed, 13791 insertions(+), 1788 deletions(-) create mode 100644 src/bootsupport/modules/punk/auto_exec-0.1.0.tm create mode 100644 src/modules/punk/auto_exec-999999.0a1.0.tm create mode 100644 src/modules/punk/auto_exec-buildversion.txt create mode 100644 src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/auto_exec-0.1.0.tm create mode 100644 src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/auto_exec-0.1.0.tm create mode 100644 src/vfs/_vfscommon.vfs/modules/punk/auto_exec-0.1.0.tm diff --git a/src/bootsupport/modules/include_modules.config b/src/bootsupport/modules/include_modules.config index 44fd74c6..e643dc39 100644 --- a/src/bootsupport/modules/include_modules.config +++ b/src/bootsupport/modules/include_modules.config @@ -54,6 +54,7 @@ set bootsupport_modules [list\ modules punk::assertion\ modules punk::args\ modules punk::args::moduledoc::tclcore\ + modules punk::auto_exec\ modules punk::cap\ modules punk::cap::handlers::caphandler\ modules punk::cap::handlers::scriptlibs\ diff --git a/src/bootsupport/modules/punk-0.1.tm b/src/bootsupport/modules/punk-0.1.tm index e1648d9d..5a7824f0 100644 --- a/src/bootsupport/modules/punk-0.1.tm +++ b/src/bootsupport/modules/punk-0.1.tm @@ -75,6 +75,76 @@ namespace eval punk { # } + if {![interp issafe] && $::tcl_platform(platform) eq "windows"} { + + #return the raw command string from the registry for the association of the given extension, without processing the placeholders such as %1 %SystemRoot% etc. + #This is because we want to process these ourselves to be able to return a proper list of command and arguments. + #Note that the resulting string can't be directly treated as a tcl list because it has double quoted segments with characters that are literals (not escaped) + #Accessing it directly as a list will cause tcl to interpret the backslashes as escapes and lose the literal meaning values such as the path. + proc extension_open_association {ext} { + #look up assoc and ftype to find associated file type and application. + #Note that the windows 'ftype' and 'assoc' commands only examine the system's data - not the user-specific data which may override the system data. + #to get the user-specific associations we need to read the registry keys. + + #extensions in the registry seem to be stored lower case wnd with a leading dot. + set lext [string tolower $ext] + package require registry + set user_assoc_path [join [list HKEY_CURRENT_USER Software Microsoft Windows CurrentVersion Explorer FileExts $lext UserChoice] "\\"] + + #The UserChoice subkey under this key is where the user-specific association is stored when the user has made a choice. It contains a Progid value which points to the associated file type. + #It can return something like "Python.File" or "Applications\python.exe" or even tcl_auto_file (for .tcl files associated with tclsh by a tcl installer) + + #The UserChoice key may not exist if the user hasn't made an explicit choice, in which case we fall back to the system association which is stored under HKEY_CLASSES_ROOT\$ext (which also contains user-specific overrides but is more cumbersome to query for them) + if {![catch {registry get $user_assoc_path Progid} user_choice]} { + if {$user_choice ne ""} { + #examples such as Applications\python.exe and tcl_auto_file are found under HKEY_CURRENT_USER\Software\Classes + #they then have a sub-path shell\open\command which contains the command to open files of that type, which is what we need to extract the associated application. + #it is faster to query for the key directly within a catch than to retrieve keys as lists and iterate through them to find the one we want. + if {![catch {registry get [join [list HKEY_CURRENT_USER Software Classes $user_choice shell open command] "\\"] ""} raw_assoc]} { + #The command string can contain placeholders like "%1" for the file name, so we need to extract just the executable path. + #e.g .py -> "c:\Program Files\Python\python.exe" "%1" + #e.g .rb -> "C:\tools\ruby31\bin\ruby.exe" "%1" %* + # e.g .vbs -> "%SystemRoot%\System32\WScript.exe" "%1" %* + #we need to process this without Tcl interpreting the backslashes as escapes. + #we will use double quotes to determine which entries belong together as a list item for the resulting list of command and arguments. + #set assoc [string map [list \\ \\\\] $raw_assoc] + set assoc $raw_assoc + } else { + #e.g Python.File + if {![catch {registry get [join [list HKEY_LOCAL_MACHINE SOFTWARE Classes $user_choice shell open command] "\\"] ""} raw_assoc]} { + #set assoc [string map [list \\ \\\\] $raw_assoc] + set assoc $raw_assoc + } else { + set assoc "" + } + } + + } else { + #review - is it possible for Progid to be empty string? If so we should probably fall back to system association instead of returning no association. + #alternatively it could mean the user has explicitly chosen to have no association for this file type - in which case returning an empty association may be the correct behaviour. + set assoc "" + } + } else { + #fall back to system association and ftype + if {![catch {registry get [join [list HKEY_LOCAL_MACHINE SOFTWARE Classes $lext] "\\"] ""} ftype]} { + #ftype is the file type associated with the extension, e.g "Python.File" + #we then need to look up the command associated with this file type under the same path but with the file type instead of the extension and with the additional sub-path shell\open\command + if {![catch {registry get [join [list HKEY_LOCAL_MACHINE SOFTWARE Classes $ftype shell open command] "\\"] ""} raw_assoc]} { + #set assoc [string map [list \\ \\\\] $raw_assoc] + set assoc $raw_assoc + } else { + set assoc "" + } + } else { + set assoc "" + } + } + return $assoc + } + + + } + if {![interp issafe] && $has_commandstack && $::tcl_platform(platform) eq "windows"} { #still a caching version of auto_execok - but with proper(fixed) search order @@ -113,6 +183,8 @@ namespace eval punk { proc auto_execok_better name { global auto_execs env tcl_platform + #for now at least, auto_execok_better is windows-specific. + package require punk::auto_exec if {[info exists auto_execs($name)]} { return $auto_execs($name) @@ -141,11 +213,60 @@ namespace eval punk { } if {[llength [file split $name]] != 1} { - #has a path + #has a path component - could be relative or absolute. + if {[string tolower [file extension $name]] eq ".lnk"} { + #special case .lnk + #todo - consider working directory or other properties of link before launching? + package require punk::winlnk + if {![catch {punk::winlnk::target $name} linktarget]} { + if {$linktarget ne ""} { + set target $linktarget + } else { + return "" + } + } else { + set target $name + } + } else { + set target $name + } + #always store $name as the key when setting auto_execs. foreach ext $execExtensions { - set file ${name}${ext} + set file ${target}${ext} + #first execExtension is empty string - ensure we test actual file as given before we try appending extensions. + if {$ext eq ""} { + set test_ext [file extension $file] + } else { + set test_ext $ext + } if {[file exists $file] && ![file isdirectory $file]} { - return [set auto_execs($name) [list $file]] + #look up assoc and ftype to find associated file type and application. + #Note that the windows 'ftype' and 'assoc' commands only examine the system's data - not the user-specific data which may override the system data. + #set assoc [extension_open_association $ext] + set associnfo [punk::auto_exec::shell_open_command $test_ext] + set valuetype [dict get $associnfo type] + set assoc [dict get $associnfo value] + set ftype [dict get $associnfo filetype] + if {$assoc eq ""} { + return [set auto_execs($name) [list $file]] + } else { + if {[file pathtype $target] eq "relative" && $ftype eq "InternetShortcut"} { + #special case InternetShortcut - cannot accept relative path - so we can't cache it in auto_execs if we used a relative path to launch + #if we return an empty string - the auto_exec will fail to launch this every time. + #The best we can do is return a token for the 'unknown' process to detect and re-resolve the path every time. + #This requires cooperation from 'unknown' which may not be configured to handle this token if the default 'punk' version isn't installed. + + #we can't resolve using absolute path here - because we don't want to lock in a specific file for a relative path. + #e.g ::auto_execs(./link.url) = some.exe c:/desktop/link.url + #this would be wrong if the user changed directory and tried to run ./link.url again on a different file with the same name + # - as the cached path would no longer be correct. + return [set auto_execs($name) "(resolve_in_unknown_handler) punk::auto_exec absolute_path required"] + } + puts stderr "auto_execok_better: (review required) assoc $assoc for file $file ext $test_ext" + set run [punk::auto_exec::shell_command_as_tcl_list -type $valuetype $assoc $file] ;# -workingdir [pwd] vs path of script? + return [set auto_execs($name) $run] + #return [set auto_execs($name) [list $file]] + } } } return "" @@ -164,11 +285,20 @@ namespace eval punk { append path "$windir/system32;$windir/system;$windir;" } - foreach var {PATH Path path} { - if {[info exists env($var)]} { - append path ";$env($var)" - } + # ------------------------ + #Note that unlike an ordinary Tcl array - the linked ::env behaves differently. + #e.g parray ::env Path will not find ::env(PATH) and yet 'info exists env(Path)' returns true. + #similarly 'set ::env(Path) ?newval?' or any case variation can set/get the value of ::env(PATH) + #Windows environment variables are case-insensitive. + + #No evidence has been seen that any version of windows; current or historic since NT; can allow differently cased versions + # of an environment variable to exist concurrently in the same environment. + #for this reason we should be able to just use PATH. + # + if {[info exists env(PATH)]} { + append path ";$env(PATH)" } + # ------------------------ #change2 if {[file extension $name] ne "" && [string tolower [file extension $name]] in [string tolower $execExtensions]} { @@ -177,6 +307,8 @@ namespace eval punk { set lookfor [lmap ext $execExtensions {string cat ${name} ${ext}}] } #puts "-->$lookfor" + + foreach dir [split $path {;}] { set dir [string trim $dir {\\}] ;#trailing slash will result in a tail such as "/python.exe" #set dir [file normalize $dir] @@ -207,7 +339,30 @@ namespace eval punk { foreach match [glob -nocomplain -dir $dir -tail {*}$lookfor] { set file [file join $dir $match] if {[file exists $file] && ![file isdirectory $file]} { - return [set auto_execs($name) [list $file]] + #set assoc [extension_open_association [file extension $file]] + #todo - cache this lookup for each extension we encounter? maybe not, as the user might like changes reflected between runs. review. + if {"windows" ne $::tcl_platform(platform)} { + return [set auto_execs($name) [list $file]] + } + + set associnfo [punk::auto_exec::shell_open_command [file extension $file]] + set assoc [dict get $associnfo value] + set type [dict get $associnfo type] + if {$assoc eq ""} { + return [set auto_execs($name) [list $file]] + } else { + puts stderr "auto_execok_better: assoc $assoc for file $file with type $type" + #return [set auto_execs($name) [list $file]] + #review - our stored auto_execs doesn't have any way to capture the full assoc info such as how subsequent arguments should be processed. + #This may need handling in our Tcl shell 'unknown' function when calls are actually made to these commands + #- we may need to re-process the assoc info at that point to determine how to combine all arguments with the calling specification in the assoc string. + #The workingdir for the command may also need to be determined at that point - should it be the dir of the script being called, or the current dir of the shell? + + #The main point of Tcl's auto_execs is to avoid scanning the PATH entries every time a command is called, + #but we may want to keep some of the assoc info available for processing at call time. + set run [punk::auto_exec::shell_command_as_tcl_list -type $type $assoc $file] ;# -workingdir [pwd] vs path of script? + return [set auto_execs($name) $run] + } } } } @@ -5203,11 +5358,11 @@ namespace eval punk { #These can be stacked with shellfilter and operate as OS handles - which we can't do with fifo2 etc # - if {[string first " " $new] > 0} { - set c1 $name - } else { - set c1 $new - } + #if {[string first " " $new] > 0} { + # set c1 $name + #} else { + # set c1 $new + #} # -- --- --- --- --- set idlist_stdout [list] @@ -5241,6 +5396,24 @@ namespace eval punk { } else { set repl_runid [punk::get_repl_runid] #set ::punk::last_run_display [list] + if {$new eq "(resolve_in_unknown_handler) punk::auto_exec absolute_path required"} { + #re-resolve. + puts "(unknown-handler): auto_execok for $name requires absolute path. Re-resolving $name with absolute path." + set ext [file extension $name] + set associnfo [punk::auto_exec::shell_open_command $ext] + set valuetype [dict get $associnfo type] + set assoc [dict get $associnfo value] + set ftype [dict get $associnfo filetype] + set fullpath [file normalize $name] + #at least for .url files - long paths (paths with multiple spaces?) can fail to run. Using the short path seems to fix this. + #This seems hacky but anyway.. + set attributes [file attributes $fullpath] + if {[dict exists $attributes -shortname]} { + set fullpath [dict get $attributes -shortname] + } + set new [punk::auto_exec::shell_command_as_tcl_list -type $valuetype $assoc $fullpath] + } + set redir ">&@stdout <@stdin" uplevel 1 [list ::catch [concat exec $redir $new [lrange $args 1 end]] ::tcl::UnknownResult ::tcl::UnknownOptions] @@ -6108,6 +6281,16 @@ namespace eval punk { } } + #This should be designed to be useful on all platforms. + #Case sensitivity represents a difficulty because even on a particular platform + #- different filesystems or folders may have different case sensitivity configurations. + + #as a first step - we can detect windows and mac platforms and treat paths as case-insensitive, vs case-sensitive on other unix-like platforms. + #as a second step - we will consider running a test on each path to determine if the folder at the leaf level is case-sensitive or not. + #- and then use that information to determine how to treat the executables in that path. + #This may be a bit of a performance hit - so we may want to cache the results of this test for each path - and provide a way to clear the cache if needed. + #Alternatively we could just provide an option to treat all paths as case-sensitive or case-insensitive. + #Windows executable search location order: #1. The current directory. #2. The directories that are listed in the PATH environment variable. @@ -6203,8 +6386,12 @@ namespace eval punk { #(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug) #We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results. #The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for. - #(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe') + #(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe' + # but tcl's glob does not respect the case of even the character-class pattern - so this is not a reliable workaround). + #see punk::fglob for a work-in-progress glob implementation which gives us more control over case sensitivity and the case of results on windows. + set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]] + #JJJ set executables [list] foreach e $globresults { puts stderr "glob result: $e" @@ -6596,6 +6783,29 @@ namespace eval punk { #These aliases work fine for interactive use - but the result is always a string int-rep #interp alias {} ~ {} file join $::env(HOME) ;#HOME must be capitalized to work cross platform (lowercase home works on windows - but probably not elsewhere) #interp alias {} ~ {} apply {args {file join $::env(HOME) $args}} + namespace eval argdoc { + punk::args::define { + @id -id ::punk::~ + @cmd -name "punk::~"\ + -summary\ + "Return home directory"\ + -help\ + "Return the home directory path. With additional arguments, + return the path obtained by joining the home directory with + the supplied arguments. + + usage e.g + cd [~] + (change to home directory) + .// [~] .config + (change to .config directory within home directory + and list contents.) + " + @opts + @values -min 0 -max -1 + arg -type any -optional 1 -multiple 1 + } + } proc ~ {args} { set hdir [punk::valcopy $::env(HOME)] file pathtype $hdir @@ -7811,9 +8021,10 @@ namespace eval punk { # ------------------------------------------------------- set title "[a+ brightgreen] Filesystem navigation: " set cmdinfo [list] - lappend cmdinfo [list ./ "?${I}subdir${NI}?" "view/change directory"] - lappend cmdinfo [list ../ "" "go up one directory"] - lappend cmdinfo [list ./new "${I}subdir${NI}" "make new directory and switch to it"] + lappend cmdinfo [list ./ "?${I}glob${NI}?" "view/change dir, list dirs."] + lappend cmdinfo [list ../ "?${I}path${NI}" "go up one dir, then to path if given"] + lappend cmdinfo [list .// "?${I}glob${NI}?" "view/change dir, list dirs and files"] + lappend cmdinfo [list ./new "${I}subdir${NI}..." "make new dir or dirs and show status"] lappend cmdinfo [list fcat "${I}file ?file?...${NI}" "cat file(s)"] set t [textblock::class::table new -minwidth 80 -show_seps 0] foreach row $cmdinfo { @@ -7859,7 +8070,7 @@ namespace eval punk { lappend cmdinfo [list i "${I}cmd${NI} ?${I}subcommand${NI}...?" "Show usage for a command or ensemble subcommand"] lappend cmdinfo [list s "${I}cmd${NI} ?${I}subcommand${NI}...?" "Show synopsis for a command or ensemble subcommand"] lappend cmdinfo [list eg "${I}cmd${NI} ?${I}subcommand${NI}...?" "Show example from manpage"] - lappend cmdinfo [list corp "${I}proc${NI}" "View proc body and arguments"] + lappend cmdinfo [list corp "${I}proc${NI}" "View proc body and arguments with basic highlighting"] set t [textblock::class::table new -minwidth 80 -show_seps 0] foreach row $cmdinfo { $t add_row $row diff --git a/src/bootsupport/modules/punk/ansi-0.1.1.tm b/src/bootsupport/modules/punk/ansi-0.1.1.tm index c659d4af..3d2f5140 100644 --- a/src/bootsupport/modules/punk/ansi-0.1.1.tm +++ b/src/bootsupport/modules/punk/ansi-0.1.1.tm @@ -1148,7 +1148,7 @@ tcl::namespace::eval punk::ansi { "The grepstr command can find strings in ANSI text even if there are interspersed ANSI colour codes etc. Even if a word has different coloured/styled letters, the regex can match the plaintext. (Search is performed on ansistripped text, and then - the matched sections are highlighted and overlayed on the original styled/colourd + the matched sections are highlighted and overlayed on the original styled/coloured input. If the input string has ANSI movement codes - the resultant text may not be directly @@ -3524,9 +3524,9 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu underline {lappend t 4} underlinedefault {lappend t 59} underextendedoff { - #Remove any existing 4:1 etc extended underline codes - #NOTE: struct::set result order can differ depending on whether tcl/critcl impl is used. - #FIXED: Using punk::lib::ldiff instead of struct::set difference for consistent ordering. + #lremove any existing 4:1 etc + #NOTE struct::set result order can differ depending on whether tcl/critcl imp used + #set e [struct::set difference $e [list 4:1 4:2 4:3 4:4 4:5]] set e [punk::lib::ldiff $e [list 4:1 4:2 4:3 4:4 4:5]] lappend e 4:0 } @@ -3543,8 +3543,9 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu lappend e 4:4 } underdashed - underdash { - # FIXED: Extended codes with colon suppress normal SGR attributes when in same escape sequence - # on terminals that don't support the extended codes. Emit as separate sequence if needed. + #TODO - fix + # extended codes with colon suppress normal SGR attributes when in same escape sequence on terminal that don't support the extended codes. + # need to emit in lappend e 4:5 } doubleunderline {lappend t 21} @@ -4732,11 +4733,7 @@ Brightblack 100 Brightred 101 Brightgreen 102 Brightyellow 103 Brightblu if {[string length $uri] > 2083} { error "punk::ansi::hyperlink uri too long: limit 2083" } - #SECURITY: Sanitize hyperlink ID to prevent injection attacks - #Current mapping: : -> . ; -> , prevents common delimiter issues - #FUTURE: Consider additional restrictions on special characters (=, &, ?, #, etc.) - #to prevent URL parameter injection or other hyperlink protocol exploits - set id [string map {: . {;} ,} $id] + set id [string map {: . {;} ,} $id] ;#avoid some likely problematic ids. TODO - review, restrict further. set params "id=$id" return "\x1b\]8\;$params\;$uri\x1b\\" } @@ -6829,9 +6826,8 @@ tcl::namespace::eval punk::ansi::ta { # -- --- --- --- #variable re_ansi_detect1 "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_g0_open}|${re_g0_close}" # -- --- --- --- - #handrafted TRIE version of above. Somewhat difficult to construct and maintain. - #FUTURE: Consider using a regexp TRIE generator that works with Tcl regexes for maintainability. - #This does make things quicker - but it's too early to finalise the detect/split regexes (e.g missing \U0090 ) - will need to be redone. + #handrafted TRIE version of above. Somewhat difficult to construct and maintain. TODO - find a regexp TRIE generator that works with Tcl regexes + #This does make things quicker - but it's too early to finalise the detect/split regexes (e.g missing \U0090 ) - will need to be redone. #variable re_ansi_detect {(?:\x1b(?:\((?:0|B)|\[(?:[\x20-\x2f\x30-\x3f]*[\x40-\x7e])|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|\007).)*(?:\x1b\\|\007))|c|7|8|M|E|D|H|=|>|(?:#(?:3|4|5|6|8))))|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)|(?:\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]|(?:\u009d)(?:[^\u009c]*)?\u009c} #NOTE - the literal # char can cause problems in expanded syntax - even though it's within a bracketed section. \# seems to work though. diff --git a/src/bootsupport/modules/punk/args-0.2.1.tm b/src/bootsupport/modules/punk/args-0.2.1.tm index c8f88537..5abc839d 100644 --- a/src/bootsupport/modules/punk/args-0.2.1.tm +++ b/src/bootsupport/modules/punk/args-0.2.1.tm @@ -6148,7 +6148,7 @@ tcl::namespace::eval punk::args { ### set e [lindex $clauseval $clausecolumn] ### set e_check [lindex $clauseval_check $clausecolumn] ### #//review - we may need '?' char on windows - ### if {!([tcl::string::length $e_check]>0 && ![regexp {[\"*<>\;]} $e_check])} { + ### if {!([tcl::string::length $e_check]>0 && ![regexp {[\"*<>]} $e_check])} { ### #what about special file names e.g on windows NUL ? ### set msg "$argclass $argname for %caller% requires type '$type'. Received: '$e' which doesn't look like it could be a file or directory" ### return -options [list -code error -errorcode [list PUNKARGS VALIDATION [list typemismatch $type] -badarg $argname -argspecs $argspecs]] $msg @@ -6671,9 +6671,12 @@ tcl::namespace::eval punk::args { directory - existingfile - existingdirectory { - #//review - we may need '?' char on windows - if {!([tcl::string::length $e_check]>0 && ![regexp {[\"*<>\;]} $e_check])} { - #what about special file names e.g on windows NUL ? + #we have to support file and directory names on all platforms - and even characters illegal on a filesystem/platform may need to be passed. + #For example a file/folder may be created with an illegal name on a platform (or mounted on it) and be mapped to another string on the filesystem + #- yet it may remain accessible to commands such as file stat etc via the string with 'illegal' characters as well as its underlying stored (mapped) name. + + #NUL is almost universally problematic - so we will reject. + if {[tcl::string::length $e_check]==0 || [string first \0 $e_check] >= 0} { set msg "$argclass $argname for %caller% requires type '$type'. Received: '$e' which doesn't look like it could be a file or directory" lset clause_results $c_idx $a_idx [list err [list typemismatch $type] msg $msg] continue @@ -6693,6 +6696,30 @@ tcl::namespace::eval punk::args { } lset clause_results $c_idx $a_idx 1 } + existingportablefile - + existingportabledirectory - + portablefile - + portabledirectory { + if {[tcl::string::length $e_check]==0 || [string first \0 $e_check] >= 0 || [punk::winpath::illegalname_test $e_check]} { + set msg "$argclass $argname for %caller% requires type '$type'. Received: '$e' which doesn't look like it could be a portable file or directory (must pass punk::winpath::illegalname_test)" + lset clause_results $c_idx $a_idx [list err [list typemismatch $type] msg $msg] + continue + } + if {$type eq "existingportablefile"} { + if {![file exists $e_check]} { + set msg "$argclass $argname for %caller% requires type '$type'. Received: '$e' which is not an existing file" + lset clause_results $c_idx $a_idx [list err [list typemismatch $type] msg $msg] + continue + } + } elseif {$type eq "existingportabledirectory"} { + if {![file isdirectory $e_check]} { + set msg "$argclass $argname for %caller% requires type '$type'. Received: '$e' which is not an existing directory" + lset clause_results $c_idx $a_idx [list err [list typemismatch $type] msg $msg] + continue + } + } + lset clause_results $c_idx $a_idx 1 + } char { #review - char vs unicode codepoint vs grapheme? if {[tcl::string::length $e_check] != 1} { diff --git a/src/bootsupport/modules/punk/args/moduledoc/tclcore-0.1.0.tm b/src/bootsupport/modules/punk/args/moduledoc/tclcore-0.1.0.tm index 8b6f82ef..97a9a2a1 100644 --- a/src/bootsupport/modules/punk/args/moduledoc/tclcore-0.1.0.tm +++ b/src/bootsupport/modules/punk/args/moduledoc/tclcore-0.1.0.tm @@ -4490,17 +4490,15 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- - #DOCUMENTED: CLOCK_ARITHMETIC and TIME ZONES references added to help text + #TODO - add CLOCK_ARITHMETIC documentation + #TODO - TIME ZONES documentation? lappend PUNKARGS [list { @id -id ::tcl::clock::add @cmd -name "Built-in: tcl::clock::add"\ -summary\ "Add an offset to timeVal in seconds (base 1970-01-01 00:00 UTC)"\ -help\ - "Adds a (possibly negative) offset to a time that is expressed as an integer number of seconds. - CLOCK ARITHMETIC: Supports count_unit pairs (e.g., 1 month, 2 weeks) for flexible date arithmetic. - TIME ZONES: Use -timezone option with values like :UTC, :localtime, or location-based zones (:America/New_York). - See the clock manpage for complete CLOCK ARITHMETIC and TIME ZONES documentation." + "Adds a (possibly negative) offset to a time that is expressed as an integer number of seconds. See CLOCK ARITHMETIC for a full description." @leaders -min 1 -max -1 timeVal -type integer|literal(now) -help\ "Time value in integer number of seconds since epoch time. @@ -5427,6 +5425,23 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { glob */ }]} except that the first case doesn't return the trailing “/” and is more platform independent." + #todo - review eopts handling. + #The actual glob function will treat the first non-dashed argument as the first pattern, whereas + #punk::args will treat the first invalid option or non-dashed as the end of options and the start of patterns. + #This means that if you have a pattern that starts with a dash, you must use -- to mark the end of options. + #We need a way to force punk::args to have that more restrictive behaviour in this case so that the help is aligned. + #ie tcl glob requires -- whenever values start with a dash, but punk::args only requires it only when the first of multiple values + #start with a dash or when a single has a prefix-match for a flag. + #Arguably punk::args should be more restrictive and require -- whenever any value starts with a dash. + #e.g punk::args will accept "glob -nonflag" but will warn of "glob -nonflag -nonflag" as an invalid option. + #This helps provide warnings for mistyped flags, but is potentially confusing. + #In any case, the help for glob should be consistent with the actual behaviour of glob, so we need to make sure that punk::args + #is configured to require -- whenever any value starts with a dash. + #This requires either a change to the default behaviour of punk::args when {-- -type none} is used, + #or a new option to control this behaviour. + #A new option may be preferable to avoid changing the default behaviour of punk::args in other cases where it may be desirable + #to allow values starting with dashes without requiring --. + #(e.g when a command only accepts a single value and that value may start with a dash) -- -type none -help\ "Marks the end of switches. The argument following this one will be treated as a pattern even if it starts with a -." @@ -5760,8 +5775,8 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { @form -form configure @values -min 0 -max -1 - #FUTURE: Implement choice-parameters for better validation - #Would allow: -choiceparameters {literalprefix type} for smarter option validation + #TODO - choice-parameters + #?? -choiceparameters {literalprefix type} optionpair\ -type {string any}\ -typesynopsis {${$I}-option value${$NI}}\ @@ -5769,7 +5784,7 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { -multiple 1\ -choicerestricted 0\ -choices {{-command string} {-granularity int} {-milliseconds int} {-seconds int} {-value any}}\ - -help "Option-value pairs. Valid options: -command, -granularity, -milliseconds, -seconds, -value" + -help "(todo: adjust args definition to validate optionpairs properly)" @form -form query @values -min 0 -max 1 @@ -6943,7 +6958,7 @@ tcl::namespace::eval punk::args::moduledoc::tclcore { @opts -type string -parsekey "" -group "NESTED LIST OPTIONS" -grouphelp\ "These options are used to search lists of lists. They may be used with any other options." - -stride -type integer -default 1 -typesynopsis strideLength -help\ + -stride -type integer -default 1 -range {1 ""} -typesynopsis strideLength -help\ "If this option is specified, the list is treated as consisting of groups of ${$I}strideLength${$NI} elements and the groups are searched by either their first element or, if the ${$B}-index${$N} option is used, by the element within each group given by the first index passed to -index (which is then ignored by -index). The resulting diff --git a/src/bootsupport/modules/punk/auto_exec-0.1.0.tm b/src/bootsupport/modules/punk/auto_exec-0.1.0.tm new file mode 100644 index 00000000..22486038 --- /dev/null +++ b/src/bootsupport/modules/punk/auto_exec-0.1.0.tm @@ -0,0 +1,786 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'dev make' or bin/punkmake to update from -buildversion.txt +# module template: shellspy/src/decktemplates/vendor/punk/modules/template_module-0.0.4.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) 2026 +# +# @@ Meta Begin +# Application punk::auto_exec 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +package require Tcl 8.6- + +#---------------------- +# registry notes + +#Computer\HKEY_CURRENT_USER\Software\Microsoft\Windows\CurrentVersion\ApplicationAssociationToasts +# - associates applications/file types with extensions and protocols. + +#.cp items have a shell\cplopen\command subkey instead of shell\open\command +#- but the command string has the same format and placeholders as the shell\open\command entries for other file types, +#so we can handle them in the same way as other file types when we extract the associated command string. + +#---------------------- + + + +tcl::namespace::eval punk::auto_exec { + namespace eval argdoc { + variable PUNKARGS + set I "\x1b\[3m" ;# [a+ italic] + set NI "\x1b\[23m" ;# [a+ noitalic] + set B "\x1b\[1m" ;# [a+ bold] + set N "\x1b\[22m" ;# [a+ normal] + set T "\x1b\[1\;4m" ;# [a+ bold underline] + set NT "\x1b\[22\;24m\x1b\[4:0m" ;# [a+ normal nounderline] + } + + variable PUNKARGS + lappend PUNKARGS [list { + @id -id "::punk::auto_exec::shell_open_command" + @cmd -name "punk::auto_exec::assoc" -help\ + {Returns the raw 'open' command string associated with the file type for the given file extension, + by looking up the user-specific association in the registry and falling back to the system + association if no user-specific association is found. + + The resulting command string can contain placeholders like "%1" for the file name. + e.g .py -> "c:\Program Files\Python\python.exe" "%1" + e.g .rb -> "C:\tools\ruby31\bin\ruby.exe" "%1" %* + e.g .vbs -> "%SystemRoot%\System32\WScript.exe" "%1" %* + + Note that the resulting string has unescaped backslashes within double quotes, so it is + not suitable for direct execution by Tcl without further processing to handle the backslashes + and placeholders. + + see 'shell_command_as_tcl_list' for processing the command string into a Tcl list of command and + arguments with placeholders substituted. + } + @opts + @values -min 0 -max 1 + ext -type string -default "" -optional true -help\ + "File extension to look up, e.g .txt or .py" + }] + proc shell_open_command {ext} { + #look up assoc and ftype to find associated file type and application. + #Note that the windows 'ftype' and 'assoc' commands only examine the system's data - not the user-specific data which may override the system data. + #to get the user-specific associations we need to read the registry keys. + + #extensions in the registry seem to be stored lower case wnd with a leading dot. + set lext [string tolower $ext] + package require registry + set user_assoc_path [join [list HKEY_CURRENT_USER Software Microsoft Windows CurrentVersion Explorer FileExts $lext UserChoice] "\\"] + + #The UserChoice subkey under this key is where the user-specific association is stored when the user has made a choice. It contains a Progid value which points to the associated file type. + #It can return something like "Python.File" or "Applications\python.exe" or even tcl_auto_file (for .tcl files associated with tclsh by a tcl installer) + + set openverb "open" + + + #The UserChoice key may not exist if the user hasn't made an explicit choice, in which case we fall back to the system association which is stored under HKEY_CLASSES_ROOT\$ext (which also contains user-specific overrides but is more cumbersome to query for them) + if {![catch {registry get $user_assoc_path Progid} user_choice]} { + if {$user_choice ne ""} { + #examples such as Applications\python.exe and tcl_auto_file are found under HKEY_CURRENT_USER\Software\Classes + #they then have a sub-path shell\open\command which contains the command to open files of that type, which is what we need to extract the associated application. + #it is faster to query for the key directly within a catch than to retrieve keys as lists and iterate through them to find the one we want. + + #special case .cpl cplfile + if {$user_choice eq "cplfile"} { + set openverb "cplopen" + } + set verbinfo [ftype $user_choice] + if {[dict exists $verbinfo $openverb]} { + set ftypeinfo [dict get $verbinfo $openverb] + return [dict set ftypeinfo filetype $user_choice] + } else { + return [dict create type "notfound" value "" scope user filetype $user_choice] + } + + } else { + #review - is it possible for Progid to be empty string? If so we should probably fall back to system association instead of returning no association. + #alternatively it could mean the user has explicitly chosen to have no association for this file type - in which case returning an empty association may be the correct behaviour. + return [dict create type "empty" value "" scope user filetype ""] + } + } else { + #fall back to system association and ftype + if {![catch {registry get [join [list HKEY_LOCAL_MACHINE SOFTWARE Classes $lext] "\\"] ""} ftype]} { + #ftype is the file type associated with the extension, e.g "Python.File" + #we then need to look up the command associated with this file type under the same path but with the file type instead of the extension and with the additional sub-path shell\open\command + + #special case .cpl cplfile + if {$ftype eq "cplfile"} { + set openverb "cplopen" + } + set verbinfo [ftype $ftype] + if {[dict exists $verbinfo $openverb]} { + set ftypeinfo [dict get $verbinfo $openverb] + return [dict set ftypeinfo filetype $ftype] + } else { + return [dict create type "notfound" value "" scope system filetype $ftype] + } + } else { + return [dict create type "notfound" value "" scope "" filetype ""] + } + } + #shouldn't get here - there is a return in each branch above. + return "no-result" + } + + # %1 - standard placeholder for the first file parameter. + # %L (or %l) - Long File Name form of the path. + # %* - replaces with all parameters passed to the command. (ie not including the command itself) + # %W (or %w) - working directory. + + # other placeholders that we won't handle for now: + # %I (or %i) - handle to an Item ID List (IDList) + # e.g for filetype ArchiveFolder (associated with .7z, .gz, .CPIO etc) we have "%SystemRoot%\Explorer.exe" /idlist,%I,%L + # %D (or %d) - Desktop absolute parsing name of the first parameter + # (for items that don't have file system paths) will be the same as %1 for file system items. + + #todo - we need to substitute other placeholders such as %SystemRoot% etc. (case insensitively) + # - but only if the type of the key is expand_sz. + + #These are environment variables that can be used in the command string. + #when type is sz - only the single letter placeholders are substituted, and environment variables are not substituted. + #when type is expand_sz - the single letter placeholders are substituted, and environment variables are also substituted + #(case insensitively) - but only if they are in the form %VAR% + #A matching environment variable will take precedence over a single letter placeholder if there is a conflict, + #e.g if the string is %1% and there is an environment variable named "1" then it will be substituted instead of the %1 placeholder. + #a non-existant environment variable will not be substituted and it's wrapping % characters will not be included in the output. + #similarly a single letter placeholder that is not recognised, such as %x, will result in only the x being included in the output without the leading % character. + + + #todo - tests for shell_command_as_tcl_list with various combinations of placeholders, quoted and unquoted, + #and with both reg_sz and reg_expand_sz types, and with various edge cases such as missing arguments, extra arguments, + #empty arguments, unrecognized placeholders, invalid env var references etc. + lappend PUNKARGS [list { + @id -id "::punk::auto_exec::shell_command_as_tcl_list" + @cmd -name "punk::auto_exec::shell_command_as_tcl_list" -help\ + {} + @opts + -type -type string -default sz -choices {sz expand_sz} -help\ + "The type of the registry value containing the command string, which determines how + environment variables are substituted. sz means environment variables will not + be substituted, and expand_sz means environment variables will be substituted + if they are in the form %VAR% (case insensitively) and will take precedence over + single letter placeholders if there is a conflict. + + In either case, the single letter placeholders will be substituted as follows: + %1 - standard placeholder for the first file parameter. + %L (or %l) - Long File Name form of the path. + %* - replaced with all subsequent parameters passed to the command. + (but not including the script name itself) + %W (or %w) - working directory." + -workingdir -type string -default "" -help\ + "The working directory to substitute for the %W (or %w) placeholder." + @values -min 1 -max -1 + commandspec -type string -help\ + "The command string to process, which can contain placeholders like %1 for the file name, + and a list of arguments to substitute for the placeholders. The command string is typically + obtained from the registry for a file type association, and the arguments are typically the + file name and other parameters to substitute into the command string." + arg -type any -multiple 1 -optional 1 -help\ + {One or more arguments to substitute for the placeholders in the command string. + The first argument (often a script or document path) will be substituted for %1, + the second argument will be substituted for %2, and so on. If the command string + contains a %* placeholder, then all of the arguments will be substituted for that + placeholder starting from %2. + If there are more placeholders than arguments, then the extra placeholders will be + substituted with empty string. + If missing arguments are specified in the commandspec as quoted strings, eg "%3" then + corresponding empty strings as separate arguments will be included in the output.} + }] + proc shell_command_as_tcl_list {args} { + set argd [punk::args::parse $args withid ::punk::auto_exec::shell_command_as_tcl_list] + lassign [dict values $argd] _leaders opts values received + set type [dict get $opts -type] + set workingdir [dict get $opts -workingdir] + set commandspec [dict get $values commandspec] + if {[dict exists $received arg]} { + set arglist [dict get $values arg] + } else { + set arglist [list] + } + + set result [list] + set quoted [list 0] ;#track whether the current chunk is quoted or not. Only the last item is relevant. + + set chars [split [string trim $commandspec] ""] + set in_quote 0 + set current_chunk {} + set new_chunk 1 + set got_placeholder 0 + for {set i 0} {$i < [llength $chars]} {incr i} { + set char [lindex $chars $i] + if {$in_quote} { + if {$char eq "\""} { + if {$got_placeholder} { + #wasn't a valid placeholder - % not emitted. + set got_placeholder 0 + } + #The windows implementation doesn't seem to close off a chunk just because it encounters a closing quote, + #so we don't do that either. + #The closing quote just affects whether the next space will terminate the chunk or not. + set in_quote 0 + } else { + if {$char eq "%"} { + #we do not handle the trailing % of an env var such as %VAR% here + #- as we scan for that in the default case of the switch below. + if {$got_placeholder} { + #this is a % escaped by doubling up ie a literal % in the output + append current_chunk "%" + set got_placeholder 0 + } else { + set got_placeholder 1 + } + } elseif {$got_placeholder} { + if {$type eq "expand_sz"} { + #we scan for environment variables in the form %VAR% (case insensitively) and substitute them if found, taking precedence over single letter placeholders if there is a conflict. + #a non-existant environment variable will not be substituted and it's wrapping % characters will not be included in the output. + set env_var_name $char + for {set j [expr {$i+1}]} {$j < [llength $chars]} {incr j} { + set next_char [lindex $chars $j] + if {$next_char in {" " \t}} { + #end of env var name - we don't expect to see spaces within environment variable names, + #treat space as terminator indicating failure to match. + break + } elseif {$next_char eq "\""} { + #end of env var name - we don't expect to see quotes within environment variable names, + #treat quote as terminator indicating failure to match. + break + } elseif {$next_char eq "%"} { + #end of *possible* env var name + break + } else { + append env_var_name $next_char + } + } + if {$next_char eq "%"} { + #we found a closing % character, so we have a possible env var name between the two % characters. + #we will substitute the env var value if it exists, + # or the value of any single letter placeholder if there is a match, + # or the original string between the two % characters if there is no match for either an env var or a single letter placeholder. + if {[info exists ::env($env_var_name)]} { + append current_chunk $::env($env_var_name) + set got_placeholder 0 + set i $j ;#advance past the env var name and closing % character for next iteration of main loop + continue + } + } else { + #we didn't find a closing % character, so this isn't a valid env var reference. + set env_var_name "" + #fall through to single % placeholder handling below, + #which will treat the first character after the % as a single letter placeholder and include the rest of the string as literal characters in the output. + } + } + + switch -- $char { + 1 - L - l { + append current_chunk [lindex $arglist 0] + } + 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { + append current_chunk [lindex $arglist $char-1] + } + D - d { + #review + #we don't have a way to represent an Item ID List (IDList) in our API, so we'll just substitute the same value as %1 for now, + #which is what windows seems to do for file system items that have file paths. + append current_chunk [lindex $arglist 0] + } + I - i { + #we don't have a way to represent an Item ID List (IDList) in our API, so we'll just substitute something for now. + #format seems to be 9 and 5 digits :nnnnnnnnn:nnnnn + #REVIEW! unknown consequences! + append current_chunk ":000000000:00000" + } + "*" { + append current_chunk [lrange $arglist 1 end] + } + W - w { + append current_chunk $workingdir + } + default { + if {$type eq "expand_sz"} { + #if expand_sz we would have already scanned for environment variables in the form %VAR% and substituted them if found, + #taking precedence over single letter placeholders if there is a conflict. + append current_chunk "$char" [string range $env_var_name 1 end] ;#if env_var_name is empty string then this will just append the single char after the % as a literal, + #which is the correct behaviour for unrecognized placeholders such as %x and for invalid env var references such as %NONEXISTANT% + } else { + append current_chunk $char + } + } + } + set got_placeholder 0 + } else { + append current_chunk $char + } + } + } else { + #NOT in quoted string + if {$char eq "\""} { + if {$got_placeholder} { + append current_chunk "%" ;#wasn't a valid placeholder char, so we need to add the % that we stripped off back into the output + set got_placeholder 0 + } + set in_quote 1 + set new_chunk 0 + lset quoted end 1 ;#we'll mark as quoted if any quote found in it - review + } elseif {$char in [list " " \t ]} { + #we don't expect to see tabs or other whitespace characters as separators in the command string, but we'll treat tab the same as spaces just in case. + if {$got_placeholder} { + #wasn't a valid placeholder char. The % is stripped and not included in the output. + set got_placeholder 0 + } + #space terminates an unquoted chunk, so we add it to the result list and start a new chunk. + #we also need to ensure that consecutive spaces are treated as a single separator and don't result in empty items in the output list, + + if {!$new_chunk} { + if {$current_chunk ne "" || $current_chunk eq "" && [lindex $quoted end]} { + #we add the current chunk to the result list if it's not empty, or if it is empty but is quoted (because in that case we want to preserve it as an empty argument). + lappend result $current_chunk + lappend quoted 0 + set current_chunk {} + } + set new_chunk 1 + } + for {set j [expr {$i+1}]} {$j < [llength $chars]} {incr j} { + if {[lindex $chars $j] in {" " \t}} { + incr i + } else { + break + } + } + } else { + if {$got_placeholder} { + if {$type eq "expand_sz"} { + #we scan for environment variables in the form %VAR% (case insensitively) and substitute them if found, taking precedence over single letter placeholders if there is a conflict. + #a non-existant environment variable will not be substituted and it's wrapping % characters will not be included in the output. + set env_var_name $char + for {set j [expr {$i+1}]} {$j < [llength $chars]} {incr j} { + set next_char [lindex $chars $j] + if {$next_char in {" " \t}} { + #end of env var name - we don't expect to see spaces within environment variable names, + #treat space as terminator indicating failure to match. + break + } elseif {$next_char eq "\""} { + #end of env var name - we don't expect to see quotes within environment variable names, + #treat quote as terminator indicating failure to match. + break + } elseif {$next_char eq "%"} { + #end of *possible* env var name + break + } else { + append env_var_name $next_char + } + } + if {$next_char eq "%"} { + #we found a closing % character, so we have a possible env var name between the two % characters. + #we will substitute the env var value if it exists, + # or the value of any single letter placeholder if there is a match, + # or the original string between the two % characters if there is no match for either an env var or a single letter placeholder. + if {[info exists ::env($env_var_name)]} { + append current_chunk $::env($env_var_name) + set got_placeholder 0 + set i $j ;#advance past the env var name and closing % character for next iteration of main loop + continue + } + } else { + #we didn't find a closing % character, so this isn't a valid env var reference. + set env_var_name "" + #fall through to single % placeholder handling below, + #which will treat the first character after the % as a single letter placeholder and include the rest of the string as literal characters in the output. + } + } + + switch -- $char { + "%" { + append current_chunk "%" + set got_placeholder 1 + continue + } + 1 - + L - l { + #append current_chunk [lindex $arglist 0] + set append_value [string trim [lindex $arglist 0]] + foreach ch [split $append_value ""] { + if {$ch eq " "} { + #we need to split on spaces within arguments as well, because unquoted %1 can expand to multiple arguments if the first argument contains spaces, and we need to preserve the correct splitting of arguments in that case. + #e.g if %1 is substituted with "C:\My Documents\file.txt" then we need to split that into three items in the output list: "C:\My", "Documents\file.txt" + if {$current_chunk ne ""} { + lappend result $current_chunk + lappend quoted 0 + set current_chunk {} + } + set new_chunk 1 + } else { + append current_chunk $ch + set new_chunk 0 + } + } + } + 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 { + append current_chunk [lindex $arglist $char-1] + } + D - d { + #review + #we don't have a way to represent an Item ID List (IDList) in our API, so we'll just substitute the same value as %1 for now, + #which is what windows seems to do for file system items that have file paths. + append current_chunk [lindex $arglist 0] + } + I - i { + #we don't have a way to represent an Item ID List (IDList) in our API, so we'll just substitute something for now. + #format seems to be 9 and 5 digits :nnnnnnnnn:nnnnn + #REVIEW! unknown consequences! + append current_chunk ":000000000:00000" + } + "*" { + #the window implementation of unquoted %* always seems to emit the previous chunk before the %* placeholder as a separate item + #in the command line, even if there is no space between them, so we will do the same. + #Note that a following string (quoted or not) immediately after the %* will be appended to the last item in the output list rather than being a separate item, + #which is also consistent with the microsoft implementation. + if {!$new_chunk} { + if {$current_chunk ne "" || $current_chunk eq "" && [lindex $quoted end]} { + lappend result $current_chunk + lappend quoted 0 + set current_chunk {} + } + } + lappend result {*}[lrange $arglist 1 end] + lappend quoted {*}[lrepeat [llength $arglist] 0] + } + W - w { + append current_chunk $workingdir + } + default { + if {$type eq "expand_sz"} { + #if expand_sz we would have already scanned for environment variables in the form %VAR% and substituted them if found, + #taking precedence over single letter placeholders if there is a conflict. + append current_chunk "$char" [string range $env_var_name 1 end] ;#if env_var_name is empty string then this will just append the single char after the % as a literal, + #which is the correct behaviour for unrecognized placeholders such as %x and for invalid env var references such as %NONEXISTANT% + } else { + append current_chunk $char + } + } + } + set got_placeholder 0 + set new_chunk 0 + } else { + if {$char eq "%"} { + set got_placeholder 1 + } else { + append current_chunk $char + set new_chunk 0 + } + } + } + } + } + + if {$current_chunk ne "" || $current_chunk eq "" && [lindex $quoted end]} { + #review - edge case - if the command string ends with a space then we would have already added the last chunk to the result list and started a new chunk, so we shouldn't add an empty chunk to the result list in that case. + #however if the last chunk was quoted and is empty because the command string ends with a placeholder that is substituted with an empty string, then we should add the empty chunk to the result list. + lappend result $current_chunk + } + return $result + } + + namespace eval punk::auto_exec::system { + proc assoc_get_info {ext} { + set lext [string tolower $ext] + set result [dict create system "" user ""] + set user_assoc_path [join [list HKEY_CURRENT_USER Software Microsoft Windows CurrentVersion Explorer FileExts $lext UserChoice] "\\"] + if {![catch {registry get $user_assoc_path Progid} user_choice]} { + dict set result user $user_choice + } + if {![catch {registry get [join [list HKEY_LOCAL_MACHINE SOFTWARE Classes $lext] "\\"] ""} ftype]} { + dict set result system $ftype + } + return $result + } + } + lappend PUNKARGS [list { + @id -id "::punk::auto_exec::assoc" + @cmd -name "punk::auto_exec::assoc"\ + -summary\ + "Look up the associated file type (system and user) for a file extension"\ + -help\ + "Get the associated file type for a file extension by looking up the user-specific + file type in the registry and falling back to the system file type if no + user-specific association is found. + + Returns a dict with keys ${$I}system${$NI} and ${$I}user${$NI}. + One or more of the key values may be empty string if there is no defined + file type for the extension. + + This is somewhat like the windows 'assoc' command except that the windows command + only looks up the system association and does not take into account any user-specific + overrides. + This function returns both values in the result dictionary if they are available." + @opts + @values -min 0 -max 1 + ext -type string -default "" -optional true -help\ + "File extension to look up, e.g .txt or .py" + }] + proc assoc {args} { + package require registry + set argd [punk::args::parse $args withid ::punk::auto_exec::assoc] + set ext [dict get $argd values ext] + + if {$ext ne ""} { + return [punk::auto_exec::system::assoc_get_info $ext] + } else { + #look up all associated ftypes + set user_ftypes [registry keys {HKEY_CURRENT_USER\Software\Microsoft\Windows\CurrentVersion\Explorer\FileExts} .*] + + set system_ftypes [registry keys {HKEY_LOCAL_MACHINE\SOFTWARE\Classes} .*] + set all_ftypes [lsort -unique [concat $user_ftypes $system_ftypes]] + set results [list] + foreach ftype $all_ftypes { + dict set results $ftype [punk::auto_exec::system::assoc_get_info $ftype] + } + return $results + } + } + namespace eval argdoc { + lappend PUNKARGS [list { + @id -id "::punk::auto_exec::ftype" + @cmd -name "punk::auto_exec::ftype"\ + -summary\ + "Look up shell verb command values from windows file type."\ + -help\ + "Get the associated shell verb information (such as open) for a file type by looking up the user-specific + association in the registry and falling back to the system association if no + user-specific association is found. + + Returns a dict of dicts with toplevel keys for each shell verb (e.g open, runas) and values that are dicts with keys + ${$I}type${$NI} and ${$I}value${$NI} and ${$I}scope${$NI}, where + type is determined from the registry value type (e.g sz or expand_sz) + string is the raw command string from the registry + scope is either "user" or "system" depending on whether the value was found in the user-specific registry keys or + the system registry keys. + + This is somewhat like the windows 'ftype' command except that the windows command only looks for the 'open' verb and + only looks up the system association and does not take into account any user-specific + overrides. + + The file type can be looked up using the ${$B}assoc${$N} function in this package. + + The command string can contain placeholders like \"%1\" for the file name, and environment variables + in the form %VAR% (case insensitive) if the registry value type is reg_expand_sz (expand_sz), + which will be substituted by the ${$B}shell_command_as_tcl_list${$N} function when processing the command + string into a Tcl list of command and arguments with placeholders substituted." + @opts + @values -min 0 -max 1 + filetype -type string -default "" -optional true -help\ + "File type associated with a file extension, e.g Python.File. + This can be looked up using the 'assoc' function in this package." + }] + } + #proc ftype {filetype} { + # package require registry + + # if {$filetype eq "cplfile"} { + # #special case for cplfile (associated with .cpl files) which doesn't follow the usual pattern of having the command string under shell\open\command, + # #but instead has it under HKEY_LOCAL_MACHINE\SOFTWARE\Classes\cplfile\shell\cplopen\command. + # #There doesn't seem to be any user-specific override for this file type + # #- but we will check for one under HKEY_CURRENT_USER\Software\Classes\cplfile\shell\cplopen\command anyway for consistency with the way we check + # #for user-specific overrides for other file types. + # set key [join [list HKEY_CURRENT_USER Software Classes cplfile shell cplopen command] "\\"] + # } else { + # set key [join [list HKEY_CURRENT_USER Software Classes $filetype shell open command] "\\"] + # } + # if {![catch {registry get $key ""} raw_assoc]} { + # set tp [registry type $key ""] + # return [dict create open [dict create type $tp string $raw_assoc]] + # } else { + # #e.g Python.File + # if {$filetype eq "cplfile"} { + # set key [join [list HKEY_LOCAL_MACHINE SOFTWARE Classes cplfile shell cplopen command] "\\"] + # } else { + # set key [join [list HKEY_LOCAL_MACHINE SOFTWARE Classes $filetype shell open command] "\\"] + # } + # if {![catch {registry get $key ""} raw_assoc]} { + # set tp [registry type $key ""] + # return [dict create type $tp string $raw_assoc] + # } else { + # return [dict create type "" string ""] ;#no association found + # } + # } + #} + proc ftype {filetype} { + package require registry + set resultdict [dict create] + + #e.g Python.File + set shellpath [join [list HKEY_LOCAL_MACHINE SOFTWARE Classes $filetype shell] "\\"] + if {![catch {registry keys $shellpath *} shellverbs]} { + foreach verb $shellverbs { + set commandkey [join [list $shellpath $verb command] "\\"] + if {![catch {registry get $commandkey ""} cmdstring]} { + #registry queryies are case insensitive but some are cased differently e.g Open vs open. + #when using the verb as a key in the output dict, we need to normalize so that it is useful for lookups. We'll use lowercase for that. + set verb [string tolower $verb] + set tp [registry type $commandkey ""] + dict set resultdict $verb [dict create type $tp value $cmdstring scope system] + } + } + } + + #allow user-specific verbs to be overridden. + set shellpath [join [list HKEY_CURRENT_USER Software Classes $filetype shell] "\\"] + if {![catch {registry keys $shellpath *} shellverbs]} { + foreach verb $shellverbs { + set commandkey [join [list $shellpath $verb command] "\\"] + if {![catch {registry get $commandkey ""} cmdstring]} { + set verb [string tolower $verb] + set tp [registry type $commandkey ""] + dict set resultdict $verb [dict create type $tp value $cmdstring scope user] + } + } + } + + return $resultdict + } + +} + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::auto_exec::lib { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + tcl::namespace::path [tcl::namespace::parent] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +#tcl::namespace::eval punk::auto_exec::system { +#} + + +# == === === === === === === === === === === === === === === +# Sample 'about' function with punk::args documentation +# == === === === === === === === === === === === === === === +tcl::namespace::eval punk::auto_exec { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + variable PUNKARGS + variable PUNKARGS_aliases + + lappend PUNKARGS [list { + @id -id "(package)punk::auto_exec" + @package -name "punk::auto_exec" -help\ + "Package + Description" + }] + + namespace eval argdoc { + #namespace for custom argument documentation + proc package_name {} { + return punk::auto_exec + } + proc about_topics {} { + #info commands results are returned in an arbitrary order (like array keys) + set topic_funs [info commands [namespace current]::get_topic_*] + set about_topics [list] + foreach f $topic_funs { + set tail [namespace tail $f] + lappend about_topics [string range $tail [string length get_topic_] end] + } + #Adjust this function or 'default_topics' if a different order is required + return [lsort $about_topics] + } + proc default_topics {} {return [list Description *]} + + # ------------------------------------------------------------- + # get_topic_ functions add more to auto-include in about topics + # ------------------------------------------------------------- + proc get_topic_Description {} { + punk::args::lib::tstr [string trim { + package punk::auto_exec + description to come.. + } \n] + } + proc get_topic_License {} { + return "" + } + proc get_topic_Version {} { + return "$::punk::auto_exec::version" + } + proc get_topic_Contributors {} { + set authors {{ {Julian Noble}}} + set contributors "" + foreach a $authors { + append contributors $a \n + } + if {[string index $contributors end] eq "\n"} { + set contributors [string range $contributors 0 end-1] + } + return $contributors + } + proc get_topic_custom-topic {} { + punk::args::lib::tstr -return string { + A custom + topic + etc + } + } + # ------------------------------------------------------------- + } + + # we re-use the argument definition from punk::args::standard_about and override some items + set overrides [dict create] + dict set overrides @id -id "::punk::auto_exec::about" + dict set overrides @cmd -name "punk::auto_exec::about" + dict set overrides @cmd -help [string trim [punk::args::lib::tstr { + About punk::auto_exec + }] \n] + dict set overrides topic -choices [list {*}[punk::auto_exec::argdoc::about_topics] *] + dict set overrides topic -choicerestricted 1 + dict set overrides topic -default [punk::auto_exec::argdoc::default_topics] ;#if -default is present 'topic' will always appear in parsed 'values' dict + set newdef [punk::args::resolved_def -antiglobs -package_about_namespace -override $overrides ::punk::args::package::standard_about *] + lappend PUNKARGS [list $newdef] + proc about {args} { + package require punk::args + #standard_about accepts additional choices for topic - but we need to normalize any abbreviations to full topic name before passing on + set argd [punk::args::parse $args withid ::punk::auto_exec::about] + lassign [dict values $argd] _leaders opts values _received + punk::args::package::standard_about -package_about_namespace ::punk::auto_exec::argdoc {*}$opts {*}[dict get $values topic] + } +} +# end of sample 'about' function +# == === === === === === === === === === === === === === === + + +# ----------------------------------------------------------------------------- +# register namespace(s) to have PUNKARGS,PUNKARGS_aliases variables checked +# ----------------------------------------------------------------------------- +# variable PUNKARGS +# variable PUNKARGS_aliases +namespace eval ::punk::args::register { + #use fully qualified so 8.6 doesn't find existing var in global namespace + lappend ::punk::args::register::NAMESPACES ::punk::auto_exec +} +# ----------------------------------------------------------------------------- + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::auto_exec [tcl::namespace::eval punk::auto_exec { + variable pkg punk::auto_exec + variable version + set version 0.1.0 +}] +return + diff --git a/src/bootsupport/modules/punk/char-0.1.0.tm b/src/bootsupport/modules/punk/char-0.1.0.tm index e6bf4b9d..f8f8873e 100644 --- a/src/bootsupport/modules/punk/char-0.1.0.tm +++ b/src/bootsupport/modules/punk/char-0.1.0.tm @@ -237,6 +237,88 @@ tcl::namespace::eval punk::char { return $out } + namespace eval argdoc { + variable PUNKARGS + lappend PUNKARGS [list { + @id -id ::punk::char::ascii2doublewide + @cmd -name punk::char::ascii2doublewide\ + -summary\ + "string to double-wide unicode char"\ + -help\ + "Convert parts of string in the ascii range 21 to 7E to corresponding chars from the unicode + 'Halfwidth and Fullwidth Forms' block. + The space character (0x20) is converted to the 'IDEOGRAPHIC SPACE' character (0x3000) + Control chars and chars outside the ascii range are not converted and passed through as-is." + @values -min 1 -max 1 + str -type string + }] + } + proc ascii2doublewide {str} { + #set base to \UFEE0 and add ascii value of char to get the double-wide character in the 'Halfwidth and Fullwidth Forms' block of unicode + set out "" + foreach ch [split $str ""] { + set decval [scan $ch %c] + if {$decval == 0x20} { + #space char maps to ideographic space rather than the fullwidth space char - as the fullwidth space char is often rendered as a narrow space rather than a wide one - and the ideographic space is more likely to be rendered as a wide space. + set decval 0x3000 + } elseif {$decval < 0x21 || $decval > 0x7E} { + #chars outside the ascii range are not converted - pass through as-is + set decval [scan $ch %c] + } else { + set decval [expr {0xFEE0 + $decval}] + } + append out [format %c $decval] + } + return $out + } + proc doublewide2ascii {str} { + set out "" + foreach ch [split $str ""] { + set decval [scan $ch %c] + if {$decval == 0x3000} { + #ideographic space maps to ascii space char + set decval 0x20 + append out \U3000 + } elseif {$decval < 0xFF01 || $decval > 0xFF5E} { + #chars outside the part of the 'Halfwidth and Fullwidth Forms' block that corresponds to ascii are not converted - pass through as-is + append out $ch + } else { + set decval [expr {$decval - 0xFEE0}] + append out [format %c $decval] + } + } + return $out + } + + proc ascii2NTFSPUA {str} { + #set base to \uF000 and add ascii value of char to get the PUA character in the 'Private Use Area' block of unicode + set out "" + foreach ch [split $str ""] { + set decval [scan $ch %c] + if {$decval <= 0x7F} { + append out [format %c [expr {0xF000 + $decval}]] + } else { + #chars outside the ascii range are not converted - pass through as-is + append out $ch + } + } + return $out + } + proc NTFSPUA2ascii {str} { + set out "" + foreach ch [split $str ""] { + set decval [scan $ch %c] + if {$decval >= 0xF000 && $decval <= 0xF0FF} { + set decval [expr {$decval - 0xF000}] + append out [format %c $decval] + } else { + #chars outside the 'Private Use Area' block that corresponds to ascii are not converted - pass through as-is + append out $ch + } + } + return $out + } + proc symbol {} { tailcall page symbol @@ -2573,11 +2655,269 @@ tcl::namespace::eval punk::char { return [tcl::string::map $map $str] } - #todo - lookup from unicode tables + #ISO-3166-1 alpha-2 country codes to flag emojis + #These can change over time. + #todo - lookup from official sources - and update as needed + # variable flags [dict create\ + AC \U1F1E6\U1F1E8\ + AD \U1F1E6\U1F1E9\ + AE \U1F1E6\U1F1EA\ + AF \U1F1E6\U1F1EB\ + AG \U1F1E6\U1F1EC\ + AI \U1F1E6\U1F1EE\ + AL \U1F1E6\U1F1F1\ + AM \U1F1E6\U1F1F2\ + AO \U1F1E6\U1F1F4\ + AQ \U1F1E6\U1F1F6\ + AR \U1F1E6\U1F1F7\ + AS \U1F1E6\U1F1F8\ + AT \U1F1E6\U1F1F9\ AU \U1F1E6\U1F1FA\ + AW \U1F1E6\U1F1FC\ + AX \U1F1E6\U1F1FD\ + AZ \U1F1E6\U1F1FF\ + BA \U1F1E7\U1F1E6\ + BB \U1F1E7\U1F1E7\ + BD \U1F1E7\U1F1E9\ + BE \U1F1E7\U1F1EA\ + BF \U1F1E7\U1F1EB\ + BG \U1F1E7\U1F1EC\ + BH \U1F1E7\U1F1ED\ + BI \U1F1E7\U1F1EE\ + BJ \U1F1E7\U1F1EF\ + BL \U1F1E7\U1F1F1\ + BM \U1F1E7\U1F1F2\ + BN \U1F1E7\U1F1F3\ + BO \U1F1E7\U1F1F4\ + BQ \U1F1E7\U1F1F6\ + BR \U1F1E7\U1F1F7\ + BS \U1F1E7\U1F1F8\ + BT \U1F1E7\U1F1F9\ + BV \U1F1E7\U1F1FB\ + BW \U1F1E7\U1F1FC\ + BY \U1F1E7\U1F1FE\ + BZ \U1F1E7\U1F1FF\ + CA \U1F1E8\U1F1E6\ + CC \U1F1E8\U1F1E8\ + CD \U1F1E8\U1F1E9\ + CF \U1F1E8\U1F1Eb\ + CG \U1F1E8\U1F1Ec\ + CH \U1F1E8\U1F1ED\ + CI \U1F1E8\U1F1EE\ + CK \U1F1E8\U1F1F0\ + CL \U1F1E8\U1F1F1\ + CM \U1F1E8\U1F1F2\ + CN \U1F1E8\U1F1F3\ + CO \U1F1E8\U1F1F4\ + CP \U1F1E8\U1F1F5\ + CR \U1F1E8\U1F1F7\ + CS \U1F1E8\U1F1F8\ + CU \U1F1E8\U1F1FA\ + CV \U1F1E8\U1F1FB\ + CW \U1F1E8\U1F1FC\ + CX \U1F1E8\U1F1FE\ + CY \U1F1E8\U1F1FE\ + CZ \U1F1E8\U1F1FF\ + DE \U1F1E9\U1F1EA\ + DG \U1F1E9\U1F1EC\ + DJ \U1F1E9\U1F1EF\ + DK \U1F1E9\U1F1F0\ + DM \U1F1E9\U1F1F2\ + DO \U1F1E9\U1F1F4\ + DZ \U1F1E9\U1F1FF\ + EA \U1F1EA\U1F1E6\ + EC \U1F1EA\U1F1E8\ + EE \U1F1EA\U1F1EA\ + EG \U1F1EA\U1F1EC\ + EH \U1F1EA\U1F1ED\ + ER \U1F1EA\U1F1F7\ + ES \U1F1EA\U1F1F8\ + ET \U1F1EA\U1F1F9\ + EU \U1F1EA\U1F1FA\ + FI \U1F1EB\U1F1EE\ + FJ \U1F1EB\U1F1EF\ + FK \U1F1EB\U1F1F0\ + FM \U1F1EB\U1F1F2\ + FO \U1F1EB\U1F1F4\ + FR \U1F1EB\U1F1F7\ + GA \U1F1EC\U1F1E6\ + GB \U1F1EC\U1F1E7\ + GD \U1F1EC\U1F1E9\ + GE \U1F1EC\U1F1EA\ + GF \U1F1EC\U1F1EB\ + GG \U1F1EC\U1F1EC\ + GH \U1F1EC\U1F1ED\ + GI \U1F1EC\U1F1EE\ + GL \U1F1EC\U1F1F1\ + GM \U1F1EC\U1F1F2\ + GN \U1F1EC\U1F1F3\ + GP \U1F1EC\U1F1F5\ + GQ \U1F1EC\U1F1F6\ + GR \U1F1EC\U1F1F7\ + GS \U1F1EC\U1F1F8\ + GT \U1F1EC\U1F1F9\ + GU \U1F1EC\U1F1FA\ + GW \U1F1EC\U1F1FC\ + GY \U1F1EC\U1F1FE\ + HK \U1F1ED\U1F1F0\ + HM \U1F1ED\U1F1F2\ + HN \U1F1ED\U1F1F3\ + HR \U1F1ED\U1F1F7\ + HT \U1F1ED\U1F1F9\ + HU \U1F1ED\U1F1FA\ + IC \U1F1EE\U1F1E8\ + ID \U1F1EE\U1F1E9\ + IE \U1F1EE\U1F1EA\ + IL \U1F1EE\U1F1F1\ + IM \U1F1EE\U1F1F2\ + IN \U1F1EE\U1F1F3\ + IO \U1F1EE\U1F1F4\ + IQ \U1F1EE\U1F1F6\ + IR \U1F1EE\U1F1F7\ + IS \U1F1EE\U1F1F8\ + IT \U1F1EE\U1F1F9\ + JE \U1F1EF\U1F1EA\ + JM \U1F1EF\U1F1F2\ + JO \U1F1EF\U1F1F4\ + JP \U1F1EF\U1F1F5\ + KE \U1F1F0\U1F1EA\ + KG \U1F1F0\U1F1EC\ + KH \U1F1F0\U1F1ED\ + KI \U1F1F0\U1F1EE\ + KM \U1F1F0\U1F1F2\ + KN \U1F1F0\U1F1F3\ + KP \U1F1F0\U1F1F5\ + KR \U1F1F0\U1F1F7\ + KW \U1F1F0\U1F1FC\ + KY \U1F1F0\U1F1FE\ + KZ \U1F1F0\U1F1FF\ + LA \U1F1F1\U1F1E6\ + LB \U1F1F1\U1F1E7\ + LC \U1F1F1\U1F1E8\ + LI \U1F1F1\U1F1EE\ + LK \U1F1F1\U1F1F0\ + LR \U1F1F1\U1F1F7\ + LS \U1F1F1\U1F1F8\ + LT \U1F1F1\U1F1F9\ + LU \U1F1F1\U1F1FA\ + LV \U1F1F1\U1F1FB\ + LY \U1F1F1\U1F1FE\ + MA \U1F1F2\U1F1E6\ + MC \U1F1F2\U1F1E8\ + MD \U1F1F2\U1F1E9\ + ME \U1F1F2\U1F1EA\ + MF \U1F1F2\U1F1EB\ + MG \U1F1F2\U1F1EC\ + MH \U1F1F2\U1F1ED\ + MK \U1F1F2\U1F1F0\ + ML \U1F1F2\U1F1F1\ + MM \U1F1F2\U1F1F2\ + MN \U1F1F2\U1F1F3\ + MO \U1F1F2\U1F1F4\ + MP \U1F1F2\U1F1F5\ + MQ \U1F1F2\U1F1F6\ + MR \U1F1F2\U1F1F7\ + MS \U1F1F2\U1F1F8\ + MT \U1F1F2\U1F1F9\ + MU \U1F1F2\U1F1FA\ + MV \U1F1F2\U1F1FB\ + MW \U1F1F2\U1F1FC\ + MX \U1F1F2\U1F1FD\ + MY \U1F1F2\U1F1FE\ + MZ \U1F1F2\U1F1FF\ + NA \U1F1F3\U1F1E6\ + NC \U1F1F3\U1F1E8\ + NE \U1F1F3\U1F1EA\ + NF \U1F1F3\U1F1EB\ + NG \U1F1F3\U1F1EC\ + NI \U1F1F3\U1F1EE\ + NL \U1F1F3\U1F1F1\ + NO \U1F1F3\U1F1F4\ + NP \U1F1F3\U1F1F5\ + NR \U1F1F3\U1F1F7\ + NU \U1F1F3\U1F1F8\ + NZ \U1F1F3\U1F1ff\ + OM \U1F1F4\U1F1F2\ + PA \U1F1F5\U1F1E6\ + PE \U1F1F5\U1F1EA\ + PF \U1F1F5\U1F1EB\ + PG \U1F1F5\U1F1EC\ + PH \U1F1F5\U1F1ED\ + PK \U1F1F5\U1F1ED\ + PL \U1F1F5\U1F1F1\ + PM \U1F1F5\U1F1F3\ + PN \U1F1F5\U1F1F3\ + PR \U1F1F5\U1F1F7\ + PS \U1F1F5\U1F1F8\ + PT \U1F1F5\U1F1F9\ + PW \U1F1F5\U1F1FC\ + PY \U1F1F5\U1F1FE\ + QA \U1F1F6\U1F1E6\ + RE \U1F1F7\U1F1EA\ + RO \U1F1F7\U1F1F4\ + RS \U1F1F7\U1F1F7\ + RU \U1F1F7\U1F1FA\ + RW \U1F1F7\U1F1FC\ + SA \U1F1F8\U1F1E6\ + SB \U1F1F8\U1F1E7\ + SC \U1F1F8\U1F1E8\ + SD \U1F1F8\U1F1E9\ + SE \U1F1F8\U1F1EA\ + SG \U1F1F8\U1F1EC\ + SH \U1F1F8\U1F1ED\ + SI \U1F1F8\U1F1EE\ + SJ \U1F1F8\U1F1EF\ + SK \U1F1F8\U1F1F0\ + SL \U1F1F8\U1F1F1\ + SM \U1F1F8\U1F1F2\ + SN \U1F1F8\U1F1F3\ + SO \U1F1F8\U1F1F4\ + SR \U1F1F8\U1F1F7\ + SS \U1F1F8\U1F1F8\ + ST \U1F1F8\U1F1F9\ + SV \U1F1F8\U1F1F7\ + SX \U1F1F8\U1F1F8\ + SY \U1F1F8\U1F1FE\ + SZ \U1F1F8\U1F1FF\ + TA \U1F1F9\U1F1E6\ + TC \U1F1F9\U1F1E8\ + TD \U1F1F9\U1F1E9\ + TF \U1F1F9\U1F1EB\ + TG \U1F1F9\U1F1EC\ + TH \U1F1F9\U1F1ED\ + TJ \U1F1F9\U1F1EF\ + TK \U1F1F9\U1F1F0\ + TL \U1F1F9\U1F1F1\ + TM \U1F1F9\U1F1F2\ + TN \U1F1F9\U1F1F3\ + TO \U1F1F9\U1F1F4\ + TR \U1F1F9\U1F1F7\ + TT \U1F1F9\U1F1F9\ + TV \U1F1F9\U1F1FB\ + TW \U1F1F9\U1F1FC\ + TZ \U1F1F9\U1F1FF\ + UA \U1F1FA\U1F1E6\ + UG \U1F1FA\U1F1EC\ + UM \U1F1FA\U1F1F2\ US \U1F1FA\U1F1F8\ - ZW \U1F1FF\U1F1FC + UY \U1F1FA\U1F1FE\ + UZ \U1F1FA\U1F1FF\ + VA \U1F1FB\U1F1E6\ + VC \U1F1FB\U1F1E8\ + VE \U1F1FB\U1F1EA\ + VG \U1F1FB\U1F1EC\ + VI \U1F1FB\U1F1EE\ + VN \U1F1FB\U1F1F3\ + VU \U1F1FB\U1F1FA\ + WF \U1F1FC\U1F1EB\ + WS \U1F1FC\U1F1F8\ + XK \U1F1FD\U1F1F3\ + YE \U1F1FE\U1F1EA\ + YT \U1F1FE\U1F1F9\ + ZA \U1F1FF\U1F1E6\ + ZM \U1F1FF\U1F1F2\ + ZW \U1F1FF\U1F1FC\ ] variable rflags dict for {k v} $flags { @@ -2825,6 +3165,15 @@ tcl::namespace::eval punk::char::lib { } } +# ----------------------------------------------------------------------------- +# register namespace(s) to have PUNKARGS,PUNKARGS_aliases variables checked +# ----------------------------------------------------------------------------- +# variable PUNKARGS +# variable PUNKARGS_aliases +namespace eval ::punk::args::register { + #use fully qualified so 8.6 doesn't find existing var in global namespace + lappend ::punk::args::register::NAMESPACES ::punk::char +} # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ diff --git a/src/bootsupport/modules/punk/du-0.1.0.tm b/src/bootsupport/modules/punk/du-0.1.0.tm index c9ab54fd..6d2eb59f 100644 --- a/src/bootsupport/modules/punk/du-0.1.0.tm +++ b/src/bootsupport/modules/punk/du-0.1.0.tm @@ -276,8 +276,16 @@ namespace eval punk::du { } } - - set dirs [dict get $du_info dirs] + set rawdirs [dict get $du_info dirs] + set dirs [list] + foreach d $rawdirs { + if {[file tail $d] eq "." || [file tail $d] eq ".."} { + #skip . and .. entries if they are returned by the dirlisting function + continue + } + lappend dirs $d + } + set files [dict get $du_info files] set filesizes [dict get $du_info filesizes] set vfsmounts [dict get $du_info vfsmounts] @@ -420,7 +428,6 @@ namespace eval punk::du { set size [expr {$bytes / [set $switch]}] lappend retval [list $size $path] } - # copyright 2002 by The LIGO Laboratory return $retval } namespace eval active { @@ -479,23 +486,92 @@ namespace eval punk::du { } namespace eval lib { variable du_literal - variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ] - #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api - #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this - proc decode_win_attributes {bitmask} { - variable winfile_attributes - if {[dict exists $winfile_attributes $bitmask]} { - return [dict get $winfile_attributes $bitmask] + #The cache is populated with some common combinations of flags - and then populated with any new combinations as they are encountered during decoding + variable ntfs_attributes_cache + set ntfs_attributes_cache(16) [list directory] + set ntfs_attributes_cache(32) [list archive] + set ntfs_attributes_cache(1024) [list reparse_point] + set ntfs_attributes_cache(18) [list directory hidden] + set ntfs_attributes_cache(34) [list archive hidden] + set ntfs_attributes_cache(52) [list directory archive system] + set ntfs_attributes_cache(1040) [list directory reparse_point] + set ntfs_attributes_cache(1048576) [list unpinned] + set ntfs_attributes_cache(524288) [list pinned] + + #https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants + variable ntfs_attribute_flags + set ntfs_attribute_flags(1) archive + set ntfs_attribute_flags(2) hidden + set ntfs_attribute_flags(4) system + set ntfs_attribute_flags(16) directory + set ntfs_attribute_flags(32) archive + set ntfs_attribute_flags(64) device + set ntfs_attribute_flags(128) normal + set ntfs_attribute_flags(256) temporary + set ntfs_attribute_flags(512) sparse_file + set ntfs_attribute_flags(1024) reparse_point + set ntfs_attribute_flags(2048) compressed + set ntfs_attribute_flags(4096) offline + set ntfs_attribute_flags(8192) not_content_indexed + set ntfs_attribute_flags(16384) encrypted + set ntfs_attribute_flags(32768) integrity_stream + set ntfs_attribute_flags(65536) virtual + set ntfs_attribute_flags(131072) no_scrub_data + #set ntfs_attribute_flags(262144) ?? ;#marked as internal use only - but also as recall_on_open in 'directory enumeration classes'. Unsure if it applies here. + set ntfs_attribute_flags(524288) pinned + set ntfs_attribute_flags(1048576) unpinned + + + proc decode_ntfs_attributes {bitmask} { + variable ntfs_attribute_flags + set flags [list] + #array for not available in tcl 8.6 - but we can iterate over result of 'array get' + foreach {flagbit flagname} [array get ntfs_attribute_flags] { + if {$bitmask & $flagbit} { + lappend flags $flagname + } + } + return $flags + } + proc lookup_ntfs_attributes {bitmask} { + variable ntfs_attributes_cache + if {[info exists ntfs_attributes_cache($bitmask)]} { + return [set ntfs_attributes_cache($bitmask)] } else { - #list/dict shimmering? - #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + set decoded [decode_ntfs_attributes $bitmask] + set ntfs_attributes_cache($bitmask) $decoded + return $decoded + } + } + #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api + #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this + proc lookup_ntfs_attributes1 {bitmask} { + variable ntfs_attributes_cache + if {[info exists ntfs_attributes_cache($bitmask)]} { + return [set ntfs_attributes_cache($bitmask)] + } else { set decoded [twapi::decode_file_attributes $bitmask] - dict set winfile_attributes $bitmask $decoded + # 2026 - twapi call doesn't seem to know about all the flags that we can encounter. + set ntfs_attributes_cache($bitmask) $decoded return $decoded } } + #proc decode_win_attributes {bitmask} { + # variable winfile_attributes + # if {[dict exists $winfile_attributes $bitmask]} { + # return [dict get $winfile_attributes $bitmask] + # } else { + # #list/dict shimmering? + # #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + # set decoded [twapi::decode_file_attributes $bitmask] + # #todo + # #set extra_flags [dict create 524288 pinned 1048576 unpinned] + # dict set winfile_attributes $bitmask $decoded + # return $decoded + # } + #} variable win_reparse_tags #implied prefix for all names IO_REPARSE_TAG_ #list of reparse tags: https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/c8e77b37-3909-4fe6-a4ea-2b9d423b1ee4 @@ -578,59 +654,63 @@ namespace eval punk::du { proc Get_attributes_from_iteminfo {args} { variable win_reparse_tags_by_int #set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo] - set defaults [dict create -debug 0 -debugchannel stderr] - set opts [dict merge $defaults [lrange $args 0 end-1]] - set iteminfo [lindex $args end] + set defaults [dict create -debug 0 -debugchannel stderr] + set opts [dict merge $defaults [lrange $args 0 end-1]] + set iteminfo [lindex $args end] set opt_debug [dict get $opts -debug] set opt_debugchannel [dict get $opts -debugchannel] #-longname is placeholder - caller needs to set - set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}] + set result [dict create -archive 0 -directory 0 -normal 0 -compressed 0 -not_content_indexed 0 -reparse_point 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw $iteminfo] if {$opt_debug} { set dbg "iteminfo returned by find_file_open\n" - append dbg [pdict -channel none iteminfo] + append dbg [pdict -channel none iteminfo] \n if {$opt_debugchannel eq "none"} { dict set result -debug $dbg } else { - puts -nonewline $opt_debugchannel $dbg + puts -nonewline $opt_debugchannel $dbg } } - set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] - foreach attr $attrinfo { - switch -- $attr { - hidden { - dict set result -hidden 1 - } - system { - dict set result -system 1 - } - readonly { - dict set result -readonly 1 - } - reparse_point { - #the twapi API splits this 32bit value for us - set low_word [dict get $iteminfo reserve0] - set high_word [dict get $iteminfo reserve1] - # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 - # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - #+-+-+-+-+-----------------------+-------------------------------+ - #|M|R|N|R| Reserved bits | Reparse tag value | - #+-+-+-+-+-----------------------+-------------------------------+ - #todo - is_microsoft from first bit of high_word - set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? - if {[dict exists $win_reparse_tags_by_int $low_int]} { - dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] - } else { - dict set result -reparseinfo [dict create tag "" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"] + set attrinfo [lookup_ntfs_attributes [dict get $iteminfo attrs]] + if {[llength $attrinfo]} { + foreach attr $attrinfo { + switch -- $attr { + directory { + dict set result -directory 1 + } + reparse_point { + dict set result -reparse_point 1 + #the twapi API splits this 32bit value for us + set low_word [dict get $iteminfo reserve0] + set high_word [dict get $iteminfo reserve1] + # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + #+-+-+-+-+-----------------------+-------------------------------+ + #|M|R|N|R| Reserved bits | Reparse tag value | + #+-+-+-+-+-----------------------+-------------------------------+ + #todo - is_microsoft from first bit of high_word + set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? + if {[dict exists $win_reparse_tags_by_int $low_int]} { + dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] + } else { + dict set result -reparseinfo [dict create tag "" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"] + } + } + hidden { + dict set result -hidden 1 + } + system { + dict set result -system 1 + } + readonly { + dict set result -readonly 1 } } } + dict set result -fileattributes $attrinfo } - #dict set result -shortname [dict get $iteminfo altname] - dict set result -fileattributes $attrinfo - dict set result -raw $iteminfo return $result } @@ -826,8 +906,13 @@ namespace eval punk::du { #puts " alt_wp: $alt_wp" #puts " alt_re: $alt_re" lappend next_winglob_list "$wg$alt_wp" - set alt_re_no_caret [string range $alt_re 1 end] - lappend next_regexp_list "${re}${alt_re_no_caret}" + if {$alt_re eq ".*"} { + #^.*$ is normalized to .* at the end of this function - so we might get a result from our recursive call with no anchors. + lappend next_regexp_list "${re}.*" + } else { + set alt_re_no_anchors [string range $alt_re 1 end-1] ;#strip off ^ and $ anchors + lappend next_regexp_list "${re}${alt_re_no_anchors}" + } } } @@ -935,8 +1020,20 @@ namespace eval punk::du { if {[llength $winglob_list] != [llength $tclregexp_list]} { error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]" } - set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}] - return [dict create winglobs $winglob_list tclregexps $tclregexp_list] + #set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re \$}] + set norm_tclregexp_list [list] + foreach re $tclregexp_list { + if {$re eq ".*"} { + #normalise the regexp for * to be unanchored, in the common case where we are just matching any string. + #(our data set of results from a filesystem will not include the requirement to test empty strings against the regexps, + #so we don't need to worry about the difference between ^.*$ and .*) + lappend norm_tclregexp_list $re + } else { + lappend norm_tclregexp_list "^${re}\$" + } + } + + return [dict create winglobs $winglob_list tclregexps $norm_tclregexp_list] } @@ -955,8 +1052,10 @@ namespace eval punk::du { -glob *\ -filedebug 0\ -patterndebug 0\ + -link_info 1\ -with_sizes 1\ -with_times 1\ + -types {}\ ] set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- @@ -991,14 +1090,21 @@ namespace eval punk::du { # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_filedebug [dict get $opts -filedebug] ;#per file # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_linkinfo [dict get $opts -link_info] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_patterndebug [dict get $opts -patterndebug] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- - set ftypes [list f d l] + #as far as I can tell, even though CON,PRN etc are of type characterSpecial - they aren't generally returnable within directories (review) + #No default examples of socket or block special files that are listable within directories on windows that I can find. + #- Nevertheless, they may be accessible/mountable in some way so we will add them to the list of types that we check for if the user explicitly requests them. + #It's possible that some/all these types may be listable under some circumstances (perhaps requiring admin permissions?) + #Named pipes can be listed under //./pipe - even though //./pipe itself is not a real folder. + set direntry_types [list f d l p s b c] if {"$opt_with_sizes" in {0 1}} { #don't use string is boolean - (f false vs f file!) #only accept 0|1 if {$opt_with_sizes} { - set sized_types $ftypes + set sized_types $direntry_types } else { set sized_types [list] } @@ -1007,16 +1113,17 @@ namespace eval punk::du { } if {[llength $sized_types]} { foreach st $sized_types { - if {$st ni $ftypes} { + if {$st ni $direntry_types} { error "du_dirlisting_twapi unrecognized element in -with_sizes '$st'" } } } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_with_times [dict get $opts -with_times] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- if {"$opt_with_times" in {0 1}} { if {$opt_with_times} { - set timed_types $ftypes + set timed_types $direntry_types } else { set timed_types [list] } @@ -1025,12 +1132,79 @@ namespace eval punk::du { } if {[llength $timed_types]} { foreach item $timed_types { - if {$item ni $ftypes} { + if {$item ni $direntry_types} { error "du_dirlisting_twapi unrecognised element in -with-times '$item'" } } } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + #like tcl glob - we include the entry types {f d l b c p s} as well as the attributes {hidden readonly} and the permissions {r w x} in the types list. + set entry_and_attribute_types [dict get $opts -types] + set types_entry [list] + set types_attribute [list] + set types_permission [list] + set types_REQUIRED [list] + foreach t $entry_and_attribute_types { + switch -- $t { + f - d - l - p - s - b - c { + #these are all valid entry types + lappend types_entry $t + } + hidden - readonly { + #these are all valid attributes + lappend types_attribute $t + lappend types_REQUIRED $t + } + r - w - x { + #these are all valid permissions + lappend types_permission $t + lappend types_REQUIRED $t + } + default { + error "du_dirlisting_tclvfs unrecognized element in -types '$t'. Known types/attributes/permissions: f d l p s b c hidden readonly r w x" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + if {"x" in $types_permission} { + if {[info exists ::env(PATHEXT)]} { + set pathext [set ::env(PATHEXT)] + set pathext_list [split $pathext ";"] + #traditionally upper case - but we should probably be case-insensitive when checking for these extensions, as windows filesystems are *usually* case-insensitive. + set pathext_list [lmap ext $pathext_list {string toupper $ext}] + } else { + set pathext_list [list ".COM" ".EXE" ".BAT" ".CMD"] ;#default windows executable extensions if PATHEXT is not set in the environment + } + } + + set do_sizes_d [expr {"d" in $sized_types}] + set do_sizes_f [expr {"f" in $sized_types}] + set do_sizes_l [expr {"l" in $sized_types}] + set do_times_d [expr {"d" in $timed_types}] + set do_times_f [expr {"f" in $timed_types}] + set do_times_l [expr {"l" in $timed_types}] + if {[llength $types_entry] && "d" ni $types_entry} { + set skip_dirs 1 + } else { + set skip_dirs 0 + } + if {[llength $types_entry] && "f" ni $types_entry} { + set skip_files 1 + } else { + set skip_files 0 + } + if {[llength $types_entry] && "l" ni $types_entry} { + set skip_links 1 + } else { + set skip_links 0 + } + set trequire_hidden [expr {"hidden" in $types_REQUIRED}] + set trequire_readonly [expr {"readonly" in $types_REQUIRED}] + set trequire_executable [expr {"x" in $types_REQUIRED}] + set trequire_readable [expr {"r" in $types_REQUIRED}] + set trequire_writable [expr {"w" in $types_REQUIRED}] + + set dirs [list] set files [list] set filesizes [list] @@ -1043,11 +1217,16 @@ namespace eval punk::du { set flaggedhidden [list] set flaggedsystem [list] set flaggedreadonly [list] + set flaggedoffline [list] set errors [dict create] set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ # return it so it can be stored and tried as an alternative for problem paths + #We can end up with overlapping glob patterns even if the user only supplied a single pattern. + #only add to seen_entries after we have passed a glob regex check, to avoid marking entries as seen that we haven't actually processed yet. + set seen_entries [dict create] + foreach win_glob $win_glob_list tcl_re $tcl_regex_list { if {$opt_patterndebug} { puts stderr ">>>du_dirlisting_twapi winglob: $win_glob" @@ -1067,13 +1246,13 @@ namespace eval punk::du { #output similar format as unixy du puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} flaggedoffline {} altname {} opts $opts errors $errors] } if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" puts stderr " (errorcode: $::errorCode)\n" dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} flaggedoffline {} altname {} opts $opts errors $errors] } @@ -1137,7 +1316,7 @@ namespace eval punk::du { if {![string length $fixedtail]} { dict lappend errors $folderpath {*}$tmp_errors puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} flaggedoffline {} altname {} opts $opts errors $errors] } #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. @@ -1160,7 +1339,7 @@ namespace eval punk::du { puts stderr " (errorcode: $::errorCode)\n" puts stderr "$errMsg" dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} flaggedoffline {} altname {} opts $opts errors $errors] } @@ -1172,55 +1351,177 @@ namespace eval punk::du { puts stderr "FAILED to collect info for folder '$folderpath'" #append errmsg "aborting.." #error $errmsg - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} flaggedoffline {} altname {} opts $opts errors $errors] } } #jjj + while {[twapi::find_file_next $iterator iteminfo]} { set nm [dict get $iteminfo name] - if {![regexp $tcl_re $nm]} { + #if {$nm in {. ..}} { + # continue + #} + if {[llength $win_glob_list ] > 1 && [dict exists $seen_entries $nm]} { + #we can get duplicates across multiple patterns - but we only want to process each entry once. continue } - if {$nm in {. ..}} { + if {$win_glob ne "*" && ![regexp -- $tcl_re $nm]} { continue } + if {[llength $win_glob_list ] > 1} { + dict set seen_entries $nm "" + } + #jjj set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path - #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] #set ftype "" set do_sizes 0 set do_times 0 #attributes applicable to any classification set fullname [file_join_one $folderpath $nm] - set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict - if {[dict get $attrdict -hidden]} { - lappend flaggedhidden $fullname + set entry_archive 0 + set entry_directory 0 + set entry_hidden 0 + set entry_readonly 0 + set entry_reparse_point 0 + set entry_reparse_info "" + set entry_system 0 + #some additional attributes we might want to return in the result dictionary at some point. + set entry_normal 0 + set entry_compressed 0 + set entry_not_content_indexed 0 + + set entry_no_scrub_data 0 + set entry_offline 0 + set entry_temporary 0 + + if {$opt_linkinfo} { + set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict + if {$opt_filedebug} { + dict set debuginfo $fullname [dict get $attrdict -debug] + } + set file_attributes [dict get $attrdict -fileattributes] + #set entry_directory [dict get $attrdict -directory] + #set entry_hidden [dict get $attrdict -hidden] + #set entry_readonly [dict get $attrdict -readonly] + #set entry_system [dict get $attrdict -system] + if {[dict get $attrdict -reparse_point]} { + set entry_reparse_point 1 + set entry_reparse_info [dict get $attrdict -reparse_point] + } + } else { + #set file_attributes [lookup_ntfs_attributes [dict get $iteminfo attrs]] + #---------------------------------------------------- + #inlined. + set bitmask [dict get $iteminfo attrs] + variable ntfs_attributes_cache + if {[info exists ntfs_attributes_cache($bitmask)]} { + set file_attributes [set ntfs_attributes_cache($bitmask)] + } else { + #set twapi_attributes [twapi::decode_file_attributes $bitmask] ;#twapi doesn't seem to handle some flags. + set file_attributes [decode_ntfs_attributes $bitmask] + set ntfs_attributes_cache($bitmask) $file_attributes + } + #---------------------------------------------------- + if {$opt_filedebug} { + #todo + dict set debuginfo $fullname "iteminfo:$iteminfo fileattributes:$file_attributes" + } } - if {[dict get $attrdict -system]} { - lappend flaggedsystem $fullname + foreach a $file_attributes { + switch -- $a { + archive { + set entry_archive 1 + } + directory { + set entry_directory 1 + } + hidden { + set entry_hidden 1 + } + readonly { + set entry_readonly 1 + } + reparse_point { + set entry_reparse_point 1 + } + system { + set entry_system 1 + } + normal { + set entry_normal 1 + } + compressed { + set entry_compressed 1 + } + not_content_indexed { + set entry_not_content_indexed 1 + } + no_scrub_data { + #(or NoScrub) + #Instructs Windows storage integrity scanner to skip integrity checks on the file or directory. + #This can be used to improve performance for files that are known to be resilient to corruption, + #such as media files, or files that are not critical to system operation. + #However, it should be used with caution, as it can increase the risk of data loss if the file becomes corrupted and is not detected by integrity checks. + set entry_no_scrub_data 1 + } + offline { + #The file or directory data is not immediately available. This attribute indicates that the file + #or directory is stored on offline storage, such as a remote storage device or a storage device + #that is currently disconnected. When this attribute is set, the file or directory cannot be accessed + #until the data is made available again. + + #(in practice we may also see this on a local file that was extracted from a downloaded archive) + set entry_offline 1 + lappend flaggedoffline $fullname + } + temporary { + #The file is being used for temporary storage. Windows can delete a temporary file at any time when it is no longer needed. + #This attribute is typically used for files that are created and used by applications for short-term storage of data, such as temporary internet files, or files that are created during software installation or updates. + set entry_temporary 1 + } + pinned {} + unpinned {} + default { + #other attributes we aren't specifically tracking - but we can add them to the attrdict for possible use in linkinfo if the item turns out to be a link - as some of these attributes may be relevant to the link type - e.g a hidden reparse point might be more likely to + puts stderr "du_dirlisting_twapi: unrecognised file attribute '$a' for item '$fullname' (all attributes: $file_attributes)" + } + } } - if {[dict get $attrdict -readonly]} { - lappend flaggedreadonly $fullname + + #----------------------------------------------- + #check required flags + if {!$entry_hidden & $trequire_hidden} { + continue } - if {[dict exists $attrdict -debug]} { - dict set debuginfo $fullname [dict get $attrdict -debug] + if {$entry_readonly && ($trequire_writable || $trequire_executable)} { + #if the item is readonly, but we require writable or executable items, then we skip it - as it doesn't match the criteria for writable or executable items. + continue + } + if {!$entry_readonly && $trequire_readonly} { + #if the item is not readonly, but we require readonly items, then we skip it - as it doesn't match the criteria for readonly items. + continue } - set file_attributes [dict get $attrdict -fileattributes] + #----------------------------------------------- + #further down we check the same flags after classifying the entry as file/dir/link in order to add to the flaggedhidden etc lists. - set is_reparse_point [expr {"reparse_point" in $file_attributes}] - set is_directory [expr {"directory" in $file_attributes}] + + #set is_directory [dict exists $attrdict -directory] + #set is_reparse_point [dict exists $attrdict -reparse_point] set linkdata [dict create] # ----------------------------------------------------------- #main classification - if {$is_reparse_point} { + if {$entry_reparse_point} { #this concept doesn't correspond 1-to-1 with unix links #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points #review - and see which if any actually belong in the links key of our return - + if {$skip_links} { + continue + } #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point # @@ -1239,51 +1540,72 @@ namespace eval punk::du { #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. # #links are techically files too, whether they point to a file/dir or nothing. + if {!$entry_directory} { + #review - other attributes? will we see a link to a link here? + dict set linkdata target_type file + } else { + dict set linkdata target_type directory + } lappend links $fullname #set ftype "l" - if {"l" in $sized_types} { - set do_sizes 1 - } - if {"l" in $timed_types} { - set do_times 1 - } + set do_sizes $do_sizes_l + set do_times $do_times_l + dict set linkdata linktype reparse_point - dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] - if {"directory" ni $file_attributes} { - dict set linkdata target_type file + if {$opt_linkinfo} { + #dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] + dict set linkdata reparseinfo $entry_reparse_info } } - if {$is_directory} { - #if {$nm in {. ..}} { - # continue - #} - if {!$is_reparse_point} { + if {$entry_directory} { + #consider all directories to be executable for now - as this what TCL glob does on windows. + #review - should check ACLS if 'x' type is given, as seems to be done on unix. + if {$skip_dirs} { + continue + } + if {!$entry_reparse_point} { lappend dirs $fullname #set ftype "d" - if {"d" in $sized_types} { - set do_sizes 1 - } - if {"d" in $timed_types} { - set do_times 1 - } + set do_sizes $do_sizes_d + set do_times $do_times_d } else { #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections dict set linkdata target_type directory } } - if {!$is_reparse_point && !$is_directory} { + if {!$entry_reparse_point && !$entry_directory} { #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? + if {$skip_files} { + continue + } + if {$trequire_executable} { + #if we require executable items, but this item is not executable, then we skip it - as it doesn't match the criteria for executable items. + #tcl glob only checks for specific extensions .exe, .com, .cmd and .bat + #we want to check all extensions in ::env(PATHEXT) - but we have to be careful to only check extensions if the item is a file or a link that points to a file, and not if it's a directory or a link that points to a directory - as directories can have dots and things that look like extensions in their names but they aren't really extensions in the same way as for files. + #NOTE that this isn't a posix executable check + #review - leave it without checking ACLS for now - as this is what TCL glob does. + set ext [file extension $nm] + if {[string toupper $ext] ni $pathext_list} { + continue + } + } lappend files $fullname - if {"f" in $sized_types} { + if {$do_sizes_f} { lappend filesizes [dict get $iteminfo size] set do_sizes 1 } - if {"f" in $timed_types} { - set do_times 1 - } - #set ftype "f" + set do_times $do_times_f } # ----------------------------------------------------------- + if {$entry_hidden} { + lappend flaggedhidden $fullname + } + if {$entry_readonly} { + lappend flaggedreadonly $fullname + } + if {$entry_system} { + lappend flaggedsystem $fullname + } if {$do_sizes} { dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] @@ -1298,21 +1620,45 @@ namespace eval punk::du { m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ ] } - if {[dict size $linkdata]} { + #if {[dict size $linkdata]} { + # dict set linkinfo $fullname $linkdata + #} + if {[llength $linkdata]} { dict set linkinfo $fullname $linkdata } } twapi::find_file_close $iterator } - set vfsmounts [get_vfsmounts_in_folder $folderpath] + #tcl vfs mounts act as a directory - review (is there such a thing as a vfs-mounted file? if so we would need to check for mounts in files too and not just dirs?) + if {$skip_dirs} { + set vfsmounts [list] + } else { + set all_vfsmounts [get_vfsmounts_in_folder $folderpath] + #ensure any results match our glob. + set vfsmounts [list] + foreach m $all_vfsmounts { + set tail [::tcl::file::tail $m] + foreach win_glob $win_glob_list tcl_re $tcl_regex_list { + #review - we don't know that the tcl vfs mount would match as if filtered by windows API pattern matching (which is dependent on filesystem case-sensitivity) + #windows folders are not *always* case-insensitive. + #we need to know whether to string match with -nocase or not. + #To know this would require writing a test file to the folder. + if {[regexp $tcl_re $m]} { + lappend vfsmounts $m + continue + } + } + } + + } set effective_opts $opts dict set effective_opts -with_times $timed_types dict set effective_opts -with_sizes $sized_types #also determine whether vfs. file system x is *much* faster than file attributes #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder - return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] + return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly flaggedoffline $flaggedoffline altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] } proc get_vfsmounts_in_folder {folderpath} { set vfsmounts [list] @@ -1356,8 +1702,12 @@ namespace eval punk::du { proc du_dirlisting_generic {folderpath args} { set opts [dict create\ -glob *\ + -filedebug 0\ + -patterndebug 0\ + -link_info 1\ -with_sizes 0\ -with_times 0\ + -types {}\ ] set errors [dict create] foreach {k v} $args { @@ -1374,11 +1724,11 @@ namespace eval punk::du { set opt_glob [dict get $opts -glob] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_with_sizes [dict get $opts -with_sizes] - set ftypes [list f d l] + set direntry_types [list f d l p s b c] if {"$opt_with_sizes" in {0 1}} { #don't use string is boolean (false vs f problem where f indicates file) if {$opt_with_sizes} { - set sized_types $ftypes + set sized_types $direntry_types } else { set sized_types [list] } @@ -1387,7 +1737,7 @@ namespace eval punk::du { } if {[llength $sized_types]} { foreach st $sized_types { - if {$st ni $ftypes} { + if {$st ni $direntry_types} { error "du_dirlisting_generic unrecognized element in -with_sizes '$st'" } } @@ -1396,7 +1746,7 @@ namespace eval punk::du { set opt_with_times [dict get $opts -with_times] if {"$opt_with_times" in {0 1}} { if {$opt_with_times} { - set timed_types $ftypes + set timed_types $direntry_types } else { set timed_types [list] } @@ -1405,12 +1755,38 @@ namespace eval punk::du { } if {[llength $timed_types]} { foreach item $timed_types { - if {$item ni $ftypes} { + if {$item ni $direntry_types} { error "du_dirlisting_generic unrecognised element in -with-times '$item'" } } } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + #like tcl glob - we include the entry types {f d l b c p s} as well as the attributes {hidden readonly} and the permissions {r w x} in the types list. + set entry_and_attribute_types [dict get $opts -types] + set types_entry [list] + set types_attribute [list] + set types_permission [list] + foreach t $entry_and_attribute_types { + switch -- $t { + f - d - l - p - s - b - c { + #these are all valid entry types + lappend types_entry $t + } + hidden - readonly { + #these are all valid attributes + lappend types_attribute $t + } + r - w - x { + #these are all valid permissions + lappend types_permission $t + } + default { + error "du_dirlisting_generic unrecognized element in -types '$t'. Known types/attributes/permissions: f d l p s b c hidden readonly r w x" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- # The repeated globs are a source of slowness for this function. #TODO - we could minimize the number of globs if we know we need to do a file stat and/or file attributes on each entry anyway #For the case where we don't need times,sizes or other metadata - it is faster to do multiple globs @@ -1426,49 +1802,75 @@ namespace eval punk::du { #note - with this design, we can't glob for all except dotfiles - this is for cross-platform consistency and efficiency (Review). #dotfiles aren't considered hidden on all platforms #some sort of antiglob is a possible enhancement and more consistent with the view that leading dot should be treated as any other filename character in this context. + + set dirs {} + set hdirs {} + set files {} + set hfiles {} + set links {} + set hlinks {} if {"windows" eq $::tcl_platform(platform)} { if {$opt_glob eq "*"} { - #Note - glob operations seem to be faster than looped tests like 'file isdirectory' & 'file readlink' + #Note - multiple glob operations with restrictions seem to be faster than looped tests like 'file isdirectory' & 'file readlink' #set parent [lindex $folders $folderidx] - set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} * .*] - set dirs [glob -nocomplain -dir $folderpath -types d * .*] - - set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} * .*] - set links [glob -nocomplain -dir $folderpath -types l * .*] ;#links may have dupes - we don't care. punk::lib::struct_set_diff_unique - - set hfiles [glob -nocomplain -dir $folderpath -types {hidden f} * .*] - set files [glob -nocomplain -dir $folderpath -types f * .*] + if {![llength $types_entry] || "d" in $types_entry} { + #we need to check directories for hidden attribute - as on windows, not all dotfiles are hidden, and not all hidden files are dotfiles + set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} * .*] + set dirs [glob -nocomplain -dir $folderpath -types d * .*] + } + if {![llength $types_entry] || "l" in $types_entry} { + set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} * .*] + set links [glob -nocomplain -dir $folderpath -types l * .*] ;#links may have dupes - we don't care. punk::lib::struct_set_diff_unique + } + if {![llength $types_entry] || "f" in $types_entry} { + set hfiles [glob -nocomplain -dir $folderpath -types {hidden f} * .*] + set files [glob -nocomplain -dir $folderpath -types f * .*] + } } else { - set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} $opt_glob] - set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob] - - set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} $opt_glob] - set links [glob -nocomplain -dir $folderpath -types l $opt_glob] ;#links may have dupes - we don't care. punk::lib::struct_set_diff_unique - - set hfiles [glob -nocomplain -dir $folderpath -types {hidden f} $opt_glob] - set files [glob -nocomplain -dir $folderpath -types f $opt_glob] + if {![llength $types_entry] || "d" in $types_entry} { + #we need to check directories for hidden attribute - as on windows, not all dotfiles are hidden, and not all hidden files are dotfiles + set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} $opt_glob] + set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob] + } + if {![llength $types_entry] || "l" in $types_entry} { + set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} $opt_glob] + set links [glob -nocomplain -dir $folderpath -types l $opt_glob] ;#links may have dupes - we don't care. punk::lib::struct_set_diff_unique + } + if {![llength $types_entry] || "f" in $types_entry} { + set hfiles [glob -nocomplain -dir $folderpath -types {hidden f} $opt_glob] + set files [glob -nocomplain -dir $folderpath -types f $opt_glob] + } } } else { - set hdirs {} - set hfiles {} - set hlinks {} if {$opt_glob eq "*"} { #Note - glob operations seem to be faster than looped tests like 'file isdirectory' & 'file readlink' #set parent [lindex $folders $folderidx] - set dirs [glob -nocomplain -dir $folderpath -types d * .*] - set links [glob -nocomplain -dir $folderpath -types l * .*] ;#links may have dupes - we don't care. punk::lib::struct_set_diff_unique - set files [glob -nocomplain -dir $folderpath -types f * .*] + #set dirs [glob -nocomplain -dir $folderpath -types d * .*] + #set links [glob -nocomplain -dir $folderpath -types l * .*] ;#links may have dupes - we don't care. punk::lib::struct_set_diff_unique + #set files [glob -nocomplain -dir $folderpath -types f * .*] + set globs [list * .*] } else { - set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob] - set links [glob -nocomplain -dir $folderpath -types l $opt_glob] ;#links may have dupes - we don't care. punk::lib::struct_set_diff_unique - set files [glob -nocomplain -dir $folderpath -types f $opt_glob] + set globs [list $opt_glob] + #set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob] + #set links [glob -nocomplain -dir $folderpath -types l $opt_glob] ;#links may have dupes - we don't care. punk::lib::struct_set_diff_unique + #set files [glob -nocomplain -dir $folderpath -types f $opt_glob] + } + + if {![llength $types_entry] || "d" in $types_entry} { + set dirs [glob -nocomplain -dir $folderpath -types d {*}$globs] + } + if {![llength $types_entry] || "l" in $types_entry} { + set links [glob -nocomplain -dir $folderpath -types l {*}$globs] ;#links may have dupes - we don't care. punk::lib::struct_set_diff_unique + } + if {![llength $types_entry] || "f" in $types_entry} { + set files [glob -nocomplain -dir $folderpath -types f {*}$globs] } } #note struct::set difference produces unordered result #struct::set difference removes duplicates (but not always.. e.g if using tcl impl and 2nd element empty!) - #relying on struct::set to remove dupes is somewhat risky. + #relying on struct::set to remove dupes is somewhat risky. #It is not well documented - and behaviour of dupes in inputs is underspecified as it appears to be intended for mathematical 'sets' ie lists without dupes #for this reason we must use the wrapper punk::lib::struct_set_diff_unique, which will use the well behaved critcl for speed if avail, but fall back to a deduping tcl version #remove links and . .. from directories, remove links from files @@ -1501,19 +1903,23 @@ namespace eval punk::du { puts stderr "zipfs: $folderpath" set defaults [dict create\ -glob *\ + -filedebug 0\ + -patterndebug 0\ + -link_info 1\ -with_sizes 0\ -with_times 0\ + -types {}\ ] set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_glob [dict get $opts -glob] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_with_sizes [dict get $opts -with_sizes] - set ftypes [list f d l] + set direntry_types [list f d l p s b c] if {"$opt_with_sizes" in {0 1}} { #don't use string is boolean (false vs f problem where f indicates file) if {$opt_with_sizes} { - set sized_types $ftypes + set sized_types $direntry_types } else { set sized_types [list] } @@ -1522,7 +1928,7 @@ namespace eval punk::du { } if {[llength $sized_types]} { foreach st $sized_types { - if {$st ni $ftypes} { + if {$st ni $direntry_types} { error "du_dirlisting_zipfs unrecognized element in -with_sizes '$st'" } } @@ -1531,7 +1937,7 @@ namespace eval punk::du { set opt_with_times [dict get $opts -with_times] if {"$opt_with_times" in {0 1}} { if {$opt_with_times} { - set timed_types $ftypes + set timed_types $direntry_types } else { set timed_types [list] } @@ -1540,12 +1946,38 @@ namespace eval punk::du { } if {[llength $timed_types]} { foreach item $timed_types { - if {$item ni $ftypes} { + if {$item ni $direntry_types} { error "du_dirlisting_zipfs unrecognised element in -with-times '$item'" } } } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + #like tcl glob - we include the entry types {f d l b c p s} as well as the attributes {hidden readonly} and the permissions {r w x} in the types list. + set entry_and_attribute_types [dict get $opts -types] + set types_entry [list] + set types_attribute [list] + set types_permission [list] + foreach t $entry_and_attribute_types { + switch -- $t { + f - d - l - p - s - b - c { + #these are all valid entry types + lappend types_entry $t + } + hidden - readonly { + #these are all valid attributes + lappend types_attribute $t + } + r - w - x { + #these are all valid permissions + lappend types_permission $t + } + default { + error "du_dirlisting_zipfs unrecognized element in -types '$t'. Known types/attributes/permissions: f d l p s b c hidden readonly r w x" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set effective_opts $opts dict set effective_opts -with_times $timed_types dict set effective_opts -with_sizes $sized_types @@ -1593,17 +2025,28 @@ namespace eval punk::du { #if {[punk::mix::base::lib::path_a_above_b $folderpath "//zipfs:/"]} {} + set dirs {} + set files {} + set links {} + #todo - hidden? not returned in attributes on windows at least. #zipfs files also reported as links by glob - review - should we preserve this in response? (2024 unable to duplicate) if {$opt_glob eq "*"} { - set dirs [glob -nocomplain -dir $folderpath -types d * .*] ;# also returns links to dirs - set links [glob -nocomplain -dir $folderpath -types l * .*] ;# links may have dupes - we don't care. struct::set difference will remove - set files [glob -nocomplain -dir $folderpath -types f * .*] ;# also returns links to files + set globs [list * .*] } else { - set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob] - set links [glob -nocomplain -dir $folderpath -types l $opt_glob] - set files [glob -nocomplain -dir $folderpath -types f $opt_glob] + set globs [list $opt_glob] + } + if {![llength $types_entry] || "d" in $types_entry} { + set dirs [glob -nocomplain -dir $folderpath -types d {*}$globs] ;# also returns links to dirs } + if {![llength $types_entry] || "l" in $types_entry} { + set links [glob -nocomplain -dir $folderpath -types l {*}$globs] ;# links may have dupes - we don't care. struct::set difference will remove + } + if {![llength $types_entry] || "f" in $types_entry} { + set files [glob -nocomplain -dir $folderpath -types f {*}$globs] ;# also returns links to files + } + + #remove any links from our dirs and files collections #see du_dirlisting_generic re struct::set difference issues set dirs [punk::lib::struct_set_diff_unique $dirs[unset dirs] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]] @@ -1624,19 +2067,23 @@ namespace eval punk::du { proc du_dirlisting_tclvfs {folderpath args} { set defaults [dict create\ -glob *\ + -filedebug 0\ + -patterndebug 0\ + -link_info 1\ -with_sizes 0\ -with_times 0\ + -types {}\ ] set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_glob [dict get $opts -glob] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_with_sizes [dict get $opts -with_sizes] - set ftypes [list f d l] + set direntry_types [list f d l p s b c] if {"$opt_with_sizes" in {0 1}} { #don't use string is boolean (false vs f problem where f indicates file) if {$opt_with_sizes} { - set sized_types $ftypes + set sized_types $direntry_types } else { set sized_types [list] } @@ -1645,7 +2092,7 @@ namespace eval punk::du { } if {[llength $sized_types]} { foreach st $sized_types { - if {$st ni $ftypes} { + if {$st ni $direntry_types} { error "du_dirlisting_tclvfs unrecognized element in -with_sizes '$st'" } } @@ -1654,7 +2101,7 @@ namespace eval punk::du { set opt_with_times [dict get $opts -with_times] if {"$opt_with_times" in {0 1}} { if {$opt_with_times} { - set timed_types $ftypes + set timed_types $direntry_types } else { set timed_types [list] } @@ -1663,48 +2110,85 @@ namespace eval punk::du { } if {[llength $timed_types]} { foreach item $timed_types { - if {$item ni $ftypes} { + if {$item ni $direntry_types} { error "du_dirlisting_tclvfs unrecognised element in -with-times '$item'" } } } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + #like tcl glob - we include the entry types {f d l b c p s} as well as the attributes {hidden readonly} and the permissions {r w x} in the types list. + set entry_and_attribute_types [dict get $opts -types] + set types_entry [list] + set types_attribute [list] + set types_permission [list] + foreach t $entry_and_attribute_types { + switch -- $t { + f - d - l - p - s - b - c { + #these are all valid entry types + lappend types_entry $t + } + hidden - readonly { + #these are all valid attributes + lappend types_attribute $t + } + r - w - x { + #these are all valid permissions + lappend types_permission $t + } + default { + error "du_dirlisting_tclvfs unrecognized element in -types '$t'. Known types/attributes/permissions: f d l p s b c hidden readonly r w x" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- #at least some vfs on windows seem to support the -hidden attribute #we are presuming glob will accept the -types hidden option for all vfs - even if it doesn't really apply REVIEW #The extra globs aren't nice - but hopefully the vfs is reasonably performant (?) + set dirs {} + set hdirs {} + set files {} + set hfiles {} + set links {} + set hlinks {} set errors [dict create] if {"windows" eq $::tcl_platform(platform)} { if {$opt_glob eq "*"} { - set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} * .*] - set dirs [glob -nocomplain -dir $folderpath -types d * .*] ;# also returns links to dirs - #review - how are links handled in vfs? presumably if the vfs is a full implementation they should work at least within the vfs? - set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} * .*] - set links [glob -nocomplain -dir $folderpath -types l * .*] ;# links may have dupes - we don't care. struct::set difference will remove - set hfiles [glob -nocomplain -dir $folderpath -types {hidden l} * .*] - set files [glob -nocomplain -dir $folderpath -types f * .*] ;# also returns links to files + set globs [list * .*] } else { - set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} $opt_glob] - set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob] - set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} $opt_glob] - set links [glob -nocomplain -dir $folderpath -types l $opt_glob] - set hfiles [glob -nocomplain -dir $folderpath -types {hidden f} $opt_glob] - set files [glob -nocomplain -dir $folderpath -types f $opt_glob] + set globs [list $opt_glob] + } + if {![llength $types_entry] || "d" in $types_entry} { + set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} {*}$globs] + set dirs [glob -nocomplain -dir $folderpath -types d {*}$globs] ;# also returns links to dirs + } + if {![llength $types_entry] || "l" in $types_entry} { + #review - how are links handled in vfs? presumably if the vfs is a full implementation they should work at least within the vfs? + set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} {*}$globs] + set links [glob -nocomplain -dir $folderpath -types l {*}$globs] ;# links may have dupes - we don't care. struct::set difference will remove + } + if {![llength $types_entry] || "f" in $types_entry} { + set hfiles [glob -nocomplain -dir $folderpath -types {hidden l} {*}$globs] + set files [glob -nocomplain -dir $folderpath -types f {*}$globs] ;# also returns links to files } } else { #we leave it to the ui on unix to classify dotfiles as hidden - set hdirs {} - set hfiles {} - set hlinks {} if {$opt_glob eq "*"} { - set dirs [glob -nocomplain -dir $folderpath -types d * .*] ;# also returns links to dirs - #review - how are links handled in vfs? presumably if the vfs is a full implementation they should work at least within the vfs? - set links [glob -nocomplain -dir $folderpath -types l * .*] ;# links may have dupes - we don't care. struct::set difference will remove - set files [glob -nocomplain -dir $folderpath -types f * .*] ;# also returns links to files + set globs [list * .*] } else { - set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob] - set links [glob -nocomplain -dir $folderpath -types l $opt_glob] - set files [glob -nocomplain -dir $folderpath -types f $opt_glob] + set globs [list $opt_glob] + } + + if {![llength $types_entry] || "d" in $types_entry} { + set dirs [glob -nocomplain -dir $folderpath -types d {*}$globs] ;# also returns links to dirs + } + if {![llength $types_entry] || "l" in $types_entry} { + set links [glob -nocomplain -dir $folderpath -types l {*}$globs] ;# links may have dupes - we don't care. struct::set difference will remove + ##review - how are links handled in vfs? presumably if the vfs is a full implementation they should work at least within the vfs? + } + if {![llength $types_entry] || "f" in $types_entry} { + set files [glob -nocomplain -dir $folderpath -types f {*}$globs] ;# also returns links to files } } #remove any links from our dirs and files collections @@ -1730,8 +2214,12 @@ namespace eval punk::du { proc du_dirlisting_unix {folderpath args} { set defaults [dict create\ -glob *\ + -filedebug 0\ + -patterndebug 0\ + -link_info 1\ -with_sizes 0\ -with_times 0\ + -types {}\ ] set errors [dict create] dict lappend errors $folderpath "metadata support incomplete - prefer du_dirlisting_generic" @@ -1740,11 +2228,11 @@ namespace eval punk::du { set opt_glob [dict get $opts -glob] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_with_sizes [dict get $opts -with_sizes] - set ftypes [list f d l] + set direntry_types [list f d l p s b c] if {"$opt_with_sizes" in {0 1}} { #don't use string is boolean (false vs f problem where f indicates file) if {$opt_with_sizes} { - set sized_types $ftypes + set sized_types $direntry_types } else { set sized_types [list] } @@ -1753,7 +2241,7 @@ namespace eval punk::du { } if {[llength $sized_types]} { foreach st $sized_types { - if {$st ni $ftypes} { + if {$st ni $direntry_types} { error "du_dirlisting_generic unrecognized element in -with_sizes '$st'" } } @@ -1762,7 +2250,7 @@ namespace eval punk::du { set opt_with_times [dict get $opts -with_times] if {"$opt_with_times" in {0 1}} { if {$opt_with_times} { - set timed_types $ftypes + set timed_types $direntry_types } else { set timed_types [list] } @@ -1771,25 +2259,58 @@ namespace eval punk::du { } if {[llength $timed_types]} { foreach item $timed_types { - if {$item ni $ftypes} { + if {$item ni $direntry_types} { error "du_dirlisting_generic unrecognised element in -with-times '$item'" } } } - + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + #like tcl glob - we include the entry types {f d l b c p s} as well as the attributes {hidden readonly} and the permissions {r w x} in the types list. + set entry_and_attribute_types [dict get $opts -types] + set types_entry [list] + set types_attribute [list] + set types_permission [list] + foreach t $entry_and_attribute_types { + switch -- $t { + f - d - l - p - s - b - c { + #these are all valid entry types + lappend types_entry $t + } + hidden - readonly { + #these are all valid attributes + lappend types_attribute $t + } + r - w - x { + #these are all valid permissions + lappend types_permission $t + } + default { + error "du_dirlisting_tclvfs unrecognized element in -types '$t'. Known types/attributes/permissions: f d l p s b c hidden readonly r w x" + } + } + } + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + #this is not consistent with standard way * works for unix folders - but it is consistent with behaviour of this facility on windows #we don't classify anything as 'flaggedhidden' on unix. - #it is a convention for dotfiles rather than a flag - and we'll leave the distinction for the display library - #This + #it is a convention for dotfiles rather than a flag - and we'll leave the distinction for the display library + if {$opt_glob eq "*"} { - set dirs [glob -nocomplain -dir $folderpath -types d * .*] ;# also returns links to dirs - set links [glob -nocomplain -dir $folderpath -types l * .*] ;# links may have dupes - we don't care. struct::set difference will remove - set files [glob -nocomplain -dir $folderpath -types f * .*] ;# also returns links to files + set globs [list * .*] } else { - set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob] - set links [glob -nocomplain -dir $folderpath -types l $opt_glob] - set files [glob -nocomplain -dir $folderpath -types f $opt_glob] + set globs [list $opt_glob] + } + if {![llength $types_entry] || "d" in $types_entry} { + set dirs [glob -nocomplain -dir $folderpath -types d {*}$globs] ;# also returns links to dirs + } + if {![llength $types_entry] || "l" in $types_entry} { + set links [glob -nocomplain -dir $folderpath -types l {*}$globs] ;# links may have dupes - we don't care. struct::set difference will remove } + if {![llength $types_entry] || "f" in $types_entry} { + set files [glob -nocomplain -dir $folderpath -types f {*}$globs] ;# also returns links to files + } + + #remove any links from our dirs and files collections #see du_dirlisting_generic re struct::set difference issues set dirs [punk::lib::struct_set_diff_unique $dirs[unset dirs] [list {*}$links [file join $folderpath .] [file join $folderpath ..] ]] diff --git a/src/bootsupport/modules/punk/mix/commandset/module-0.1.0.tm b/src/bootsupport/modules/punk/mix/commandset/module-0.1.0.tm index 59f23842..3b40d838 100644 --- a/src/bootsupport/modules/punk/mix/commandset/module-0.1.0.tm +++ b/src/bootsupport/modules/punk/mix/commandset/module-0.1.0.tm @@ -158,8 +158,11 @@ namespace eval punk::mix::commandset::module { If a version is specified in the module argument as well as in -version the higher version number will be used. " - -license -default - -author -default -multiple 1 + -license -default -help\ + "License to be mentioned in the module file. e.g BSD, MIT" + -author -default -multiple 1 -help\ + {Author(s) of the module. Multiple authors can be specified by repeating the -author option. + e.g -author {"John Smith" "jsmith@example.com"} -author {"Jane Doe" "jane@example.org"}} -template -default punk.module -type -default "[lindex $moduletypes 0]" -choices {$moduletypes} -force -default 0 -type boolean -help\ diff --git a/src/bootsupport/modules/punk/nav/fs-0.1.0.tm b/src/bootsupport/modules/punk/nav/fs-0.1.0.tm index 073b6cce..24dfad86 100644 --- a/src/bootsupport/modules/punk/nav/fs-0.1.0.tm +++ b/src/bootsupport/modules/punk/nav/fs-0.1.0.tm @@ -233,6 +233,11 @@ tcl::namespace::eval punk::nav::fs { set args [lrange $args 1 end] } + if {$v eq "/"} { + #directory only listing - we can optimize this by not getting file sizes and times - as these are only used for file listings + #we can't completely skip iterating over files as we want to know if there are any files in the directory - as this affects the display of links/shortcuts that point to directories - but we can skip getting sizes and times for files. + #todo? + } if {![llength $args]} { #ls is too slow even over a fairly low-latency network @@ -243,12 +248,12 @@ tcl::namespace::eval punk::nav::fs { commandstack::basecall cd $VIRTUAL_CWD } } - set matchinfo [dirfiles_dict -searchbase $VIRTUAL_CWD -with_times {f d l} -with_sizes {f d l}] + set matchinfo [dirfiles_dict -searchbase $VIRTUAL_CWD -with_times {f d l b c p s} -with_sizes {f d l b c p s}] } else { if {[pwd] ne $VIRTUAL_CWD} { commandstack::basecall cd $VIRTUAL_CWD } - set matchinfo [dirfiles_dict -searchbase [pwd] -with_times {f d l} -with_sizes {f d l}] + set matchinfo [dirfiles_dict -searchbase [pwd] -with_times {f d l b c p s} -with_sizes {f d l b c p s}] } set dircount [llength [dict get $matchinfo dirs]] set filecount [llength [dict get $matchinfo files]] @@ -621,30 +626,41 @@ tcl::namespace::eval punk::nav::fs { punk::args::define { @id -id ::punk::nav::fs::d/new - -nonportable -type none -help\ - "Allow creation of directories which may not be portable across platforms. - Use with caution and only when you know what you are doing. - This allows creation of directories with names that may be invalid on some - platforms, or that may have special meanings on some platforms - (e.g reserved device names on windows). - If -nonportable is not supplied, then an error will be raised if any supplied - path is non-portable as defined by punk::winpath::illegalname_test. - - Regardless of whether -nonportable is supplied or not, some characters are not - suitable for windows or most other platforms and will be rejected with an error. - An example of this is the null character (\0)." + @cmd -name punk::nav::fs::d/new\ + -summary\ + "Create directory or directories at the specified path(s)."\ + -help\ + "This command creates directories at the specified path(s). + If any part of the specified path does not exist, then it will be created as well. + If a specified path already exists, then it will be left as-is and no error will be raised. + + A summary line is returned for each created directory, with the full path of the created + directory and a status line indicating the number of dirs and files in the directory if + it already existed (or showing 0 for both if it was just created)." + -nonportable -type none\ + -help\ + "Allows creation of directories which may not be portable across platforms. + Use with caution and only when you know what you are doing. + This allows creation of directories with names that may be invalid on some + platforms, or that may have special meanings on some platforms + (e.g reserved device names on windows). + If -nonportable is not supplied, then an error will be raised if any supplied + path is non-portable as defined by punk::winpath::illegalname_test. + + Regardless of whether -nonportable is supplied or not, some characters are not + suitable for windows or most other platforms and will be rejected with an error. + An example of this is the null character (\0)." @values -min 1 -max -1 -type string path -type string -multiple 1 -help\ "Path(s) to create. Can be absolute or relative. + If any path is rejected due to -nonportable or other invalid characters, + or because a parent directory is not writable, then no directories will be created. - If any path is rejected due to -nonportable or other invalid characters, - or because a parent directory is not writable, then no directories will be created. - - If a path already exists, then it will be left as-is and no error will be raised. + If a path already exists, then it will be left as-is and no error will be raised. - If despite passing the name tests or writability tests, a directory cannot be - created for some reason (e.g other filesystem error) then an error will be raised - and processing of any remaining paths will be aborted." + If despite passing the name tests or writability tests, a directory cannot be + created for some reason (e.g other filesystem error) then an error will be raised + and processing of any remaining paths will be aborted." } #todo - synchronize overall behaviour of d/new with that of n/new (for namespaces) proc d/new {args} { @@ -718,7 +734,7 @@ tcl::namespace::eval punk::nav::fs { } #display summaries of created directories (which may have already existed) by reusing d/ to get info on them. - set query_paths [lmap v $paths $v/*] + set query_paths [lmap v $paths {string cat $v "/*"}] d/ / {*}$query_paths } @@ -734,6 +750,10 @@ tcl::namespace::eval punk::nav::fs { #run a file + #review. On unix the shebang should be able to choose the interpreter. + #on windows the ::env(PATHEXT) and file associations should be able to choose the interpreter. (see punk::auto_execok_better) + #when running using ./script_or_exe.name - the windows file association should be used. + #when running using x/ script_or_exe.name - we want to be able to run scripts even if no file association exists (or it is inappropriate for execution e.g ps1 defaults to open in editor) proc x/ {args} { if {![llength $args]} { set result [d/] @@ -748,11 +768,16 @@ tcl::namespace::eval punk::nav::fs { lua [list exe lua extensions [list ".lua"]]\ perl [list exe perl extensions [list ".pl"]]\ php [list exe php extensions [list ".php"]]\ + ps1 [list exe pwsh extensions [list ".ps1"]]\ ] + #todo - allow cofnig to specify arguments for the executable. eg pwsh -noprofile -executionpolicy remotesigned for running powershell scripts. + + #this is a bit of a hack - fix set tcl_extensions [list ".tcl" ".tm" ".kit" ".tk"] ;#todo - load from config set py_extensions [list ".py"] - set lua_extensions [list ".lua"] - set perl_extensions [list ".pl"] + #set lua_extensions [list ".lua"] + #set perl_extensions [list ".pl"] + #set pwsh_extensions [list ".ps1"] set script_extensions [list] set extension_lookup [dict create] @@ -792,14 +817,12 @@ tcl::namespace::eval punk::nav::fs { set ::argc [llength $newargs] set ::argv $newargs tailcall source $path - } elseif {$extlower in $py_extensions} { - set pycmd [auto_execok python] - tailcall {*}$pycmd {*}$args } elseif {$extlower in $script_extensions} { set exename [dict get $scriptconfig $scriptlang exe] set cmd [auto_execok $exename] tailcall {*}$cmd $args } else { + #review set fd [open $path r] set chunk [read $fd 4000]; close $fd #consider any commented line near top of file containing 'tcl' as likely to be a tcl script of some sort and attempt to source it. @@ -824,7 +847,11 @@ tcl::namespace::eval punk::nav::fs { } } } else { - puts stderr "No script executable known for this" + if {$scriptfile eq ""} { + puts stderr "script not found" + } else { + puts stderr "No script executable known for files with extension [file extension $scriptfile]. Ensure file has a known extension ($script_extensions) or add a commented hint in the file such as #!/usr/bin/env tclsh" + } } } @@ -934,13 +961,96 @@ tcl::namespace::eval punk::nav::fs { # -searchbase is always passed through - and is only used to construct a location path if a relative searchspec was supplied punk::args::define { @id -id ::punk::nav::fs::dirfiles_dict - @cmd -name punk::nav::fs::dirfiles_dict + @cmd -name punk::nav::fs::dirfiles_dict\ + -summary\ + ""\ + -help\ + "This command performs a directory listing of the specified location and returns the results as a dictionary + containing lists of keys such as files and directories and their properties. + + The results are returned as a dictionary with the following structure: + { + location + searchbase + dirs {} + vfsmounts {} + links {} + linkinfo {} + files {} + filesizes {} + sizes {} + times {} + flaggedhidden {