From b2f4d670569e604cfab102b75a4b2e61c615efca Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Thu, 19 Mar 2026 05:47:20 +1100 Subject: [PATCH] dir listing and glob fixes - windows --- src/bootsupport/modules/punk-0.1.tm | 211 ++++- src/bootsupport/modules/punk/du-0.1.0.tm | 858 +++++++++++++----- src/bootsupport/modules/punk/lib-0.1.6.tm | 32 + src/bootsupport/modules/punk/nav/fs-0.1.0.tm | 209 ++++- src/bootsupport/modules/punk/path-0.1.0.tm | 2 +- src/bootsupport/modules/punk/winpath-0.1.0.tm | 64 +- src/modules/punk-0.1.tm | 211 ++++- src/modules/punk/du-999999.0a1.0.tm | 858 +++++++++++++----- src/modules/punk/lib-999999.0a1.0.tm | 32 + src/modules/punk/nav/fs-999999.0a1.0.tm | 209 ++++- src/modules/punk/path-999999.0a1.0.tm | 2 +- src/modules/punk/winpath-999999.0a1.0.tm | 64 +- .../src/bootsupport/modules/punk-0.1.tm | 211 ++++- .../src/bootsupport/modules/punk/du-0.1.0.tm | 858 +++++++++++++----- .../src/bootsupport/modules/punk/lib-0.1.6.tm | 32 + .../bootsupport/modules/punk/nav/fs-0.1.0.tm | 209 ++++- .../bootsupport/modules/punk/path-0.1.0.tm | 2 +- .../bootsupport/modules/punk/winpath-0.1.0.tm | 64 +- .../src/bootsupport/modules/punk-0.1.tm | 211 ++++- .../src/bootsupport/modules/punk/du-0.1.0.tm | 858 +++++++++++++----- .../src/bootsupport/modules/punk/lib-0.1.6.tm | 32 + .../bootsupport/modules/punk/nav/fs-0.1.0.tm | 209 ++++- .../bootsupport/modules/punk/path-0.1.0.tm | 2 +- .../bootsupport/modules/punk/winpath-0.1.0.tm | 64 +- src/vfs/_vfscommon.vfs/modules/punk-0.1.tm | 211 ++++- .../_vfscommon.vfs/modules/punk/du-0.1.0.tm | 858 +++++++++++++----- .../_vfscommon.vfs/modules/punk/lib-0.1.6.tm | 32 + .../modules/punk/nav/fs-0.1.0.tm | 209 ++++- .../_vfscommon.vfs/modules/punk/path-0.1.0.tm | 2 +- .../modules/punk/winpath-0.1.0.tm | 64 +- 30 files changed, 5350 insertions(+), 1530 deletions(-) diff --git a/src/bootsupport/modules/punk-0.1.tm b/src/bootsupport/modules/punk-0.1.tm index ea72ad1c..e1648d9d 100644 --- a/src/bootsupport/modules/punk-0.1.tm +++ b/src/bootsupport/modules/punk-0.1.tm @@ -6066,9 +6066,218 @@ namespace eval punk { set pipe [punk::path_list_pipe $glob] {*}$pipe } - proc path {{glob *}} { + proc path_basic {{glob *}} { set pipe [punk::path_list_pipe $glob] {*}$pipe |> list_as_lines + } + namespace eval argdoc { + punk::args::define { + @id -id ::punk::path + @cmd -name "punk::path" -help\ + "Introspection of the PATH environment variable. + This tool will examine executables within each PATH entry and show which binaries + are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns." + @opts + -binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries." + @values -min 0 -max -1 + glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories." + } + } + proc path {args} { + set argd [punk::args::parse $args withid ::punk::path] + lassign [dict values $argd] leaders opts values received + set binglobs [dict get $opts -binglobs] + set globs [dict get $values glob] + if {$::tcl_platform(platform) eq "windows"} { + set sep ";" + } else { + # : ok for linux/bsd ... mac? + set sep ":" + } + set all_paths [split [string trimright $::env(PATH) $sep] $sep] + set filtered_paths $all_paths + if {[llength $globs]} { + set filtered_paths [list] + foreach p $all_paths { + foreach g $globs { + if {[string match -nocase $g $p]} { + lappend filtered_paths $p + break + } + } + } + } + + #Windows executable search location order: + #1. The current directory. + #2. The directories that are listed in the PATH environment variable. + # within each directory windows uses the PATHEXT environment variable to determine + #which files are considered executable and in which order they are considered. + #So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files + #with the same name but different extensions in the same directory, then the one + #with the extension that appears first in PATHEXT will be executed when that name is called. + + #On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output. + #Duplicate PATH entries are also a fairly likely possibility on all platforms. + #This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries. + + #By default we don't want to display all executables in all PATH entries - as this can be very verbose + #- but we want to be able to show which executables are overshadowed by which PATH entries, + #and to be able to filter the PATH entries and the executables using glob patterns. + #To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name. + #The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths + #in the original PATH list. The executable dict will contain the paths and path indices where each executable is found, + #and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a + #dict keyed by path index which contains the list of executables in that path - to make it easy to show which + #executables are overshadowed by which paths. + + set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows). + set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off). + set d_index_executables [dict create] ;#key is path index, value is list of executables in that path + set path_idx 0 + foreach p $all_paths { + if {$::tcl_platform(platform) eq "windows"} { + set pnorm [string tolower $p] + } else { + set pnorm $p + } + if {![dict exists $d_path_info $pnorm]} { + dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]] + set executables [list] + if {[file isdirectory $p]} { + #get all files that are executable in this path. + #If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names. + set pnormglob [file normalize $p] + if {$::tcl_platform(platform) eq "windows"} { + #Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable. + #We need to account for this in our glob pattern. + set pathexts [list] + if {[info exists ::env(PATHEXT)]} { + set env_pathexts [split $::env(PATHEXT) ";"] + #set pathexts [lmap e $env_pathexts {string tolower $e}] + foreach pe $env_pathexts { + if {$pe eq "."} { + continue + } + lappend pathexts [string tolower $pe] + } + } else { + set env_pathexts [list] + #default PATHEXT if not set - according to Microsoft docs + set pathexts [list .com .exe .bat .cmd] + } + foreach bg $binglobs { + set has_pathext 0 + foreach pe $pathexts { + if {[string match -nocase "*$pe" $bg]} { + set has_pathext 1 + break + } + } + if {!$has_pathext} { + foreach pe $pathexts { + lappend binglobs "$bg$pe" + } + } + } + set lc_binglobs [lmap e $binglobs {string tolower $e}] + if {"." in $pathexts} { + foreach bg $binglobs { + set has_pathext 0 + foreach pe $pathexts { + if {[string match -nocase "*$pe" $bg]} { + set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]] + set has_pathext 1 + break + } + } + if {$has_pathext} { + if {[string tolower $base] ni $lc_binglobs} { + lappend binglobs "$base" + } + } + } + } + + #TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem. + #(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug) + #We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results. + #The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for. + #(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe') + set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]] + set executables [list] + foreach e $globresults { + puts stderr "glob result: $e" + puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]" + lappend executables [file tail [file normalize $e]] + } + } else { + set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]] + } + } + dict set d_index_executables $path_idx $executables + foreach exe $executables { + if {$::tcl_platform(platform) eq "windows"} { + set exenorm [string tolower $exe] + } else { + set exenorm $exe + } + if {![dict exists $d_bin_info $exenorm]} { + dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]] + } else { + #dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe + set bindata [dict get $d_bin_info $exenorm] + dict lappend bindata path_indices $path_idx + dict lappend bindata paths $p + dict lappend bindata executable_names $exe + dict set d_bin_info $exenorm $bindata + } + } + } else { + #duplicate path entry - add to list of original paths for this normalized path + + # Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...? + # we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it. + set pathdata [dict get $d_path_info $pnorm] + dict lappend pathdata original_paths $p + dict lappend pathdata indices $path_idx + dict set d_path_info $pnorm $pathdata + + + #we don't need to add executables for this path - as they will be the same as the original path that we have already processed. + #However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables. + set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path + foreach exe $executables { + if {$::tcl_platform(platform) eq "windows"} { + set exenorm [string tolower $exe] + } else { + set exenorm $exe + } + #dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe + set bindata [dict get $d_bin_info $exenorm] + dict lappend bindata path_indices $path_idx + dict lappend bindata paths $p + dict lappend bindata executable_names $exe + dict set d_bin_info $exenorm $bindata + } + } + + incr path_idx + } + + #temporary debug output to check dicts are being built correctly + set debug "" + append debug "Path info dict:" \n + append debug [showdict $d_path_info] \n + append debug "Binary info dict:" \n + append debug [showdict $d_bin_info] \n + append debug "Index executables dict:" \n + append debug [showdict $d_index_executables] \n + #return $debug + puts stdout $debug + + + } #------------------------------------------------------------------- diff --git a/src/bootsupport/modules/punk/du-0.1.0.tm b/src/bootsupport/modules/punk/du-0.1.0.tm index bc753154..c9ab54fd 100644 --- a/src/bootsupport/modules/punk/du-0.1.0.tm +++ b/src/bootsupport/modules/punk/du-0.1.0.tm @@ -479,7 +479,7 @@ namespace eval punk::du { } namespace eval lib { variable du_literal - variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] + variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ] #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this @@ -489,7 +489,11 @@ namespace eval punk::du { return [dict get $winfile_attributes $bitmask] } else { #list/dict shimmering? - return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + + set decoded [twapi::decode_file_attributes $bitmask] + dict set winfile_attributes $bitmask $decoded + return $decoded } } variable win_reparse_tags @@ -563,23 +567,25 @@ namespace eval punk::du { #then twapi::device_ioctl (win32 DeviceIoControl) #then parse buffer somehow (binary scan..) #https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913 - + punk::args::define { + @id -id ::punk::du::lib::Get_attributes_from_iteminfo + -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" + -debugchannel -default stderr -help "channel to write debug output, or none to append to output" + @values -min 1 -max 1 + iteminfo -help "iteminfo dict as set by 'twapi::find_file_next iteminfo'" + } + #don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args. proc Get_attributes_from_iteminfo {args} { variable win_reparse_tags_by_int + #set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo] + set defaults [dict create -debug 0 -debugchannel stderr] + set opts [dict merge $defaults [lrange $args 0 end-1]] + set iteminfo [lindex $args end] - set argd [punk::args::parse $args withdef { - @id -id ::punk::du::lib::Get_attributes_from_iteminfo - -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" - -debugchannel -default stderr -help "channel to write debug output, or none to append to output" - @values -min 1 -max 1 - iteminfo -help "iteminfo dict as set by 'twapi::find_file_next iteminfo'" - }] - set opts [dict get $argd opts] - set iteminfo [dict get $argd values iteminfo] set opt_debug [dict get $opts -debug] set opt_debugchannel [dict get $opts -debugchannel] #-longname is placeholder - caller needs to set - set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0] + set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}] if {$opt_debug} { set dbg "iteminfo returned by find_file_open\n" append dbg [pdict -channel none iteminfo] @@ -592,34 +598,38 @@ namespace eval punk::du { } set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] - if {"hidden" in $attrinfo} { - dict set result -hidden 1 - } - if {"system" in $attrinfo} { - dict set result -system 1 - } - if {"readonly" in $attrinfo} { - dict set result -readonly 1 - } - dict set result -shortname [dict get $iteminfo altname] - dict set result -fileattributes $attrinfo - if {"reparse_point" in $attrinfo} { - #the twapi API splits this 32bit value for us - set low_word [dict get $iteminfo reserve0] - set high_word [dict get $iteminfo reserve1] - # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 - # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - #+-+-+-+-+-----------------------+-------------------------------+ - #|M|R|N|R| Reserved bits | Reparse tag value | - #+-+-+-+-+-----------------------+-------------------------------+ - #todo - is_microsoft from first bit of high_word - set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? - if {[dict exists $win_reparse_tags_by_int $low_int]} { - dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] - } else { - dict set result -reparseinfo [dict create tag "" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"] + foreach attr $attrinfo { + switch -- $attr { + hidden { + dict set result -hidden 1 + } + system { + dict set result -system 1 + } + readonly { + dict set result -readonly 1 + } + reparse_point { + #the twapi API splits this 32bit value for us + set low_word [dict get $iteminfo reserve0] + set high_word [dict get $iteminfo reserve1] + # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + #+-+-+-+-+-----------------------+-------------------------------+ + #|M|R|N|R| Reserved bits | Reparse tag value | + #+-+-+-+-+-----------------------+-------------------------------+ + #todo - is_microsoft from first bit of high_word + set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? + if {[dict exists $win_reparse_tags_by_int $low_int]} { + dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] + } else { + dict set result -reparseinfo [dict create tag "" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"] + } + } } } + #dict set result -shortname [dict get $iteminfo altname] + dict set result -fileattributes $attrinfo dict set result -raw $iteminfo return $result } @@ -652,27 +662,337 @@ namespace eval punk::du { catch {twapi::find_file_close $iterator} } } + proc resolve_characterclass {cclass} { + #takes the inner value from a tcl square bracketed character class and converts to a list of characters. + + #todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters + #e.g a-c-3 -> a b c - 3 + #e.g a-c-3-5 -> a b c - 3 4 5 + #e.g a-c -> a b c + #e.g a- -> a - + #e.g -c-e -> - c d e + #the tcl character class does not support negation or intersection - so we can ignore those possibilities for now. + #in this context we do not need to support named character classes such as [:digit:] + set chars [list] + set i 0 + set len [string length $cclass] + set accept_range 0 + while {$i < $len} { + set ch [string index $cclass $i] + if {$ch eq "-"} { + if {$accept_range} { + set start [string index $cclass [expr {$i - 1}]] + set end [string index $cclass [expr {$i + 1}]] + if {$start eq "" || $end eq ""} { + #invalid range - treat - as literal + if {"-" ni $chars} { + lappend chars "-" + } + } else { + #we have a range - add all chars from previous char to next char + #range may be in either direction - e.g a-c or c-a but we don't care about the order of our result. + if {$start eq $end} { + #degenerate range - treat as single char + if {$start ni $chars} { + lappend chars $start + } + } else { + if {[scan $start %c] < [scan $end %c]} { + set c1 [scan $start %c] + set c2 [scan $end %c] + } else { + set c1 [scan $end %c] + set c2 [scan $start %c] + } + for {set c $c1} {$c <= $c2} {incr c} { + set char [format %c $c] + if {$char ni $chars} { + lappend chars $char + } + } + } + + incr i ;#skip end char as it's already included in range + } + set accept_range 0 + } else { + #we have a literal - add to list and allow for possible range if next char is also a - + if {"-" ni $chars} { + lappend chars "-" + } + set accept_range 1 + } + } else { #we have a literal - add to list and allow for possible range if next char is also a - + if {$ch ni $chars} { + lappend chars $ch + } + set accept_range 1 + } + incr i + } + return $chars + } + + #return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible. + #first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes. + # e.g {*[\{]*} is a valid tcl glob + + #todo - write tests. + #should also support {*\{*} matching a file such as a{blah}b + + #Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL. + #So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity. + + proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} { + #windows API function in use is the FindFirstFile set of functions. + #these support wildcards * and ? *only*. + + #examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported + # {abc[1-3].txt} -> {abc?.txt} + # {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char + # {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call. + # {{S,t}*.txt} -> {S*.txt} {T*.txt} + # *.{txt,log} -> {*.txt} {*.log} + # {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals + + + set gchars [split $tclglob ""] + set winglob_list [list ""] + set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob. + set in_brackets 0 + set esc_next 0 + + set brace_depth 0 + set brace_content "" + set braced_alternatives [list] + + set brace_is_normal 0 + + set cclass_content "" + foreach ch $gchars { + if {$esc_next} { + if {$in_brackets} { + append cclass_content $ch + continue + } elseif {$brace_depth} { + append brace_content $ch + continue + } + set winglob_list [lmap wg $winglob_list {append wg $ch}] + set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}] + set esc_next 0 + continue + } + + if {$ch eq "\{"} { + if {$brace_depth} { + #we have an opening brace char inside braces + #Let the brace processing handle it as it recurses. + incr brace_depth 1 + append brace_content $ch + continue + } + incr brace_depth 1 + set brace_content "" + } elseif {$ch eq "\}"} { + if {$brace_depth > 1} { + #we have a closing brace char inside braces + append brace_content $ch + incr brace_depth -1 + continue + } + #process brace_content representing a list of alternatives + #handle list of alternatives - convert {txt,log} to *.txt;*.log + #set alternatives [split $brace_content ","] + lappend braced_alternatives $brace_content + set alternatives $braced_alternatives + set braced_alternatives [list] + set brace_content "" + incr brace_depth -1 + + set alt_winpatterns [list] + set alt_regexps [list] + foreach alt $alternatives { + set subresult [tclglob_equivalents $alt] + lappend alt_winpatterns {*}[dict get $subresult winglobs] + lappend alt_regexps {*}[dict get $subresult tclregexps] + } + set next_winglob_list [list] + set next_regexp_list [list] + foreach wg $winglob_list re $tclregexp_list { + #puts "wg: $wg" + #puts "re: $re" + foreach alt_wp $alt_winpatterns alt_re $alt_regexps { + #puts " alt_wp: $alt_wp" + #puts " alt_re: $alt_re" + lappend next_winglob_list "$wg$alt_wp" + set alt_re_no_caret [string range $alt_re 1 end] + lappend next_regexp_list "${re}${alt_re_no_caret}" + } + } + + set winglob_list $next_winglob_list + set tclregexp_list $next_regexp_list + + } elseif {$ch eq "\["} { + #windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char + + if {!$brace_depth} { + set in_brackets 1 + } else { + #we have a [ char inside braces + #Let the brace processing handle it as it recurses. + #but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets. + set brace_is_normal 1 + append brace_content $ch + } + } elseif {$ch eq "\]"} { + if {$brace_depth} { + #we have a ] char inside braces + #Let the brace processing hanele it as it recurses. + append brace_content $ch + continue + } + set in_brackets 0 + set charlist [resolve_characterclass $cclass_content] + set cclass_content "" + set next_winglob_list [list] + set next_regexp_list [list] + foreach c $charlist { + #set winglob_list [lmap wg $winglob_list {append wg $c}] + foreach wg $winglob_list { + lappend next_winglob_list "$wg$c" + } + foreach re $tclregexp_list { + set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}] + lappend next_regexp_list "${re}${c_escaped}" + } + } + set winglob_list $next_winglob_list + set tclregexp_list $next_regexp_list + } elseif {$ch eq "\\"} { + if {$in_brackets} { + append cclass_content $ch + #append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself. + set esc_next 1 + continue + } + if {$brace_depth} { + #we have a \ char inside braces + #Let the brace processing handle it as it recurses. + append brace_content $ch + set esc_next 1 + continue + } + set esc_next 1 + continue + } else { + if {$in_brackets} { + append cclass_content $ch + continue + } + if {!$brace_depth} { + set winglob_list [lmap wg $winglob_list {append wg $ch}] + set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}] + if {[string length $re_ch] == 1} { + switch -- $re_ch { + "?" {set re_ch "."} + "*" {set re_ch ".*"} + default { + #we could use the same mixed case filter here for both sensitive and insensitive filesystems, + #because the API filtering will already have done the restriction, + #and so a more permissive regex that matches both cases will still only match the results that the API call returns, + #which will be correct based on the case-sensitivity of the filesystem. + #It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob. + #ie in the parts of the original glob that were in square brackets. + + if {!$case_sensitive_filesystem} { + # add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob. + if {[string is upper $re_ch]} { + set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"] + } elseif {[string is lower $re_ch]} { + set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"] + } else { + #non-alpha char - no need to add case-insensitivity + } + } + } + } + } + set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}] + } else { + #we have a literal char inside braces - add to current brace_content + if {$brace_depth == 1 && $ch eq ","} { + lappend braced_alternatives $brace_content + set brace_content "" + } else { + append brace_content $ch + } + } + } + } + #sanity check + if {[llength $winglob_list] != [llength $tclregexp_list]} { + error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]" + } + set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}] + return [dict create winglobs $winglob_list tclregexps $tclregexp_list] + } + #todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed? namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs # get listing without using unix-tools (may not be installed on the windows system) # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) # This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance + + #todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob. + # we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before + #we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder. + #For really large folders and a moderate number of patterns, this could be a significant performance improvement. proc du_dirlisting_twapi {folderpath args} { set defaults [dict create\ -glob *\ -filedebug 0\ + -patterndebug 0\ -with_sizes 1\ -with_times 1\ ] set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- - set opt_glob [dict get $opts -glob] + set tcl_glob [dict get $opts -glob] + + #todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps. + set case_sensitive_filesystem 0 ;#todo - consider detecting this properly. + #Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program. + #In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly, + #as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem, + #so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly. + #Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a + #case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem. + + #Note: we only use this to adjust the filtering regexps we generate from the tcl glob. + #The windows API pattern match will already filter based on the case-sensitivity of the filesystem + # so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem. + + set globs_processed [tclglob_equivalents $tcl_glob] + #we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob. + #temp + #set win_glob_list [list $tcl_glob] + set win_glob_list [dict get $globs_processed winglobs] + set tcl_regex_list [dict get $globs_processed tclregexps] + + + #review + # our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob. + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_with_sizes [dict get $opts -with_sizes] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_filedebug [dict get $opts -filedebug] ;#per file # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_patterndebug [dict get $opts -patterndebug] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set ftypes [list f d l] if {"$opt_with_sizes" in {0 1}} { #don't use string is boolean - (f false vs f file!) @@ -711,256 +1031,288 @@ namespace eval punk::du { } } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set dirs [list] + set files [list] + set filesizes [list] + set allsizes [dict create] + set alltimes [dict create] + + set links [list] + set linkinfo [dict create] + set debuginfo [dict create] + set flaggedhidden [list] + set flaggedsystem [list] + set flaggedreadonly [list] set errors [dict create] set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ # return it so it can be stored and tried as an alternative for problem paths - #puts stderr ">>> glob: $opt_glob" - #REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames - #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ - #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. - # using * all the time may be inefficient - so we might be able to avoid that in some cases. - try { - #glob of * will return dotfiles too on windows - set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field - } on error args { + + foreach win_glob $win_glob_list tcl_re $tcl_regex_list { + if {$opt_patterndebug} { + puts stderr ">>>du_dirlisting_twapi winglob: $win_glob" + puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re" + } + + #REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames + #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ + #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. + # using * all the time may be inefficient - so we might be able to avoid that in some cases. try { - if {[string match "*denied*" $args]} { - #output similar format as unixy du - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - } - if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" - puts stderr " (errorcode: $::errorCode)\n" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - } + #glob of * will return dotfiles too on windows + set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field + } on error args { + try { + if {[string match "*denied*" $args]} { + #output similar format as unixy du + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } + if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" + puts stderr " (errorcode: $::errorCode)\n" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } - #errorcode TWAPI_WIN32 2 {The system cannot find the file specified.} - #This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error - #The find-all glob * won't get here because it returns . & .. - #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) - #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob - #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) - if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} { + #errorcode TWAPI_WIN32 2 {The system cannot find the file specified.} + #This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error + #The find-all glob * won't get here because it returns . & .. + #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) + #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob + #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) + #if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {} if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} { #looks like an ordinary no results for chosen glob - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + #return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + continue } - } - if {[set plen [pathcharacterlen $folderpath]] >= 250} { - set errmsg "error reading folder: $folderpath (len:$plen)\n" - append errmsg "error: $args" \n - append errmsg "errorcode: $::errorCode" \n - # re-fetch this folder with altnames - #file normalize - aside from being slow - will have problems with long paths - so this won't work. - #this function should only accept absolute paths - # - # - #Note: using -detail full only helps if the last segment of path has an altname.. - #To properly shorten we need to have kept track of altname all the way from the root! - #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd - #### SLOW - set fixedpath [dict get [file attributes $folderpath] -shortname] - #### SLOW - + if {[set plen [pathcharacterlen $folderpath]] >= 250} { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + # re-fetch this folder with altnames + #file normalize - aside from being slow - will have problems with long paths - so this won't work. + #this function should only accept absolute paths + # + # + #Note: using -detail full only helps if the last segment of path has an altname.. + #To properly shorten we need to have kept track of altname all the way from the root! + #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd + #### SLOW + set fixedpath [dict get [file attributes $folderpath] -shortname] + #### SLOW + + + append errmsg "retrying with with windows altname '$fixedpath'" + puts stderr $errmsg + } else { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + set tmp_errors [list $::errorCode] + #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share + #we can use //?/path dos device path - but not with tcl functions + #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. + #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here - append errmsg "retrying with with windows altname '$fixedpath'" - puts stderr $errmsg - } else { - set errmsg "error reading folder: $folderpath (len:$plen)\n" - append errmsg "error: $args" \n - append errmsg "errorcode: $::errorCode" \n - set tmp_errors [list $::errorCode] - #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share - #we can use //?/path dos device path - but not with tcl functions - #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. - #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here - - set fixedtail "" - - set parent [file dirname $folderpath] - set badtail [file tail $folderpath] - set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames - while {[twapi::find_file_next $iterator iteminfo]} { - set nm [dict get $iteminfo name] - if {$nm eq $badtail} { - set fixedtail [dict get $iteminfo altname] - break + set fixedtail "" + + set parent [file dirname $folderpath] + set badtail [file tail $folderpath] + set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo" + if {$nm eq $badtail} { + set fixedtail [dict get $iteminfo altname] + break + } + } + + if {![string length $fixedtail]} { + dict lappend errors $folderpath {*}$tmp_errors + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } + + #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. + #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it + #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) + #so the illegalname_fix doesn't really work here + #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] + + #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. + set fixedpath $parent/$fixedtail + append errmsg "retrying with with windows dos device path $fixedpath\n" + puts stderr $errmsg + } - if {![string length $fixedtail]} { - dict lappend errors $folderpath {*}$tmp_errors - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" + if {[catch { + set iterator [twapi::find_file_open $fixedpath/* -detail basic] + } errMsg]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')" + puts stderr " (errorcode: $::errorCode)\n" + puts stderr "$errMsg" + dict lappend errors $folderpath $::errorCode return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } - #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. - #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it - #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) - #so the illegalname_fix doesn't really work here - #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] - #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. - set fixedpath [file join $parent $fixedtail] - append errmsg "retrying with with windows dos device path $fixedpath\n" - puts stderr $errmsg + } on error args { + set errmsg "error reading folder: $folderpath\n" + append errmsg "error: $args" \n + append errmsg "errorInfo: $::errorInfo" \n + puts stderr "$errmsg" + puts stderr "FAILED to collect info for folder '$folderpath'" + #append errmsg "aborting.." + #error $errmsg + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } + } + #jjj - if {[catch { - set iterator [twapi::find_file_open $fixedpath/* -detail basic] - } errMsg]} { - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')" - puts stderr " (errorcode: $::errorCode)\n" - puts stderr "$errMsg" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + if {![regexp $tcl_re $nm]} { + continue + } + if {$nm in {. ..}} { + continue } + set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path + #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] + #set ftype "" + set do_sizes 0 + set do_times 0 + #attributes applicable to any classification + set fullname [file_join_one $folderpath $nm] + + set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict + if {[dict get $attrdict -hidden]} { + lappend flaggedhidden $fullname + } + if {[dict get $attrdict -system]} { + lappend flaggedsystem $fullname + } + if {[dict get $attrdict -readonly]} { + lappend flaggedreadonly $fullname + } + if {[dict exists $attrdict -debug]} { + dict set debuginfo $fullname [dict get $attrdict -debug] + } + set file_attributes [dict get $attrdict -fileattributes] - } on error args { - set errmsg "error reading folder: $folderpath\n" - append errmsg "error: $args" \n - append errmsg "errorInfo: $::errorInfo" \n - puts stderr "$errmsg" - puts stderr "FAILED to collect info for folder '$folderpath'" - #append errmsg "aborting.." - #error $errmsg - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - - } - } - set dirs [list] - set files [list] - set filesizes [list] - set allsizes [dict create] - set alltimes [dict create] + set is_reparse_point [expr {"reparse_point" in $file_attributes}] + set is_directory [expr {"directory" in $file_attributes}] - set links [list] - set linkinfo [dict create] - set debuginfo [dict create] - set flaggedhidden [list] - set flaggedsystem [list] - set flaggedreadonly [list] + set linkdata [dict create] + # ----------------------------------------------------------- + #main classification + if {$is_reparse_point} { + #this concept doesn't correspond 1-to-1 with unix links + #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points + #review - and see which if any actually belong in the links key of our return - while {[twapi::find_file_next $iterator iteminfo]} { - set nm [dict get $iteminfo name] - #recheck glob - #review! - if {![string match $opt_glob $nm]} { - continue - } - set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path - #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] - set ftype "" - #attributes applicable to any classification - set fullname [file_join_one $folderpath $nm] - - set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict - set file_attributes [dict get $attrdict -fileattributes] - - set linkdata [dict create] - # ----------------------------------------------------------- - #main classification - if {"reparse_point" in $file_attributes} { - #this concept doesn't correspond 1-to-1 with unix links - #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points - #review - and see which if any actually belong in the links key of our return - - - #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point - # - #we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? - #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' - #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls - #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} - #e.g (stripped of headers/footers and other lines) - #2022-10-02 04:07 AM priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] - #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. - #du includes the size of the symlink - #but we can't get it with tcl's file size - #twapi doesn't seem to have anything to help read it either (?) - #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link - # - #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. - # - #links are techically files too, whether they point to a file/dir or nothing. - lappend links $fullname - set ftype "l" - dict set linkdata linktype reparse_point - dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] - if {"directory" ni $file_attributes} { - dict set linkdata target_type file - } - } - if {"directory" in $file_attributes} { - if {$nm in {. ..}} { - continue + + #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point + # + #we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? + #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' + #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls + #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} + #e.g (stripped of headers/footers and other lines) + #2022-10-02 04:07 AM priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] + #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. + #du includes the size of the symlink + #but we can't get it with tcl's file size + #twapi doesn't seem to have anything to help read it either (?) + #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link + # + #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. + # + #links are techically files too, whether they point to a file/dir or nothing. + lappend links $fullname + #set ftype "l" + if {"l" in $sized_types} { + set do_sizes 1 + } + if {"l" in $timed_types} { + set do_times 1 + } + dict set linkdata linktype reparse_point + dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] + if {"directory" ni $file_attributes} { + dict set linkdata target_type file + } } - if {"reparse_point" ni $file_attributes} { - lappend dirs $fullname - set ftype "d" - } else { - #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections - dict set linkdata target_type directory + if {$is_directory} { + #if {$nm in {. ..}} { + # continue + #} + if {!$is_reparse_point} { + lappend dirs $fullname + #set ftype "d" + if {"d" in $sized_types} { + set do_sizes 1 + } + if {"d" in $timed_types} { + set do_times 1 + } + } else { + #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections + dict set linkdata target_type directory + } } - } - if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} { - #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? - lappend files $fullname - if {"f" in $sized_types} { - lappend filesizes [dict get $iteminfo size] + if {!$is_reparse_point && !$is_directory} { + #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? + lappend files $fullname + if {"f" in $sized_types} { + lappend filesizes [dict get $iteminfo size] + set do_sizes 1 + } + if {"f" in $timed_types} { + set do_times 1 + } + #set ftype "f" } - set ftype "f" - } - # ----------------------------------------------------------- + # ----------------------------------------------------------- - if {[dict get $attrdict -hidden]} { - lappend flaggedhidden $fullname - } - if {[dict get $attrdict -system]} { - lappend flaggedsystem $fullname - } - if {[dict get $attrdict -readonly]} { - lappend flaggedreadonly $fullname - } - if {$ftype in $sized_types} { - dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] - } - if {$ftype in $timed_types} { - #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) - #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds - #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict - dict set alltimes $fullname [dict create\ - c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\ - a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\ - m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ - ] - } - if {[dict size $linkdata]} { - dict set linkinfo $fullname $linkdata - } - if {[dict exists $attrdict -debug]} { - dict set debuginfo $fullname [dict get $attrdict -debug] + if {$do_sizes} { + dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] + } + if {$do_times} { + #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) + #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds + #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict + dict set alltimes $fullname [dict create\ + c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\ + a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\ + m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ + ] + } + if {[dict size $linkdata]} { + dict set linkinfo $fullname $linkdata + } } + twapi::find_file_close $iterator } - twapi::find_file_close $iterator - set vfsmounts [get_vfsmounts_in_folder $folderpath] + set vfsmounts [get_vfsmounts_in_folder $folderpath] set effective_opts $opts dict set effective_opts -with_times $timed_types dict set effective_opts -with_sizes $sized_types #also determine whether vfs. file system x is *much* faster than file attributes #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder - return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] + return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] } proc get_vfsmounts_in_folder {folderpath} { set vfsmounts [list] @@ -991,9 +1343,11 @@ namespace eval punk::du { #work around the horrible tilde-expansion thing (not needed for tcl 9+) proc file_join_one {base newtail} { if {[string index $newtail 0] ne {~}} { - return [file join $base $newtail] + #return [file join $base $newtail] + return $base/$newtail } - return [file join $base ./$newtail] + #return [file join $base ./$newtail] + return $base/./$newtail } @@ -1121,7 +1475,7 @@ namespace eval punk::du { #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #struct::set will affect order: tcl vs critcl give different ordering! set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links] - set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]] + set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]] #---- diff --git a/src/bootsupport/modules/punk/lib-0.1.6.tm b/src/bootsupport/modules/punk/lib-0.1.6.tm index 6a7b79d6..fe8cfc7e 100644 --- a/src/bootsupport/modules/punk/lib-0.1.6.tm +++ b/src/bootsupport/modules/punk/lib-0.1.6.tm @@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check { #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #Not any sort of comprehensive check of known tcl bugs. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases. + + proc has_tclbug_caseinsensitiveglob_windows {} { + #https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted + + if {"windows" ne $::tcl_platform(platform)} { + set bug 0 + } else { + set tmpdir [file tempdir] + set testfile [file join $tmpdir "bugtest"] + set fd [open $testfile w] + puts $fd test + close $fd + set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*] + foreach r $globresult { + if {$r ne "bugtest"} { + set bug 1 + break + } + } + return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning] + #possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized + # to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation. + } + } + + #todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies. + # but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module, + # or at least checking for an existing mounted case-insensitive filesystem. + # A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like. + # It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix. + + proc has_tclbug_regexp_emptystring {} { #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, diff --git a/src/bootsupport/modules/punk/nav/fs-0.1.0.tm b/src/bootsupport/modules/punk/nav/fs-0.1.0.tm index e767e366..073b6cce 100644 --- a/src/bootsupport/modules/punk/nav/fs-0.1.0.tm +++ b/src/bootsupport/modules/punk/nav/fs-0.1.0.tm @@ -50,7 +50,7 @@ package require punk::lib package require punk::args package require punk::ansi package require punk::winpath -package require punk::du +package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing. package require commandstack #*** !doctools #[item] [package {Tcl 8.6}] @@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs { #[list_begin definitions] + punk::args::define { + @id -id ::punk::nav::fs::d/ + @cmd -name punk::nav::fs::d/ -help\ + {List directories or directories and files in the current directory or in the + targets specified with the fileglob_or_target glob pattern(s). + + If a single target is specified without glob characters, and it exists as a directory, + then the working directory is changed to that target and a listing of that directory + is returned. If the single target specified without glob characters does not exist as + a directory, then it is treated as a glob pattern and the listing is for the current + directory with results filtered to match fileglob_or_target. + + If multiple targets or glob patterns are specified, then a separate listing is returned + for each fileglob_or_target pattern. + + This function is provided via aliases as ./ and .// with v being inferred from the alias + name, and also as d/ with an explicit v argument. + The ./ and .// forms are more convenient for interactive use. + examples: + ./ - list directories in current directory + .// - list directories and files in current directory + ./ src/* - list directories in src + .// src/* - list directories and files in src + .// *.txt - list files in current directory with .txt extension + .// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name + (on a case-insensitive filesystem this would also match T*1.txt etc) + .// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name + (glob chars treated as literals due to being in character-class brackets + This will match files beginning with a capital T and not lower case t + even on a case-insensitive filesystem.) + .// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns: + {[t]*} - names beginning with t + {d{e,d}*} - names beginning with de or dd + (on a case-insensitive filesystem the first pattern would also match names beginning with T) + } + @values -min 1 -max -1 -type string + v -type string -choices {/ //} -help\ + " + / - list directories only + // - list directories and files + " + fileglob_or_target -type string -optional true -multiple true -help\ + "A glob pattern as supported by Tcl's 'glob' command, to filter results. + If multiple patterns are supplied, then a listing for each pattern is returned. + If no patterns are supplied, then all items are listed." + } + #NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread. #As this function recurses and calls cd multiple times - it's not thread-safe. #Another thread could theoretically cd whilst this is running. @@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs { #puts stdout "-->[ansistring VIEW $result]" return $result } else { - set atail [lassign $args cdtarget] if {[llength $args] == 1} { set cdtarget [lindex $args 0] switch -exact -- $cdtarget { . - ./ { - tailcall punk::nav::fs::d/ + tailcall punk::nav::fs::d/ $v } .. - ../ { if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} { @@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs { if {[string range $cdtarget_copy 0 3] eq "//?/"} { #handle dos device paths - convert to normal path for glob testing set glob_test [string range $cdtarget_copy 3 end] - set cdtarget_is_glob [regexp {[*?]} $glob_test] + set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test] } else { - set cdtarget_is_glob [regexp {[*?]} $cdtarget] + set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget] + #todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing. } if {!$cdtarget_is_glob} { set cdtarget_file_type [file type $cdtarget] @@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs { } set VIRTUAL_CWD $cdtarget set curdir $cdtarget + tailcall punk::nav::fs::d/ $v } else { set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] if {[string match //zipfs:/* $VIRTUAL_CWD]} { @@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs { } if {[file type $target] eq "directory"} { set VIRTUAL_CWD $target + tailcall punk::nav::fs::d/ $v } } - tailcall punk::nav::fs::d/ $v } set curdir $VIRTUAL_CWD } else { @@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs { #globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.) - set searchspec [lindex $args 0] set result "" #set chunklist [list] @@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs { set this_result [dict create] foreach searchspec $args { set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)] - set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}] + #we need to support the same glob chars that Tcl's 'glob' command accepts. + set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]] + #we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean #this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough. #lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*) @@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs { set allow_nonportable [dict exists $received -nonportable] set curdir [pwd] - set fullpath_list [list] + + set fullpath_list [list] ;#list of full paths to create. + set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir) + #these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests. + set error_paths [list] foreach p $paths { if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { @@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs { lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] continue } - set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] - #e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. - set fullpath [file join $path1 {*}[lrange $args 1 end]] + set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)] + #if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir. #Some subpaths of the supplied paths to create may already exist. - #we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. + #we should test write permissions on the nearest existing parent of the supplied path to create, + #rather than just on the immediate parent segment of the supplied path itself which may not exist. + + set fullpath [file normalize $fullpath] set parent [file dirname $fullpath] while {![file exists $parent]} { set parent [file dirname $parent] @@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs { lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] continue } + lappend existing_parent_list $parent lappend fullpath_list $fullpath } if {[llength $fullpath_list] != [llength $paths]} { @@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs { set num_created 0 set error_string "" - foreach fullpath $fullpath_list { + foreach fullpath $fullpath_list existing_parent $existing_parent_list { + #calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue. + #set relative_path [file relative $fullpath $existing_parent] + #todo. if {[catch {file mkdir $fullpath}]} { set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." + cd $curdir break } incr num_created @@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs { if {$error_string ne ""} { error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." } - d/ $curdir + + #display summaries of created directories (which may have already existed) by reusing d/ to get info on them. + set query_paths [lmap v $paths $v/*] + d/ / {*}$query_paths } #todo use unknown to allow d/~c:/etc ?? @@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs { if {![file isdirectory $target]} { error "Folder $target not found" } - d/ $target + d/ / $target } @@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs { } set relativepath [expr {[file pathtype $searchspec] eq "relative"}] - set has_tailglobs [regexp {[?*]} [file tail $searchspec]] + + #set has_tailglobs [regexp {[?*]} [file tail $searchspec]] + set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]] #dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent. #(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict) @@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs { } puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location" set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location] - return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] + return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] } #todo - package as punk::nav::fs @@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs { } proc dirfiles_dict {args} { set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict] - lassign [dict values $argd] leaders opts vals - set searchspecs [dict values $vals] + lassign [dict values $argd] leaders opts values + set searchspecs [dict values $values] #puts stderr "searchspecs: $searchspecs [llength $searchspecs]" #puts stdout "arglist: $opts" @@ -893,7 +958,7 @@ tcl::namespace::eval punk::nav::fs { set searchspec [lindex $searchspecs 0] # -- --- --- --- --- --- --- set opt_searchbase [dict get $opts -searchbase] - set opt_tailglob [dict get $opts -tailglob] + set opt_tailglob [dict get $opts -tailglob] set opt_with_sizes [dict get $opts -with_sizes] set opt_with_times [dict get $opts -with_times] # -- --- --- --- --- --- --- @@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs { } } "\uFFFF" { - set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] + set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]] + + #set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] if {$searchtail_has_globs} { if {$is_relativesearchspec} { #set location [file dirname [file join $searchbase $searchspec]] @@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs { } else { set next_opt_with_times [list -with_times $opt_with_times] } + + set ts1 [clock clicks -milliseconds] if {$is_in_vfs} { set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] } else { @@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs { } } } + set ts2 [clock clicks -milliseconds] + set ts_listing [expr {$ts2 - $ts1}] set dirs [dict get $listing dirs] set files [dict get $listing files] @@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs { set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden] } - set dirs [lsort -dictionary $dirs] ;#todo - natsort + #----------------------------------------------------------------------------------------- + set ts1 [clock milliseconds] + set dirs [lsort -dictionary $dirs] ;#todo - natsort #foreach d $dirs { # if {[lindex [file system $d] 0] eq "tclvfs"} { @@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs { set files $sorted_files set filesizes $sorted_filesizes + set ts2 [clock milliseconds] + set ts_sorting [expr {$ts2 - $ts1}] + #----------------------------------------------------------------------------------------- # -- --- #jmn + set ts1 [clock milliseconds] foreach nm [list {*}$dirs {*}$files] { if {[punk::winpath::illegalname_test $nm]} { lappend nonportable $nm } } + set ts2 [clock milliseconds] + set ts_nonportable_check [expr {$ts2 - $ts1}] + set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms] + set front_of_dict [dict create location $location searchbase $opt_searchbase] set listing [dict merge $front_of_dict $listing] - set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes] + set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info] return [dict merge $listing $updated] } @@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs { @id -id ::punk::nav::fs::dirfiles_dict_as_lines -stripbase -default 0 -type boolean -formatsizes -default 1 -type boolean - -listing -default "/" -choices {/ // //} + -listing -default "/" -choices {/ //} @values -min 1 -max -1 -type dict -unnamed true } #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? proc dirfiles_dict_as_lines {args} { - set ts1 [clock milliseconds] + #set ts1 [clock milliseconds] package require overtype set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] lassign [dict values $argd] leaders opts vals @@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs { #review - symlink to shortcut? hopefully will just work #classify as file or directory - fallback to file if unknown/undeterminable set finfo_plus [list] - set ts2 [clock milliseconds] + #set ts2 [clock milliseconds] foreach fdict $finfo { set fname [dict get $fdict file] if {[file extension $fname] eq ".lnk"} { @@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs { } unset finfo - puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" - puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" + #puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" + #puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] @@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs { #consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path #review: punk::winpath calls cygpath! #review: file pathtype is platform dependant - proc path_to_absolute {path base platform} { - set ptype [file pathtype $path] + proc path_to_absolute {subpath base platform} { + set ptype [file pathtype $subpath] if {$ptype eq "absolute"} { - set path_absolute $path + set path_absolute $subpath } elseif {$ptype eq "volumerelative"} { if {$platform eq "windows"} { #unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms) - if {[string index $path 0] eq "/"} { + if {[string index $subpath 0] eq "/"} { #this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here #It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions. #Todo - tidy up. package require punk::unixywindows - set path_absolute [punk::unixywindows::towinpath $path] + set path_absolute [punk::unixywindows::towinpath $subpath] #puts stderr "winpath: $path" } else { #todo handle volume-relative paths with volume specified c:etc c: @@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs { #The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives #Arguably if ...? - #set path_absolute $base/$path - set path_absolute $path + #set path_absolute $base/$subpath + set path_absolute $subpath } } else { # unknown what paths are reported as this on other platforms.. treat as absolute for now - set path_absolute $path + set path_absolute $subpath } } else { - set path_absolute $base/$path - } - if {$platform eq "windows"} { - if {[punk::winpath::illegalname_test $path_absolute]} { - set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present - } + #e.g relative subpath=* base = c:/test -> c:/test/* + #e.g relative subpath=../test base = c:/test -> c:/test/../test + #e.g relative subpath=* base = //server/share/test -> //server/share/test/* + set path_absolute $base/$subpath } + #fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues. + #if {$platform eq "windows"} { + # if {[punk::winpath::illegalname_test $path_absolute]} { + # set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present + # } + #} return $path_absolute } proc strip_prefix_depth {path prefix} { @@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib { #[para] Secondary functions that are part of the API #[list_begin definitions] - #proc utility1 {p1 args} { - # #*** !doctools - # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] - # #[para]Description of utility1 - # return 1 - #} + punk::args::define { + @id -id ::punk::nav::fs::lib::is_fileglob + @cmd -name punk::nav::fs::lib::is_fileglob + @values -min 1 -max 1 + path -type string -required true -help\ + {String to test for being a glob pattern as recognised by the tcl 'glob' command. + If the string represents a path with multiple segments, only the final component + of the path will be tested for glob characters. + Glob patterns in this context are different to globs accepted by TCL's 'string match'. + A glob pattern is any string that contains unescaped * ? { } [ or ]. + This will not detect mismatched unescaped braces or brackets. + Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern. + } + } + proc is_fileglob {str} { + #a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser) + set in_escape 0 + set segments [file split $str] + set tail [lindex $segments end] + foreach c [split $tail ""] { + if {$in_escape} { + set in_escape 0 + } else { + if {$c eq "\\"} { + set in_escape 1 + } elseif {$c in [list * ? "\[" "\]" "{" "}" ]} { + return 1 + } + } + } + return 0 + } #*** !doctools diff --git a/src/bootsupport/modules/punk/path-0.1.0.tm b/src/bootsupport/modules/punk/path-0.1.0.tm index 46ca4aa2..de536724 100644 --- a/src/bootsupport/modules/punk/path-0.1.0.tm +++ b/src/bootsupport/modules/punk/path-0.1.0.tm @@ -357,7 +357,7 @@ namespace eval punk::path { } } } - puts "==>finalparts: '$finalparts'" + #puts "==>finalparts: '$finalparts'" # using join - {"" "" server share} -> //server/share and {a b} -> a/b if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} { #backtracking on unix-style path can end up with empty string as only member of finalparts diff --git a/src/bootsupport/modules/punk/winpath-0.1.0.tm b/src/bootsupport/modules/punk/winpath-0.1.0.tm index 9079dbbc..1f2948fe 100644 --- a/src/bootsupport/modules/punk/winpath-0.1.0.tm +++ b/src/bootsupport/modules/punk/winpath-0.1.0.tm @@ -114,6 +114,50 @@ namespace eval punk::winpath { return $path } } + + proc illegal_char_map_to_doublewide {ch} { + #windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc) + set map [dict create \ + "<" "\uFF1C" \ + ">" "\uFF1E" \ + ":" "\uFF1A" \ + "\"" "\uFF02" \ + "/" "\uFF0F" \ + "\\" "\uFF3C" \ + "|" "\uFF5C" \ + "?" "\uFF1F" \ + "*" "\uFF0A"] + if {$ch in [dict keys $map]} { + return [dict get $map $ch] + } else { + return $ch + } + } + proc illegal_char_map_to_ntfs {ch} { + #windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc. + #see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars + #see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped) + + set map [dict create \ + "<" "\uF03C" \ + ">" "\uF03E" \ + ":" "\uF03A" \ + "\"" "\uF022" \ + "/" "unknown" \ + "\\" "\uF05c" \ + "|" "\uF07C" \ + "?" "\uF03F" \ + "*" "\uF02A"] + #note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API. + if {$ch in [dict keys $map]} { + return [dict get $map $ch] + } else { + return $ch + } + } + + + #we don't validate that path is actually illegal because we don't know the full range of such names. #The caller can apply this to any path. #don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently) @@ -200,8 +244,15 @@ namespace eval punk::winpath { set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] #we need to exclude things like path/.. path/. - foreach seg [file split $path] { - if {$seg in [list . ..]} { + set segments [file split $path] + if {[file pathtype $path] eq "absolute"} { + #absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments. + set checksegments [lrange $segments 1 end] + } else { + set checksegments $segments + } + foreach seg $checksegments { + if {$seg in {. ..}} { #review - what if there is a folder or file that actually has a name such as . or .. ? #unlikely in normal use - but could done deliberately for bad reasons? #We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem. @@ -220,10 +271,17 @@ namespace eval punk::winpath { #only check for actual space as other whitespace seems to work without being stripped #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph - if {[string index $seg end] in [list " " "."]} { + if {[string index $seg end] in {" " .}} { #windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc) return 1 } + #set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams. + set re {[<>:"/\\|?*]} + + if {[regexp $re $seg]} { + #windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc) + return 1 + } } #glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph) #- they seem to be readable from cmd and tclsh as is. diff --git a/src/modules/punk-0.1.tm b/src/modules/punk-0.1.tm index ea72ad1c..e1648d9d 100644 --- a/src/modules/punk-0.1.tm +++ b/src/modules/punk-0.1.tm @@ -6066,9 +6066,218 @@ namespace eval punk { set pipe [punk::path_list_pipe $glob] {*}$pipe } - proc path {{glob *}} { + proc path_basic {{glob *}} { set pipe [punk::path_list_pipe $glob] {*}$pipe |> list_as_lines + } + namespace eval argdoc { + punk::args::define { + @id -id ::punk::path + @cmd -name "punk::path" -help\ + "Introspection of the PATH environment variable. + This tool will examine executables within each PATH entry and show which binaries + are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns." + @opts + -binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries." + @values -min 0 -max -1 + glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories." + } + } + proc path {args} { + set argd [punk::args::parse $args withid ::punk::path] + lassign [dict values $argd] leaders opts values received + set binglobs [dict get $opts -binglobs] + set globs [dict get $values glob] + if {$::tcl_platform(platform) eq "windows"} { + set sep ";" + } else { + # : ok for linux/bsd ... mac? + set sep ":" + } + set all_paths [split [string trimright $::env(PATH) $sep] $sep] + set filtered_paths $all_paths + if {[llength $globs]} { + set filtered_paths [list] + foreach p $all_paths { + foreach g $globs { + if {[string match -nocase $g $p]} { + lappend filtered_paths $p + break + } + } + } + } + + #Windows executable search location order: + #1. The current directory. + #2. The directories that are listed in the PATH environment variable. + # within each directory windows uses the PATHEXT environment variable to determine + #which files are considered executable and in which order they are considered. + #So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files + #with the same name but different extensions in the same directory, then the one + #with the extension that appears first in PATHEXT will be executed when that name is called. + + #On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output. + #Duplicate PATH entries are also a fairly likely possibility on all platforms. + #This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries. + + #By default we don't want to display all executables in all PATH entries - as this can be very verbose + #- but we want to be able to show which executables are overshadowed by which PATH entries, + #and to be able to filter the PATH entries and the executables using glob patterns. + #To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name. + #The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths + #in the original PATH list. The executable dict will contain the paths and path indices where each executable is found, + #and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a + #dict keyed by path index which contains the list of executables in that path - to make it easy to show which + #executables are overshadowed by which paths. + + set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows). + set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off). + set d_index_executables [dict create] ;#key is path index, value is list of executables in that path + set path_idx 0 + foreach p $all_paths { + if {$::tcl_platform(platform) eq "windows"} { + set pnorm [string tolower $p] + } else { + set pnorm $p + } + if {![dict exists $d_path_info $pnorm]} { + dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]] + set executables [list] + if {[file isdirectory $p]} { + #get all files that are executable in this path. + #If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names. + set pnormglob [file normalize $p] + if {$::tcl_platform(platform) eq "windows"} { + #Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable. + #We need to account for this in our glob pattern. + set pathexts [list] + if {[info exists ::env(PATHEXT)]} { + set env_pathexts [split $::env(PATHEXT) ";"] + #set pathexts [lmap e $env_pathexts {string tolower $e}] + foreach pe $env_pathexts { + if {$pe eq "."} { + continue + } + lappend pathexts [string tolower $pe] + } + } else { + set env_pathexts [list] + #default PATHEXT if not set - according to Microsoft docs + set pathexts [list .com .exe .bat .cmd] + } + foreach bg $binglobs { + set has_pathext 0 + foreach pe $pathexts { + if {[string match -nocase "*$pe" $bg]} { + set has_pathext 1 + break + } + } + if {!$has_pathext} { + foreach pe $pathexts { + lappend binglobs "$bg$pe" + } + } + } + set lc_binglobs [lmap e $binglobs {string tolower $e}] + if {"." in $pathexts} { + foreach bg $binglobs { + set has_pathext 0 + foreach pe $pathexts { + if {[string match -nocase "*$pe" $bg]} { + set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]] + set has_pathext 1 + break + } + } + if {$has_pathext} { + if {[string tolower $base] ni $lc_binglobs} { + lappend binglobs "$base" + } + } + } + } + + #TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem. + #(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug) + #We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results. + #The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for. + #(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe') + set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]] + set executables [list] + foreach e $globresults { + puts stderr "glob result: $e" + puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]" + lappend executables [file tail [file normalize $e]] + } + } else { + set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]] + } + } + dict set d_index_executables $path_idx $executables + foreach exe $executables { + if {$::tcl_platform(platform) eq "windows"} { + set exenorm [string tolower $exe] + } else { + set exenorm $exe + } + if {![dict exists $d_bin_info $exenorm]} { + dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]] + } else { + #dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe + set bindata [dict get $d_bin_info $exenorm] + dict lappend bindata path_indices $path_idx + dict lappend bindata paths $p + dict lappend bindata executable_names $exe + dict set d_bin_info $exenorm $bindata + } + } + } else { + #duplicate path entry - add to list of original paths for this normalized path + + # Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...? + # we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it. + set pathdata [dict get $d_path_info $pnorm] + dict lappend pathdata original_paths $p + dict lappend pathdata indices $path_idx + dict set d_path_info $pnorm $pathdata + + + #we don't need to add executables for this path - as they will be the same as the original path that we have already processed. + #However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables. + set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path + foreach exe $executables { + if {$::tcl_platform(platform) eq "windows"} { + set exenorm [string tolower $exe] + } else { + set exenorm $exe + } + #dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe + set bindata [dict get $d_bin_info $exenorm] + dict lappend bindata path_indices $path_idx + dict lappend bindata paths $p + dict lappend bindata executable_names $exe + dict set d_bin_info $exenorm $bindata + } + } + + incr path_idx + } + + #temporary debug output to check dicts are being built correctly + set debug "" + append debug "Path info dict:" \n + append debug [showdict $d_path_info] \n + append debug "Binary info dict:" \n + append debug [showdict $d_bin_info] \n + append debug "Index executables dict:" \n + append debug [showdict $d_index_executables] \n + #return $debug + puts stdout $debug + + + } #------------------------------------------------------------------- diff --git a/src/modules/punk/du-999999.0a1.0.tm b/src/modules/punk/du-999999.0a1.0.tm index daf8ca56..7f956f29 100644 --- a/src/modules/punk/du-999999.0a1.0.tm +++ b/src/modules/punk/du-999999.0a1.0.tm @@ -479,7 +479,7 @@ namespace eval punk::du { } namespace eval lib { variable du_literal - variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] + variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ] #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this @@ -489,7 +489,11 @@ namespace eval punk::du { return [dict get $winfile_attributes $bitmask] } else { #list/dict shimmering? - return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + + set decoded [twapi::decode_file_attributes $bitmask] + dict set winfile_attributes $bitmask $decoded + return $decoded } } variable win_reparse_tags @@ -563,23 +567,25 @@ namespace eval punk::du { #then twapi::device_ioctl (win32 DeviceIoControl) #then parse buffer somehow (binary scan..) #https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913 - + punk::args::define { + @id -id ::punk::du::lib::Get_attributes_from_iteminfo + -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" + -debugchannel -default stderr -help "channel to write debug output, or none to append to output" + @values -min 1 -max 1 + iteminfo -help "iteminfo dict as set by 'twapi::find_file_next iteminfo'" + } + #don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args. proc Get_attributes_from_iteminfo {args} { variable win_reparse_tags_by_int + #set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo] + set defaults [dict create -debug 0 -debugchannel stderr] + set opts [dict merge $defaults [lrange $args 0 end-1]] + set iteminfo [lindex $args end] - set argd [punk::args::parse $args withdef { - @id -id ::punk::du::lib::Get_attributes_from_iteminfo - -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" - -debugchannel -default stderr -help "channel to write debug output, or none to append to output" - @values -min 1 -max 1 - iteminfo -help "iteminfo dict as set by 'twapi::find_file_next iteminfo'" - }] - set opts [dict get $argd opts] - set iteminfo [dict get $argd values iteminfo] set opt_debug [dict get $opts -debug] set opt_debugchannel [dict get $opts -debugchannel] #-longname is placeholder - caller needs to set - set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0] + set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}] if {$opt_debug} { set dbg "iteminfo returned by find_file_open\n" append dbg [pdict -channel none iteminfo] @@ -592,34 +598,38 @@ namespace eval punk::du { } set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] - if {"hidden" in $attrinfo} { - dict set result -hidden 1 - } - if {"system" in $attrinfo} { - dict set result -system 1 - } - if {"readonly" in $attrinfo} { - dict set result -readonly 1 - } - dict set result -shortname [dict get $iteminfo altname] - dict set result -fileattributes $attrinfo - if {"reparse_point" in $attrinfo} { - #the twapi API splits this 32bit value for us - set low_word [dict get $iteminfo reserve0] - set high_word [dict get $iteminfo reserve1] - # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 - # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - #+-+-+-+-+-----------------------+-------------------------------+ - #|M|R|N|R| Reserved bits | Reparse tag value | - #+-+-+-+-+-----------------------+-------------------------------+ - #todo - is_microsoft from first bit of high_word - set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? - if {[dict exists $win_reparse_tags_by_int $low_int]} { - dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] - } else { - dict set result -reparseinfo [dict create tag "" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"] + foreach attr $attrinfo { + switch -- $attr { + hidden { + dict set result -hidden 1 + } + system { + dict set result -system 1 + } + readonly { + dict set result -readonly 1 + } + reparse_point { + #the twapi API splits this 32bit value for us + set low_word [dict get $iteminfo reserve0] + set high_word [dict get $iteminfo reserve1] + # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + #+-+-+-+-+-----------------------+-------------------------------+ + #|M|R|N|R| Reserved bits | Reparse tag value | + #+-+-+-+-+-----------------------+-------------------------------+ + #todo - is_microsoft from first bit of high_word + set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? + if {[dict exists $win_reparse_tags_by_int $low_int]} { + dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] + } else { + dict set result -reparseinfo [dict create tag "" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"] + } + } } } + #dict set result -shortname [dict get $iteminfo altname] + dict set result -fileattributes $attrinfo dict set result -raw $iteminfo return $result } @@ -652,27 +662,337 @@ namespace eval punk::du { catch {twapi::find_file_close $iterator} } } + proc resolve_characterclass {cclass} { + #takes the inner value from a tcl square bracketed character class and converts to a list of characters. + + #todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters + #e.g a-c-3 -> a b c - 3 + #e.g a-c-3-5 -> a b c - 3 4 5 + #e.g a-c -> a b c + #e.g a- -> a - + #e.g -c-e -> - c d e + #the tcl character class does not support negation or intersection - so we can ignore those possibilities for now. + #in this context we do not need to support named character classes such as [:digit:] + set chars [list] + set i 0 + set len [string length $cclass] + set accept_range 0 + while {$i < $len} { + set ch [string index $cclass $i] + if {$ch eq "-"} { + if {$accept_range} { + set start [string index $cclass [expr {$i - 1}]] + set end [string index $cclass [expr {$i + 1}]] + if {$start eq "" || $end eq ""} { + #invalid range - treat - as literal + if {"-" ni $chars} { + lappend chars "-" + } + } else { + #we have a range - add all chars from previous char to next char + #range may be in either direction - e.g a-c or c-a but we don't care about the order of our result. + if {$start eq $end} { + #degenerate range - treat as single char + if {$start ni $chars} { + lappend chars $start + } + } else { + if {[scan $start %c] < [scan $end %c]} { + set c1 [scan $start %c] + set c2 [scan $end %c] + } else { + set c1 [scan $end %c] + set c2 [scan $start %c] + } + for {set c $c1} {$c <= $c2} {incr c} { + set char [format %c $c] + if {$char ni $chars} { + lappend chars $char + } + } + } + + incr i ;#skip end char as it's already included in range + } + set accept_range 0 + } else { + #we have a literal - add to list and allow for possible range if next char is also a - + if {"-" ni $chars} { + lappend chars "-" + } + set accept_range 1 + } + } else { #we have a literal - add to list and allow for possible range if next char is also a - + if {$ch ni $chars} { + lappend chars $ch + } + set accept_range 1 + } + incr i + } + return $chars + } + + #return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible. + #first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes. + # e.g {*[\{]*} is a valid tcl glob + + #todo - write tests. + #should also support {*\{*} matching a file such as a{blah}b + + #Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL. + #So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity. + + proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} { + #windows API function in use is the FindFirstFile set of functions. + #these support wildcards * and ? *only*. + + #examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported + # {abc[1-3].txt} -> {abc?.txt} + # {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char + # {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call. + # {{S,t}*.txt} -> {S*.txt} {T*.txt} + # *.{txt,log} -> {*.txt} {*.log} + # {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals + + + set gchars [split $tclglob ""] + set winglob_list [list ""] + set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob. + set in_brackets 0 + set esc_next 0 + + set brace_depth 0 + set brace_content "" + set braced_alternatives [list] + + set brace_is_normal 0 + + set cclass_content "" + foreach ch $gchars { + if {$esc_next} { + if {$in_brackets} { + append cclass_content $ch + continue + } elseif {$brace_depth} { + append brace_content $ch + continue + } + set winglob_list [lmap wg $winglob_list {append wg $ch}] + set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}] + set esc_next 0 + continue + } + + if {$ch eq "\{"} { + if {$brace_depth} { + #we have an opening brace char inside braces + #Let the brace processing handle it as it recurses. + incr brace_depth 1 + append brace_content $ch + continue + } + incr brace_depth 1 + set brace_content "" + } elseif {$ch eq "\}"} { + if {$brace_depth > 1} { + #we have a closing brace char inside braces + append brace_content $ch + incr brace_depth -1 + continue + } + #process brace_content representing a list of alternatives + #handle list of alternatives - convert {txt,log} to *.txt;*.log + #set alternatives [split $brace_content ","] + lappend braced_alternatives $brace_content + set alternatives $braced_alternatives + set braced_alternatives [list] + set brace_content "" + incr brace_depth -1 + + set alt_winpatterns [list] + set alt_regexps [list] + foreach alt $alternatives { + set subresult [tclglob_equivalents $alt] + lappend alt_winpatterns {*}[dict get $subresult winglobs] + lappend alt_regexps {*}[dict get $subresult tclregexps] + } + set next_winglob_list [list] + set next_regexp_list [list] + foreach wg $winglob_list re $tclregexp_list { + #puts "wg: $wg" + #puts "re: $re" + foreach alt_wp $alt_winpatterns alt_re $alt_regexps { + #puts " alt_wp: $alt_wp" + #puts " alt_re: $alt_re" + lappend next_winglob_list "$wg$alt_wp" + set alt_re_no_caret [string range $alt_re 1 end] + lappend next_regexp_list "${re}${alt_re_no_caret}" + } + } + + set winglob_list $next_winglob_list + set tclregexp_list $next_regexp_list + + } elseif {$ch eq "\["} { + #windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char + + if {!$brace_depth} { + set in_brackets 1 + } else { + #we have a [ char inside braces + #Let the brace processing handle it as it recurses. + #but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets. + set brace_is_normal 1 + append brace_content $ch + } + } elseif {$ch eq "\]"} { + if {$brace_depth} { + #we have a ] char inside braces + #Let the brace processing hanele it as it recurses. + append brace_content $ch + continue + } + set in_brackets 0 + set charlist [resolve_characterclass $cclass_content] + set cclass_content "" + set next_winglob_list [list] + set next_regexp_list [list] + foreach c $charlist { + #set winglob_list [lmap wg $winglob_list {append wg $c}] + foreach wg $winglob_list { + lappend next_winglob_list "$wg$c" + } + foreach re $tclregexp_list { + set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}] + lappend next_regexp_list "${re}${c_escaped}" + } + } + set winglob_list $next_winglob_list + set tclregexp_list $next_regexp_list + } elseif {$ch eq "\\"} { + if {$in_brackets} { + append cclass_content $ch + #append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself. + set esc_next 1 + continue + } + if {$brace_depth} { + #we have a \ char inside braces + #Let the brace processing handle it as it recurses. + append brace_content $ch + set esc_next 1 + continue + } + set esc_next 1 + continue + } else { + if {$in_brackets} { + append cclass_content $ch + continue + } + if {!$brace_depth} { + set winglob_list [lmap wg $winglob_list {append wg $ch}] + set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}] + if {[string length $re_ch] == 1} { + switch -- $re_ch { + "?" {set re_ch "."} + "*" {set re_ch ".*"} + default { + #we could use the same mixed case filter here for both sensitive and insensitive filesystems, + #because the API filtering will already have done the restriction, + #and so a more permissive regex that matches both cases will still only match the results that the API call returns, + #which will be correct based on the case-sensitivity of the filesystem. + #It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob. + #ie in the parts of the original glob that were in square brackets. + + if {!$case_sensitive_filesystem} { + # add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob. + if {[string is upper $re_ch]} { + set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"] + } elseif {[string is lower $re_ch]} { + set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"] + } else { + #non-alpha char - no need to add case-insensitivity + } + } + } + } + } + set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}] + } else { + #we have a literal char inside braces - add to current brace_content + if {$brace_depth == 1 && $ch eq ","} { + lappend braced_alternatives $brace_content + set brace_content "" + } else { + append brace_content $ch + } + } + } + } + #sanity check + if {[llength $winglob_list] != [llength $tclregexp_list]} { + error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]" + } + set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}] + return [dict create winglobs $winglob_list tclregexps $tclregexp_list] + } + #todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed? namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs # get listing without using unix-tools (may not be installed on the windows system) # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) # This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance + + #todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob. + # we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before + #we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder. + #For really large folders and a moderate number of patterns, this could be a significant performance improvement. proc du_dirlisting_twapi {folderpath args} { set defaults [dict create\ -glob *\ -filedebug 0\ + -patterndebug 0\ -with_sizes 1\ -with_times 1\ ] set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- - set opt_glob [dict get $opts -glob] + set tcl_glob [dict get $opts -glob] + + #todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps. + set case_sensitive_filesystem 0 ;#todo - consider detecting this properly. + #Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program. + #In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly, + #as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem, + #so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly. + #Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a + #case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem. + + #Note: we only use this to adjust the filtering regexps we generate from the tcl glob. + #The windows API pattern match will already filter based on the case-sensitivity of the filesystem + # so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem. + + set globs_processed [tclglob_equivalents $tcl_glob] + #we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob. + #temp + #set win_glob_list [list $tcl_glob] + set win_glob_list [dict get $globs_processed winglobs] + set tcl_regex_list [dict get $globs_processed tclregexps] + + + #review + # our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob. + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_with_sizes [dict get $opts -with_sizes] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_filedebug [dict get $opts -filedebug] ;#per file # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_patterndebug [dict get $opts -patterndebug] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set ftypes [list f d l] if {"$opt_with_sizes" in {0 1}} { #don't use string is boolean - (f false vs f file!) @@ -711,256 +1031,288 @@ namespace eval punk::du { } } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set dirs [list] + set files [list] + set filesizes [list] + set allsizes [dict create] + set alltimes [dict create] + + set links [list] + set linkinfo [dict create] + set debuginfo [dict create] + set flaggedhidden [list] + set flaggedsystem [list] + set flaggedreadonly [list] set errors [dict create] set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ # return it so it can be stored and tried as an alternative for problem paths - #puts stderr ">>> glob: $opt_glob" - #REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames - #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ - #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. - # using * all the time may be inefficient - so we might be able to avoid that in some cases. - try { - #glob of * will return dotfiles too on windows - set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field - } on error args { + + foreach win_glob $win_glob_list tcl_re $tcl_regex_list { + if {$opt_patterndebug} { + puts stderr ">>>du_dirlisting_twapi winglob: $win_glob" + puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re" + } + + #REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames + #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ + #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. + # using * all the time may be inefficient - so we might be able to avoid that in some cases. try { - if {[string match "*denied*" $args]} { - #output similar format as unixy du - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - } - if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" - puts stderr " (errorcode: $::errorCode)\n" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - } + #glob of * will return dotfiles too on windows + set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field + } on error args { + try { + if {[string match "*denied*" $args]} { + #output similar format as unixy du + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } + if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" + puts stderr " (errorcode: $::errorCode)\n" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } - #errorcode TWAPI_WIN32 2 {The system cannot find the file specified.} - #This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error - #The find-all glob * won't get here because it returns . & .. - #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) - #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob - #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) - if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} { + #errorcode TWAPI_WIN32 2 {The system cannot find the file specified.} + #This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error + #The find-all glob * won't get here because it returns . & .. + #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) + #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob + #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) + #if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {} if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} { #looks like an ordinary no results for chosen glob - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + #return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + continue } - } - if {[set plen [pathcharacterlen $folderpath]] >= 250} { - set errmsg "error reading folder: $folderpath (len:$plen)\n" - append errmsg "error: $args" \n - append errmsg "errorcode: $::errorCode" \n - # re-fetch this folder with altnames - #file normalize - aside from being slow - will have problems with long paths - so this won't work. - #this function should only accept absolute paths - # - # - #Note: using -detail full only helps if the last segment of path has an altname.. - #To properly shorten we need to have kept track of altname all the way from the root! - #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd - #### SLOW - set fixedpath [dict get [file attributes $folderpath] -shortname] - #### SLOW - + if {[set plen [pathcharacterlen $folderpath]] >= 250} { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + # re-fetch this folder with altnames + #file normalize - aside from being slow - will have problems with long paths - so this won't work. + #this function should only accept absolute paths + # + # + #Note: using -detail full only helps if the last segment of path has an altname.. + #To properly shorten we need to have kept track of altname all the way from the root! + #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd + #### SLOW + set fixedpath [dict get [file attributes $folderpath] -shortname] + #### SLOW + + + append errmsg "retrying with with windows altname '$fixedpath'" + puts stderr $errmsg + } else { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + set tmp_errors [list $::errorCode] + #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share + #we can use //?/path dos device path - but not with tcl functions + #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. + #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here - append errmsg "retrying with with windows altname '$fixedpath'" - puts stderr $errmsg - } else { - set errmsg "error reading folder: $folderpath (len:$plen)\n" - append errmsg "error: $args" \n - append errmsg "errorcode: $::errorCode" \n - set tmp_errors [list $::errorCode] - #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share - #we can use //?/path dos device path - but not with tcl functions - #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. - #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here - - set fixedtail "" - - set parent [file dirname $folderpath] - set badtail [file tail $folderpath] - set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames - while {[twapi::find_file_next $iterator iteminfo]} { - set nm [dict get $iteminfo name] - if {$nm eq $badtail} { - set fixedtail [dict get $iteminfo altname] - break + set fixedtail "" + + set parent [file dirname $folderpath] + set badtail [file tail $folderpath] + set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo" + if {$nm eq $badtail} { + set fixedtail [dict get $iteminfo altname] + break + } + } + + if {![string length $fixedtail]} { + dict lappend errors $folderpath {*}$tmp_errors + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } + + #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. + #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it + #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) + #so the illegalname_fix doesn't really work here + #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] + + #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. + set fixedpath $parent/$fixedtail + append errmsg "retrying with with windows dos device path $fixedpath\n" + puts stderr $errmsg + } - if {![string length $fixedtail]} { - dict lappend errors $folderpath {*}$tmp_errors - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" + if {[catch { + set iterator [twapi::find_file_open $fixedpath/* -detail basic] + } errMsg]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')" + puts stderr " (errorcode: $::errorCode)\n" + puts stderr "$errMsg" + dict lappend errors $folderpath $::errorCode return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } - #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. - #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it - #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) - #so the illegalname_fix doesn't really work here - #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] - #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. - set fixedpath [file join $parent $fixedtail] - append errmsg "retrying with with windows dos device path $fixedpath\n" - puts stderr $errmsg + } on error args { + set errmsg "error reading folder: $folderpath\n" + append errmsg "error: $args" \n + append errmsg "errorInfo: $::errorInfo" \n + puts stderr "$errmsg" + puts stderr "FAILED to collect info for folder '$folderpath'" + #append errmsg "aborting.." + #error $errmsg + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } + } + #jjj - if {[catch { - set iterator [twapi::find_file_open $fixedpath/* -detail basic] - } errMsg]} { - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')" - puts stderr " (errorcode: $::errorCode)\n" - puts stderr "$errMsg" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + if {![regexp $tcl_re $nm]} { + continue + } + if {$nm in {. ..}} { + continue } + set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path + #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] + #set ftype "" + set do_sizes 0 + set do_times 0 + #attributes applicable to any classification + set fullname [file_join_one $folderpath $nm] + + set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict + if {[dict get $attrdict -hidden]} { + lappend flaggedhidden $fullname + } + if {[dict get $attrdict -system]} { + lappend flaggedsystem $fullname + } + if {[dict get $attrdict -readonly]} { + lappend flaggedreadonly $fullname + } + if {[dict exists $attrdict -debug]} { + dict set debuginfo $fullname [dict get $attrdict -debug] + } + set file_attributes [dict get $attrdict -fileattributes] - } on error args { - set errmsg "error reading folder: $folderpath\n" - append errmsg "error: $args" \n - append errmsg "errorInfo: $::errorInfo" \n - puts stderr "$errmsg" - puts stderr "FAILED to collect info for folder '$folderpath'" - #append errmsg "aborting.." - #error $errmsg - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - - } - } - set dirs [list] - set files [list] - set filesizes [list] - set allsizes [dict create] - set alltimes [dict create] + set is_reparse_point [expr {"reparse_point" in $file_attributes}] + set is_directory [expr {"directory" in $file_attributes}] - set links [list] - set linkinfo [dict create] - set debuginfo [dict create] - set flaggedhidden [list] - set flaggedsystem [list] - set flaggedreadonly [list] + set linkdata [dict create] + # ----------------------------------------------------------- + #main classification + if {$is_reparse_point} { + #this concept doesn't correspond 1-to-1 with unix links + #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points + #review - and see which if any actually belong in the links key of our return - while {[twapi::find_file_next $iterator iteminfo]} { - set nm [dict get $iteminfo name] - #recheck glob - #review! - if {![string match $opt_glob $nm]} { - continue - } - set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path - #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] - set ftype "" - #attributes applicable to any classification - set fullname [file_join_one $folderpath $nm] - - set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict - set file_attributes [dict get $attrdict -fileattributes] - - set linkdata [dict create] - # ----------------------------------------------------------- - #main classification - if {"reparse_point" in $file_attributes} { - #this concept doesn't correspond 1-to-1 with unix links - #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points - #review - and see which if any actually belong in the links key of our return - - - #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point - # - #we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? - #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' - #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls - #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} - #e.g (stripped of headers/footers and other lines) - #2022-10-02 04:07 AM priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] - #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. - #du includes the size of the symlink - #but we can't get it with tcl's file size - #twapi doesn't seem to have anything to help read it either (?) - #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link - # - #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. - # - #links are techically files too, whether they point to a file/dir or nothing. - lappend links $fullname - set ftype "l" - dict set linkdata linktype reparse_point - dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] - if {"directory" ni $file_attributes} { - dict set linkdata target_type file - } - } - if {"directory" in $file_attributes} { - if {$nm in {. ..}} { - continue + + #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point + # + #we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? + #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' + #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls + #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} + #e.g (stripped of headers/footers and other lines) + #2022-10-02 04:07 AM priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] + #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. + #du includes the size of the symlink + #but we can't get it with tcl's file size + #twapi doesn't seem to have anything to help read it either (?) + #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link + # + #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. + # + #links are techically files too, whether they point to a file/dir or nothing. + lappend links $fullname + #set ftype "l" + if {"l" in $sized_types} { + set do_sizes 1 + } + if {"l" in $timed_types} { + set do_times 1 + } + dict set linkdata linktype reparse_point + dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] + if {"directory" ni $file_attributes} { + dict set linkdata target_type file + } } - if {"reparse_point" ni $file_attributes} { - lappend dirs $fullname - set ftype "d" - } else { - #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections - dict set linkdata target_type directory + if {$is_directory} { + #if {$nm in {. ..}} { + # continue + #} + if {!$is_reparse_point} { + lappend dirs $fullname + #set ftype "d" + if {"d" in $sized_types} { + set do_sizes 1 + } + if {"d" in $timed_types} { + set do_times 1 + } + } else { + #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections + dict set linkdata target_type directory + } } - } - if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} { - #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? - lappend files $fullname - if {"f" in $sized_types} { - lappend filesizes [dict get $iteminfo size] + if {!$is_reparse_point && !$is_directory} { + #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? + lappend files $fullname + if {"f" in $sized_types} { + lappend filesizes [dict get $iteminfo size] + set do_sizes 1 + } + if {"f" in $timed_types} { + set do_times 1 + } + #set ftype "f" } - set ftype "f" - } - # ----------------------------------------------------------- + # ----------------------------------------------------------- - if {[dict get $attrdict -hidden]} { - lappend flaggedhidden $fullname - } - if {[dict get $attrdict -system]} { - lappend flaggedsystem $fullname - } - if {[dict get $attrdict -readonly]} { - lappend flaggedreadonly $fullname - } - if {$ftype in $sized_types} { - dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] - } - if {$ftype in $timed_types} { - #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) - #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds - #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict - dict set alltimes $fullname [dict create\ - c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\ - a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\ - m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ - ] - } - if {[dict size $linkdata]} { - dict set linkinfo $fullname $linkdata - } - if {[dict exists $attrdict -debug]} { - dict set debuginfo $fullname [dict get $attrdict -debug] + if {$do_sizes} { + dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] + } + if {$do_times} { + #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) + #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds + #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict + dict set alltimes $fullname [dict create\ + c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\ + a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\ + m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ + ] + } + if {[dict size $linkdata]} { + dict set linkinfo $fullname $linkdata + } } + twapi::find_file_close $iterator } - twapi::find_file_close $iterator - set vfsmounts [get_vfsmounts_in_folder $folderpath] + set vfsmounts [get_vfsmounts_in_folder $folderpath] set effective_opts $opts dict set effective_opts -with_times $timed_types dict set effective_opts -with_sizes $sized_types #also determine whether vfs. file system x is *much* faster than file attributes #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder - return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] + return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] } proc get_vfsmounts_in_folder {folderpath} { set vfsmounts [list] @@ -991,9 +1343,11 @@ namespace eval punk::du { #work around the horrible tilde-expansion thing (not needed for tcl 9+) proc file_join_one {base newtail} { if {[string index $newtail 0] ne {~}} { - return [file join $base $newtail] + #return [file join $base $newtail] + return $base/$newtail } - return [file join $base ./$newtail] + #return [file join $base ./$newtail] + return $base/./$newtail } @@ -1121,7 +1475,7 @@ namespace eval punk::du { #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #struct::set will affect order: tcl vs critcl give different ordering! set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links] - set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]] + set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]] #---- diff --git a/src/modules/punk/lib-999999.0a1.0.tm b/src/modules/punk/lib-999999.0a1.0.tm index c2273df7..c6e2c636 100644 --- a/src/modules/punk/lib-999999.0a1.0.tm +++ b/src/modules/punk/lib-999999.0a1.0.tm @@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check { #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #Not any sort of comprehensive check of known tcl bugs. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases. + + proc has_tclbug_caseinsensitiveglob_windows {} { + #https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted + + if {"windows" ne $::tcl_platform(platform)} { + set bug 0 + } else { + set tmpdir [file tempdir] + set testfile [file join $tmpdir "bugtest"] + set fd [open $testfile w] + puts $fd test + close $fd + set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*] + foreach r $globresult { + if {$r ne "bugtest"} { + set bug 1 + break + } + } + return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning] + #possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized + # to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation. + } + } + + #todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies. + # but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module, + # or at least checking for an existing mounted case-insensitive filesystem. + # A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like. + # It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix. + + proc has_tclbug_regexp_emptystring {} { #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, diff --git a/src/modules/punk/nav/fs-999999.0a1.0.tm b/src/modules/punk/nav/fs-999999.0a1.0.tm index 1b73917d..c91fe5b7 100644 --- a/src/modules/punk/nav/fs-999999.0a1.0.tm +++ b/src/modules/punk/nav/fs-999999.0a1.0.tm @@ -50,7 +50,7 @@ package require punk::lib package require punk::args package require punk::ansi package require punk::winpath -package require punk::du +package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing. package require commandstack #*** !doctools #[item] [package {Tcl 8.6}] @@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs { #[list_begin definitions] + punk::args::define { + @id -id ::punk::nav::fs::d/ + @cmd -name punk::nav::fs::d/ -help\ + {List directories or directories and files in the current directory or in the + targets specified with the fileglob_or_target glob pattern(s). + + If a single target is specified without glob characters, and it exists as a directory, + then the working directory is changed to that target and a listing of that directory + is returned. If the single target specified without glob characters does not exist as + a directory, then it is treated as a glob pattern and the listing is for the current + directory with results filtered to match fileglob_or_target. + + If multiple targets or glob patterns are specified, then a separate listing is returned + for each fileglob_or_target pattern. + + This function is provided via aliases as ./ and .// with v being inferred from the alias + name, and also as d/ with an explicit v argument. + The ./ and .// forms are more convenient for interactive use. + examples: + ./ - list directories in current directory + .// - list directories and files in current directory + ./ src/* - list directories in src + .// src/* - list directories and files in src + .// *.txt - list files in current directory with .txt extension + .// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name + (on a case-insensitive filesystem this would also match T*1.txt etc) + .// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name + (glob chars treated as literals due to being in character-class brackets + This will match files beginning with a capital T and not lower case t + even on a case-insensitive filesystem.) + .// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns: + {[t]*} - names beginning with t + {d{e,d}*} - names beginning with de or dd + (on a case-insensitive filesystem the first pattern would also match names beginning with T) + } + @values -min 1 -max -1 -type string + v -type string -choices {/ //} -help\ + " + / - list directories only + // - list directories and files + " + fileglob_or_target -type string -optional true -multiple true -help\ + "A glob pattern as supported by Tcl's 'glob' command, to filter results. + If multiple patterns are supplied, then a listing for each pattern is returned. + If no patterns are supplied, then all items are listed." + } + #NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread. #As this function recurses and calls cd multiple times - it's not thread-safe. #Another thread could theoretically cd whilst this is running. @@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs { #puts stdout "-->[ansistring VIEW $result]" return $result } else { - set atail [lassign $args cdtarget] if {[llength $args] == 1} { set cdtarget [lindex $args 0] switch -exact -- $cdtarget { . - ./ { - tailcall punk::nav::fs::d/ + tailcall punk::nav::fs::d/ $v } .. - ../ { if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} { @@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs { if {[string range $cdtarget_copy 0 3] eq "//?/"} { #handle dos device paths - convert to normal path for glob testing set glob_test [string range $cdtarget_copy 3 end] - set cdtarget_is_glob [regexp {[*?]} $glob_test] + set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test] } else { - set cdtarget_is_glob [regexp {[*?]} $cdtarget] + set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget] + #todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing. } if {!$cdtarget_is_glob} { set cdtarget_file_type [file type $cdtarget] @@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs { } set VIRTUAL_CWD $cdtarget set curdir $cdtarget + tailcall punk::nav::fs::d/ $v } else { set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] if {[string match //zipfs:/* $VIRTUAL_CWD]} { @@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs { } if {[file type $target] eq "directory"} { set VIRTUAL_CWD $target + tailcall punk::nav::fs::d/ $v } } - tailcall punk::nav::fs::d/ $v } set curdir $VIRTUAL_CWD } else { @@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs { #globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.) - set searchspec [lindex $args 0] set result "" #set chunklist [list] @@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs { set this_result [dict create] foreach searchspec $args { set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)] - set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}] + #we need to support the same glob chars that Tcl's 'glob' command accepts. + set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]] + #we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean #this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough. #lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*) @@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs { set allow_nonportable [dict exists $received -nonportable] set curdir [pwd] - set fullpath_list [list] + + set fullpath_list [list] ;#list of full paths to create. + set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir) + #these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests. + set error_paths [list] foreach p $paths { if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { @@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs { lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] continue } - set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] - #e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. - set fullpath [file join $path1 {*}[lrange $args 1 end]] + set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)] + #if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir. #Some subpaths of the supplied paths to create may already exist. - #we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. + #we should test write permissions on the nearest existing parent of the supplied path to create, + #rather than just on the immediate parent segment of the supplied path itself which may not exist. + + set fullpath [file normalize $fullpath] set parent [file dirname $fullpath] while {![file exists $parent]} { set parent [file dirname $parent] @@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs { lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] continue } + lappend existing_parent_list $parent lappend fullpath_list $fullpath } if {[llength $fullpath_list] != [llength $paths]} { @@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs { set num_created 0 set error_string "" - foreach fullpath $fullpath_list { + foreach fullpath $fullpath_list existing_parent $existing_parent_list { + #calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue. + #set relative_path [file relative $fullpath $existing_parent] + #todo. if {[catch {file mkdir $fullpath}]} { set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." + cd $curdir break } incr num_created @@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs { if {$error_string ne ""} { error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." } - d/ $curdir + + #display summaries of created directories (which may have already existed) by reusing d/ to get info on them. + set query_paths [lmap v $paths $v/*] + d/ / {*}$query_paths } #todo use unknown to allow d/~c:/etc ?? @@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs { if {![file isdirectory $target]} { error "Folder $target not found" } - d/ $target + d/ / $target } @@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs { } set relativepath [expr {[file pathtype $searchspec] eq "relative"}] - set has_tailglobs [regexp {[?*]} [file tail $searchspec]] + + #set has_tailglobs [regexp {[?*]} [file tail $searchspec]] + set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]] #dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent. #(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict) @@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs { } puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location" set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location] - return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] + return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] } #todo - package as punk::nav::fs @@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs { } proc dirfiles_dict {args} { set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict] - lassign [dict values $argd] leaders opts vals - set searchspecs [dict values $vals] + lassign [dict values $argd] leaders opts values + set searchspecs [dict values $values] #puts stderr "searchspecs: $searchspecs [llength $searchspecs]" #puts stdout "arglist: $opts" @@ -893,7 +958,7 @@ tcl::namespace::eval punk::nav::fs { set searchspec [lindex $searchspecs 0] # -- --- --- --- --- --- --- set opt_searchbase [dict get $opts -searchbase] - set opt_tailglob [dict get $opts -tailglob] + set opt_tailglob [dict get $opts -tailglob] set opt_with_sizes [dict get $opts -with_sizes] set opt_with_times [dict get $opts -with_times] # -- --- --- --- --- --- --- @@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs { } } "\uFFFF" { - set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] + set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]] + + #set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] if {$searchtail_has_globs} { if {$is_relativesearchspec} { #set location [file dirname [file join $searchbase $searchspec]] @@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs { } else { set next_opt_with_times [list -with_times $opt_with_times] } + + set ts1 [clock clicks -milliseconds] if {$is_in_vfs} { set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] } else { @@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs { } } } + set ts2 [clock clicks -milliseconds] + set ts_listing [expr {$ts2 - $ts1}] set dirs [dict get $listing dirs] set files [dict get $listing files] @@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs { set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden] } - set dirs [lsort -dictionary $dirs] ;#todo - natsort + #----------------------------------------------------------------------------------------- + set ts1 [clock milliseconds] + set dirs [lsort -dictionary $dirs] ;#todo - natsort #foreach d $dirs { # if {[lindex [file system $d] 0] eq "tclvfs"} { @@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs { set files $sorted_files set filesizes $sorted_filesizes + set ts2 [clock milliseconds] + set ts_sorting [expr {$ts2 - $ts1}] + #----------------------------------------------------------------------------------------- # -- --- #jmn + set ts1 [clock milliseconds] foreach nm [list {*}$dirs {*}$files] { if {[punk::winpath::illegalname_test $nm]} { lappend nonportable $nm } } + set ts2 [clock milliseconds] + set ts_nonportable_check [expr {$ts2 - $ts1}] + set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms] + set front_of_dict [dict create location $location searchbase $opt_searchbase] set listing [dict merge $front_of_dict $listing] - set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes] + set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info] return [dict merge $listing $updated] } @@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs { @id -id ::punk::nav::fs::dirfiles_dict_as_lines -stripbase -default 0 -type boolean -formatsizes -default 1 -type boolean - -listing -default "/" -choices {/ // //} + -listing -default "/" -choices {/ //} @values -min 1 -max -1 -type dict -unnamed true } #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? proc dirfiles_dict_as_lines {args} { - set ts1 [clock milliseconds] + #set ts1 [clock milliseconds] package require overtype set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] lassign [dict values $argd] leaders opts vals @@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs { #review - symlink to shortcut? hopefully will just work #classify as file or directory - fallback to file if unknown/undeterminable set finfo_plus [list] - set ts2 [clock milliseconds] + #set ts2 [clock milliseconds] foreach fdict $finfo { set fname [dict get $fdict file] if {[file extension $fname] eq ".lnk"} { @@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs { } unset finfo - puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" - puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" + #puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" + #puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] @@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs { #consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path #review: punk::winpath calls cygpath! #review: file pathtype is platform dependant - proc path_to_absolute {path base platform} { - set ptype [file pathtype $path] + proc path_to_absolute {subpath base platform} { + set ptype [file pathtype $subpath] if {$ptype eq "absolute"} { - set path_absolute $path + set path_absolute $subpath } elseif {$ptype eq "volumerelative"} { if {$platform eq "windows"} { #unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms) - if {[string index $path 0] eq "/"} { + if {[string index $subpath 0] eq "/"} { #this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here #It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions. #Todo - tidy up. package require punk::unixywindows - set path_absolute [punk::unixywindows::towinpath $path] + set path_absolute [punk::unixywindows::towinpath $subpath] #puts stderr "winpath: $path" } else { #todo handle volume-relative paths with volume specified c:etc c: @@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs { #The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives #Arguably if ...? - #set path_absolute $base/$path - set path_absolute $path + #set path_absolute $base/$subpath + set path_absolute $subpath } } else { # unknown what paths are reported as this on other platforms.. treat as absolute for now - set path_absolute $path + set path_absolute $subpath } } else { - set path_absolute $base/$path - } - if {$platform eq "windows"} { - if {[punk::winpath::illegalname_test $path_absolute]} { - set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present - } + #e.g relative subpath=* base = c:/test -> c:/test/* + #e.g relative subpath=../test base = c:/test -> c:/test/../test + #e.g relative subpath=* base = //server/share/test -> //server/share/test/* + set path_absolute $base/$subpath } + #fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues. + #if {$platform eq "windows"} { + # if {[punk::winpath::illegalname_test $path_absolute]} { + # set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present + # } + #} return $path_absolute } proc strip_prefix_depth {path prefix} { @@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib { #[para] Secondary functions that are part of the API #[list_begin definitions] - #proc utility1 {p1 args} { - # #*** !doctools - # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] - # #[para]Description of utility1 - # return 1 - #} + punk::args::define { + @id -id ::punk::nav::fs::lib::is_fileglob + @cmd -name punk::nav::fs::lib::is_fileglob + @values -min 1 -max 1 + path -type string -required true -help\ + {String to test for being a glob pattern as recognised by the tcl 'glob' command. + If the string represents a path with multiple segments, only the final component + of the path will be tested for glob characters. + Glob patterns in this context are different to globs accepted by TCL's 'string match'. + A glob pattern is any string that contains unescaped * ? { } [ or ]. + This will not detect mismatched unescaped braces or brackets. + Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern. + } + } + proc is_fileglob {str} { + #a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser) + set in_escape 0 + set segments [file split $str] + set tail [lindex $segments end] + foreach c [split $tail ""] { + if {$in_escape} { + set in_escape 0 + } else { + if {$c eq "\\"} { + set in_escape 1 + } elseif {$c in [list * ? "\[" "\]" "{" "}" ]} { + return 1 + } + } + } + return 0 + } #*** !doctools diff --git a/src/modules/punk/path-999999.0a1.0.tm b/src/modules/punk/path-999999.0a1.0.tm index 5b3c9298..4d728a40 100644 --- a/src/modules/punk/path-999999.0a1.0.tm +++ b/src/modules/punk/path-999999.0a1.0.tm @@ -357,7 +357,7 @@ namespace eval punk::path { } } } - puts "==>finalparts: '$finalparts'" + #puts "==>finalparts: '$finalparts'" # using join - {"" "" server share} -> //server/share and {a b} -> a/b if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} { #backtracking on unix-style path can end up with empty string as only member of finalparts diff --git a/src/modules/punk/winpath-999999.0a1.0.tm b/src/modules/punk/winpath-999999.0a1.0.tm index 71e7ee92..1d123bb1 100644 --- a/src/modules/punk/winpath-999999.0a1.0.tm +++ b/src/modules/punk/winpath-999999.0a1.0.tm @@ -114,6 +114,50 @@ namespace eval punk::winpath { return $path } } + + proc illegal_char_map_to_doublewide {ch} { + #windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc) + set map [dict create \ + "<" "\uFF1C" \ + ">" "\uFF1E" \ + ":" "\uFF1A" \ + "\"" "\uFF02" \ + "/" "\uFF0F" \ + "\\" "\uFF3C" \ + "|" "\uFF5C" \ + "?" "\uFF1F" \ + "*" "\uFF0A"] + if {$ch in [dict keys $map]} { + return [dict get $map $ch] + } else { + return $ch + } + } + proc illegal_char_map_to_ntfs {ch} { + #windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc. + #see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars + #see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped) + + set map [dict create \ + "<" "\uF03C" \ + ">" "\uF03E" \ + ":" "\uF03A" \ + "\"" "\uF022" \ + "/" "unknown" \ + "\\" "\uF05c" \ + "|" "\uF07C" \ + "?" "\uF03F" \ + "*" "\uF02A"] + #note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API. + if {$ch in [dict keys $map]} { + return [dict get $map $ch] + } else { + return $ch + } + } + + + #we don't validate that path is actually illegal because we don't know the full range of such names. #The caller can apply this to any path. #don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently) @@ -200,8 +244,15 @@ namespace eval punk::winpath { set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] #we need to exclude things like path/.. path/. - foreach seg [file split $path] { - if {$seg in [list . ..]} { + set segments [file split $path] + if {[file pathtype $path] eq "absolute"} { + #absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments. + set checksegments [lrange $segments 1 end] + } else { + set checksegments $segments + } + foreach seg $checksegments { + if {$seg in {. ..}} { #review - what if there is a folder or file that actually has a name such as . or .. ? #unlikely in normal use - but could done deliberately for bad reasons? #We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem. @@ -220,10 +271,17 @@ namespace eval punk::winpath { #only check for actual space as other whitespace seems to work without being stripped #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph - if {[string index $seg end] in [list " " "."]} { + if {[string index $seg end] in {" " .}} { #windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc) return 1 } + #set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams. + set re {[<>:"/\\|?*]} + + if {[regexp $re $seg]} { + #windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc) + return 1 + } } #glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph) #- they seem to be readable from cmd and tclsh as is. diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm index ea72ad1c..e1648d9d 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm @@ -6066,9 +6066,218 @@ namespace eval punk { set pipe [punk::path_list_pipe $glob] {*}$pipe } - proc path {{glob *}} { + proc path_basic {{glob *}} { set pipe [punk::path_list_pipe $glob] {*}$pipe |> list_as_lines + } + namespace eval argdoc { + punk::args::define { + @id -id ::punk::path + @cmd -name "punk::path" -help\ + "Introspection of the PATH environment variable. + This tool will examine executables within each PATH entry and show which binaries + are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns." + @opts + -binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries." + @values -min 0 -max -1 + glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories." + } + } + proc path {args} { + set argd [punk::args::parse $args withid ::punk::path] + lassign [dict values $argd] leaders opts values received + set binglobs [dict get $opts -binglobs] + set globs [dict get $values glob] + if {$::tcl_platform(platform) eq "windows"} { + set sep ";" + } else { + # : ok for linux/bsd ... mac? + set sep ":" + } + set all_paths [split [string trimright $::env(PATH) $sep] $sep] + set filtered_paths $all_paths + if {[llength $globs]} { + set filtered_paths [list] + foreach p $all_paths { + foreach g $globs { + if {[string match -nocase $g $p]} { + lappend filtered_paths $p + break + } + } + } + } + + #Windows executable search location order: + #1. The current directory. + #2. The directories that are listed in the PATH environment variable. + # within each directory windows uses the PATHEXT environment variable to determine + #which files are considered executable and in which order they are considered. + #So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files + #with the same name but different extensions in the same directory, then the one + #with the extension that appears first in PATHEXT will be executed when that name is called. + + #On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output. + #Duplicate PATH entries are also a fairly likely possibility on all platforms. + #This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries. + + #By default we don't want to display all executables in all PATH entries - as this can be very verbose + #- but we want to be able to show which executables are overshadowed by which PATH entries, + #and to be able to filter the PATH entries and the executables using glob patterns. + #To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name. + #The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths + #in the original PATH list. The executable dict will contain the paths and path indices where each executable is found, + #and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a + #dict keyed by path index which contains the list of executables in that path - to make it easy to show which + #executables are overshadowed by which paths. + + set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows). + set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off). + set d_index_executables [dict create] ;#key is path index, value is list of executables in that path + set path_idx 0 + foreach p $all_paths { + if {$::tcl_platform(platform) eq "windows"} { + set pnorm [string tolower $p] + } else { + set pnorm $p + } + if {![dict exists $d_path_info $pnorm]} { + dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]] + set executables [list] + if {[file isdirectory $p]} { + #get all files that are executable in this path. + #If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names. + set pnormglob [file normalize $p] + if {$::tcl_platform(platform) eq "windows"} { + #Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable. + #We need to account for this in our glob pattern. + set pathexts [list] + if {[info exists ::env(PATHEXT)]} { + set env_pathexts [split $::env(PATHEXT) ";"] + #set pathexts [lmap e $env_pathexts {string tolower $e}] + foreach pe $env_pathexts { + if {$pe eq "."} { + continue + } + lappend pathexts [string tolower $pe] + } + } else { + set env_pathexts [list] + #default PATHEXT if not set - according to Microsoft docs + set pathexts [list .com .exe .bat .cmd] + } + foreach bg $binglobs { + set has_pathext 0 + foreach pe $pathexts { + if {[string match -nocase "*$pe" $bg]} { + set has_pathext 1 + break + } + } + if {!$has_pathext} { + foreach pe $pathexts { + lappend binglobs "$bg$pe" + } + } + } + set lc_binglobs [lmap e $binglobs {string tolower $e}] + if {"." in $pathexts} { + foreach bg $binglobs { + set has_pathext 0 + foreach pe $pathexts { + if {[string match -nocase "*$pe" $bg]} { + set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]] + set has_pathext 1 + break + } + } + if {$has_pathext} { + if {[string tolower $base] ni $lc_binglobs} { + lappend binglobs "$base" + } + } + } + } + + #TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem. + #(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug) + #We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results. + #The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for. + #(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe') + set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]] + set executables [list] + foreach e $globresults { + puts stderr "glob result: $e" + puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]" + lappend executables [file tail [file normalize $e]] + } + } else { + set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]] + } + } + dict set d_index_executables $path_idx $executables + foreach exe $executables { + if {$::tcl_platform(platform) eq "windows"} { + set exenorm [string tolower $exe] + } else { + set exenorm $exe + } + if {![dict exists $d_bin_info $exenorm]} { + dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]] + } else { + #dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe + set bindata [dict get $d_bin_info $exenorm] + dict lappend bindata path_indices $path_idx + dict lappend bindata paths $p + dict lappend bindata executable_names $exe + dict set d_bin_info $exenorm $bindata + } + } + } else { + #duplicate path entry - add to list of original paths for this normalized path + + # Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...? + # we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it. + set pathdata [dict get $d_path_info $pnorm] + dict lappend pathdata original_paths $p + dict lappend pathdata indices $path_idx + dict set d_path_info $pnorm $pathdata + + + #we don't need to add executables for this path - as they will be the same as the original path that we have already processed. + #However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables. + set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path + foreach exe $executables { + if {$::tcl_platform(platform) eq "windows"} { + set exenorm [string tolower $exe] + } else { + set exenorm $exe + } + #dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe + set bindata [dict get $d_bin_info $exenorm] + dict lappend bindata path_indices $path_idx + dict lappend bindata paths $p + dict lappend bindata executable_names $exe + dict set d_bin_info $exenorm $bindata + } + } + + incr path_idx + } + + #temporary debug output to check dicts are being built correctly + set debug "" + append debug "Path info dict:" \n + append debug [showdict $d_path_info] \n + append debug "Binary info dict:" \n + append debug [showdict $d_bin_info] \n + append debug "Index executables dict:" \n + append debug [showdict $d_index_executables] \n + #return $debug + puts stdout $debug + + + } #------------------------------------------------------------------- diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm index bc753154..c9ab54fd 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm @@ -479,7 +479,7 @@ namespace eval punk::du { } namespace eval lib { variable du_literal - variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] + variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ] #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this @@ -489,7 +489,11 @@ namespace eval punk::du { return [dict get $winfile_attributes $bitmask] } else { #list/dict shimmering? - return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + + set decoded [twapi::decode_file_attributes $bitmask] + dict set winfile_attributes $bitmask $decoded + return $decoded } } variable win_reparse_tags @@ -563,23 +567,25 @@ namespace eval punk::du { #then twapi::device_ioctl (win32 DeviceIoControl) #then parse buffer somehow (binary scan..) #https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913 - + punk::args::define { + @id -id ::punk::du::lib::Get_attributes_from_iteminfo + -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" + -debugchannel -default stderr -help "channel to write debug output, or none to append to output" + @values -min 1 -max 1 + iteminfo -help "iteminfo dict as set by 'twapi::find_file_next iteminfo'" + } + #don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args. proc Get_attributes_from_iteminfo {args} { variable win_reparse_tags_by_int + #set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo] + set defaults [dict create -debug 0 -debugchannel stderr] + set opts [dict merge $defaults [lrange $args 0 end-1]] + set iteminfo [lindex $args end] - set argd [punk::args::parse $args withdef { - @id -id ::punk::du::lib::Get_attributes_from_iteminfo - -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" - -debugchannel -default stderr -help "channel to write debug output, or none to append to output" - @values -min 1 -max 1 - iteminfo -help "iteminfo dict as set by 'twapi::find_file_next iteminfo'" - }] - set opts [dict get $argd opts] - set iteminfo [dict get $argd values iteminfo] set opt_debug [dict get $opts -debug] set opt_debugchannel [dict get $opts -debugchannel] #-longname is placeholder - caller needs to set - set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0] + set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}] if {$opt_debug} { set dbg "iteminfo returned by find_file_open\n" append dbg [pdict -channel none iteminfo] @@ -592,34 +598,38 @@ namespace eval punk::du { } set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] - if {"hidden" in $attrinfo} { - dict set result -hidden 1 - } - if {"system" in $attrinfo} { - dict set result -system 1 - } - if {"readonly" in $attrinfo} { - dict set result -readonly 1 - } - dict set result -shortname [dict get $iteminfo altname] - dict set result -fileattributes $attrinfo - if {"reparse_point" in $attrinfo} { - #the twapi API splits this 32bit value for us - set low_word [dict get $iteminfo reserve0] - set high_word [dict get $iteminfo reserve1] - # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 - # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - #+-+-+-+-+-----------------------+-------------------------------+ - #|M|R|N|R| Reserved bits | Reparse tag value | - #+-+-+-+-+-----------------------+-------------------------------+ - #todo - is_microsoft from first bit of high_word - set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? - if {[dict exists $win_reparse_tags_by_int $low_int]} { - dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] - } else { - dict set result -reparseinfo [dict create tag "" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"] + foreach attr $attrinfo { + switch -- $attr { + hidden { + dict set result -hidden 1 + } + system { + dict set result -system 1 + } + readonly { + dict set result -readonly 1 + } + reparse_point { + #the twapi API splits this 32bit value for us + set low_word [dict get $iteminfo reserve0] + set high_word [dict get $iteminfo reserve1] + # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + #+-+-+-+-+-----------------------+-------------------------------+ + #|M|R|N|R| Reserved bits | Reparse tag value | + #+-+-+-+-+-----------------------+-------------------------------+ + #todo - is_microsoft from first bit of high_word + set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? + if {[dict exists $win_reparse_tags_by_int $low_int]} { + dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] + } else { + dict set result -reparseinfo [dict create tag "" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"] + } + } } } + #dict set result -shortname [dict get $iteminfo altname] + dict set result -fileattributes $attrinfo dict set result -raw $iteminfo return $result } @@ -652,27 +662,337 @@ namespace eval punk::du { catch {twapi::find_file_close $iterator} } } + proc resolve_characterclass {cclass} { + #takes the inner value from a tcl square bracketed character class and converts to a list of characters. + + #todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters + #e.g a-c-3 -> a b c - 3 + #e.g a-c-3-5 -> a b c - 3 4 5 + #e.g a-c -> a b c + #e.g a- -> a - + #e.g -c-e -> - c d e + #the tcl character class does not support negation or intersection - so we can ignore those possibilities for now. + #in this context we do not need to support named character classes such as [:digit:] + set chars [list] + set i 0 + set len [string length $cclass] + set accept_range 0 + while {$i < $len} { + set ch [string index $cclass $i] + if {$ch eq "-"} { + if {$accept_range} { + set start [string index $cclass [expr {$i - 1}]] + set end [string index $cclass [expr {$i + 1}]] + if {$start eq "" || $end eq ""} { + #invalid range - treat - as literal + if {"-" ni $chars} { + lappend chars "-" + } + } else { + #we have a range - add all chars from previous char to next char + #range may be in either direction - e.g a-c or c-a but we don't care about the order of our result. + if {$start eq $end} { + #degenerate range - treat as single char + if {$start ni $chars} { + lappend chars $start + } + } else { + if {[scan $start %c] < [scan $end %c]} { + set c1 [scan $start %c] + set c2 [scan $end %c] + } else { + set c1 [scan $end %c] + set c2 [scan $start %c] + } + for {set c $c1} {$c <= $c2} {incr c} { + set char [format %c $c] + if {$char ni $chars} { + lappend chars $char + } + } + } + + incr i ;#skip end char as it's already included in range + } + set accept_range 0 + } else { + #we have a literal - add to list and allow for possible range if next char is also a - + if {"-" ni $chars} { + lappend chars "-" + } + set accept_range 1 + } + } else { #we have a literal - add to list and allow for possible range if next char is also a - + if {$ch ni $chars} { + lappend chars $ch + } + set accept_range 1 + } + incr i + } + return $chars + } + + #return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible. + #first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes. + # e.g {*[\{]*} is a valid tcl glob + + #todo - write tests. + #should also support {*\{*} matching a file such as a{blah}b + + #Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL. + #So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity. + + proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} { + #windows API function in use is the FindFirstFile set of functions. + #these support wildcards * and ? *only*. + + #examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported + # {abc[1-3].txt} -> {abc?.txt} + # {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char + # {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call. + # {{S,t}*.txt} -> {S*.txt} {T*.txt} + # *.{txt,log} -> {*.txt} {*.log} + # {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals + + + set gchars [split $tclglob ""] + set winglob_list [list ""] + set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob. + set in_brackets 0 + set esc_next 0 + + set brace_depth 0 + set brace_content "" + set braced_alternatives [list] + + set brace_is_normal 0 + + set cclass_content "" + foreach ch $gchars { + if {$esc_next} { + if {$in_brackets} { + append cclass_content $ch + continue + } elseif {$brace_depth} { + append brace_content $ch + continue + } + set winglob_list [lmap wg $winglob_list {append wg $ch}] + set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}] + set esc_next 0 + continue + } + + if {$ch eq "\{"} { + if {$brace_depth} { + #we have an opening brace char inside braces + #Let the brace processing handle it as it recurses. + incr brace_depth 1 + append brace_content $ch + continue + } + incr brace_depth 1 + set brace_content "" + } elseif {$ch eq "\}"} { + if {$brace_depth > 1} { + #we have a closing brace char inside braces + append brace_content $ch + incr brace_depth -1 + continue + } + #process brace_content representing a list of alternatives + #handle list of alternatives - convert {txt,log} to *.txt;*.log + #set alternatives [split $brace_content ","] + lappend braced_alternatives $brace_content + set alternatives $braced_alternatives + set braced_alternatives [list] + set brace_content "" + incr brace_depth -1 + + set alt_winpatterns [list] + set alt_regexps [list] + foreach alt $alternatives { + set subresult [tclglob_equivalents $alt] + lappend alt_winpatterns {*}[dict get $subresult winglobs] + lappend alt_regexps {*}[dict get $subresult tclregexps] + } + set next_winglob_list [list] + set next_regexp_list [list] + foreach wg $winglob_list re $tclregexp_list { + #puts "wg: $wg" + #puts "re: $re" + foreach alt_wp $alt_winpatterns alt_re $alt_regexps { + #puts " alt_wp: $alt_wp" + #puts " alt_re: $alt_re" + lappend next_winglob_list "$wg$alt_wp" + set alt_re_no_caret [string range $alt_re 1 end] + lappend next_regexp_list "${re}${alt_re_no_caret}" + } + } + + set winglob_list $next_winglob_list + set tclregexp_list $next_regexp_list + + } elseif {$ch eq "\["} { + #windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char + + if {!$brace_depth} { + set in_brackets 1 + } else { + #we have a [ char inside braces + #Let the brace processing handle it as it recurses. + #but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets. + set brace_is_normal 1 + append brace_content $ch + } + } elseif {$ch eq "\]"} { + if {$brace_depth} { + #we have a ] char inside braces + #Let the brace processing hanele it as it recurses. + append brace_content $ch + continue + } + set in_brackets 0 + set charlist [resolve_characterclass $cclass_content] + set cclass_content "" + set next_winglob_list [list] + set next_regexp_list [list] + foreach c $charlist { + #set winglob_list [lmap wg $winglob_list {append wg $c}] + foreach wg $winglob_list { + lappend next_winglob_list "$wg$c" + } + foreach re $tclregexp_list { + set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}] + lappend next_regexp_list "${re}${c_escaped}" + } + } + set winglob_list $next_winglob_list + set tclregexp_list $next_regexp_list + } elseif {$ch eq "\\"} { + if {$in_brackets} { + append cclass_content $ch + #append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself. + set esc_next 1 + continue + } + if {$brace_depth} { + #we have a \ char inside braces + #Let the brace processing handle it as it recurses. + append brace_content $ch + set esc_next 1 + continue + } + set esc_next 1 + continue + } else { + if {$in_brackets} { + append cclass_content $ch + continue + } + if {!$brace_depth} { + set winglob_list [lmap wg $winglob_list {append wg $ch}] + set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}] + if {[string length $re_ch] == 1} { + switch -- $re_ch { + "?" {set re_ch "."} + "*" {set re_ch ".*"} + default { + #we could use the same mixed case filter here for both sensitive and insensitive filesystems, + #because the API filtering will already have done the restriction, + #and so a more permissive regex that matches both cases will still only match the results that the API call returns, + #which will be correct based on the case-sensitivity of the filesystem. + #It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob. + #ie in the parts of the original glob that were in square brackets. + + if {!$case_sensitive_filesystem} { + # add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob. + if {[string is upper $re_ch]} { + set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"] + } elseif {[string is lower $re_ch]} { + set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"] + } else { + #non-alpha char - no need to add case-insensitivity + } + } + } + } + } + set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}] + } else { + #we have a literal char inside braces - add to current brace_content + if {$brace_depth == 1 && $ch eq ","} { + lappend braced_alternatives $brace_content + set brace_content "" + } else { + append brace_content $ch + } + } + } + } + #sanity check + if {[llength $winglob_list] != [llength $tclregexp_list]} { + error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]" + } + set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}] + return [dict create winglobs $winglob_list tclregexps $tclregexp_list] + } + #todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed? namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs # get listing without using unix-tools (may not be installed on the windows system) # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) # This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance + + #todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob. + # we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before + #we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder. + #For really large folders and a moderate number of patterns, this could be a significant performance improvement. proc du_dirlisting_twapi {folderpath args} { set defaults [dict create\ -glob *\ -filedebug 0\ + -patterndebug 0\ -with_sizes 1\ -with_times 1\ ] set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- - set opt_glob [dict get $opts -glob] + set tcl_glob [dict get $opts -glob] + + #todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps. + set case_sensitive_filesystem 0 ;#todo - consider detecting this properly. + #Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program. + #In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly, + #as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem, + #so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly. + #Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a + #case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem. + + #Note: we only use this to adjust the filtering regexps we generate from the tcl glob. + #The windows API pattern match will already filter based on the case-sensitivity of the filesystem + # so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem. + + set globs_processed [tclglob_equivalents $tcl_glob] + #we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob. + #temp + #set win_glob_list [list $tcl_glob] + set win_glob_list [dict get $globs_processed winglobs] + set tcl_regex_list [dict get $globs_processed tclregexps] + + + #review + # our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob. + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_with_sizes [dict get $opts -with_sizes] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_filedebug [dict get $opts -filedebug] ;#per file # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_patterndebug [dict get $opts -patterndebug] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set ftypes [list f d l] if {"$opt_with_sizes" in {0 1}} { #don't use string is boolean - (f false vs f file!) @@ -711,256 +1031,288 @@ namespace eval punk::du { } } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set dirs [list] + set files [list] + set filesizes [list] + set allsizes [dict create] + set alltimes [dict create] + + set links [list] + set linkinfo [dict create] + set debuginfo [dict create] + set flaggedhidden [list] + set flaggedsystem [list] + set flaggedreadonly [list] set errors [dict create] set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ # return it so it can be stored and tried as an alternative for problem paths - #puts stderr ">>> glob: $opt_glob" - #REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames - #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ - #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. - # using * all the time may be inefficient - so we might be able to avoid that in some cases. - try { - #glob of * will return dotfiles too on windows - set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field - } on error args { + + foreach win_glob $win_glob_list tcl_re $tcl_regex_list { + if {$opt_patterndebug} { + puts stderr ">>>du_dirlisting_twapi winglob: $win_glob" + puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re" + } + + #REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames + #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ + #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. + # using * all the time may be inefficient - so we might be able to avoid that in some cases. try { - if {[string match "*denied*" $args]} { - #output similar format as unixy du - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - } - if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" - puts stderr " (errorcode: $::errorCode)\n" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - } + #glob of * will return dotfiles too on windows + set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field + } on error args { + try { + if {[string match "*denied*" $args]} { + #output similar format as unixy du + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } + if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" + puts stderr " (errorcode: $::errorCode)\n" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } - #errorcode TWAPI_WIN32 2 {The system cannot find the file specified.} - #This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error - #The find-all glob * won't get here because it returns . & .. - #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) - #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob - #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) - if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} { + #errorcode TWAPI_WIN32 2 {The system cannot find the file specified.} + #This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error + #The find-all glob * won't get here because it returns . & .. + #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) + #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob + #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) + #if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {} if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} { #looks like an ordinary no results for chosen glob - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + #return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + continue } - } - if {[set plen [pathcharacterlen $folderpath]] >= 250} { - set errmsg "error reading folder: $folderpath (len:$plen)\n" - append errmsg "error: $args" \n - append errmsg "errorcode: $::errorCode" \n - # re-fetch this folder with altnames - #file normalize - aside from being slow - will have problems with long paths - so this won't work. - #this function should only accept absolute paths - # - # - #Note: using -detail full only helps if the last segment of path has an altname.. - #To properly shorten we need to have kept track of altname all the way from the root! - #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd - #### SLOW - set fixedpath [dict get [file attributes $folderpath] -shortname] - #### SLOW - + if {[set plen [pathcharacterlen $folderpath]] >= 250} { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + # re-fetch this folder with altnames + #file normalize - aside from being slow - will have problems with long paths - so this won't work. + #this function should only accept absolute paths + # + # + #Note: using -detail full only helps if the last segment of path has an altname.. + #To properly shorten we need to have kept track of altname all the way from the root! + #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd + #### SLOW + set fixedpath [dict get [file attributes $folderpath] -shortname] + #### SLOW + + + append errmsg "retrying with with windows altname '$fixedpath'" + puts stderr $errmsg + } else { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + set tmp_errors [list $::errorCode] + #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share + #we can use //?/path dos device path - but not with tcl functions + #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. + #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here - append errmsg "retrying with with windows altname '$fixedpath'" - puts stderr $errmsg - } else { - set errmsg "error reading folder: $folderpath (len:$plen)\n" - append errmsg "error: $args" \n - append errmsg "errorcode: $::errorCode" \n - set tmp_errors [list $::errorCode] - #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share - #we can use //?/path dos device path - but not with tcl functions - #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. - #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here - - set fixedtail "" - - set parent [file dirname $folderpath] - set badtail [file tail $folderpath] - set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames - while {[twapi::find_file_next $iterator iteminfo]} { - set nm [dict get $iteminfo name] - if {$nm eq $badtail} { - set fixedtail [dict get $iteminfo altname] - break + set fixedtail "" + + set parent [file dirname $folderpath] + set badtail [file tail $folderpath] + set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo" + if {$nm eq $badtail} { + set fixedtail [dict get $iteminfo altname] + break + } + } + + if {![string length $fixedtail]} { + dict lappend errors $folderpath {*}$tmp_errors + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } + + #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. + #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it + #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) + #so the illegalname_fix doesn't really work here + #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] + + #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. + set fixedpath $parent/$fixedtail + append errmsg "retrying with with windows dos device path $fixedpath\n" + puts stderr $errmsg + } - if {![string length $fixedtail]} { - dict lappend errors $folderpath {*}$tmp_errors - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" + if {[catch { + set iterator [twapi::find_file_open $fixedpath/* -detail basic] + } errMsg]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')" + puts stderr " (errorcode: $::errorCode)\n" + puts stderr "$errMsg" + dict lappend errors $folderpath $::errorCode return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } - #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. - #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it - #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) - #so the illegalname_fix doesn't really work here - #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] - #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. - set fixedpath [file join $parent $fixedtail] - append errmsg "retrying with with windows dos device path $fixedpath\n" - puts stderr $errmsg + } on error args { + set errmsg "error reading folder: $folderpath\n" + append errmsg "error: $args" \n + append errmsg "errorInfo: $::errorInfo" \n + puts stderr "$errmsg" + puts stderr "FAILED to collect info for folder '$folderpath'" + #append errmsg "aborting.." + #error $errmsg + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } + } + #jjj - if {[catch { - set iterator [twapi::find_file_open $fixedpath/* -detail basic] - } errMsg]} { - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')" - puts stderr " (errorcode: $::errorCode)\n" - puts stderr "$errMsg" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + if {![regexp $tcl_re $nm]} { + continue + } + if {$nm in {. ..}} { + continue } + set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path + #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] + #set ftype "" + set do_sizes 0 + set do_times 0 + #attributes applicable to any classification + set fullname [file_join_one $folderpath $nm] + + set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict + if {[dict get $attrdict -hidden]} { + lappend flaggedhidden $fullname + } + if {[dict get $attrdict -system]} { + lappend flaggedsystem $fullname + } + if {[dict get $attrdict -readonly]} { + lappend flaggedreadonly $fullname + } + if {[dict exists $attrdict -debug]} { + dict set debuginfo $fullname [dict get $attrdict -debug] + } + set file_attributes [dict get $attrdict -fileattributes] - } on error args { - set errmsg "error reading folder: $folderpath\n" - append errmsg "error: $args" \n - append errmsg "errorInfo: $::errorInfo" \n - puts stderr "$errmsg" - puts stderr "FAILED to collect info for folder '$folderpath'" - #append errmsg "aborting.." - #error $errmsg - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - - } - } - set dirs [list] - set files [list] - set filesizes [list] - set allsizes [dict create] - set alltimes [dict create] + set is_reparse_point [expr {"reparse_point" in $file_attributes}] + set is_directory [expr {"directory" in $file_attributes}] - set links [list] - set linkinfo [dict create] - set debuginfo [dict create] - set flaggedhidden [list] - set flaggedsystem [list] - set flaggedreadonly [list] + set linkdata [dict create] + # ----------------------------------------------------------- + #main classification + if {$is_reparse_point} { + #this concept doesn't correspond 1-to-1 with unix links + #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points + #review - and see which if any actually belong in the links key of our return - while {[twapi::find_file_next $iterator iteminfo]} { - set nm [dict get $iteminfo name] - #recheck glob - #review! - if {![string match $opt_glob $nm]} { - continue - } - set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path - #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] - set ftype "" - #attributes applicable to any classification - set fullname [file_join_one $folderpath $nm] - - set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict - set file_attributes [dict get $attrdict -fileattributes] - - set linkdata [dict create] - # ----------------------------------------------------------- - #main classification - if {"reparse_point" in $file_attributes} { - #this concept doesn't correspond 1-to-1 with unix links - #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points - #review - and see which if any actually belong in the links key of our return - - - #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point - # - #we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? - #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' - #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls - #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} - #e.g (stripped of headers/footers and other lines) - #2022-10-02 04:07 AM priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] - #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. - #du includes the size of the symlink - #but we can't get it with tcl's file size - #twapi doesn't seem to have anything to help read it either (?) - #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link - # - #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. - # - #links are techically files too, whether they point to a file/dir or nothing. - lappend links $fullname - set ftype "l" - dict set linkdata linktype reparse_point - dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] - if {"directory" ni $file_attributes} { - dict set linkdata target_type file - } - } - if {"directory" in $file_attributes} { - if {$nm in {. ..}} { - continue + + #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point + # + #we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? + #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' + #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls + #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} + #e.g (stripped of headers/footers and other lines) + #2022-10-02 04:07 AM priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] + #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. + #du includes the size of the symlink + #but we can't get it with tcl's file size + #twapi doesn't seem to have anything to help read it either (?) + #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link + # + #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. + # + #links are techically files too, whether they point to a file/dir or nothing. + lappend links $fullname + #set ftype "l" + if {"l" in $sized_types} { + set do_sizes 1 + } + if {"l" in $timed_types} { + set do_times 1 + } + dict set linkdata linktype reparse_point + dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] + if {"directory" ni $file_attributes} { + dict set linkdata target_type file + } } - if {"reparse_point" ni $file_attributes} { - lappend dirs $fullname - set ftype "d" - } else { - #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections - dict set linkdata target_type directory + if {$is_directory} { + #if {$nm in {. ..}} { + # continue + #} + if {!$is_reparse_point} { + lappend dirs $fullname + #set ftype "d" + if {"d" in $sized_types} { + set do_sizes 1 + } + if {"d" in $timed_types} { + set do_times 1 + } + } else { + #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections + dict set linkdata target_type directory + } } - } - if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} { - #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? - lappend files $fullname - if {"f" in $sized_types} { - lappend filesizes [dict get $iteminfo size] + if {!$is_reparse_point && !$is_directory} { + #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? + lappend files $fullname + if {"f" in $sized_types} { + lappend filesizes [dict get $iteminfo size] + set do_sizes 1 + } + if {"f" in $timed_types} { + set do_times 1 + } + #set ftype "f" } - set ftype "f" - } - # ----------------------------------------------------------- + # ----------------------------------------------------------- - if {[dict get $attrdict -hidden]} { - lappend flaggedhidden $fullname - } - if {[dict get $attrdict -system]} { - lappend flaggedsystem $fullname - } - if {[dict get $attrdict -readonly]} { - lappend flaggedreadonly $fullname - } - if {$ftype in $sized_types} { - dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] - } - if {$ftype in $timed_types} { - #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) - #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds - #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict - dict set alltimes $fullname [dict create\ - c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\ - a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\ - m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ - ] - } - if {[dict size $linkdata]} { - dict set linkinfo $fullname $linkdata - } - if {[dict exists $attrdict -debug]} { - dict set debuginfo $fullname [dict get $attrdict -debug] + if {$do_sizes} { + dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] + } + if {$do_times} { + #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) + #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds + #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict + dict set alltimes $fullname [dict create\ + c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\ + a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\ + m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ + ] + } + if {[dict size $linkdata]} { + dict set linkinfo $fullname $linkdata + } } + twapi::find_file_close $iterator } - twapi::find_file_close $iterator - set vfsmounts [get_vfsmounts_in_folder $folderpath] + set vfsmounts [get_vfsmounts_in_folder $folderpath] set effective_opts $opts dict set effective_opts -with_times $timed_types dict set effective_opts -with_sizes $sized_types #also determine whether vfs. file system x is *much* faster than file attributes #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder - return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] + return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] } proc get_vfsmounts_in_folder {folderpath} { set vfsmounts [list] @@ -991,9 +1343,11 @@ namespace eval punk::du { #work around the horrible tilde-expansion thing (not needed for tcl 9+) proc file_join_one {base newtail} { if {[string index $newtail 0] ne {~}} { - return [file join $base $newtail] + #return [file join $base $newtail] + return $base/$newtail } - return [file join $base ./$newtail] + #return [file join $base ./$newtail] + return $base/./$newtail } @@ -1121,7 +1475,7 @@ namespace eval punk::du { #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #struct::set will affect order: tcl vs critcl give different ordering! set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links] - set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]] + set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]] #---- diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm index 6a7b79d6..fe8cfc7e 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm @@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check { #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #Not any sort of comprehensive check of known tcl bugs. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases. + + proc has_tclbug_caseinsensitiveglob_windows {} { + #https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted + + if {"windows" ne $::tcl_platform(platform)} { + set bug 0 + } else { + set tmpdir [file tempdir] + set testfile [file join $tmpdir "bugtest"] + set fd [open $testfile w] + puts $fd test + close $fd + set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*] + foreach r $globresult { + if {$r ne "bugtest"} { + set bug 1 + break + } + } + return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning] + #possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized + # to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation. + } + } + + #todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies. + # but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module, + # or at least checking for an existing mounted case-insensitive filesystem. + # A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like. + # It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix. + + proc has_tclbug_regexp_emptystring {} { #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm index e767e366..073b6cce 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm @@ -50,7 +50,7 @@ package require punk::lib package require punk::args package require punk::ansi package require punk::winpath -package require punk::du +package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing. package require commandstack #*** !doctools #[item] [package {Tcl 8.6}] @@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs { #[list_begin definitions] + punk::args::define { + @id -id ::punk::nav::fs::d/ + @cmd -name punk::nav::fs::d/ -help\ + {List directories or directories and files in the current directory or in the + targets specified with the fileglob_or_target glob pattern(s). + + If a single target is specified without glob characters, and it exists as a directory, + then the working directory is changed to that target and a listing of that directory + is returned. If the single target specified without glob characters does not exist as + a directory, then it is treated as a glob pattern and the listing is for the current + directory with results filtered to match fileglob_or_target. + + If multiple targets or glob patterns are specified, then a separate listing is returned + for each fileglob_or_target pattern. + + This function is provided via aliases as ./ and .// with v being inferred from the alias + name, and also as d/ with an explicit v argument. + The ./ and .// forms are more convenient for interactive use. + examples: + ./ - list directories in current directory + .// - list directories and files in current directory + ./ src/* - list directories in src + .// src/* - list directories and files in src + .// *.txt - list files in current directory with .txt extension + .// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name + (on a case-insensitive filesystem this would also match T*1.txt etc) + .// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name + (glob chars treated as literals due to being in character-class brackets + This will match files beginning with a capital T and not lower case t + even on a case-insensitive filesystem.) + .// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns: + {[t]*} - names beginning with t + {d{e,d}*} - names beginning with de or dd + (on a case-insensitive filesystem the first pattern would also match names beginning with T) + } + @values -min 1 -max -1 -type string + v -type string -choices {/ //} -help\ + " + / - list directories only + // - list directories and files + " + fileglob_or_target -type string -optional true -multiple true -help\ + "A glob pattern as supported by Tcl's 'glob' command, to filter results. + If multiple patterns are supplied, then a listing for each pattern is returned. + If no patterns are supplied, then all items are listed." + } + #NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread. #As this function recurses and calls cd multiple times - it's not thread-safe. #Another thread could theoretically cd whilst this is running. @@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs { #puts stdout "-->[ansistring VIEW $result]" return $result } else { - set atail [lassign $args cdtarget] if {[llength $args] == 1} { set cdtarget [lindex $args 0] switch -exact -- $cdtarget { . - ./ { - tailcall punk::nav::fs::d/ + tailcall punk::nav::fs::d/ $v } .. - ../ { if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} { @@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs { if {[string range $cdtarget_copy 0 3] eq "//?/"} { #handle dos device paths - convert to normal path for glob testing set glob_test [string range $cdtarget_copy 3 end] - set cdtarget_is_glob [regexp {[*?]} $glob_test] + set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test] } else { - set cdtarget_is_glob [regexp {[*?]} $cdtarget] + set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget] + #todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing. } if {!$cdtarget_is_glob} { set cdtarget_file_type [file type $cdtarget] @@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs { } set VIRTUAL_CWD $cdtarget set curdir $cdtarget + tailcall punk::nav::fs::d/ $v } else { set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] if {[string match //zipfs:/* $VIRTUAL_CWD]} { @@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs { } if {[file type $target] eq "directory"} { set VIRTUAL_CWD $target + tailcall punk::nav::fs::d/ $v } } - tailcall punk::nav::fs::d/ $v } set curdir $VIRTUAL_CWD } else { @@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs { #globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.) - set searchspec [lindex $args 0] set result "" #set chunklist [list] @@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs { set this_result [dict create] foreach searchspec $args { set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)] - set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}] + #we need to support the same glob chars that Tcl's 'glob' command accepts. + set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]] + #we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean #this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough. #lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*) @@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs { set allow_nonportable [dict exists $received -nonportable] set curdir [pwd] - set fullpath_list [list] + + set fullpath_list [list] ;#list of full paths to create. + set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir) + #these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests. + set error_paths [list] foreach p $paths { if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { @@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs { lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] continue } - set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] - #e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. - set fullpath [file join $path1 {*}[lrange $args 1 end]] + set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)] + #if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir. #Some subpaths of the supplied paths to create may already exist. - #we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. + #we should test write permissions on the nearest existing parent of the supplied path to create, + #rather than just on the immediate parent segment of the supplied path itself which may not exist. + + set fullpath [file normalize $fullpath] set parent [file dirname $fullpath] while {![file exists $parent]} { set parent [file dirname $parent] @@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs { lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] continue } + lappend existing_parent_list $parent lappend fullpath_list $fullpath } if {[llength $fullpath_list] != [llength $paths]} { @@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs { set num_created 0 set error_string "" - foreach fullpath $fullpath_list { + foreach fullpath $fullpath_list existing_parent $existing_parent_list { + #calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue. + #set relative_path [file relative $fullpath $existing_parent] + #todo. if {[catch {file mkdir $fullpath}]} { set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." + cd $curdir break } incr num_created @@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs { if {$error_string ne ""} { error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." } - d/ $curdir + + #display summaries of created directories (which may have already existed) by reusing d/ to get info on them. + set query_paths [lmap v $paths $v/*] + d/ / {*}$query_paths } #todo use unknown to allow d/~c:/etc ?? @@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs { if {![file isdirectory $target]} { error "Folder $target not found" } - d/ $target + d/ / $target } @@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs { } set relativepath [expr {[file pathtype $searchspec] eq "relative"}] - set has_tailglobs [regexp {[?*]} [file tail $searchspec]] + + #set has_tailglobs [regexp {[?*]} [file tail $searchspec]] + set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]] #dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent. #(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict) @@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs { } puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location" set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location] - return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] + return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] } #todo - package as punk::nav::fs @@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs { } proc dirfiles_dict {args} { set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict] - lassign [dict values $argd] leaders opts vals - set searchspecs [dict values $vals] + lassign [dict values $argd] leaders opts values + set searchspecs [dict values $values] #puts stderr "searchspecs: $searchspecs [llength $searchspecs]" #puts stdout "arglist: $opts" @@ -893,7 +958,7 @@ tcl::namespace::eval punk::nav::fs { set searchspec [lindex $searchspecs 0] # -- --- --- --- --- --- --- set opt_searchbase [dict get $opts -searchbase] - set opt_tailglob [dict get $opts -tailglob] + set opt_tailglob [dict get $opts -tailglob] set opt_with_sizes [dict get $opts -with_sizes] set opt_with_times [dict get $opts -with_times] # -- --- --- --- --- --- --- @@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs { } } "\uFFFF" { - set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] + set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]] + + #set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] if {$searchtail_has_globs} { if {$is_relativesearchspec} { #set location [file dirname [file join $searchbase $searchspec]] @@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs { } else { set next_opt_with_times [list -with_times $opt_with_times] } + + set ts1 [clock clicks -milliseconds] if {$is_in_vfs} { set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] } else { @@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs { } } } + set ts2 [clock clicks -milliseconds] + set ts_listing [expr {$ts2 - $ts1}] set dirs [dict get $listing dirs] set files [dict get $listing files] @@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs { set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden] } - set dirs [lsort -dictionary $dirs] ;#todo - natsort + #----------------------------------------------------------------------------------------- + set ts1 [clock milliseconds] + set dirs [lsort -dictionary $dirs] ;#todo - natsort #foreach d $dirs { # if {[lindex [file system $d] 0] eq "tclvfs"} { @@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs { set files $sorted_files set filesizes $sorted_filesizes + set ts2 [clock milliseconds] + set ts_sorting [expr {$ts2 - $ts1}] + #----------------------------------------------------------------------------------------- # -- --- #jmn + set ts1 [clock milliseconds] foreach nm [list {*}$dirs {*}$files] { if {[punk::winpath::illegalname_test $nm]} { lappend nonportable $nm } } + set ts2 [clock milliseconds] + set ts_nonportable_check [expr {$ts2 - $ts1}] + set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms] + set front_of_dict [dict create location $location searchbase $opt_searchbase] set listing [dict merge $front_of_dict $listing] - set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes] + set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info] return [dict merge $listing $updated] } @@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs { @id -id ::punk::nav::fs::dirfiles_dict_as_lines -stripbase -default 0 -type boolean -formatsizes -default 1 -type boolean - -listing -default "/" -choices {/ // //} + -listing -default "/" -choices {/ //} @values -min 1 -max -1 -type dict -unnamed true } #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? proc dirfiles_dict_as_lines {args} { - set ts1 [clock milliseconds] + #set ts1 [clock milliseconds] package require overtype set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] lassign [dict values $argd] leaders opts vals @@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs { #review - symlink to shortcut? hopefully will just work #classify as file or directory - fallback to file if unknown/undeterminable set finfo_plus [list] - set ts2 [clock milliseconds] + #set ts2 [clock milliseconds] foreach fdict $finfo { set fname [dict get $fdict file] if {[file extension $fname] eq ".lnk"} { @@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs { } unset finfo - puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" - puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" + #puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" + #puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] @@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs { #consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path #review: punk::winpath calls cygpath! #review: file pathtype is platform dependant - proc path_to_absolute {path base platform} { - set ptype [file pathtype $path] + proc path_to_absolute {subpath base platform} { + set ptype [file pathtype $subpath] if {$ptype eq "absolute"} { - set path_absolute $path + set path_absolute $subpath } elseif {$ptype eq "volumerelative"} { if {$platform eq "windows"} { #unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms) - if {[string index $path 0] eq "/"} { + if {[string index $subpath 0] eq "/"} { #this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here #It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions. #Todo - tidy up. package require punk::unixywindows - set path_absolute [punk::unixywindows::towinpath $path] + set path_absolute [punk::unixywindows::towinpath $subpath] #puts stderr "winpath: $path" } else { #todo handle volume-relative paths with volume specified c:etc c: @@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs { #The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives #Arguably if ...? - #set path_absolute $base/$path - set path_absolute $path + #set path_absolute $base/$subpath + set path_absolute $subpath } } else { # unknown what paths are reported as this on other platforms.. treat as absolute for now - set path_absolute $path + set path_absolute $subpath } } else { - set path_absolute $base/$path - } - if {$platform eq "windows"} { - if {[punk::winpath::illegalname_test $path_absolute]} { - set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present - } + #e.g relative subpath=* base = c:/test -> c:/test/* + #e.g relative subpath=../test base = c:/test -> c:/test/../test + #e.g relative subpath=* base = //server/share/test -> //server/share/test/* + set path_absolute $base/$subpath } + #fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues. + #if {$platform eq "windows"} { + # if {[punk::winpath::illegalname_test $path_absolute]} { + # set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present + # } + #} return $path_absolute } proc strip_prefix_depth {path prefix} { @@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib { #[para] Secondary functions that are part of the API #[list_begin definitions] - #proc utility1 {p1 args} { - # #*** !doctools - # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] - # #[para]Description of utility1 - # return 1 - #} + punk::args::define { + @id -id ::punk::nav::fs::lib::is_fileglob + @cmd -name punk::nav::fs::lib::is_fileglob + @values -min 1 -max 1 + path -type string -required true -help\ + {String to test for being a glob pattern as recognised by the tcl 'glob' command. + If the string represents a path with multiple segments, only the final component + of the path will be tested for glob characters. + Glob patterns in this context are different to globs accepted by TCL's 'string match'. + A glob pattern is any string that contains unescaped * ? { } [ or ]. + This will not detect mismatched unescaped braces or brackets. + Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern. + } + } + proc is_fileglob {str} { + #a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser) + set in_escape 0 + set segments [file split $str] + set tail [lindex $segments end] + foreach c [split $tail ""] { + if {$in_escape} { + set in_escape 0 + } else { + if {$c eq "\\"} { + set in_escape 1 + } elseif {$c in [list * ? "\[" "\]" "{" "}" ]} { + return 1 + } + } + } + return 0 + } #*** !doctools diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm index 46ca4aa2..de536724 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm @@ -357,7 +357,7 @@ namespace eval punk::path { } } } - puts "==>finalparts: '$finalparts'" + #puts "==>finalparts: '$finalparts'" # using join - {"" "" server share} -> //server/share and {a b} -> a/b if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} { #backtracking on unix-style path can end up with empty string as only member of finalparts diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm index 9079dbbc..1f2948fe 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm @@ -114,6 +114,50 @@ namespace eval punk::winpath { return $path } } + + proc illegal_char_map_to_doublewide {ch} { + #windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc) + set map [dict create \ + "<" "\uFF1C" \ + ">" "\uFF1E" \ + ":" "\uFF1A" \ + "\"" "\uFF02" \ + "/" "\uFF0F" \ + "\\" "\uFF3C" \ + "|" "\uFF5C" \ + "?" "\uFF1F" \ + "*" "\uFF0A"] + if {$ch in [dict keys $map]} { + return [dict get $map $ch] + } else { + return $ch + } + } + proc illegal_char_map_to_ntfs {ch} { + #windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc. + #see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars + #see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped) + + set map [dict create \ + "<" "\uF03C" \ + ">" "\uF03E" \ + ":" "\uF03A" \ + "\"" "\uF022" \ + "/" "unknown" \ + "\\" "\uF05c" \ + "|" "\uF07C" \ + "?" "\uF03F" \ + "*" "\uF02A"] + #note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API. + if {$ch in [dict keys $map]} { + return [dict get $map $ch] + } else { + return $ch + } + } + + + #we don't validate that path is actually illegal because we don't know the full range of such names. #The caller can apply this to any path. #don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently) @@ -200,8 +244,15 @@ namespace eval punk::winpath { set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] #we need to exclude things like path/.. path/. - foreach seg [file split $path] { - if {$seg in [list . ..]} { + set segments [file split $path] + if {[file pathtype $path] eq "absolute"} { + #absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments. + set checksegments [lrange $segments 1 end] + } else { + set checksegments $segments + } + foreach seg $checksegments { + if {$seg in {. ..}} { #review - what if there is a folder or file that actually has a name such as . or .. ? #unlikely in normal use - but could done deliberately for bad reasons? #We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem. @@ -220,10 +271,17 @@ namespace eval punk::winpath { #only check for actual space as other whitespace seems to work without being stripped #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph - if {[string index $seg end] in [list " " "."]} { + if {[string index $seg end] in {" " .}} { #windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc) return 1 } + #set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams. + set re {[<>:"/\\|?*]} + + if {[regexp $re $seg]} { + #windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc) + return 1 + } } #glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph) #- they seem to be readable from cmd and tclsh as is. diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm index ea72ad1c..e1648d9d 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm @@ -6066,9 +6066,218 @@ namespace eval punk { set pipe [punk::path_list_pipe $glob] {*}$pipe } - proc path {{glob *}} { + proc path_basic {{glob *}} { set pipe [punk::path_list_pipe $glob] {*}$pipe |> list_as_lines + } + namespace eval argdoc { + punk::args::define { + @id -id ::punk::path + @cmd -name "punk::path" -help\ + "Introspection of the PATH environment variable. + This tool will examine executables within each PATH entry and show which binaries + are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns." + @opts + -binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries." + @values -min 0 -max -1 + glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories." + } + } + proc path {args} { + set argd [punk::args::parse $args withid ::punk::path] + lassign [dict values $argd] leaders opts values received + set binglobs [dict get $opts -binglobs] + set globs [dict get $values glob] + if {$::tcl_platform(platform) eq "windows"} { + set sep ";" + } else { + # : ok for linux/bsd ... mac? + set sep ":" + } + set all_paths [split [string trimright $::env(PATH) $sep] $sep] + set filtered_paths $all_paths + if {[llength $globs]} { + set filtered_paths [list] + foreach p $all_paths { + foreach g $globs { + if {[string match -nocase $g $p]} { + lappend filtered_paths $p + break + } + } + } + } + + #Windows executable search location order: + #1. The current directory. + #2. The directories that are listed in the PATH environment variable. + # within each directory windows uses the PATHEXT environment variable to determine + #which files are considered executable and in which order they are considered. + #So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files + #with the same name but different extensions in the same directory, then the one + #with the extension that appears first in PATHEXT will be executed when that name is called. + + #On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output. + #Duplicate PATH entries are also a fairly likely possibility on all platforms. + #This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries. + + #By default we don't want to display all executables in all PATH entries - as this can be very verbose + #- but we want to be able to show which executables are overshadowed by which PATH entries, + #and to be able to filter the PATH entries and the executables using glob patterns. + #To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name. + #The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths + #in the original PATH list. The executable dict will contain the paths and path indices where each executable is found, + #and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a + #dict keyed by path index which contains the list of executables in that path - to make it easy to show which + #executables are overshadowed by which paths. + + set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows). + set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off). + set d_index_executables [dict create] ;#key is path index, value is list of executables in that path + set path_idx 0 + foreach p $all_paths { + if {$::tcl_platform(platform) eq "windows"} { + set pnorm [string tolower $p] + } else { + set pnorm $p + } + if {![dict exists $d_path_info $pnorm]} { + dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]] + set executables [list] + if {[file isdirectory $p]} { + #get all files that are executable in this path. + #If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names. + set pnormglob [file normalize $p] + if {$::tcl_platform(platform) eq "windows"} { + #Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable. + #We need to account for this in our glob pattern. + set pathexts [list] + if {[info exists ::env(PATHEXT)]} { + set env_pathexts [split $::env(PATHEXT) ";"] + #set pathexts [lmap e $env_pathexts {string tolower $e}] + foreach pe $env_pathexts { + if {$pe eq "."} { + continue + } + lappend pathexts [string tolower $pe] + } + } else { + set env_pathexts [list] + #default PATHEXT if not set - according to Microsoft docs + set pathexts [list .com .exe .bat .cmd] + } + foreach bg $binglobs { + set has_pathext 0 + foreach pe $pathexts { + if {[string match -nocase "*$pe" $bg]} { + set has_pathext 1 + break + } + } + if {!$has_pathext} { + foreach pe $pathexts { + lappend binglobs "$bg$pe" + } + } + } + set lc_binglobs [lmap e $binglobs {string tolower $e}] + if {"." in $pathexts} { + foreach bg $binglobs { + set has_pathext 0 + foreach pe $pathexts { + if {[string match -nocase "*$pe" $bg]} { + set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]] + set has_pathext 1 + break + } + } + if {$has_pathext} { + if {[string tolower $base] ni $lc_binglobs} { + lappend binglobs "$base" + } + } + } + } + + #TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem. + #(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug) + #We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results. + #The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for. + #(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe') + set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]] + set executables [list] + foreach e $globresults { + puts stderr "glob result: $e" + puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]" + lappend executables [file tail [file normalize $e]] + } + } else { + set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]] + } + } + dict set d_index_executables $path_idx $executables + foreach exe $executables { + if {$::tcl_platform(platform) eq "windows"} { + set exenorm [string tolower $exe] + } else { + set exenorm $exe + } + if {![dict exists $d_bin_info $exenorm]} { + dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]] + } else { + #dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe + set bindata [dict get $d_bin_info $exenorm] + dict lappend bindata path_indices $path_idx + dict lappend bindata paths $p + dict lappend bindata executable_names $exe + dict set d_bin_info $exenorm $bindata + } + } + } else { + #duplicate path entry - add to list of original paths for this normalized path + + # Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...? + # we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it. + set pathdata [dict get $d_path_info $pnorm] + dict lappend pathdata original_paths $p + dict lappend pathdata indices $path_idx + dict set d_path_info $pnorm $pathdata + + + #we don't need to add executables for this path - as they will be the same as the original path that we have already processed. + #However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables. + set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path + foreach exe $executables { + if {$::tcl_platform(platform) eq "windows"} { + set exenorm [string tolower $exe] + } else { + set exenorm $exe + } + #dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe + set bindata [dict get $d_bin_info $exenorm] + dict lappend bindata path_indices $path_idx + dict lappend bindata paths $p + dict lappend bindata executable_names $exe + dict set d_bin_info $exenorm $bindata + } + } + + incr path_idx + } + + #temporary debug output to check dicts are being built correctly + set debug "" + append debug "Path info dict:" \n + append debug [showdict $d_path_info] \n + append debug "Binary info dict:" \n + append debug [showdict $d_bin_info] \n + append debug "Index executables dict:" \n + append debug [showdict $d_index_executables] \n + #return $debug + puts stdout $debug + + + } #------------------------------------------------------------------- diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm index bc753154..c9ab54fd 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm @@ -479,7 +479,7 @@ namespace eval punk::du { } namespace eval lib { variable du_literal - variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] + variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ] #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this @@ -489,7 +489,11 @@ namespace eval punk::du { return [dict get $winfile_attributes $bitmask] } else { #list/dict shimmering? - return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + + set decoded [twapi::decode_file_attributes $bitmask] + dict set winfile_attributes $bitmask $decoded + return $decoded } } variable win_reparse_tags @@ -563,23 +567,25 @@ namespace eval punk::du { #then twapi::device_ioctl (win32 DeviceIoControl) #then parse buffer somehow (binary scan..) #https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913 - + punk::args::define { + @id -id ::punk::du::lib::Get_attributes_from_iteminfo + -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" + -debugchannel -default stderr -help "channel to write debug output, or none to append to output" + @values -min 1 -max 1 + iteminfo -help "iteminfo dict as set by 'twapi::find_file_next iteminfo'" + } + #don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args. proc Get_attributes_from_iteminfo {args} { variable win_reparse_tags_by_int + #set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo] + set defaults [dict create -debug 0 -debugchannel stderr] + set opts [dict merge $defaults [lrange $args 0 end-1]] + set iteminfo [lindex $args end] - set argd [punk::args::parse $args withdef { - @id -id ::punk::du::lib::Get_attributes_from_iteminfo - -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" - -debugchannel -default stderr -help "channel to write debug output, or none to append to output" - @values -min 1 -max 1 - iteminfo -help "iteminfo dict as set by 'twapi::find_file_next iteminfo'" - }] - set opts [dict get $argd opts] - set iteminfo [dict get $argd values iteminfo] set opt_debug [dict get $opts -debug] set opt_debugchannel [dict get $opts -debugchannel] #-longname is placeholder - caller needs to set - set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0] + set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}] if {$opt_debug} { set dbg "iteminfo returned by find_file_open\n" append dbg [pdict -channel none iteminfo] @@ -592,34 +598,38 @@ namespace eval punk::du { } set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] - if {"hidden" in $attrinfo} { - dict set result -hidden 1 - } - if {"system" in $attrinfo} { - dict set result -system 1 - } - if {"readonly" in $attrinfo} { - dict set result -readonly 1 - } - dict set result -shortname [dict get $iteminfo altname] - dict set result -fileattributes $attrinfo - if {"reparse_point" in $attrinfo} { - #the twapi API splits this 32bit value for us - set low_word [dict get $iteminfo reserve0] - set high_word [dict get $iteminfo reserve1] - # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 - # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - #+-+-+-+-+-----------------------+-------------------------------+ - #|M|R|N|R| Reserved bits | Reparse tag value | - #+-+-+-+-+-----------------------+-------------------------------+ - #todo - is_microsoft from first bit of high_word - set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? - if {[dict exists $win_reparse_tags_by_int $low_int]} { - dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] - } else { - dict set result -reparseinfo [dict create tag "" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"] + foreach attr $attrinfo { + switch -- $attr { + hidden { + dict set result -hidden 1 + } + system { + dict set result -system 1 + } + readonly { + dict set result -readonly 1 + } + reparse_point { + #the twapi API splits this 32bit value for us + set low_word [dict get $iteminfo reserve0] + set high_word [dict get $iteminfo reserve1] + # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + #+-+-+-+-+-----------------------+-------------------------------+ + #|M|R|N|R| Reserved bits | Reparse tag value | + #+-+-+-+-+-----------------------+-------------------------------+ + #todo - is_microsoft from first bit of high_word + set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? + if {[dict exists $win_reparse_tags_by_int $low_int]} { + dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] + } else { + dict set result -reparseinfo [dict create tag "" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"] + } + } } } + #dict set result -shortname [dict get $iteminfo altname] + dict set result -fileattributes $attrinfo dict set result -raw $iteminfo return $result } @@ -652,27 +662,337 @@ namespace eval punk::du { catch {twapi::find_file_close $iterator} } } + proc resolve_characterclass {cclass} { + #takes the inner value from a tcl square bracketed character class and converts to a list of characters. + + #todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters + #e.g a-c-3 -> a b c - 3 + #e.g a-c-3-5 -> a b c - 3 4 5 + #e.g a-c -> a b c + #e.g a- -> a - + #e.g -c-e -> - c d e + #the tcl character class does not support negation or intersection - so we can ignore those possibilities for now. + #in this context we do not need to support named character classes such as [:digit:] + set chars [list] + set i 0 + set len [string length $cclass] + set accept_range 0 + while {$i < $len} { + set ch [string index $cclass $i] + if {$ch eq "-"} { + if {$accept_range} { + set start [string index $cclass [expr {$i - 1}]] + set end [string index $cclass [expr {$i + 1}]] + if {$start eq "" || $end eq ""} { + #invalid range - treat - as literal + if {"-" ni $chars} { + lappend chars "-" + } + } else { + #we have a range - add all chars from previous char to next char + #range may be in either direction - e.g a-c or c-a but we don't care about the order of our result. + if {$start eq $end} { + #degenerate range - treat as single char + if {$start ni $chars} { + lappend chars $start + } + } else { + if {[scan $start %c] < [scan $end %c]} { + set c1 [scan $start %c] + set c2 [scan $end %c] + } else { + set c1 [scan $end %c] + set c2 [scan $start %c] + } + for {set c $c1} {$c <= $c2} {incr c} { + set char [format %c $c] + if {$char ni $chars} { + lappend chars $char + } + } + } + + incr i ;#skip end char as it's already included in range + } + set accept_range 0 + } else { + #we have a literal - add to list and allow for possible range if next char is also a - + if {"-" ni $chars} { + lappend chars "-" + } + set accept_range 1 + } + } else { #we have a literal - add to list and allow for possible range if next char is also a - + if {$ch ni $chars} { + lappend chars $ch + } + set accept_range 1 + } + incr i + } + return $chars + } + + #return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible. + #first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes. + # e.g {*[\{]*} is a valid tcl glob + + #todo - write tests. + #should also support {*\{*} matching a file such as a{blah}b + + #Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL. + #So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity. + + proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} { + #windows API function in use is the FindFirstFile set of functions. + #these support wildcards * and ? *only*. + + #examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported + # {abc[1-3].txt} -> {abc?.txt} + # {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char + # {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call. + # {{S,t}*.txt} -> {S*.txt} {T*.txt} + # *.{txt,log} -> {*.txt} {*.log} + # {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals + + + set gchars [split $tclglob ""] + set winglob_list [list ""] + set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob. + set in_brackets 0 + set esc_next 0 + + set brace_depth 0 + set brace_content "" + set braced_alternatives [list] + + set brace_is_normal 0 + + set cclass_content "" + foreach ch $gchars { + if {$esc_next} { + if {$in_brackets} { + append cclass_content $ch + continue + } elseif {$brace_depth} { + append brace_content $ch + continue + } + set winglob_list [lmap wg $winglob_list {append wg $ch}] + set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}] + set esc_next 0 + continue + } + + if {$ch eq "\{"} { + if {$brace_depth} { + #we have an opening brace char inside braces + #Let the brace processing handle it as it recurses. + incr brace_depth 1 + append brace_content $ch + continue + } + incr brace_depth 1 + set brace_content "" + } elseif {$ch eq "\}"} { + if {$brace_depth > 1} { + #we have a closing brace char inside braces + append brace_content $ch + incr brace_depth -1 + continue + } + #process brace_content representing a list of alternatives + #handle list of alternatives - convert {txt,log} to *.txt;*.log + #set alternatives [split $brace_content ","] + lappend braced_alternatives $brace_content + set alternatives $braced_alternatives + set braced_alternatives [list] + set brace_content "" + incr brace_depth -1 + + set alt_winpatterns [list] + set alt_regexps [list] + foreach alt $alternatives { + set subresult [tclglob_equivalents $alt] + lappend alt_winpatterns {*}[dict get $subresult winglobs] + lappend alt_regexps {*}[dict get $subresult tclregexps] + } + set next_winglob_list [list] + set next_regexp_list [list] + foreach wg $winglob_list re $tclregexp_list { + #puts "wg: $wg" + #puts "re: $re" + foreach alt_wp $alt_winpatterns alt_re $alt_regexps { + #puts " alt_wp: $alt_wp" + #puts " alt_re: $alt_re" + lappend next_winglob_list "$wg$alt_wp" + set alt_re_no_caret [string range $alt_re 1 end] + lappend next_regexp_list "${re}${alt_re_no_caret}" + } + } + + set winglob_list $next_winglob_list + set tclregexp_list $next_regexp_list + + } elseif {$ch eq "\["} { + #windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char + + if {!$brace_depth} { + set in_brackets 1 + } else { + #we have a [ char inside braces + #Let the brace processing handle it as it recurses. + #but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets. + set brace_is_normal 1 + append brace_content $ch + } + } elseif {$ch eq "\]"} { + if {$brace_depth} { + #we have a ] char inside braces + #Let the brace processing hanele it as it recurses. + append brace_content $ch + continue + } + set in_brackets 0 + set charlist [resolve_characterclass $cclass_content] + set cclass_content "" + set next_winglob_list [list] + set next_regexp_list [list] + foreach c $charlist { + #set winglob_list [lmap wg $winglob_list {append wg $c}] + foreach wg $winglob_list { + lappend next_winglob_list "$wg$c" + } + foreach re $tclregexp_list { + set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}] + lappend next_regexp_list "${re}${c_escaped}" + } + } + set winglob_list $next_winglob_list + set tclregexp_list $next_regexp_list + } elseif {$ch eq "\\"} { + if {$in_brackets} { + append cclass_content $ch + #append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself. + set esc_next 1 + continue + } + if {$brace_depth} { + #we have a \ char inside braces + #Let the brace processing handle it as it recurses. + append brace_content $ch + set esc_next 1 + continue + } + set esc_next 1 + continue + } else { + if {$in_brackets} { + append cclass_content $ch + continue + } + if {!$brace_depth} { + set winglob_list [lmap wg $winglob_list {append wg $ch}] + set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}] + if {[string length $re_ch] == 1} { + switch -- $re_ch { + "?" {set re_ch "."} + "*" {set re_ch ".*"} + default { + #we could use the same mixed case filter here for both sensitive and insensitive filesystems, + #because the API filtering will already have done the restriction, + #and so a more permissive regex that matches both cases will still only match the results that the API call returns, + #which will be correct based on the case-sensitivity of the filesystem. + #It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob. + #ie in the parts of the original glob that were in square brackets. + + if {!$case_sensitive_filesystem} { + # add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob. + if {[string is upper $re_ch]} { + set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"] + } elseif {[string is lower $re_ch]} { + set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"] + } else { + #non-alpha char - no need to add case-insensitivity + } + } + } + } + } + set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}] + } else { + #we have a literal char inside braces - add to current brace_content + if {$brace_depth == 1 && $ch eq ","} { + lappend braced_alternatives $brace_content + set brace_content "" + } else { + append brace_content $ch + } + } + } + } + #sanity check + if {[llength $winglob_list] != [llength $tclregexp_list]} { + error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]" + } + set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}] + return [dict create winglobs $winglob_list tclregexps $tclregexp_list] + } + #todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed? namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs # get listing without using unix-tools (may not be installed on the windows system) # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) # This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance + + #todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob. + # we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before + #we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder. + #For really large folders and a moderate number of patterns, this could be a significant performance improvement. proc du_dirlisting_twapi {folderpath args} { set defaults [dict create\ -glob *\ -filedebug 0\ + -patterndebug 0\ -with_sizes 1\ -with_times 1\ ] set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- - set opt_glob [dict get $opts -glob] + set tcl_glob [dict get $opts -glob] + + #todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps. + set case_sensitive_filesystem 0 ;#todo - consider detecting this properly. + #Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program. + #In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly, + #as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem, + #so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly. + #Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a + #case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem. + + #Note: we only use this to adjust the filtering regexps we generate from the tcl glob. + #The windows API pattern match will already filter based on the case-sensitivity of the filesystem + # so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem. + + set globs_processed [tclglob_equivalents $tcl_glob] + #we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob. + #temp + #set win_glob_list [list $tcl_glob] + set win_glob_list [dict get $globs_processed winglobs] + set tcl_regex_list [dict get $globs_processed tclregexps] + + + #review + # our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob. + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_with_sizes [dict get $opts -with_sizes] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_filedebug [dict get $opts -filedebug] ;#per file # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_patterndebug [dict get $opts -patterndebug] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set ftypes [list f d l] if {"$opt_with_sizes" in {0 1}} { #don't use string is boolean - (f false vs f file!) @@ -711,256 +1031,288 @@ namespace eval punk::du { } } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set dirs [list] + set files [list] + set filesizes [list] + set allsizes [dict create] + set alltimes [dict create] + + set links [list] + set linkinfo [dict create] + set debuginfo [dict create] + set flaggedhidden [list] + set flaggedsystem [list] + set flaggedreadonly [list] set errors [dict create] set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ # return it so it can be stored and tried as an alternative for problem paths - #puts stderr ">>> glob: $opt_glob" - #REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames - #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ - #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. - # using * all the time may be inefficient - so we might be able to avoid that in some cases. - try { - #glob of * will return dotfiles too on windows - set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field - } on error args { + + foreach win_glob $win_glob_list tcl_re $tcl_regex_list { + if {$opt_patterndebug} { + puts stderr ">>>du_dirlisting_twapi winglob: $win_glob" + puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re" + } + + #REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames + #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ + #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. + # using * all the time may be inefficient - so we might be able to avoid that in some cases. try { - if {[string match "*denied*" $args]} { - #output similar format as unixy du - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - } - if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" - puts stderr " (errorcode: $::errorCode)\n" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - } + #glob of * will return dotfiles too on windows + set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field + } on error args { + try { + if {[string match "*denied*" $args]} { + #output similar format as unixy du + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } + if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" + puts stderr " (errorcode: $::errorCode)\n" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } - #errorcode TWAPI_WIN32 2 {The system cannot find the file specified.} - #This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error - #The find-all glob * won't get here because it returns . & .. - #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) - #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob - #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) - if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} { + #errorcode TWAPI_WIN32 2 {The system cannot find the file specified.} + #This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error + #The find-all glob * won't get here because it returns . & .. + #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) + #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob + #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) + #if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {} if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} { #looks like an ordinary no results for chosen glob - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + #return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + continue } - } - if {[set plen [pathcharacterlen $folderpath]] >= 250} { - set errmsg "error reading folder: $folderpath (len:$plen)\n" - append errmsg "error: $args" \n - append errmsg "errorcode: $::errorCode" \n - # re-fetch this folder with altnames - #file normalize - aside from being slow - will have problems with long paths - so this won't work. - #this function should only accept absolute paths - # - # - #Note: using -detail full only helps if the last segment of path has an altname.. - #To properly shorten we need to have kept track of altname all the way from the root! - #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd - #### SLOW - set fixedpath [dict get [file attributes $folderpath] -shortname] - #### SLOW - + if {[set plen [pathcharacterlen $folderpath]] >= 250} { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + # re-fetch this folder with altnames + #file normalize - aside from being slow - will have problems with long paths - so this won't work. + #this function should only accept absolute paths + # + # + #Note: using -detail full only helps if the last segment of path has an altname.. + #To properly shorten we need to have kept track of altname all the way from the root! + #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd + #### SLOW + set fixedpath [dict get [file attributes $folderpath] -shortname] + #### SLOW + + + append errmsg "retrying with with windows altname '$fixedpath'" + puts stderr $errmsg + } else { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + set tmp_errors [list $::errorCode] + #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share + #we can use //?/path dos device path - but not with tcl functions + #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. + #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here - append errmsg "retrying with with windows altname '$fixedpath'" - puts stderr $errmsg - } else { - set errmsg "error reading folder: $folderpath (len:$plen)\n" - append errmsg "error: $args" \n - append errmsg "errorcode: $::errorCode" \n - set tmp_errors [list $::errorCode] - #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share - #we can use //?/path dos device path - but not with tcl functions - #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. - #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here - - set fixedtail "" - - set parent [file dirname $folderpath] - set badtail [file tail $folderpath] - set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames - while {[twapi::find_file_next $iterator iteminfo]} { - set nm [dict get $iteminfo name] - if {$nm eq $badtail} { - set fixedtail [dict get $iteminfo altname] - break + set fixedtail "" + + set parent [file dirname $folderpath] + set badtail [file tail $folderpath] + set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo" + if {$nm eq $badtail} { + set fixedtail [dict get $iteminfo altname] + break + } + } + + if {![string length $fixedtail]} { + dict lappend errors $folderpath {*}$tmp_errors + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } + + #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. + #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it + #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) + #so the illegalname_fix doesn't really work here + #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] + + #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. + set fixedpath $parent/$fixedtail + append errmsg "retrying with with windows dos device path $fixedpath\n" + puts stderr $errmsg + } - if {![string length $fixedtail]} { - dict lappend errors $folderpath {*}$tmp_errors - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" + if {[catch { + set iterator [twapi::find_file_open $fixedpath/* -detail basic] + } errMsg]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')" + puts stderr " (errorcode: $::errorCode)\n" + puts stderr "$errMsg" + dict lappend errors $folderpath $::errorCode return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } - #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. - #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it - #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) - #so the illegalname_fix doesn't really work here - #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] - #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. - set fixedpath [file join $parent $fixedtail] - append errmsg "retrying with with windows dos device path $fixedpath\n" - puts stderr $errmsg + } on error args { + set errmsg "error reading folder: $folderpath\n" + append errmsg "error: $args" \n + append errmsg "errorInfo: $::errorInfo" \n + puts stderr "$errmsg" + puts stderr "FAILED to collect info for folder '$folderpath'" + #append errmsg "aborting.." + #error $errmsg + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } + } + #jjj - if {[catch { - set iterator [twapi::find_file_open $fixedpath/* -detail basic] - } errMsg]} { - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')" - puts stderr " (errorcode: $::errorCode)\n" - puts stderr "$errMsg" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + if {![regexp $tcl_re $nm]} { + continue + } + if {$nm in {. ..}} { + continue } + set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path + #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] + #set ftype "" + set do_sizes 0 + set do_times 0 + #attributes applicable to any classification + set fullname [file_join_one $folderpath $nm] + + set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict + if {[dict get $attrdict -hidden]} { + lappend flaggedhidden $fullname + } + if {[dict get $attrdict -system]} { + lappend flaggedsystem $fullname + } + if {[dict get $attrdict -readonly]} { + lappend flaggedreadonly $fullname + } + if {[dict exists $attrdict -debug]} { + dict set debuginfo $fullname [dict get $attrdict -debug] + } + set file_attributes [dict get $attrdict -fileattributes] - } on error args { - set errmsg "error reading folder: $folderpath\n" - append errmsg "error: $args" \n - append errmsg "errorInfo: $::errorInfo" \n - puts stderr "$errmsg" - puts stderr "FAILED to collect info for folder '$folderpath'" - #append errmsg "aborting.." - #error $errmsg - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - - } - } - set dirs [list] - set files [list] - set filesizes [list] - set allsizes [dict create] - set alltimes [dict create] + set is_reparse_point [expr {"reparse_point" in $file_attributes}] + set is_directory [expr {"directory" in $file_attributes}] - set links [list] - set linkinfo [dict create] - set debuginfo [dict create] - set flaggedhidden [list] - set flaggedsystem [list] - set flaggedreadonly [list] + set linkdata [dict create] + # ----------------------------------------------------------- + #main classification + if {$is_reparse_point} { + #this concept doesn't correspond 1-to-1 with unix links + #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points + #review - and see which if any actually belong in the links key of our return - while {[twapi::find_file_next $iterator iteminfo]} { - set nm [dict get $iteminfo name] - #recheck glob - #review! - if {![string match $opt_glob $nm]} { - continue - } - set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path - #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] - set ftype "" - #attributes applicable to any classification - set fullname [file_join_one $folderpath $nm] - - set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict - set file_attributes [dict get $attrdict -fileattributes] - - set linkdata [dict create] - # ----------------------------------------------------------- - #main classification - if {"reparse_point" in $file_attributes} { - #this concept doesn't correspond 1-to-1 with unix links - #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points - #review - and see which if any actually belong in the links key of our return - - - #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point - # - #we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? - #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' - #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls - #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} - #e.g (stripped of headers/footers and other lines) - #2022-10-02 04:07 AM priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] - #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. - #du includes the size of the symlink - #but we can't get it with tcl's file size - #twapi doesn't seem to have anything to help read it either (?) - #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link - # - #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. - # - #links are techically files too, whether they point to a file/dir or nothing. - lappend links $fullname - set ftype "l" - dict set linkdata linktype reparse_point - dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] - if {"directory" ni $file_attributes} { - dict set linkdata target_type file - } - } - if {"directory" in $file_attributes} { - if {$nm in {. ..}} { - continue + + #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point + # + #we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? + #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' + #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls + #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} + #e.g (stripped of headers/footers and other lines) + #2022-10-02 04:07 AM priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] + #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. + #du includes the size of the symlink + #but we can't get it with tcl's file size + #twapi doesn't seem to have anything to help read it either (?) + #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link + # + #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. + # + #links are techically files too, whether they point to a file/dir or nothing. + lappend links $fullname + #set ftype "l" + if {"l" in $sized_types} { + set do_sizes 1 + } + if {"l" in $timed_types} { + set do_times 1 + } + dict set linkdata linktype reparse_point + dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] + if {"directory" ni $file_attributes} { + dict set linkdata target_type file + } } - if {"reparse_point" ni $file_attributes} { - lappend dirs $fullname - set ftype "d" - } else { - #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections - dict set linkdata target_type directory + if {$is_directory} { + #if {$nm in {. ..}} { + # continue + #} + if {!$is_reparse_point} { + lappend dirs $fullname + #set ftype "d" + if {"d" in $sized_types} { + set do_sizes 1 + } + if {"d" in $timed_types} { + set do_times 1 + } + } else { + #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections + dict set linkdata target_type directory + } } - } - if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} { - #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? - lappend files $fullname - if {"f" in $sized_types} { - lappend filesizes [dict get $iteminfo size] + if {!$is_reparse_point && !$is_directory} { + #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? + lappend files $fullname + if {"f" in $sized_types} { + lappend filesizes [dict get $iteminfo size] + set do_sizes 1 + } + if {"f" in $timed_types} { + set do_times 1 + } + #set ftype "f" } - set ftype "f" - } - # ----------------------------------------------------------- + # ----------------------------------------------------------- - if {[dict get $attrdict -hidden]} { - lappend flaggedhidden $fullname - } - if {[dict get $attrdict -system]} { - lappend flaggedsystem $fullname - } - if {[dict get $attrdict -readonly]} { - lappend flaggedreadonly $fullname - } - if {$ftype in $sized_types} { - dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] - } - if {$ftype in $timed_types} { - #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) - #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds - #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict - dict set alltimes $fullname [dict create\ - c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\ - a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\ - m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ - ] - } - if {[dict size $linkdata]} { - dict set linkinfo $fullname $linkdata - } - if {[dict exists $attrdict -debug]} { - dict set debuginfo $fullname [dict get $attrdict -debug] + if {$do_sizes} { + dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] + } + if {$do_times} { + #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) + #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds + #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict + dict set alltimes $fullname [dict create\ + c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\ + a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\ + m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ + ] + } + if {[dict size $linkdata]} { + dict set linkinfo $fullname $linkdata + } } + twapi::find_file_close $iterator } - twapi::find_file_close $iterator - set vfsmounts [get_vfsmounts_in_folder $folderpath] + set vfsmounts [get_vfsmounts_in_folder $folderpath] set effective_opts $opts dict set effective_opts -with_times $timed_types dict set effective_opts -with_sizes $sized_types #also determine whether vfs. file system x is *much* faster than file attributes #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder - return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] + return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] } proc get_vfsmounts_in_folder {folderpath} { set vfsmounts [list] @@ -991,9 +1343,11 @@ namespace eval punk::du { #work around the horrible tilde-expansion thing (not needed for tcl 9+) proc file_join_one {base newtail} { if {[string index $newtail 0] ne {~}} { - return [file join $base $newtail] + #return [file join $base $newtail] + return $base/$newtail } - return [file join $base ./$newtail] + #return [file join $base ./$newtail] + return $base/./$newtail } @@ -1121,7 +1475,7 @@ namespace eval punk::du { #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #struct::set will affect order: tcl vs critcl give different ordering! set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links] - set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]] + set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]] #---- diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm index 6a7b79d6..fe8cfc7e 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm @@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check { #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #Not any sort of comprehensive check of known tcl bugs. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases. + + proc has_tclbug_caseinsensitiveglob_windows {} { + #https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted + + if {"windows" ne $::tcl_platform(platform)} { + set bug 0 + } else { + set tmpdir [file tempdir] + set testfile [file join $tmpdir "bugtest"] + set fd [open $testfile w] + puts $fd test + close $fd + set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*] + foreach r $globresult { + if {$r ne "bugtest"} { + set bug 1 + break + } + } + return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning] + #possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized + # to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation. + } + } + + #todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies. + # but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module, + # or at least checking for an existing mounted case-insensitive filesystem. + # A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like. + # It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix. + + proc has_tclbug_regexp_emptystring {} { #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm index e767e366..073b6cce 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm @@ -50,7 +50,7 @@ package require punk::lib package require punk::args package require punk::ansi package require punk::winpath -package require punk::du +package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing. package require commandstack #*** !doctools #[item] [package {Tcl 8.6}] @@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs { #[list_begin definitions] + punk::args::define { + @id -id ::punk::nav::fs::d/ + @cmd -name punk::nav::fs::d/ -help\ + {List directories or directories and files in the current directory or in the + targets specified with the fileglob_or_target glob pattern(s). + + If a single target is specified without glob characters, and it exists as a directory, + then the working directory is changed to that target and a listing of that directory + is returned. If the single target specified without glob characters does not exist as + a directory, then it is treated as a glob pattern and the listing is for the current + directory with results filtered to match fileglob_or_target. + + If multiple targets or glob patterns are specified, then a separate listing is returned + for each fileglob_or_target pattern. + + This function is provided via aliases as ./ and .// with v being inferred from the alias + name, and also as d/ with an explicit v argument. + The ./ and .// forms are more convenient for interactive use. + examples: + ./ - list directories in current directory + .// - list directories and files in current directory + ./ src/* - list directories in src + .// src/* - list directories and files in src + .// *.txt - list files in current directory with .txt extension + .// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name + (on a case-insensitive filesystem this would also match T*1.txt etc) + .// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name + (glob chars treated as literals due to being in character-class brackets + This will match files beginning with a capital T and not lower case t + even on a case-insensitive filesystem.) + .// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns: + {[t]*} - names beginning with t + {d{e,d}*} - names beginning with de or dd + (on a case-insensitive filesystem the first pattern would also match names beginning with T) + } + @values -min 1 -max -1 -type string + v -type string -choices {/ //} -help\ + " + / - list directories only + // - list directories and files + " + fileglob_or_target -type string -optional true -multiple true -help\ + "A glob pattern as supported by Tcl's 'glob' command, to filter results. + If multiple patterns are supplied, then a listing for each pattern is returned. + If no patterns are supplied, then all items are listed." + } + #NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread. #As this function recurses and calls cd multiple times - it's not thread-safe. #Another thread could theoretically cd whilst this is running. @@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs { #puts stdout "-->[ansistring VIEW $result]" return $result } else { - set atail [lassign $args cdtarget] if {[llength $args] == 1} { set cdtarget [lindex $args 0] switch -exact -- $cdtarget { . - ./ { - tailcall punk::nav::fs::d/ + tailcall punk::nav::fs::d/ $v } .. - ../ { if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} { @@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs { if {[string range $cdtarget_copy 0 3] eq "//?/"} { #handle dos device paths - convert to normal path for glob testing set glob_test [string range $cdtarget_copy 3 end] - set cdtarget_is_glob [regexp {[*?]} $glob_test] + set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test] } else { - set cdtarget_is_glob [regexp {[*?]} $cdtarget] + set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget] + #todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing. } if {!$cdtarget_is_glob} { set cdtarget_file_type [file type $cdtarget] @@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs { } set VIRTUAL_CWD $cdtarget set curdir $cdtarget + tailcall punk::nav::fs::d/ $v } else { set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] if {[string match //zipfs:/* $VIRTUAL_CWD]} { @@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs { } if {[file type $target] eq "directory"} { set VIRTUAL_CWD $target + tailcall punk::nav::fs::d/ $v } } - tailcall punk::nav::fs::d/ $v } set curdir $VIRTUAL_CWD } else { @@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs { #globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.) - set searchspec [lindex $args 0] set result "" #set chunklist [list] @@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs { set this_result [dict create] foreach searchspec $args { set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)] - set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}] + #we need to support the same glob chars that Tcl's 'glob' command accepts. + set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]] + #we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean #this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough. #lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*) @@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs { set allow_nonportable [dict exists $received -nonportable] set curdir [pwd] - set fullpath_list [list] + + set fullpath_list [list] ;#list of full paths to create. + set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir) + #these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests. + set error_paths [list] foreach p $paths { if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { @@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs { lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] continue } - set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] - #e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. - set fullpath [file join $path1 {*}[lrange $args 1 end]] + set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)] + #if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir. #Some subpaths of the supplied paths to create may already exist. - #we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. + #we should test write permissions on the nearest existing parent of the supplied path to create, + #rather than just on the immediate parent segment of the supplied path itself which may not exist. + + set fullpath [file normalize $fullpath] set parent [file dirname $fullpath] while {![file exists $parent]} { set parent [file dirname $parent] @@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs { lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] continue } + lappend existing_parent_list $parent lappend fullpath_list $fullpath } if {[llength $fullpath_list] != [llength $paths]} { @@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs { set num_created 0 set error_string "" - foreach fullpath $fullpath_list { + foreach fullpath $fullpath_list existing_parent $existing_parent_list { + #calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue. + #set relative_path [file relative $fullpath $existing_parent] + #todo. if {[catch {file mkdir $fullpath}]} { set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." + cd $curdir break } incr num_created @@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs { if {$error_string ne ""} { error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." } - d/ $curdir + + #display summaries of created directories (which may have already existed) by reusing d/ to get info on them. + set query_paths [lmap v $paths $v/*] + d/ / {*}$query_paths } #todo use unknown to allow d/~c:/etc ?? @@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs { if {![file isdirectory $target]} { error "Folder $target not found" } - d/ $target + d/ / $target } @@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs { } set relativepath [expr {[file pathtype $searchspec] eq "relative"}] - set has_tailglobs [regexp {[?*]} [file tail $searchspec]] + + #set has_tailglobs [regexp {[?*]} [file tail $searchspec]] + set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]] #dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent. #(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict) @@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs { } puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location" set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location] - return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] + return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] } #todo - package as punk::nav::fs @@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs { } proc dirfiles_dict {args} { set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict] - lassign [dict values $argd] leaders opts vals - set searchspecs [dict values $vals] + lassign [dict values $argd] leaders opts values + set searchspecs [dict values $values] #puts stderr "searchspecs: $searchspecs [llength $searchspecs]" #puts stdout "arglist: $opts" @@ -893,7 +958,7 @@ tcl::namespace::eval punk::nav::fs { set searchspec [lindex $searchspecs 0] # -- --- --- --- --- --- --- set opt_searchbase [dict get $opts -searchbase] - set opt_tailglob [dict get $opts -tailglob] + set opt_tailglob [dict get $opts -tailglob] set opt_with_sizes [dict get $opts -with_sizes] set opt_with_times [dict get $opts -with_times] # -- --- --- --- --- --- --- @@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs { } } "\uFFFF" { - set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] + set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]] + + #set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] if {$searchtail_has_globs} { if {$is_relativesearchspec} { #set location [file dirname [file join $searchbase $searchspec]] @@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs { } else { set next_opt_with_times [list -with_times $opt_with_times] } + + set ts1 [clock clicks -milliseconds] if {$is_in_vfs} { set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] } else { @@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs { } } } + set ts2 [clock clicks -milliseconds] + set ts_listing [expr {$ts2 - $ts1}] set dirs [dict get $listing dirs] set files [dict get $listing files] @@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs { set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden] } - set dirs [lsort -dictionary $dirs] ;#todo - natsort + #----------------------------------------------------------------------------------------- + set ts1 [clock milliseconds] + set dirs [lsort -dictionary $dirs] ;#todo - natsort #foreach d $dirs { # if {[lindex [file system $d] 0] eq "tclvfs"} { @@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs { set files $sorted_files set filesizes $sorted_filesizes + set ts2 [clock milliseconds] + set ts_sorting [expr {$ts2 - $ts1}] + #----------------------------------------------------------------------------------------- # -- --- #jmn + set ts1 [clock milliseconds] foreach nm [list {*}$dirs {*}$files] { if {[punk::winpath::illegalname_test $nm]} { lappend nonportable $nm } } + set ts2 [clock milliseconds] + set ts_nonportable_check [expr {$ts2 - $ts1}] + set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms] + set front_of_dict [dict create location $location searchbase $opt_searchbase] set listing [dict merge $front_of_dict $listing] - set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes] + set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info] return [dict merge $listing $updated] } @@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs { @id -id ::punk::nav::fs::dirfiles_dict_as_lines -stripbase -default 0 -type boolean -formatsizes -default 1 -type boolean - -listing -default "/" -choices {/ // //} + -listing -default "/" -choices {/ //} @values -min 1 -max -1 -type dict -unnamed true } #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? proc dirfiles_dict_as_lines {args} { - set ts1 [clock milliseconds] + #set ts1 [clock milliseconds] package require overtype set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] lassign [dict values $argd] leaders opts vals @@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs { #review - symlink to shortcut? hopefully will just work #classify as file or directory - fallback to file if unknown/undeterminable set finfo_plus [list] - set ts2 [clock milliseconds] + #set ts2 [clock milliseconds] foreach fdict $finfo { set fname [dict get $fdict file] if {[file extension $fname] eq ".lnk"} { @@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs { } unset finfo - puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" - puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" + #puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" + #puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] @@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs { #consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path #review: punk::winpath calls cygpath! #review: file pathtype is platform dependant - proc path_to_absolute {path base platform} { - set ptype [file pathtype $path] + proc path_to_absolute {subpath base platform} { + set ptype [file pathtype $subpath] if {$ptype eq "absolute"} { - set path_absolute $path + set path_absolute $subpath } elseif {$ptype eq "volumerelative"} { if {$platform eq "windows"} { #unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms) - if {[string index $path 0] eq "/"} { + if {[string index $subpath 0] eq "/"} { #this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here #It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions. #Todo - tidy up. package require punk::unixywindows - set path_absolute [punk::unixywindows::towinpath $path] + set path_absolute [punk::unixywindows::towinpath $subpath] #puts stderr "winpath: $path" } else { #todo handle volume-relative paths with volume specified c:etc c: @@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs { #The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives #Arguably if ...? - #set path_absolute $base/$path - set path_absolute $path + #set path_absolute $base/$subpath + set path_absolute $subpath } } else { # unknown what paths are reported as this on other platforms.. treat as absolute for now - set path_absolute $path + set path_absolute $subpath } } else { - set path_absolute $base/$path - } - if {$platform eq "windows"} { - if {[punk::winpath::illegalname_test $path_absolute]} { - set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present - } + #e.g relative subpath=* base = c:/test -> c:/test/* + #e.g relative subpath=../test base = c:/test -> c:/test/../test + #e.g relative subpath=* base = //server/share/test -> //server/share/test/* + set path_absolute $base/$subpath } + #fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues. + #if {$platform eq "windows"} { + # if {[punk::winpath::illegalname_test $path_absolute]} { + # set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present + # } + #} return $path_absolute } proc strip_prefix_depth {path prefix} { @@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib { #[para] Secondary functions that are part of the API #[list_begin definitions] - #proc utility1 {p1 args} { - # #*** !doctools - # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] - # #[para]Description of utility1 - # return 1 - #} + punk::args::define { + @id -id ::punk::nav::fs::lib::is_fileglob + @cmd -name punk::nav::fs::lib::is_fileglob + @values -min 1 -max 1 + path -type string -required true -help\ + {String to test for being a glob pattern as recognised by the tcl 'glob' command. + If the string represents a path with multiple segments, only the final component + of the path will be tested for glob characters. + Glob patterns in this context are different to globs accepted by TCL's 'string match'. + A glob pattern is any string that contains unescaped * ? { } [ or ]. + This will not detect mismatched unescaped braces or brackets. + Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern. + } + } + proc is_fileglob {str} { + #a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser) + set in_escape 0 + set segments [file split $str] + set tail [lindex $segments end] + foreach c [split $tail ""] { + if {$in_escape} { + set in_escape 0 + } else { + if {$c eq "\\"} { + set in_escape 1 + } elseif {$c in [list * ? "\[" "\]" "{" "}" ]} { + return 1 + } + } + } + return 0 + } #*** !doctools diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm index 46ca4aa2..de536724 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm @@ -357,7 +357,7 @@ namespace eval punk::path { } } } - puts "==>finalparts: '$finalparts'" + #puts "==>finalparts: '$finalparts'" # using join - {"" "" server share} -> //server/share and {a b} -> a/b if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} { #backtracking on unix-style path can end up with empty string as only member of finalparts diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm index 9079dbbc..1f2948fe 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm @@ -114,6 +114,50 @@ namespace eval punk::winpath { return $path } } + + proc illegal_char_map_to_doublewide {ch} { + #windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc) + set map [dict create \ + "<" "\uFF1C" \ + ">" "\uFF1E" \ + ":" "\uFF1A" \ + "\"" "\uFF02" \ + "/" "\uFF0F" \ + "\\" "\uFF3C" \ + "|" "\uFF5C" \ + "?" "\uFF1F" \ + "*" "\uFF0A"] + if {$ch in [dict keys $map]} { + return [dict get $map $ch] + } else { + return $ch + } + } + proc illegal_char_map_to_ntfs {ch} { + #windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc. + #see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars + #see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped) + + set map [dict create \ + "<" "\uF03C" \ + ">" "\uF03E" \ + ":" "\uF03A" \ + "\"" "\uF022" \ + "/" "unknown" \ + "\\" "\uF05c" \ + "|" "\uF07C" \ + "?" "\uF03F" \ + "*" "\uF02A"] + #note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API. + if {$ch in [dict keys $map]} { + return [dict get $map $ch] + } else { + return $ch + } + } + + + #we don't validate that path is actually illegal because we don't know the full range of such names. #The caller can apply this to any path. #don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently) @@ -200,8 +244,15 @@ namespace eval punk::winpath { set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] #we need to exclude things like path/.. path/. - foreach seg [file split $path] { - if {$seg in [list . ..]} { + set segments [file split $path] + if {[file pathtype $path] eq "absolute"} { + #absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments. + set checksegments [lrange $segments 1 end] + } else { + set checksegments $segments + } + foreach seg $checksegments { + if {$seg in {. ..}} { #review - what if there is a folder or file that actually has a name such as . or .. ? #unlikely in normal use - but could done deliberately for bad reasons? #We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem. @@ -220,10 +271,17 @@ namespace eval punk::winpath { #only check for actual space as other whitespace seems to work without being stripped #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph - if {[string index $seg end] in [list " " "."]} { + if {[string index $seg end] in {" " .}} { #windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc) return 1 } + #set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams. + set re {[<>:"/\\|?*]} + + if {[regexp $re $seg]} { + #windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc) + return 1 + } } #glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph) #- they seem to be readable from cmd and tclsh as is. diff --git a/src/vfs/_vfscommon.vfs/modules/punk-0.1.tm b/src/vfs/_vfscommon.vfs/modules/punk-0.1.tm index ea72ad1c..e1648d9d 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk-0.1.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk-0.1.tm @@ -6066,9 +6066,218 @@ namespace eval punk { set pipe [punk::path_list_pipe $glob] {*}$pipe } - proc path {{glob *}} { + proc path_basic {{glob *}} { set pipe [punk::path_list_pipe $glob] {*}$pipe |> list_as_lines + } + namespace eval argdoc { + punk::args::define { + @id -id ::punk::path + @cmd -name "punk::path" -help\ + "Introspection of the PATH environment variable. + This tool will examine executables within each PATH entry and show which binaries + are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns." + @opts + -binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries." + @values -min 0 -max -1 + glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories." + } + } + proc path {args} { + set argd [punk::args::parse $args withid ::punk::path] + lassign [dict values $argd] leaders opts values received + set binglobs [dict get $opts -binglobs] + set globs [dict get $values glob] + if {$::tcl_platform(platform) eq "windows"} { + set sep ";" + } else { + # : ok for linux/bsd ... mac? + set sep ":" + } + set all_paths [split [string trimright $::env(PATH) $sep] $sep] + set filtered_paths $all_paths + if {[llength $globs]} { + set filtered_paths [list] + foreach p $all_paths { + foreach g $globs { + if {[string match -nocase $g $p]} { + lappend filtered_paths $p + break + } + } + } + } + + #Windows executable search location order: + #1. The current directory. + #2. The directories that are listed in the PATH environment variable. + # within each directory windows uses the PATHEXT environment variable to determine + #which files are considered executable and in which order they are considered. + #So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files + #with the same name but different extensions in the same directory, then the one + #with the extension that appears first in PATHEXT will be executed when that name is called. + + #On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output. + #Duplicate PATH entries are also a fairly likely possibility on all platforms. + #This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries. + + #By default we don't want to display all executables in all PATH entries - as this can be very verbose + #- but we want to be able to show which executables are overshadowed by which PATH entries, + #and to be able to filter the PATH entries and the executables using glob patterns. + #To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name. + #The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths + #in the original PATH list. The executable dict will contain the paths and path indices where each executable is found, + #and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a + #dict keyed by path index which contains the list of executables in that path - to make it easy to show which + #executables are overshadowed by which paths. + + set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows). + set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off). + set d_index_executables [dict create] ;#key is path index, value is list of executables in that path + set path_idx 0 + foreach p $all_paths { + if {$::tcl_platform(platform) eq "windows"} { + set pnorm [string tolower $p] + } else { + set pnorm $p + } + if {![dict exists $d_path_info $pnorm]} { + dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]] + set executables [list] + if {[file isdirectory $p]} { + #get all files that are executable in this path. + #If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names. + set pnormglob [file normalize $p] + if {$::tcl_platform(platform) eq "windows"} { + #Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable. + #We need to account for this in our glob pattern. + set pathexts [list] + if {[info exists ::env(PATHEXT)]} { + set env_pathexts [split $::env(PATHEXT) ";"] + #set pathexts [lmap e $env_pathexts {string tolower $e}] + foreach pe $env_pathexts { + if {$pe eq "."} { + continue + } + lappend pathexts [string tolower $pe] + } + } else { + set env_pathexts [list] + #default PATHEXT if not set - according to Microsoft docs + set pathexts [list .com .exe .bat .cmd] + } + foreach bg $binglobs { + set has_pathext 0 + foreach pe $pathexts { + if {[string match -nocase "*$pe" $bg]} { + set has_pathext 1 + break + } + } + if {!$has_pathext} { + foreach pe $pathexts { + lappend binglobs "$bg$pe" + } + } + } + set lc_binglobs [lmap e $binglobs {string tolower $e}] + if {"." in $pathexts} { + foreach bg $binglobs { + set has_pathext 0 + foreach pe $pathexts { + if {[string match -nocase "*$pe" $bg]} { + set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]] + set has_pathext 1 + break + } + } + if {$has_pathext} { + if {[string tolower $base] ni $lc_binglobs} { + lappend binglobs "$base" + } + } + } + } + + #TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem. + #(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug) + #We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results. + #The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for. + #(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe') + set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]] + set executables [list] + foreach e $globresults { + puts stderr "glob result: $e" + puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]" + lappend executables [file tail [file normalize $e]] + } + } else { + set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]] + } + } + dict set d_index_executables $path_idx $executables + foreach exe $executables { + if {$::tcl_platform(platform) eq "windows"} { + set exenorm [string tolower $exe] + } else { + set exenorm $exe + } + if {![dict exists $d_bin_info $exenorm]} { + dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]] + } else { + #dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe + set bindata [dict get $d_bin_info $exenorm] + dict lappend bindata path_indices $path_idx + dict lappend bindata paths $p + dict lappend bindata executable_names $exe + dict set d_bin_info $exenorm $bindata + } + } + } else { + #duplicate path entry - add to list of original paths for this normalized path + + # Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...? + # we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it. + set pathdata [dict get $d_path_info $pnorm] + dict lappend pathdata original_paths $p + dict lappend pathdata indices $path_idx + dict set d_path_info $pnorm $pathdata + + + #we don't need to add executables for this path - as they will be the same as the original path that we have already processed. + #However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables. + set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path + foreach exe $executables { + if {$::tcl_platform(platform) eq "windows"} { + set exenorm [string tolower $exe] + } else { + set exenorm $exe + } + #dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe + set bindata [dict get $d_bin_info $exenorm] + dict lappend bindata path_indices $path_idx + dict lappend bindata paths $p + dict lappend bindata executable_names $exe + dict set d_bin_info $exenorm $bindata + } + } + + incr path_idx + } + + #temporary debug output to check dicts are being built correctly + set debug "" + append debug "Path info dict:" \n + append debug [showdict $d_path_info] \n + append debug "Binary info dict:" \n + append debug [showdict $d_bin_info] \n + append debug "Index executables dict:" \n + append debug [showdict $d_index_executables] \n + #return $debug + puts stdout $debug + + + } #------------------------------------------------------------------- diff --git a/src/vfs/_vfscommon.vfs/modules/punk/du-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/du-0.1.0.tm index bc753154..c9ab54fd 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/du-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/du-0.1.0.tm @@ -479,7 +479,7 @@ namespace eval punk::du { } namespace eval lib { variable du_literal - variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] + variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ] #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this @@ -489,7 +489,11 @@ namespace eval punk::du { return [dict get $winfile_attributes $bitmask] } else { #list/dict shimmering? - return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] + + set decoded [twapi::decode_file_attributes $bitmask] + dict set winfile_attributes $bitmask $decoded + return $decoded } } variable win_reparse_tags @@ -563,23 +567,25 @@ namespace eval punk::du { #then twapi::device_ioctl (win32 DeviceIoControl) #then parse buffer somehow (binary scan..) #https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913 - + punk::args::define { + @id -id ::punk::du::lib::Get_attributes_from_iteminfo + -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" + -debugchannel -default stderr -help "channel to write debug output, or none to append to output" + @values -min 1 -max 1 + iteminfo -help "iteminfo dict as set by 'twapi::find_file_next iteminfo'" + } + #don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args. proc Get_attributes_from_iteminfo {args} { variable win_reparse_tags_by_int + #set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo] + set defaults [dict create -debug 0 -debugchannel stderr] + set opts [dict merge $defaults [lrange $args 0 end-1]] + set iteminfo [lindex $args end] - set argd [punk::args::parse $args withdef { - @id -id ::punk::du::lib::Get_attributes_from_iteminfo - -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" - -debugchannel -default stderr -help "channel to write debug output, or none to append to output" - @values -min 1 -max 1 - iteminfo -help "iteminfo dict as set by 'twapi::find_file_next iteminfo'" - }] - set opts [dict get $argd opts] - set iteminfo [dict get $argd values iteminfo] set opt_debug [dict get $opts -debug] set opt_debugchannel [dict get $opts -debugchannel] #-longname is placeholder - caller needs to set - set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0] + set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}] if {$opt_debug} { set dbg "iteminfo returned by find_file_open\n" append dbg [pdict -channel none iteminfo] @@ -592,34 +598,38 @@ namespace eval punk::du { } set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] - if {"hidden" in $attrinfo} { - dict set result -hidden 1 - } - if {"system" in $attrinfo} { - dict set result -system 1 - } - if {"readonly" in $attrinfo} { - dict set result -readonly 1 - } - dict set result -shortname [dict get $iteminfo altname] - dict set result -fileattributes $attrinfo - if {"reparse_point" in $attrinfo} { - #the twapi API splits this 32bit value for us - set low_word [dict get $iteminfo reserve0] - set high_word [dict get $iteminfo reserve1] - # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 - # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 - #+-+-+-+-+-----------------------+-------------------------------+ - #|M|R|N|R| Reserved bits | Reparse tag value | - #+-+-+-+-+-----------------------+-------------------------------+ - #todo - is_microsoft from first bit of high_word - set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? - if {[dict exists $win_reparse_tags_by_int $low_int]} { - dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] - } else { - dict set result -reparseinfo [dict create tag "" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"] + foreach attr $attrinfo { + switch -- $attr { + hidden { + dict set result -hidden 1 + } + system { + dict set result -system 1 + } + readonly { + dict set result -readonly 1 + } + reparse_point { + #the twapi API splits this 32bit value for us + set low_word [dict get $iteminfo reserve0] + set high_word [dict get $iteminfo reserve1] + # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + #+-+-+-+-+-----------------------+-------------------------------+ + #|M|R|N|R| Reserved bits | Reparse tag value | + #+-+-+-+-+-----------------------+-------------------------------+ + #todo - is_microsoft from first bit of high_word + set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter? + if {[dict exists $win_reparse_tags_by_int $low_int]} { + dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] + } else { + dict set result -reparseinfo [dict create tag "" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"] + } + } } } + #dict set result -shortname [dict get $iteminfo altname] + dict set result -fileattributes $attrinfo dict set result -raw $iteminfo return $result } @@ -652,27 +662,337 @@ namespace eval punk::du { catch {twapi::find_file_close $iterator} } } + proc resolve_characterclass {cclass} { + #takes the inner value from a tcl square bracketed character class and converts to a list of characters. + + #todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters + #e.g a-c-3 -> a b c - 3 + #e.g a-c-3-5 -> a b c - 3 4 5 + #e.g a-c -> a b c + #e.g a- -> a - + #e.g -c-e -> - c d e + #the tcl character class does not support negation or intersection - so we can ignore those possibilities for now. + #in this context we do not need to support named character classes such as [:digit:] + set chars [list] + set i 0 + set len [string length $cclass] + set accept_range 0 + while {$i < $len} { + set ch [string index $cclass $i] + if {$ch eq "-"} { + if {$accept_range} { + set start [string index $cclass [expr {$i - 1}]] + set end [string index $cclass [expr {$i + 1}]] + if {$start eq "" || $end eq ""} { + #invalid range - treat - as literal + if {"-" ni $chars} { + lappend chars "-" + } + } else { + #we have a range - add all chars from previous char to next char + #range may be in either direction - e.g a-c or c-a but we don't care about the order of our result. + if {$start eq $end} { + #degenerate range - treat as single char + if {$start ni $chars} { + lappend chars $start + } + } else { + if {[scan $start %c] < [scan $end %c]} { + set c1 [scan $start %c] + set c2 [scan $end %c] + } else { + set c1 [scan $end %c] + set c2 [scan $start %c] + } + for {set c $c1} {$c <= $c2} {incr c} { + set char [format %c $c] + if {$char ni $chars} { + lappend chars $char + } + } + } + + incr i ;#skip end char as it's already included in range + } + set accept_range 0 + } else { + #we have a literal - add to list and allow for possible range if next char is also a - + if {"-" ni $chars} { + lappend chars "-" + } + set accept_range 1 + } + } else { #we have a literal - add to list and allow for possible range if next char is also a - + if {$ch ni $chars} { + lappend chars $ch + } + set accept_range 1 + } + incr i + } + return $chars + } + + #return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible. + #first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes. + # e.g {*[\{]*} is a valid tcl glob + + #todo - write tests. + #should also support {*\{*} matching a file such as a{blah}b + + #Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL. + #So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity. + + proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} { + #windows API function in use is the FindFirstFile set of functions. + #these support wildcards * and ? *only*. + + #examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported + # {abc[1-3].txt} -> {abc?.txt} + # {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char + # {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call. + # {{S,t}*.txt} -> {S*.txt} {T*.txt} + # *.{txt,log} -> {*.txt} {*.log} + # {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals + + + set gchars [split $tclglob ""] + set winglob_list [list ""] + set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob. + set in_brackets 0 + set esc_next 0 + + set brace_depth 0 + set brace_content "" + set braced_alternatives [list] + + set brace_is_normal 0 + + set cclass_content "" + foreach ch $gchars { + if {$esc_next} { + if {$in_brackets} { + append cclass_content $ch + continue + } elseif {$brace_depth} { + append brace_content $ch + continue + } + set winglob_list [lmap wg $winglob_list {append wg $ch}] + set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}] + set esc_next 0 + continue + } + + if {$ch eq "\{"} { + if {$brace_depth} { + #we have an opening brace char inside braces + #Let the brace processing handle it as it recurses. + incr brace_depth 1 + append brace_content $ch + continue + } + incr brace_depth 1 + set brace_content "" + } elseif {$ch eq "\}"} { + if {$brace_depth > 1} { + #we have a closing brace char inside braces + append brace_content $ch + incr brace_depth -1 + continue + } + #process brace_content representing a list of alternatives + #handle list of alternatives - convert {txt,log} to *.txt;*.log + #set alternatives [split $brace_content ","] + lappend braced_alternatives $brace_content + set alternatives $braced_alternatives + set braced_alternatives [list] + set brace_content "" + incr brace_depth -1 + + set alt_winpatterns [list] + set alt_regexps [list] + foreach alt $alternatives { + set subresult [tclglob_equivalents $alt] + lappend alt_winpatterns {*}[dict get $subresult winglobs] + lappend alt_regexps {*}[dict get $subresult tclregexps] + } + set next_winglob_list [list] + set next_regexp_list [list] + foreach wg $winglob_list re $tclregexp_list { + #puts "wg: $wg" + #puts "re: $re" + foreach alt_wp $alt_winpatterns alt_re $alt_regexps { + #puts " alt_wp: $alt_wp" + #puts " alt_re: $alt_re" + lappend next_winglob_list "$wg$alt_wp" + set alt_re_no_caret [string range $alt_re 1 end] + lappend next_regexp_list "${re}${alt_re_no_caret}" + } + } + + set winglob_list $next_winglob_list + set tclregexp_list $next_regexp_list + + } elseif {$ch eq "\["} { + #windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char + + if {!$brace_depth} { + set in_brackets 1 + } else { + #we have a [ char inside braces + #Let the brace processing handle it as it recurses. + #but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets. + set brace_is_normal 1 + append brace_content $ch + } + } elseif {$ch eq "\]"} { + if {$brace_depth} { + #we have a ] char inside braces + #Let the brace processing hanele it as it recurses. + append brace_content $ch + continue + } + set in_brackets 0 + set charlist [resolve_characterclass $cclass_content] + set cclass_content "" + set next_winglob_list [list] + set next_regexp_list [list] + foreach c $charlist { + #set winglob_list [lmap wg $winglob_list {append wg $c}] + foreach wg $winglob_list { + lappend next_winglob_list "$wg$c" + } + foreach re $tclregexp_list { + set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}] + lappend next_regexp_list "${re}${c_escaped}" + } + } + set winglob_list $next_winglob_list + set tclregexp_list $next_regexp_list + } elseif {$ch eq "\\"} { + if {$in_brackets} { + append cclass_content $ch + #append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself. + set esc_next 1 + continue + } + if {$brace_depth} { + #we have a \ char inside braces + #Let the brace processing handle it as it recurses. + append brace_content $ch + set esc_next 1 + continue + } + set esc_next 1 + continue + } else { + if {$in_brackets} { + append cclass_content $ch + continue + } + if {!$brace_depth} { + set winglob_list [lmap wg $winglob_list {append wg $ch}] + set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}] + if {[string length $re_ch] == 1} { + switch -- $re_ch { + "?" {set re_ch "."} + "*" {set re_ch ".*"} + default { + #we could use the same mixed case filter here for both sensitive and insensitive filesystems, + #because the API filtering will already have done the restriction, + #and so a more permissive regex that matches both cases will still only match the results that the API call returns, + #which will be correct based on the case-sensitivity of the filesystem. + #It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob. + #ie in the parts of the original glob that were in square brackets. + + if {!$case_sensitive_filesystem} { + # add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob. + if {[string is upper $re_ch]} { + set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"] + } elseif {[string is lower $re_ch]} { + set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"] + } else { + #non-alpha char - no need to add case-insensitivity + } + } + } + } + } + set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}] + } else { + #we have a literal char inside braces - add to current brace_content + if {$brace_depth == 1 && $ch eq ","} { + lappend braced_alternatives $brace_content + set brace_content "" + } else { + append brace_content $ch + } + } + } + } + #sanity check + if {[llength $winglob_list] != [llength $tclregexp_list]} { + error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]" + } + set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}] + return [dict create winglobs $winglob_list tclregexps $tclregexp_list] + } + #todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed? namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs # get listing without using unix-tools (may not be installed on the windows system) # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) # This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance + + #todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob. + # we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before + #we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder. + #For really large folders and a moderate number of patterns, this could be a significant performance improvement. proc du_dirlisting_twapi {folderpath args} { set defaults [dict create\ -glob *\ -filedebug 0\ + -patterndebug 0\ -with_sizes 1\ -with_times 1\ ] set opts [dict merge $defaults $args] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- - set opt_glob [dict get $opts -glob] + set tcl_glob [dict get $opts -glob] + + #todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps. + set case_sensitive_filesystem 0 ;#todo - consider detecting this properly. + #Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program. + #In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly, + #as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem, + #so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly. + #Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a + #case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem. + + #Note: we only use this to adjust the filtering regexps we generate from the tcl glob. + #The windows API pattern match will already filter based on the case-sensitivity of the filesystem + # so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem. + + set globs_processed [tclglob_equivalents $tcl_glob] + #we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob. + #temp + #set win_glob_list [list $tcl_glob] + set win_glob_list [dict get $globs_processed winglobs] + set tcl_regex_list [dict get $globs_processed tclregexps] + + + #review + # our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob. + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_with_sizes [dict get $opts -with_sizes] # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set opt_filedebug [dict get $opts -filedebug] ;#per file # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set opt_patterndebug [dict get $opts -patterndebug] + # -- --- --- --- --- --- --- --- --- --- --- --- --- --- set ftypes [list f d l] if {"$opt_with_sizes" in {0 1}} { #don't use string is boolean - (f false vs f file!) @@ -711,256 +1031,288 @@ namespace eval punk::du { } } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- + set dirs [list] + set files [list] + set filesizes [list] + set allsizes [dict create] + set alltimes [dict create] + + set links [list] + set linkinfo [dict create] + set debuginfo [dict create] + set flaggedhidden [list] + set flaggedsystem [list] + set flaggedreadonly [list] set errors [dict create] set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ # return it so it can be stored and tried as an alternative for problem paths - #puts stderr ">>> glob: $opt_glob" - #REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames - #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ - #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. - # using * all the time may be inefficient - so we might be able to avoid that in some cases. - try { - #glob of * will return dotfiles too on windows - set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field - } on error args { + + foreach win_glob $win_glob_list tcl_re $tcl_regex_list { + if {$opt_patterndebug} { + puts stderr ">>>du_dirlisting_twapi winglob: $win_glob" + puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re" + } + + #REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames + #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ + #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. + # using * all the time may be inefficient - so we might be able to avoid that in some cases. try { - if {[string match "*denied*" $args]} { - #output similar format as unixy du - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - } - if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" - puts stderr " (errorcode: $::errorCode)\n" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - } + #glob of * will return dotfiles too on windows + set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field + } on error args { + try { + if {[string match "*denied*" $args]} { + #output similar format as unixy du + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } + if {[string match "*TWAPI_WIN32 59*" $::errorCode]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)" + puts stderr " (errorcode: $::errorCode)\n" + dict lappend errors $folderpath $::errorCode + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + } - #errorcode TWAPI_WIN32 2 {The system cannot find the file specified.} - #This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error - #The find-all glob * won't get here because it returns . & .. - #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) - #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob - #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) - if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} { + #errorcode TWAPI_WIN32 2 {The system cannot find the file specified.} + #This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error + #The find-all glob * won't get here because it returns . & .. + #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) + #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob + #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) + #if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {} if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} { #looks like an ordinary no results for chosen glob - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + #return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + continue } - } - if {[set plen [pathcharacterlen $folderpath]] >= 250} { - set errmsg "error reading folder: $folderpath (len:$plen)\n" - append errmsg "error: $args" \n - append errmsg "errorcode: $::errorCode" \n - # re-fetch this folder with altnames - #file normalize - aside from being slow - will have problems with long paths - so this won't work. - #this function should only accept absolute paths - # - # - #Note: using -detail full only helps if the last segment of path has an altname.. - #To properly shorten we need to have kept track of altname all the way from the root! - #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd - #### SLOW - set fixedpath [dict get [file attributes $folderpath] -shortname] - #### SLOW - + if {[set plen [pathcharacterlen $folderpath]] >= 250} { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + # re-fetch this folder with altnames + #file normalize - aside from being slow - will have problems with long paths - so this won't work. + #this function should only accept absolute paths + # + # + #Note: using -detail full only helps if the last segment of path has an altname.. + #To properly shorten we need to have kept track of altname all the way from the root! + #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd + #### SLOW + set fixedpath [dict get [file attributes $folderpath] -shortname] + #### SLOW + + + append errmsg "retrying with with windows altname '$fixedpath'" + puts stderr $errmsg + } else { + set errmsg "error reading folder: $folderpath (len:$plen)\n" + append errmsg "error: $args" \n + append errmsg "errorcode: $::errorCode" \n + set tmp_errors [list $::errorCode] + #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share + #we can use //?/path dos device path - but not with tcl functions + #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. + #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here - append errmsg "retrying with with windows altname '$fixedpath'" - puts stderr $errmsg - } else { - set errmsg "error reading folder: $folderpath (len:$plen)\n" - append errmsg "error: $args" \n - append errmsg "errorcode: $::errorCode" \n - set tmp_errors [list $::errorCode] - #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share - #we can use //?/path dos device path - but not with tcl functions - #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again.. - #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here - - set fixedtail "" - - set parent [file dirname $folderpath] - set badtail [file tail $folderpath] - set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames - while {[twapi::find_file_next $iterator iteminfo]} { - set nm [dict get $iteminfo name] - if {$nm eq $badtail} { - set fixedtail [dict get $iteminfo altname] - break + set fixedtail "" + + set parent [file dirname $folderpath] + set badtail [file tail $folderpath] + set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo" + if {$nm eq $badtail} { + set fixedtail [dict get $iteminfo altname] + break + } + } + + if {![string length $fixedtail]} { + dict lappend errors $folderpath {*}$tmp_errors + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } + + #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. + #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it + #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) + #so the illegalname_fix doesn't really work here + #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] + + #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. + set fixedpath $parent/$fixedtail + append errmsg "retrying with with windows dos device path $fixedpath\n" + puts stderr $errmsg + } - if {![string length $fixedtail]} { - dict lappend errors $folderpath {*}$tmp_errors - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)" + if {[catch { + set iterator [twapi::find_file_open $fixedpath/* -detail basic] + } errMsg]} { + puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')" + puts stderr " (errorcode: $::errorCode)\n" + puts stderr "$errMsg" + dict lappend errors $folderpath $::errorCode return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } - #twapi as at 2023-08 doesn't seem to support //?/ dos device paths.. - #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it - #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way) - #so the illegalname_fix doesn't really work here - #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] - #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. - set fixedpath [file join $parent $fixedtail] - append errmsg "retrying with with windows dos device path $fixedpath\n" - puts stderr $errmsg + } on error args { + set errmsg "error reading folder: $folderpath\n" + append errmsg "error: $args" \n + append errmsg "errorInfo: $::errorInfo" \n + puts stderr "$errmsg" + puts stderr "FAILED to collect info for folder '$folderpath'" + #append errmsg "aborting.." + #error $errmsg + return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] } + } + #jjj - if {[catch { - set iterator [twapi::find_file_open $fixedpath/* -detail basic] - } errMsg]} { - puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')" - puts stderr " (errorcode: $::errorCode)\n" - puts stderr "$errMsg" - dict lappend errors $folderpath $::errorCode - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] + while {[twapi::find_file_next $iterator iteminfo]} { + set nm [dict get $iteminfo name] + if {![regexp $tcl_re $nm]} { + continue + } + if {$nm in {. ..}} { + continue } + set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path + #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] + #set ftype "" + set do_sizes 0 + set do_times 0 + #attributes applicable to any classification + set fullname [file_join_one $folderpath $nm] + + set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict + if {[dict get $attrdict -hidden]} { + lappend flaggedhidden $fullname + } + if {[dict get $attrdict -system]} { + lappend flaggedsystem $fullname + } + if {[dict get $attrdict -readonly]} { + lappend flaggedreadonly $fullname + } + if {[dict exists $attrdict -debug]} { + dict set debuginfo $fullname [dict get $attrdict -debug] + } + set file_attributes [dict get $attrdict -fileattributes] - } on error args { - set errmsg "error reading folder: $folderpath\n" - append errmsg "error: $args" \n - append errmsg "errorInfo: $::errorInfo" \n - puts stderr "$errmsg" - puts stderr "FAILED to collect info for folder '$folderpath'" - #append errmsg "aborting.." - #error $errmsg - return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] - - } - } - set dirs [list] - set files [list] - set filesizes [list] - set allsizes [dict create] - set alltimes [dict create] + set is_reparse_point [expr {"reparse_point" in $file_attributes}] + set is_directory [expr {"directory" in $file_attributes}] - set links [list] - set linkinfo [dict create] - set debuginfo [dict create] - set flaggedhidden [list] - set flaggedsystem [list] - set flaggedreadonly [list] + set linkdata [dict create] + # ----------------------------------------------------------- + #main classification + if {$is_reparse_point} { + #this concept doesn't correspond 1-to-1 with unix links + #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points + #review - and see which if any actually belong in the links key of our return - while {[twapi::find_file_next $iterator iteminfo]} { - set nm [dict get $iteminfo name] - #recheck glob - #review! - if {![string match $opt_glob $nm]} { - continue - } - set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path - #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] - set ftype "" - #attributes applicable to any classification - set fullname [file_join_one $folderpath $nm] - - set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict - set file_attributes [dict get $attrdict -fileattributes] - - set linkdata [dict create] - # ----------------------------------------------------------- - #main classification - if {"reparse_point" in $file_attributes} { - #this concept doesn't correspond 1-to-1 with unix links - #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points - #review - and see which if any actually belong in the links key of our return - - - #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point - # - #we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? - #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' - #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls - #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} - #e.g (stripped of headers/footers and other lines) - #2022-10-02 04:07 AM priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] - #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. - #du includes the size of the symlink - #but we can't get it with tcl's file size - #twapi doesn't seem to have anything to help read it either (?) - #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link - # - #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. - # - #links are techically files too, whether they point to a file/dir or nothing. - lappend links $fullname - set ftype "l" - dict set linkdata linktype reparse_point - dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] - if {"directory" ni $file_attributes} { - dict set linkdata target_type file - } - } - if {"directory" in $file_attributes} { - if {$nm in {. ..}} { - continue + + #One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point + # + #we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du? + #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined' + #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls + #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path} + #e.g (stripped of headers/footers and other lines) + #2022-10-02 04:07 AM priv [\\?\c:\repo\elixir\gameportal\apps\test\priv] + #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers. + #du includes the size of the symlink + #but we can't get it with tcl's file size + #twapi doesn't seem to have anything to help read it either (?) + #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link + # + #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window. + # + #links are techically files too, whether they point to a file/dir or nothing. + lappend links $fullname + #set ftype "l" + if {"l" in $sized_types} { + set do_sizes 1 + } + if {"l" in $timed_types} { + set do_times 1 + } + dict set linkdata linktype reparse_point + dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] + if {"directory" ni $file_attributes} { + dict set linkdata target_type file + } } - if {"reparse_point" ni $file_attributes} { - lappend dirs $fullname - set ftype "d" - } else { - #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections - dict set linkdata target_type directory + if {$is_directory} { + #if {$nm in {. ..}} { + # continue + #} + if {!$is_reparse_point} { + lappend dirs $fullname + #set ftype "d" + if {"d" in $sized_types} { + set do_sizes 1 + } + if {"d" in $timed_types} { + set do_times 1 + } + } else { + #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections + dict set linkdata target_type directory + } } - } - if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} { - #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? - lappend files $fullname - if {"f" in $sized_types} { - lappend filesizes [dict get $iteminfo size] + if {!$is_reparse_point && !$is_directory} { + #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? + lappend files $fullname + if {"f" in $sized_types} { + lappend filesizes [dict get $iteminfo size] + set do_sizes 1 + } + if {"f" in $timed_types} { + set do_times 1 + } + #set ftype "f" } - set ftype "f" - } - # ----------------------------------------------------------- + # ----------------------------------------------------------- - if {[dict get $attrdict -hidden]} { - lappend flaggedhidden $fullname - } - if {[dict get $attrdict -system]} { - lappend flaggedsystem $fullname - } - if {[dict get $attrdict -readonly]} { - lappend flaggedreadonly $fullname - } - if {$ftype in $sized_types} { - dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] - } - if {$ftype in $timed_types} { - #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) - #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds - #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict - dict set alltimes $fullname [dict create\ - c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\ - a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\ - m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ - ] - } - if {[dict size $linkdata]} { - dict set linkinfo $fullname $linkdata - } - if {[dict exists $attrdict -debug]} { - dict set debuginfo $fullname [dict get $attrdict -debug] + if {$do_sizes} { + dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] + } + if {$do_times} { + #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) + #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds + #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict + dict set alltimes $fullname [dict create\ + c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\ + a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\ + m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\ + ] + } + if {[dict size $linkdata]} { + dict set linkinfo $fullname $linkdata + } } + twapi::find_file_close $iterator } - twapi::find_file_close $iterator - set vfsmounts [get_vfsmounts_in_folder $folderpath] + set vfsmounts [get_vfsmounts_in_folder $folderpath] set effective_opts $opts dict set effective_opts -with_times $timed_types dict set effective_opts -with_sizes $sized_types #also determine whether vfs. file system x is *much* faster than file attributes #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder - return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] + return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] } proc get_vfsmounts_in_folder {folderpath} { set vfsmounts [list] @@ -991,9 +1343,11 @@ namespace eval punk::du { #work around the horrible tilde-expansion thing (not needed for tcl 9+) proc file_join_one {base newtail} { if {[string index $newtail 0] ne {~}} { - return [file join $base $newtail] + #return [file join $base $newtail] + return $base/$newtail } - return [file join $base ./$newtail] + #return [file join $base ./$newtail] + return $base/./$newtail } @@ -1121,7 +1475,7 @@ namespace eval punk::du { #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #struct::set will affect order: tcl vs critcl give different ordering! set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links] - set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]] + set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]] #---- diff --git a/src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm b/src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm index 6a7b79d6..fe8cfc7e 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm @@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check { #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #Not any sort of comprehensive check of known tcl bugs. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases. + + proc has_tclbug_caseinsensitiveglob_windows {} { + #https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted + + if {"windows" ne $::tcl_platform(platform)} { + set bug 0 + } else { + set tmpdir [file tempdir] + set testfile [file join $tmpdir "bugtest"] + set fd [open $testfile w] + puts $fd test + close $fd + set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*] + foreach r $globresult { + if {$r ne "bugtest"} { + set bug 1 + break + } + } + return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning] + #possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized + # to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation. + } + } + + #todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies. + # but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module, + # or at least checking for an existing mounted case-insensitive filesystem. + # A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like. + # It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix. + + proc has_tclbug_regexp_emptystring {} { #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, diff --git a/src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm index e767e366..073b6cce 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm @@ -50,7 +50,7 @@ package require punk::lib package require punk::args package require punk::ansi package require punk::winpath -package require punk::du +package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing. package require commandstack #*** !doctools #[item] [package {Tcl 8.6}] @@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs { #[list_begin definitions] + punk::args::define { + @id -id ::punk::nav::fs::d/ + @cmd -name punk::nav::fs::d/ -help\ + {List directories or directories and files in the current directory or in the + targets specified with the fileglob_or_target glob pattern(s). + + If a single target is specified without glob characters, and it exists as a directory, + then the working directory is changed to that target and a listing of that directory + is returned. If the single target specified without glob characters does not exist as + a directory, then it is treated as a glob pattern and the listing is for the current + directory with results filtered to match fileglob_or_target. + + If multiple targets or glob patterns are specified, then a separate listing is returned + for each fileglob_or_target pattern. + + This function is provided via aliases as ./ and .// with v being inferred from the alias + name, and also as d/ with an explicit v argument. + The ./ and .// forms are more convenient for interactive use. + examples: + ./ - list directories in current directory + .// - list directories and files in current directory + ./ src/* - list directories in src + .// src/* - list directories and files in src + .// *.txt - list files in current directory with .txt extension + .// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name + (on a case-insensitive filesystem this would also match T*1.txt etc) + .// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name + (glob chars treated as literals due to being in character-class brackets + This will match files beginning with a capital T and not lower case t + even on a case-insensitive filesystem.) + .// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns: + {[t]*} - names beginning with t + {d{e,d}*} - names beginning with de or dd + (on a case-insensitive filesystem the first pattern would also match names beginning with T) + } + @values -min 1 -max -1 -type string + v -type string -choices {/ //} -help\ + " + / - list directories only + // - list directories and files + " + fileglob_or_target -type string -optional true -multiple true -help\ + "A glob pattern as supported by Tcl's 'glob' command, to filter results. + If multiple patterns are supplied, then a listing for each pattern is returned. + If no patterns are supplied, then all items are listed." + } + #NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread. #As this function recurses and calls cd multiple times - it's not thread-safe. #Another thread could theoretically cd whilst this is running. @@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs { #puts stdout "-->[ansistring VIEW $result]" return $result } else { - set atail [lassign $args cdtarget] if {[llength $args] == 1} { set cdtarget [lindex $args 0] switch -exact -- $cdtarget { . - ./ { - tailcall punk::nav::fs::d/ + tailcall punk::nav::fs::d/ $v } .. - ../ { if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} { @@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs { if {[string range $cdtarget_copy 0 3] eq "//?/"} { #handle dos device paths - convert to normal path for glob testing set glob_test [string range $cdtarget_copy 3 end] - set cdtarget_is_glob [regexp {[*?]} $glob_test] + set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test] } else { - set cdtarget_is_glob [regexp {[*?]} $cdtarget] + set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget] + #todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing. } if {!$cdtarget_is_glob} { set cdtarget_file_type [file type $cdtarget] @@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs { } set VIRTUAL_CWD $cdtarget set curdir $cdtarget + tailcall punk::nav::fs::d/ $v } else { set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] if {[string match //zipfs:/* $VIRTUAL_CWD]} { @@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs { } if {[file type $target] eq "directory"} { set VIRTUAL_CWD $target + tailcall punk::nav::fs::d/ $v } } - tailcall punk::nav::fs::d/ $v } set curdir $VIRTUAL_CWD } else { @@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs { #globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.) - set searchspec [lindex $args 0] set result "" #set chunklist [list] @@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs { set this_result [dict create] foreach searchspec $args { set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)] - set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}] + #we need to support the same glob chars that Tcl's 'glob' command accepts. + set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]] + #we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean #this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough. #lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*) @@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs { set allow_nonportable [dict exists $received -nonportable] set curdir [pwd] - set fullpath_list [list] + + set fullpath_list [list] ;#list of full paths to create. + set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir) + #these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests. + set error_paths [list] foreach p $paths { if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { @@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs { lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] continue } - set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] - #e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. - set fullpath [file join $path1 {*}[lrange $args 1 end]] + set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)] + #if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir. #Some subpaths of the supplied paths to create may already exist. - #we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. + #we should test write permissions on the nearest existing parent of the supplied path to create, + #rather than just on the immediate parent segment of the supplied path itself which may not exist. + + set fullpath [file normalize $fullpath] set parent [file dirname $fullpath] while {![file exists $parent]} { set parent [file dirname $parent] @@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs { lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] continue } + lappend existing_parent_list $parent lappend fullpath_list $fullpath } if {[llength $fullpath_list] != [llength $paths]} { @@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs { set num_created 0 set error_string "" - foreach fullpath $fullpath_list { + foreach fullpath $fullpath_list existing_parent $existing_parent_list { + #calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue. + #set relative_path [file relative $fullpath $existing_parent] + #todo. if {[catch {file mkdir $fullpath}]} { set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." + cd $curdir break } incr num_created @@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs { if {$error_string ne ""} { error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." } - d/ $curdir + + #display summaries of created directories (which may have already existed) by reusing d/ to get info on them. + set query_paths [lmap v $paths $v/*] + d/ / {*}$query_paths } #todo use unknown to allow d/~c:/etc ?? @@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs { if {![file isdirectory $target]} { error "Folder $target not found" } - d/ $target + d/ / $target } @@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs { } set relativepath [expr {[file pathtype $searchspec] eq "relative"}] - set has_tailglobs [regexp {[?*]} [file tail $searchspec]] + + #set has_tailglobs [regexp {[?*]} [file tail $searchspec]] + set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]] #dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent. #(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict) @@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs { } puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location" set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location] - return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] + return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] } #todo - package as punk::nav::fs @@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs { } proc dirfiles_dict {args} { set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict] - lassign [dict values $argd] leaders opts vals - set searchspecs [dict values $vals] + lassign [dict values $argd] leaders opts values + set searchspecs [dict values $values] #puts stderr "searchspecs: $searchspecs [llength $searchspecs]" #puts stdout "arglist: $opts" @@ -893,7 +958,7 @@ tcl::namespace::eval punk::nav::fs { set searchspec [lindex $searchspecs 0] # -- --- --- --- --- --- --- set opt_searchbase [dict get $opts -searchbase] - set opt_tailglob [dict get $opts -tailglob] + set opt_tailglob [dict get $opts -tailglob] set opt_with_sizes [dict get $opts -with_sizes] set opt_with_times [dict get $opts -with_times] # -- --- --- --- --- --- --- @@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs { } } "\uFFFF" { - set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] + set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]] + + #set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] if {$searchtail_has_globs} { if {$is_relativesearchspec} { #set location [file dirname [file join $searchbase $searchspec]] @@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs { } else { set next_opt_with_times [list -with_times $opt_with_times] } + + set ts1 [clock clicks -milliseconds] if {$is_in_vfs} { set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] } else { @@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs { } } } + set ts2 [clock clicks -milliseconds] + set ts_listing [expr {$ts2 - $ts1}] set dirs [dict get $listing dirs] set files [dict get $listing files] @@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs { set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden] } - set dirs [lsort -dictionary $dirs] ;#todo - natsort + #----------------------------------------------------------------------------------------- + set ts1 [clock milliseconds] + set dirs [lsort -dictionary $dirs] ;#todo - natsort #foreach d $dirs { # if {[lindex [file system $d] 0] eq "tclvfs"} { @@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs { set files $sorted_files set filesizes $sorted_filesizes + set ts2 [clock milliseconds] + set ts_sorting [expr {$ts2 - $ts1}] + #----------------------------------------------------------------------------------------- # -- --- #jmn + set ts1 [clock milliseconds] foreach nm [list {*}$dirs {*}$files] { if {[punk::winpath::illegalname_test $nm]} { lappend nonportable $nm } } + set ts2 [clock milliseconds] + set ts_nonportable_check [expr {$ts2 - $ts1}] + set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms] + set front_of_dict [dict create location $location searchbase $opt_searchbase] set listing [dict merge $front_of_dict $listing] - set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes] + set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info] return [dict merge $listing $updated] } @@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs { @id -id ::punk::nav::fs::dirfiles_dict_as_lines -stripbase -default 0 -type boolean -formatsizes -default 1 -type boolean - -listing -default "/" -choices {/ // //} + -listing -default "/" -choices {/ //} @values -min 1 -max -1 -type dict -unnamed true } #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? proc dirfiles_dict_as_lines {args} { - set ts1 [clock milliseconds] + #set ts1 [clock milliseconds] package require overtype set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] lassign [dict values $argd] leaders opts vals @@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs { #review - symlink to shortcut? hopefully will just work #classify as file or directory - fallback to file if unknown/undeterminable set finfo_plus [list] - set ts2 [clock milliseconds] + #set ts2 [clock milliseconds] foreach fdict $finfo { set fname [dict get $fdict file] if {[file extension $fname] eq ".lnk"} { @@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs { } unset finfo - puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" - puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" + #puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" + #puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] @@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs { #consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path #review: punk::winpath calls cygpath! #review: file pathtype is platform dependant - proc path_to_absolute {path base platform} { - set ptype [file pathtype $path] + proc path_to_absolute {subpath base platform} { + set ptype [file pathtype $subpath] if {$ptype eq "absolute"} { - set path_absolute $path + set path_absolute $subpath } elseif {$ptype eq "volumerelative"} { if {$platform eq "windows"} { #unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms) - if {[string index $path 0] eq "/"} { + if {[string index $subpath 0] eq "/"} { #this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here #It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions. #Todo - tidy up. package require punk::unixywindows - set path_absolute [punk::unixywindows::towinpath $path] + set path_absolute [punk::unixywindows::towinpath $subpath] #puts stderr "winpath: $path" } else { #todo handle volume-relative paths with volume specified c:etc c: @@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs { #The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives #Arguably if ...? - #set path_absolute $base/$path - set path_absolute $path + #set path_absolute $base/$subpath + set path_absolute $subpath } } else { # unknown what paths are reported as this on other platforms.. treat as absolute for now - set path_absolute $path + set path_absolute $subpath } } else { - set path_absolute $base/$path - } - if {$platform eq "windows"} { - if {[punk::winpath::illegalname_test $path_absolute]} { - set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present - } + #e.g relative subpath=* base = c:/test -> c:/test/* + #e.g relative subpath=../test base = c:/test -> c:/test/../test + #e.g relative subpath=* base = //server/share/test -> //server/share/test/* + set path_absolute $base/$subpath } + #fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues. + #if {$platform eq "windows"} { + # if {[punk::winpath::illegalname_test $path_absolute]} { + # set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present + # } + #} return $path_absolute } proc strip_prefix_depth {path prefix} { @@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib { #[para] Secondary functions that are part of the API #[list_begin definitions] - #proc utility1 {p1 args} { - # #*** !doctools - # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] - # #[para]Description of utility1 - # return 1 - #} + punk::args::define { + @id -id ::punk::nav::fs::lib::is_fileglob + @cmd -name punk::nav::fs::lib::is_fileglob + @values -min 1 -max 1 + path -type string -required true -help\ + {String to test for being a glob pattern as recognised by the tcl 'glob' command. + If the string represents a path with multiple segments, only the final component + of the path will be tested for glob characters. + Glob patterns in this context are different to globs accepted by TCL's 'string match'. + A glob pattern is any string that contains unescaped * ? { } [ or ]. + This will not detect mismatched unescaped braces or brackets. + Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern. + } + } + proc is_fileglob {str} { + #a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser) + set in_escape 0 + set segments [file split $str] + set tail [lindex $segments end] + foreach c [split $tail ""] { + if {$in_escape} { + set in_escape 0 + } else { + if {$c eq "\\"} { + set in_escape 1 + } elseif {$c in [list * ? "\[" "\]" "{" "}" ]} { + return 1 + } + } + } + return 0 + } #*** !doctools diff --git a/src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm index 46ca4aa2..de536724 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm @@ -357,7 +357,7 @@ namespace eval punk::path { } } } - puts "==>finalparts: '$finalparts'" + #puts "==>finalparts: '$finalparts'" # using join - {"" "" server share} -> //server/share and {a b} -> a/b if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} { #backtracking on unix-style path can end up with empty string as only member of finalparts diff --git a/src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm b/src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm index 9079dbbc..1f2948fe 100644 --- a/src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm +++ b/src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm @@ -114,6 +114,50 @@ namespace eval punk::winpath { return $path } } + + proc illegal_char_map_to_doublewide {ch} { + #windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc) + set map [dict create \ + "<" "\uFF1C" \ + ">" "\uFF1E" \ + ":" "\uFF1A" \ + "\"" "\uFF02" \ + "/" "\uFF0F" \ + "\\" "\uFF3C" \ + "|" "\uFF5C" \ + "?" "\uFF1F" \ + "*" "\uFF0A"] + if {$ch in [dict keys $map]} { + return [dict get $map $ch] + } else { + return $ch + } + } + proc illegal_char_map_to_ntfs {ch} { + #windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc. + #see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars + #see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped) + + set map [dict create \ + "<" "\uF03C" \ + ">" "\uF03E" \ + ":" "\uF03A" \ + "\"" "\uF022" \ + "/" "unknown" \ + "\\" "\uF05c" \ + "|" "\uF07C" \ + "?" "\uF03F" \ + "*" "\uF02A"] + #note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API. + if {$ch in [dict keys $map]} { + return [dict get $map $ch] + } else { + return $ch + } + } + + + #we don't validate that path is actually illegal because we don't know the full range of such names. #The caller can apply this to any path. #don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently) @@ -200,8 +244,15 @@ namespace eval punk::winpath { set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] #we need to exclude things like path/.. path/. - foreach seg [file split $path] { - if {$seg in [list . ..]} { + set segments [file split $path] + if {[file pathtype $path] eq "absolute"} { + #absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments. + set checksegments [lrange $segments 1 end] + } else { + set checksegments $segments + } + foreach seg $checksegments { + if {$seg in {. ..}} { #review - what if there is a folder or file that actually has a name such as . or .. ? #unlikely in normal use - but could done deliberately for bad reasons? #We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem. @@ -220,10 +271,17 @@ namespace eval punk::winpath { #only check for actual space as other whitespace seems to work without being stripped #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph - if {[string index $seg end] in [list " " "."]} { + if {[string index $seg end] in {" " .}} { #windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc) return 1 } + #set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams. + set re {[<>:"/\\|?*]} + + if {[regexp $re $seg]} { + #windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc) + return 1 + } } #glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph) #- they seem to be readable from cmd and tclsh as is.