Browse Source

dir listing and glob fixes - windows

master
Julian Noble 4 days ago
parent
commit
b2f4d67056
  1. 211
      src/bootsupport/modules/punk-0.1.tm
  2. 858
      src/bootsupport/modules/punk/du-0.1.0.tm
  3. 32
      src/bootsupport/modules/punk/lib-0.1.6.tm
  4. 209
      src/bootsupport/modules/punk/nav/fs-0.1.0.tm
  5. 2
      src/bootsupport/modules/punk/path-0.1.0.tm
  6. 64
      src/bootsupport/modules/punk/winpath-0.1.0.tm
  7. 211
      src/modules/punk-0.1.tm
  8. 858
      src/modules/punk/du-999999.0a1.0.tm
  9. 32
      src/modules/punk/lib-999999.0a1.0.tm
  10. 209
      src/modules/punk/nav/fs-999999.0a1.0.tm
  11. 2
      src/modules/punk/path-999999.0a1.0.tm
  12. 64
      src/modules/punk/winpath-999999.0a1.0.tm
  13. 211
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm
  14. 858
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm
  15. 32
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm
  16. 209
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm
  17. 2
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm
  18. 64
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm
  19. 211
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm
  20. 858
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm
  21. 32
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm
  22. 209
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm
  23. 2
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm
  24. 64
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm
  25. 211
      src/vfs/_vfscommon.vfs/modules/punk-0.1.tm
  26. 858
      src/vfs/_vfscommon.vfs/modules/punk/du-0.1.0.tm
  27. 32
      src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm
  28. 209
      src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm
  29. 2
      src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm
  30. 64
      src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm

211
src/bootsupport/modules/punk-0.1.tm

@ -6066,9 +6066,218 @@ namespace eval punk {
set pipe [punk::path_list_pipe $glob]
{*}$pipe
}
proc path {{glob *}} {
proc path_basic {{glob *}} {
set pipe [punk::path_list_pipe $glob]
{*}$pipe |> list_as_lines
}
namespace eval argdoc {
punk::args::define {
@id -id ::punk::path
@cmd -name "punk::path" -help\
"Introspection of the PATH environment variable.
This tool will examine executables within each PATH entry and show which binaries
are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns."
@opts
-binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries."
@values -min 0 -max -1
glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories."
}
}
proc path {args} {
set argd [punk::args::parse $args withid ::punk::path]
lassign [dict values $argd] leaders opts values received
set binglobs [dict get $opts -binglobs]
set globs [dict get $values glob]
if {$::tcl_platform(platform) eq "windows"} {
set sep ";"
} else {
# : ok for linux/bsd ... mac?
set sep ":"
}
set all_paths [split [string trimright $::env(PATH) $sep] $sep]
set filtered_paths $all_paths
if {[llength $globs]} {
set filtered_paths [list]
foreach p $all_paths {
foreach g $globs {
if {[string match -nocase $g $p]} {
lappend filtered_paths $p
break
}
}
}
}
#Windows executable search location order:
#1. The current directory.
#2. The directories that are listed in the PATH environment variable.
# within each directory windows uses the PATHEXT environment variable to determine
#which files are considered executable and in which order they are considered.
#So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files
#with the same name but different extensions in the same directory, then the one
#with the extension that appears first in PATHEXT will be executed when that name is called.
#On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output.
#Duplicate PATH entries are also a fairly likely possibility on all platforms.
#This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries.
#By default we don't want to display all executables in all PATH entries - as this can be very verbose
#- but we want to be able to show which executables are overshadowed by which PATH entries,
#and to be able to filter the PATH entries and the executables using glob patterns.
#To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name.
#The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths
#in the original PATH list. The executable dict will contain the paths and path indices where each executable is found,
#and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a
#dict keyed by path index which contains the list of executables in that path - to make it easy to show which
#executables are overshadowed by which paths.
set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows).
set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off).
set d_index_executables [dict create] ;#key is path index, value is list of executables in that path
set path_idx 0
foreach p $all_paths {
if {$::tcl_platform(platform) eq "windows"} {
set pnorm [string tolower $p]
} else {
set pnorm $p
}
if {![dict exists $d_path_info $pnorm]} {
dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]]
set executables [list]
if {[file isdirectory $p]} {
#get all files that are executable in this path.
#If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names.
set pnormglob [file normalize $p]
if {$::tcl_platform(platform) eq "windows"} {
#Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable.
#We need to account for this in our glob pattern.
set pathexts [list]
if {[info exists ::env(PATHEXT)]} {
set env_pathexts [split $::env(PATHEXT) ";"]
#set pathexts [lmap e $env_pathexts {string tolower $e}]
foreach pe $env_pathexts {
if {$pe eq "."} {
continue
}
lappend pathexts [string tolower $pe]
}
} else {
set env_pathexts [list]
#default PATHEXT if not set - according to Microsoft docs
set pathexts [list .com .exe .bat .cmd]
}
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set has_pathext 1
break
}
}
if {!$has_pathext} {
foreach pe $pathexts {
lappend binglobs "$bg$pe"
}
}
}
set lc_binglobs [lmap e $binglobs {string tolower $e}]
if {"." in $pathexts} {
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]]
set has_pathext 1
break
}
}
if {$has_pathext} {
if {[string tolower $base] ni $lc_binglobs} {
lappend binglobs "$base"
}
}
}
}
#TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem.
#(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug)
#We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results.
#The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for.
#(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe')
set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]]
set executables [list]
foreach e $globresults {
puts stderr "glob result: $e"
puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]"
lappend executables [file tail [file normalize $e]]
}
} else {
set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]]
}
}
dict set d_index_executables $path_idx $executables
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
if {![dict exists $d_bin_info $exenorm]} {
dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]]
} else {
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
} else {
#duplicate path entry - add to list of original paths for this normalized path
# Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...?
# we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it.
set pathdata [dict get $d_path_info $pnorm]
dict lappend pathdata original_paths $p
dict lappend pathdata indices $path_idx
dict set d_path_info $pnorm $pathdata
#we don't need to add executables for this path - as they will be the same as the original path that we have already processed.
#However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables.
set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
incr path_idx
}
#temporary debug output to check dicts are being built correctly
set debug ""
append debug "Path info dict:" \n
append debug [showdict $d_path_info] \n
append debug "Binary info dict:" \n
append debug [showdict $d_bin_info] \n
append debug "Index executables dict:" \n
append debug [showdict $d_index_executables] \n
#return $debug
puts stdout $debug
}
#-------------------------------------------------------------------

858
src/bootsupport/modules/punk/du-0.1.0.tm

@ -479,7 +479,7 @@ namespace eval punk::du {
}
namespace eval lib {
variable du_literal
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ]
variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ]
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this
@ -489,7 +489,11 @@ namespace eval punk::du {
return [dict get $winfile_attributes $bitmask]
} else {
#list/dict shimmering?
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
#return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
set decoded [twapi::decode_file_attributes $bitmask]
dict set winfile_attributes $bitmask $decoded
return $decoded
}
}
variable win_reparse_tags
@ -563,23 +567,25 @@ namespace eval punk::du {
#then twapi::device_ioctl (win32 DeviceIoControl)
#then parse buffer somehow (binary scan..)
#https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913
punk::args::define {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}
#don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args.
proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
#set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo]
set defaults [dict create -debug 0 -debugchannel stderr]
set opts [dict merge $defaults [lrange $args 0 end-1]]
set iteminfo [lindex $args end]
set argd [punk::args::parse $args withdef {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}]
set opts [dict get $argd opts]
set iteminfo [dict get $argd values iteminfo]
set opt_debug [dict get $opts -debug]
set opt_debugchannel [dict get $opts -debugchannel]
#-longname is placeholder - caller needs to set
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0]
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}]
if {$opt_debug} {
set dbg "iteminfo returned by find_file_open\n"
append dbg [pdict -channel none iteminfo]
@ -592,34 +598,38 @@ namespace eval punk::du {
}
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
if {"hidden" in $attrinfo} {
dict set result -hidden 1
}
if {"system" in $attrinfo} {
dict set result -system 1
}
if {"readonly" in $attrinfo} {
dict set result -readonly 1
}
dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
if {"reparse_point" in $attrinfo} {
#the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+
#todo - is_microsoft from first bit of high_word
set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter?
if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"]
foreach attr $attrinfo {
switch -- $attr {
hidden {
dict set result -hidden 1
}
system {
dict set result -system 1
}
readonly {
dict set result -readonly 1
}
reparse_point {
#the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+
#todo - is_microsoft from first bit of high_word
set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter?
if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"]
}
}
}
}
#dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
dict set result -raw $iteminfo
return $result
}
@ -652,27 +662,337 @@ namespace eval punk::du {
catch {twapi::find_file_close $iterator}
}
}
proc resolve_characterclass {cclass} {
#takes the inner value from a tcl square bracketed character class and converts to a list of characters.
#todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters
#e.g a-c-3 -> a b c - 3
#e.g a-c-3-5 -> a b c - 3 4 5
#e.g a-c -> a b c
#e.g a- -> a -
#e.g -c-e -> - c d e
#the tcl character class does not support negation or intersection - so we can ignore those possibilities for now.
#in this context we do not need to support named character classes such as [:digit:]
set chars [list]
set i 0
set len [string length $cclass]
set accept_range 0
while {$i < $len} {
set ch [string index $cclass $i]
if {$ch eq "-"} {
if {$accept_range} {
set start [string index $cclass [expr {$i - 1}]]
set end [string index $cclass [expr {$i + 1}]]
if {$start eq "" || $end eq ""} {
#invalid range - treat - as literal
if {"-" ni $chars} {
lappend chars "-"
}
} else {
#we have a range - add all chars from previous char to next char
#range may be in either direction - e.g a-c or c-a but we don't care about the order of our result.
if {$start eq $end} {
#degenerate range - treat as single char
if {$start ni $chars} {
lappend chars $start
}
} else {
if {[scan $start %c] < [scan $end %c]} {
set c1 [scan $start %c]
set c2 [scan $end %c]
} else {
set c1 [scan $end %c]
set c2 [scan $start %c]
}
for {set c $c1} {$c <= $c2} {incr c} {
set char [format %c $c]
if {$char ni $chars} {
lappend chars $char
}
}
}
incr i ;#skip end char as it's already included in range
}
set accept_range 0
} else {
#we have a literal - add to list and allow for possible range if next char is also a -
if {"-" ni $chars} {
lappend chars "-"
}
set accept_range 1
}
} else { #we have a literal - add to list and allow for possible range if next char is also a -
if {$ch ni $chars} {
lappend chars $ch
}
set accept_range 1
}
incr i
}
return $chars
}
#return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible.
#first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes.
# e.g {*[\{]*} is a valid tcl glob
#todo - write tests.
#should also support {*\{*} matching a file such as a{blah}b
#Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL.
#So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity.
proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} {
#windows API function in use is the FindFirstFile set of functions.
#these support wildcards * and ? *only*.
#examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported
# {abc[1-3].txt} -> {abc?.txt}
# {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
# {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call.
# {{S,t}*.txt} -> {S*.txt} {T*.txt}
# *.{txt,log} -> {*.txt} {*.log}
# {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals
set gchars [split $tclglob ""]
set winglob_list [list ""]
set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
set in_brackets 0
set esc_next 0
set brace_depth 0
set brace_content ""
set braced_alternatives [list]
set brace_is_normal 0
set cclass_content ""
foreach ch $gchars {
if {$esc_next} {
if {$in_brackets} {
append cclass_content $ch
continue
} elseif {$brace_depth} {
append brace_content $ch
continue
}
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}]
set esc_next 0
continue
}
if {$ch eq "\{"} {
if {$brace_depth} {
#we have an opening brace char inside braces
#Let the brace processing handle it as it recurses.
incr brace_depth 1
append brace_content $ch
continue
}
incr brace_depth 1
set brace_content ""
} elseif {$ch eq "\}"} {
if {$brace_depth > 1} {
#we have a closing brace char inside braces
append brace_content $ch
incr brace_depth -1
continue
}
#process brace_content representing a list of alternatives
#handle list of alternatives - convert {txt,log} to *.txt;*.log
#set alternatives [split $brace_content ","]
lappend braced_alternatives $brace_content
set alternatives $braced_alternatives
set braced_alternatives [list]
set brace_content ""
incr brace_depth -1
set alt_winpatterns [list]
set alt_regexps [list]
foreach alt $alternatives {
set subresult [tclglob_equivalents $alt]
lappend alt_winpatterns {*}[dict get $subresult winglobs]
lappend alt_regexps {*}[dict get $subresult tclregexps]
}
set next_winglob_list [list]
set next_regexp_list [list]
foreach wg $winglob_list re $tclregexp_list {
#puts "wg: $wg"
#puts "re: $re"
foreach alt_wp $alt_winpatterns alt_re $alt_regexps {
#puts " alt_wp: $alt_wp"
#puts " alt_re: $alt_re"
lappend next_winglob_list "$wg$alt_wp"
set alt_re_no_caret [string range $alt_re 1 end]
lappend next_regexp_list "${re}${alt_re_no_caret}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\["} {
#windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
if {!$brace_depth} {
set in_brackets 1
} else {
#we have a [ char inside braces
#Let the brace processing handle it as it recurses.
#but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets.
set brace_is_normal 1
append brace_content $ch
}
} elseif {$ch eq "\]"} {
if {$brace_depth} {
#we have a ] char inside braces
#Let the brace processing hanele it as it recurses.
append brace_content $ch
continue
}
set in_brackets 0
set charlist [resolve_characterclass $cclass_content]
set cclass_content ""
set next_winglob_list [list]
set next_regexp_list [list]
foreach c $charlist {
#set winglob_list [lmap wg $winglob_list {append wg $c}]
foreach wg $winglob_list {
lappend next_winglob_list "$wg$c"
}
foreach re $tclregexp_list {
set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}]
lappend next_regexp_list "${re}${c_escaped}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\\"} {
if {$in_brackets} {
append cclass_content $ch
#append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself.
set esc_next 1
continue
}
if {$brace_depth} {
#we have a \ char inside braces
#Let the brace processing handle it as it recurses.
append brace_content $ch
set esc_next 1
continue
}
set esc_next 1
continue
} else {
if {$in_brackets} {
append cclass_content $ch
continue
}
if {!$brace_depth} {
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]
if {[string length $re_ch] == 1} {
switch -- $re_ch {
"?" {set re_ch "."}
"*" {set re_ch ".*"}
default {
#we could use the same mixed case filter here for both sensitive and insensitive filesystems,
#because the API filtering will already have done the restriction,
#and so a more permissive regex that matches both cases will still only match the results that the API call returns,
#which will be correct based on the case-sensitivity of the filesystem.
#It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob.
#ie in the parts of the original glob that were in square brackets.
if {!$case_sensitive_filesystem} {
# add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
if {[string is upper $re_ch]} {
set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"]
} elseif {[string is lower $re_ch]} {
set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"]
} else {
#non-alpha char - no need to add case-insensitivity
}
}
}
}
}
set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}]
} else {
#we have a literal char inside braces - add to current brace_content
if {$brace_depth == 1 && $ch eq ","} {
lappend braced_alternatives $brace_content
set brace_content ""
} else {
append brace_content $ch
}
}
}
}
#sanity check
if {[llength $winglob_list] != [llength $tclregexp_list]} {
error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]"
}
set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}]
return [dict create winglobs $winglob_list tclregexps $tclregexp_list]
}
#todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed?
namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs
# get listing without using unix-tools (may not be installed on the windows system)
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
# This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance
#todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob.
# we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before
#we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder.
#For really large folders and a moderate number of patterns, this could be a significant performance improvement.
proc du_dirlisting_twapi {folderpath args} {
set defaults [dict create\
-glob *\
-filedebug 0\
-patterndebug 0\
-with_sizes 1\
-with_times 1\
]
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob]
set tcl_glob [dict get $opts -glob]
#todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps.
set case_sensitive_filesystem 0 ;#todo - consider detecting this properly.
#Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program.
#In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly,
#as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem,
#so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly.
#Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a
#case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem.
#Note: we only use this to adjust the filtering regexps we generate from the tcl glob.
#The windows API pattern match will already filter based on the case-sensitivity of the filesystem
# so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem.
set globs_processed [tclglob_equivalents $tcl_glob]
#we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob.
#temp
#set win_glob_list [list $tcl_glob]
set win_glob_list [dict get $globs_processed winglobs]
set tcl_regex_list [dict get $globs_processed tclregexps]
#review
# our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob.
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_filedebug [dict get $opts -filedebug] ;#per file
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_patterndebug [dict get $opts -patterndebug]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean - (f false vs f file!)
@ -711,256 +1031,288 @@ namespace eval punk::du {
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set errors [dict create]
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
# return it so it can be stored and tried as an alternative for problem paths
#puts stderr ">>> glob: $opt_glob"
#REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases.
try {
#glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field
} on error args {
foreach win_glob $win_glob_list tcl_re $tcl_regex_list {
if {$opt_patterndebug} {
puts stderr ">>>du_dirlisting_twapi winglob: $win_glob"
puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re"
}
#REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases.
try {
if {[string match "*denied*" $args]} {
#output similar format as unixy du
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
puts stderr " (errorcode: $::errorCode)\n"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field
} on error args {
try {
if {[string match "*denied*" $args]} {
#output similar format as unixy du
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
puts stderr " (errorcode: $::errorCode)\n"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#errorcode TWAPI_WIN32 2 {The system cannot find the file specified.}
#This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error
#The find-all glob * won't get here because it returns . & ..
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} {
#errorcode TWAPI_WIN32 2 {The system cannot find the file specified.}
#This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error
#The find-all glob * won't get here because it returns . & ..
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
#if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {}
if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} {
#looks like an ordinary no results for chosen glob
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
#return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
continue
}
}
if {[set plen [pathcharacterlen $folderpath]] >= 250} {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
# re-fetch this folder with altnames
#file normalize - aside from being slow - will have problems with long paths - so this won't work.
#this function should only accept absolute paths
#
#
#Note: using -detail full only helps if the last segment of path has an altname..
#To properly shorten we need to have kept track of altname all the way from the root!
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
#### SLOW
set fixedpath [dict get [file attributes $folderpath] -shortname]
#### SLOW
if {[set plen [pathcharacterlen $folderpath]] >= 250} {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
# re-fetch this folder with altnames
#file normalize - aside from being slow - will have problems with long paths - so this won't work.
#this function should only accept absolute paths
#
#
#Note: using -detail full only helps if the last segment of path has an altname..
#To properly shorten we need to have kept track of altname all the way from the root!
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
#### SLOW
set fixedpath [dict get [file attributes $folderpath] -shortname]
#### SLOW
append errmsg "retrying with with windows altname '$fixedpath'"
puts stderr $errmsg
} else {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
set tmp_errors [list $::errorCode]
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
#we can use //?/path dos device path - but not with tcl functions
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here
append errmsg "retrying with with windows altname '$fixedpath'"
puts stderr $errmsg
} else {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
set tmp_errors [list $::errorCode]
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
#we can use //?/path dos device path - but not with tcl functions
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here
set fixedtail ""
set parent [file dirname $folderpath]
set badtail [file tail $folderpath]
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname]
break
set fixedtail ""
set parent [file dirname $folderpath]
set badtail [file tail $folderpath]
set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo"
if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname]
break
}
}
if {![string length $fixedtail]} {
dict lappend errors $folderpath {*}$tmp_errors
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
#so the illegalname_fix doesn't really work here
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath $parent/$fixedtail
append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg
}
if {![string length $fixedtail]} {
dict lappend errors $folderpath {*}$tmp_errors
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
if {[catch {
set iterator [twapi::find_file_open $fixedpath/* -detail basic]
} errMsg]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')"
puts stderr " (errorcode: $::errorCode)\n"
puts stderr "$errMsg"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
#so the illegalname_fix doesn't really work here
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath [file join $parent $fixedtail]
append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg
} on error args {
set errmsg "error reading folder: $folderpath\n"
append errmsg "error: $args" \n
append errmsg "errorInfo: $::errorInfo" \n
puts stderr "$errmsg"
puts stderr "FAILED to collect info for folder '$folderpath'"
#append errmsg "aborting.."
#error $errmsg
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
#jjj
if {[catch {
set iterator [twapi::find_file_open $fixedpath/* -detail basic]
} errMsg]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')"
puts stderr " (errorcode: $::errorCode)\n"
puts stderr "$errMsg"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
if {![regexp $tcl_re $nm]} {
continue
}
if {$nm in {. ..}} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
#set ftype ""
set do_sizes 0
set do_times 0
#attributes applicable to any classification
set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
set file_attributes [dict get $attrdict -fileattributes]
} on error args {
set errmsg "error reading folder: $folderpath\n"
append errmsg "error: $args" \n
append errmsg "errorInfo: $::errorInfo" \n
puts stderr "$errmsg"
puts stderr "FAILED to collect info for folder '$folderpath'"
#append errmsg "aborting.."
#error $errmsg
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set is_reparse_point [expr {"reparse_point" in $file_attributes}]
set is_directory [expr {"directory" in $file_attributes}]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set linkdata [dict create]
# -----------------------------------------------------------
#main classification
if {$is_reparse_point} {
#this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
#recheck glob
#review!
if {![string match $opt_glob $nm]} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
set ftype ""
#attributes applicable to any classification
set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
set file_attributes [dict get $attrdict -fileattributes]
set linkdata [dict create]
# -----------------------------------------------------------
#main classification
if {"reparse_point" in $file_attributes} {
#this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return
#One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point
#
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du?
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
#e.g (stripped of headers/footers and other lines)
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
#du includes the size of the symlink
#but we can't get it with tcl's file size
#twapi doesn't seem to have anything to help read it either (?)
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
#
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
#
#links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname
set ftype "l"
dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} {
dict set linkdata target_type file
}
}
if {"directory" in $file_attributes} {
if {$nm in {. ..}} {
continue
#One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point
#
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du?
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
#e.g (stripped of headers/footers and other lines)
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
#du includes the size of the symlink
#but we can't get it with tcl's file size
#twapi doesn't seem to have anything to help read it either (?)
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
#
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
#
#links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname
#set ftype "l"
if {"l" in $sized_types} {
set do_sizes 1
}
if {"l" in $timed_types} {
set do_times 1
}
dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} {
dict set linkdata target_type file
}
}
if {"reparse_point" ni $file_attributes} {
lappend dirs $fullname
set ftype "d"
} else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory
if {$is_directory} {
#if {$nm in {. ..}} {
# continue
#}
if {!$is_reparse_point} {
lappend dirs $fullname
#set ftype "d"
if {"d" in $sized_types} {
set do_sizes 1
}
if {"d" in $timed_types} {
set do_times 1
}
} else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory
}
}
}
if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname
if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size]
if {!$is_reparse_point && !$is_directory} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname
if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size]
set do_sizes 1
}
if {"f" in $timed_types} {
set do_times 1
}
#set ftype "f"
}
set ftype "f"
}
# -----------------------------------------------------------
# -----------------------------------------------------------
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {$ftype in $sized_types} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
}
if {$ftype in $timed_types} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
dict set alltimes $fullname [dict create\
c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\
a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\
m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\
]
}
if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
if {$do_sizes} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
}
if {$do_times} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
dict set alltimes $fullname [dict create\
c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\
a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\
m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\
]
}
if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata
}
}
twapi::find_file_close $iterator
}
twapi::find_file_close $iterator
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts
dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types
#also determine whether vfs. file system x is *much* faster than file attributes
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
}
proc get_vfsmounts_in_folder {folderpath} {
set vfsmounts [list]
@ -991,9 +1343,11 @@ namespace eval punk::du {
#work around the horrible tilde-expansion thing (not needed for tcl 9+)
proc file_join_one {base newtail} {
if {[string index $newtail 0] ne {~}} {
return [file join $base $newtail]
#return [file join $base $newtail]
return $base/$newtail
}
return [file join $base ./$newtail]
#return [file join $base ./$newtail]
return $base/./$newtail
}
@ -1121,7 +1475,7 @@ namespace eval punk::du {
#ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now.
#struct::set will affect order: tcl vs critcl give different ordering!
set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]]
#----

32
src/bootsupport/modules/punk/lib-0.1.6.tm

@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check {
#These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author)
#Not any sort of comprehensive check of known tcl bugs.
#These are reported in warning output of 'help tcl' - or used for workarounds in some cases.
proc has_tclbug_caseinsensitiveglob_windows {} {
#https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted
if {"windows" ne $::tcl_platform(platform)} {
set bug 0
} else {
set tmpdir [file tempdir]
set testfile [file join $tmpdir "bugtest"]
set fd [open $testfile w]
puts $fd test
close $fd
set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*]
foreach r $globresult {
if {$r ne "bugtest"} {
set bug 1
break
}
}
return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning]
#possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized
# to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation.
}
}
#todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies.
# but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module,
# or at least checking for an existing mounted case-insensitive filesystem.
# A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like.
# It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix.
proc has_tclbug_regexp_emptystring {} {
#The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces
#This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic,

209
src/bootsupport/modules/punk/nav/fs-0.1.0.tm

@ -50,7 +50,7 @@ package require punk::lib
package require punk::args
package require punk::ansi
package require punk::winpath
package require punk::du
package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing.
package require commandstack
#*** !doctools
#[item] [package {Tcl 8.6}]
@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs {
#[list_begin definitions]
punk::args::define {
@id -id ::punk::nav::fs::d/
@cmd -name punk::nav::fs::d/ -help\
{List directories or directories and files in the current directory or in the
targets specified with the fileglob_or_target glob pattern(s).
If a single target is specified without glob characters, and it exists as a directory,
then the working directory is changed to that target and a listing of that directory
is returned. If the single target specified without glob characters does not exist as
a directory, then it is treated as a glob pattern and the listing is for the current
directory with results filtered to match fileglob_or_target.
If multiple targets or glob patterns are specified, then a separate listing is returned
for each fileglob_or_target pattern.
This function is provided via aliases as ./ and .// with v being inferred from the alias
name, and also as d/ with an explicit v argument.
The ./ and .// forms are more convenient for interactive use.
examples:
./ - list directories in current directory
.// - list directories and files in current directory
./ src/* - list directories in src
.// src/* - list directories and files in src
.// *.txt - list files in current directory with .txt extension
.// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name
(on a case-insensitive filesystem this would also match T*1.txt etc)
.// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name
(glob chars treated as literals due to being in character-class brackets
This will match files beginning with a capital T and not lower case t
even on a case-insensitive filesystem.)
.// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns:
{[t]*} - names beginning with t
{d{e,d}*} - names beginning with de or dd
(on a case-insensitive filesystem the first pattern would also match names beginning with T)
}
@values -min 1 -max -1 -type string
v -type string -choices {/ //} -help\
"
/ - list directories only
// - list directories and files
"
fileglob_or_target -type string -optional true -multiple true -help\
"A glob pattern as supported by Tcl's 'glob' command, to filter results.
If multiple patterns are supplied, then a listing for each pattern is returned.
If no patterns are supplied, then all items are listed."
}
#NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread.
#As this function recurses and calls cd multiple times - it's not thread-safe.
#Another thread could theoretically cd whilst this is running.
@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs {
#puts stdout "-->[ansistring VIEW $result]"
return $result
} else {
set atail [lassign $args cdtarget]
if {[llength $args] == 1} {
set cdtarget [lindex $args 0]
switch -exact -- $cdtarget {
. - ./ {
tailcall punk::nav::fs::d/
tailcall punk::nav::fs::d/ $v
}
.. - ../ {
if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} {
@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs {
if {[string range $cdtarget_copy 0 3] eq "//?/"} {
#handle dos device paths - convert to normal path for glob testing
set glob_test [string range $cdtarget_copy 3 end]
set cdtarget_is_glob [regexp {[*?]} $glob_test]
set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test]
} else {
set cdtarget_is_glob [regexp {[*?]} $cdtarget]
set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget]
#todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing.
}
if {!$cdtarget_is_glob} {
set cdtarget_file_type [file type $cdtarget]
@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs {
}
set VIRTUAL_CWD $cdtarget
set curdir $cdtarget
tailcall punk::nav::fs::d/ $v
} else {
set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget]
if {[string match //zipfs:/* $VIRTUAL_CWD]} {
@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs {
}
if {[file type $target] eq "directory"} {
set VIRTUAL_CWD $target
tailcall punk::nav::fs::d/ $v
}
}
tailcall punk::nav::fs::d/ $v
}
set curdir $VIRTUAL_CWD
} else {
@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs {
#globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.)
set searchspec [lindex $args 0]
set result ""
#set chunklist [list]
@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs {
set this_result [dict create]
foreach searchspec $args {
set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)]
set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}]
#we need to support the same glob chars that Tcl's 'glob' command accepts.
set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]]
#we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean
#this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough.
#lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*)
@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs {
set allow_nonportable [dict exists $received -nonportable]
set curdir [pwd]
set fullpath_list [list]
set fullpath_list [list] ;#list of full paths to create.
set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir)
#these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests.
set error_paths [list]
foreach p $paths {
if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} {
@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $p "Path '$p' contains null character which is not allowed"]
continue
}
set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)]
#e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir.
set fullpath [file join $path1 {*}[lrange $args 1 end]]
set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)]
#if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir.
#Some subpaths of the supplied paths to create may already exist.
#we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all.
#we should test write permissions on the nearest existing parent of the supplied path to create,
#rather than just on the immediate parent segment of the supplied path itself which may not exist.
set fullpath [file normalize $fullpath]
set parent [file dirname $fullpath]
while {![file exists $parent]} {
set parent [file dirname $parent]
@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"]
continue
}
lappend existing_parent_list $parent
lappend fullpath_list $fullpath
}
if {[llength $fullpath_list] != [llength $paths]} {
@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs {
set num_created 0
set error_string ""
foreach fullpath $fullpath_list {
foreach fullpath $fullpath_list existing_parent $existing_parent_list {
#calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue.
#set relative_path [file relative $fullpath $existing_parent]
#todo.
if {[catch {file mkdir $fullpath}]} {
set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted."
cd $curdir
break
}
incr num_created
@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs {
if {$error_string ne ""} {
error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered."
}
d/ $curdir
#display summaries of created directories (which may have already existed) by reusing d/ to get info on them.
set query_paths [lmap v $paths $v/*]
d/ / {*}$query_paths
}
#todo use unknown to allow d/~c:/etc ??
@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs {
if {![file isdirectory $target]} {
error "Folder $target not found"
}
d/ $target
d/ / $target
}
@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs {
}
set relativepath [expr {[file pathtype $searchspec] eq "relative"}]
set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
#set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent.
#(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict)
@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs {
}
puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location"
set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location]
return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
}
#todo - package as punk::nav::fs
@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs {
}
proc dirfiles_dict {args} {
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict]
lassign [dict values $argd] leaders opts vals
set searchspecs [dict values $vals]
lassign [dict values $argd] leaders opts values
set searchspecs [dict values $values]
#puts stderr "searchspecs: $searchspecs [llength $searchspecs]"
#puts stdout "arglist: $opts"
@ -893,7 +958,7 @@ tcl::namespace::eval punk::nav::fs {
set searchspec [lindex $searchspecs 0]
# -- --- --- --- --- --- ---
set opt_searchbase [dict get $opts -searchbase]
set opt_tailglob [dict get $opts -tailglob]
set opt_tailglob [dict get $opts -tailglob]
set opt_with_sizes [dict get $opts -with_sizes]
set opt_with_times [dict get $opts -with_times]
# -- --- --- --- --- --- ---
@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs {
}
}
"\uFFFF" {
set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
if {$searchtail_has_globs} {
if {$is_relativesearchspec} {
#set location [file dirname [file join $searchbase $searchspec]]
@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs {
} else {
set next_opt_with_times [list -with_times $opt_with_times]
}
set ts1 [clock clicks -milliseconds]
if {$is_in_vfs} {
set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times]
} else {
@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs {
}
}
}
set ts2 [clock clicks -milliseconds]
set ts_listing [expr {$ts2 - $ts1}]
set dirs [dict get $listing dirs]
set files [dict get $listing files]
@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs {
set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden]
}
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#-----------------------------------------------------------------------------------------
set ts1 [clock milliseconds]
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#foreach d $dirs {
# if {[lindex [file system $d] 0] eq "tclvfs"} {
@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs {
set files $sorted_files
set filesizes $sorted_filesizes
set ts2 [clock milliseconds]
set ts_sorting [expr {$ts2 - $ts1}]
#-----------------------------------------------------------------------------------------
# -- ---
#jmn
set ts1 [clock milliseconds]
foreach nm [list {*}$dirs {*}$files] {
if {[punk::winpath::illegalname_test $nm]} {
lappend nonportable $nm
}
}
set ts2 [clock milliseconds]
set ts_nonportable_check [expr {$ts2 - $ts1}]
set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms]
set front_of_dict [dict create location $location searchbase $opt_searchbase]
set listing [dict merge $front_of_dict $listing]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info]
return [dict merge $listing $updated]
}
@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs {
@id -id ::punk::nav::fs::dirfiles_dict_as_lines
-stripbase -default 0 -type boolean
-formatsizes -default 1 -type boolean
-listing -default "/" -choices {/ // //}
-listing -default "/" -choices {/ //}
@values -min 1 -max -1 -type dict -unnamed true
}
#todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing?
proc dirfiles_dict_as_lines {args} {
set ts1 [clock milliseconds]
#set ts1 [clock milliseconds]
package require overtype
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines]
lassign [dict values $argd] leaders opts vals
@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs {
#review - symlink to shortcut? hopefully will just work
#classify as file or directory - fallback to file if unknown/undeterminable
set finfo_plus [list]
set ts2 [clock milliseconds]
#set ts2 [clock milliseconds]
foreach fdict $finfo {
set fname [dict get $fdict file]
if {[file extension $fname] eq ".lnk"} {
@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs {
}
unset finfo
puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
#puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}]
@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs {
#consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path
#review: punk::winpath calls cygpath!
#review: file pathtype is platform dependant
proc path_to_absolute {path base platform} {
set ptype [file pathtype $path]
proc path_to_absolute {subpath base platform} {
set ptype [file pathtype $subpath]
if {$ptype eq "absolute"} {
set path_absolute $path
set path_absolute $subpath
} elseif {$ptype eq "volumerelative"} {
if {$platform eq "windows"} {
#unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms)
if {[string index $path 0] eq "/"} {
if {[string index $subpath 0] eq "/"} {
#this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here
#It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions.
#Todo - tidy up.
package require punk::unixywindows
set path_absolute [punk::unixywindows::towinpath $path]
set path_absolute [punk::unixywindows::towinpath $subpath]
#puts stderr "winpath: $path"
} else {
#todo handle volume-relative paths with volume specified c:etc c:
@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs {
#The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives
#Arguably if ...?
#set path_absolute $base/$path
set path_absolute $path
#set path_absolute $base/$subpath
set path_absolute $subpath
}
} else {
# unknown what paths are reported as this on other platforms.. treat as absolute for now
set path_absolute $path
set path_absolute $subpath
}
} else {
set path_absolute $base/$path
}
if {$platform eq "windows"} {
if {[punk::winpath::illegalname_test $path_absolute]} {
set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
}
#e.g relative subpath=* base = c:/test -> c:/test/*
#e.g relative subpath=../test base = c:/test -> c:/test/../test
#e.g relative subpath=* base = //server/share/test -> //server/share/test/*
set path_absolute $base/$subpath
}
#fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues.
#if {$platform eq "windows"} {
# if {[punk::winpath::illegalname_test $path_absolute]} {
# set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
# }
#}
return $path_absolute
}
proc strip_prefix_depth {path prefix} {
@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib {
#[para] Secondary functions that are part of the API
#[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
punk::args::define {
@id -id ::punk::nav::fs::lib::is_fileglob
@cmd -name punk::nav::fs::lib::is_fileglob
@values -min 1 -max 1
path -type string -required true -help\
{String to test for being a glob pattern as recognised by the tcl 'glob' command.
If the string represents a path with multiple segments, only the final component
of the path will be tested for glob characters.
Glob patterns in this context are different to globs accepted by TCL's 'string match'.
A glob pattern is any string that contains unescaped * ? { } [ or ].
This will not detect mismatched unescaped braces or brackets.
Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern.
}
}
proc is_fileglob {str} {
#a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser)
set in_escape 0
set segments [file split $str]
set tail [lindex $segments end]
foreach c [split $tail ""] {
if {$in_escape} {
set in_escape 0
} else {
if {$c eq "\\"} {
set in_escape 1
} elseif {$c in [list * ? "\[" "\]" "{" "}" ]} {
return 1
}
}
}
return 0
}
#*** !doctools

2
src/bootsupport/modules/punk/path-0.1.0.tm

@ -357,7 +357,7 @@ namespace eval punk::path {
}
}
}
puts "==>finalparts: '$finalparts'"
#puts "==>finalparts: '$finalparts'"
# using join - {"" "" server share} -> //server/share and {a b} -> a/b
if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} {
#backtracking on unix-style path can end up with empty string as only member of finalparts

64
src/bootsupport/modules/punk/winpath-0.1.0.tm

@ -114,6 +114,50 @@ namespace eval punk::winpath {
return $path
}
}
proc illegal_char_map_to_doublewide {ch} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
set map [dict create \
"<" "\uFF1C" \
">" "\uFF1E" \
":" "\uFF1A" \
"\"" "\uFF02" \
"/" "\uFF0F" \
"\\" "\uFF3C" \
"|" "\uFF5C" \
"?" "\uFF1F" \
"*" "\uFF0A"]
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
proc illegal_char_map_to_ntfs {ch} {
#windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc.
#see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars
#see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped)
set map [dict create \
"<" "\uF03C" \
">" "\uF03E" \
":" "\uF03A" \
"\"" "\uF022" \
"/" "unknown" \
"\\" "\uF05c" \
"|" "\uF07C" \
"?" "\uF03F" \
"*" "\uF02A"]
#note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API.
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
#we don't validate that path is actually illegal because we don't know the full range of such names.
#The caller can apply this to any path.
#don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently)
@ -200,8 +244,15 @@ namespace eval punk::winpath {
set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"]
#we need to exclude things like path/.. path/.
foreach seg [file split $path] {
if {$seg in [list . ..]} {
set segments [file split $path]
if {[file pathtype $path] eq "absolute"} {
#absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments.
set checksegments [lrange $segments 1 end]
} else {
set checksegments $segments
}
foreach seg $checksegments {
if {$seg in {. ..}} {
#review - what if there is a folder or file that actually has a name such as . or .. ?
#unlikely in normal use - but could done deliberately for bad reasons?
#We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem.
@ -220,10 +271,17 @@ namespace eval punk::winpath {
#only check for actual space as other whitespace seems to work without being stripped
#trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph
if {[string index $seg end] in [list " " "."]} {
if {[string index $seg end] in {" " .}} {
#windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
#set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams.
set re {[<>:"/\\|?*]}
if {[regexp $re $seg]} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
}
#glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph)
#- they seem to be readable from cmd and tclsh as is.

211
src/modules/punk-0.1.tm

@ -6066,9 +6066,218 @@ namespace eval punk {
set pipe [punk::path_list_pipe $glob]
{*}$pipe
}
proc path {{glob *}} {
proc path_basic {{glob *}} {
set pipe [punk::path_list_pipe $glob]
{*}$pipe |> list_as_lines
}
namespace eval argdoc {
punk::args::define {
@id -id ::punk::path
@cmd -name "punk::path" -help\
"Introspection of the PATH environment variable.
This tool will examine executables within each PATH entry and show which binaries
are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns."
@opts
-binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries."
@values -min 0 -max -1
glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories."
}
}
proc path {args} {
set argd [punk::args::parse $args withid ::punk::path]
lassign [dict values $argd] leaders opts values received
set binglobs [dict get $opts -binglobs]
set globs [dict get $values glob]
if {$::tcl_platform(platform) eq "windows"} {
set sep ";"
} else {
# : ok for linux/bsd ... mac?
set sep ":"
}
set all_paths [split [string trimright $::env(PATH) $sep] $sep]
set filtered_paths $all_paths
if {[llength $globs]} {
set filtered_paths [list]
foreach p $all_paths {
foreach g $globs {
if {[string match -nocase $g $p]} {
lappend filtered_paths $p
break
}
}
}
}
#Windows executable search location order:
#1. The current directory.
#2. The directories that are listed in the PATH environment variable.
# within each directory windows uses the PATHEXT environment variable to determine
#which files are considered executable and in which order they are considered.
#So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files
#with the same name but different extensions in the same directory, then the one
#with the extension that appears first in PATHEXT will be executed when that name is called.
#On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output.
#Duplicate PATH entries are also a fairly likely possibility on all platforms.
#This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries.
#By default we don't want to display all executables in all PATH entries - as this can be very verbose
#- but we want to be able to show which executables are overshadowed by which PATH entries,
#and to be able to filter the PATH entries and the executables using glob patterns.
#To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name.
#The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths
#in the original PATH list. The executable dict will contain the paths and path indices where each executable is found,
#and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a
#dict keyed by path index which contains the list of executables in that path - to make it easy to show which
#executables are overshadowed by which paths.
set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows).
set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off).
set d_index_executables [dict create] ;#key is path index, value is list of executables in that path
set path_idx 0
foreach p $all_paths {
if {$::tcl_platform(platform) eq "windows"} {
set pnorm [string tolower $p]
} else {
set pnorm $p
}
if {![dict exists $d_path_info $pnorm]} {
dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]]
set executables [list]
if {[file isdirectory $p]} {
#get all files that are executable in this path.
#If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names.
set pnormglob [file normalize $p]
if {$::tcl_platform(platform) eq "windows"} {
#Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable.
#We need to account for this in our glob pattern.
set pathexts [list]
if {[info exists ::env(PATHEXT)]} {
set env_pathexts [split $::env(PATHEXT) ";"]
#set pathexts [lmap e $env_pathexts {string tolower $e}]
foreach pe $env_pathexts {
if {$pe eq "."} {
continue
}
lappend pathexts [string tolower $pe]
}
} else {
set env_pathexts [list]
#default PATHEXT if not set - according to Microsoft docs
set pathexts [list .com .exe .bat .cmd]
}
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set has_pathext 1
break
}
}
if {!$has_pathext} {
foreach pe $pathexts {
lappend binglobs "$bg$pe"
}
}
}
set lc_binglobs [lmap e $binglobs {string tolower $e}]
if {"." in $pathexts} {
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]]
set has_pathext 1
break
}
}
if {$has_pathext} {
if {[string tolower $base] ni $lc_binglobs} {
lappend binglobs "$base"
}
}
}
}
#TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem.
#(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug)
#We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results.
#The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for.
#(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe')
set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]]
set executables [list]
foreach e $globresults {
puts stderr "glob result: $e"
puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]"
lappend executables [file tail [file normalize $e]]
}
} else {
set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]]
}
}
dict set d_index_executables $path_idx $executables
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
if {![dict exists $d_bin_info $exenorm]} {
dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]]
} else {
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
} else {
#duplicate path entry - add to list of original paths for this normalized path
# Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...?
# we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it.
set pathdata [dict get $d_path_info $pnorm]
dict lappend pathdata original_paths $p
dict lappend pathdata indices $path_idx
dict set d_path_info $pnorm $pathdata
#we don't need to add executables for this path - as they will be the same as the original path that we have already processed.
#However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables.
set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
incr path_idx
}
#temporary debug output to check dicts are being built correctly
set debug ""
append debug "Path info dict:" \n
append debug [showdict $d_path_info] \n
append debug "Binary info dict:" \n
append debug [showdict $d_bin_info] \n
append debug "Index executables dict:" \n
append debug [showdict $d_index_executables] \n
#return $debug
puts stdout $debug
}
#-------------------------------------------------------------------

858
src/modules/punk/du-999999.0a1.0.tm

@ -479,7 +479,7 @@ namespace eval punk::du {
}
namespace eval lib {
variable du_literal
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ]
variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ]
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this
@ -489,7 +489,11 @@ namespace eval punk::du {
return [dict get $winfile_attributes $bitmask]
} else {
#list/dict shimmering?
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
#return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
set decoded [twapi::decode_file_attributes $bitmask]
dict set winfile_attributes $bitmask $decoded
return $decoded
}
}
variable win_reparse_tags
@ -563,23 +567,25 @@ namespace eval punk::du {
#then twapi::device_ioctl (win32 DeviceIoControl)
#then parse buffer somehow (binary scan..)
#https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913
punk::args::define {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}
#don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args.
proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
#set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo]
set defaults [dict create -debug 0 -debugchannel stderr]
set opts [dict merge $defaults [lrange $args 0 end-1]]
set iteminfo [lindex $args end]
set argd [punk::args::parse $args withdef {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}]
set opts [dict get $argd opts]
set iteminfo [dict get $argd values iteminfo]
set opt_debug [dict get $opts -debug]
set opt_debugchannel [dict get $opts -debugchannel]
#-longname is placeholder - caller needs to set
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0]
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}]
if {$opt_debug} {
set dbg "iteminfo returned by find_file_open\n"
append dbg [pdict -channel none iteminfo]
@ -592,34 +598,38 @@ namespace eval punk::du {
}
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
if {"hidden" in $attrinfo} {
dict set result -hidden 1
}
if {"system" in $attrinfo} {
dict set result -system 1
}
if {"readonly" in $attrinfo} {
dict set result -readonly 1
}
dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
if {"reparse_point" in $attrinfo} {
#the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+
#todo - is_microsoft from first bit of high_word
set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter?
if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"]
foreach attr $attrinfo {
switch -- $attr {
hidden {
dict set result -hidden 1
}
system {
dict set result -system 1
}
readonly {
dict set result -readonly 1
}
reparse_point {
#the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+
#todo - is_microsoft from first bit of high_word
set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter?
if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"]
}
}
}
}
#dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
dict set result -raw $iteminfo
return $result
}
@ -652,27 +662,337 @@ namespace eval punk::du {
catch {twapi::find_file_close $iterator}
}
}
proc resolve_characterclass {cclass} {
#takes the inner value from a tcl square bracketed character class and converts to a list of characters.
#todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters
#e.g a-c-3 -> a b c - 3
#e.g a-c-3-5 -> a b c - 3 4 5
#e.g a-c -> a b c
#e.g a- -> a -
#e.g -c-e -> - c d e
#the tcl character class does not support negation or intersection - so we can ignore those possibilities for now.
#in this context we do not need to support named character classes such as [:digit:]
set chars [list]
set i 0
set len [string length $cclass]
set accept_range 0
while {$i < $len} {
set ch [string index $cclass $i]
if {$ch eq "-"} {
if {$accept_range} {
set start [string index $cclass [expr {$i - 1}]]
set end [string index $cclass [expr {$i + 1}]]
if {$start eq "" || $end eq ""} {
#invalid range - treat - as literal
if {"-" ni $chars} {
lappend chars "-"
}
} else {
#we have a range - add all chars from previous char to next char
#range may be in either direction - e.g a-c or c-a but we don't care about the order of our result.
if {$start eq $end} {
#degenerate range - treat as single char
if {$start ni $chars} {
lappend chars $start
}
} else {
if {[scan $start %c] < [scan $end %c]} {
set c1 [scan $start %c]
set c2 [scan $end %c]
} else {
set c1 [scan $end %c]
set c2 [scan $start %c]
}
for {set c $c1} {$c <= $c2} {incr c} {
set char [format %c $c]
if {$char ni $chars} {
lappend chars $char
}
}
}
incr i ;#skip end char as it's already included in range
}
set accept_range 0
} else {
#we have a literal - add to list and allow for possible range if next char is also a -
if {"-" ni $chars} {
lappend chars "-"
}
set accept_range 1
}
} else { #we have a literal - add to list and allow for possible range if next char is also a -
if {$ch ni $chars} {
lappend chars $ch
}
set accept_range 1
}
incr i
}
return $chars
}
#return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible.
#first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes.
# e.g {*[\{]*} is a valid tcl glob
#todo - write tests.
#should also support {*\{*} matching a file such as a{blah}b
#Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL.
#So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity.
proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} {
#windows API function in use is the FindFirstFile set of functions.
#these support wildcards * and ? *only*.
#examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported
# {abc[1-3].txt} -> {abc?.txt}
# {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
# {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call.
# {{S,t}*.txt} -> {S*.txt} {T*.txt}
# *.{txt,log} -> {*.txt} {*.log}
# {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals
set gchars [split $tclglob ""]
set winglob_list [list ""]
set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
set in_brackets 0
set esc_next 0
set brace_depth 0
set brace_content ""
set braced_alternatives [list]
set brace_is_normal 0
set cclass_content ""
foreach ch $gchars {
if {$esc_next} {
if {$in_brackets} {
append cclass_content $ch
continue
} elseif {$brace_depth} {
append brace_content $ch
continue
}
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}]
set esc_next 0
continue
}
if {$ch eq "\{"} {
if {$brace_depth} {
#we have an opening brace char inside braces
#Let the brace processing handle it as it recurses.
incr brace_depth 1
append brace_content $ch
continue
}
incr brace_depth 1
set brace_content ""
} elseif {$ch eq "\}"} {
if {$brace_depth > 1} {
#we have a closing brace char inside braces
append brace_content $ch
incr brace_depth -1
continue
}
#process brace_content representing a list of alternatives
#handle list of alternatives - convert {txt,log} to *.txt;*.log
#set alternatives [split $brace_content ","]
lappend braced_alternatives $brace_content
set alternatives $braced_alternatives
set braced_alternatives [list]
set brace_content ""
incr brace_depth -1
set alt_winpatterns [list]
set alt_regexps [list]
foreach alt $alternatives {
set subresult [tclglob_equivalents $alt]
lappend alt_winpatterns {*}[dict get $subresult winglobs]
lappend alt_regexps {*}[dict get $subresult tclregexps]
}
set next_winglob_list [list]
set next_regexp_list [list]
foreach wg $winglob_list re $tclregexp_list {
#puts "wg: $wg"
#puts "re: $re"
foreach alt_wp $alt_winpatterns alt_re $alt_regexps {
#puts " alt_wp: $alt_wp"
#puts " alt_re: $alt_re"
lappend next_winglob_list "$wg$alt_wp"
set alt_re_no_caret [string range $alt_re 1 end]
lappend next_regexp_list "${re}${alt_re_no_caret}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\["} {
#windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
if {!$brace_depth} {
set in_brackets 1
} else {
#we have a [ char inside braces
#Let the brace processing handle it as it recurses.
#but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets.
set brace_is_normal 1
append brace_content $ch
}
} elseif {$ch eq "\]"} {
if {$brace_depth} {
#we have a ] char inside braces
#Let the brace processing hanele it as it recurses.
append brace_content $ch
continue
}
set in_brackets 0
set charlist [resolve_characterclass $cclass_content]
set cclass_content ""
set next_winglob_list [list]
set next_regexp_list [list]
foreach c $charlist {
#set winglob_list [lmap wg $winglob_list {append wg $c}]
foreach wg $winglob_list {
lappend next_winglob_list "$wg$c"
}
foreach re $tclregexp_list {
set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}]
lappend next_regexp_list "${re}${c_escaped}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\\"} {
if {$in_brackets} {
append cclass_content $ch
#append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself.
set esc_next 1
continue
}
if {$brace_depth} {
#we have a \ char inside braces
#Let the brace processing handle it as it recurses.
append brace_content $ch
set esc_next 1
continue
}
set esc_next 1
continue
} else {
if {$in_brackets} {
append cclass_content $ch
continue
}
if {!$brace_depth} {
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]
if {[string length $re_ch] == 1} {
switch -- $re_ch {
"?" {set re_ch "."}
"*" {set re_ch ".*"}
default {
#we could use the same mixed case filter here for both sensitive and insensitive filesystems,
#because the API filtering will already have done the restriction,
#and so a more permissive regex that matches both cases will still only match the results that the API call returns,
#which will be correct based on the case-sensitivity of the filesystem.
#It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob.
#ie in the parts of the original glob that were in square brackets.
if {!$case_sensitive_filesystem} {
# add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
if {[string is upper $re_ch]} {
set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"]
} elseif {[string is lower $re_ch]} {
set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"]
} else {
#non-alpha char - no need to add case-insensitivity
}
}
}
}
}
set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}]
} else {
#we have a literal char inside braces - add to current brace_content
if {$brace_depth == 1 && $ch eq ","} {
lappend braced_alternatives $brace_content
set brace_content ""
} else {
append brace_content $ch
}
}
}
}
#sanity check
if {[llength $winglob_list] != [llength $tclregexp_list]} {
error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]"
}
set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}]
return [dict create winglobs $winglob_list tclregexps $tclregexp_list]
}
#todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed?
namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs
# get listing without using unix-tools (may not be installed on the windows system)
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
# This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance
#todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob.
# we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before
#we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder.
#For really large folders and a moderate number of patterns, this could be a significant performance improvement.
proc du_dirlisting_twapi {folderpath args} {
set defaults [dict create\
-glob *\
-filedebug 0\
-patterndebug 0\
-with_sizes 1\
-with_times 1\
]
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob]
set tcl_glob [dict get $opts -glob]
#todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps.
set case_sensitive_filesystem 0 ;#todo - consider detecting this properly.
#Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program.
#In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly,
#as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem,
#so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly.
#Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a
#case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem.
#Note: we only use this to adjust the filtering regexps we generate from the tcl glob.
#The windows API pattern match will already filter based on the case-sensitivity of the filesystem
# so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem.
set globs_processed [tclglob_equivalents $tcl_glob]
#we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob.
#temp
#set win_glob_list [list $tcl_glob]
set win_glob_list [dict get $globs_processed winglobs]
set tcl_regex_list [dict get $globs_processed tclregexps]
#review
# our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob.
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_filedebug [dict get $opts -filedebug] ;#per file
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_patterndebug [dict get $opts -patterndebug]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean - (f false vs f file!)
@ -711,256 +1031,288 @@ namespace eval punk::du {
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set errors [dict create]
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
# return it so it can be stored and tried as an alternative for problem paths
#puts stderr ">>> glob: $opt_glob"
#REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases.
try {
#glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field
} on error args {
foreach win_glob $win_glob_list tcl_re $tcl_regex_list {
if {$opt_patterndebug} {
puts stderr ">>>du_dirlisting_twapi winglob: $win_glob"
puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re"
}
#REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases.
try {
if {[string match "*denied*" $args]} {
#output similar format as unixy du
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
puts stderr " (errorcode: $::errorCode)\n"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field
} on error args {
try {
if {[string match "*denied*" $args]} {
#output similar format as unixy du
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
puts stderr " (errorcode: $::errorCode)\n"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#errorcode TWAPI_WIN32 2 {The system cannot find the file specified.}
#This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error
#The find-all glob * won't get here because it returns . & ..
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} {
#errorcode TWAPI_WIN32 2 {The system cannot find the file specified.}
#This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error
#The find-all glob * won't get here because it returns . & ..
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
#if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {}
if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} {
#looks like an ordinary no results for chosen glob
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
#return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
continue
}
}
if {[set plen [pathcharacterlen $folderpath]] >= 250} {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
# re-fetch this folder with altnames
#file normalize - aside from being slow - will have problems with long paths - so this won't work.
#this function should only accept absolute paths
#
#
#Note: using -detail full only helps if the last segment of path has an altname..
#To properly shorten we need to have kept track of altname all the way from the root!
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
#### SLOW
set fixedpath [dict get [file attributes $folderpath] -shortname]
#### SLOW
if {[set plen [pathcharacterlen $folderpath]] >= 250} {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
# re-fetch this folder with altnames
#file normalize - aside from being slow - will have problems with long paths - so this won't work.
#this function should only accept absolute paths
#
#
#Note: using -detail full only helps if the last segment of path has an altname..
#To properly shorten we need to have kept track of altname all the way from the root!
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
#### SLOW
set fixedpath [dict get [file attributes $folderpath] -shortname]
#### SLOW
append errmsg "retrying with with windows altname '$fixedpath'"
puts stderr $errmsg
} else {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
set tmp_errors [list $::errorCode]
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
#we can use //?/path dos device path - but not with tcl functions
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here
append errmsg "retrying with with windows altname '$fixedpath'"
puts stderr $errmsg
} else {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
set tmp_errors [list $::errorCode]
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
#we can use //?/path dos device path - but not with tcl functions
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here
set fixedtail ""
set parent [file dirname $folderpath]
set badtail [file tail $folderpath]
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname]
break
set fixedtail ""
set parent [file dirname $folderpath]
set badtail [file tail $folderpath]
set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo"
if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname]
break
}
}
if {![string length $fixedtail]} {
dict lappend errors $folderpath {*}$tmp_errors
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
#so the illegalname_fix doesn't really work here
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath $parent/$fixedtail
append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg
}
if {![string length $fixedtail]} {
dict lappend errors $folderpath {*}$tmp_errors
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
if {[catch {
set iterator [twapi::find_file_open $fixedpath/* -detail basic]
} errMsg]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')"
puts stderr " (errorcode: $::errorCode)\n"
puts stderr "$errMsg"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
#so the illegalname_fix doesn't really work here
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath [file join $parent $fixedtail]
append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg
} on error args {
set errmsg "error reading folder: $folderpath\n"
append errmsg "error: $args" \n
append errmsg "errorInfo: $::errorInfo" \n
puts stderr "$errmsg"
puts stderr "FAILED to collect info for folder '$folderpath'"
#append errmsg "aborting.."
#error $errmsg
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
#jjj
if {[catch {
set iterator [twapi::find_file_open $fixedpath/* -detail basic]
} errMsg]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')"
puts stderr " (errorcode: $::errorCode)\n"
puts stderr "$errMsg"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
if {![regexp $tcl_re $nm]} {
continue
}
if {$nm in {. ..}} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
#set ftype ""
set do_sizes 0
set do_times 0
#attributes applicable to any classification
set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
set file_attributes [dict get $attrdict -fileattributes]
} on error args {
set errmsg "error reading folder: $folderpath\n"
append errmsg "error: $args" \n
append errmsg "errorInfo: $::errorInfo" \n
puts stderr "$errmsg"
puts stderr "FAILED to collect info for folder '$folderpath'"
#append errmsg "aborting.."
#error $errmsg
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set is_reparse_point [expr {"reparse_point" in $file_attributes}]
set is_directory [expr {"directory" in $file_attributes}]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set linkdata [dict create]
# -----------------------------------------------------------
#main classification
if {$is_reparse_point} {
#this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
#recheck glob
#review!
if {![string match $opt_glob $nm]} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
set ftype ""
#attributes applicable to any classification
set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
set file_attributes [dict get $attrdict -fileattributes]
set linkdata [dict create]
# -----------------------------------------------------------
#main classification
if {"reparse_point" in $file_attributes} {
#this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return
#One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point
#
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du?
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
#e.g (stripped of headers/footers and other lines)
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
#du includes the size of the symlink
#but we can't get it with tcl's file size
#twapi doesn't seem to have anything to help read it either (?)
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
#
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
#
#links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname
set ftype "l"
dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} {
dict set linkdata target_type file
}
}
if {"directory" in $file_attributes} {
if {$nm in {. ..}} {
continue
#One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point
#
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du?
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
#e.g (stripped of headers/footers and other lines)
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
#du includes the size of the symlink
#but we can't get it with tcl's file size
#twapi doesn't seem to have anything to help read it either (?)
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
#
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
#
#links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname
#set ftype "l"
if {"l" in $sized_types} {
set do_sizes 1
}
if {"l" in $timed_types} {
set do_times 1
}
dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} {
dict set linkdata target_type file
}
}
if {"reparse_point" ni $file_attributes} {
lappend dirs $fullname
set ftype "d"
} else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory
if {$is_directory} {
#if {$nm in {. ..}} {
# continue
#}
if {!$is_reparse_point} {
lappend dirs $fullname
#set ftype "d"
if {"d" in $sized_types} {
set do_sizes 1
}
if {"d" in $timed_types} {
set do_times 1
}
} else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory
}
}
}
if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname
if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size]
if {!$is_reparse_point && !$is_directory} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname
if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size]
set do_sizes 1
}
if {"f" in $timed_types} {
set do_times 1
}
#set ftype "f"
}
set ftype "f"
}
# -----------------------------------------------------------
# -----------------------------------------------------------
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {$ftype in $sized_types} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
}
if {$ftype in $timed_types} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
dict set alltimes $fullname [dict create\
c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\
a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\
m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\
]
}
if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
if {$do_sizes} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
}
if {$do_times} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
dict set alltimes $fullname [dict create\
c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\
a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\
m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\
]
}
if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata
}
}
twapi::find_file_close $iterator
}
twapi::find_file_close $iterator
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts
dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types
#also determine whether vfs. file system x is *much* faster than file attributes
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
}
proc get_vfsmounts_in_folder {folderpath} {
set vfsmounts [list]
@ -991,9 +1343,11 @@ namespace eval punk::du {
#work around the horrible tilde-expansion thing (not needed for tcl 9+)
proc file_join_one {base newtail} {
if {[string index $newtail 0] ne {~}} {
return [file join $base $newtail]
#return [file join $base $newtail]
return $base/$newtail
}
return [file join $base ./$newtail]
#return [file join $base ./$newtail]
return $base/./$newtail
}
@ -1121,7 +1475,7 @@ namespace eval punk::du {
#ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now.
#struct::set will affect order: tcl vs critcl give different ordering!
set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]]
#----

32
src/modules/punk/lib-999999.0a1.0.tm

@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check {
#These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author)
#Not any sort of comprehensive check of known tcl bugs.
#These are reported in warning output of 'help tcl' - or used for workarounds in some cases.
proc has_tclbug_caseinsensitiveglob_windows {} {
#https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted
if {"windows" ne $::tcl_platform(platform)} {
set bug 0
} else {
set tmpdir [file tempdir]
set testfile [file join $tmpdir "bugtest"]
set fd [open $testfile w]
puts $fd test
close $fd
set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*]
foreach r $globresult {
if {$r ne "bugtest"} {
set bug 1
break
}
}
return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning]
#possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized
# to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation.
}
}
#todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies.
# but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module,
# or at least checking for an existing mounted case-insensitive filesystem.
# A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like.
# It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix.
proc has_tclbug_regexp_emptystring {} {
#The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces
#This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic,

209
src/modules/punk/nav/fs-999999.0a1.0.tm

@ -50,7 +50,7 @@ package require punk::lib
package require punk::args
package require punk::ansi
package require punk::winpath
package require punk::du
package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing.
package require commandstack
#*** !doctools
#[item] [package {Tcl 8.6}]
@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs {
#[list_begin definitions]
punk::args::define {
@id -id ::punk::nav::fs::d/
@cmd -name punk::nav::fs::d/ -help\
{List directories or directories and files in the current directory or in the
targets specified with the fileglob_or_target glob pattern(s).
If a single target is specified without glob characters, and it exists as a directory,
then the working directory is changed to that target and a listing of that directory
is returned. If the single target specified without glob characters does not exist as
a directory, then it is treated as a glob pattern and the listing is for the current
directory with results filtered to match fileglob_or_target.
If multiple targets or glob patterns are specified, then a separate listing is returned
for each fileglob_or_target pattern.
This function is provided via aliases as ./ and .// with v being inferred from the alias
name, and also as d/ with an explicit v argument.
The ./ and .// forms are more convenient for interactive use.
examples:
./ - list directories in current directory
.// - list directories and files in current directory
./ src/* - list directories in src
.// src/* - list directories and files in src
.// *.txt - list files in current directory with .txt extension
.// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name
(on a case-insensitive filesystem this would also match T*1.txt etc)
.// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name
(glob chars treated as literals due to being in character-class brackets
This will match files beginning with a capital T and not lower case t
even on a case-insensitive filesystem.)
.// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns:
{[t]*} - names beginning with t
{d{e,d}*} - names beginning with de or dd
(on a case-insensitive filesystem the first pattern would also match names beginning with T)
}
@values -min 1 -max -1 -type string
v -type string -choices {/ //} -help\
"
/ - list directories only
// - list directories and files
"
fileglob_or_target -type string -optional true -multiple true -help\
"A glob pattern as supported by Tcl's 'glob' command, to filter results.
If multiple patterns are supplied, then a listing for each pattern is returned.
If no patterns are supplied, then all items are listed."
}
#NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread.
#As this function recurses and calls cd multiple times - it's not thread-safe.
#Another thread could theoretically cd whilst this is running.
@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs {
#puts stdout "-->[ansistring VIEW $result]"
return $result
} else {
set atail [lassign $args cdtarget]
if {[llength $args] == 1} {
set cdtarget [lindex $args 0]
switch -exact -- $cdtarget {
. - ./ {
tailcall punk::nav::fs::d/
tailcall punk::nav::fs::d/ $v
}
.. - ../ {
if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} {
@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs {
if {[string range $cdtarget_copy 0 3] eq "//?/"} {
#handle dos device paths - convert to normal path for glob testing
set glob_test [string range $cdtarget_copy 3 end]
set cdtarget_is_glob [regexp {[*?]} $glob_test]
set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test]
} else {
set cdtarget_is_glob [regexp {[*?]} $cdtarget]
set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget]
#todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing.
}
if {!$cdtarget_is_glob} {
set cdtarget_file_type [file type $cdtarget]
@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs {
}
set VIRTUAL_CWD $cdtarget
set curdir $cdtarget
tailcall punk::nav::fs::d/ $v
} else {
set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget]
if {[string match //zipfs:/* $VIRTUAL_CWD]} {
@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs {
}
if {[file type $target] eq "directory"} {
set VIRTUAL_CWD $target
tailcall punk::nav::fs::d/ $v
}
}
tailcall punk::nav::fs::d/ $v
}
set curdir $VIRTUAL_CWD
} else {
@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs {
#globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.)
set searchspec [lindex $args 0]
set result ""
#set chunklist [list]
@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs {
set this_result [dict create]
foreach searchspec $args {
set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)]
set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}]
#we need to support the same glob chars that Tcl's 'glob' command accepts.
set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]]
#we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean
#this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough.
#lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*)
@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs {
set allow_nonportable [dict exists $received -nonportable]
set curdir [pwd]
set fullpath_list [list]
set fullpath_list [list] ;#list of full paths to create.
set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir)
#these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests.
set error_paths [list]
foreach p $paths {
if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} {
@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $p "Path '$p' contains null character which is not allowed"]
continue
}
set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)]
#e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir.
set fullpath [file join $path1 {*}[lrange $args 1 end]]
set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)]
#if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir.
#Some subpaths of the supplied paths to create may already exist.
#we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all.
#we should test write permissions on the nearest existing parent of the supplied path to create,
#rather than just on the immediate parent segment of the supplied path itself which may not exist.
set fullpath [file normalize $fullpath]
set parent [file dirname $fullpath]
while {![file exists $parent]} {
set parent [file dirname $parent]
@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"]
continue
}
lappend existing_parent_list $parent
lappend fullpath_list $fullpath
}
if {[llength $fullpath_list] != [llength $paths]} {
@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs {
set num_created 0
set error_string ""
foreach fullpath $fullpath_list {
foreach fullpath $fullpath_list existing_parent $existing_parent_list {
#calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue.
#set relative_path [file relative $fullpath $existing_parent]
#todo.
if {[catch {file mkdir $fullpath}]} {
set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted."
cd $curdir
break
}
incr num_created
@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs {
if {$error_string ne ""} {
error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered."
}
d/ $curdir
#display summaries of created directories (which may have already existed) by reusing d/ to get info on them.
set query_paths [lmap v $paths $v/*]
d/ / {*}$query_paths
}
#todo use unknown to allow d/~c:/etc ??
@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs {
if {![file isdirectory $target]} {
error "Folder $target not found"
}
d/ $target
d/ / $target
}
@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs {
}
set relativepath [expr {[file pathtype $searchspec] eq "relative"}]
set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
#set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent.
#(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict)
@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs {
}
puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location"
set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location]
return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
}
#todo - package as punk::nav::fs
@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs {
}
proc dirfiles_dict {args} {
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict]
lassign [dict values $argd] leaders opts vals
set searchspecs [dict values $vals]
lassign [dict values $argd] leaders opts values
set searchspecs [dict values $values]
#puts stderr "searchspecs: $searchspecs [llength $searchspecs]"
#puts stdout "arglist: $opts"
@ -893,7 +958,7 @@ tcl::namespace::eval punk::nav::fs {
set searchspec [lindex $searchspecs 0]
# -- --- --- --- --- --- ---
set opt_searchbase [dict get $opts -searchbase]
set opt_tailglob [dict get $opts -tailglob]
set opt_tailglob [dict get $opts -tailglob]
set opt_with_sizes [dict get $opts -with_sizes]
set opt_with_times [dict get $opts -with_times]
# -- --- --- --- --- --- ---
@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs {
}
}
"\uFFFF" {
set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
if {$searchtail_has_globs} {
if {$is_relativesearchspec} {
#set location [file dirname [file join $searchbase $searchspec]]
@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs {
} else {
set next_opt_with_times [list -with_times $opt_with_times]
}
set ts1 [clock clicks -milliseconds]
if {$is_in_vfs} {
set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times]
} else {
@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs {
}
}
}
set ts2 [clock clicks -milliseconds]
set ts_listing [expr {$ts2 - $ts1}]
set dirs [dict get $listing dirs]
set files [dict get $listing files]
@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs {
set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden]
}
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#-----------------------------------------------------------------------------------------
set ts1 [clock milliseconds]
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#foreach d $dirs {
# if {[lindex [file system $d] 0] eq "tclvfs"} {
@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs {
set files $sorted_files
set filesizes $sorted_filesizes
set ts2 [clock milliseconds]
set ts_sorting [expr {$ts2 - $ts1}]
#-----------------------------------------------------------------------------------------
# -- ---
#jmn
set ts1 [clock milliseconds]
foreach nm [list {*}$dirs {*}$files] {
if {[punk::winpath::illegalname_test $nm]} {
lappend nonportable $nm
}
}
set ts2 [clock milliseconds]
set ts_nonportable_check [expr {$ts2 - $ts1}]
set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms]
set front_of_dict [dict create location $location searchbase $opt_searchbase]
set listing [dict merge $front_of_dict $listing]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info]
return [dict merge $listing $updated]
}
@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs {
@id -id ::punk::nav::fs::dirfiles_dict_as_lines
-stripbase -default 0 -type boolean
-formatsizes -default 1 -type boolean
-listing -default "/" -choices {/ // //}
-listing -default "/" -choices {/ //}
@values -min 1 -max -1 -type dict -unnamed true
}
#todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing?
proc dirfiles_dict_as_lines {args} {
set ts1 [clock milliseconds]
#set ts1 [clock milliseconds]
package require overtype
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines]
lassign [dict values $argd] leaders opts vals
@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs {
#review - symlink to shortcut? hopefully will just work
#classify as file or directory - fallback to file if unknown/undeterminable
set finfo_plus [list]
set ts2 [clock milliseconds]
#set ts2 [clock milliseconds]
foreach fdict $finfo {
set fname [dict get $fdict file]
if {[file extension $fname] eq ".lnk"} {
@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs {
}
unset finfo
puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
#puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}]
@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs {
#consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path
#review: punk::winpath calls cygpath!
#review: file pathtype is platform dependant
proc path_to_absolute {path base platform} {
set ptype [file pathtype $path]
proc path_to_absolute {subpath base platform} {
set ptype [file pathtype $subpath]
if {$ptype eq "absolute"} {
set path_absolute $path
set path_absolute $subpath
} elseif {$ptype eq "volumerelative"} {
if {$platform eq "windows"} {
#unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms)
if {[string index $path 0] eq "/"} {
if {[string index $subpath 0] eq "/"} {
#this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here
#It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions.
#Todo - tidy up.
package require punk::unixywindows
set path_absolute [punk::unixywindows::towinpath $path]
set path_absolute [punk::unixywindows::towinpath $subpath]
#puts stderr "winpath: $path"
} else {
#todo handle volume-relative paths with volume specified c:etc c:
@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs {
#The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives
#Arguably if ...?
#set path_absolute $base/$path
set path_absolute $path
#set path_absolute $base/$subpath
set path_absolute $subpath
}
} else {
# unknown what paths are reported as this on other platforms.. treat as absolute for now
set path_absolute $path
set path_absolute $subpath
}
} else {
set path_absolute $base/$path
}
if {$platform eq "windows"} {
if {[punk::winpath::illegalname_test $path_absolute]} {
set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
}
#e.g relative subpath=* base = c:/test -> c:/test/*
#e.g relative subpath=../test base = c:/test -> c:/test/../test
#e.g relative subpath=* base = //server/share/test -> //server/share/test/*
set path_absolute $base/$subpath
}
#fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues.
#if {$platform eq "windows"} {
# if {[punk::winpath::illegalname_test $path_absolute]} {
# set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
# }
#}
return $path_absolute
}
proc strip_prefix_depth {path prefix} {
@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib {
#[para] Secondary functions that are part of the API
#[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
punk::args::define {
@id -id ::punk::nav::fs::lib::is_fileglob
@cmd -name punk::nav::fs::lib::is_fileglob
@values -min 1 -max 1
path -type string -required true -help\
{String to test for being a glob pattern as recognised by the tcl 'glob' command.
If the string represents a path with multiple segments, only the final component
of the path will be tested for glob characters.
Glob patterns in this context are different to globs accepted by TCL's 'string match'.
A glob pattern is any string that contains unescaped * ? { } [ or ].
This will not detect mismatched unescaped braces or brackets.
Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern.
}
}
proc is_fileglob {str} {
#a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser)
set in_escape 0
set segments [file split $str]
set tail [lindex $segments end]
foreach c [split $tail ""] {
if {$in_escape} {
set in_escape 0
} else {
if {$c eq "\\"} {
set in_escape 1
} elseif {$c in [list * ? "\[" "\]" "{" "}" ]} {
return 1
}
}
}
return 0
}
#*** !doctools

2
src/modules/punk/path-999999.0a1.0.tm

@ -357,7 +357,7 @@ namespace eval punk::path {
}
}
}
puts "==>finalparts: '$finalparts'"
#puts "==>finalparts: '$finalparts'"
# using join - {"" "" server share} -> //server/share and {a b} -> a/b
if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} {
#backtracking on unix-style path can end up with empty string as only member of finalparts

64
src/modules/punk/winpath-999999.0a1.0.tm

@ -114,6 +114,50 @@ namespace eval punk::winpath {
return $path
}
}
proc illegal_char_map_to_doublewide {ch} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
set map [dict create \
"<" "\uFF1C" \
">" "\uFF1E" \
":" "\uFF1A" \
"\"" "\uFF02" \
"/" "\uFF0F" \
"\\" "\uFF3C" \
"|" "\uFF5C" \
"?" "\uFF1F" \
"*" "\uFF0A"]
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
proc illegal_char_map_to_ntfs {ch} {
#windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc.
#see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars
#see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped)
set map [dict create \
"<" "\uF03C" \
">" "\uF03E" \
":" "\uF03A" \
"\"" "\uF022" \
"/" "unknown" \
"\\" "\uF05c" \
"|" "\uF07C" \
"?" "\uF03F" \
"*" "\uF02A"]
#note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API.
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
#we don't validate that path is actually illegal because we don't know the full range of such names.
#The caller can apply this to any path.
#don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently)
@ -200,8 +244,15 @@ namespace eval punk::winpath {
set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"]
#we need to exclude things like path/.. path/.
foreach seg [file split $path] {
if {$seg in [list . ..]} {
set segments [file split $path]
if {[file pathtype $path] eq "absolute"} {
#absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments.
set checksegments [lrange $segments 1 end]
} else {
set checksegments $segments
}
foreach seg $checksegments {
if {$seg in {. ..}} {
#review - what if there is a folder or file that actually has a name such as . or .. ?
#unlikely in normal use - but could done deliberately for bad reasons?
#We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem.
@ -220,10 +271,17 @@ namespace eval punk::winpath {
#only check for actual space as other whitespace seems to work without being stripped
#trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph
if {[string index $seg end] in [list " " "."]} {
if {[string index $seg end] in {" " .}} {
#windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
#set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams.
set re {[<>:"/\\|?*]}
if {[regexp $re $seg]} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
}
#glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph)
#- they seem to be readable from cmd and tclsh as is.

211
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm

@ -6066,9 +6066,218 @@ namespace eval punk {
set pipe [punk::path_list_pipe $glob]
{*}$pipe
}
proc path {{glob *}} {
proc path_basic {{glob *}} {
set pipe [punk::path_list_pipe $glob]
{*}$pipe |> list_as_lines
}
namespace eval argdoc {
punk::args::define {
@id -id ::punk::path
@cmd -name "punk::path" -help\
"Introspection of the PATH environment variable.
This tool will examine executables within each PATH entry and show which binaries
are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns."
@opts
-binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries."
@values -min 0 -max -1
glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories."
}
}
proc path {args} {
set argd [punk::args::parse $args withid ::punk::path]
lassign [dict values $argd] leaders opts values received
set binglobs [dict get $opts -binglobs]
set globs [dict get $values glob]
if {$::tcl_platform(platform) eq "windows"} {
set sep ";"
} else {
# : ok for linux/bsd ... mac?
set sep ":"
}
set all_paths [split [string trimright $::env(PATH) $sep] $sep]
set filtered_paths $all_paths
if {[llength $globs]} {
set filtered_paths [list]
foreach p $all_paths {
foreach g $globs {
if {[string match -nocase $g $p]} {
lappend filtered_paths $p
break
}
}
}
}
#Windows executable search location order:
#1. The current directory.
#2. The directories that are listed in the PATH environment variable.
# within each directory windows uses the PATHEXT environment variable to determine
#which files are considered executable and in which order they are considered.
#So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files
#with the same name but different extensions in the same directory, then the one
#with the extension that appears first in PATHEXT will be executed when that name is called.
#On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output.
#Duplicate PATH entries are also a fairly likely possibility on all platforms.
#This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries.
#By default we don't want to display all executables in all PATH entries - as this can be very verbose
#- but we want to be able to show which executables are overshadowed by which PATH entries,
#and to be able to filter the PATH entries and the executables using glob patterns.
#To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name.
#The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths
#in the original PATH list. The executable dict will contain the paths and path indices where each executable is found,
#and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a
#dict keyed by path index which contains the list of executables in that path - to make it easy to show which
#executables are overshadowed by which paths.
set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows).
set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off).
set d_index_executables [dict create] ;#key is path index, value is list of executables in that path
set path_idx 0
foreach p $all_paths {
if {$::tcl_platform(platform) eq "windows"} {
set pnorm [string tolower $p]
} else {
set pnorm $p
}
if {![dict exists $d_path_info $pnorm]} {
dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]]
set executables [list]
if {[file isdirectory $p]} {
#get all files that are executable in this path.
#If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names.
set pnormglob [file normalize $p]
if {$::tcl_platform(platform) eq "windows"} {
#Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable.
#We need to account for this in our glob pattern.
set pathexts [list]
if {[info exists ::env(PATHEXT)]} {
set env_pathexts [split $::env(PATHEXT) ";"]
#set pathexts [lmap e $env_pathexts {string tolower $e}]
foreach pe $env_pathexts {
if {$pe eq "."} {
continue
}
lappend pathexts [string tolower $pe]
}
} else {
set env_pathexts [list]
#default PATHEXT if not set - according to Microsoft docs
set pathexts [list .com .exe .bat .cmd]
}
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set has_pathext 1
break
}
}
if {!$has_pathext} {
foreach pe $pathexts {
lappend binglobs "$bg$pe"
}
}
}
set lc_binglobs [lmap e $binglobs {string tolower $e}]
if {"." in $pathexts} {
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]]
set has_pathext 1
break
}
}
if {$has_pathext} {
if {[string tolower $base] ni $lc_binglobs} {
lappend binglobs "$base"
}
}
}
}
#TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem.
#(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug)
#We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results.
#The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for.
#(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe')
set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]]
set executables [list]
foreach e $globresults {
puts stderr "glob result: $e"
puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]"
lappend executables [file tail [file normalize $e]]
}
} else {
set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]]
}
}
dict set d_index_executables $path_idx $executables
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
if {![dict exists $d_bin_info $exenorm]} {
dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]]
} else {
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
} else {
#duplicate path entry - add to list of original paths for this normalized path
# Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...?
# we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it.
set pathdata [dict get $d_path_info $pnorm]
dict lappend pathdata original_paths $p
dict lappend pathdata indices $path_idx
dict set d_path_info $pnorm $pathdata
#we don't need to add executables for this path - as they will be the same as the original path that we have already processed.
#However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables.
set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
incr path_idx
}
#temporary debug output to check dicts are being built correctly
set debug ""
append debug "Path info dict:" \n
append debug [showdict $d_path_info] \n
append debug "Binary info dict:" \n
append debug [showdict $d_bin_info] \n
append debug "Index executables dict:" \n
append debug [showdict $d_index_executables] \n
#return $debug
puts stdout $debug
}
#-------------------------------------------------------------------

858
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm

@ -479,7 +479,7 @@ namespace eval punk::du {
}
namespace eval lib {
variable du_literal
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ]
variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ]
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this
@ -489,7 +489,11 @@ namespace eval punk::du {
return [dict get $winfile_attributes $bitmask]
} else {
#list/dict shimmering?
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
#return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
set decoded [twapi::decode_file_attributes $bitmask]
dict set winfile_attributes $bitmask $decoded
return $decoded
}
}
variable win_reparse_tags
@ -563,23 +567,25 @@ namespace eval punk::du {
#then twapi::device_ioctl (win32 DeviceIoControl)
#then parse buffer somehow (binary scan..)
#https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913
punk::args::define {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}
#don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args.
proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
#set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo]
set defaults [dict create -debug 0 -debugchannel stderr]
set opts [dict merge $defaults [lrange $args 0 end-1]]
set iteminfo [lindex $args end]
set argd [punk::args::parse $args withdef {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}]
set opts [dict get $argd opts]
set iteminfo [dict get $argd values iteminfo]
set opt_debug [dict get $opts -debug]
set opt_debugchannel [dict get $opts -debugchannel]
#-longname is placeholder - caller needs to set
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0]
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}]
if {$opt_debug} {
set dbg "iteminfo returned by find_file_open\n"
append dbg [pdict -channel none iteminfo]
@ -592,34 +598,38 @@ namespace eval punk::du {
}
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
if {"hidden" in $attrinfo} {
dict set result -hidden 1
}
if {"system" in $attrinfo} {
dict set result -system 1
}
if {"readonly" in $attrinfo} {
dict set result -readonly 1
}
dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
if {"reparse_point" in $attrinfo} {
#the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+
#todo - is_microsoft from first bit of high_word
set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter?
if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"]
foreach attr $attrinfo {
switch -- $attr {
hidden {
dict set result -hidden 1
}
system {
dict set result -system 1
}
readonly {
dict set result -readonly 1
}
reparse_point {
#the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+
#todo - is_microsoft from first bit of high_word
set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter?
if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"]
}
}
}
}
#dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
dict set result -raw $iteminfo
return $result
}
@ -652,27 +662,337 @@ namespace eval punk::du {
catch {twapi::find_file_close $iterator}
}
}
proc resolve_characterclass {cclass} {
#takes the inner value from a tcl square bracketed character class and converts to a list of characters.
#todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters
#e.g a-c-3 -> a b c - 3
#e.g a-c-3-5 -> a b c - 3 4 5
#e.g a-c -> a b c
#e.g a- -> a -
#e.g -c-e -> - c d e
#the tcl character class does not support negation or intersection - so we can ignore those possibilities for now.
#in this context we do not need to support named character classes such as [:digit:]
set chars [list]
set i 0
set len [string length $cclass]
set accept_range 0
while {$i < $len} {
set ch [string index $cclass $i]
if {$ch eq "-"} {
if {$accept_range} {
set start [string index $cclass [expr {$i - 1}]]
set end [string index $cclass [expr {$i + 1}]]
if {$start eq "" || $end eq ""} {
#invalid range - treat - as literal
if {"-" ni $chars} {
lappend chars "-"
}
} else {
#we have a range - add all chars from previous char to next char
#range may be in either direction - e.g a-c or c-a but we don't care about the order of our result.
if {$start eq $end} {
#degenerate range - treat as single char
if {$start ni $chars} {
lappend chars $start
}
} else {
if {[scan $start %c] < [scan $end %c]} {
set c1 [scan $start %c]
set c2 [scan $end %c]
} else {
set c1 [scan $end %c]
set c2 [scan $start %c]
}
for {set c $c1} {$c <= $c2} {incr c} {
set char [format %c $c]
if {$char ni $chars} {
lappend chars $char
}
}
}
incr i ;#skip end char as it's already included in range
}
set accept_range 0
} else {
#we have a literal - add to list and allow for possible range if next char is also a -
if {"-" ni $chars} {
lappend chars "-"
}
set accept_range 1
}
} else { #we have a literal - add to list and allow for possible range if next char is also a -
if {$ch ni $chars} {
lappend chars $ch
}
set accept_range 1
}
incr i
}
return $chars
}
#return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible.
#first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes.
# e.g {*[\{]*} is a valid tcl glob
#todo - write tests.
#should also support {*\{*} matching a file such as a{blah}b
#Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL.
#So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity.
proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} {
#windows API function in use is the FindFirstFile set of functions.
#these support wildcards * and ? *only*.
#examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported
# {abc[1-3].txt} -> {abc?.txt}
# {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
# {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call.
# {{S,t}*.txt} -> {S*.txt} {T*.txt}
# *.{txt,log} -> {*.txt} {*.log}
# {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals
set gchars [split $tclglob ""]
set winglob_list [list ""]
set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
set in_brackets 0
set esc_next 0
set brace_depth 0
set brace_content ""
set braced_alternatives [list]
set brace_is_normal 0
set cclass_content ""
foreach ch $gchars {
if {$esc_next} {
if {$in_brackets} {
append cclass_content $ch
continue
} elseif {$brace_depth} {
append brace_content $ch
continue
}
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}]
set esc_next 0
continue
}
if {$ch eq "\{"} {
if {$brace_depth} {
#we have an opening brace char inside braces
#Let the brace processing handle it as it recurses.
incr brace_depth 1
append brace_content $ch
continue
}
incr brace_depth 1
set brace_content ""
} elseif {$ch eq "\}"} {
if {$brace_depth > 1} {
#we have a closing brace char inside braces
append brace_content $ch
incr brace_depth -1
continue
}
#process brace_content representing a list of alternatives
#handle list of alternatives - convert {txt,log} to *.txt;*.log
#set alternatives [split $brace_content ","]
lappend braced_alternatives $brace_content
set alternatives $braced_alternatives
set braced_alternatives [list]
set brace_content ""
incr brace_depth -1
set alt_winpatterns [list]
set alt_regexps [list]
foreach alt $alternatives {
set subresult [tclglob_equivalents $alt]
lappend alt_winpatterns {*}[dict get $subresult winglobs]
lappend alt_regexps {*}[dict get $subresult tclregexps]
}
set next_winglob_list [list]
set next_regexp_list [list]
foreach wg $winglob_list re $tclregexp_list {
#puts "wg: $wg"
#puts "re: $re"
foreach alt_wp $alt_winpatterns alt_re $alt_regexps {
#puts " alt_wp: $alt_wp"
#puts " alt_re: $alt_re"
lappend next_winglob_list "$wg$alt_wp"
set alt_re_no_caret [string range $alt_re 1 end]
lappend next_regexp_list "${re}${alt_re_no_caret}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\["} {
#windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
if {!$brace_depth} {
set in_brackets 1
} else {
#we have a [ char inside braces
#Let the brace processing handle it as it recurses.
#but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets.
set brace_is_normal 1
append brace_content $ch
}
} elseif {$ch eq "\]"} {
if {$brace_depth} {
#we have a ] char inside braces
#Let the brace processing hanele it as it recurses.
append brace_content $ch
continue
}
set in_brackets 0
set charlist [resolve_characterclass $cclass_content]
set cclass_content ""
set next_winglob_list [list]
set next_regexp_list [list]
foreach c $charlist {
#set winglob_list [lmap wg $winglob_list {append wg $c}]
foreach wg $winglob_list {
lappend next_winglob_list "$wg$c"
}
foreach re $tclregexp_list {
set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}]
lappend next_regexp_list "${re}${c_escaped}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\\"} {
if {$in_brackets} {
append cclass_content $ch
#append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself.
set esc_next 1
continue
}
if {$brace_depth} {
#we have a \ char inside braces
#Let the brace processing handle it as it recurses.
append brace_content $ch
set esc_next 1
continue
}
set esc_next 1
continue
} else {
if {$in_brackets} {
append cclass_content $ch
continue
}
if {!$brace_depth} {
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]
if {[string length $re_ch] == 1} {
switch -- $re_ch {
"?" {set re_ch "."}
"*" {set re_ch ".*"}
default {
#we could use the same mixed case filter here for both sensitive and insensitive filesystems,
#because the API filtering will already have done the restriction,
#and so a more permissive regex that matches both cases will still only match the results that the API call returns,
#which will be correct based on the case-sensitivity of the filesystem.
#It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob.
#ie in the parts of the original glob that were in square brackets.
if {!$case_sensitive_filesystem} {
# add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
if {[string is upper $re_ch]} {
set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"]
} elseif {[string is lower $re_ch]} {
set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"]
} else {
#non-alpha char - no need to add case-insensitivity
}
}
}
}
}
set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}]
} else {
#we have a literal char inside braces - add to current brace_content
if {$brace_depth == 1 && $ch eq ","} {
lappend braced_alternatives $brace_content
set brace_content ""
} else {
append brace_content $ch
}
}
}
}
#sanity check
if {[llength $winglob_list] != [llength $tclregexp_list]} {
error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]"
}
set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}]
return [dict create winglobs $winglob_list tclregexps $tclregexp_list]
}
#todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed?
namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs
# get listing without using unix-tools (may not be installed on the windows system)
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
# This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance
#todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob.
# we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before
#we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder.
#For really large folders and a moderate number of patterns, this could be a significant performance improvement.
proc du_dirlisting_twapi {folderpath args} {
set defaults [dict create\
-glob *\
-filedebug 0\
-patterndebug 0\
-with_sizes 1\
-with_times 1\
]
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob]
set tcl_glob [dict get $opts -glob]
#todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps.
set case_sensitive_filesystem 0 ;#todo - consider detecting this properly.
#Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program.
#In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly,
#as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem,
#so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly.
#Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a
#case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem.
#Note: we only use this to adjust the filtering regexps we generate from the tcl glob.
#The windows API pattern match will already filter based on the case-sensitivity of the filesystem
# so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem.
set globs_processed [tclglob_equivalents $tcl_glob]
#we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob.
#temp
#set win_glob_list [list $tcl_glob]
set win_glob_list [dict get $globs_processed winglobs]
set tcl_regex_list [dict get $globs_processed tclregexps]
#review
# our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob.
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_filedebug [dict get $opts -filedebug] ;#per file
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_patterndebug [dict get $opts -patterndebug]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean - (f false vs f file!)
@ -711,256 +1031,288 @@ namespace eval punk::du {
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set errors [dict create]
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
# return it so it can be stored and tried as an alternative for problem paths
#puts stderr ">>> glob: $opt_glob"
#REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases.
try {
#glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field
} on error args {
foreach win_glob $win_glob_list tcl_re $tcl_regex_list {
if {$opt_patterndebug} {
puts stderr ">>>du_dirlisting_twapi winglob: $win_glob"
puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re"
}
#REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases.
try {
if {[string match "*denied*" $args]} {
#output similar format as unixy du
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
puts stderr " (errorcode: $::errorCode)\n"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field
} on error args {
try {
if {[string match "*denied*" $args]} {
#output similar format as unixy du
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
puts stderr " (errorcode: $::errorCode)\n"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#errorcode TWAPI_WIN32 2 {The system cannot find the file specified.}
#This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error
#The find-all glob * won't get here because it returns . & ..
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} {
#errorcode TWAPI_WIN32 2 {The system cannot find the file specified.}
#This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error
#The find-all glob * won't get here because it returns . & ..
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
#if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {}
if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} {
#looks like an ordinary no results for chosen glob
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
#return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
continue
}
}
if {[set plen [pathcharacterlen $folderpath]] >= 250} {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
# re-fetch this folder with altnames
#file normalize - aside from being slow - will have problems with long paths - so this won't work.
#this function should only accept absolute paths
#
#
#Note: using -detail full only helps if the last segment of path has an altname..
#To properly shorten we need to have kept track of altname all the way from the root!
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
#### SLOW
set fixedpath [dict get [file attributes $folderpath] -shortname]
#### SLOW
if {[set plen [pathcharacterlen $folderpath]] >= 250} {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
# re-fetch this folder with altnames
#file normalize - aside from being slow - will have problems with long paths - so this won't work.
#this function should only accept absolute paths
#
#
#Note: using -detail full only helps if the last segment of path has an altname..
#To properly shorten we need to have kept track of altname all the way from the root!
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
#### SLOW
set fixedpath [dict get [file attributes $folderpath] -shortname]
#### SLOW
append errmsg "retrying with with windows altname '$fixedpath'"
puts stderr $errmsg
} else {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
set tmp_errors [list $::errorCode]
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
#we can use //?/path dos device path - but not with tcl functions
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here
append errmsg "retrying with with windows altname '$fixedpath'"
puts stderr $errmsg
} else {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
set tmp_errors [list $::errorCode]
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
#we can use //?/path dos device path - but not with tcl functions
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here
set fixedtail ""
set parent [file dirname $folderpath]
set badtail [file tail $folderpath]
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname]
break
set fixedtail ""
set parent [file dirname $folderpath]
set badtail [file tail $folderpath]
set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo"
if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname]
break
}
}
if {![string length $fixedtail]} {
dict lappend errors $folderpath {*}$tmp_errors
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
#so the illegalname_fix doesn't really work here
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath $parent/$fixedtail
append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg
}
if {![string length $fixedtail]} {
dict lappend errors $folderpath {*}$tmp_errors
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
if {[catch {
set iterator [twapi::find_file_open $fixedpath/* -detail basic]
} errMsg]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')"
puts stderr " (errorcode: $::errorCode)\n"
puts stderr "$errMsg"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
#so the illegalname_fix doesn't really work here
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath [file join $parent $fixedtail]
append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg
} on error args {
set errmsg "error reading folder: $folderpath\n"
append errmsg "error: $args" \n
append errmsg "errorInfo: $::errorInfo" \n
puts stderr "$errmsg"
puts stderr "FAILED to collect info for folder '$folderpath'"
#append errmsg "aborting.."
#error $errmsg
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
#jjj
if {[catch {
set iterator [twapi::find_file_open $fixedpath/* -detail basic]
} errMsg]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')"
puts stderr " (errorcode: $::errorCode)\n"
puts stderr "$errMsg"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
if {![regexp $tcl_re $nm]} {
continue
}
if {$nm in {. ..}} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
#set ftype ""
set do_sizes 0
set do_times 0
#attributes applicable to any classification
set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
set file_attributes [dict get $attrdict -fileattributes]
} on error args {
set errmsg "error reading folder: $folderpath\n"
append errmsg "error: $args" \n
append errmsg "errorInfo: $::errorInfo" \n
puts stderr "$errmsg"
puts stderr "FAILED to collect info for folder '$folderpath'"
#append errmsg "aborting.."
#error $errmsg
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set is_reparse_point [expr {"reparse_point" in $file_attributes}]
set is_directory [expr {"directory" in $file_attributes}]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set linkdata [dict create]
# -----------------------------------------------------------
#main classification
if {$is_reparse_point} {
#this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
#recheck glob
#review!
if {![string match $opt_glob $nm]} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
set ftype ""
#attributes applicable to any classification
set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
set file_attributes [dict get $attrdict -fileattributes]
set linkdata [dict create]
# -----------------------------------------------------------
#main classification
if {"reparse_point" in $file_attributes} {
#this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return
#One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point
#
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du?
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
#e.g (stripped of headers/footers and other lines)
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
#du includes the size of the symlink
#but we can't get it with tcl's file size
#twapi doesn't seem to have anything to help read it either (?)
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
#
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
#
#links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname
set ftype "l"
dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} {
dict set linkdata target_type file
}
}
if {"directory" in $file_attributes} {
if {$nm in {. ..}} {
continue
#One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point
#
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du?
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
#e.g (stripped of headers/footers and other lines)
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
#du includes the size of the symlink
#but we can't get it with tcl's file size
#twapi doesn't seem to have anything to help read it either (?)
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
#
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
#
#links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname
#set ftype "l"
if {"l" in $sized_types} {
set do_sizes 1
}
if {"l" in $timed_types} {
set do_times 1
}
dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} {
dict set linkdata target_type file
}
}
if {"reparse_point" ni $file_attributes} {
lappend dirs $fullname
set ftype "d"
} else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory
if {$is_directory} {
#if {$nm in {. ..}} {
# continue
#}
if {!$is_reparse_point} {
lappend dirs $fullname
#set ftype "d"
if {"d" in $sized_types} {
set do_sizes 1
}
if {"d" in $timed_types} {
set do_times 1
}
} else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory
}
}
}
if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname
if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size]
if {!$is_reparse_point && !$is_directory} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname
if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size]
set do_sizes 1
}
if {"f" in $timed_types} {
set do_times 1
}
#set ftype "f"
}
set ftype "f"
}
# -----------------------------------------------------------
# -----------------------------------------------------------
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {$ftype in $sized_types} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
}
if {$ftype in $timed_types} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
dict set alltimes $fullname [dict create\
c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\
a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\
m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\
]
}
if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
if {$do_sizes} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
}
if {$do_times} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
dict set alltimes $fullname [dict create\
c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\
a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\
m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\
]
}
if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata
}
}
twapi::find_file_close $iterator
}
twapi::find_file_close $iterator
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts
dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types
#also determine whether vfs. file system x is *much* faster than file attributes
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
}
proc get_vfsmounts_in_folder {folderpath} {
set vfsmounts [list]
@ -991,9 +1343,11 @@ namespace eval punk::du {
#work around the horrible tilde-expansion thing (not needed for tcl 9+)
proc file_join_one {base newtail} {
if {[string index $newtail 0] ne {~}} {
return [file join $base $newtail]
#return [file join $base $newtail]
return $base/$newtail
}
return [file join $base ./$newtail]
#return [file join $base ./$newtail]
return $base/./$newtail
}
@ -1121,7 +1475,7 @@ namespace eval punk::du {
#ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now.
#struct::set will affect order: tcl vs critcl give different ordering!
set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]]
#----

32
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm

@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check {
#These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author)
#Not any sort of comprehensive check of known tcl bugs.
#These are reported in warning output of 'help tcl' - or used for workarounds in some cases.
proc has_tclbug_caseinsensitiveglob_windows {} {
#https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted
if {"windows" ne $::tcl_platform(platform)} {
set bug 0
} else {
set tmpdir [file tempdir]
set testfile [file join $tmpdir "bugtest"]
set fd [open $testfile w]
puts $fd test
close $fd
set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*]
foreach r $globresult {
if {$r ne "bugtest"} {
set bug 1
break
}
}
return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning]
#possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized
# to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation.
}
}
#todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies.
# but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module,
# or at least checking for an existing mounted case-insensitive filesystem.
# A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like.
# It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix.
proc has_tclbug_regexp_emptystring {} {
#The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces
#This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic,

209
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm

@ -50,7 +50,7 @@ package require punk::lib
package require punk::args
package require punk::ansi
package require punk::winpath
package require punk::du
package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing.
package require commandstack
#*** !doctools
#[item] [package {Tcl 8.6}]
@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs {
#[list_begin definitions]
punk::args::define {
@id -id ::punk::nav::fs::d/
@cmd -name punk::nav::fs::d/ -help\
{List directories or directories and files in the current directory or in the
targets specified with the fileglob_or_target glob pattern(s).
If a single target is specified without glob characters, and it exists as a directory,
then the working directory is changed to that target and a listing of that directory
is returned. If the single target specified without glob characters does not exist as
a directory, then it is treated as a glob pattern and the listing is for the current
directory with results filtered to match fileglob_or_target.
If multiple targets or glob patterns are specified, then a separate listing is returned
for each fileglob_or_target pattern.
This function is provided via aliases as ./ and .// with v being inferred from the alias
name, and also as d/ with an explicit v argument.
The ./ and .// forms are more convenient for interactive use.
examples:
./ - list directories in current directory
.// - list directories and files in current directory
./ src/* - list directories in src
.// src/* - list directories and files in src
.// *.txt - list files in current directory with .txt extension
.// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name
(on a case-insensitive filesystem this would also match T*1.txt etc)
.// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name
(glob chars treated as literals due to being in character-class brackets
This will match files beginning with a capital T and not lower case t
even on a case-insensitive filesystem.)
.// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns:
{[t]*} - names beginning with t
{d{e,d}*} - names beginning with de or dd
(on a case-insensitive filesystem the first pattern would also match names beginning with T)
}
@values -min 1 -max -1 -type string
v -type string -choices {/ //} -help\
"
/ - list directories only
// - list directories and files
"
fileglob_or_target -type string -optional true -multiple true -help\
"A glob pattern as supported by Tcl's 'glob' command, to filter results.
If multiple patterns are supplied, then a listing for each pattern is returned.
If no patterns are supplied, then all items are listed."
}
#NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread.
#As this function recurses and calls cd multiple times - it's not thread-safe.
#Another thread could theoretically cd whilst this is running.
@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs {
#puts stdout "-->[ansistring VIEW $result]"
return $result
} else {
set atail [lassign $args cdtarget]
if {[llength $args] == 1} {
set cdtarget [lindex $args 0]
switch -exact -- $cdtarget {
. - ./ {
tailcall punk::nav::fs::d/
tailcall punk::nav::fs::d/ $v
}
.. - ../ {
if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} {
@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs {
if {[string range $cdtarget_copy 0 3] eq "//?/"} {
#handle dos device paths - convert to normal path for glob testing
set glob_test [string range $cdtarget_copy 3 end]
set cdtarget_is_glob [regexp {[*?]} $glob_test]
set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test]
} else {
set cdtarget_is_glob [regexp {[*?]} $cdtarget]
set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget]
#todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing.
}
if {!$cdtarget_is_glob} {
set cdtarget_file_type [file type $cdtarget]
@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs {
}
set VIRTUAL_CWD $cdtarget
set curdir $cdtarget
tailcall punk::nav::fs::d/ $v
} else {
set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget]
if {[string match //zipfs:/* $VIRTUAL_CWD]} {
@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs {
}
if {[file type $target] eq "directory"} {
set VIRTUAL_CWD $target
tailcall punk::nav::fs::d/ $v
}
}
tailcall punk::nav::fs::d/ $v
}
set curdir $VIRTUAL_CWD
} else {
@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs {
#globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.)
set searchspec [lindex $args 0]
set result ""
#set chunklist [list]
@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs {
set this_result [dict create]
foreach searchspec $args {
set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)]
set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}]
#we need to support the same glob chars that Tcl's 'glob' command accepts.
set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]]
#we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean
#this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough.
#lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*)
@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs {
set allow_nonportable [dict exists $received -nonportable]
set curdir [pwd]
set fullpath_list [list]
set fullpath_list [list] ;#list of full paths to create.
set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir)
#these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests.
set error_paths [list]
foreach p $paths {
if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} {
@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $p "Path '$p' contains null character which is not allowed"]
continue
}
set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)]
#e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir.
set fullpath [file join $path1 {*}[lrange $args 1 end]]
set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)]
#if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir.
#Some subpaths of the supplied paths to create may already exist.
#we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all.
#we should test write permissions on the nearest existing parent of the supplied path to create,
#rather than just on the immediate parent segment of the supplied path itself which may not exist.
set fullpath [file normalize $fullpath]
set parent [file dirname $fullpath]
while {![file exists $parent]} {
set parent [file dirname $parent]
@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"]
continue
}
lappend existing_parent_list $parent
lappend fullpath_list $fullpath
}
if {[llength $fullpath_list] != [llength $paths]} {
@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs {
set num_created 0
set error_string ""
foreach fullpath $fullpath_list {
foreach fullpath $fullpath_list existing_parent $existing_parent_list {
#calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue.
#set relative_path [file relative $fullpath $existing_parent]
#todo.
if {[catch {file mkdir $fullpath}]} {
set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted."
cd $curdir
break
}
incr num_created
@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs {
if {$error_string ne ""} {
error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered."
}
d/ $curdir
#display summaries of created directories (which may have already existed) by reusing d/ to get info on them.
set query_paths [lmap v $paths $v/*]
d/ / {*}$query_paths
}
#todo use unknown to allow d/~c:/etc ??
@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs {
if {![file isdirectory $target]} {
error "Folder $target not found"
}
d/ $target
d/ / $target
}
@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs {
}
set relativepath [expr {[file pathtype $searchspec] eq "relative"}]
set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
#set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent.
#(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict)
@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs {
}
puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location"
set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location]
return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
}
#todo - package as punk::nav::fs
@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs {
}
proc dirfiles_dict {args} {
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict]
lassign [dict values $argd] leaders opts vals
set searchspecs [dict values $vals]
lassign [dict values $argd] leaders opts values
set searchspecs [dict values $values]
#puts stderr "searchspecs: $searchspecs [llength $searchspecs]"
#puts stdout "arglist: $opts"
@ -893,7 +958,7 @@ tcl::namespace::eval punk::nav::fs {
set searchspec [lindex $searchspecs 0]
# -- --- --- --- --- --- ---
set opt_searchbase [dict get $opts -searchbase]
set opt_tailglob [dict get $opts -tailglob]
set opt_tailglob [dict get $opts -tailglob]
set opt_with_sizes [dict get $opts -with_sizes]
set opt_with_times [dict get $opts -with_times]
# -- --- --- --- --- --- ---
@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs {
}
}
"\uFFFF" {
set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
if {$searchtail_has_globs} {
if {$is_relativesearchspec} {
#set location [file dirname [file join $searchbase $searchspec]]
@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs {
} else {
set next_opt_with_times [list -with_times $opt_with_times]
}
set ts1 [clock clicks -milliseconds]
if {$is_in_vfs} {
set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times]
} else {
@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs {
}
}
}
set ts2 [clock clicks -milliseconds]
set ts_listing [expr {$ts2 - $ts1}]
set dirs [dict get $listing dirs]
set files [dict get $listing files]
@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs {
set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden]
}
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#-----------------------------------------------------------------------------------------
set ts1 [clock milliseconds]
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#foreach d $dirs {
# if {[lindex [file system $d] 0] eq "tclvfs"} {
@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs {
set files $sorted_files
set filesizes $sorted_filesizes
set ts2 [clock milliseconds]
set ts_sorting [expr {$ts2 - $ts1}]
#-----------------------------------------------------------------------------------------
# -- ---
#jmn
set ts1 [clock milliseconds]
foreach nm [list {*}$dirs {*}$files] {
if {[punk::winpath::illegalname_test $nm]} {
lappend nonportable $nm
}
}
set ts2 [clock milliseconds]
set ts_nonportable_check [expr {$ts2 - $ts1}]
set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms]
set front_of_dict [dict create location $location searchbase $opt_searchbase]
set listing [dict merge $front_of_dict $listing]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info]
return [dict merge $listing $updated]
}
@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs {
@id -id ::punk::nav::fs::dirfiles_dict_as_lines
-stripbase -default 0 -type boolean
-formatsizes -default 1 -type boolean
-listing -default "/" -choices {/ // //}
-listing -default "/" -choices {/ //}
@values -min 1 -max -1 -type dict -unnamed true
}
#todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing?
proc dirfiles_dict_as_lines {args} {
set ts1 [clock milliseconds]
#set ts1 [clock milliseconds]
package require overtype
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines]
lassign [dict values $argd] leaders opts vals
@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs {
#review - symlink to shortcut? hopefully will just work
#classify as file or directory - fallback to file if unknown/undeterminable
set finfo_plus [list]
set ts2 [clock milliseconds]
#set ts2 [clock milliseconds]
foreach fdict $finfo {
set fname [dict get $fdict file]
if {[file extension $fname] eq ".lnk"} {
@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs {
}
unset finfo
puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
#puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}]
@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs {
#consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path
#review: punk::winpath calls cygpath!
#review: file pathtype is platform dependant
proc path_to_absolute {path base platform} {
set ptype [file pathtype $path]
proc path_to_absolute {subpath base platform} {
set ptype [file pathtype $subpath]
if {$ptype eq "absolute"} {
set path_absolute $path
set path_absolute $subpath
} elseif {$ptype eq "volumerelative"} {
if {$platform eq "windows"} {
#unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms)
if {[string index $path 0] eq "/"} {
if {[string index $subpath 0] eq "/"} {
#this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here
#It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions.
#Todo - tidy up.
package require punk::unixywindows
set path_absolute [punk::unixywindows::towinpath $path]
set path_absolute [punk::unixywindows::towinpath $subpath]
#puts stderr "winpath: $path"
} else {
#todo handle volume-relative paths with volume specified c:etc c:
@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs {
#The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives
#Arguably if ...?
#set path_absolute $base/$path
set path_absolute $path
#set path_absolute $base/$subpath
set path_absolute $subpath
}
} else {
# unknown what paths are reported as this on other platforms.. treat as absolute for now
set path_absolute $path
set path_absolute $subpath
}
} else {
set path_absolute $base/$path
}
if {$platform eq "windows"} {
if {[punk::winpath::illegalname_test $path_absolute]} {
set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
}
#e.g relative subpath=* base = c:/test -> c:/test/*
#e.g relative subpath=../test base = c:/test -> c:/test/../test
#e.g relative subpath=* base = //server/share/test -> //server/share/test/*
set path_absolute $base/$subpath
}
#fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues.
#if {$platform eq "windows"} {
# if {[punk::winpath::illegalname_test $path_absolute]} {
# set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
# }
#}
return $path_absolute
}
proc strip_prefix_depth {path prefix} {
@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib {
#[para] Secondary functions that are part of the API
#[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
punk::args::define {
@id -id ::punk::nav::fs::lib::is_fileglob
@cmd -name punk::nav::fs::lib::is_fileglob
@values -min 1 -max 1
path -type string -required true -help\
{String to test for being a glob pattern as recognised by the tcl 'glob' command.
If the string represents a path with multiple segments, only the final component
of the path will be tested for glob characters.
Glob patterns in this context are different to globs accepted by TCL's 'string match'.
A glob pattern is any string that contains unescaped * ? { } [ or ].
This will not detect mismatched unescaped braces or brackets.
Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern.
}
}
proc is_fileglob {str} {
#a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser)
set in_escape 0
set segments [file split $str]
set tail [lindex $segments end]
foreach c [split $tail ""] {
if {$in_escape} {
set in_escape 0
} else {
if {$c eq "\\"} {
set in_escape 1
} elseif {$c in [list * ? "\[" "\]" "{" "}" ]} {
return 1
}
}
}
return 0
}
#*** !doctools

2
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm

@ -357,7 +357,7 @@ namespace eval punk::path {
}
}
}
puts "==>finalparts: '$finalparts'"
#puts "==>finalparts: '$finalparts'"
# using join - {"" "" server share} -> //server/share and {a b} -> a/b
if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} {
#backtracking on unix-style path can end up with empty string as only member of finalparts

64
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm

@ -114,6 +114,50 @@ namespace eval punk::winpath {
return $path
}
}
proc illegal_char_map_to_doublewide {ch} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
set map [dict create \
"<" "\uFF1C" \
">" "\uFF1E" \
":" "\uFF1A" \
"\"" "\uFF02" \
"/" "\uFF0F" \
"\\" "\uFF3C" \
"|" "\uFF5C" \
"?" "\uFF1F" \
"*" "\uFF0A"]
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
proc illegal_char_map_to_ntfs {ch} {
#windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc.
#see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars
#see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped)
set map [dict create \
"<" "\uF03C" \
">" "\uF03E" \
":" "\uF03A" \
"\"" "\uF022" \
"/" "unknown" \
"\\" "\uF05c" \
"|" "\uF07C" \
"?" "\uF03F" \
"*" "\uF02A"]
#note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API.
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
#we don't validate that path is actually illegal because we don't know the full range of such names.
#The caller can apply this to any path.
#don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently)
@ -200,8 +244,15 @@ namespace eval punk::winpath {
set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"]
#we need to exclude things like path/.. path/.
foreach seg [file split $path] {
if {$seg in [list . ..]} {
set segments [file split $path]
if {[file pathtype $path] eq "absolute"} {
#absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments.
set checksegments [lrange $segments 1 end]
} else {
set checksegments $segments
}
foreach seg $checksegments {
if {$seg in {. ..}} {
#review - what if there is a folder or file that actually has a name such as . or .. ?
#unlikely in normal use - but could done deliberately for bad reasons?
#We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem.
@ -220,10 +271,17 @@ namespace eval punk::winpath {
#only check for actual space as other whitespace seems to work without being stripped
#trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph
if {[string index $seg end] in [list " " "."]} {
if {[string index $seg end] in {" " .}} {
#windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
#set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams.
set re {[<>:"/\\|?*]}
if {[regexp $re $seg]} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
}
#glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph)
#- they seem to be readable from cmd and tclsh as is.

211
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm

@ -6066,9 +6066,218 @@ namespace eval punk {
set pipe [punk::path_list_pipe $glob]
{*}$pipe
}
proc path {{glob *}} {
proc path_basic {{glob *}} {
set pipe [punk::path_list_pipe $glob]
{*}$pipe |> list_as_lines
}
namespace eval argdoc {
punk::args::define {
@id -id ::punk::path
@cmd -name "punk::path" -help\
"Introspection of the PATH environment variable.
This tool will examine executables within each PATH entry and show which binaries
are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns."
@opts
-binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries."
@values -min 0 -max -1
glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories."
}
}
proc path {args} {
set argd [punk::args::parse $args withid ::punk::path]
lassign [dict values $argd] leaders opts values received
set binglobs [dict get $opts -binglobs]
set globs [dict get $values glob]
if {$::tcl_platform(platform) eq "windows"} {
set sep ";"
} else {
# : ok for linux/bsd ... mac?
set sep ":"
}
set all_paths [split [string trimright $::env(PATH) $sep] $sep]
set filtered_paths $all_paths
if {[llength $globs]} {
set filtered_paths [list]
foreach p $all_paths {
foreach g $globs {
if {[string match -nocase $g $p]} {
lappend filtered_paths $p
break
}
}
}
}
#Windows executable search location order:
#1. The current directory.
#2. The directories that are listed in the PATH environment variable.
# within each directory windows uses the PATHEXT environment variable to determine
#which files are considered executable and in which order they are considered.
#So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files
#with the same name but different extensions in the same directory, then the one
#with the extension that appears first in PATHEXT will be executed when that name is called.
#On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output.
#Duplicate PATH entries are also a fairly likely possibility on all platforms.
#This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries.
#By default we don't want to display all executables in all PATH entries - as this can be very verbose
#- but we want to be able to show which executables are overshadowed by which PATH entries,
#and to be able to filter the PATH entries and the executables using glob patterns.
#To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name.
#The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths
#in the original PATH list. The executable dict will contain the paths and path indices where each executable is found,
#and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a
#dict keyed by path index which contains the list of executables in that path - to make it easy to show which
#executables are overshadowed by which paths.
set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows).
set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off).
set d_index_executables [dict create] ;#key is path index, value is list of executables in that path
set path_idx 0
foreach p $all_paths {
if {$::tcl_platform(platform) eq "windows"} {
set pnorm [string tolower $p]
} else {
set pnorm $p
}
if {![dict exists $d_path_info $pnorm]} {
dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]]
set executables [list]
if {[file isdirectory $p]} {
#get all files that are executable in this path.
#If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names.
set pnormglob [file normalize $p]
if {$::tcl_platform(platform) eq "windows"} {
#Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable.
#We need to account for this in our glob pattern.
set pathexts [list]
if {[info exists ::env(PATHEXT)]} {
set env_pathexts [split $::env(PATHEXT) ";"]
#set pathexts [lmap e $env_pathexts {string tolower $e}]
foreach pe $env_pathexts {
if {$pe eq "."} {
continue
}
lappend pathexts [string tolower $pe]
}
} else {
set env_pathexts [list]
#default PATHEXT if not set - according to Microsoft docs
set pathexts [list .com .exe .bat .cmd]
}
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set has_pathext 1
break
}
}
if {!$has_pathext} {
foreach pe $pathexts {
lappend binglobs "$bg$pe"
}
}
}
set lc_binglobs [lmap e $binglobs {string tolower $e}]
if {"." in $pathexts} {
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]]
set has_pathext 1
break
}
}
if {$has_pathext} {
if {[string tolower $base] ni $lc_binglobs} {
lappend binglobs "$base"
}
}
}
}
#TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem.
#(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug)
#We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results.
#The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for.
#(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe')
set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]]
set executables [list]
foreach e $globresults {
puts stderr "glob result: $e"
puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]"
lappend executables [file tail [file normalize $e]]
}
} else {
set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]]
}
}
dict set d_index_executables $path_idx $executables
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
if {![dict exists $d_bin_info $exenorm]} {
dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]]
} else {
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
} else {
#duplicate path entry - add to list of original paths for this normalized path
# Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...?
# we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it.
set pathdata [dict get $d_path_info $pnorm]
dict lappend pathdata original_paths $p
dict lappend pathdata indices $path_idx
dict set d_path_info $pnorm $pathdata
#we don't need to add executables for this path - as they will be the same as the original path that we have already processed.
#However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables.
set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
incr path_idx
}
#temporary debug output to check dicts are being built correctly
set debug ""
append debug "Path info dict:" \n
append debug [showdict $d_path_info] \n
append debug "Binary info dict:" \n
append debug [showdict $d_bin_info] \n
append debug "Index executables dict:" \n
append debug [showdict $d_index_executables] \n
#return $debug
puts stdout $debug
}
#-------------------------------------------------------------------

858
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm

@ -479,7 +479,7 @@ namespace eval punk::du {
}
namespace eval lib {
variable du_literal
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ]
variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ]
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this
@ -489,7 +489,11 @@ namespace eval punk::du {
return [dict get $winfile_attributes $bitmask]
} else {
#list/dict shimmering?
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
#return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
set decoded [twapi::decode_file_attributes $bitmask]
dict set winfile_attributes $bitmask $decoded
return $decoded
}
}
variable win_reparse_tags
@ -563,23 +567,25 @@ namespace eval punk::du {
#then twapi::device_ioctl (win32 DeviceIoControl)
#then parse buffer somehow (binary scan..)
#https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913
punk::args::define {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}
#don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args.
proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
#set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo]
set defaults [dict create -debug 0 -debugchannel stderr]
set opts [dict merge $defaults [lrange $args 0 end-1]]
set iteminfo [lindex $args end]
set argd [punk::args::parse $args withdef {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}]
set opts [dict get $argd opts]
set iteminfo [dict get $argd values iteminfo]
set opt_debug [dict get $opts -debug]
set opt_debugchannel [dict get $opts -debugchannel]
#-longname is placeholder - caller needs to set
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0]
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}]
if {$opt_debug} {
set dbg "iteminfo returned by find_file_open\n"
append dbg [pdict -channel none iteminfo]
@ -592,34 +598,38 @@ namespace eval punk::du {
}
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
if {"hidden" in $attrinfo} {
dict set result -hidden 1
}
if {"system" in $attrinfo} {
dict set result -system 1
}
if {"readonly" in $attrinfo} {
dict set result -readonly 1
}
dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
if {"reparse_point" in $attrinfo} {
#the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+
#todo - is_microsoft from first bit of high_word
set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter?
if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"]
foreach attr $attrinfo {
switch -- $attr {
hidden {
dict set result -hidden 1
}
system {
dict set result -system 1
}
readonly {
dict set result -readonly 1
}
reparse_point {
#the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+
#todo - is_microsoft from first bit of high_word
set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter?
if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"]
}
}
}
}
#dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
dict set result -raw $iteminfo
return $result
}
@ -652,27 +662,337 @@ namespace eval punk::du {
catch {twapi::find_file_close $iterator}
}
}
proc resolve_characterclass {cclass} {
#takes the inner value from a tcl square bracketed character class and converts to a list of characters.
#todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters
#e.g a-c-3 -> a b c - 3
#e.g a-c-3-5 -> a b c - 3 4 5
#e.g a-c -> a b c
#e.g a- -> a -
#e.g -c-e -> - c d e
#the tcl character class does not support negation or intersection - so we can ignore those possibilities for now.
#in this context we do not need to support named character classes such as [:digit:]
set chars [list]
set i 0
set len [string length $cclass]
set accept_range 0
while {$i < $len} {
set ch [string index $cclass $i]
if {$ch eq "-"} {
if {$accept_range} {
set start [string index $cclass [expr {$i - 1}]]
set end [string index $cclass [expr {$i + 1}]]
if {$start eq "" || $end eq ""} {
#invalid range - treat - as literal
if {"-" ni $chars} {
lappend chars "-"
}
} else {
#we have a range - add all chars from previous char to next char
#range may be in either direction - e.g a-c or c-a but we don't care about the order of our result.
if {$start eq $end} {
#degenerate range - treat as single char
if {$start ni $chars} {
lappend chars $start
}
} else {
if {[scan $start %c] < [scan $end %c]} {
set c1 [scan $start %c]
set c2 [scan $end %c]
} else {
set c1 [scan $end %c]
set c2 [scan $start %c]
}
for {set c $c1} {$c <= $c2} {incr c} {
set char [format %c $c]
if {$char ni $chars} {
lappend chars $char
}
}
}
incr i ;#skip end char as it's already included in range
}
set accept_range 0
} else {
#we have a literal - add to list and allow for possible range if next char is also a -
if {"-" ni $chars} {
lappend chars "-"
}
set accept_range 1
}
} else { #we have a literal - add to list and allow for possible range if next char is also a -
if {$ch ni $chars} {
lappend chars $ch
}
set accept_range 1
}
incr i
}
return $chars
}
#return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible.
#first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes.
# e.g {*[\{]*} is a valid tcl glob
#todo - write tests.
#should also support {*\{*} matching a file such as a{blah}b
#Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL.
#So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity.
proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} {
#windows API function in use is the FindFirstFile set of functions.
#these support wildcards * and ? *only*.
#examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported
# {abc[1-3].txt} -> {abc?.txt}
# {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
# {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call.
# {{S,t}*.txt} -> {S*.txt} {T*.txt}
# *.{txt,log} -> {*.txt} {*.log}
# {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals
set gchars [split $tclglob ""]
set winglob_list [list ""]
set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
set in_brackets 0
set esc_next 0
set brace_depth 0
set brace_content ""
set braced_alternatives [list]
set brace_is_normal 0
set cclass_content ""
foreach ch $gchars {
if {$esc_next} {
if {$in_brackets} {
append cclass_content $ch
continue
} elseif {$brace_depth} {
append brace_content $ch
continue
}
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}]
set esc_next 0
continue
}
if {$ch eq "\{"} {
if {$brace_depth} {
#we have an opening brace char inside braces
#Let the brace processing handle it as it recurses.
incr brace_depth 1
append brace_content $ch
continue
}
incr brace_depth 1
set brace_content ""
} elseif {$ch eq "\}"} {
if {$brace_depth > 1} {
#we have a closing brace char inside braces
append brace_content $ch
incr brace_depth -1
continue
}
#process brace_content representing a list of alternatives
#handle list of alternatives - convert {txt,log} to *.txt;*.log
#set alternatives [split $brace_content ","]
lappend braced_alternatives $brace_content
set alternatives $braced_alternatives
set braced_alternatives [list]
set brace_content ""
incr brace_depth -1
set alt_winpatterns [list]
set alt_regexps [list]
foreach alt $alternatives {
set subresult [tclglob_equivalents $alt]
lappend alt_winpatterns {*}[dict get $subresult winglobs]
lappend alt_regexps {*}[dict get $subresult tclregexps]
}
set next_winglob_list [list]
set next_regexp_list [list]
foreach wg $winglob_list re $tclregexp_list {
#puts "wg: $wg"
#puts "re: $re"
foreach alt_wp $alt_winpatterns alt_re $alt_regexps {
#puts " alt_wp: $alt_wp"
#puts " alt_re: $alt_re"
lappend next_winglob_list "$wg$alt_wp"
set alt_re_no_caret [string range $alt_re 1 end]
lappend next_regexp_list "${re}${alt_re_no_caret}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\["} {
#windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
if {!$brace_depth} {
set in_brackets 1
} else {
#we have a [ char inside braces
#Let the brace processing handle it as it recurses.
#but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets.
set brace_is_normal 1
append brace_content $ch
}
} elseif {$ch eq "\]"} {
if {$brace_depth} {
#we have a ] char inside braces
#Let the brace processing hanele it as it recurses.
append brace_content $ch
continue
}
set in_brackets 0
set charlist [resolve_characterclass $cclass_content]
set cclass_content ""
set next_winglob_list [list]
set next_regexp_list [list]
foreach c $charlist {
#set winglob_list [lmap wg $winglob_list {append wg $c}]
foreach wg $winglob_list {
lappend next_winglob_list "$wg$c"
}
foreach re $tclregexp_list {
set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}]
lappend next_regexp_list "${re}${c_escaped}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\\"} {
if {$in_brackets} {
append cclass_content $ch
#append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself.
set esc_next 1
continue
}
if {$brace_depth} {
#we have a \ char inside braces
#Let the brace processing handle it as it recurses.
append brace_content $ch
set esc_next 1
continue
}
set esc_next 1
continue
} else {
if {$in_brackets} {
append cclass_content $ch
continue
}
if {!$brace_depth} {
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]
if {[string length $re_ch] == 1} {
switch -- $re_ch {
"?" {set re_ch "."}
"*" {set re_ch ".*"}
default {
#we could use the same mixed case filter here for both sensitive and insensitive filesystems,
#because the API filtering will already have done the restriction,
#and so a more permissive regex that matches both cases will still only match the results that the API call returns,
#which will be correct based on the case-sensitivity of the filesystem.
#It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob.
#ie in the parts of the original glob that were in square brackets.
if {!$case_sensitive_filesystem} {
# add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
if {[string is upper $re_ch]} {
set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"]
} elseif {[string is lower $re_ch]} {
set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"]
} else {
#non-alpha char - no need to add case-insensitivity
}
}
}
}
}
set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}]
} else {
#we have a literal char inside braces - add to current brace_content
if {$brace_depth == 1 && $ch eq ","} {
lappend braced_alternatives $brace_content
set brace_content ""
} else {
append brace_content $ch
}
}
}
}
#sanity check
if {[llength $winglob_list] != [llength $tclregexp_list]} {
error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]"
}
set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}]
return [dict create winglobs $winglob_list tclregexps $tclregexp_list]
}
#todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed?
namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs
# get listing without using unix-tools (may not be installed on the windows system)
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
# This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance
#todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob.
# we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before
#we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder.
#For really large folders and a moderate number of patterns, this could be a significant performance improvement.
proc du_dirlisting_twapi {folderpath args} {
set defaults [dict create\
-glob *\
-filedebug 0\
-patterndebug 0\
-with_sizes 1\
-with_times 1\
]
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob]
set tcl_glob [dict get $opts -glob]
#todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps.
set case_sensitive_filesystem 0 ;#todo - consider detecting this properly.
#Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program.
#In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly,
#as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem,
#so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly.
#Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a
#case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem.
#Note: we only use this to adjust the filtering regexps we generate from the tcl glob.
#The windows API pattern match will already filter based on the case-sensitivity of the filesystem
# so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem.
set globs_processed [tclglob_equivalents $tcl_glob]
#we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob.
#temp
#set win_glob_list [list $tcl_glob]
set win_glob_list [dict get $globs_processed winglobs]
set tcl_regex_list [dict get $globs_processed tclregexps]
#review
# our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob.
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_filedebug [dict get $opts -filedebug] ;#per file
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_patterndebug [dict get $opts -patterndebug]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean - (f false vs f file!)
@ -711,256 +1031,288 @@ namespace eval punk::du {
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set errors [dict create]
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
# return it so it can be stored and tried as an alternative for problem paths
#puts stderr ">>> glob: $opt_glob"
#REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases.
try {
#glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field
} on error args {
foreach win_glob $win_glob_list tcl_re $tcl_regex_list {
if {$opt_patterndebug} {
puts stderr ">>>du_dirlisting_twapi winglob: $win_glob"
puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re"
}
#REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases.
try {
if {[string match "*denied*" $args]} {
#output similar format as unixy du
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
puts stderr " (errorcode: $::errorCode)\n"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field
} on error args {
try {
if {[string match "*denied*" $args]} {
#output similar format as unixy du
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
puts stderr " (errorcode: $::errorCode)\n"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#errorcode TWAPI_WIN32 2 {The system cannot find the file specified.}
#This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error
#The find-all glob * won't get here because it returns . & ..
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} {
#errorcode TWAPI_WIN32 2 {The system cannot find the file specified.}
#This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error
#The find-all glob * won't get here because it returns . & ..
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
#if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {}
if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} {
#looks like an ordinary no results for chosen glob
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
#return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
continue
}
}
if {[set plen [pathcharacterlen $folderpath]] >= 250} {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
# re-fetch this folder with altnames
#file normalize - aside from being slow - will have problems with long paths - so this won't work.
#this function should only accept absolute paths
#
#
#Note: using -detail full only helps if the last segment of path has an altname..
#To properly shorten we need to have kept track of altname all the way from the root!
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
#### SLOW
set fixedpath [dict get [file attributes $folderpath] -shortname]
#### SLOW
if {[set plen [pathcharacterlen $folderpath]] >= 250} {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
# re-fetch this folder with altnames
#file normalize - aside from being slow - will have problems with long paths - so this won't work.
#this function should only accept absolute paths
#
#
#Note: using -detail full only helps if the last segment of path has an altname..
#To properly shorten we need to have kept track of altname all the way from the root!
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
#### SLOW
set fixedpath [dict get [file attributes $folderpath] -shortname]
#### SLOW
append errmsg "retrying with with windows altname '$fixedpath'"
puts stderr $errmsg
} else {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
set tmp_errors [list $::errorCode]
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
#we can use //?/path dos device path - but not with tcl functions
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here
append errmsg "retrying with with windows altname '$fixedpath'"
puts stderr $errmsg
} else {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
set tmp_errors [list $::errorCode]
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
#we can use //?/path dos device path - but not with tcl functions
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here
set fixedtail ""
set parent [file dirname $folderpath]
set badtail [file tail $folderpath]
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname]
break
set fixedtail ""
set parent [file dirname $folderpath]
set badtail [file tail $folderpath]
set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo"
if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname]
break
}
}
if {![string length $fixedtail]} {
dict lappend errors $folderpath {*}$tmp_errors
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
#so the illegalname_fix doesn't really work here
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath $parent/$fixedtail
append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg
}
if {![string length $fixedtail]} {
dict lappend errors $folderpath {*}$tmp_errors
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
if {[catch {
set iterator [twapi::find_file_open $fixedpath/* -detail basic]
} errMsg]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')"
puts stderr " (errorcode: $::errorCode)\n"
puts stderr "$errMsg"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
#so the illegalname_fix doesn't really work here
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath [file join $parent $fixedtail]
append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg
} on error args {
set errmsg "error reading folder: $folderpath\n"
append errmsg "error: $args" \n
append errmsg "errorInfo: $::errorInfo" \n
puts stderr "$errmsg"
puts stderr "FAILED to collect info for folder '$folderpath'"
#append errmsg "aborting.."
#error $errmsg
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
#jjj
if {[catch {
set iterator [twapi::find_file_open $fixedpath/* -detail basic]
} errMsg]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')"
puts stderr " (errorcode: $::errorCode)\n"
puts stderr "$errMsg"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
if {![regexp $tcl_re $nm]} {
continue
}
if {$nm in {. ..}} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
#set ftype ""
set do_sizes 0
set do_times 0
#attributes applicable to any classification
set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
set file_attributes [dict get $attrdict -fileattributes]
} on error args {
set errmsg "error reading folder: $folderpath\n"
append errmsg "error: $args" \n
append errmsg "errorInfo: $::errorInfo" \n
puts stderr "$errmsg"
puts stderr "FAILED to collect info for folder '$folderpath'"
#append errmsg "aborting.."
#error $errmsg
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set is_reparse_point [expr {"reparse_point" in $file_attributes}]
set is_directory [expr {"directory" in $file_attributes}]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set linkdata [dict create]
# -----------------------------------------------------------
#main classification
if {$is_reparse_point} {
#this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
#recheck glob
#review!
if {![string match $opt_glob $nm]} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
set ftype ""
#attributes applicable to any classification
set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
set file_attributes [dict get $attrdict -fileattributes]
set linkdata [dict create]
# -----------------------------------------------------------
#main classification
if {"reparse_point" in $file_attributes} {
#this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return
#One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point
#
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du?
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
#e.g (stripped of headers/footers and other lines)
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
#du includes the size of the symlink
#but we can't get it with tcl's file size
#twapi doesn't seem to have anything to help read it either (?)
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
#
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
#
#links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname
set ftype "l"
dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} {
dict set linkdata target_type file
}
}
if {"directory" in $file_attributes} {
if {$nm in {. ..}} {
continue
#One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point
#
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du?
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
#e.g (stripped of headers/footers and other lines)
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
#du includes the size of the symlink
#but we can't get it with tcl's file size
#twapi doesn't seem to have anything to help read it either (?)
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
#
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
#
#links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname
#set ftype "l"
if {"l" in $sized_types} {
set do_sizes 1
}
if {"l" in $timed_types} {
set do_times 1
}
dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} {
dict set linkdata target_type file
}
}
if {"reparse_point" ni $file_attributes} {
lappend dirs $fullname
set ftype "d"
} else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory
if {$is_directory} {
#if {$nm in {. ..}} {
# continue
#}
if {!$is_reparse_point} {
lappend dirs $fullname
#set ftype "d"
if {"d" in $sized_types} {
set do_sizes 1
}
if {"d" in $timed_types} {
set do_times 1
}
} else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory
}
}
}
if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname
if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size]
if {!$is_reparse_point && !$is_directory} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname
if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size]
set do_sizes 1
}
if {"f" in $timed_types} {
set do_times 1
}
#set ftype "f"
}
set ftype "f"
}
# -----------------------------------------------------------
# -----------------------------------------------------------
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {$ftype in $sized_types} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
}
if {$ftype in $timed_types} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
dict set alltimes $fullname [dict create\
c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\
a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\
m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\
]
}
if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
if {$do_sizes} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
}
if {$do_times} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
dict set alltimes $fullname [dict create\
c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\
a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\
m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\
]
}
if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata
}
}
twapi::find_file_close $iterator
}
twapi::find_file_close $iterator
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts
dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types
#also determine whether vfs. file system x is *much* faster than file attributes
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
}
proc get_vfsmounts_in_folder {folderpath} {
set vfsmounts [list]
@ -991,9 +1343,11 @@ namespace eval punk::du {
#work around the horrible tilde-expansion thing (not needed for tcl 9+)
proc file_join_one {base newtail} {
if {[string index $newtail 0] ne {~}} {
return [file join $base $newtail]
#return [file join $base $newtail]
return $base/$newtail
}
return [file join $base ./$newtail]
#return [file join $base ./$newtail]
return $base/./$newtail
}
@ -1121,7 +1475,7 @@ namespace eval punk::du {
#ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now.
#struct::set will affect order: tcl vs critcl give different ordering!
set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]]
#----

32
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm

@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check {
#These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author)
#Not any sort of comprehensive check of known tcl bugs.
#These are reported in warning output of 'help tcl' - or used for workarounds in some cases.
proc has_tclbug_caseinsensitiveglob_windows {} {
#https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted
if {"windows" ne $::tcl_platform(platform)} {
set bug 0
} else {
set tmpdir [file tempdir]
set testfile [file join $tmpdir "bugtest"]
set fd [open $testfile w]
puts $fd test
close $fd
set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*]
foreach r $globresult {
if {$r ne "bugtest"} {
set bug 1
break
}
}
return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning]
#possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized
# to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation.
}
}
#todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies.
# but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module,
# or at least checking for an existing mounted case-insensitive filesystem.
# A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like.
# It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix.
proc has_tclbug_regexp_emptystring {} {
#The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces
#This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic,

209
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm

@ -50,7 +50,7 @@ package require punk::lib
package require punk::args
package require punk::ansi
package require punk::winpath
package require punk::du
package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing.
package require commandstack
#*** !doctools
#[item] [package {Tcl 8.6}]
@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs {
#[list_begin definitions]
punk::args::define {
@id -id ::punk::nav::fs::d/
@cmd -name punk::nav::fs::d/ -help\
{List directories or directories and files in the current directory or in the
targets specified with the fileglob_or_target glob pattern(s).
If a single target is specified without glob characters, and it exists as a directory,
then the working directory is changed to that target and a listing of that directory
is returned. If the single target specified without glob characters does not exist as
a directory, then it is treated as a glob pattern and the listing is for the current
directory with results filtered to match fileglob_or_target.
If multiple targets or glob patterns are specified, then a separate listing is returned
for each fileglob_or_target pattern.
This function is provided via aliases as ./ and .// with v being inferred from the alias
name, and also as d/ with an explicit v argument.
The ./ and .// forms are more convenient for interactive use.
examples:
./ - list directories in current directory
.// - list directories and files in current directory
./ src/* - list directories in src
.// src/* - list directories and files in src
.// *.txt - list files in current directory with .txt extension
.// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name
(on a case-insensitive filesystem this would also match T*1.txt etc)
.// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name
(glob chars treated as literals due to being in character-class brackets
This will match files beginning with a capital T and not lower case t
even on a case-insensitive filesystem.)
.// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns:
{[t]*} - names beginning with t
{d{e,d}*} - names beginning with de or dd
(on a case-insensitive filesystem the first pattern would also match names beginning with T)
}
@values -min 1 -max -1 -type string
v -type string -choices {/ //} -help\
"
/ - list directories only
// - list directories and files
"
fileglob_or_target -type string -optional true -multiple true -help\
"A glob pattern as supported by Tcl's 'glob' command, to filter results.
If multiple patterns are supplied, then a listing for each pattern is returned.
If no patterns are supplied, then all items are listed."
}
#NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread.
#As this function recurses and calls cd multiple times - it's not thread-safe.
#Another thread could theoretically cd whilst this is running.
@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs {
#puts stdout "-->[ansistring VIEW $result]"
return $result
} else {
set atail [lassign $args cdtarget]
if {[llength $args] == 1} {
set cdtarget [lindex $args 0]
switch -exact -- $cdtarget {
. - ./ {
tailcall punk::nav::fs::d/
tailcall punk::nav::fs::d/ $v
}
.. - ../ {
if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} {
@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs {
if {[string range $cdtarget_copy 0 3] eq "//?/"} {
#handle dos device paths - convert to normal path for glob testing
set glob_test [string range $cdtarget_copy 3 end]
set cdtarget_is_glob [regexp {[*?]} $glob_test]
set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test]
} else {
set cdtarget_is_glob [regexp {[*?]} $cdtarget]
set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget]
#todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing.
}
if {!$cdtarget_is_glob} {
set cdtarget_file_type [file type $cdtarget]
@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs {
}
set VIRTUAL_CWD $cdtarget
set curdir $cdtarget
tailcall punk::nav::fs::d/ $v
} else {
set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget]
if {[string match //zipfs:/* $VIRTUAL_CWD]} {
@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs {
}
if {[file type $target] eq "directory"} {
set VIRTUAL_CWD $target
tailcall punk::nav::fs::d/ $v
}
}
tailcall punk::nav::fs::d/ $v
}
set curdir $VIRTUAL_CWD
} else {
@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs {
#globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.)
set searchspec [lindex $args 0]
set result ""
#set chunklist [list]
@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs {
set this_result [dict create]
foreach searchspec $args {
set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)]
set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}]
#we need to support the same glob chars that Tcl's 'glob' command accepts.
set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]]
#we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean
#this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough.
#lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*)
@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs {
set allow_nonportable [dict exists $received -nonportable]
set curdir [pwd]
set fullpath_list [list]
set fullpath_list [list] ;#list of full paths to create.
set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir)
#these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests.
set error_paths [list]
foreach p $paths {
if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} {
@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $p "Path '$p' contains null character which is not allowed"]
continue
}
set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)]
#e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir.
set fullpath [file join $path1 {*}[lrange $args 1 end]]
set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)]
#if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir.
#Some subpaths of the supplied paths to create may already exist.
#we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all.
#we should test write permissions on the nearest existing parent of the supplied path to create,
#rather than just on the immediate parent segment of the supplied path itself which may not exist.
set fullpath [file normalize $fullpath]
set parent [file dirname $fullpath]
while {![file exists $parent]} {
set parent [file dirname $parent]
@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"]
continue
}
lappend existing_parent_list $parent
lappend fullpath_list $fullpath
}
if {[llength $fullpath_list] != [llength $paths]} {
@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs {
set num_created 0
set error_string ""
foreach fullpath $fullpath_list {
foreach fullpath $fullpath_list existing_parent $existing_parent_list {
#calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue.
#set relative_path [file relative $fullpath $existing_parent]
#todo.
if {[catch {file mkdir $fullpath}]} {
set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted."
cd $curdir
break
}
incr num_created
@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs {
if {$error_string ne ""} {
error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered."
}
d/ $curdir
#display summaries of created directories (which may have already existed) by reusing d/ to get info on them.
set query_paths [lmap v $paths $v/*]
d/ / {*}$query_paths
}
#todo use unknown to allow d/~c:/etc ??
@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs {
if {![file isdirectory $target]} {
error "Folder $target not found"
}
d/ $target
d/ / $target
}
@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs {
}
set relativepath [expr {[file pathtype $searchspec] eq "relative"}]
set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
#set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent.
#(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict)
@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs {
}
puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location"
set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location]
return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
}
#todo - package as punk::nav::fs
@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs {
}
proc dirfiles_dict {args} {
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict]
lassign [dict values $argd] leaders opts vals
set searchspecs [dict values $vals]
lassign [dict values $argd] leaders opts values
set searchspecs [dict values $values]
#puts stderr "searchspecs: $searchspecs [llength $searchspecs]"
#puts stdout "arglist: $opts"
@ -893,7 +958,7 @@ tcl::namespace::eval punk::nav::fs {
set searchspec [lindex $searchspecs 0]
# -- --- --- --- --- --- ---
set opt_searchbase [dict get $opts -searchbase]
set opt_tailglob [dict get $opts -tailglob]
set opt_tailglob [dict get $opts -tailglob]
set opt_with_sizes [dict get $opts -with_sizes]
set opt_with_times [dict get $opts -with_times]
# -- --- --- --- --- --- ---
@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs {
}
}
"\uFFFF" {
set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
if {$searchtail_has_globs} {
if {$is_relativesearchspec} {
#set location [file dirname [file join $searchbase $searchspec]]
@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs {
} else {
set next_opt_with_times [list -with_times $opt_with_times]
}
set ts1 [clock clicks -milliseconds]
if {$is_in_vfs} {
set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times]
} else {
@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs {
}
}
}
set ts2 [clock clicks -milliseconds]
set ts_listing [expr {$ts2 - $ts1}]
set dirs [dict get $listing dirs]
set files [dict get $listing files]
@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs {
set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden]
}
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#-----------------------------------------------------------------------------------------
set ts1 [clock milliseconds]
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#foreach d $dirs {
# if {[lindex [file system $d] 0] eq "tclvfs"} {
@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs {
set files $sorted_files
set filesizes $sorted_filesizes
set ts2 [clock milliseconds]
set ts_sorting [expr {$ts2 - $ts1}]
#-----------------------------------------------------------------------------------------
# -- ---
#jmn
set ts1 [clock milliseconds]
foreach nm [list {*}$dirs {*}$files] {
if {[punk::winpath::illegalname_test $nm]} {
lappend nonportable $nm
}
}
set ts2 [clock milliseconds]
set ts_nonportable_check [expr {$ts2 - $ts1}]
set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms]
set front_of_dict [dict create location $location searchbase $opt_searchbase]
set listing [dict merge $front_of_dict $listing]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info]
return [dict merge $listing $updated]
}
@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs {
@id -id ::punk::nav::fs::dirfiles_dict_as_lines
-stripbase -default 0 -type boolean
-formatsizes -default 1 -type boolean
-listing -default "/" -choices {/ // //}
-listing -default "/" -choices {/ //}
@values -min 1 -max -1 -type dict -unnamed true
}
#todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing?
proc dirfiles_dict_as_lines {args} {
set ts1 [clock milliseconds]
#set ts1 [clock milliseconds]
package require overtype
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines]
lassign [dict values $argd] leaders opts vals
@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs {
#review - symlink to shortcut? hopefully will just work
#classify as file or directory - fallback to file if unknown/undeterminable
set finfo_plus [list]
set ts2 [clock milliseconds]
#set ts2 [clock milliseconds]
foreach fdict $finfo {
set fname [dict get $fdict file]
if {[file extension $fname] eq ".lnk"} {
@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs {
}
unset finfo
puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
#puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}]
@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs {
#consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path
#review: punk::winpath calls cygpath!
#review: file pathtype is platform dependant
proc path_to_absolute {path base platform} {
set ptype [file pathtype $path]
proc path_to_absolute {subpath base platform} {
set ptype [file pathtype $subpath]
if {$ptype eq "absolute"} {
set path_absolute $path
set path_absolute $subpath
} elseif {$ptype eq "volumerelative"} {
if {$platform eq "windows"} {
#unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms)
if {[string index $path 0] eq "/"} {
if {[string index $subpath 0] eq "/"} {
#this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here
#It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions.
#Todo - tidy up.
package require punk::unixywindows
set path_absolute [punk::unixywindows::towinpath $path]
set path_absolute [punk::unixywindows::towinpath $subpath]
#puts stderr "winpath: $path"
} else {
#todo handle volume-relative paths with volume specified c:etc c:
@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs {
#The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives
#Arguably if ...?
#set path_absolute $base/$path
set path_absolute $path
#set path_absolute $base/$subpath
set path_absolute $subpath
}
} else {
# unknown what paths are reported as this on other platforms.. treat as absolute for now
set path_absolute $path
set path_absolute $subpath
}
} else {
set path_absolute $base/$path
}
if {$platform eq "windows"} {
if {[punk::winpath::illegalname_test $path_absolute]} {
set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
}
#e.g relative subpath=* base = c:/test -> c:/test/*
#e.g relative subpath=../test base = c:/test -> c:/test/../test
#e.g relative subpath=* base = //server/share/test -> //server/share/test/*
set path_absolute $base/$subpath
}
#fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues.
#if {$platform eq "windows"} {
# if {[punk::winpath::illegalname_test $path_absolute]} {
# set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
# }
#}
return $path_absolute
}
proc strip_prefix_depth {path prefix} {
@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib {
#[para] Secondary functions that are part of the API
#[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
punk::args::define {
@id -id ::punk::nav::fs::lib::is_fileglob
@cmd -name punk::nav::fs::lib::is_fileglob
@values -min 1 -max 1
path -type string -required true -help\
{String to test for being a glob pattern as recognised by the tcl 'glob' command.
If the string represents a path with multiple segments, only the final component
of the path will be tested for glob characters.
Glob patterns in this context are different to globs accepted by TCL's 'string match'.
A glob pattern is any string that contains unescaped * ? { } [ or ].
This will not detect mismatched unescaped braces or brackets.
Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern.
}
}
proc is_fileglob {str} {
#a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser)
set in_escape 0
set segments [file split $str]
set tail [lindex $segments end]
foreach c [split $tail ""] {
if {$in_escape} {
set in_escape 0
} else {
if {$c eq "\\"} {
set in_escape 1
} elseif {$c in [list * ? "\[" "\]" "{" "}" ]} {
return 1
}
}
}
return 0
}
#*** !doctools

2
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm

@ -357,7 +357,7 @@ namespace eval punk::path {
}
}
}
puts "==>finalparts: '$finalparts'"
#puts "==>finalparts: '$finalparts'"
# using join - {"" "" server share} -> //server/share and {a b} -> a/b
if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} {
#backtracking on unix-style path can end up with empty string as only member of finalparts

64
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm

@ -114,6 +114,50 @@ namespace eval punk::winpath {
return $path
}
}
proc illegal_char_map_to_doublewide {ch} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
set map [dict create \
"<" "\uFF1C" \
">" "\uFF1E" \
":" "\uFF1A" \
"\"" "\uFF02" \
"/" "\uFF0F" \
"\\" "\uFF3C" \
"|" "\uFF5C" \
"?" "\uFF1F" \
"*" "\uFF0A"]
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
proc illegal_char_map_to_ntfs {ch} {
#windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc.
#see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars
#see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped)
set map [dict create \
"<" "\uF03C" \
">" "\uF03E" \
":" "\uF03A" \
"\"" "\uF022" \
"/" "unknown" \
"\\" "\uF05c" \
"|" "\uF07C" \
"?" "\uF03F" \
"*" "\uF02A"]
#note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API.
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
#we don't validate that path is actually illegal because we don't know the full range of such names.
#The caller can apply this to any path.
#don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently)
@ -200,8 +244,15 @@ namespace eval punk::winpath {
set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"]
#we need to exclude things like path/.. path/.
foreach seg [file split $path] {
if {$seg in [list . ..]} {
set segments [file split $path]
if {[file pathtype $path] eq "absolute"} {
#absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments.
set checksegments [lrange $segments 1 end]
} else {
set checksegments $segments
}
foreach seg $checksegments {
if {$seg in {. ..}} {
#review - what if there is a folder or file that actually has a name such as . or .. ?
#unlikely in normal use - but could done deliberately for bad reasons?
#We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem.
@ -220,10 +271,17 @@ namespace eval punk::winpath {
#only check for actual space as other whitespace seems to work without being stripped
#trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph
if {[string index $seg end] in [list " " "."]} {
if {[string index $seg end] in {" " .}} {
#windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
#set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams.
set re {[<>:"/\\|?*]}
if {[regexp $re $seg]} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
}
#glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph)
#- they seem to be readable from cmd and tclsh as is.

211
src/vfs/_vfscommon.vfs/modules/punk-0.1.tm

@ -6066,9 +6066,218 @@ namespace eval punk {
set pipe [punk::path_list_pipe $glob]
{*}$pipe
}
proc path {{glob *}} {
proc path_basic {{glob *}} {
set pipe [punk::path_list_pipe $glob]
{*}$pipe |> list_as_lines
}
namespace eval argdoc {
punk::args::define {
@id -id ::punk::path
@cmd -name "punk::path" -help\
"Introspection of the PATH environment variable.
This tool will examine executables within each PATH entry and show which binaries
are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns."
@opts
-binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries."
@values -min 0 -max -1
glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories."
}
}
proc path {args} {
set argd [punk::args::parse $args withid ::punk::path]
lassign [dict values $argd] leaders opts values received
set binglobs [dict get $opts -binglobs]
set globs [dict get $values glob]
if {$::tcl_platform(platform) eq "windows"} {
set sep ";"
} else {
# : ok for linux/bsd ... mac?
set sep ":"
}
set all_paths [split [string trimright $::env(PATH) $sep] $sep]
set filtered_paths $all_paths
if {[llength $globs]} {
set filtered_paths [list]
foreach p $all_paths {
foreach g $globs {
if {[string match -nocase $g $p]} {
lappend filtered_paths $p
break
}
}
}
}
#Windows executable search location order:
#1. The current directory.
#2. The directories that are listed in the PATH environment variable.
# within each directory windows uses the PATHEXT environment variable to determine
#which files are considered executable and in which order they are considered.
#So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files
#with the same name but different extensions in the same directory, then the one
#with the extension that appears first in PATHEXT will be executed when that name is called.
#On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output.
#Duplicate PATH entries are also a fairly likely possibility on all platforms.
#This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries.
#By default we don't want to display all executables in all PATH entries - as this can be very verbose
#- but we want to be able to show which executables are overshadowed by which PATH entries,
#and to be able to filter the PATH entries and the executables using glob patterns.
#To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name.
#The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths
#in the original PATH list. The executable dict will contain the paths and path indices where each executable is found,
#and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a
#dict keyed by path index which contains the list of executables in that path - to make it easy to show which
#executables are overshadowed by which paths.
set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows).
set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off).
set d_index_executables [dict create] ;#key is path index, value is list of executables in that path
set path_idx 0
foreach p $all_paths {
if {$::tcl_platform(platform) eq "windows"} {
set pnorm [string tolower $p]
} else {
set pnorm $p
}
if {![dict exists $d_path_info $pnorm]} {
dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]]
set executables [list]
if {[file isdirectory $p]} {
#get all files that are executable in this path.
#If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names.
set pnormglob [file normalize $p]
if {$::tcl_platform(platform) eq "windows"} {
#Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable.
#We need to account for this in our glob pattern.
set pathexts [list]
if {[info exists ::env(PATHEXT)]} {
set env_pathexts [split $::env(PATHEXT) ";"]
#set pathexts [lmap e $env_pathexts {string tolower $e}]
foreach pe $env_pathexts {
if {$pe eq "."} {
continue
}
lappend pathexts [string tolower $pe]
}
} else {
set env_pathexts [list]
#default PATHEXT if not set - according to Microsoft docs
set pathexts [list .com .exe .bat .cmd]
}
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set has_pathext 1
break
}
}
if {!$has_pathext} {
foreach pe $pathexts {
lappend binglobs "$bg$pe"
}
}
}
set lc_binglobs [lmap e $binglobs {string tolower $e}]
if {"." in $pathexts} {
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]]
set has_pathext 1
break
}
}
if {$has_pathext} {
if {[string tolower $base] ni $lc_binglobs} {
lappend binglobs "$base"
}
}
}
}
#TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem.
#(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug)
#We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results.
#The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for.
#(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe')
set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]]
set executables [list]
foreach e $globresults {
puts stderr "glob result: $e"
puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]"
lappend executables [file tail [file normalize $e]]
}
} else {
set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]]
}
}
dict set d_index_executables $path_idx $executables
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
if {![dict exists $d_bin_info $exenorm]} {
dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]]
} else {
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
} else {
#duplicate path entry - add to list of original paths for this normalized path
# Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...?
# we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it.
set pathdata [dict get $d_path_info $pnorm]
dict lappend pathdata original_paths $p
dict lappend pathdata indices $path_idx
dict set d_path_info $pnorm $pathdata
#we don't need to add executables for this path - as they will be the same as the original path that we have already processed.
#However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables.
set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
incr path_idx
}
#temporary debug output to check dicts are being built correctly
set debug ""
append debug "Path info dict:" \n
append debug [showdict $d_path_info] \n
append debug "Binary info dict:" \n
append debug [showdict $d_bin_info] \n
append debug "Index executables dict:" \n
append debug [showdict $d_index_executables] \n
#return $debug
puts stdout $debug
}
#-------------------------------------------------------------------

858
src/vfs/_vfscommon.vfs/modules/punk/du-0.1.0.tm

@ -479,7 +479,7 @@ namespace eval punk::du {
}
namespace eval lib {
variable du_literal
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ]
variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ]
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this
@ -489,7 +489,11 @@ namespace eval punk::du {
return [dict get $winfile_attributes $bitmask]
} else {
#list/dict shimmering?
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
#return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
set decoded [twapi::decode_file_attributes $bitmask]
dict set winfile_attributes $bitmask $decoded
return $decoded
}
}
variable win_reparse_tags
@ -563,23 +567,25 @@ namespace eval punk::du {
#then twapi::device_ioctl (win32 DeviceIoControl)
#then parse buffer somehow (binary scan..)
#https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913
punk::args::define {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}
#don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args.
proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
#set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo]
set defaults [dict create -debug 0 -debugchannel stderr]
set opts [dict merge $defaults [lrange $args 0 end-1]]
set iteminfo [lindex $args end]
set argd [punk::args::parse $args withdef {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}]
set opts [dict get $argd opts]
set iteminfo [dict get $argd values iteminfo]
set opt_debug [dict get $opts -debug]
set opt_debugchannel [dict get $opts -debugchannel]
#-longname is placeholder - caller needs to set
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0]
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}]
if {$opt_debug} {
set dbg "iteminfo returned by find_file_open\n"
append dbg [pdict -channel none iteminfo]
@ -592,34 +598,38 @@ namespace eval punk::du {
}
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
if {"hidden" in $attrinfo} {
dict set result -hidden 1
}
if {"system" in $attrinfo} {
dict set result -system 1
}
if {"readonly" in $attrinfo} {
dict set result -readonly 1
}
dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
if {"reparse_point" in $attrinfo} {
#the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+
#todo - is_microsoft from first bit of high_word
set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter?
if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"]
foreach attr $attrinfo {
switch -- $attr {
hidden {
dict set result -hidden 1
}
system {
dict set result -system 1
}
readonly {
dict set result -readonly 1
}
reparse_point {
#the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+
#todo - is_microsoft from first bit of high_word
set low_int [expr {$low_word}] ;#review - int vs string rep for dict key lookup? does it matter?
if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"]
}
}
}
}
#dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
dict set result -raw $iteminfo
return $result
}
@ -652,27 +662,337 @@ namespace eval punk::du {
catch {twapi::find_file_close $iterator}
}
}
proc resolve_characterclass {cclass} {
#takes the inner value from a tcl square bracketed character class and converts to a list of characters.
#todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters
#e.g a-c-3 -> a b c - 3
#e.g a-c-3-5 -> a b c - 3 4 5
#e.g a-c -> a b c
#e.g a- -> a -
#e.g -c-e -> - c d e
#the tcl character class does not support negation or intersection - so we can ignore those possibilities for now.
#in this context we do not need to support named character classes such as [:digit:]
set chars [list]
set i 0
set len [string length $cclass]
set accept_range 0
while {$i < $len} {
set ch [string index $cclass $i]
if {$ch eq "-"} {
if {$accept_range} {
set start [string index $cclass [expr {$i - 1}]]
set end [string index $cclass [expr {$i + 1}]]
if {$start eq "" || $end eq ""} {
#invalid range - treat - as literal
if {"-" ni $chars} {
lappend chars "-"
}
} else {
#we have a range - add all chars from previous char to next char
#range may be in either direction - e.g a-c or c-a but we don't care about the order of our result.
if {$start eq $end} {
#degenerate range - treat as single char
if {$start ni $chars} {
lappend chars $start
}
} else {
if {[scan $start %c] < [scan $end %c]} {
set c1 [scan $start %c]
set c2 [scan $end %c]
} else {
set c1 [scan $end %c]
set c2 [scan $start %c]
}
for {set c $c1} {$c <= $c2} {incr c} {
set char [format %c $c]
if {$char ni $chars} {
lappend chars $char
}
}
}
incr i ;#skip end char as it's already included in range
}
set accept_range 0
} else {
#we have a literal - add to list and allow for possible range if next char is also a -
if {"-" ni $chars} {
lappend chars "-"
}
set accept_range 1
}
} else { #we have a literal - add to list and allow for possible range if next char is also a -
if {$ch ni $chars} {
lappend chars $ch
}
set accept_range 1
}
incr i
}
return $chars
}
#return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible.
#first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes.
# e.g {*[\{]*} is a valid tcl glob
#todo - write tests.
#should also support {*\{*} matching a file such as a{blah}b
#Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL.
#So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity.
proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} {
#windows API function in use is the FindFirstFile set of functions.
#these support wildcards * and ? *only*.
#examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported
# {abc[1-3].txt} -> {abc?.txt}
# {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
# {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call.
# {{S,t}*.txt} -> {S*.txt} {T*.txt}
# *.{txt,log} -> {*.txt} {*.log}
# {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals
set gchars [split $tclglob ""]
set winglob_list [list ""]
set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
set in_brackets 0
set esc_next 0
set brace_depth 0
set brace_content ""
set braced_alternatives [list]
set brace_is_normal 0
set cclass_content ""
foreach ch $gchars {
if {$esc_next} {
if {$in_brackets} {
append cclass_content $ch
continue
} elseif {$brace_depth} {
append brace_content $ch
continue
}
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}]
set esc_next 0
continue
}
if {$ch eq "\{"} {
if {$brace_depth} {
#we have an opening brace char inside braces
#Let the brace processing handle it as it recurses.
incr brace_depth 1
append brace_content $ch
continue
}
incr brace_depth 1
set brace_content ""
} elseif {$ch eq "\}"} {
if {$brace_depth > 1} {
#we have a closing brace char inside braces
append brace_content $ch
incr brace_depth -1
continue
}
#process brace_content representing a list of alternatives
#handle list of alternatives - convert {txt,log} to *.txt;*.log
#set alternatives [split $brace_content ","]
lappend braced_alternatives $brace_content
set alternatives $braced_alternatives
set braced_alternatives [list]
set brace_content ""
incr brace_depth -1
set alt_winpatterns [list]
set alt_regexps [list]
foreach alt $alternatives {
set subresult [tclglob_equivalents $alt]
lappend alt_winpatterns {*}[dict get $subresult winglobs]
lappend alt_regexps {*}[dict get $subresult tclregexps]
}
set next_winglob_list [list]
set next_regexp_list [list]
foreach wg $winglob_list re $tclregexp_list {
#puts "wg: $wg"
#puts "re: $re"
foreach alt_wp $alt_winpatterns alt_re $alt_regexps {
#puts " alt_wp: $alt_wp"
#puts " alt_re: $alt_re"
lappend next_winglob_list "$wg$alt_wp"
set alt_re_no_caret [string range $alt_re 1 end]
lappend next_regexp_list "${re}${alt_re_no_caret}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\["} {
#windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
if {!$brace_depth} {
set in_brackets 1
} else {
#we have a [ char inside braces
#Let the brace processing handle it as it recurses.
#but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets.
set brace_is_normal 1
append brace_content $ch
}
} elseif {$ch eq "\]"} {
if {$brace_depth} {
#we have a ] char inside braces
#Let the brace processing hanele it as it recurses.
append brace_content $ch
continue
}
set in_brackets 0
set charlist [resolve_characterclass $cclass_content]
set cclass_content ""
set next_winglob_list [list]
set next_regexp_list [list]
foreach c $charlist {
#set winglob_list [lmap wg $winglob_list {append wg $c}]
foreach wg $winglob_list {
lappend next_winglob_list "$wg$c"
}
foreach re $tclregexp_list {
set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}]
lappend next_regexp_list "${re}${c_escaped}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\\"} {
if {$in_brackets} {
append cclass_content $ch
#append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself.
set esc_next 1
continue
}
if {$brace_depth} {
#we have a \ char inside braces
#Let the brace processing handle it as it recurses.
append brace_content $ch
set esc_next 1
continue
}
set esc_next 1
continue
} else {
if {$in_brackets} {
append cclass_content $ch
continue
}
if {!$brace_depth} {
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]
if {[string length $re_ch] == 1} {
switch -- $re_ch {
"?" {set re_ch "."}
"*" {set re_ch ".*"}
default {
#we could use the same mixed case filter here for both sensitive and insensitive filesystems,
#because the API filtering will already have done the restriction,
#and so a more permissive regex that matches both cases will still only match the results that the API call returns,
#which will be correct based on the case-sensitivity of the filesystem.
#It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob.
#ie in the parts of the original glob that were in square brackets.
if {!$case_sensitive_filesystem} {
# add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
if {[string is upper $re_ch]} {
set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"]
} elseif {[string is lower $re_ch]} {
set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"]
} else {
#non-alpha char - no need to add case-insensitivity
}
}
}
}
}
set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}]
} else {
#we have a literal char inside braces - add to current brace_content
if {$brace_depth == 1 && $ch eq ","} {
lappend braced_alternatives $brace_content
set brace_content ""
} else {
append brace_content $ch
}
}
}
}
#sanity check
if {[llength $winglob_list] != [llength $tclregexp_list]} {
error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]"
}
set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}]
return [dict create winglobs $winglob_list tclregexps $tclregexp_list]
}
#todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed?
namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs
# get listing without using unix-tools (may not be installed on the windows system)
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
# This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance
#todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob.
# we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before
#we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder.
#For really large folders and a moderate number of patterns, this could be a significant performance improvement.
proc du_dirlisting_twapi {folderpath args} {
set defaults [dict create\
-glob *\
-filedebug 0\
-patterndebug 0\
-with_sizes 1\
-with_times 1\
]
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob]
set tcl_glob [dict get $opts -glob]
#todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps.
set case_sensitive_filesystem 0 ;#todo - consider detecting this properly.
#Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program.
#In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly,
#as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem,
#so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly.
#Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a
#case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem.
#Note: we only use this to adjust the filtering regexps we generate from the tcl glob.
#The windows API pattern match will already filter based on the case-sensitivity of the filesystem
# so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem.
set globs_processed [tclglob_equivalents $tcl_glob]
#we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob.
#temp
#set win_glob_list [list $tcl_glob]
set win_glob_list [dict get $globs_processed winglobs]
set tcl_regex_list [dict get $globs_processed tclregexps]
#review
# our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob.
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_filedebug [dict get $opts -filedebug] ;#per file
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_patterndebug [dict get $opts -patterndebug]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean - (f false vs f file!)
@ -711,256 +1031,288 @@ namespace eval punk::du {
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set errors [dict create]
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
# return it so it can be stored and tried as an alternative for problem paths
#puts stderr ">>> glob: $opt_glob"
#REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases.
try {
#glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field
} on error args {
foreach win_glob $win_glob_list tcl_re $tcl_regex_list {
if {$opt_patterndebug} {
puts stderr ">>>du_dirlisting_twapi winglob: $win_glob"
puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re"
}
#REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases.
try {
if {[string match "*denied*" $args]} {
#output similar format as unixy du
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
puts stderr " (errorcode: $::errorCode)\n"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field
} on error args {
try {
if {[string match "*denied*" $args]} {
#output similar format as unixy du
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
puts stderr " (errorcode: $::errorCode)\n"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#errorcode TWAPI_WIN32 2 {The system cannot find the file specified.}
#This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error
#The find-all glob * won't get here because it returns . & ..
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} {
#errorcode TWAPI_WIN32 2 {The system cannot find the file specified.}
#This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error
#The find-all glob * won't get here because it returns . & ..
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
#if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {}
if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} {
#looks like an ordinary no results for chosen glob
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
#return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
continue
}
}
if {[set plen [pathcharacterlen $folderpath]] >= 250} {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
# re-fetch this folder with altnames
#file normalize - aside from being slow - will have problems with long paths - so this won't work.
#this function should only accept absolute paths
#
#
#Note: using -detail full only helps if the last segment of path has an altname..
#To properly shorten we need to have kept track of altname all the way from the root!
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
#### SLOW
set fixedpath [dict get [file attributes $folderpath] -shortname]
#### SLOW
if {[set plen [pathcharacterlen $folderpath]] >= 250} {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
# re-fetch this folder with altnames
#file normalize - aside from being slow - will have problems with long paths - so this won't work.
#this function should only accept absolute paths
#
#
#Note: using -detail full only helps if the last segment of path has an altname..
#To properly shorten we need to have kept track of altname all the way from the root!
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
#### SLOW
set fixedpath [dict get [file attributes $folderpath] -shortname]
#### SLOW
append errmsg "retrying with with windows altname '$fixedpath'"
puts stderr $errmsg
} else {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
set tmp_errors [list $::errorCode]
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
#we can use //?/path dos device path - but not with tcl functions
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here
append errmsg "retrying with with windows altname '$fixedpath'"
puts stderr $errmsg
} else {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
set tmp_errors [list $::errorCode]
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
#we can use //?/path dos device path - but not with tcl functions
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here
set fixedtail ""
set parent [file dirname $folderpath]
set badtail [file tail $folderpath]
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname]
break
set fixedtail ""
set parent [file dirname $folderpath]
set badtail [file tail $folderpath]
set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo"
if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname]
break
}
}
if {![string length $fixedtail]} {
dict lappend errors $folderpath {*}$tmp_errors
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
#so the illegalname_fix doesn't really work here
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath $parent/$fixedtail
append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg
}
if {![string length $fixedtail]} {
dict lappend errors $folderpath {*}$tmp_errors
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
if {[catch {
set iterator [twapi::find_file_open $fixedpath/* -detail basic]
} errMsg]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')"
puts stderr " (errorcode: $::errorCode)\n"
puts stderr "$errMsg"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
#so the illegalname_fix doesn't really work here
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath [file join $parent $fixedtail]
append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg
} on error args {
set errmsg "error reading folder: $folderpath\n"
append errmsg "error: $args" \n
append errmsg "errorInfo: $::errorInfo" \n
puts stderr "$errmsg"
puts stderr "FAILED to collect info for folder '$folderpath'"
#append errmsg "aborting.."
#error $errmsg
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
#jjj
if {[catch {
set iterator [twapi::find_file_open $fixedpath/* -detail basic]
} errMsg]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')"
puts stderr " (errorcode: $::errorCode)\n"
puts stderr "$errMsg"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
if {![regexp $tcl_re $nm]} {
continue
}
if {$nm in {. ..}} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
#set ftype ""
set do_sizes 0
set do_times 0
#attributes applicable to any classification
set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
set file_attributes [dict get $attrdict -fileattributes]
} on error args {
set errmsg "error reading folder: $folderpath\n"
append errmsg "error: $args" \n
append errmsg "errorInfo: $::errorInfo" \n
puts stderr "$errmsg"
puts stderr "FAILED to collect info for folder '$folderpath'"
#append errmsg "aborting.."
#error $errmsg
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set is_reparse_point [expr {"reparse_point" in $file_attributes}]
set is_directory [expr {"directory" in $file_attributes}]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set linkdata [dict create]
# -----------------------------------------------------------
#main classification
if {$is_reparse_point} {
#this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
#recheck glob
#review!
if {![string match $opt_glob $nm]} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
set ftype ""
#attributes applicable to any classification
set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
set file_attributes [dict get $attrdict -fileattributes]
set linkdata [dict create]
# -----------------------------------------------------------
#main classification
if {"reparse_point" in $file_attributes} {
#this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return
#One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point
#
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du?
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
#e.g (stripped of headers/footers and other lines)
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
#du includes the size of the symlink
#but we can't get it with tcl's file size
#twapi doesn't seem to have anything to help read it either (?)
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
#
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
#
#links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname
set ftype "l"
dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} {
dict set linkdata target_type file
}
}
if {"directory" in $file_attributes} {
if {$nm in {. ..}} {
continue
#One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point
#
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du?
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
#e.g (stripped of headers/footers and other lines)
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
#du includes the size of the symlink
#but we can't get it with tcl's file size
#twapi doesn't seem to have anything to help read it either (?)
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
#
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
#
#links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname
#set ftype "l"
if {"l" in $sized_types} {
set do_sizes 1
}
if {"l" in $timed_types} {
set do_times 1
}
dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} {
dict set linkdata target_type file
}
}
if {"reparse_point" ni $file_attributes} {
lappend dirs $fullname
set ftype "d"
} else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory
if {$is_directory} {
#if {$nm in {. ..}} {
# continue
#}
if {!$is_reparse_point} {
lappend dirs $fullname
#set ftype "d"
if {"d" in $sized_types} {
set do_sizes 1
}
if {"d" in $timed_types} {
set do_times 1
}
} else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory
}
}
}
if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname
if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size]
if {!$is_reparse_point && !$is_directory} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname
if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size]
set do_sizes 1
}
if {"f" in $timed_types} {
set do_times 1
}
#set ftype "f"
}
set ftype "f"
}
# -----------------------------------------------------------
# -----------------------------------------------------------
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {$ftype in $sized_types} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
}
if {$ftype in $timed_types} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
dict set alltimes $fullname [dict create\
c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\
a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\
m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\
]
}
if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
if {$do_sizes} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
}
if {$do_times} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
dict set alltimes $fullname [dict create\
c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\
a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\
m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\
]
}
if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata
}
}
twapi::find_file_close $iterator
}
twapi::find_file_close $iterator
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts
dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types
#also determine whether vfs. file system x is *much* faster than file attributes
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
}
proc get_vfsmounts_in_folder {folderpath} {
set vfsmounts [list]
@ -991,9 +1343,11 @@ namespace eval punk::du {
#work around the horrible tilde-expansion thing (not needed for tcl 9+)
proc file_join_one {base newtail} {
if {[string index $newtail 0] ne {~}} {
return [file join $base $newtail]
#return [file join $base $newtail]
return $base/$newtail
}
return [file join $base ./$newtail]
#return [file join $base ./$newtail]
return $base/./$newtail
}
@ -1121,7 +1475,7 @@ namespace eval punk::du {
#ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now.
#struct::set will affect order: tcl vs critcl give different ordering!
set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]]
#----

32
src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm

@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check {
#These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author)
#Not any sort of comprehensive check of known tcl bugs.
#These are reported in warning output of 'help tcl' - or used for workarounds in some cases.
proc has_tclbug_caseinsensitiveglob_windows {} {
#https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted
if {"windows" ne $::tcl_platform(platform)} {
set bug 0
} else {
set tmpdir [file tempdir]
set testfile [file join $tmpdir "bugtest"]
set fd [open $testfile w]
puts $fd test
close $fd
set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*]
foreach r $globresult {
if {$r ne "bugtest"} {
set bug 1
break
}
}
return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning]
#possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized
# to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation.
}
}
#todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies.
# but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module,
# or at least checking for an existing mounted case-insensitive filesystem.
# A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like.
# It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix.
proc has_tclbug_regexp_emptystring {} {
#The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces
#This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic,

209
src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm

@ -50,7 +50,7 @@ package require punk::lib
package require punk::args
package require punk::ansi
package require punk::winpath
package require punk::du
package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing.
package require commandstack
#*** !doctools
#[item] [package {Tcl 8.6}]
@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs {
#[list_begin definitions]
punk::args::define {
@id -id ::punk::nav::fs::d/
@cmd -name punk::nav::fs::d/ -help\
{List directories or directories and files in the current directory or in the
targets specified with the fileglob_or_target glob pattern(s).
If a single target is specified without glob characters, and it exists as a directory,
then the working directory is changed to that target and a listing of that directory
is returned. If the single target specified without glob characters does not exist as
a directory, then it is treated as a glob pattern and the listing is for the current
directory with results filtered to match fileglob_or_target.
If multiple targets or glob patterns are specified, then a separate listing is returned
for each fileglob_or_target pattern.
This function is provided via aliases as ./ and .// with v being inferred from the alias
name, and also as d/ with an explicit v argument.
The ./ and .// forms are more convenient for interactive use.
examples:
./ - list directories in current directory
.// - list directories and files in current directory
./ src/* - list directories in src
.// src/* - list directories and files in src
.// *.txt - list files in current directory with .txt extension
.// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name
(on a case-insensitive filesystem this would also match T*1.txt etc)
.// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name
(glob chars treated as literals due to being in character-class brackets
This will match files beginning with a capital T and not lower case t
even on a case-insensitive filesystem.)
.// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns:
{[t]*} - names beginning with t
{d{e,d}*} - names beginning with de or dd
(on a case-insensitive filesystem the first pattern would also match names beginning with T)
}
@values -min 1 -max -1 -type string
v -type string -choices {/ //} -help\
"
/ - list directories only
// - list directories and files
"
fileglob_or_target -type string -optional true -multiple true -help\
"A glob pattern as supported by Tcl's 'glob' command, to filter results.
If multiple patterns are supplied, then a listing for each pattern is returned.
If no patterns are supplied, then all items are listed."
}
#NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread.
#As this function recurses and calls cd multiple times - it's not thread-safe.
#Another thread could theoretically cd whilst this is running.
@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs {
#puts stdout "-->[ansistring VIEW $result]"
return $result
} else {
set atail [lassign $args cdtarget]
if {[llength $args] == 1} {
set cdtarget [lindex $args 0]
switch -exact -- $cdtarget {
. - ./ {
tailcall punk::nav::fs::d/
tailcall punk::nav::fs::d/ $v
}
.. - ../ {
if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} {
@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs {
if {[string range $cdtarget_copy 0 3] eq "//?/"} {
#handle dos device paths - convert to normal path for glob testing
set glob_test [string range $cdtarget_copy 3 end]
set cdtarget_is_glob [regexp {[*?]} $glob_test]
set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test]
} else {
set cdtarget_is_glob [regexp {[*?]} $cdtarget]
set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget]
#todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing.
}
if {!$cdtarget_is_glob} {
set cdtarget_file_type [file type $cdtarget]
@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs {
}
set VIRTUAL_CWD $cdtarget
set curdir $cdtarget
tailcall punk::nav::fs::d/ $v
} else {
set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget]
if {[string match //zipfs:/* $VIRTUAL_CWD]} {
@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs {
}
if {[file type $target] eq "directory"} {
set VIRTUAL_CWD $target
tailcall punk::nav::fs::d/ $v
}
}
tailcall punk::nav::fs::d/ $v
}
set curdir $VIRTUAL_CWD
} else {
@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs {
#globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.)
set searchspec [lindex $args 0]
set result ""
#set chunklist [list]
@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs {
set this_result [dict create]
foreach searchspec $args {
set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)]
set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}]
#we need to support the same glob chars that Tcl's 'glob' command accepts.
set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]]
#we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean
#this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough.
#lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*)
@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs {
set allow_nonportable [dict exists $received -nonportable]
set curdir [pwd]
set fullpath_list [list]
set fullpath_list [list] ;#list of full paths to create.
set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir)
#these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests.
set error_paths [list]
foreach p $paths {
if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} {
@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $p "Path '$p' contains null character which is not allowed"]
continue
}
set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)]
#e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir.
set fullpath [file join $path1 {*}[lrange $args 1 end]]
set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)]
#if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir.
#Some subpaths of the supplied paths to create may already exist.
#we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all.
#we should test write permissions on the nearest existing parent of the supplied path to create,
#rather than just on the immediate parent segment of the supplied path itself which may not exist.
set fullpath [file normalize $fullpath]
set parent [file dirname $fullpath]
while {![file exists $parent]} {
set parent [file dirname $parent]
@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"]
continue
}
lappend existing_parent_list $parent
lappend fullpath_list $fullpath
}
if {[llength $fullpath_list] != [llength $paths]} {
@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs {
set num_created 0
set error_string ""
foreach fullpath $fullpath_list {
foreach fullpath $fullpath_list existing_parent $existing_parent_list {
#calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue.
#set relative_path [file relative $fullpath $existing_parent]
#todo.
if {[catch {file mkdir $fullpath}]} {
set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted."
cd $curdir
break
}
incr num_created
@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs {
if {$error_string ne ""} {
error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered."
}
d/ $curdir
#display summaries of created directories (which may have already existed) by reusing d/ to get info on them.
set query_paths [lmap v $paths $v/*]
d/ / {*}$query_paths
}
#todo use unknown to allow d/~c:/etc ??
@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs {
if {![file isdirectory $target]} {
error "Folder $target not found"
}
d/ $target
d/ / $target
}
@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs {
}
set relativepath [expr {[file pathtype $searchspec] eq "relative"}]
set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
#set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent.
#(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict)
@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs {
}
puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location"
set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location]
return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
}
#todo - package as punk::nav::fs
@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs {
}
proc dirfiles_dict {args} {
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict]
lassign [dict values $argd] leaders opts vals
set searchspecs [dict values $vals]
lassign [dict values $argd] leaders opts values
set searchspecs [dict values $values]
#puts stderr "searchspecs: $searchspecs [llength $searchspecs]"
#puts stdout "arglist: $opts"
@ -893,7 +958,7 @@ tcl::namespace::eval punk::nav::fs {
set searchspec [lindex $searchspecs 0]
# -- --- --- --- --- --- ---
set opt_searchbase [dict get $opts -searchbase]
set opt_tailglob [dict get $opts -tailglob]
set opt_tailglob [dict get $opts -tailglob]
set opt_with_sizes [dict get $opts -with_sizes]
set opt_with_times [dict get $opts -with_times]
# -- --- --- --- --- --- ---
@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs {
}
}
"\uFFFF" {
set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
if {$searchtail_has_globs} {
if {$is_relativesearchspec} {
#set location [file dirname [file join $searchbase $searchspec]]
@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs {
} else {
set next_opt_with_times [list -with_times $opt_with_times]
}
set ts1 [clock clicks -milliseconds]
if {$is_in_vfs} {
set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times]
} else {
@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs {
}
}
}
set ts2 [clock clicks -milliseconds]
set ts_listing [expr {$ts2 - $ts1}]
set dirs [dict get $listing dirs]
set files [dict get $listing files]
@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs {
set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden]
}
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#-----------------------------------------------------------------------------------------
set ts1 [clock milliseconds]
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#foreach d $dirs {
# if {[lindex [file system $d] 0] eq "tclvfs"} {
@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs {
set files $sorted_files
set filesizes $sorted_filesizes
set ts2 [clock milliseconds]
set ts_sorting [expr {$ts2 - $ts1}]
#-----------------------------------------------------------------------------------------
# -- ---
#jmn
set ts1 [clock milliseconds]
foreach nm [list {*}$dirs {*}$files] {
if {[punk::winpath::illegalname_test $nm]} {
lappend nonportable $nm
}
}
set ts2 [clock milliseconds]
set ts_nonportable_check [expr {$ts2 - $ts1}]
set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms]
set front_of_dict [dict create location $location searchbase $opt_searchbase]
set listing [dict merge $front_of_dict $listing]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info]
return [dict merge $listing $updated]
}
@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs {
@id -id ::punk::nav::fs::dirfiles_dict_as_lines
-stripbase -default 0 -type boolean
-formatsizes -default 1 -type boolean
-listing -default "/" -choices {/ // //}
-listing -default "/" -choices {/ //}
@values -min 1 -max -1 -type dict -unnamed true
}
#todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing?
proc dirfiles_dict_as_lines {args} {
set ts1 [clock milliseconds]
#set ts1 [clock milliseconds]
package require overtype
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines]
lassign [dict values $argd] leaders opts vals
@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs {
#review - symlink to shortcut? hopefully will just work
#classify as file or directory - fallback to file if unknown/undeterminable
set finfo_plus [list]
set ts2 [clock milliseconds]
#set ts2 [clock milliseconds]
foreach fdict $finfo {
set fname [dict get $fdict file]
if {[file extension $fname] eq ".lnk"} {
@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs {
}
unset finfo
puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
#puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}]
@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs {
#consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path
#review: punk::winpath calls cygpath!
#review: file pathtype is platform dependant
proc path_to_absolute {path base platform} {
set ptype [file pathtype $path]
proc path_to_absolute {subpath base platform} {
set ptype [file pathtype $subpath]
if {$ptype eq "absolute"} {
set path_absolute $path
set path_absolute $subpath
} elseif {$ptype eq "volumerelative"} {
if {$platform eq "windows"} {
#unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms)
if {[string index $path 0] eq "/"} {
if {[string index $subpath 0] eq "/"} {
#this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here
#It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions.
#Todo - tidy up.
package require punk::unixywindows
set path_absolute [punk::unixywindows::towinpath $path]
set path_absolute [punk::unixywindows::towinpath $subpath]
#puts stderr "winpath: $path"
} else {
#todo handle volume-relative paths with volume specified c:etc c:
@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs {
#The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives
#Arguably if ...?
#set path_absolute $base/$path
set path_absolute $path
#set path_absolute $base/$subpath
set path_absolute $subpath
}
} else {
# unknown what paths are reported as this on other platforms.. treat as absolute for now
set path_absolute $path
set path_absolute $subpath
}
} else {
set path_absolute $base/$path
}
if {$platform eq "windows"} {
if {[punk::winpath::illegalname_test $path_absolute]} {
set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
}
#e.g relative subpath=* base = c:/test -> c:/test/*
#e.g relative subpath=../test base = c:/test -> c:/test/../test
#e.g relative subpath=* base = //server/share/test -> //server/share/test/*
set path_absolute $base/$subpath
}
#fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues.
#if {$platform eq "windows"} {
# if {[punk::winpath::illegalname_test $path_absolute]} {
# set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
# }
#}
return $path_absolute
}
proc strip_prefix_depth {path prefix} {
@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib {
#[para] Secondary functions that are part of the API
#[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
punk::args::define {
@id -id ::punk::nav::fs::lib::is_fileglob
@cmd -name punk::nav::fs::lib::is_fileglob
@values -min 1 -max 1
path -type string -required true -help\
{String to test for being a glob pattern as recognised by the tcl 'glob' command.
If the string represents a path with multiple segments, only the final component
of the path will be tested for glob characters.
Glob patterns in this context are different to globs accepted by TCL's 'string match'.
A glob pattern is any string that contains unescaped * ? { } [ or ].
This will not detect mismatched unescaped braces or brackets.
Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern.
}
}
proc is_fileglob {str} {
#a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser)
set in_escape 0
set segments [file split $str]
set tail [lindex $segments end]
foreach c [split $tail ""] {
if {$in_escape} {
set in_escape 0
} else {
if {$c eq "\\"} {
set in_escape 1
} elseif {$c in [list * ? "\[" "\]" "{" "}" ]} {
return 1
}
}
}
return 0
}
#*** !doctools

2
src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm

@ -357,7 +357,7 @@ namespace eval punk::path {
}
}
}
puts "==>finalparts: '$finalparts'"
#puts "==>finalparts: '$finalparts'"
# using join - {"" "" server share} -> //server/share and {a b} -> a/b
if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} {
#backtracking on unix-style path can end up with empty string as only member of finalparts

64
src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm

@ -114,6 +114,50 @@ namespace eval punk::winpath {
return $path
}
}
proc illegal_char_map_to_doublewide {ch} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
set map [dict create \
"<" "\uFF1C" \
">" "\uFF1E" \
":" "\uFF1A" \
"\"" "\uFF02" \
"/" "\uFF0F" \
"\\" "\uFF3C" \
"|" "\uFF5C" \
"?" "\uFF1F" \
"*" "\uFF0A"]
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
proc illegal_char_map_to_ntfs {ch} {
#windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc.
#see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars
#see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped)
set map [dict create \
"<" "\uF03C" \
">" "\uF03E" \
":" "\uF03A" \
"\"" "\uF022" \
"/" "unknown" \
"\\" "\uF05c" \
"|" "\uF07C" \
"?" "\uF03F" \
"*" "\uF02A"]
#note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API.
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
#we don't validate that path is actually illegal because we don't know the full range of such names.
#The caller can apply this to any path.
#don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently)
@ -200,8 +244,15 @@ namespace eval punk::winpath {
set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"]
#we need to exclude things like path/.. path/.
foreach seg [file split $path] {
if {$seg in [list . ..]} {
set segments [file split $path]
if {[file pathtype $path] eq "absolute"} {
#absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments.
set checksegments [lrange $segments 1 end]
} else {
set checksegments $segments
}
foreach seg $checksegments {
if {$seg in {. ..}} {
#review - what if there is a folder or file that actually has a name such as . or .. ?
#unlikely in normal use - but could done deliberately for bad reasons?
#We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem.
@ -220,10 +271,17 @@ namespace eval punk::winpath {
#only check for actual space as other whitespace seems to work without being stripped
#trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph
if {[string index $seg end] in [list " " "."]} {
if {[string index $seg end] in {" " .}} {
#windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
#set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams.
set re {[<>:"/\\|?*]}
if {[regexp $re $seg]} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
}
#glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph)
#- they seem to be readable from cmd and tclsh as is.

Loading…
Cancel
Save