Browse Source

dir listing and glob fixes - windows

master
Julian Noble 4 days ago
parent
commit
b2f4d67056
  1. 211
      src/bootsupport/modules/punk-0.1.tm
  2. 500
      src/bootsupport/modules/punk/du-0.1.0.tm
  3. 32
      src/bootsupport/modules/punk/lib-0.1.6.tm
  4. 209
      src/bootsupport/modules/punk/nav/fs-0.1.0.tm
  5. 2
      src/bootsupport/modules/punk/path-0.1.0.tm
  6. 64
      src/bootsupport/modules/punk/winpath-0.1.0.tm
  7. 211
      src/modules/punk-0.1.tm
  8. 500
      src/modules/punk/du-999999.0a1.0.tm
  9. 32
      src/modules/punk/lib-999999.0a1.0.tm
  10. 209
      src/modules/punk/nav/fs-999999.0a1.0.tm
  11. 2
      src/modules/punk/path-999999.0a1.0.tm
  12. 64
      src/modules/punk/winpath-999999.0a1.0.tm
  13. 211
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm
  14. 500
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm
  15. 32
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm
  16. 209
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm
  17. 2
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm
  18. 64
      src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm
  19. 211
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm
  20. 500
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm
  21. 32
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm
  22. 209
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm
  23. 2
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm
  24. 64
      src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm
  25. 211
      src/vfs/_vfscommon.vfs/modules/punk-0.1.tm
  26. 500
      src/vfs/_vfscommon.vfs/modules/punk/du-0.1.0.tm
  27. 32
      src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm
  28. 209
      src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm
  29. 2
      src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm
  30. 64
      src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm

211
src/bootsupport/modules/punk-0.1.tm

@ -6066,9 +6066,218 @@ namespace eval punk {
set pipe [punk::path_list_pipe $glob] set pipe [punk::path_list_pipe $glob]
{*}$pipe {*}$pipe
} }
proc path {{glob *}} { proc path_basic {{glob *}} {
set pipe [punk::path_list_pipe $glob] set pipe [punk::path_list_pipe $glob]
{*}$pipe |> list_as_lines {*}$pipe |> list_as_lines
}
namespace eval argdoc {
punk::args::define {
@id -id ::punk::path
@cmd -name "punk::path" -help\
"Introspection of the PATH environment variable.
This tool will examine executables within each PATH entry and show which binaries
are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns."
@opts
-binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries."
@values -min 0 -max -1
glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories."
}
}
proc path {args} {
set argd [punk::args::parse $args withid ::punk::path]
lassign [dict values $argd] leaders opts values received
set binglobs [dict get $opts -binglobs]
set globs [dict get $values glob]
if {$::tcl_platform(platform) eq "windows"} {
set sep ";"
} else {
# : ok for linux/bsd ... mac?
set sep ":"
}
set all_paths [split [string trimright $::env(PATH) $sep] $sep]
set filtered_paths $all_paths
if {[llength $globs]} {
set filtered_paths [list]
foreach p $all_paths {
foreach g $globs {
if {[string match -nocase $g $p]} {
lappend filtered_paths $p
break
}
}
}
}
#Windows executable search location order:
#1. The current directory.
#2. The directories that are listed in the PATH environment variable.
# within each directory windows uses the PATHEXT environment variable to determine
#which files are considered executable and in which order they are considered.
#So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files
#with the same name but different extensions in the same directory, then the one
#with the extension that appears first in PATHEXT will be executed when that name is called.
#On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output.
#Duplicate PATH entries are also a fairly likely possibility on all platforms.
#This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries.
#By default we don't want to display all executables in all PATH entries - as this can be very verbose
#- but we want to be able to show which executables are overshadowed by which PATH entries,
#and to be able to filter the PATH entries and the executables using glob patterns.
#To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name.
#The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths
#in the original PATH list. The executable dict will contain the paths and path indices where each executable is found,
#and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a
#dict keyed by path index which contains the list of executables in that path - to make it easy to show which
#executables are overshadowed by which paths.
set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows).
set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off).
set d_index_executables [dict create] ;#key is path index, value is list of executables in that path
set path_idx 0
foreach p $all_paths {
if {$::tcl_platform(platform) eq "windows"} {
set pnorm [string tolower $p]
} else {
set pnorm $p
}
if {![dict exists $d_path_info $pnorm]} {
dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]]
set executables [list]
if {[file isdirectory $p]} {
#get all files that are executable in this path.
#If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names.
set pnormglob [file normalize $p]
if {$::tcl_platform(platform) eq "windows"} {
#Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable.
#We need to account for this in our glob pattern.
set pathexts [list]
if {[info exists ::env(PATHEXT)]} {
set env_pathexts [split $::env(PATHEXT) ";"]
#set pathexts [lmap e $env_pathexts {string tolower $e}]
foreach pe $env_pathexts {
if {$pe eq "."} {
continue
}
lappend pathexts [string tolower $pe]
}
} else {
set env_pathexts [list]
#default PATHEXT if not set - according to Microsoft docs
set pathexts [list .com .exe .bat .cmd]
}
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set has_pathext 1
break
}
}
if {!$has_pathext} {
foreach pe $pathexts {
lappend binglobs "$bg$pe"
}
}
}
set lc_binglobs [lmap e $binglobs {string tolower $e}]
if {"." in $pathexts} {
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]]
set has_pathext 1
break
}
}
if {$has_pathext} {
if {[string tolower $base] ni $lc_binglobs} {
lappend binglobs "$base"
}
}
}
}
#TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem.
#(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug)
#We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results.
#The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for.
#(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe')
set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]]
set executables [list]
foreach e $globresults {
puts stderr "glob result: $e"
puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]"
lappend executables [file tail [file normalize $e]]
}
} else {
set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]]
}
}
dict set d_index_executables $path_idx $executables
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
if {![dict exists $d_bin_info $exenorm]} {
dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]]
} else {
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
} else {
#duplicate path entry - add to list of original paths for this normalized path
# Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...?
# we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it.
set pathdata [dict get $d_path_info $pnorm]
dict lappend pathdata original_paths $p
dict lappend pathdata indices $path_idx
dict set d_path_info $pnorm $pathdata
#we don't need to add executables for this path - as they will be the same as the original path that we have already processed.
#However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables.
set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
incr path_idx
}
#temporary debug output to check dicts are being built correctly
set debug ""
append debug "Path info dict:" \n
append debug [showdict $d_path_info] \n
append debug "Binary info dict:" \n
append debug [showdict $d_bin_info] \n
append debug "Index executables dict:" \n
append debug [showdict $d_index_executables] \n
#return $debug
puts stdout $debug
} }
#------------------------------------------------------------------- #-------------------------------------------------------------------

500
src/bootsupport/modules/punk/du-0.1.0.tm

@ -479,7 +479,7 @@ namespace eval punk::du {
} }
namespace eval lib { namespace eval lib {
variable du_literal variable du_literal
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ]
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this
@ -489,7 +489,11 @@ namespace eval punk::du {
return [dict get $winfile_attributes $bitmask] return [dict get $winfile_attributes $bitmask]
} else { } else {
#list/dict shimmering? #list/dict shimmering?
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
set decoded [twapi::decode_file_attributes $bitmask]
dict set winfile_attributes $bitmask $decoded
return $decoded
} }
} }
variable win_reparse_tags variable win_reparse_tags
@ -563,23 +567,25 @@ namespace eval punk::du {
#then twapi::device_ioctl (win32 DeviceIoControl) #then twapi::device_ioctl (win32 DeviceIoControl)
#then parse buffer somehow (binary scan..) #then parse buffer somehow (binary scan..)
#https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913 #https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913
punk::args::define {
proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
set argd [punk::args::parse $args withdef {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo @id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output" -debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1 @values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'" iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}] }
set opts [dict get $argd opts] #don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args.
set iteminfo [dict get $argd values iteminfo] proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
#set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo]
set defaults [dict create -debug 0 -debugchannel stderr]
set opts [dict merge $defaults [lrange $args 0 end-1]]
set iteminfo [lindex $args end]
set opt_debug [dict get $opts -debug] set opt_debug [dict get $opts -debug]
set opt_debugchannel [dict get $opts -debugchannel] set opt_debugchannel [dict get $opts -debugchannel]
#-longname is placeholder - caller needs to set #-longname is placeholder - caller needs to set
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0] set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}]
if {$opt_debug} { if {$opt_debug} {
set dbg "iteminfo returned by find_file_open\n" set dbg "iteminfo returned by find_file_open\n"
append dbg [pdict -channel none iteminfo] append dbg [pdict -channel none iteminfo]
@ -592,23 +598,23 @@ namespace eval punk::du {
} }
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
if {"hidden" in $attrinfo} { foreach attr $attrinfo {
switch -- $attr {
hidden {
dict set result -hidden 1 dict set result -hidden 1
} }
if {"system" in $attrinfo} { system {
dict set result -system 1 dict set result -system 1
} }
if {"readonly" in $attrinfo} { readonly {
dict set result -readonly 1 dict set result -readonly 1
} }
dict set result -shortname [dict get $iteminfo altname] reparse_point {
dict set result -fileattributes $attrinfo
if {"reparse_point" in $attrinfo} {
#the twapi API splits this 32bit value for us #the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0] set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1] set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+ #+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value | #|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+ #+-+-+-+-+-----------------------+-------------------------------+
@ -617,9 +623,13 @@ namespace eval punk::du {
if {[dict exists $win_reparse_tags_by_int $low_int]} { if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else { } else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"] dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"]
}
} }
} }
}
#dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
dict set result -raw $iteminfo dict set result -raw $iteminfo
return $result return $result
} }
@ -652,27 +662,337 @@ namespace eval punk::du {
catch {twapi::find_file_close $iterator} catch {twapi::find_file_close $iterator}
} }
} }
proc resolve_characterclass {cclass} {
#takes the inner value from a tcl square bracketed character class and converts to a list of characters.
#todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters
#e.g a-c-3 -> a b c - 3
#e.g a-c-3-5 -> a b c - 3 4 5
#e.g a-c -> a b c
#e.g a- -> a -
#e.g -c-e -> - c d e
#the tcl character class does not support negation or intersection - so we can ignore those possibilities for now.
#in this context we do not need to support named character classes such as [:digit:]
set chars [list]
set i 0
set len [string length $cclass]
set accept_range 0
while {$i < $len} {
set ch [string index $cclass $i]
if {$ch eq "-"} {
if {$accept_range} {
set start [string index $cclass [expr {$i - 1}]]
set end [string index $cclass [expr {$i + 1}]]
if {$start eq "" || $end eq ""} {
#invalid range - treat - as literal
if {"-" ni $chars} {
lappend chars "-"
}
} else {
#we have a range - add all chars from previous char to next char
#range may be in either direction - e.g a-c or c-a but we don't care about the order of our result.
if {$start eq $end} {
#degenerate range - treat as single char
if {$start ni $chars} {
lappend chars $start
}
} else {
if {[scan $start %c] < [scan $end %c]} {
set c1 [scan $start %c]
set c2 [scan $end %c]
} else {
set c1 [scan $end %c]
set c2 [scan $start %c]
}
for {set c $c1} {$c <= $c2} {incr c} {
set char [format %c $c]
if {$char ni $chars} {
lappend chars $char
}
}
}
incr i ;#skip end char as it's already included in range
}
set accept_range 0
} else {
#we have a literal - add to list and allow for possible range if next char is also a -
if {"-" ni $chars} {
lappend chars "-"
}
set accept_range 1
}
} else { #we have a literal - add to list and allow for possible range if next char is also a -
if {$ch ni $chars} {
lappend chars $ch
}
set accept_range 1
}
incr i
}
return $chars
}
#return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible.
#first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes.
# e.g {*[\{]*} is a valid tcl glob
#todo - write tests.
#should also support {*\{*} matching a file such as a{blah}b
#Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL.
#So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity.
proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} {
#windows API function in use is the FindFirstFile set of functions.
#these support wildcards * and ? *only*.
#examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported
# {abc[1-3].txt} -> {abc?.txt}
# {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
# {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call.
# {{S,t}*.txt} -> {S*.txt} {T*.txt}
# *.{txt,log} -> {*.txt} {*.log}
# {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals
set gchars [split $tclglob ""]
set winglob_list [list ""]
set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
set in_brackets 0
set esc_next 0
set brace_depth 0
set brace_content ""
set braced_alternatives [list]
set brace_is_normal 0
set cclass_content ""
foreach ch $gchars {
if {$esc_next} {
if {$in_brackets} {
append cclass_content $ch
continue
} elseif {$brace_depth} {
append brace_content $ch
continue
}
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}]
set esc_next 0
continue
}
if {$ch eq "\{"} {
if {$brace_depth} {
#we have an opening brace char inside braces
#Let the brace processing handle it as it recurses.
incr brace_depth 1
append brace_content $ch
continue
}
incr brace_depth 1
set brace_content ""
} elseif {$ch eq "\}"} {
if {$brace_depth > 1} {
#we have a closing brace char inside braces
append brace_content $ch
incr brace_depth -1
continue
}
#process brace_content representing a list of alternatives
#handle list of alternatives - convert {txt,log} to *.txt;*.log
#set alternatives [split $brace_content ","]
lappend braced_alternatives $brace_content
set alternatives $braced_alternatives
set braced_alternatives [list]
set brace_content ""
incr brace_depth -1
set alt_winpatterns [list]
set alt_regexps [list]
foreach alt $alternatives {
set subresult [tclglob_equivalents $alt]
lappend alt_winpatterns {*}[dict get $subresult winglobs]
lappend alt_regexps {*}[dict get $subresult tclregexps]
}
set next_winglob_list [list]
set next_regexp_list [list]
foreach wg $winglob_list re $tclregexp_list {
#puts "wg: $wg"
#puts "re: $re"
foreach alt_wp $alt_winpatterns alt_re $alt_regexps {
#puts " alt_wp: $alt_wp"
#puts " alt_re: $alt_re"
lappend next_winglob_list "$wg$alt_wp"
set alt_re_no_caret [string range $alt_re 1 end]
lappend next_regexp_list "${re}${alt_re_no_caret}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\["} {
#windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
if {!$brace_depth} {
set in_brackets 1
} else {
#we have a [ char inside braces
#Let the brace processing handle it as it recurses.
#but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets.
set brace_is_normal 1
append brace_content $ch
}
} elseif {$ch eq "\]"} {
if {$brace_depth} {
#we have a ] char inside braces
#Let the brace processing hanele it as it recurses.
append brace_content $ch
continue
}
set in_brackets 0
set charlist [resolve_characterclass $cclass_content]
set cclass_content ""
set next_winglob_list [list]
set next_regexp_list [list]
foreach c $charlist {
#set winglob_list [lmap wg $winglob_list {append wg $c}]
foreach wg $winglob_list {
lappend next_winglob_list "$wg$c"
}
foreach re $tclregexp_list {
set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}]
lappend next_regexp_list "${re}${c_escaped}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\\"} {
if {$in_brackets} {
append cclass_content $ch
#append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself.
set esc_next 1
continue
}
if {$brace_depth} {
#we have a \ char inside braces
#Let the brace processing handle it as it recurses.
append brace_content $ch
set esc_next 1
continue
}
set esc_next 1
continue
} else {
if {$in_brackets} {
append cclass_content $ch
continue
}
if {!$brace_depth} {
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]
if {[string length $re_ch] == 1} {
switch -- $re_ch {
"?" {set re_ch "."}
"*" {set re_ch ".*"}
default {
#we could use the same mixed case filter here for both sensitive and insensitive filesystems,
#because the API filtering will already have done the restriction,
#and so a more permissive regex that matches both cases will still only match the results that the API call returns,
#which will be correct based on the case-sensitivity of the filesystem.
#It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob.
#ie in the parts of the original glob that were in square brackets.
if {!$case_sensitive_filesystem} {
# add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
if {[string is upper $re_ch]} {
set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"]
} elseif {[string is lower $re_ch]} {
set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"]
} else {
#non-alpha char - no need to add case-insensitivity
}
}
}
}
}
set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}]
} else {
#we have a literal char inside braces - add to current brace_content
if {$brace_depth == 1 && $ch eq ","} {
lappend braced_alternatives $brace_content
set brace_content ""
} else {
append brace_content $ch
}
}
}
}
#sanity check
if {[llength $winglob_list] != [llength $tclregexp_list]} {
error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]"
}
set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}]
return [dict create winglobs $winglob_list tclregexps $tclregexp_list]
}
#todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed? #todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed?
namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs
# get listing without using unix-tools (may not be installed on the windows system) # get listing without using unix-tools (may not be installed on the windows system)
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
# This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance # This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance
#todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob.
# we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before
#we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder.
#For really large folders and a moderate number of patterns, this could be a significant performance improvement.
proc du_dirlisting_twapi {folderpath args} { proc du_dirlisting_twapi {folderpath args} {
set defaults [dict create\ set defaults [dict create\
-glob *\ -glob *\
-filedebug 0\ -filedebug 0\
-patterndebug 0\
-with_sizes 1\ -with_sizes 1\
-with_times 1\ -with_times 1\
] ]
set opts [dict merge $defaults $args] set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob] set tcl_glob [dict get $opts -glob]
#todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps.
set case_sensitive_filesystem 0 ;#todo - consider detecting this properly.
#Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program.
#In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly,
#as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem,
#so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly.
#Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a
#case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem.
#Note: we only use this to adjust the filtering regexps we generate from the tcl glob.
#The windows API pattern match will already filter based on the case-sensitivity of the filesystem
# so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem.
set globs_processed [tclglob_equivalents $tcl_glob]
#we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob.
#temp
#set win_glob_list [list $tcl_glob]
set win_glob_list [dict get $globs_processed winglobs]
set tcl_regex_list [dict get $globs_processed tclregexps]
#review
# our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob.
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes] set opt_with_sizes [dict get $opts -with_sizes]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_filedebug [dict get $opts -filedebug] ;#per file set opt_filedebug [dict get $opts -filedebug] ;#per file
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_patterndebug [dict get $opts -patterndebug]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set ftypes [list f d l] set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} { if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean - (f false vs f file!) #don't use string is boolean - (f false vs f file!)
@ -711,18 +1031,36 @@ namespace eval punk::du {
} }
} }
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set errors [dict create] set errors [dict create]
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
# return it so it can be stored and tried as an alternative for problem paths # return it so it can be stored and tried as an alternative for problem paths
#puts stderr ">>> glob: $opt_glob"
#REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames foreach win_glob $win_glob_list tcl_re $tcl_regex_list {
if {$opt_patterndebug} {
puts stderr ">>>du_dirlisting_twapi winglob: $win_glob"
puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re"
}
#REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases. # using * all the time may be inefficient - so we might be able to avoid that in some cases.
try { try {
#glob of * will return dotfiles too on windows #glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field
} on error args { } on error args {
try { try {
if {[string match "*denied*" $args]} { if {[string match "*denied*" $args]} {
@ -745,11 +1083,11 @@ namespace eval punk::du {
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} { #if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {}
if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} { if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} {
#looks like an ordinary no results for chosen glob #looks like an ordinary no results for chosen glob
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] #return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
} continue
} }
@ -786,9 +1124,10 @@ namespace eval punk::du {
set parent [file dirname $folderpath] set parent [file dirname $folderpath]
set badtail [file tail $folderpath] set badtail [file tail $folderpath]
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} { while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name] set nm [dict get $iteminfo name]
puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo"
if {$nm eq $badtail} { if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname] set fixedtail [dict get $iteminfo altname]
break break
@ -808,7 +1147,7 @@ namespace eval punk::du {
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath [file join $parent $fixedtail] set fixedpath $parent/$fixedtail
append errmsg "retrying with with windows dos device path $fixedpath\n" append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg puts stderr $errmsg
@ -837,39 +1176,47 @@ namespace eval punk::du {
} }
} }
set dirs [list] #jjj
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
while {[twapi::find_file_next $iterator iteminfo]} { while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name] set nm [dict get $iteminfo name]
#recheck glob if {![regexp $tcl_re $nm]} {
#review!
if {![string match $opt_glob $nm]} {
continue continue
} }
if {$nm in {. ..}} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
set ftype "" #set ftype ""
set do_sizes 0
set do_times 0
#attributes applicable to any classification #attributes applicable to any classification
set fullname [file_join_one $folderpath $nm] set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
set file_attributes [dict get $attrdict -fileattributes] set file_attributes [dict get $attrdict -fileattributes]
set is_reparse_point [expr {"reparse_point" in $file_attributes}]
set is_directory [expr {"directory" in $file_attributes}]
set linkdata [dict create] set linkdata [dict create]
# ----------------------------------------------------------- # -----------------------------------------------------------
#main classification #main classification
if {"reparse_point" in $file_attributes} { if {$is_reparse_point} {
#this concept doesn't correspond 1-to-1 with unix links #this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return #review - and see which if any actually belong in the links key of our return
@ -893,48 +1240,55 @@ namespace eval punk::du {
# #
#links are techically files too, whether they point to a file/dir or nothing. #links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname lappend links $fullname
set ftype "l" #set ftype "l"
if {"l" in $sized_types} {
set do_sizes 1
}
if {"l" in $timed_types} {
set do_times 1
}
dict set linkdata linktype reparse_point dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} { if {"directory" ni $file_attributes} {
dict set linkdata target_type file dict set linkdata target_type file
} }
} }
if {"directory" in $file_attributes} { if {$is_directory} {
if {$nm in {. ..}} { #if {$nm in {. ..}} {
continue # continue
} #}
if {"reparse_point" ni $file_attributes} { if {!$is_reparse_point} {
lappend dirs $fullname lappend dirs $fullname
set ftype "d" #set ftype "d"
if {"d" in $sized_types} {
set do_sizes 1
}
if {"d" in $timed_types} {
set do_times 1
}
} else { } else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory dict set linkdata target_type directory
} }
} }
if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} { if {!$is_reparse_point && !$is_directory} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname lappend files $fullname
if {"f" in $sized_types} { if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size] lappend filesizes [dict get $iteminfo size]
set do_sizes 1
}
if {"f" in $timed_types} {
set do_times 1
} }
set ftype "f" #set ftype "f"
} }
# ----------------------------------------------------------- # -----------------------------------------------------------
if {[dict get $attrdict -hidden]} { if {$do_sizes} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {$ftype in $sized_types} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
} }
if {$ftype in $timed_types} { if {$do_times} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
@ -947,20 +1301,18 @@ namespace eval punk::du {
if {[dict size $linkdata]} { if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata dict set linkinfo $fullname $linkdata
} }
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
} }
twapi::find_file_close $iterator twapi::find_file_close $iterator
set vfsmounts [get_vfsmounts_in_folder $folderpath] }
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts set effective_opts $opts
dict set effective_opts -with_times $timed_types dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types dict set effective_opts -with_sizes $sized_types
#also determine whether vfs. file system x is *much* faster than file attributes #also determine whether vfs. file system x is *much* faster than file attributes
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
} }
proc get_vfsmounts_in_folder {folderpath} { proc get_vfsmounts_in_folder {folderpath} {
set vfsmounts [list] set vfsmounts [list]
@ -991,9 +1343,11 @@ namespace eval punk::du {
#work around the horrible tilde-expansion thing (not needed for tcl 9+) #work around the horrible tilde-expansion thing (not needed for tcl 9+)
proc file_join_one {base newtail} { proc file_join_one {base newtail} {
if {[string index $newtail 0] ne {~}} { if {[string index $newtail 0] ne {~}} {
return [file join $base $newtail] #return [file join $base $newtail]
return $base/$newtail
} }
return [file join $base ./$newtail] #return [file join $base ./$newtail]
return $base/./$newtail
} }
@ -1121,7 +1475,7 @@ namespace eval punk::du {
#ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now.
#struct::set will affect order: tcl vs critcl give different ordering! #struct::set will affect order: tcl vs critcl give different ordering!
set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links] set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]] set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]]
#---- #----

32
src/bootsupport/modules/punk/lib-0.1.6.tm

@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check {
#These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author)
#Not any sort of comprehensive check of known tcl bugs. #Not any sort of comprehensive check of known tcl bugs.
#These are reported in warning output of 'help tcl' - or used for workarounds in some cases. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases.
proc has_tclbug_caseinsensitiveglob_windows {} {
#https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted
if {"windows" ne $::tcl_platform(platform)} {
set bug 0
} else {
set tmpdir [file tempdir]
set testfile [file join $tmpdir "bugtest"]
set fd [open $testfile w]
puts $fd test
close $fd
set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*]
foreach r $globresult {
if {$r ne "bugtest"} {
set bug 1
break
}
}
return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning]
#possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized
# to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation.
}
}
#todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies.
# but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module,
# or at least checking for an existing mounted case-insensitive filesystem.
# A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like.
# It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix.
proc has_tclbug_regexp_emptystring {} { proc has_tclbug_regexp_emptystring {} {
#The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces
#This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic,

209
src/bootsupport/modules/punk/nav/fs-0.1.0.tm

@ -50,7 +50,7 @@ package require punk::lib
package require punk::args package require punk::args
package require punk::ansi package require punk::ansi
package require punk::winpath package require punk::winpath
package require punk::du package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing.
package require commandstack package require commandstack
#*** !doctools #*** !doctools
#[item] [package {Tcl 8.6}] #[item] [package {Tcl 8.6}]
@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs {
#[list_begin definitions] #[list_begin definitions]
punk::args::define {
@id -id ::punk::nav::fs::d/
@cmd -name punk::nav::fs::d/ -help\
{List directories or directories and files in the current directory or in the
targets specified with the fileglob_or_target glob pattern(s).
If a single target is specified without glob characters, and it exists as a directory,
then the working directory is changed to that target and a listing of that directory
is returned. If the single target specified without glob characters does not exist as
a directory, then it is treated as a glob pattern and the listing is for the current
directory with results filtered to match fileglob_or_target.
If multiple targets or glob patterns are specified, then a separate listing is returned
for each fileglob_or_target pattern.
This function is provided via aliases as ./ and .// with v being inferred from the alias
name, and also as d/ with an explicit v argument.
The ./ and .// forms are more convenient for interactive use.
examples:
./ - list directories in current directory
.// - list directories and files in current directory
./ src/* - list directories in src
.// src/* - list directories and files in src
.// *.txt - list files in current directory with .txt extension
.// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name
(on a case-insensitive filesystem this would also match T*1.txt etc)
.// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name
(glob chars treated as literals due to being in character-class brackets
This will match files beginning with a capital T and not lower case t
even on a case-insensitive filesystem.)
.// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns:
{[t]*} - names beginning with t
{d{e,d}*} - names beginning with de or dd
(on a case-insensitive filesystem the first pattern would also match names beginning with T)
}
@values -min 1 -max -1 -type string
v -type string -choices {/ //} -help\
"
/ - list directories only
// - list directories and files
"
fileglob_or_target -type string -optional true -multiple true -help\
"A glob pattern as supported by Tcl's 'glob' command, to filter results.
If multiple patterns are supplied, then a listing for each pattern is returned.
If no patterns are supplied, then all items are listed."
}
#NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread. #NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread.
#As this function recurses and calls cd multiple times - it's not thread-safe. #As this function recurses and calls cd multiple times - it's not thread-safe.
#Another thread could theoretically cd whilst this is running. #Another thread could theoretically cd whilst this is running.
@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs {
#puts stdout "-->[ansistring VIEW $result]" #puts stdout "-->[ansistring VIEW $result]"
return $result return $result
} else { } else {
set atail [lassign $args cdtarget]
if {[llength $args] == 1} { if {[llength $args] == 1} {
set cdtarget [lindex $args 0] set cdtarget [lindex $args 0]
switch -exact -- $cdtarget { switch -exact -- $cdtarget {
. - ./ { . - ./ {
tailcall punk::nav::fs::d/ tailcall punk::nav::fs::d/ $v
} }
.. - ../ { .. - ../ {
if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} { if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} {
@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs {
if {[string range $cdtarget_copy 0 3] eq "//?/"} { if {[string range $cdtarget_copy 0 3] eq "//?/"} {
#handle dos device paths - convert to normal path for glob testing #handle dos device paths - convert to normal path for glob testing
set glob_test [string range $cdtarget_copy 3 end] set glob_test [string range $cdtarget_copy 3 end]
set cdtarget_is_glob [regexp {[*?]} $glob_test] set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test]
} else { } else {
set cdtarget_is_glob [regexp {[*?]} $cdtarget] set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget]
#todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing.
} }
if {!$cdtarget_is_glob} { if {!$cdtarget_is_glob} {
set cdtarget_file_type [file type $cdtarget] set cdtarget_file_type [file type $cdtarget]
@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs {
} }
set VIRTUAL_CWD $cdtarget set VIRTUAL_CWD $cdtarget
set curdir $cdtarget set curdir $cdtarget
tailcall punk::nav::fs::d/ $v
} else { } else {
set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget]
if {[string match //zipfs:/* $VIRTUAL_CWD]} { if {[string match //zipfs:/* $VIRTUAL_CWD]} {
@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs {
} }
if {[file type $target] eq "directory"} { if {[file type $target] eq "directory"} {
set VIRTUAL_CWD $target set VIRTUAL_CWD $target
tailcall punk::nav::fs::d/ $v
} }
} }
tailcall punk::nav::fs::d/ $v
} }
set curdir $VIRTUAL_CWD set curdir $VIRTUAL_CWD
} else { } else {
@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs {
#globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.) #globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.)
set searchspec [lindex $args 0]
set result "" set result ""
#set chunklist [list] #set chunklist [list]
@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs {
set this_result [dict create] set this_result [dict create]
foreach searchspec $args { foreach searchspec $args {
set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)] set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)]
set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}] #we need to support the same glob chars that Tcl's 'glob' command accepts.
set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]]
#we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean #we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean
#this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough. #this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough.
#lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*) #lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*)
@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs {
set allow_nonportable [dict exists $received -nonportable] set allow_nonportable [dict exists $received -nonportable]
set curdir [pwd] set curdir [pwd]
set fullpath_list [list]
set fullpath_list [list] ;#list of full paths to create.
set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir)
#these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests.
set error_paths [list] set error_paths [list]
foreach p $paths { foreach p $paths {
if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} {
@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] lappend error_paths [list $p "Path '$p' contains null character which is not allowed"]
continue continue
} }
set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)]
#e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. #if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir.
set fullpath [file join $path1 {*}[lrange $args 1 end]]
#Some subpaths of the supplied paths to create may already exist. #Some subpaths of the supplied paths to create may already exist.
#we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. #we should test write permissions on the nearest existing parent of the supplied path to create,
#rather than just on the immediate parent segment of the supplied path itself which may not exist.
set fullpath [file normalize $fullpath]
set parent [file dirname $fullpath] set parent [file dirname $fullpath]
while {![file exists $parent]} { while {![file exists $parent]} {
set parent [file dirname $parent] set parent [file dirname $parent]
@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"]
continue continue
} }
lappend existing_parent_list $parent
lappend fullpath_list $fullpath lappend fullpath_list $fullpath
} }
if {[llength $fullpath_list] != [llength $paths]} { if {[llength $fullpath_list] != [llength $paths]} {
@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs {
set num_created 0 set num_created 0
set error_string "" set error_string ""
foreach fullpath $fullpath_list { foreach fullpath $fullpath_list existing_parent $existing_parent_list {
#calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue.
#set relative_path [file relative $fullpath $existing_parent]
#todo.
if {[catch {file mkdir $fullpath}]} { if {[catch {file mkdir $fullpath}]} {
set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted."
cd $curdir
break break
} }
incr num_created incr num_created
@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs {
if {$error_string ne ""} { if {$error_string ne ""} {
error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered."
} }
d/ $curdir
#display summaries of created directories (which may have already existed) by reusing d/ to get info on them.
set query_paths [lmap v $paths $v/*]
d/ / {*}$query_paths
} }
#todo use unknown to allow d/~c:/etc ?? #todo use unknown to allow d/~c:/etc ??
@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs {
if {![file isdirectory $target]} { if {![file isdirectory $target]} {
error "Folder $target not found" error "Folder $target not found"
} }
d/ $target d/ / $target
} }
@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs {
} }
set relativepath [expr {[file pathtype $searchspec] eq "relative"}] set relativepath [expr {[file pathtype $searchspec] eq "relative"}]
set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
#set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent. #dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent.
#(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict) #(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict)
@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs {
} }
puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location" puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location"
set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location] set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location]
return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
} }
#todo - package as punk::nav::fs #todo - package as punk::nav::fs
@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs {
} }
proc dirfiles_dict {args} { proc dirfiles_dict {args} {
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict] set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict]
lassign [dict values $argd] leaders opts vals lassign [dict values $argd] leaders opts values
set searchspecs [dict values $vals] set searchspecs [dict values $values]
#puts stderr "searchspecs: $searchspecs [llength $searchspecs]" #puts stderr "searchspecs: $searchspecs [llength $searchspecs]"
#puts stdout "arglist: $opts" #puts stdout "arglist: $opts"
@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs {
} }
} }
"\uFFFF" { "\uFFFF" {
set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
if {$searchtail_has_globs} { if {$searchtail_has_globs} {
if {$is_relativesearchspec} { if {$is_relativesearchspec} {
#set location [file dirname [file join $searchbase $searchspec]] #set location [file dirname [file join $searchbase $searchspec]]
@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs {
} else { } else {
set next_opt_with_times [list -with_times $opt_with_times] set next_opt_with_times [list -with_times $opt_with_times]
} }
set ts1 [clock clicks -milliseconds]
if {$is_in_vfs} { if {$is_in_vfs} {
set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times]
} else { } else {
@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs {
} }
} }
} }
set ts2 [clock clicks -milliseconds]
set ts_listing [expr {$ts2 - $ts1}]
set dirs [dict get $listing dirs] set dirs [dict get $listing dirs]
set files [dict get $listing files] set files [dict get $listing files]
@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs {
set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden] set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden]
} }
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#-----------------------------------------------------------------------------------------
set ts1 [clock milliseconds]
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#foreach d $dirs { #foreach d $dirs {
# if {[lindex [file system $d] 0] eq "tclvfs"} { # if {[lindex [file system $d] 0] eq "tclvfs"} {
@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs {
set files $sorted_files set files $sorted_files
set filesizes $sorted_filesizes set filesizes $sorted_filesizes
set ts2 [clock milliseconds]
set ts_sorting [expr {$ts2 - $ts1}]
#-----------------------------------------------------------------------------------------
# -- --- # -- ---
#jmn #jmn
set ts1 [clock milliseconds]
foreach nm [list {*}$dirs {*}$files] { foreach nm [list {*}$dirs {*}$files] {
if {[punk::winpath::illegalname_test $nm]} { if {[punk::winpath::illegalname_test $nm]} {
lappend nonportable $nm lappend nonportable $nm
} }
} }
set ts2 [clock milliseconds]
set ts_nonportable_check [expr {$ts2 - $ts1}]
set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms]
set front_of_dict [dict create location $location searchbase $opt_searchbase] set front_of_dict [dict create location $location searchbase $opt_searchbase]
set listing [dict merge $front_of_dict $listing] set listing [dict merge $front_of_dict $listing]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes] set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info]
return [dict merge $listing $updated] return [dict merge $listing $updated]
} }
@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs {
@id -id ::punk::nav::fs::dirfiles_dict_as_lines @id -id ::punk::nav::fs::dirfiles_dict_as_lines
-stripbase -default 0 -type boolean -stripbase -default 0 -type boolean
-formatsizes -default 1 -type boolean -formatsizes -default 1 -type boolean
-listing -default "/" -choices {/ // //} -listing -default "/" -choices {/ //}
@values -min 1 -max -1 -type dict -unnamed true @values -min 1 -max -1 -type dict -unnamed true
} }
#todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing?
proc dirfiles_dict_as_lines {args} { proc dirfiles_dict_as_lines {args} {
set ts1 [clock milliseconds] #set ts1 [clock milliseconds]
package require overtype package require overtype
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines]
lassign [dict values $argd] leaders opts vals lassign [dict values $argd] leaders opts vals
@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs {
#review - symlink to shortcut? hopefully will just work #review - symlink to shortcut? hopefully will just work
#classify as file or directory - fallback to file if unknown/undeterminable #classify as file or directory - fallback to file if unknown/undeterminable
set finfo_plus [list] set finfo_plus [list]
set ts2 [clock milliseconds] #set ts2 [clock milliseconds]
foreach fdict $finfo { foreach fdict $finfo {
set fname [dict get $fdict file] set fname [dict get $fdict file]
if {[file extension $fname] eq ".lnk"} { if {[file extension $fname] eq ".lnk"} {
@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs {
} }
unset finfo unset finfo
puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" #puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}]
@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs {
#consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path #consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path
#review: punk::winpath calls cygpath! #review: punk::winpath calls cygpath!
#review: file pathtype is platform dependant #review: file pathtype is platform dependant
proc path_to_absolute {path base platform} { proc path_to_absolute {subpath base platform} {
set ptype [file pathtype $path] set ptype [file pathtype $subpath]
if {$ptype eq "absolute"} { if {$ptype eq "absolute"} {
set path_absolute $path set path_absolute $subpath
} elseif {$ptype eq "volumerelative"} { } elseif {$ptype eq "volumerelative"} {
if {$platform eq "windows"} { if {$platform eq "windows"} {
#unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms) #unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms)
if {[string index $path 0] eq "/"} { if {[string index $subpath 0] eq "/"} {
#this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here #this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here
#It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions. #It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions.
#Todo - tidy up. #Todo - tidy up.
package require punk::unixywindows package require punk::unixywindows
set path_absolute [punk::unixywindows::towinpath $path] set path_absolute [punk::unixywindows::towinpath $subpath]
#puts stderr "winpath: $path" #puts stderr "winpath: $path"
} else { } else {
#todo handle volume-relative paths with volume specified c:etc c: #todo handle volume-relative paths with volume specified c:etc c:
@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs {
#The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives #The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives
#Arguably if ...? #Arguably if ...?
#set path_absolute $base/$path #set path_absolute $base/$subpath
set path_absolute $path set path_absolute $subpath
} }
} else { } else {
# unknown what paths are reported as this on other platforms.. treat as absolute for now # unknown what paths are reported as this on other platforms.. treat as absolute for now
set path_absolute $path set path_absolute $subpath
} }
} else { } else {
set path_absolute $base/$path #e.g relative subpath=* base = c:/test -> c:/test/*
} #e.g relative subpath=../test base = c:/test -> c:/test/../test
if {$platform eq "windows"} { #e.g relative subpath=* base = //server/share/test -> //server/share/test/*
if {[punk::winpath::illegalname_test $path_absolute]} { set path_absolute $base/$subpath
set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present }
} #fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues.
} #if {$platform eq "windows"} {
# if {[punk::winpath::illegalname_test $path_absolute]} {
# set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
# }
#}
return $path_absolute return $path_absolute
} }
proc strip_prefix_depth {path prefix} { proc strip_prefix_depth {path prefix} {
@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib {
#[para] Secondary functions that are part of the API #[para] Secondary functions that are part of the API
#[list_begin definitions] #[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
punk::args::define {
@id -id ::punk::nav::fs::lib::is_fileglob
@cmd -name punk::nav::fs::lib::is_fileglob
@values -min 1 -max 1
path -type string -required true -help\
{String to test for being a glob pattern as recognised by the tcl 'glob' command.
If the string represents a path with multiple segments, only the final component
of the path will be tested for glob characters.
Glob patterns in this context are different to globs accepted by TCL's 'string match'.
A glob pattern is any string that contains unescaped * ? { } [ or ].
This will not detect mismatched unescaped braces or brackets.
Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern.
}
}
proc is_fileglob {str} {
#a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser)
set in_escape 0
set segments [file split $str]
set tail [lindex $segments end]
foreach c [split $tail ""] {
if {$in_escape} {
set in_escape 0
} else {
if {$c eq "\\"} {
set in_escape 1
} elseif {$c in [list * ? "\[" "\]" "{" "}" ]} {
return 1
}
}
}
return 0
}
#*** !doctools #*** !doctools

2
src/bootsupport/modules/punk/path-0.1.0.tm

@ -357,7 +357,7 @@ namespace eval punk::path {
} }
} }
} }
puts "==>finalparts: '$finalparts'" #puts "==>finalparts: '$finalparts'"
# using join - {"" "" server share} -> //server/share and {a b} -> a/b # using join - {"" "" server share} -> //server/share and {a b} -> a/b
if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} { if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} {
#backtracking on unix-style path can end up with empty string as only member of finalparts #backtracking on unix-style path can end up with empty string as only member of finalparts

64
src/bootsupport/modules/punk/winpath-0.1.0.tm

@ -114,6 +114,50 @@ namespace eval punk::winpath {
return $path return $path
} }
} }
proc illegal_char_map_to_doublewide {ch} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
set map [dict create \
"<" "\uFF1C" \
">" "\uFF1E" \
":" "\uFF1A" \
"\"" "\uFF02" \
"/" "\uFF0F" \
"\\" "\uFF3C" \
"|" "\uFF5C" \
"?" "\uFF1F" \
"*" "\uFF0A"]
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
proc illegal_char_map_to_ntfs {ch} {
#windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc.
#see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars
#see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped)
set map [dict create \
"<" "\uF03C" \
">" "\uF03E" \
":" "\uF03A" \
"\"" "\uF022" \
"/" "unknown" \
"\\" "\uF05c" \
"|" "\uF07C" \
"?" "\uF03F" \
"*" "\uF02A"]
#note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API.
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
#we don't validate that path is actually illegal because we don't know the full range of such names. #we don't validate that path is actually illegal because we don't know the full range of such names.
#The caller can apply this to any path. #The caller can apply this to any path.
#don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently) #don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently)
@ -200,8 +244,15 @@ namespace eval punk::winpath {
set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"]
#we need to exclude things like path/.. path/. #we need to exclude things like path/.. path/.
foreach seg [file split $path] { set segments [file split $path]
if {$seg in [list . ..]} { if {[file pathtype $path] eq "absolute"} {
#absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments.
set checksegments [lrange $segments 1 end]
} else {
set checksegments $segments
}
foreach seg $checksegments {
if {$seg in {. ..}} {
#review - what if there is a folder or file that actually has a name such as . or .. ? #review - what if there is a folder or file that actually has a name such as . or .. ?
#unlikely in normal use - but could done deliberately for bad reasons? #unlikely in normal use - but could done deliberately for bad reasons?
#We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem. #We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem.
@ -220,10 +271,17 @@ namespace eval punk::winpath {
#only check for actual space as other whitespace seems to work without being stripped #only check for actual space as other whitespace seems to work without being stripped
#trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph
if {[string index $seg end] in [list " " "."]} { if {[string index $seg end] in {" " .}} {
#windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc) #windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc)
return 1 return 1
} }
#set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams.
set re {[<>:"/\\|?*]}
if {[regexp $re $seg]} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
} }
#glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph) #glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph)
#- they seem to be readable from cmd and tclsh as is. #- they seem to be readable from cmd and tclsh as is.

211
src/modules/punk-0.1.tm

@ -6066,9 +6066,218 @@ namespace eval punk {
set pipe [punk::path_list_pipe $glob] set pipe [punk::path_list_pipe $glob]
{*}$pipe {*}$pipe
} }
proc path {{glob *}} { proc path_basic {{glob *}} {
set pipe [punk::path_list_pipe $glob] set pipe [punk::path_list_pipe $glob]
{*}$pipe |> list_as_lines {*}$pipe |> list_as_lines
}
namespace eval argdoc {
punk::args::define {
@id -id ::punk::path
@cmd -name "punk::path" -help\
"Introspection of the PATH environment variable.
This tool will examine executables within each PATH entry and show which binaries
are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns."
@opts
-binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries."
@values -min 0 -max -1
glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories."
}
}
proc path {args} {
set argd [punk::args::parse $args withid ::punk::path]
lassign [dict values $argd] leaders opts values received
set binglobs [dict get $opts -binglobs]
set globs [dict get $values glob]
if {$::tcl_platform(platform) eq "windows"} {
set sep ";"
} else {
# : ok for linux/bsd ... mac?
set sep ":"
}
set all_paths [split [string trimright $::env(PATH) $sep] $sep]
set filtered_paths $all_paths
if {[llength $globs]} {
set filtered_paths [list]
foreach p $all_paths {
foreach g $globs {
if {[string match -nocase $g $p]} {
lappend filtered_paths $p
break
}
}
}
}
#Windows executable search location order:
#1. The current directory.
#2. The directories that are listed in the PATH environment variable.
# within each directory windows uses the PATHEXT environment variable to determine
#which files are considered executable and in which order they are considered.
#So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files
#with the same name but different extensions in the same directory, then the one
#with the extension that appears first in PATHEXT will be executed when that name is called.
#On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output.
#Duplicate PATH entries are also a fairly likely possibility on all platforms.
#This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries.
#By default we don't want to display all executables in all PATH entries - as this can be very verbose
#- but we want to be able to show which executables are overshadowed by which PATH entries,
#and to be able to filter the PATH entries and the executables using glob patterns.
#To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name.
#The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths
#in the original PATH list. The executable dict will contain the paths and path indices where each executable is found,
#and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a
#dict keyed by path index which contains the list of executables in that path - to make it easy to show which
#executables are overshadowed by which paths.
set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows).
set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off).
set d_index_executables [dict create] ;#key is path index, value is list of executables in that path
set path_idx 0
foreach p $all_paths {
if {$::tcl_platform(platform) eq "windows"} {
set pnorm [string tolower $p]
} else {
set pnorm $p
}
if {![dict exists $d_path_info $pnorm]} {
dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]]
set executables [list]
if {[file isdirectory $p]} {
#get all files that are executable in this path.
#If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names.
set pnormglob [file normalize $p]
if {$::tcl_platform(platform) eq "windows"} {
#Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable.
#We need to account for this in our glob pattern.
set pathexts [list]
if {[info exists ::env(PATHEXT)]} {
set env_pathexts [split $::env(PATHEXT) ";"]
#set pathexts [lmap e $env_pathexts {string tolower $e}]
foreach pe $env_pathexts {
if {$pe eq "."} {
continue
}
lappend pathexts [string tolower $pe]
}
} else {
set env_pathexts [list]
#default PATHEXT if not set - according to Microsoft docs
set pathexts [list .com .exe .bat .cmd]
}
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set has_pathext 1
break
}
}
if {!$has_pathext} {
foreach pe $pathexts {
lappend binglobs "$bg$pe"
}
}
}
set lc_binglobs [lmap e $binglobs {string tolower $e}]
if {"." in $pathexts} {
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]]
set has_pathext 1
break
}
}
if {$has_pathext} {
if {[string tolower $base] ni $lc_binglobs} {
lappend binglobs "$base"
}
}
}
}
#TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem.
#(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug)
#We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results.
#The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for.
#(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe')
set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]]
set executables [list]
foreach e $globresults {
puts stderr "glob result: $e"
puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]"
lappend executables [file tail [file normalize $e]]
}
} else {
set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]]
}
}
dict set d_index_executables $path_idx $executables
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
if {![dict exists $d_bin_info $exenorm]} {
dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]]
} else {
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
} else {
#duplicate path entry - add to list of original paths for this normalized path
# Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...?
# we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it.
set pathdata [dict get $d_path_info $pnorm]
dict lappend pathdata original_paths $p
dict lappend pathdata indices $path_idx
dict set d_path_info $pnorm $pathdata
#we don't need to add executables for this path - as they will be the same as the original path that we have already processed.
#However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables.
set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
incr path_idx
}
#temporary debug output to check dicts are being built correctly
set debug ""
append debug "Path info dict:" \n
append debug [showdict $d_path_info] \n
append debug "Binary info dict:" \n
append debug [showdict $d_bin_info] \n
append debug "Index executables dict:" \n
append debug [showdict $d_index_executables] \n
#return $debug
puts stdout $debug
} }
#------------------------------------------------------------------- #-------------------------------------------------------------------

500
src/modules/punk/du-999999.0a1.0.tm

@ -479,7 +479,7 @@ namespace eval punk::du {
} }
namespace eval lib { namespace eval lib {
variable du_literal variable du_literal
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ]
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this
@ -489,7 +489,11 @@ namespace eval punk::du {
return [dict get $winfile_attributes $bitmask] return [dict get $winfile_attributes $bitmask]
} else { } else {
#list/dict shimmering? #list/dict shimmering?
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
set decoded [twapi::decode_file_attributes $bitmask]
dict set winfile_attributes $bitmask $decoded
return $decoded
} }
} }
variable win_reparse_tags variable win_reparse_tags
@ -563,23 +567,25 @@ namespace eval punk::du {
#then twapi::device_ioctl (win32 DeviceIoControl) #then twapi::device_ioctl (win32 DeviceIoControl)
#then parse buffer somehow (binary scan..) #then parse buffer somehow (binary scan..)
#https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913 #https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913
punk::args::define {
proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
set argd [punk::args::parse $args withdef {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo @id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output" -debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1 @values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'" iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}] }
set opts [dict get $argd opts] #don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args.
set iteminfo [dict get $argd values iteminfo] proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
#set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo]
set defaults [dict create -debug 0 -debugchannel stderr]
set opts [dict merge $defaults [lrange $args 0 end-1]]
set iteminfo [lindex $args end]
set opt_debug [dict get $opts -debug] set opt_debug [dict get $opts -debug]
set opt_debugchannel [dict get $opts -debugchannel] set opt_debugchannel [dict get $opts -debugchannel]
#-longname is placeholder - caller needs to set #-longname is placeholder - caller needs to set
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0] set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}]
if {$opt_debug} { if {$opt_debug} {
set dbg "iteminfo returned by find_file_open\n" set dbg "iteminfo returned by find_file_open\n"
append dbg [pdict -channel none iteminfo] append dbg [pdict -channel none iteminfo]
@ -592,23 +598,23 @@ namespace eval punk::du {
} }
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
if {"hidden" in $attrinfo} { foreach attr $attrinfo {
switch -- $attr {
hidden {
dict set result -hidden 1 dict set result -hidden 1
} }
if {"system" in $attrinfo} { system {
dict set result -system 1 dict set result -system 1
} }
if {"readonly" in $attrinfo} { readonly {
dict set result -readonly 1 dict set result -readonly 1
} }
dict set result -shortname [dict get $iteminfo altname] reparse_point {
dict set result -fileattributes $attrinfo
if {"reparse_point" in $attrinfo} {
#the twapi API splits this 32bit value for us #the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0] set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1] set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+ #+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value | #|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+ #+-+-+-+-+-----------------------+-------------------------------+
@ -617,9 +623,13 @@ namespace eval punk::du {
if {[dict exists $win_reparse_tags_by_int $low_int]} { if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else { } else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"] dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"]
}
} }
} }
}
#dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
dict set result -raw $iteminfo dict set result -raw $iteminfo
return $result return $result
} }
@ -652,27 +662,337 @@ namespace eval punk::du {
catch {twapi::find_file_close $iterator} catch {twapi::find_file_close $iterator}
} }
} }
proc resolve_characterclass {cclass} {
#takes the inner value from a tcl square bracketed character class and converts to a list of characters.
#todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters
#e.g a-c-3 -> a b c - 3
#e.g a-c-3-5 -> a b c - 3 4 5
#e.g a-c -> a b c
#e.g a- -> a -
#e.g -c-e -> - c d e
#the tcl character class does not support negation or intersection - so we can ignore those possibilities for now.
#in this context we do not need to support named character classes such as [:digit:]
set chars [list]
set i 0
set len [string length $cclass]
set accept_range 0
while {$i < $len} {
set ch [string index $cclass $i]
if {$ch eq "-"} {
if {$accept_range} {
set start [string index $cclass [expr {$i - 1}]]
set end [string index $cclass [expr {$i + 1}]]
if {$start eq "" || $end eq ""} {
#invalid range - treat - as literal
if {"-" ni $chars} {
lappend chars "-"
}
} else {
#we have a range - add all chars from previous char to next char
#range may be in either direction - e.g a-c or c-a but we don't care about the order of our result.
if {$start eq $end} {
#degenerate range - treat as single char
if {$start ni $chars} {
lappend chars $start
}
} else {
if {[scan $start %c] < [scan $end %c]} {
set c1 [scan $start %c]
set c2 [scan $end %c]
} else {
set c1 [scan $end %c]
set c2 [scan $start %c]
}
for {set c $c1} {$c <= $c2} {incr c} {
set char [format %c $c]
if {$char ni $chars} {
lappend chars $char
}
}
}
incr i ;#skip end char as it's already included in range
}
set accept_range 0
} else {
#we have a literal - add to list and allow for possible range if next char is also a -
if {"-" ni $chars} {
lappend chars "-"
}
set accept_range 1
}
} else { #we have a literal - add to list and allow for possible range if next char is also a -
if {$ch ni $chars} {
lappend chars $ch
}
set accept_range 1
}
incr i
}
return $chars
}
#return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible.
#first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes.
# e.g {*[\{]*} is a valid tcl glob
#todo - write tests.
#should also support {*\{*} matching a file such as a{blah}b
#Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL.
#So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity.
proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} {
#windows API function in use is the FindFirstFile set of functions.
#these support wildcards * and ? *only*.
#examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported
# {abc[1-3].txt} -> {abc?.txt}
# {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
# {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call.
# {{S,t}*.txt} -> {S*.txt} {T*.txt}
# *.{txt,log} -> {*.txt} {*.log}
# {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals
set gchars [split $tclglob ""]
set winglob_list [list ""]
set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
set in_brackets 0
set esc_next 0
set brace_depth 0
set brace_content ""
set braced_alternatives [list]
set brace_is_normal 0
set cclass_content ""
foreach ch $gchars {
if {$esc_next} {
if {$in_brackets} {
append cclass_content $ch
continue
} elseif {$brace_depth} {
append brace_content $ch
continue
}
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}]
set esc_next 0
continue
}
if {$ch eq "\{"} {
if {$brace_depth} {
#we have an opening brace char inside braces
#Let the brace processing handle it as it recurses.
incr brace_depth 1
append brace_content $ch
continue
}
incr brace_depth 1
set brace_content ""
} elseif {$ch eq "\}"} {
if {$brace_depth > 1} {
#we have a closing brace char inside braces
append brace_content $ch
incr brace_depth -1
continue
}
#process brace_content representing a list of alternatives
#handle list of alternatives - convert {txt,log} to *.txt;*.log
#set alternatives [split $brace_content ","]
lappend braced_alternatives $brace_content
set alternatives $braced_alternatives
set braced_alternatives [list]
set brace_content ""
incr brace_depth -1
set alt_winpatterns [list]
set alt_regexps [list]
foreach alt $alternatives {
set subresult [tclglob_equivalents $alt]
lappend alt_winpatterns {*}[dict get $subresult winglobs]
lappend alt_regexps {*}[dict get $subresult tclregexps]
}
set next_winglob_list [list]
set next_regexp_list [list]
foreach wg $winglob_list re $tclregexp_list {
#puts "wg: $wg"
#puts "re: $re"
foreach alt_wp $alt_winpatterns alt_re $alt_regexps {
#puts " alt_wp: $alt_wp"
#puts " alt_re: $alt_re"
lappend next_winglob_list "$wg$alt_wp"
set alt_re_no_caret [string range $alt_re 1 end]
lappend next_regexp_list "${re}${alt_re_no_caret}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\["} {
#windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
if {!$brace_depth} {
set in_brackets 1
} else {
#we have a [ char inside braces
#Let the brace processing handle it as it recurses.
#but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets.
set brace_is_normal 1
append brace_content $ch
}
} elseif {$ch eq "\]"} {
if {$brace_depth} {
#we have a ] char inside braces
#Let the brace processing hanele it as it recurses.
append brace_content $ch
continue
}
set in_brackets 0
set charlist [resolve_characterclass $cclass_content]
set cclass_content ""
set next_winglob_list [list]
set next_regexp_list [list]
foreach c $charlist {
#set winglob_list [lmap wg $winglob_list {append wg $c}]
foreach wg $winglob_list {
lappend next_winglob_list "$wg$c"
}
foreach re $tclregexp_list {
set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}]
lappend next_regexp_list "${re}${c_escaped}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\\"} {
if {$in_brackets} {
append cclass_content $ch
#append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself.
set esc_next 1
continue
}
if {$brace_depth} {
#we have a \ char inside braces
#Let the brace processing handle it as it recurses.
append brace_content $ch
set esc_next 1
continue
}
set esc_next 1
continue
} else {
if {$in_brackets} {
append cclass_content $ch
continue
}
if {!$brace_depth} {
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]
if {[string length $re_ch] == 1} {
switch -- $re_ch {
"?" {set re_ch "."}
"*" {set re_ch ".*"}
default {
#we could use the same mixed case filter here for both sensitive and insensitive filesystems,
#because the API filtering will already have done the restriction,
#and so a more permissive regex that matches both cases will still only match the results that the API call returns,
#which will be correct based on the case-sensitivity of the filesystem.
#It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob.
#ie in the parts of the original glob that were in square brackets.
if {!$case_sensitive_filesystem} {
# add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
if {[string is upper $re_ch]} {
set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"]
} elseif {[string is lower $re_ch]} {
set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"]
} else {
#non-alpha char - no need to add case-insensitivity
}
}
}
}
}
set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}]
} else {
#we have a literal char inside braces - add to current brace_content
if {$brace_depth == 1 && $ch eq ","} {
lappend braced_alternatives $brace_content
set brace_content ""
} else {
append brace_content $ch
}
}
}
}
#sanity check
if {[llength $winglob_list] != [llength $tclregexp_list]} {
error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]"
}
set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}]
return [dict create winglobs $winglob_list tclregexps $tclregexp_list]
}
#todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed? #todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed?
namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs
# get listing without using unix-tools (may not be installed on the windows system) # get listing without using unix-tools (may not be installed on the windows system)
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
# This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance # This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance
#todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob.
# we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before
#we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder.
#For really large folders and a moderate number of patterns, this could be a significant performance improvement.
proc du_dirlisting_twapi {folderpath args} { proc du_dirlisting_twapi {folderpath args} {
set defaults [dict create\ set defaults [dict create\
-glob *\ -glob *\
-filedebug 0\ -filedebug 0\
-patterndebug 0\
-with_sizes 1\ -with_sizes 1\
-with_times 1\ -with_times 1\
] ]
set opts [dict merge $defaults $args] set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob] set tcl_glob [dict get $opts -glob]
#todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps.
set case_sensitive_filesystem 0 ;#todo - consider detecting this properly.
#Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program.
#In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly,
#as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem,
#so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly.
#Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a
#case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem.
#Note: we only use this to adjust the filtering regexps we generate from the tcl glob.
#The windows API pattern match will already filter based on the case-sensitivity of the filesystem
# so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem.
set globs_processed [tclglob_equivalents $tcl_glob]
#we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob.
#temp
#set win_glob_list [list $tcl_glob]
set win_glob_list [dict get $globs_processed winglobs]
set tcl_regex_list [dict get $globs_processed tclregexps]
#review
# our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob.
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes] set opt_with_sizes [dict get $opts -with_sizes]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_filedebug [dict get $opts -filedebug] ;#per file set opt_filedebug [dict get $opts -filedebug] ;#per file
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_patterndebug [dict get $opts -patterndebug]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set ftypes [list f d l] set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} { if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean - (f false vs f file!) #don't use string is boolean - (f false vs f file!)
@ -711,18 +1031,36 @@ namespace eval punk::du {
} }
} }
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set errors [dict create] set errors [dict create]
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
# return it so it can be stored and tried as an alternative for problem paths # return it so it can be stored and tried as an alternative for problem paths
#puts stderr ">>> glob: $opt_glob"
#REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames foreach win_glob $win_glob_list tcl_re $tcl_regex_list {
if {$opt_patterndebug} {
puts stderr ">>>du_dirlisting_twapi winglob: $win_glob"
puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re"
}
#REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases. # using * all the time may be inefficient - so we might be able to avoid that in some cases.
try { try {
#glob of * will return dotfiles too on windows #glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field
} on error args { } on error args {
try { try {
if {[string match "*denied*" $args]} { if {[string match "*denied*" $args]} {
@ -745,11 +1083,11 @@ namespace eval punk::du {
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} { #if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {}
if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} { if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} {
#looks like an ordinary no results for chosen glob #looks like an ordinary no results for chosen glob
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] #return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
} continue
} }
@ -786,9 +1124,10 @@ namespace eval punk::du {
set parent [file dirname $folderpath] set parent [file dirname $folderpath]
set badtail [file tail $folderpath] set badtail [file tail $folderpath]
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} { while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name] set nm [dict get $iteminfo name]
puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo"
if {$nm eq $badtail} { if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname] set fixedtail [dict get $iteminfo altname]
break break
@ -808,7 +1147,7 @@ namespace eval punk::du {
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath [file join $parent $fixedtail] set fixedpath $parent/$fixedtail
append errmsg "retrying with with windows dos device path $fixedpath\n" append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg puts stderr $errmsg
@ -837,39 +1176,47 @@ namespace eval punk::du {
} }
} }
set dirs [list] #jjj
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
while {[twapi::find_file_next $iterator iteminfo]} { while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name] set nm [dict get $iteminfo name]
#recheck glob if {![regexp $tcl_re $nm]} {
#review!
if {![string match $opt_glob $nm]} {
continue continue
} }
if {$nm in {. ..}} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
set ftype "" #set ftype ""
set do_sizes 0
set do_times 0
#attributes applicable to any classification #attributes applicable to any classification
set fullname [file_join_one $folderpath $nm] set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
set file_attributes [dict get $attrdict -fileattributes] set file_attributes [dict get $attrdict -fileattributes]
set is_reparse_point [expr {"reparse_point" in $file_attributes}]
set is_directory [expr {"directory" in $file_attributes}]
set linkdata [dict create] set linkdata [dict create]
# ----------------------------------------------------------- # -----------------------------------------------------------
#main classification #main classification
if {"reparse_point" in $file_attributes} { if {$is_reparse_point} {
#this concept doesn't correspond 1-to-1 with unix links #this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return #review - and see which if any actually belong in the links key of our return
@ -893,48 +1240,55 @@ namespace eval punk::du {
# #
#links are techically files too, whether they point to a file/dir or nothing. #links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname lappend links $fullname
set ftype "l" #set ftype "l"
if {"l" in $sized_types} {
set do_sizes 1
}
if {"l" in $timed_types} {
set do_times 1
}
dict set linkdata linktype reparse_point dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} { if {"directory" ni $file_attributes} {
dict set linkdata target_type file dict set linkdata target_type file
} }
} }
if {"directory" in $file_attributes} { if {$is_directory} {
if {$nm in {. ..}} { #if {$nm in {. ..}} {
continue # continue
} #}
if {"reparse_point" ni $file_attributes} { if {!$is_reparse_point} {
lappend dirs $fullname lappend dirs $fullname
set ftype "d" #set ftype "d"
if {"d" in $sized_types} {
set do_sizes 1
}
if {"d" in $timed_types} {
set do_times 1
}
} else { } else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory dict set linkdata target_type directory
} }
} }
if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} { if {!$is_reparse_point && !$is_directory} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname lappend files $fullname
if {"f" in $sized_types} { if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size] lappend filesizes [dict get $iteminfo size]
set do_sizes 1
}
if {"f" in $timed_types} {
set do_times 1
} }
set ftype "f" #set ftype "f"
} }
# ----------------------------------------------------------- # -----------------------------------------------------------
if {[dict get $attrdict -hidden]} { if {$do_sizes} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {$ftype in $sized_types} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
} }
if {$ftype in $timed_types} { if {$do_times} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
@ -947,20 +1301,18 @@ namespace eval punk::du {
if {[dict size $linkdata]} { if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata dict set linkinfo $fullname $linkdata
} }
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
} }
twapi::find_file_close $iterator twapi::find_file_close $iterator
set vfsmounts [get_vfsmounts_in_folder $folderpath] }
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts set effective_opts $opts
dict set effective_opts -with_times $timed_types dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types dict set effective_opts -with_sizes $sized_types
#also determine whether vfs. file system x is *much* faster than file attributes #also determine whether vfs. file system x is *much* faster than file attributes
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
} }
proc get_vfsmounts_in_folder {folderpath} { proc get_vfsmounts_in_folder {folderpath} {
set vfsmounts [list] set vfsmounts [list]
@ -991,9 +1343,11 @@ namespace eval punk::du {
#work around the horrible tilde-expansion thing (not needed for tcl 9+) #work around the horrible tilde-expansion thing (not needed for tcl 9+)
proc file_join_one {base newtail} { proc file_join_one {base newtail} {
if {[string index $newtail 0] ne {~}} { if {[string index $newtail 0] ne {~}} {
return [file join $base $newtail] #return [file join $base $newtail]
return $base/$newtail
} }
return [file join $base ./$newtail] #return [file join $base ./$newtail]
return $base/./$newtail
} }
@ -1121,7 +1475,7 @@ namespace eval punk::du {
#ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now.
#struct::set will affect order: tcl vs critcl give different ordering! #struct::set will affect order: tcl vs critcl give different ordering!
set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links] set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]] set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]]
#---- #----

32
src/modules/punk/lib-999999.0a1.0.tm

@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check {
#These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author)
#Not any sort of comprehensive check of known tcl bugs. #Not any sort of comprehensive check of known tcl bugs.
#These are reported in warning output of 'help tcl' - or used for workarounds in some cases. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases.
proc has_tclbug_caseinsensitiveglob_windows {} {
#https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted
if {"windows" ne $::tcl_platform(platform)} {
set bug 0
} else {
set tmpdir [file tempdir]
set testfile [file join $tmpdir "bugtest"]
set fd [open $testfile w]
puts $fd test
close $fd
set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*]
foreach r $globresult {
if {$r ne "bugtest"} {
set bug 1
break
}
}
return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning]
#possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized
# to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation.
}
}
#todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies.
# but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module,
# or at least checking for an existing mounted case-insensitive filesystem.
# A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like.
# It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix.
proc has_tclbug_regexp_emptystring {} { proc has_tclbug_regexp_emptystring {} {
#The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces
#This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic,

209
src/modules/punk/nav/fs-999999.0a1.0.tm

@ -50,7 +50,7 @@ package require punk::lib
package require punk::args package require punk::args
package require punk::ansi package require punk::ansi
package require punk::winpath package require punk::winpath
package require punk::du package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing.
package require commandstack package require commandstack
#*** !doctools #*** !doctools
#[item] [package {Tcl 8.6}] #[item] [package {Tcl 8.6}]
@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs {
#[list_begin definitions] #[list_begin definitions]
punk::args::define {
@id -id ::punk::nav::fs::d/
@cmd -name punk::nav::fs::d/ -help\
{List directories or directories and files in the current directory or in the
targets specified with the fileglob_or_target glob pattern(s).
If a single target is specified without glob characters, and it exists as a directory,
then the working directory is changed to that target and a listing of that directory
is returned. If the single target specified without glob characters does not exist as
a directory, then it is treated as a glob pattern and the listing is for the current
directory with results filtered to match fileglob_or_target.
If multiple targets or glob patterns are specified, then a separate listing is returned
for each fileglob_or_target pattern.
This function is provided via aliases as ./ and .// with v being inferred from the alias
name, and also as d/ with an explicit v argument.
The ./ and .// forms are more convenient for interactive use.
examples:
./ - list directories in current directory
.// - list directories and files in current directory
./ src/* - list directories in src
.// src/* - list directories and files in src
.// *.txt - list files in current directory with .txt extension
.// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name
(on a case-insensitive filesystem this would also match T*1.txt etc)
.// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name
(glob chars treated as literals due to being in character-class brackets
This will match files beginning with a capital T and not lower case t
even on a case-insensitive filesystem.)
.// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns:
{[t]*} - names beginning with t
{d{e,d}*} - names beginning with de or dd
(on a case-insensitive filesystem the first pattern would also match names beginning with T)
}
@values -min 1 -max -1 -type string
v -type string -choices {/ //} -help\
"
/ - list directories only
// - list directories and files
"
fileglob_or_target -type string -optional true -multiple true -help\
"A glob pattern as supported by Tcl's 'glob' command, to filter results.
If multiple patterns are supplied, then a listing for each pattern is returned.
If no patterns are supplied, then all items are listed."
}
#NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread. #NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread.
#As this function recurses and calls cd multiple times - it's not thread-safe. #As this function recurses and calls cd multiple times - it's not thread-safe.
#Another thread could theoretically cd whilst this is running. #Another thread could theoretically cd whilst this is running.
@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs {
#puts stdout "-->[ansistring VIEW $result]" #puts stdout "-->[ansistring VIEW $result]"
return $result return $result
} else { } else {
set atail [lassign $args cdtarget]
if {[llength $args] == 1} { if {[llength $args] == 1} {
set cdtarget [lindex $args 0] set cdtarget [lindex $args 0]
switch -exact -- $cdtarget { switch -exact -- $cdtarget {
. - ./ { . - ./ {
tailcall punk::nav::fs::d/ tailcall punk::nav::fs::d/ $v
} }
.. - ../ { .. - ../ {
if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} { if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} {
@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs {
if {[string range $cdtarget_copy 0 3] eq "//?/"} { if {[string range $cdtarget_copy 0 3] eq "//?/"} {
#handle dos device paths - convert to normal path for glob testing #handle dos device paths - convert to normal path for glob testing
set glob_test [string range $cdtarget_copy 3 end] set glob_test [string range $cdtarget_copy 3 end]
set cdtarget_is_glob [regexp {[*?]} $glob_test] set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test]
} else { } else {
set cdtarget_is_glob [regexp {[*?]} $cdtarget] set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget]
#todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing.
} }
if {!$cdtarget_is_glob} { if {!$cdtarget_is_glob} {
set cdtarget_file_type [file type $cdtarget] set cdtarget_file_type [file type $cdtarget]
@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs {
} }
set VIRTUAL_CWD $cdtarget set VIRTUAL_CWD $cdtarget
set curdir $cdtarget set curdir $cdtarget
tailcall punk::nav::fs::d/ $v
} else { } else {
set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget]
if {[string match //zipfs:/* $VIRTUAL_CWD]} { if {[string match //zipfs:/* $VIRTUAL_CWD]} {
@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs {
} }
if {[file type $target] eq "directory"} { if {[file type $target] eq "directory"} {
set VIRTUAL_CWD $target set VIRTUAL_CWD $target
tailcall punk::nav::fs::d/ $v
} }
} }
tailcall punk::nav::fs::d/ $v
} }
set curdir $VIRTUAL_CWD set curdir $VIRTUAL_CWD
} else { } else {
@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs {
#globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.) #globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.)
set searchspec [lindex $args 0]
set result "" set result ""
#set chunklist [list] #set chunklist [list]
@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs {
set this_result [dict create] set this_result [dict create]
foreach searchspec $args { foreach searchspec $args {
set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)] set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)]
set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}] #we need to support the same glob chars that Tcl's 'glob' command accepts.
set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]]
#we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean #we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean
#this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough. #this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough.
#lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*) #lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*)
@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs {
set allow_nonportable [dict exists $received -nonportable] set allow_nonportable [dict exists $received -nonportable]
set curdir [pwd] set curdir [pwd]
set fullpath_list [list]
set fullpath_list [list] ;#list of full paths to create.
set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir)
#these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests.
set error_paths [list] set error_paths [list]
foreach p $paths { foreach p $paths {
if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} {
@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] lappend error_paths [list $p "Path '$p' contains null character which is not allowed"]
continue continue
} }
set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)]
#e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. #if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir.
set fullpath [file join $path1 {*}[lrange $args 1 end]]
#Some subpaths of the supplied paths to create may already exist. #Some subpaths of the supplied paths to create may already exist.
#we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. #we should test write permissions on the nearest existing parent of the supplied path to create,
#rather than just on the immediate parent segment of the supplied path itself which may not exist.
set fullpath [file normalize $fullpath]
set parent [file dirname $fullpath] set parent [file dirname $fullpath]
while {![file exists $parent]} { while {![file exists $parent]} {
set parent [file dirname $parent] set parent [file dirname $parent]
@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"]
continue continue
} }
lappend existing_parent_list $parent
lappend fullpath_list $fullpath lappend fullpath_list $fullpath
} }
if {[llength $fullpath_list] != [llength $paths]} { if {[llength $fullpath_list] != [llength $paths]} {
@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs {
set num_created 0 set num_created 0
set error_string "" set error_string ""
foreach fullpath $fullpath_list { foreach fullpath $fullpath_list existing_parent $existing_parent_list {
#calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue.
#set relative_path [file relative $fullpath $existing_parent]
#todo.
if {[catch {file mkdir $fullpath}]} { if {[catch {file mkdir $fullpath}]} {
set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted."
cd $curdir
break break
} }
incr num_created incr num_created
@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs {
if {$error_string ne ""} { if {$error_string ne ""} {
error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered."
} }
d/ $curdir
#display summaries of created directories (which may have already existed) by reusing d/ to get info on them.
set query_paths [lmap v $paths $v/*]
d/ / {*}$query_paths
} }
#todo use unknown to allow d/~c:/etc ?? #todo use unknown to allow d/~c:/etc ??
@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs {
if {![file isdirectory $target]} { if {![file isdirectory $target]} {
error "Folder $target not found" error "Folder $target not found"
} }
d/ $target d/ / $target
} }
@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs {
} }
set relativepath [expr {[file pathtype $searchspec] eq "relative"}] set relativepath [expr {[file pathtype $searchspec] eq "relative"}]
set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
#set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent. #dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent.
#(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict) #(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict)
@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs {
} }
puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location" puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location"
set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location] set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location]
return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
} }
#todo - package as punk::nav::fs #todo - package as punk::nav::fs
@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs {
} }
proc dirfiles_dict {args} { proc dirfiles_dict {args} {
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict] set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict]
lassign [dict values $argd] leaders opts vals lassign [dict values $argd] leaders opts values
set searchspecs [dict values $vals] set searchspecs [dict values $values]
#puts stderr "searchspecs: $searchspecs [llength $searchspecs]" #puts stderr "searchspecs: $searchspecs [llength $searchspecs]"
#puts stdout "arglist: $opts" #puts stdout "arglist: $opts"
@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs {
} }
} }
"\uFFFF" { "\uFFFF" {
set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
if {$searchtail_has_globs} { if {$searchtail_has_globs} {
if {$is_relativesearchspec} { if {$is_relativesearchspec} {
#set location [file dirname [file join $searchbase $searchspec]] #set location [file dirname [file join $searchbase $searchspec]]
@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs {
} else { } else {
set next_opt_with_times [list -with_times $opt_with_times] set next_opt_with_times [list -with_times $opt_with_times]
} }
set ts1 [clock clicks -milliseconds]
if {$is_in_vfs} { if {$is_in_vfs} {
set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times]
} else { } else {
@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs {
} }
} }
} }
set ts2 [clock clicks -milliseconds]
set ts_listing [expr {$ts2 - $ts1}]
set dirs [dict get $listing dirs] set dirs [dict get $listing dirs]
set files [dict get $listing files] set files [dict get $listing files]
@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs {
set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden] set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden]
} }
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#-----------------------------------------------------------------------------------------
set ts1 [clock milliseconds]
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#foreach d $dirs { #foreach d $dirs {
# if {[lindex [file system $d] 0] eq "tclvfs"} { # if {[lindex [file system $d] 0] eq "tclvfs"} {
@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs {
set files $sorted_files set files $sorted_files
set filesizes $sorted_filesizes set filesizes $sorted_filesizes
set ts2 [clock milliseconds]
set ts_sorting [expr {$ts2 - $ts1}]
#-----------------------------------------------------------------------------------------
# -- --- # -- ---
#jmn #jmn
set ts1 [clock milliseconds]
foreach nm [list {*}$dirs {*}$files] { foreach nm [list {*}$dirs {*}$files] {
if {[punk::winpath::illegalname_test $nm]} { if {[punk::winpath::illegalname_test $nm]} {
lappend nonportable $nm lappend nonportable $nm
} }
} }
set ts2 [clock milliseconds]
set ts_nonportable_check [expr {$ts2 - $ts1}]
set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms]
set front_of_dict [dict create location $location searchbase $opt_searchbase] set front_of_dict [dict create location $location searchbase $opt_searchbase]
set listing [dict merge $front_of_dict $listing] set listing [dict merge $front_of_dict $listing]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes] set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info]
return [dict merge $listing $updated] return [dict merge $listing $updated]
} }
@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs {
@id -id ::punk::nav::fs::dirfiles_dict_as_lines @id -id ::punk::nav::fs::dirfiles_dict_as_lines
-stripbase -default 0 -type boolean -stripbase -default 0 -type boolean
-formatsizes -default 1 -type boolean -formatsizes -default 1 -type boolean
-listing -default "/" -choices {/ // //} -listing -default "/" -choices {/ //}
@values -min 1 -max -1 -type dict -unnamed true @values -min 1 -max -1 -type dict -unnamed true
} }
#todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing?
proc dirfiles_dict_as_lines {args} { proc dirfiles_dict_as_lines {args} {
set ts1 [clock milliseconds] #set ts1 [clock milliseconds]
package require overtype package require overtype
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines]
lassign [dict values $argd] leaders opts vals lassign [dict values $argd] leaders opts vals
@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs {
#review - symlink to shortcut? hopefully will just work #review - symlink to shortcut? hopefully will just work
#classify as file or directory - fallback to file if unknown/undeterminable #classify as file or directory - fallback to file if unknown/undeterminable
set finfo_plus [list] set finfo_plus [list]
set ts2 [clock milliseconds] #set ts2 [clock milliseconds]
foreach fdict $finfo { foreach fdict $finfo {
set fname [dict get $fdict file] set fname [dict get $fdict file]
if {[file extension $fname] eq ".lnk"} { if {[file extension $fname] eq ".lnk"} {
@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs {
} }
unset finfo unset finfo
puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" #puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}]
@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs {
#consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path #consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path
#review: punk::winpath calls cygpath! #review: punk::winpath calls cygpath!
#review: file pathtype is platform dependant #review: file pathtype is platform dependant
proc path_to_absolute {path base platform} { proc path_to_absolute {subpath base platform} {
set ptype [file pathtype $path] set ptype [file pathtype $subpath]
if {$ptype eq "absolute"} { if {$ptype eq "absolute"} {
set path_absolute $path set path_absolute $subpath
} elseif {$ptype eq "volumerelative"} { } elseif {$ptype eq "volumerelative"} {
if {$platform eq "windows"} { if {$platform eq "windows"} {
#unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms) #unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms)
if {[string index $path 0] eq "/"} { if {[string index $subpath 0] eq "/"} {
#this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here #this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here
#It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions. #It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions.
#Todo - tidy up. #Todo - tidy up.
package require punk::unixywindows package require punk::unixywindows
set path_absolute [punk::unixywindows::towinpath $path] set path_absolute [punk::unixywindows::towinpath $subpath]
#puts stderr "winpath: $path" #puts stderr "winpath: $path"
} else { } else {
#todo handle volume-relative paths with volume specified c:etc c: #todo handle volume-relative paths with volume specified c:etc c:
@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs {
#The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives #The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives
#Arguably if ...? #Arguably if ...?
#set path_absolute $base/$path #set path_absolute $base/$subpath
set path_absolute $path set path_absolute $subpath
} }
} else { } else {
# unknown what paths are reported as this on other platforms.. treat as absolute for now # unknown what paths are reported as this on other platforms.. treat as absolute for now
set path_absolute $path set path_absolute $subpath
} }
} else { } else {
set path_absolute $base/$path #e.g relative subpath=* base = c:/test -> c:/test/*
} #e.g relative subpath=../test base = c:/test -> c:/test/../test
if {$platform eq "windows"} { #e.g relative subpath=* base = //server/share/test -> //server/share/test/*
if {[punk::winpath::illegalname_test $path_absolute]} { set path_absolute $base/$subpath
set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present }
} #fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues.
} #if {$platform eq "windows"} {
# if {[punk::winpath::illegalname_test $path_absolute]} {
# set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
# }
#}
return $path_absolute return $path_absolute
} }
proc strip_prefix_depth {path prefix} { proc strip_prefix_depth {path prefix} {
@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib {
#[para] Secondary functions that are part of the API #[para] Secondary functions that are part of the API
#[list_begin definitions] #[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
punk::args::define {
@id -id ::punk::nav::fs::lib::is_fileglob
@cmd -name punk::nav::fs::lib::is_fileglob
@values -min 1 -max 1
path -type string -required true -help\
{String to test for being a glob pattern as recognised by the tcl 'glob' command.
If the string represents a path with multiple segments, only the final component
of the path will be tested for glob characters.
Glob patterns in this context are different to globs accepted by TCL's 'string match'.
A glob pattern is any string that contains unescaped * ? { } [ or ].
This will not detect mismatched unescaped braces or brackets.
Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern.
}
}
proc is_fileglob {str} {
#a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser)
set in_escape 0
set segments [file split $str]
set tail [lindex $segments end]
foreach c [split $tail ""] {
if {$in_escape} {
set in_escape 0
} else {
if {$c eq "\\"} {
set in_escape 1
} elseif {$c in [list * ? "\[" "\]" "{" "}" ]} {
return 1
}
}
}
return 0
}
#*** !doctools #*** !doctools

2
src/modules/punk/path-999999.0a1.0.tm

@ -357,7 +357,7 @@ namespace eval punk::path {
} }
} }
} }
puts "==>finalparts: '$finalparts'" #puts "==>finalparts: '$finalparts'"
# using join - {"" "" server share} -> //server/share and {a b} -> a/b # using join - {"" "" server share} -> //server/share and {a b} -> a/b
if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} { if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} {
#backtracking on unix-style path can end up with empty string as only member of finalparts #backtracking on unix-style path can end up with empty string as only member of finalparts

64
src/modules/punk/winpath-999999.0a1.0.tm

@ -114,6 +114,50 @@ namespace eval punk::winpath {
return $path return $path
} }
} }
proc illegal_char_map_to_doublewide {ch} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
set map [dict create \
"<" "\uFF1C" \
">" "\uFF1E" \
":" "\uFF1A" \
"\"" "\uFF02" \
"/" "\uFF0F" \
"\\" "\uFF3C" \
"|" "\uFF5C" \
"?" "\uFF1F" \
"*" "\uFF0A"]
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
proc illegal_char_map_to_ntfs {ch} {
#windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc.
#see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars
#see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped)
set map [dict create \
"<" "\uF03C" \
">" "\uF03E" \
":" "\uF03A" \
"\"" "\uF022" \
"/" "unknown" \
"\\" "\uF05c" \
"|" "\uF07C" \
"?" "\uF03F" \
"*" "\uF02A"]
#note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API.
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
#we don't validate that path is actually illegal because we don't know the full range of such names. #we don't validate that path is actually illegal because we don't know the full range of such names.
#The caller can apply this to any path. #The caller can apply this to any path.
#don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently) #don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently)
@ -200,8 +244,15 @@ namespace eval punk::winpath {
set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"]
#we need to exclude things like path/.. path/. #we need to exclude things like path/.. path/.
foreach seg [file split $path] { set segments [file split $path]
if {$seg in [list . ..]} { if {[file pathtype $path] eq "absolute"} {
#absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments.
set checksegments [lrange $segments 1 end]
} else {
set checksegments $segments
}
foreach seg $checksegments {
if {$seg in {. ..}} {
#review - what if there is a folder or file that actually has a name such as . or .. ? #review - what if there is a folder or file that actually has a name such as . or .. ?
#unlikely in normal use - but could done deliberately for bad reasons? #unlikely in normal use - but could done deliberately for bad reasons?
#We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem. #We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem.
@ -220,10 +271,17 @@ namespace eval punk::winpath {
#only check for actual space as other whitespace seems to work without being stripped #only check for actual space as other whitespace seems to work without being stripped
#trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph
if {[string index $seg end] in [list " " "."]} { if {[string index $seg end] in {" " .}} {
#windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc) #windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc)
return 1 return 1
} }
#set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams.
set re {[<>:"/\\|?*]}
if {[regexp $re $seg]} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
} }
#glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph) #glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph)
#- they seem to be readable from cmd and tclsh as is. #- they seem to be readable from cmd and tclsh as is.

211
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk-0.1.tm

@ -6066,9 +6066,218 @@ namespace eval punk {
set pipe [punk::path_list_pipe $glob] set pipe [punk::path_list_pipe $glob]
{*}$pipe {*}$pipe
} }
proc path {{glob *}} { proc path_basic {{glob *}} {
set pipe [punk::path_list_pipe $glob] set pipe [punk::path_list_pipe $glob]
{*}$pipe |> list_as_lines {*}$pipe |> list_as_lines
}
namespace eval argdoc {
punk::args::define {
@id -id ::punk::path
@cmd -name "punk::path" -help\
"Introspection of the PATH environment variable.
This tool will examine executables within each PATH entry and show which binaries
are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns."
@opts
-binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries."
@values -min 0 -max -1
glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories."
}
}
proc path {args} {
set argd [punk::args::parse $args withid ::punk::path]
lassign [dict values $argd] leaders opts values received
set binglobs [dict get $opts -binglobs]
set globs [dict get $values glob]
if {$::tcl_platform(platform) eq "windows"} {
set sep ";"
} else {
# : ok for linux/bsd ... mac?
set sep ":"
}
set all_paths [split [string trimright $::env(PATH) $sep] $sep]
set filtered_paths $all_paths
if {[llength $globs]} {
set filtered_paths [list]
foreach p $all_paths {
foreach g $globs {
if {[string match -nocase $g $p]} {
lappend filtered_paths $p
break
}
}
}
}
#Windows executable search location order:
#1. The current directory.
#2. The directories that are listed in the PATH environment variable.
# within each directory windows uses the PATHEXT environment variable to determine
#which files are considered executable and in which order they are considered.
#So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files
#with the same name but different extensions in the same directory, then the one
#with the extension that appears first in PATHEXT will be executed when that name is called.
#On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output.
#Duplicate PATH entries are also a fairly likely possibility on all platforms.
#This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries.
#By default we don't want to display all executables in all PATH entries - as this can be very verbose
#- but we want to be able to show which executables are overshadowed by which PATH entries,
#and to be able to filter the PATH entries and the executables using glob patterns.
#To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name.
#The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths
#in the original PATH list. The executable dict will contain the paths and path indices where each executable is found,
#and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a
#dict keyed by path index which contains the list of executables in that path - to make it easy to show which
#executables are overshadowed by which paths.
set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows).
set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off).
set d_index_executables [dict create] ;#key is path index, value is list of executables in that path
set path_idx 0
foreach p $all_paths {
if {$::tcl_platform(platform) eq "windows"} {
set pnorm [string tolower $p]
} else {
set pnorm $p
}
if {![dict exists $d_path_info $pnorm]} {
dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]]
set executables [list]
if {[file isdirectory $p]} {
#get all files that are executable in this path.
#If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names.
set pnormglob [file normalize $p]
if {$::tcl_platform(platform) eq "windows"} {
#Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable.
#We need to account for this in our glob pattern.
set pathexts [list]
if {[info exists ::env(PATHEXT)]} {
set env_pathexts [split $::env(PATHEXT) ";"]
#set pathexts [lmap e $env_pathexts {string tolower $e}]
foreach pe $env_pathexts {
if {$pe eq "."} {
continue
}
lappend pathexts [string tolower $pe]
}
} else {
set env_pathexts [list]
#default PATHEXT if not set - according to Microsoft docs
set pathexts [list .com .exe .bat .cmd]
}
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set has_pathext 1
break
}
}
if {!$has_pathext} {
foreach pe $pathexts {
lappend binglobs "$bg$pe"
}
}
}
set lc_binglobs [lmap e $binglobs {string tolower $e}]
if {"." in $pathexts} {
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]]
set has_pathext 1
break
}
}
if {$has_pathext} {
if {[string tolower $base] ni $lc_binglobs} {
lappend binglobs "$base"
}
}
}
}
#TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem.
#(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug)
#We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results.
#The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for.
#(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe')
set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]]
set executables [list]
foreach e $globresults {
puts stderr "glob result: $e"
puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]"
lappend executables [file tail [file normalize $e]]
}
} else {
set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]]
}
}
dict set d_index_executables $path_idx $executables
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
if {![dict exists $d_bin_info $exenorm]} {
dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]]
} else {
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
} else {
#duplicate path entry - add to list of original paths for this normalized path
# Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...?
# we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it.
set pathdata [dict get $d_path_info $pnorm]
dict lappend pathdata original_paths $p
dict lappend pathdata indices $path_idx
dict set d_path_info $pnorm $pathdata
#we don't need to add executables for this path - as they will be the same as the original path that we have already processed.
#However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables.
set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
incr path_idx
}
#temporary debug output to check dicts are being built correctly
set debug ""
append debug "Path info dict:" \n
append debug [showdict $d_path_info] \n
append debug "Binary info dict:" \n
append debug [showdict $d_bin_info] \n
append debug "Index executables dict:" \n
append debug [showdict $d_index_executables] \n
#return $debug
puts stdout $debug
} }
#------------------------------------------------------------------- #-------------------------------------------------------------------

500
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/du-0.1.0.tm

@ -479,7 +479,7 @@ namespace eval punk::du {
} }
namespace eval lib { namespace eval lib {
variable du_literal variable du_literal
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ]
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this
@ -489,7 +489,11 @@ namespace eval punk::du {
return [dict get $winfile_attributes $bitmask] return [dict get $winfile_attributes $bitmask]
} else { } else {
#list/dict shimmering? #list/dict shimmering?
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
set decoded [twapi::decode_file_attributes $bitmask]
dict set winfile_attributes $bitmask $decoded
return $decoded
} }
} }
variable win_reparse_tags variable win_reparse_tags
@ -563,23 +567,25 @@ namespace eval punk::du {
#then twapi::device_ioctl (win32 DeviceIoControl) #then twapi::device_ioctl (win32 DeviceIoControl)
#then parse buffer somehow (binary scan..) #then parse buffer somehow (binary scan..)
#https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913 #https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913
punk::args::define {
proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
set argd [punk::args::parse $args withdef {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo @id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output" -debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1 @values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'" iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}] }
set opts [dict get $argd opts] #don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args.
set iteminfo [dict get $argd values iteminfo] proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
#set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo]
set defaults [dict create -debug 0 -debugchannel stderr]
set opts [dict merge $defaults [lrange $args 0 end-1]]
set iteminfo [lindex $args end]
set opt_debug [dict get $opts -debug] set opt_debug [dict get $opts -debug]
set opt_debugchannel [dict get $opts -debugchannel] set opt_debugchannel [dict get $opts -debugchannel]
#-longname is placeholder - caller needs to set #-longname is placeholder - caller needs to set
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0] set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}]
if {$opt_debug} { if {$opt_debug} {
set dbg "iteminfo returned by find_file_open\n" set dbg "iteminfo returned by find_file_open\n"
append dbg [pdict -channel none iteminfo] append dbg [pdict -channel none iteminfo]
@ -592,23 +598,23 @@ namespace eval punk::du {
} }
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
if {"hidden" in $attrinfo} { foreach attr $attrinfo {
switch -- $attr {
hidden {
dict set result -hidden 1 dict set result -hidden 1
} }
if {"system" in $attrinfo} { system {
dict set result -system 1 dict set result -system 1
} }
if {"readonly" in $attrinfo} { readonly {
dict set result -readonly 1 dict set result -readonly 1
} }
dict set result -shortname [dict get $iteminfo altname] reparse_point {
dict set result -fileattributes $attrinfo
if {"reparse_point" in $attrinfo} {
#the twapi API splits this 32bit value for us #the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0] set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1] set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+ #+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value | #|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+ #+-+-+-+-+-----------------------+-------------------------------+
@ -617,9 +623,13 @@ namespace eval punk::du {
if {[dict exists $win_reparse_tags_by_int $low_int]} { if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else { } else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"] dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"]
}
} }
} }
}
#dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
dict set result -raw $iteminfo dict set result -raw $iteminfo
return $result return $result
} }
@ -652,27 +662,337 @@ namespace eval punk::du {
catch {twapi::find_file_close $iterator} catch {twapi::find_file_close $iterator}
} }
} }
proc resolve_characterclass {cclass} {
#takes the inner value from a tcl square bracketed character class and converts to a list of characters.
#todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters
#e.g a-c-3 -> a b c - 3
#e.g a-c-3-5 -> a b c - 3 4 5
#e.g a-c -> a b c
#e.g a- -> a -
#e.g -c-e -> - c d e
#the tcl character class does not support negation or intersection - so we can ignore those possibilities for now.
#in this context we do not need to support named character classes such as [:digit:]
set chars [list]
set i 0
set len [string length $cclass]
set accept_range 0
while {$i < $len} {
set ch [string index $cclass $i]
if {$ch eq "-"} {
if {$accept_range} {
set start [string index $cclass [expr {$i - 1}]]
set end [string index $cclass [expr {$i + 1}]]
if {$start eq "" || $end eq ""} {
#invalid range - treat - as literal
if {"-" ni $chars} {
lappend chars "-"
}
} else {
#we have a range - add all chars from previous char to next char
#range may be in either direction - e.g a-c or c-a but we don't care about the order of our result.
if {$start eq $end} {
#degenerate range - treat as single char
if {$start ni $chars} {
lappend chars $start
}
} else {
if {[scan $start %c] < [scan $end %c]} {
set c1 [scan $start %c]
set c2 [scan $end %c]
} else {
set c1 [scan $end %c]
set c2 [scan $start %c]
}
for {set c $c1} {$c <= $c2} {incr c} {
set char [format %c $c]
if {$char ni $chars} {
lappend chars $char
}
}
}
incr i ;#skip end char as it's already included in range
}
set accept_range 0
} else {
#we have a literal - add to list and allow for possible range if next char is also a -
if {"-" ni $chars} {
lappend chars "-"
}
set accept_range 1
}
} else { #we have a literal - add to list and allow for possible range if next char is also a -
if {$ch ni $chars} {
lappend chars $ch
}
set accept_range 1
}
incr i
}
return $chars
}
#return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible.
#first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes.
# e.g {*[\{]*} is a valid tcl glob
#todo - write tests.
#should also support {*\{*} matching a file such as a{blah}b
#Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL.
#So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity.
proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} {
#windows API function in use is the FindFirstFile set of functions.
#these support wildcards * and ? *only*.
#examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported
# {abc[1-3].txt} -> {abc?.txt}
# {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
# {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call.
# {{S,t}*.txt} -> {S*.txt} {T*.txt}
# *.{txt,log} -> {*.txt} {*.log}
# {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals
set gchars [split $tclglob ""]
set winglob_list [list ""]
set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
set in_brackets 0
set esc_next 0
set brace_depth 0
set brace_content ""
set braced_alternatives [list]
set brace_is_normal 0
set cclass_content ""
foreach ch $gchars {
if {$esc_next} {
if {$in_brackets} {
append cclass_content $ch
continue
} elseif {$brace_depth} {
append brace_content $ch
continue
}
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}]
set esc_next 0
continue
}
if {$ch eq "\{"} {
if {$brace_depth} {
#we have an opening brace char inside braces
#Let the brace processing handle it as it recurses.
incr brace_depth 1
append brace_content $ch
continue
}
incr brace_depth 1
set brace_content ""
} elseif {$ch eq "\}"} {
if {$brace_depth > 1} {
#we have a closing brace char inside braces
append brace_content $ch
incr brace_depth -1
continue
}
#process brace_content representing a list of alternatives
#handle list of alternatives - convert {txt,log} to *.txt;*.log
#set alternatives [split $brace_content ","]
lappend braced_alternatives $brace_content
set alternatives $braced_alternatives
set braced_alternatives [list]
set brace_content ""
incr brace_depth -1
set alt_winpatterns [list]
set alt_regexps [list]
foreach alt $alternatives {
set subresult [tclglob_equivalents $alt]
lappend alt_winpatterns {*}[dict get $subresult winglobs]
lappend alt_regexps {*}[dict get $subresult tclregexps]
}
set next_winglob_list [list]
set next_regexp_list [list]
foreach wg $winglob_list re $tclregexp_list {
#puts "wg: $wg"
#puts "re: $re"
foreach alt_wp $alt_winpatterns alt_re $alt_regexps {
#puts " alt_wp: $alt_wp"
#puts " alt_re: $alt_re"
lappend next_winglob_list "$wg$alt_wp"
set alt_re_no_caret [string range $alt_re 1 end]
lappend next_regexp_list "${re}${alt_re_no_caret}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\["} {
#windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
if {!$brace_depth} {
set in_brackets 1
} else {
#we have a [ char inside braces
#Let the brace processing handle it as it recurses.
#but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets.
set brace_is_normal 1
append brace_content $ch
}
} elseif {$ch eq "\]"} {
if {$brace_depth} {
#we have a ] char inside braces
#Let the brace processing hanele it as it recurses.
append brace_content $ch
continue
}
set in_brackets 0
set charlist [resolve_characterclass $cclass_content]
set cclass_content ""
set next_winglob_list [list]
set next_regexp_list [list]
foreach c $charlist {
#set winglob_list [lmap wg $winglob_list {append wg $c}]
foreach wg $winglob_list {
lappend next_winglob_list "$wg$c"
}
foreach re $tclregexp_list {
set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}]
lappend next_regexp_list "${re}${c_escaped}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\\"} {
if {$in_brackets} {
append cclass_content $ch
#append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself.
set esc_next 1
continue
}
if {$brace_depth} {
#we have a \ char inside braces
#Let the brace processing handle it as it recurses.
append brace_content $ch
set esc_next 1
continue
}
set esc_next 1
continue
} else {
if {$in_brackets} {
append cclass_content $ch
continue
}
if {!$brace_depth} {
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]
if {[string length $re_ch] == 1} {
switch -- $re_ch {
"?" {set re_ch "."}
"*" {set re_ch ".*"}
default {
#we could use the same mixed case filter here for both sensitive and insensitive filesystems,
#because the API filtering will already have done the restriction,
#and so a more permissive regex that matches both cases will still only match the results that the API call returns,
#which will be correct based on the case-sensitivity of the filesystem.
#It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob.
#ie in the parts of the original glob that were in square brackets.
if {!$case_sensitive_filesystem} {
# add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
if {[string is upper $re_ch]} {
set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"]
} elseif {[string is lower $re_ch]} {
set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"]
} else {
#non-alpha char - no need to add case-insensitivity
}
}
}
}
}
set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}]
} else {
#we have a literal char inside braces - add to current brace_content
if {$brace_depth == 1 && $ch eq ","} {
lappend braced_alternatives $brace_content
set brace_content ""
} else {
append brace_content $ch
}
}
}
}
#sanity check
if {[llength $winglob_list] != [llength $tclregexp_list]} {
error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]"
}
set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}]
return [dict create winglobs $winglob_list tclregexps $tclregexp_list]
}
#todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed? #todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed?
namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs
# get listing without using unix-tools (may not be installed on the windows system) # get listing without using unix-tools (may not be installed on the windows system)
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
# This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance # This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance
#todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob.
# we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before
#we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder.
#For really large folders and a moderate number of patterns, this could be a significant performance improvement.
proc du_dirlisting_twapi {folderpath args} { proc du_dirlisting_twapi {folderpath args} {
set defaults [dict create\ set defaults [dict create\
-glob *\ -glob *\
-filedebug 0\ -filedebug 0\
-patterndebug 0\
-with_sizes 1\ -with_sizes 1\
-with_times 1\ -with_times 1\
] ]
set opts [dict merge $defaults $args] set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob] set tcl_glob [dict get $opts -glob]
#todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps.
set case_sensitive_filesystem 0 ;#todo - consider detecting this properly.
#Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program.
#In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly,
#as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem,
#so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly.
#Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a
#case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem.
#Note: we only use this to adjust the filtering regexps we generate from the tcl glob.
#The windows API pattern match will already filter based on the case-sensitivity of the filesystem
# so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem.
set globs_processed [tclglob_equivalents $tcl_glob]
#we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob.
#temp
#set win_glob_list [list $tcl_glob]
set win_glob_list [dict get $globs_processed winglobs]
set tcl_regex_list [dict get $globs_processed tclregexps]
#review
# our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob.
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes] set opt_with_sizes [dict get $opts -with_sizes]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_filedebug [dict get $opts -filedebug] ;#per file set opt_filedebug [dict get $opts -filedebug] ;#per file
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_patterndebug [dict get $opts -patterndebug]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set ftypes [list f d l] set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} { if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean - (f false vs f file!) #don't use string is boolean - (f false vs f file!)
@ -711,18 +1031,36 @@ namespace eval punk::du {
} }
} }
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set errors [dict create] set errors [dict create]
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
# return it so it can be stored and tried as an alternative for problem paths # return it so it can be stored and tried as an alternative for problem paths
#puts stderr ">>> glob: $opt_glob"
#REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames foreach win_glob $win_glob_list tcl_re $tcl_regex_list {
if {$opt_patterndebug} {
puts stderr ">>>du_dirlisting_twapi winglob: $win_glob"
puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re"
}
#REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases. # using * all the time may be inefficient - so we might be able to avoid that in some cases.
try { try {
#glob of * will return dotfiles too on windows #glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field
} on error args { } on error args {
try { try {
if {[string match "*denied*" $args]} { if {[string match "*denied*" $args]} {
@ -745,11 +1083,11 @@ namespace eval punk::du {
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} { #if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {}
if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} { if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} {
#looks like an ordinary no results for chosen glob #looks like an ordinary no results for chosen glob
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] #return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
} continue
} }
@ -786,9 +1124,10 @@ namespace eval punk::du {
set parent [file dirname $folderpath] set parent [file dirname $folderpath]
set badtail [file tail $folderpath] set badtail [file tail $folderpath]
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} { while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name] set nm [dict get $iteminfo name]
puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo"
if {$nm eq $badtail} { if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname] set fixedtail [dict get $iteminfo altname]
break break
@ -808,7 +1147,7 @@ namespace eval punk::du {
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath [file join $parent $fixedtail] set fixedpath $parent/$fixedtail
append errmsg "retrying with with windows dos device path $fixedpath\n" append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg puts stderr $errmsg
@ -837,39 +1176,47 @@ namespace eval punk::du {
} }
} }
set dirs [list] #jjj
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
while {[twapi::find_file_next $iterator iteminfo]} { while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name] set nm [dict get $iteminfo name]
#recheck glob if {![regexp $tcl_re $nm]} {
#review!
if {![string match $opt_glob $nm]} {
continue continue
} }
if {$nm in {. ..}} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
set ftype "" #set ftype ""
set do_sizes 0
set do_times 0
#attributes applicable to any classification #attributes applicable to any classification
set fullname [file_join_one $folderpath $nm] set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
set file_attributes [dict get $attrdict -fileattributes] set file_attributes [dict get $attrdict -fileattributes]
set is_reparse_point [expr {"reparse_point" in $file_attributes}]
set is_directory [expr {"directory" in $file_attributes}]
set linkdata [dict create] set linkdata [dict create]
# ----------------------------------------------------------- # -----------------------------------------------------------
#main classification #main classification
if {"reparse_point" in $file_attributes} { if {$is_reparse_point} {
#this concept doesn't correspond 1-to-1 with unix links #this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return #review - and see which if any actually belong in the links key of our return
@ -893,48 +1240,55 @@ namespace eval punk::du {
# #
#links are techically files too, whether they point to a file/dir or nothing. #links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname lappend links $fullname
set ftype "l" #set ftype "l"
if {"l" in $sized_types} {
set do_sizes 1
}
if {"l" in $timed_types} {
set do_times 1
}
dict set linkdata linktype reparse_point dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} { if {"directory" ni $file_attributes} {
dict set linkdata target_type file dict set linkdata target_type file
} }
} }
if {"directory" in $file_attributes} { if {$is_directory} {
if {$nm in {. ..}} { #if {$nm in {. ..}} {
continue # continue
} #}
if {"reparse_point" ni $file_attributes} { if {!$is_reparse_point} {
lappend dirs $fullname lappend dirs $fullname
set ftype "d" #set ftype "d"
if {"d" in $sized_types} {
set do_sizes 1
}
if {"d" in $timed_types} {
set do_times 1
}
} else { } else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory dict set linkdata target_type directory
} }
} }
if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} { if {!$is_reparse_point && !$is_directory} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname lappend files $fullname
if {"f" in $sized_types} { if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size] lappend filesizes [dict get $iteminfo size]
set do_sizes 1
}
if {"f" in $timed_types} {
set do_times 1
} }
set ftype "f" #set ftype "f"
} }
# ----------------------------------------------------------- # -----------------------------------------------------------
if {[dict get $attrdict -hidden]} { if {$do_sizes} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {$ftype in $sized_types} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
} }
if {$ftype in $timed_types} { if {$do_times} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
@ -947,20 +1301,18 @@ namespace eval punk::du {
if {[dict size $linkdata]} { if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata dict set linkinfo $fullname $linkdata
} }
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
} }
twapi::find_file_close $iterator twapi::find_file_close $iterator
set vfsmounts [get_vfsmounts_in_folder $folderpath] }
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts set effective_opts $opts
dict set effective_opts -with_times $timed_types dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types dict set effective_opts -with_sizes $sized_types
#also determine whether vfs. file system x is *much* faster than file attributes #also determine whether vfs. file system x is *much* faster than file attributes
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
} }
proc get_vfsmounts_in_folder {folderpath} { proc get_vfsmounts_in_folder {folderpath} {
set vfsmounts [list] set vfsmounts [list]
@ -991,9 +1343,11 @@ namespace eval punk::du {
#work around the horrible tilde-expansion thing (not needed for tcl 9+) #work around the horrible tilde-expansion thing (not needed for tcl 9+)
proc file_join_one {base newtail} { proc file_join_one {base newtail} {
if {[string index $newtail 0] ne {~}} { if {[string index $newtail 0] ne {~}} {
return [file join $base $newtail] #return [file join $base $newtail]
return $base/$newtail
} }
return [file join $base ./$newtail] #return [file join $base ./$newtail]
return $base/./$newtail
} }
@ -1121,7 +1475,7 @@ namespace eval punk::du {
#ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now.
#struct::set will affect order: tcl vs critcl give different ordering! #struct::set will affect order: tcl vs critcl give different ordering!
set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links] set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]] set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]]
#---- #----

32
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm

@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check {
#These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author)
#Not any sort of comprehensive check of known tcl bugs. #Not any sort of comprehensive check of known tcl bugs.
#These are reported in warning output of 'help tcl' - or used for workarounds in some cases. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases.
proc has_tclbug_caseinsensitiveglob_windows {} {
#https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted
if {"windows" ne $::tcl_platform(platform)} {
set bug 0
} else {
set tmpdir [file tempdir]
set testfile [file join $tmpdir "bugtest"]
set fd [open $testfile w]
puts $fd test
close $fd
set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*]
foreach r $globresult {
if {$r ne "bugtest"} {
set bug 1
break
}
}
return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning]
#possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized
# to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation.
}
}
#todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies.
# but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module,
# or at least checking for an existing mounted case-insensitive filesystem.
# A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like.
# It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix.
proc has_tclbug_regexp_emptystring {} { proc has_tclbug_regexp_emptystring {} {
#The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces
#This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic,

209
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm

@ -50,7 +50,7 @@ package require punk::lib
package require punk::args package require punk::args
package require punk::ansi package require punk::ansi
package require punk::winpath package require punk::winpath
package require punk::du package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing.
package require commandstack package require commandstack
#*** !doctools #*** !doctools
#[item] [package {Tcl 8.6}] #[item] [package {Tcl 8.6}]
@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs {
#[list_begin definitions] #[list_begin definitions]
punk::args::define {
@id -id ::punk::nav::fs::d/
@cmd -name punk::nav::fs::d/ -help\
{List directories or directories and files in the current directory or in the
targets specified with the fileglob_or_target glob pattern(s).
If a single target is specified without glob characters, and it exists as a directory,
then the working directory is changed to that target and a listing of that directory
is returned. If the single target specified without glob characters does not exist as
a directory, then it is treated as a glob pattern and the listing is for the current
directory with results filtered to match fileglob_or_target.
If multiple targets or glob patterns are specified, then a separate listing is returned
for each fileglob_or_target pattern.
This function is provided via aliases as ./ and .// with v being inferred from the alias
name, and also as d/ with an explicit v argument.
The ./ and .// forms are more convenient for interactive use.
examples:
./ - list directories in current directory
.// - list directories and files in current directory
./ src/* - list directories in src
.// src/* - list directories and files in src
.// *.txt - list files in current directory with .txt extension
.// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name
(on a case-insensitive filesystem this would also match T*1.txt etc)
.// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name
(glob chars treated as literals due to being in character-class brackets
This will match files beginning with a capital T and not lower case t
even on a case-insensitive filesystem.)
.// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns:
{[t]*} - names beginning with t
{d{e,d}*} - names beginning with de or dd
(on a case-insensitive filesystem the first pattern would also match names beginning with T)
}
@values -min 1 -max -1 -type string
v -type string -choices {/ //} -help\
"
/ - list directories only
// - list directories and files
"
fileglob_or_target -type string -optional true -multiple true -help\
"A glob pattern as supported by Tcl's 'glob' command, to filter results.
If multiple patterns are supplied, then a listing for each pattern is returned.
If no patterns are supplied, then all items are listed."
}
#NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread. #NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread.
#As this function recurses and calls cd multiple times - it's not thread-safe. #As this function recurses and calls cd multiple times - it's not thread-safe.
#Another thread could theoretically cd whilst this is running. #Another thread could theoretically cd whilst this is running.
@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs {
#puts stdout "-->[ansistring VIEW $result]" #puts stdout "-->[ansistring VIEW $result]"
return $result return $result
} else { } else {
set atail [lassign $args cdtarget]
if {[llength $args] == 1} { if {[llength $args] == 1} {
set cdtarget [lindex $args 0] set cdtarget [lindex $args 0]
switch -exact -- $cdtarget { switch -exact -- $cdtarget {
. - ./ { . - ./ {
tailcall punk::nav::fs::d/ tailcall punk::nav::fs::d/ $v
} }
.. - ../ { .. - ../ {
if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} { if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} {
@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs {
if {[string range $cdtarget_copy 0 3] eq "//?/"} { if {[string range $cdtarget_copy 0 3] eq "//?/"} {
#handle dos device paths - convert to normal path for glob testing #handle dos device paths - convert to normal path for glob testing
set glob_test [string range $cdtarget_copy 3 end] set glob_test [string range $cdtarget_copy 3 end]
set cdtarget_is_glob [regexp {[*?]} $glob_test] set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test]
} else { } else {
set cdtarget_is_glob [regexp {[*?]} $cdtarget] set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget]
#todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing.
} }
if {!$cdtarget_is_glob} { if {!$cdtarget_is_glob} {
set cdtarget_file_type [file type $cdtarget] set cdtarget_file_type [file type $cdtarget]
@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs {
} }
set VIRTUAL_CWD $cdtarget set VIRTUAL_CWD $cdtarget
set curdir $cdtarget set curdir $cdtarget
tailcall punk::nav::fs::d/ $v
} else { } else {
set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget]
if {[string match //zipfs:/* $VIRTUAL_CWD]} { if {[string match //zipfs:/* $VIRTUAL_CWD]} {
@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs {
} }
if {[file type $target] eq "directory"} { if {[file type $target] eq "directory"} {
set VIRTUAL_CWD $target set VIRTUAL_CWD $target
tailcall punk::nav::fs::d/ $v
} }
} }
tailcall punk::nav::fs::d/ $v
} }
set curdir $VIRTUAL_CWD set curdir $VIRTUAL_CWD
} else { } else {
@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs {
#globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.) #globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.)
set searchspec [lindex $args 0]
set result "" set result ""
#set chunklist [list] #set chunklist [list]
@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs {
set this_result [dict create] set this_result [dict create]
foreach searchspec $args { foreach searchspec $args {
set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)] set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)]
set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}] #we need to support the same glob chars that Tcl's 'glob' command accepts.
set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]]
#we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean #we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean
#this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough. #this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough.
#lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*) #lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*)
@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs {
set allow_nonportable [dict exists $received -nonportable] set allow_nonportable [dict exists $received -nonportable]
set curdir [pwd] set curdir [pwd]
set fullpath_list [list]
set fullpath_list [list] ;#list of full paths to create.
set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir)
#these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests.
set error_paths [list] set error_paths [list]
foreach p $paths { foreach p $paths {
if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} {
@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] lappend error_paths [list $p "Path '$p' contains null character which is not allowed"]
continue continue
} }
set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)]
#e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. #if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir.
set fullpath [file join $path1 {*}[lrange $args 1 end]]
#Some subpaths of the supplied paths to create may already exist. #Some subpaths of the supplied paths to create may already exist.
#we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. #we should test write permissions on the nearest existing parent of the supplied path to create,
#rather than just on the immediate parent segment of the supplied path itself which may not exist.
set fullpath [file normalize $fullpath]
set parent [file dirname $fullpath] set parent [file dirname $fullpath]
while {![file exists $parent]} { while {![file exists $parent]} {
set parent [file dirname $parent] set parent [file dirname $parent]
@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"]
continue continue
} }
lappend existing_parent_list $parent
lappend fullpath_list $fullpath lappend fullpath_list $fullpath
} }
if {[llength $fullpath_list] != [llength $paths]} { if {[llength $fullpath_list] != [llength $paths]} {
@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs {
set num_created 0 set num_created 0
set error_string "" set error_string ""
foreach fullpath $fullpath_list { foreach fullpath $fullpath_list existing_parent $existing_parent_list {
#calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue.
#set relative_path [file relative $fullpath $existing_parent]
#todo.
if {[catch {file mkdir $fullpath}]} { if {[catch {file mkdir $fullpath}]} {
set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted."
cd $curdir
break break
} }
incr num_created incr num_created
@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs {
if {$error_string ne ""} { if {$error_string ne ""} {
error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered."
} }
d/ $curdir
#display summaries of created directories (which may have already existed) by reusing d/ to get info on them.
set query_paths [lmap v $paths $v/*]
d/ / {*}$query_paths
} }
#todo use unknown to allow d/~c:/etc ?? #todo use unknown to allow d/~c:/etc ??
@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs {
if {![file isdirectory $target]} { if {![file isdirectory $target]} {
error "Folder $target not found" error "Folder $target not found"
} }
d/ $target d/ / $target
} }
@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs {
} }
set relativepath [expr {[file pathtype $searchspec] eq "relative"}] set relativepath [expr {[file pathtype $searchspec] eq "relative"}]
set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
#set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent. #dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent.
#(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict) #(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict)
@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs {
} }
puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location" puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location"
set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location] set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location]
return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
} }
#todo - package as punk::nav::fs #todo - package as punk::nav::fs
@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs {
} }
proc dirfiles_dict {args} { proc dirfiles_dict {args} {
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict] set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict]
lassign [dict values $argd] leaders opts vals lassign [dict values $argd] leaders opts values
set searchspecs [dict values $vals] set searchspecs [dict values $values]
#puts stderr "searchspecs: $searchspecs [llength $searchspecs]" #puts stderr "searchspecs: $searchspecs [llength $searchspecs]"
#puts stdout "arglist: $opts" #puts stdout "arglist: $opts"
@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs {
} }
} }
"\uFFFF" { "\uFFFF" {
set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
if {$searchtail_has_globs} { if {$searchtail_has_globs} {
if {$is_relativesearchspec} { if {$is_relativesearchspec} {
#set location [file dirname [file join $searchbase $searchspec]] #set location [file dirname [file join $searchbase $searchspec]]
@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs {
} else { } else {
set next_opt_with_times [list -with_times $opt_with_times] set next_opt_with_times [list -with_times $opt_with_times]
} }
set ts1 [clock clicks -milliseconds]
if {$is_in_vfs} { if {$is_in_vfs} {
set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times]
} else { } else {
@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs {
} }
} }
} }
set ts2 [clock clicks -milliseconds]
set ts_listing [expr {$ts2 - $ts1}]
set dirs [dict get $listing dirs] set dirs [dict get $listing dirs]
set files [dict get $listing files] set files [dict get $listing files]
@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs {
set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden] set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden]
} }
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#-----------------------------------------------------------------------------------------
set ts1 [clock milliseconds]
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#foreach d $dirs { #foreach d $dirs {
# if {[lindex [file system $d] 0] eq "tclvfs"} { # if {[lindex [file system $d] 0] eq "tclvfs"} {
@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs {
set files $sorted_files set files $sorted_files
set filesizes $sorted_filesizes set filesizes $sorted_filesizes
set ts2 [clock milliseconds]
set ts_sorting [expr {$ts2 - $ts1}]
#-----------------------------------------------------------------------------------------
# -- --- # -- ---
#jmn #jmn
set ts1 [clock milliseconds]
foreach nm [list {*}$dirs {*}$files] { foreach nm [list {*}$dirs {*}$files] {
if {[punk::winpath::illegalname_test $nm]} { if {[punk::winpath::illegalname_test $nm]} {
lappend nonportable $nm lappend nonportable $nm
} }
} }
set ts2 [clock milliseconds]
set ts_nonportable_check [expr {$ts2 - $ts1}]
set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms]
set front_of_dict [dict create location $location searchbase $opt_searchbase] set front_of_dict [dict create location $location searchbase $opt_searchbase]
set listing [dict merge $front_of_dict $listing] set listing [dict merge $front_of_dict $listing]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes] set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info]
return [dict merge $listing $updated] return [dict merge $listing $updated]
} }
@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs {
@id -id ::punk::nav::fs::dirfiles_dict_as_lines @id -id ::punk::nav::fs::dirfiles_dict_as_lines
-stripbase -default 0 -type boolean -stripbase -default 0 -type boolean
-formatsizes -default 1 -type boolean -formatsizes -default 1 -type boolean
-listing -default "/" -choices {/ // //} -listing -default "/" -choices {/ //}
@values -min 1 -max -1 -type dict -unnamed true @values -min 1 -max -1 -type dict -unnamed true
} }
#todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing?
proc dirfiles_dict_as_lines {args} { proc dirfiles_dict_as_lines {args} {
set ts1 [clock milliseconds] #set ts1 [clock milliseconds]
package require overtype package require overtype
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines]
lassign [dict values $argd] leaders opts vals lassign [dict values $argd] leaders opts vals
@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs {
#review - symlink to shortcut? hopefully will just work #review - symlink to shortcut? hopefully will just work
#classify as file or directory - fallback to file if unknown/undeterminable #classify as file or directory - fallback to file if unknown/undeterminable
set finfo_plus [list] set finfo_plus [list]
set ts2 [clock milliseconds] #set ts2 [clock milliseconds]
foreach fdict $finfo { foreach fdict $finfo {
set fname [dict get $fdict file] set fname [dict get $fdict file]
if {[file extension $fname] eq ".lnk"} { if {[file extension $fname] eq ".lnk"} {
@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs {
} }
unset finfo unset finfo
puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" #puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}]
@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs {
#consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path #consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path
#review: punk::winpath calls cygpath! #review: punk::winpath calls cygpath!
#review: file pathtype is platform dependant #review: file pathtype is platform dependant
proc path_to_absolute {path base platform} { proc path_to_absolute {subpath base platform} {
set ptype [file pathtype $path] set ptype [file pathtype $subpath]
if {$ptype eq "absolute"} { if {$ptype eq "absolute"} {
set path_absolute $path set path_absolute $subpath
} elseif {$ptype eq "volumerelative"} { } elseif {$ptype eq "volumerelative"} {
if {$platform eq "windows"} { if {$platform eq "windows"} {
#unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms) #unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms)
if {[string index $path 0] eq "/"} { if {[string index $subpath 0] eq "/"} {
#this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here #this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here
#It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions. #It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions.
#Todo - tidy up. #Todo - tidy up.
package require punk::unixywindows package require punk::unixywindows
set path_absolute [punk::unixywindows::towinpath $path] set path_absolute [punk::unixywindows::towinpath $subpath]
#puts stderr "winpath: $path" #puts stderr "winpath: $path"
} else { } else {
#todo handle volume-relative paths with volume specified c:etc c: #todo handle volume-relative paths with volume specified c:etc c:
@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs {
#The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives #The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives
#Arguably if ...? #Arguably if ...?
#set path_absolute $base/$path #set path_absolute $base/$subpath
set path_absolute $path set path_absolute $subpath
} }
} else { } else {
# unknown what paths are reported as this on other platforms.. treat as absolute for now # unknown what paths are reported as this on other platforms.. treat as absolute for now
set path_absolute $path set path_absolute $subpath
} }
} else { } else {
set path_absolute $base/$path #e.g relative subpath=* base = c:/test -> c:/test/*
} #e.g relative subpath=../test base = c:/test -> c:/test/../test
if {$platform eq "windows"} { #e.g relative subpath=* base = //server/share/test -> //server/share/test/*
if {[punk::winpath::illegalname_test $path_absolute]} { set path_absolute $base/$subpath
set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present }
} #fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues.
} #if {$platform eq "windows"} {
# if {[punk::winpath::illegalname_test $path_absolute]} {
# set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
# }
#}
return $path_absolute return $path_absolute
} }
proc strip_prefix_depth {path prefix} { proc strip_prefix_depth {path prefix} {
@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib {
#[para] Secondary functions that are part of the API #[para] Secondary functions that are part of the API
#[list_begin definitions] #[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
punk::args::define {
@id -id ::punk::nav::fs::lib::is_fileglob
@cmd -name punk::nav::fs::lib::is_fileglob
@values -min 1 -max 1
path -type string -required true -help\
{String to test for being a glob pattern as recognised by the tcl 'glob' command.
If the string represents a path with multiple segments, only the final component
of the path will be tested for glob characters.
Glob patterns in this context are different to globs accepted by TCL's 'string match'.
A glob pattern is any string that contains unescaped * ? { } [ or ].
This will not detect mismatched unescaped braces or brackets.
Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern.
}
}
proc is_fileglob {str} {
#a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser)
set in_escape 0
set segments [file split $str]
set tail [lindex $segments end]
foreach c [split $tail ""] {
if {$in_escape} {
set in_escape 0
} else {
if {$c eq "\\"} {
set in_escape 1
} elseif {$c in [list * ? "\[" "\]" "{" "}" ]} {
return 1
}
}
}
return 0
}
#*** !doctools #*** !doctools

2
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/path-0.1.0.tm

@ -357,7 +357,7 @@ namespace eval punk::path {
} }
} }
} }
puts "==>finalparts: '$finalparts'" #puts "==>finalparts: '$finalparts'"
# using join - {"" "" server share} -> //server/share and {a b} -> a/b # using join - {"" "" server share} -> //server/share and {a b} -> a/b
if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} { if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} {
#backtracking on unix-style path can end up with empty string as only member of finalparts #backtracking on unix-style path can end up with empty string as only member of finalparts

64
src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm

@ -114,6 +114,50 @@ namespace eval punk::winpath {
return $path return $path
} }
} }
proc illegal_char_map_to_doublewide {ch} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
set map [dict create \
"<" "\uFF1C" \
">" "\uFF1E" \
":" "\uFF1A" \
"\"" "\uFF02" \
"/" "\uFF0F" \
"\\" "\uFF3C" \
"|" "\uFF5C" \
"?" "\uFF1F" \
"*" "\uFF0A"]
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
proc illegal_char_map_to_ntfs {ch} {
#windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc.
#see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars
#see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped)
set map [dict create \
"<" "\uF03C" \
">" "\uF03E" \
":" "\uF03A" \
"\"" "\uF022" \
"/" "unknown" \
"\\" "\uF05c" \
"|" "\uF07C" \
"?" "\uF03F" \
"*" "\uF02A"]
#note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API.
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
#we don't validate that path is actually illegal because we don't know the full range of such names. #we don't validate that path is actually illegal because we don't know the full range of such names.
#The caller can apply this to any path. #The caller can apply this to any path.
#don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently) #don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently)
@ -200,8 +244,15 @@ namespace eval punk::winpath {
set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"]
#we need to exclude things like path/.. path/. #we need to exclude things like path/.. path/.
foreach seg [file split $path] { set segments [file split $path]
if {$seg in [list . ..]} { if {[file pathtype $path] eq "absolute"} {
#absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments.
set checksegments [lrange $segments 1 end]
} else {
set checksegments $segments
}
foreach seg $checksegments {
if {$seg in {. ..}} {
#review - what if there is a folder or file that actually has a name such as . or .. ? #review - what if there is a folder or file that actually has a name such as . or .. ?
#unlikely in normal use - but could done deliberately for bad reasons? #unlikely in normal use - but could done deliberately for bad reasons?
#We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem. #We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem.
@ -220,10 +271,17 @@ namespace eval punk::winpath {
#only check for actual space as other whitespace seems to work without being stripped #only check for actual space as other whitespace seems to work without being stripped
#trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph
if {[string index $seg end] in [list " " "."]} { if {[string index $seg end] in {" " .}} {
#windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc) #windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc)
return 1 return 1
} }
#set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams.
set re {[<>:"/\\|?*]}
if {[regexp $re $seg]} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
} }
#glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph) #glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph)
#- they seem to be readable from cmd and tclsh as is. #- they seem to be readable from cmd and tclsh as is.

211
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk-0.1.tm

@ -6066,9 +6066,218 @@ namespace eval punk {
set pipe [punk::path_list_pipe $glob] set pipe [punk::path_list_pipe $glob]
{*}$pipe {*}$pipe
} }
proc path {{glob *}} { proc path_basic {{glob *}} {
set pipe [punk::path_list_pipe $glob] set pipe [punk::path_list_pipe $glob]
{*}$pipe |> list_as_lines {*}$pipe |> list_as_lines
}
namespace eval argdoc {
punk::args::define {
@id -id ::punk::path
@cmd -name "punk::path" -help\
"Introspection of the PATH environment variable.
This tool will examine executables within each PATH entry and show which binaries
are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns."
@opts
-binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries."
@values -min 0 -max -1
glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories."
}
}
proc path {args} {
set argd [punk::args::parse $args withid ::punk::path]
lassign [dict values $argd] leaders opts values received
set binglobs [dict get $opts -binglobs]
set globs [dict get $values glob]
if {$::tcl_platform(platform) eq "windows"} {
set sep ";"
} else {
# : ok for linux/bsd ... mac?
set sep ":"
}
set all_paths [split [string trimright $::env(PATH) $sep] $sep]
set filtered_paths $all_paths
if {[llength $globs]} {
set filtered_paths [list]
foreach p $all_paths {
foreach g $globs {
if {[string match -nocase $g $p]} {
lappend filtered_paths $p
break
}
}
}
}
#Windows executable search location order:
#1. The current directory.
#2. The directories that are listed in the PATH environment variable.
# within each directory windows uses the PATHEXT environment variable to determine
#which files are considered executable and in which order they are considered.
#So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files
#with the same name but different extensions in the same directory, then the one
#with the extension that appears first in PATHEXT will be executed when that name is called.
#On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output.
#Duplicate PATH entries are also a fairly likely possibility on all platforms.
#This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries.
#By default we don't want to display all executables in all PATH entries - as this can be very verbose
#- but we want to be able to show which executables are overshadowed by which PATH entries,
#and to be able to filter the PATH entries and the executables using glob patterns.
#To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name.
#The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths
#in the original PATH list. The executable dict will contain the paths and path indices where each executable is found,
#and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a
#dict keyed by path index which contains the list of executables in that path - to make it easy to show which
#executables are overshadowed by which paths.
set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows).
set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off).
set d_index_executables [dict create] ;#key is path index, value is list of executables in that path
set path_idx 0
foreach p $all_paths {
if {$::tcl_platform(platform) eq "windows"} {
set pnorm [string tolower $p]
} else {
set pnorm $p
}
if {![dict exists $d_path_info $pnorm]} {
dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]]
set executables [list]
if {[file isdirectory $p]} {
#get all files that are executable in this path.
#If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names.
set pnormglob [file normalize $p]
if {$::tcl_platform(platform) eq "windows"} {
#Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable.
#We need to account for this in our glob pattern.
set pathexts [list]
if {[info exists ::env(PATHEXT)]} {
set env_pathexts [split $::env(PATHEXT) ";"]
#set pathexts [lmap e $env_pathexts {string tolower $e}]
foreach pe $env_pathexts {
if {$pe eq "."} {
continue
}
lappend pathexts [string tolower $pe]
}
} else {
set env_pathexts [list]
#default PATHEXT if not set - according to Microsoft docs
set pathexts [list .com .exe .bat .cmd]
}
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set has_pathext 1
break
}
}
if {!$has_pathext} {
foreach pe $pathexts {
lappend binglobs "$bg$pe"
}
}
}
set lc_binglobs [lmap e $binglobs {string tolower $e}]
if {"." in $pathexts} {
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]]
set has_pathext 1
break
}
}
if {$has_pathext} {
if {[string tolower $base] ni $lc_binglobs} {
lappend binglobs "$base"
}
}
}
}
#TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem.
#(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug)
#We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results.
#The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for.
#(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe')
set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]]
set executables [list]
foreach e $globresults {
puts stderr "glob result: $e"
puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]"
lappend executables [file tail [file normalize $e]]
}
} else {
set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]]
}
}
dict set d_index_executables $path_idx $executables
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
if {![dict exists $d_bin_info $exenorm]} {
dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]]
} else {
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
} else {
#duplicate path entry - add to list of original paths for this normalized path
# Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...?
# we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it.
set pathdata [dict get $d_path_info $pnorm]
dict lappend pathdata original_paths $p
dict lappend pathdata indices $path_idx
dict set d_path_info $pnorm $pathdata
#we don't need to add executables for this path - as they will be the same as the original path that we have already processed.
#However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables.
set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
incr path_idx
}
#temporary debug output to check dicts are being built correctly
set debug ""
append debug "Path info dict:" \n
append debug [showdict $d_path_info] \n
append debug "Binary info dict:" \n
append debug [showdict $d_bin_info] \n
append debug "Index executables dict:" \n
append debug [showdict $d_index_executables] \n
#return $debug
puts stdout $debug
} }
#------------------------------------------------------------------- #-------------------------------------------------------------------

500
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/du-0.1.0.tm

@ -479,7 +479,7 @@ namespace eval punk::du {
} }
namespace eval lib { namespace eval lib {
variable du_literal variable du_literal
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ]
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this
@ -489,7 +489,11 @@ namespace eval punk::du {
return [dict get $winfile_attributes $bitmask] return [dict get $winfile_attributes $bitmask]
} else { } else {
#list/dict shimmering? #list/dict shimmering?
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
set decoded [twapi::decode_file_attributes $bitmask]
dict set winfile_attributes $bitmask $decoded
return $decoded
} }
} }
variable win_reparse_tags variable win_reparse_tags
@ -563,23 +567,25 @@ namespace eval punk::du {
#then twapi::device_ioctl (win32 DeviceIoControl) #then twapi::device_ioctl (win32 DeviceIoControl)
#then parse buffer somehow (binary scan..) #then parse buffer somehow (binary scan..)
#https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913 #https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913
punk::args::define {
proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
set argd [punk::args::parse $args withdef {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo @id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output" -debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1 @values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'" iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}] }
set opts [dict get $argd opts] #don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args.
set iteminfo [dict get $argd values iteminfo] proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
#set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo]
set defaults [dict create -debug 0 -debugchannel stderr]
set opts [dict merge $defaults [lrange $args 0 end-1]]
set iteminfo [lindex $args end]
set opt_debug [dict get $opts -debug] set opt_debug [dict get $opts -debug]
set opt_debugchannel [dict get $opts -debugchannel] set opt_debugchannel [dict get $opts -debugchannel]
#-longname is placeholder - caller needs to set #-longname is placeholder - caller needs to set
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0] set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}]
if {$opt_debug} { if {$opt_debug} {
set dbg "iteminfo returned by find_file_open\n" set dbg "iteminfo returned by find_file_open\n"
append dbg [pdict -channel none iteminfo] append dbg [pdict -channel none iteminfo]
@ -592,23 +598,23 @@ namespace eval punk::du {
} }
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
if {"hidden" in $attrinfo} { foreach attr $attrinfo {
switch -- $attr {
hidden {
dict set result -hidden 1 dict set result -hidden 1
} }
if {"system" in $attrinfo} { system {
dict set result -system 1 dict set result -system 1
} }
if {"readonly" in $attrinfo} { readonly {
dict set result -readonly 1 dict set result -readonly 1
} }
dict set result -shortname [dict get $iteminfo altname] reparse_point {
dict set result -fileattributes $attrinfo
if {"reparse_point" in $attrinfo} {
#the twapi API splits this 32bit value for us #the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0] set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1] set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+ #+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value | #|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+ #+-+-+-+-+-----------------------+-------------------------------+
@ -617,9 +623,13 @@ namespace eval punk::du {
if {[dict exists $win_reparse_tags_by_int $low_int]} { if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else { } else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"] dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"]
}
} }
} }
}
#dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
dict set result -raw $iteminfo dict set result -raw $iteminfo
return $result return $result
} }
@ -652,27 +662,337 @@ namespace eval punk::du {
catch {twapi::find_file_close $iterator} catch {twapi::find_file_close $iterator}
} }
} }
proc resolve_characterclass {cclass} {
#takes the inner value from a tcl square bracketed character class and converts to a list of characters.
#todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters
#e.g a-c-3 -> a b c - 3
#e.g a-c-3-5 -> a b c - 3 4 5
#e.g a-c -> a b c
#e.g a- -> a -
#e.g -c-e -> - c d e
#the tcl character class does not support negation or intersection - so we can ignore those possibilities for now.
#in this context we do not need to support named character classes such as [:digit:]
set chars [list]
set i 0
set len [string length $cclass]
set accept_range 0
while {$i < $len} {
set ch [string index $cclass $i]
if {$ch eq "-"} {
if {$accept_range} {
set start [string index $cclass [expr {$i - 1}]]
set end [string index $cclass [expr {$i + 1}]]
if {$start eq "" || $end eq ""} {
#invalid range - treat - as literal
if {"-" ni $chars} {
lappend chars "-"
}
} else {
#we have a range - add all chars from previous char to next char
#range may be in either direction - e.g a-c or c-a but we don't care about the order of our result.
if {$start eq $end} {
#degenerate range - treat as single char
if {$start ni $chars} {
lappend chars $start
}
} else {
if {[scan $start %c] < [scan $end %c]} {
set c1 [scan $start %c]
set c2 [scan $end %c]
} else {
set c1 [scan $end %c]
set c2 [scan $start %c]
}
for {set c $c1} {$c <= $c2} {incr c} {
set char [format %c $c]
if {$char ni $chars} {
lappend chars $char
}
}
}
incr i ;#skip end char as it's already included in range
}
set accept_range 0
} else {
#we have a literal - add to list and allow for possible range if next char is also a -
if {"-" ni $chars} {
lappend chars "-"
}
set accept_range 1
}
} else { #we have a literal - add to list and allow for possible range if next char is also a -
if {$ch ni $chars} {
lappend chars $ch
}
set accept_range 1
}
incr i
}
return $chars
}
#return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible.
#first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes.
# e.g {*[\{]*} is a valid tcl glob
#todo - write tests.
#should also support {*\{*} matching a file such as a{blah}b
#Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL.
#So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity.
proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} {
#windows API function in use is the FindFirstFile set of functions.
#these support wildcards * and ? *only*.
#examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported
# {abc[1-3].txt} -> {abc?.txt}
# {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
# {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call.
# {{S,t}*.txt} -> {S*.txt} {T*.txt}
# *.{txt,log} -> {*.txt} {*.log}
# {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals
set gchars [split $tclglob ""]
set winglob_list [list ""]
set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
set in_brackets 0
set esc_next 0
set brace_depth 0
set brace_content ""
set braced_alternatives [list]
set brace_is_normal 0
set cclass_content ""
foreach ch $gchars {
if {$esc_next} {
if {$in_brackets} {
append cclass_content $ch
continue
} elseif {$brace_depth} {
append brace_content $ch
continue
}
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}]
set esc_next 0
continue
}
if {$ch eq "\{"} {
if {$brace_depth} {
#we have an opening brace char inside braces
#Let the brace processing handle it as it recurses.
incr brace_depth 1
append brace_content $ch
continue
}
incr brace_depth 1
set brace_content ""
} elseif {$ch eq "\}"} {
if {$brace_depth > 1} {
#we have a closing brace char inside braces
append brace_content $ch
incr brace_depth -1
continue
}
#process brace_content representing a list of alternatives
#handle list of alternatives - convert {txt,log} to *.txt;*.log
#set alternatives [split $brace_content ","]
lappend braced_alternatives $brace_content
set alternatives $braced_alternatives
set braced_alternatives [list]
set brace_content ""
incr brace_depth -1
set alt_winpatterns [list]
set alt_regexps [list]
foreach alt $alternatives {
set subresult [tclglob_equivalents $alt]
lappend alt_winpatterns {*}[dict get $subresult winglobs]
lappend alt_regexps {*}[dict get $subresult tclregexps]
}
set next_winglob_list [list]
set next_regexp_list [list]
foreach wg $winglob_list re $tclregexp_list {
#puts "wg: $wg"
#puts "re: $re"
foreach alt_wp $alt_winpatterns alt_re $alt_regexps {
#puts " alt_wp: $alt_wp"
#puts " alt_re: $alt_re"
lappend next_winglob_list "$wg$alt_wp"
set alt_re_no_caret [string range $alt_re 1 end]
lappend next_regexp_list "${re}${alt_re_no_caret}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\["} {
#windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
if {!$brace_depth} {
set in_brackets 1
} else {
#we have a [ char inside braces
#Let the brace processing handle it as it recurses.
#but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets.
set brace_is_normal 1
append brace_content $ch
}
} elseif {$ch eq "\]"} {
if {$brace_depth} {
#we have a ] char inside braces
#Let the brace processing hanele it as it recurses.
append brace_content $ch
continue
}
set in_brackets 0
set charlist [resolve_characterclass $cclass_content]
set cclass_content ""
set next_winglob_list [list]
set next_regexp_list [list]
foreach c $charlist {
#set winglob_list [lmap wg $winglob_list {append wg $c}]
foreach wg $winglob_list {
lappend next_winglob_list "$wg$c"
}
foreach re $tclregexp_list {
set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}]
lappend next_regexp_list "${re}${c_escaped}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\\"} {
if {$in_brackets} {
append cclass_content $ch
#append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself.
set esc_next 1
continue
}
if {$brace_depth} {
#we have a \ char inside braces
#Let the brace processing handle it as it recurses.
append brace_content $ch
set esc_next 1
continue
}
set esc_next 1
continue
} else {
if {$in_brackets} {
append cclass_content $ch
continue
}
if {!$brace_depth} {
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]
if {[string length $re_ch] == 1} {
switch -- $re_ch {
"?" {set re_ch "."}
"*" {set re_ch ".*"}
default {
#we could use the same mixed case filter here for both sensitive and insensitive filesystems,
#because the API filtering will already have done the restriction,
#and so a more permissive regex that matches both cases will still only match the results that the API call returns,
#which will be correct based on the case-sensitivity of the filesystem.
#It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob.
#ie in the parts of the original glob that were in square brackets.
if {!$case_sensitive_filesystem} {
# add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
if {[string is upper $re_ch]} {
set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"]
} elseif {[string is lower $re_ch]} {
set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"]
} else {
#non-alpha char - no need to add case-insensitivity
}
}
}
}
}
set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}]
} else {
#we have a literal char inside braces - add to current brace_content
if {$brace_depth == 1 && $ch eq ","} {
lappend braced_alternatives $brace_content
set brace_content ""
} else {
append brace_content $ch
}
}
}
}
#sanity check
if {[llength $winglob_list] != [llength $tclregexp_list]} {
error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]"
}
set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}]
return [dict create winglobs $winglob_list tclregexps $tclregexp_list]
}
#todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed? #todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed?
namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs
# get listing without using unix-tools (may not be installed on the windows system) # get listing without using unix-tools (may not be installed on the windows system)
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
# This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance # This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance
#todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob.
# we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before
#we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder.
#For really large folders and a moderate number of patterns, this could be a significant performance improvement.
proc du_dirlisting_twapi {folderpath args} { proc du_dirlisting_twapi {folderpath args} {
set defaults [dict create\ set defaults [dict create\
-glob *\ -glob *\
-filedebug 0\ -filedebug 0\
-patterndebug 0\
-with_sizes 1\ -with_sizes 1\
-with_times 1\ -with_times 1\
] ]
set opts [dict merge $defaults $args] set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob] set tcl_glob [dict get $opts -glob]
#todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps.
set case_sensitive_filesystem 0 ;#todo - consider detecting this properly.
#Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program.
#In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly,
#as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem,
#so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly.
#Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a
#case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem.
#Note: we only use this to adjust the filtering regexps we generate from the tcl glob.
#The windows API pattern match will already filter based on the case-sensitivity of the filesystem
# so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem.
set globs_processed [tclglob_equivalents $tcl_glob]
#we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob.
#temp
#set win_glob_list [list $tcl_glob]
set win_glob_list [dict get $globs_processed winglobs]
set tcl_regex_list [dict get $globs_processed tclregexps]
#review
# our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob.
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes] set opt_with_sizes [dict get $opts -with_sizes]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_filedebug [dict get $opts -filedebug] ;#per file set opt_filedebug [dict get $opts -filedebug] ;#per file
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_patterndebug [dict get $opts -patterndebug]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set ftypes [list f d l] set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} { if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean - (f false vs f file!) #don't use string is boolean - (f false vs f file!)
@ -711,18 +1031,36 @@ namespace eval punk::du {
} }
} }
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set errors [dict create] set errors [dict create]
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
# return it so it can be stored and tried as an alternative for problem paths # return it so it can be stored and tried as an alternative for problem paths
#puts stderr ">>> glob: $opt_glob"
#REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames foreach win_glob $win_glob_list tcl_re $tcl_regex_list {
if {$opt_patterndebug} {
puts stderr ">>>du_dirlisting_twapi winglob: $win_glob"
puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re"
}
#REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases. # using * all the time may be inefficient - so we might be able to avoid that in some cases.
try { try {
#glob of * will return dotfiles too on windows #glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field
} on error args { } on error args {
try { try {
if {[string match "*denied*" $args]} { if {[string match "*denied*" $args]} {
@ -745,11 +1083,11 @@ namespace eval punk::du {
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} { #if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {}
if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} { if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} {
#looks like an ordinary no results for chosen glob #looks like an ordinary no results for chosen glob
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] #return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
} continue
} }
@ -786,9 +1124,10 @@ namespace eval punk::du {
set parent [file dirname $folderpath] set parent [file dirname $folderpath]
set badtail [file tail $folderpath] set badtail [file tail $folderpath]
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} { while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name] set nm [dict get $iteminfo name]
puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo"
if {$nm eq $badtail} { if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname] set fixedtail [dict get $iteminfo altname]
break break
@ -808,7 +1147,7 @@ namespace eval punk::du {
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath [file join $parent $fixedtail] set fixedpath $parent/$fixedtail
append errmsg "retrying with with windows dos device path $fixedpath\n" append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg puts stderr $errmsg
@ -837,39 +1176,47 @@ namespace eval punk::du {
} }
} }
set dirs [list] #jjj
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
while {[twapi::find_file_next $iterator iteminfo]} { while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name] set nm [dict get $iteminfo name]
#recheck glob if {![regexp $tcl_re $nm]} {
#review!
if {![string match $opt_glob $nm]} {
continue continue
} }
if {$nm in {. ..}} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
set ftype "" #set ftype ""
set do_sizes 0
set do_times 0
#attributes applicable to any classification #attributes applicable to any classification
set fullname [file_join_one $folderpath $nm] set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
set file_attributes [dict get $attrdict -fileattributes] set file_attributes [dict get $attrdict -fileattributes]
set is_reparse_point [expr {"reparse_point" in $file_attributes}]
set is_directory [expr {"directory" in $file_attributes}]
set linkdata [dict create] set linkdata [dict create]
# ----------------------------------------------------------- # -----------------------------------------------------------
#main classification #main classification
if {"reparse_point" in $file_attributes} { if {$is_reparse_point} {
#this concept doesn't correspond 1-to-1 with unix links #this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return #review - and see which if any actually belong in the links key of our return
@ -893,48 +1240,55 @@ namespace eval punk::du {
# #
#links are techically files too, whether they point to a file/dir or nothing. #links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname lappend links $fullname
set ftype "l" #set ftype "l"
if {"l" in $sized_types} {
set do_sizes 1
}
if {"l" in $timed_types} {
set do_times 1
}
dict set linkdata linktype reparse_point dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} { if {"directory" ni $file_attributes} {
dict set linkdata target_type file dict set linkdata target_type file
} }
} }
if {"directory" in $file_attributes} { if {$is_directory} {
if {$nm in {. ..}} { #if {$nm in {. ..}} {
continue # continue
} #}
if {"reparse_point" ni $file_attributes} { if {!$is_reparse_point} {
lappend dirs $fullname lappend dirs $fullname
set ftype "d" #set ftype "d"
if {"d" in $sized_types} {
set do_sizes 1
}
if {"d" in $timed_types} {
set do_times 1
}
} else { } else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory dict set linkdata target_type directory
} }
} }
if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} { if {!$is_reparse_point && !$is_directory} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname lappend files $fullname
if {"f" in $sized_types} { if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size] lappend filesizes [dict get $iteminfo size]
set do_sizes 1
}
if {"f" in $timed_types} {
set do_times 1
} }
set ftype "f" #set ftype "f"
} }
# ----------------------------------------------------------- # -----------------------------------------------------------
if {[dict get $attrdict -hidden]} { if {$do_sizes} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {$ftype in $sized_types} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
} }
if {$ftype in $timed_types} { if {$do_times} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
@ -947,20 +1301,18 @@ namespace eval punk::du {
if {[dict size $linkdata]} { if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata dict set linkinfo $fullname $linkdata
} }
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
} }
twapi::find_file_close $iterator twapi::find_file_close $iterator
set vfsmounts [get_vfsmounts_in_folder $folderpath] }
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts set effective_opts $opts
dict set effective_opts -with_times $timed_types dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types dict set effective_opts -with_sizes $sized_types
#also determine whether vfs. file system x is *much* faster than file attributes #also determine whether vfs. file system x is *much* faster than file attributes
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
} }
proc get_vfsmounts_in_folder {folderpath} { proc get_vfsmounts_in_folder {folderpath} {
set vfsmounts [list] set vfsmounts [list]
@ -991,9 +1343,11 @@ namespace eval punk::du {
#work around the horrible tilde-expansion thing (not needed for tcl 9+) #work around the horrible tilde-expansion thing (not needed for tcl 9+)
proc file_join_one {base newtail} { proc file_join_one {base newtail} {
if {[string index $newtail 0] ne {~}} { if {[string index $newtail 0] ne {~}} {
return [file join $base $newtail] #return [file join $base $newtail]
return $base/$newtail
} }
return [file join $base ./$newtail] #return [file join $base ./$newtail]
return $base/./$newtail
} }
@ -1121,7 +1475,7 @@ namespace eval punk::du {
#ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now.
#struct::set will affect order: tcl vs critcl give different ordering! #struct::set will affect order: tcl vs critcl give different ordering!
set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links] set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]] set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]]
#---- #----

32
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/lib-0.1.6.tm

@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check {
#These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author)
#Not any sort of comprehensive check of known tcl bugs. #Not any sort of comprehensive check of known tcl bugs.
#These are reported in warning output of 'help tcl' - or used for workarounds in some cases. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases.
proc has_tclbug_caseinsensitiveglob_windows {} {
#https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted
if {"windows" ne $::tcl_platform(platform)} {
set bug 0
} else {
set tmpdir [file tempdir]
set testfile [file join $tmpdir "bugtest"]
set fd [open $testfile w]
puts $fd test
close $fd
set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*]
foreach r $globresult {
if {$r ne "bugtest"} {
set bug 1
break
}
}
return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning]
#possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized
# to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation.
}
}
#todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies.
# but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module,
# or at least checking for an existing mounted case-insensitive filesystem.
# A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like.
# It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix.
proc has_tclbug_regexp_emptystring {} { proc has_tclbug_regexp_emptystring {} {
#The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces
#This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic,

209
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/nav/fs-0.1.0.tm

@ -50,7 +50,7 @@ package require punk::lib
package require punk::args package require punk::args
package require punk::ansi package require punk::ansi
package require punk::winpath package require punk::winpath
package require punk::du package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing.
package require commandstack package require commandstack
#*** !doctools #*** !doctools
#[item] [package {Tcl 8.6}] #[item] [package {Tcl 8.6}]
@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs {
#[list_begin definitions] #[list_begin definitions]
punk::args::define {
@id -id ::punk::nav::fs::d/
@cmd -name punk::nav::fs::d/ -help\
{List directories or directories and files in the current directory or in the
targets specified with the fileglob_or_target glob pattern(s).
If a single target is specified without glob characters, and it exists as a directory,
then the working directory is changed to that target and a listing of that directory
is returned. If the single target specified without glob characters does not exist as
a directory, then it is treated as a glob pattern and the listing is for the current
directory with results filtered to match fileglob_or_target.
If multiple targets or glob patterns are specified, then a separate listing is returned
for each fileglob_or_target pattern.
This function is provided via aliases as ./ and .// with v being inferred from the alias
name, and also as d/ with an explicit v argument.
The ./ and .// forms are more convenient for interactive use.
examples:
./ - list directories in current directory
.// - list directories and files in current directory
./ src/* - list directories in src
.// src/* - list directories and files in src
.// *.txt - list files in current directory with .txt extension
.// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name
(on a case-insensitive filesystem this would also match T*1.txt etc)
.// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name
(glob chars treated as literals due to being in character-class brackets
This will match files beginning with a capital T and not lower case t
even on a case-insensitive filesystem.)
.// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns:
{[t]*} - names beginning with t
{d{e,d}*} - names beginning with de or dd
(on a case-insensitive filesystem the first pattern would also match names beginning with T)
}
@values -min 1 -max -1 -type string
v -type string -choices {/ //} -help\
"
/ - list directories only
// - list directories and files
"
fileglob_or_target -type string -optional true -multiple true -help\
"A glob pattern as supported by Tcl's 'glob' command, to filter results.
If multiple patterns are supplied, then a listing for each pattern is returned.
If no patterns are supplied, then all items are listed."
}
#NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread. #NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread.
#As this function recurses and calls cd multiple times - it's not thread-safe. #As this function recurses and calls cd multiple times - it's not thread-safe.
#Another thread could theoretically cd whilst this is running. #Another thread could theoretically cd whilst this is running.
@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs {
#puts stdout "-->[ansistring VIEW $result]" #puts stdout "-->[ansistring VIEW $result]"
return $result return $result
} else { } else {
set atail [lassign $args cdtarget]
if {[llength $args] == 1} { if {[llength $args] == 1} {
set cdtarget [lindex $args 0] set cdtarget [lindex $args 0]
switch -exact -- $cdtarget { switch -exact -- $cdtarget {
. - ./ { . - ./ {
tailcall punk::nav::fs::d/ tailcall punk::nav::fs::d/ $v
} }
.. - ../ { .. - ../ {
if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} { if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} {
@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs {
if {[string range $cdtarget_copy 0 3] eq "//?/"} { if {[string range $cdtarget_copy 0 3] eq "//?/"} {
#handle dos device paths - convert to normal path for glob testing #handle dos device paths - convert to normal path for glob testing
set glob_test [string range $cdtarget_copy 3 end] set glob_test [string range $cdtarget_copy 3 end]
set cdtarget_is_glob [regexp {[*?]} $glob_test] set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test]
} else { } else {
set cdtarget_is_glob [regexp {[*?]} $cdtarget] set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget]
#todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing.
} }
if {!$cdtarget_is_glob} { if {!$cdtarget_is_glob} {
set cdtarget_file_type [file type $cdtarget] set cdtarget_file_type [file type $cdtarget]
@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs {
} }
set VIRTUAL_CWD $cdtarget set VIRTUAL_CWD $cdtarget
set curdir $cdtarget set curdir $cdtarget
tailcall punk::nav::fs::d/ $v
} else { } else {
set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget]
if {[string match //zipfs:/* $VIRTUAL_CWD]} { if {[string match //zipfs:/* $VIRTUAL_CWD]} {
@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs {
} }
if {[file type $target] eq "directory"} { if {[file type $target] eq "directory"} {
set VIRTUAL_CWD $target set VIRTUAL_CWD $target
tailcall punk::nav::fs::d/ $v
} }
} }
tailcall punk::nav::fs::d/ $v
} }
set curdir $VIRTUAL_CWD set curdir $VIRTUAL_CWD
} else { } else {
@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs {
#globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.) #globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.)
set searchspec [lindex $args 0]
set result "" set result ""
#set chunklist [list] #set chunklist [list]
@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs {
set this_result [dict create] set this_result [dict create]
foreach searchspec $args { foreach searchspec $args {
set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)] set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)]
set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}] #we need to support the same glob chars that Tcl's 'glob' command accepts.
set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]]
#we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean #we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean
#this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough. #this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough.
#lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*) #lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*)
@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs {
set allow_nonportable [dict exists $received -nonportable] set allow_nonportable [dict exists $received -nonportable]
set curdir [pwd] set curdir [pwd]
set fullpath_list [list]
set fullpath_list [list] ;#list of full paths to create.
set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir)
#these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests.
set error_paths [list] set error_paths [list]
foreach p $paths { foreach p $paths {
if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} {
@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] lappend error_paths [list $p "Path '$p' contains null character which is not allowed"]
continue continue
} }
set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)]
#e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. #if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir.
set fullpath [file join $path1 {*}[lrange $args 1 end]]
#Some subpaths of the supplied paths to create may already exist. #Some subpaths of the supplied paths to create may already exist.
#we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. #we should test write permissions on the nearest existing parent of the supplied path to create,
#rather than just on the immediate parent segment of the supplied path itself which may not exist.
set fullpath [file normalize $fullpath]
set parent [file dirname $fullpath] set parent [file dirname $fullpath]
while {![file exists $parent]} { while {![file exists $parent]} {
set parent [file dirname $parent] set parent [file dirname $parent]
@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"]
continue continue
} }
lappend existing_parent_list $parent
lappend fullpath_list $fullpath lappend fullpath_list $fullpath
} }
if {[llength $fullpath_list] != [llength $paths]} { if {[llength $fullpath_list] != [llength $paths]} {
@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs {
set num_created 0 set num_created 0
set error_string "" set error_string ""
foreach fullpath $fullpath_list { foreach fullpath $fullpath_list existing_parent $existing_parent_list {
#calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue.
#set relative_path [file relative $fullpath $existing_parent]
#todo.
if {[catch {file mkdir $fullpath}]} { if {[catch {file mkdir $fullpath}]} {
set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted."
cd $curdir
break break
} }
incr num_created incr num_created
@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs {
if {$error_string ne ""} { if {$error_string ne ""} {
error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered."
} }
d/ $curdir
#display summaries of created directories (which may have already existed) by reusing d/ to get info on them.
set query_paths [lmap v $paths $v/*]
d/ / {*}$query_paths
} }
#todo use unknown to allow d/~c:/etc ?? #todo use unknown to allow d/~c:/etc ??
@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs {
if {![file isdirectory $target]} { if {![file isdirectory $target]} {
error "Folder $target not found" error "Folder $target not found"
} }
d/ $target d/ / $target
} }
@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs {
} }
set relativepath [expr {[file pathtype $searchspec] eq "relative"}] set relativepath [expr {[file pathtype $searchspec] eq "relative"}]
set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
#set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent. #dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent.
#(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict) #(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict)
@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs {
} }
puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location" puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location"
set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location] set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location]
return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
} }
#todo - package as punk::nav::fs #todo - package as punk::nav::fs
@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs {
} }
proc dirfiles_dict {args} { proc dirfiles_dict {args} {
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict] set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict]
lassign [dict values $argd] leaders opts vals lassign [dict values $argd] leaders opts values
set searchspecs [dict values $vals] set searchspecs [dict values $values]
#puts stderr "searchspecs: $searchspecs [llength $searchspecs]" #puts stderr "searchspecs: $searchspecs [llength $searchspecs]"
#puts stdout "arglist: $opts" #puts stdout "arglist: $opts"
@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs {
} }
} }
"\uFFFF" { "\uFFFF" {
set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
if {$searchtail_has_globs} { if {$searchtail_has_globs} {
if {$is_relativesearchspec} { if {$is_relativesearchspec} {
#set location [file dirname [file join $searchbase $searchspec]] #set location [file dirname [file join $searchbase $searchspec]]
@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs {
} else { } else {
set next_opt_with_times [list -with_times $opt_with_times] set next_opt_with_times [list -with_times $opt_with_times]
} }
set ts1 [clock clicks -milliseconds]
if {$is_in_vfs} { if {$is_in_vfs} {
set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times]
} else { } else {
@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs {
} }
} }
} }
set ts2 [clock clicks -milliseconds]
set ts_listing [expr {$ts2 - $ts1}]
set dirs [dict get $listing dirs] set dirs [dict get $listing dirs]
set files [dict get $listing files] set files [dict get $listing files]
@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs {
set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden] set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden]
} }
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#-----------------------------------------------------------------------------------------
set ts1 [clock milliseconds]
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#foreach d $dirs { #foreach d $dirs {
# if {[lindex [file system $d] 0] eq "tclvfs"} { # if {[lindex [file system $d] 0] eq "tclvfs"} {
@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs {
set files $sorted_files set files $sorted_files
set filesizes $sorted_filesizes set filesizes $sorted_filesizes
set ts2 [clock milliseconds]
set ts_sorting [expr {$ts2 - $ts1}]
#-----------------------------------------------------------------------------------------
# -- --- # -- ---
#jmn #jmn
set ts1 [clock milliseconds]
foreach nm [list {*}$dirs {*}$files] { foreach nm [list {*}$dirs {*}$files] {
if {[punk::winpath::illegalname_test $nm]} { if {[punk::winpath::illegalname_test $nm]} {
lappend nonportable $nm lappend nonportable $nm
} }
} }
set ts2 [clock milliseconds]
set ts_nonportable_check [expr {$ts2 - $ts1}]
set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms]
set front_of_dict [dict create location $location searchbase $opt_searchbase] set front_of_dict [dict create location $location searchbase $opt_searchbase]
set listing [dict merge $front_of_dict $listing] set listing [dict merge $front_of_dict $listing]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes] set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info]
return [dict merge $listing $updated] return [dict merge $listing $updated]
} }
@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs {
@id -id ::punk::nav::fs::dirfiles_dict_as_lines @id -id ::punk::nav::fs::dirfiles_dict_as_lines
-stripbase -default 0 -type boolean -stripbase -default 0 -type boolean
-formatsizes -default 1 -type boolean -formatsizes -default 1 -type boolean
-listing -default "/" -choices {/ // //} -listing -default "/" -choices {/ //}
@values -min 1 -max -1 -type dict -unnamed true @values -min 1 -max -1 -type dict -unnamed true
} }
#todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing?
proc dirfiles_dict_as_lines {args} { proc dirfiles_dict_as_lines {args} {
set ts1 [clock milliseconds] #set ts1 [clock milliseconds]
package require overtype package require overtype
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines]
lassign [dict values $argd] leaders opts vals lassign [dict values $argd] leaders opts vals
@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs {
#review - symlink to shortcut? hopefully will just work #review - symlink to shortcut? hopefully will just work
#classify as file or directory - fallback to file if unknown/undeterminable #classify as file or directory - fallback to file if unknown/undeterminable
set finfo_plus [list] set finfo_plus [list]
set ts2 [clock milliseconds] #set ts2 [clock milliseconds]
foreach fdict $finfo { foreach fdict $finfo {
set fname [dict get $fdict file] set fname [dict get $fdict file]
if {[file extension $fname] eq ".lnk"} { if {[file extension $fname] eq ".lnk"} {
@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs {
} }
unset finfo unset finfo
puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" #puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}]
@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs {
#consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path #consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path
#review: punk::winpath calls cygpath! #review: punk::winpath calls cygpath!
#review: file pathtype is platform dependant #review: file pathtype is platform dependant
proc path_to_absolute {path base platform} { proc path_to_absolute {subpath base platform} {
set ptype [file pathtype $path] set ptype [file pathtype $subpath]
if {$ptype eq "absolute"} { if {$ptype eq "absolute"} {
set path_absolute $path set path_absolute $subpath
} elseif {$ptype eq "volumerelative"} { } elseif {$ptype eq "volumerelative"} {
if {$platform eq "windows"} { if {$platform eq "windows"} {
#unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms) #unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms)
if {[string index $path 0] eq "/"} { if {[string index $subpath 0] eq "/"} {
#this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here #this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here
#It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions. #It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions.
#Todo - tidy up. #Todo - tidy up.
package require punk::unixywindows package require punk::unixywindows
set path_absolute [punk::unixywindows::towinpath $path] set path_absolute [punk::unixywindows::towinpath $subpath]
#puts stderr "winpath: $path" #puts stderr "winpath: $path"
} else { } else {
#todo handle volume-relative paths with volume specified c:etc c: #todo handle volume-relative paths with volume specified c:etc c:
@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs {
#The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives #The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives
#Arguably if ...? #Arguably if ...?
#set path_absolute $base/$path #set path_absolute $base/$subpath
set path_absolute $path set path_absolute $subpath
} }
} else { } else {
# unknown what paths are reported as this on other platforms.. treat as absolute for now # unknown what paths are reported as this on other platforms.. treat as absolute for now
set path_absolute $path set path_absolute $subpath
} }
} else { } else {
set path_absolute $base/$path #e.g relative subpath=* base = c:/test -> c:/test/*
} #e.g relative subpath=../test base = c:/test -> c:/test/../test
if {$platform eq "windows"} { #e.g relative subpath=* base = //server/share/test -> //server/share/test/*
if {[punk::winpath::illegalname_test $path_absolute]} { set path_absolute $base/$subpath
set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present }
} #fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues.
} #if {$platform eq "windows"} {
# if {[punk::winpath::illegalname_test $path_absolute]} {
# set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
# }
#}
return $path_absolute return $path_absolute
} }
proc strip_prefix_depth {path prefix} { proc strip_prefix_depth {path prefix} {
@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib {
#[para] Secondary functions that are part of the API #[para] Secondary functions that are part of the API
#[list_begin definitions] #[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
punk::args::define {
@id -id ::punk::nav::fs::lib::is_fileglob
@cmd -name punk::nav::fs::lib::is_fileglob
@values -min 1 -max 1
path -type string -required true -help\
{String to test for being a glob pattern as recognised by the tcl 'glob' command.
If the string represents a path with multiple segments, only the final component
of the path will be tested for glob characters.
Glob patterns in this context are different to globs accepted by TCL's 'string match'.
A glob pattern is any string that contains unescaped * ? { } [ or ].
This will not detect mismatched unescaped braces or brackets.
Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern.
}
}
proc is_fileglob {str} {
#a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser)
set in_escape 0
set segments [file split $str]
set tail [lindex $segments end]
foreach c [split $tail ""] {
if {$in_escape} {
set in_escape 0
} else {
if {$c eq "\\"} {
set in_escape 1
} elseif {$c in [list * ? "\[" "\]" "{" "}" ]} {
return 1
}
}
}
return 0
}
#*** !doctools #*** !doctools

2
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/path-0.1.0.tm

@ -357,7 +357,7 @@ namespace eval punk::path {
} }
} }
} }
puts "==>finalparts: '$finalparts'" #puts "==>finalparts: '$finalparts'"
# using join - {"" "" server share} -> //server/share and {a b} -> a/b # using join - {"" "" server share} -> //server/share and {a b} -> a/b
if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} { if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} {
#backtracking on unix-style path can end up with empty string as only member of finalparts #backtracking on unix-style path can end up with empty string as only member of finalparts

64
src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/punk/winpath-0.1.0.tm

@ -114,6 +114,50 @@ namespace eval punk::winpath {
return $path return $path
} }
} }
proc illegal_char_map_to_doublewide {ch} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
set map [dict create \
"<" "\uFF1C" \
">" "\uFF1E" \
":" "\uFF1A" \
"\"" "\uFF02" \
"/" "\uFF0F" \
"\\" "\uFF3C" \
"|" "\uFF5C" \
"?" "\uFF1F" \
"*" "\uFF0A"]
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
proc illegal_char_map_to_ntfs {ch} {
#windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc.
#see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars
#see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped)
set map [dict create \
"<" "\uF03C" \
">" "\uF03E" \
":" "\uF03A" \
"\"" "\uF022" \
"/" "unknown" \
"\\" "\uF05c" \
"|" "\uF07C" \
"?" "\uF03F" \
"*" "\uF02A"]
#note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API.
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
#we don't validate that path is actually illegal because we don't know the full range of such names. #we don't validate that path is actually illegal because we don't know the full range of such names.
#The caller can apply this to any path. #The caller can apply this to any path.
#don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently) #don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently)
@ -200,8 +244,15 @@ namespace eval punk::winpath {
set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"]
#we need to exclude things like path/.. path/. #we need to exclude things like path/.. path/.
foreach seg [file split $path] { set segments [file split $path]
if {$seg in [list . ..]} { if {[file pathtype $path] eq "absolute"} {
#absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments.
set checksegments [lrange $segments 1 end]
} else {
set checksegments $segments
}
foreach seg $checksegments {
if {$seg in {. ..}} {
#review - what if there is a folder or file that actually has a name such as . or .. ? #review - what if there is a folder or file that actually has a name such as . or .. ?
#unlikely in normal use - but could done deliberately for bad reasons? #unlikely in normal use - but could done deliberately for bad reasons?
#We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem. #We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem.
@ -220,10 +271,17 @@ namespace eval punk::winpath {
#only check for actual space as other whitespace seems to work without being stripped #only check for actual space as other whitespace seems to work without being stripped
#trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph
if {[string index $seg end] in [list " " "."]} { if {[string index $seg end] in {" " .}} {
#windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc) #windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc)
return 1 return 1
} }
#set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams.
set re {[<>:"/\\|?*]}
if {[regexp $re $seg]} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
} }
#glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph) #glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph)
#- they seem to be readable from cmd and tclsh as is. #- they seem to be readable from cmd and tclsh as is.

211
src/vfs/_vfscommon.vfs/modules/punk-0.1.tm

@ -6066,9 +6066,218 @@ namespace eval punk {
set pipe [punk::path_list_pipe $glob] set pipe [punk::path_list_pipe $glob]
{*}$pipe {*}$pipe
} }
proc path {{glob *}} { proc path_basic {{glob *}} {
set pipe [punk::path_list_pipe $glob] set pipe [punk::path_list_pipe $glob]
{*}$pipe |> list_as_lines {*}$pipe |> list_as_lines
}
namespace eval argdoc {
punk::args::define {
@id -id ::punk::path
@cmd -name "punk::path" -help\
"Introspection of the PATH environment variable.
This tool will examine executables within each PATH entry and show which binaries
are overshadowed by earlier PATH entries. It can also be used to examine the contents of each PATH entry, and to filter results using glob patterns."
@opts
-binglobs -type list -default {*} -help "glob pattern to filter results. Default '*' to include all entries."
@values -min 0 -max -1
glob -type string -default {*} -multiple true -optional 1 -help "Case insensitive glob pattern to filter path entries. Default '*' to include all PATH directories."
}
}
proc path {args} {
set argd [punk::args::parse $args withid ::punk::path]
lassign [dict values $argd] leaders opts values received
set binglobs [dict get $opts -binglobs]
set globs [dict get $values glob]
if {$::tcl_platform(platform) eq "windows"} {
set sep ";"
} else {
# : ok for linux/bsd ... mac?
set sep ":"
}
set all_paths [split [string trimright $::env(PATH) $sep] $sep]
set filtered_paths $all_paths
if {[llength $globs]} {
set filtered_paths [list]
foreach p $all_paths {
foreach g $globs {
if {[string match -nocase $g $p]} {
lappend filtered_paths $p
break
}
}
}
}
#Windows executable search location order:
#1. The current directory.
#2. The directories that are listed in the PATH environment variable.
# within each directory windows uses the PATHEXT environment variable to determine
#which files are considered executable and in which order they are considered.
#So for example if PATHEXT is set to .COM;.EXE;.BAT;.CMD then if there are files
#with the same name but different extensions in the same directory, then the one
#with the extension that appears first in PATHEXT will be executed when that name is called.
#On windows platform, PATH entries can be case-insensitively duplicated - we want to treat these as the same path for the purposes of overshadowing - but we want to show the actual paths in the output.
#Duplicate PATH entries are also a fairly likely possibility on all platforms.
#This is likely a misconfiguration, but we want to be able to show it in the output - as it can affect which executables are overshadowed by which PATH entries.
#By default we don't want to display all executables in all PATH entries - as this can be very verbose
#- but we want to be able to show which executables are overshadowed by which PATH entries,
#and to be able to filter the PATH entries and the executables using glob patterns.
#To achieve this we will build two dicts - one keyed by normalized path, and one keyed by normalized executable name.
#The path dict will contain the original paths that correspond to each normalized path, and the indices of those paths
#in the original PATH list. The executable dict will contain the paths and path indices where each executable is found,
#and the actual executable names (with case and extensions as they appear on the filesystem). We will also build a
#dict keyed by path index which contains the list of executables in that path - to make it easy to show which
#executables are overshadowed by which paths.
set d_path_info [dict create] ;#key is normalized path (e.g case-insensitive on windows).
set d_bin_info [dict create] ;#key is normalized executable name (e.g case-insensitive on windows, or callable with extensions stripped off).
set d_index_executables [dict create] ;#key is path index, value is list of executables in that path
set path_idx 0
foreach p $all_paths {
if {$::tcl_platform(platform) eq "windows"} {
set pnorm [string tolower $p]
} else {
set pnorm $p
}
if {![dict exists $d_path_info $pnorm]} {
dict set d_path_info $pnorm [dict create original_paths [list $p] indices [list $path_idx]]
set executables [list]
if {[file isdirectory $p]} {
#get all files that are executable in this path.
#If we don't normalize the path here - then trailing backslashes on windows can cause a problem with the -tail glob returning a leading slash on the executable names.
set pnormglob [file normalize $p]
if {$::tcl_platform(platform) eq "windows"} {
#Sometimes PATHEXT includes an entry of just a dot - which means files with no extension are considered executable.
#We need to account for this in our glob pattern.
set pathexts [list]
if {[info exists ::env(PATHEXT)]} {
set env_pathexts [split $::env(PATHEXT) ";"]
#set pathexts [lmap e $env_pathexts {string tolower $e}]
foreach pe $env_pathexts {
if {$pe eq "."} {
continue
}
lappend pathexts [string tolower $pe]
}
} else {
set env_pathexts [list]
#default PATHEXT if not set - according to Microsoft docs
set pathexts [list .com .exe .bat .cmd]
}
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set has_pathext 1
break
}
}
if {!$has_pathext} {
foreach pe $pathexts {
lappend binglobs "$bg$pe"
}
}
}
set lc_binglobs [lmap e $binglobs {string tolower $e}]
if {"." in $pathexts} {
foreach bg $binglobs {
set has_pathext 0
foreach pe $pathexts {
if {[string match -nocase "*$pe" $bg]} {
set base [string range $bg 0 [expr {[string length $bg] - [string length $pe] - 1}]]
set has_pathext 1
break
}
}
if {$has_pathext} {
if {[string tolower $base] ni $lc_binglobs} {
lappend binglobs "$base"
}
}
}
}
#TCL's glob on windows is case-insensitive, but in some cases return the result with the case as globbed for regardless of the actual case on the filesystem.
#(This seems to occur when the pattern does *not* contain a wildcard and is probably a bug)
#We would rather report results with the actual case as they appear on the filesystem - so we try to normalize the results.
#The normalization doesn't work as expected - but can be worked around by using 'string range $e 0 end' instead of just $e - which seems to force Tcl to give us the actual case from the filesystem rather than the case as globbed for.
#(another workaround is to use a degenerate case glob e.g when looking for 'SDX.exe' to glob for '[S]DX.exe')
set globresults [lsort -unique [glob -nocomplain -directory $pnormglob -types {f x} {*}$binglobs]]
set executables [list]
foreach e $globresults {
puts stderr "glob result: $e"
puts stderr "normalized executable name: [file tail [file normalize [string range $e 0 end]]]]"
lappend executables [file tail [file normalize $e]]
}
} else {
set executables [lsort -unique [glob -nocomplain -directory $p -types {f x} -tail {*}$binglobs]]
}
}
dict set d_index_executables $path_idx $executables
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
if {![dict exists $d_bin_info $exenorm]} {
dict set d_bin_info $exenorm [dict create path_indices [list $path_idx] paths [list $p] executable_names [list $exe]]
} else {
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
} else {
#duplicate path entry - add to list of original paths for this normalized path
# Tcl's dict lappend takes the arguments dictionaryVariable key value ?value ...?
# we need to extract the inner dictionary containig lists to append to, and then set it back after appending to the lists within it.
set pathdata [dict get $d_path_info $pnorm]
dict lappend pathdata original_paths $p
dict lappend pathdata indices $path_idx
dict set d_path_info $pnorm $pathdata
#we don't need to add executables for this path - as they will be the same as the original path that we have already processed.
#However we do need to add the path index to the path_indices for each executable that is found in this path - as this will affect which paths are shown as overshadowing which executables.
set executables [dict get $d_index_executables [lindex [dict get $d_path_info $pnorm indices] 0]] ;#get executables for this path
foreach exe $executables {
if {$::tcl_platform(platform) eq "windows"} {
set exenorm [string tolower $exe]
} else {
set exenorm $exe
}
#dict lappend d_bin_info $exenorm path_indices $path_idx paths $p executable_names $exe
set bindata [dict get $d_bin_info $exenorm]
dict lappend bindata path_indices $path_idx
dict lappend bindata paths $p
dict lappend bindata executable_names $exe
dict set d_bin_info $exenorm $bindata
}
}
incr path_idx
}
#temporary debug output to check dicts are being built correctly
set debug ""
append debug "Path info dict:" \n
append debug [showdict $d_path_info] \n
append debug "Binary info dict:" \n
append debug [showdict $d_bin_info] \n
append debug "Index executables dict:" \n
append debug [showdict $d_index_executables] \n
#return $debug
puts stdout $debug
} }
#------------------------------------------------------------------- #-------------------------------------------------------------------

500
src/vfs/_vfscommon.vfs/modules/punk/du-0.1.0.tm

@ -479,7 +479,7 @@ namespace eval punk::du {
} }
namespace eval lib { namespace eval lib {
variable du_literal variable du_literal
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ] variable winfile_attributes [dict create 16 [list directory] 32 [list archive] 1024 [list reparse_point] 18 [list directory hidden] 34 [list archive hidden] ]
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this
@ -489,7 +489,11 @@ namespace eval punk::du {
return [dict get $winfile_attributes $bitmask] return [dict get $winfile_attributes $bitmask]
} else { } else {
#list/dict shimmering? #list/dict shimmering?
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end] #return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
set decoded [twapi::decode_file_attributes $bitmask]
dict set winfile_attributes $bitmask $decoded
return $decoded
} }
} }
variable win_reparse_tags variable win_reparse_tags
@ -563,23 +567,25 @@ namespace eval punk::du {
#then twapi::device_ioctl (win32 DeviceIoControl) #then twapi::device_ioctl (win32 DeviceIoControl)
#then parse buffer somehow (binary scan..) #then parse buffer somehow (binary scan..)
#https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913 #https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-fscc/b41f1cbf-10df-4a47-98d4-1c52a833d913
punk::args::define {
proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
set argd [punk::args::parse $args withdef {
@id -id ::punk::du::lib::Get_attributes_from_iteminfo @id -id ::punk::du::lib::Get_attributes_from_iteminfo
-debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)" -debug -default 0 -help "set 1 for raw data on -debugchannel (default stderr)"
-debugchannel -default stderr -help "channel to write debug output, or none to append to output" -debugchannel -default stderr -help "channel to write debug output, or none to append to output"
@values -min 1 -max 1 @values -min 1 -max 1
iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'" iteminfo -help "iteminfo dict as set by 'twapi::find_file_next <iterator> iteminfo'"
}] }
set opts [dict get $argd opts] #don't use punk::args::parse here. this is a low level proc that is called in a tight loop (each entry in directory) and we want to avoid the overhead of parsing args each time. instead we will just take a list of args and parse manually with the expectation that the caller will pass the correct args.
set iteminfo [dict get $argd values iteminfo] proc Get_attributes_from_iteminfo {args} {
variable win_reparse_tags_by_int
#set argd [punk::args::parse $args withid ::punk::du::lib::Get_attributes_from_iteminfo]
set defaults [dict create -debug 0 -debugchannel stderr]
set opts [dict merge $defaults [lrange $args 0 end-1]]
set iteminfo [lindex $args end]
set opt_debug [dict get $opts -debug] set opt_debug [dict get $opts -debug]
set opt_debugchannel [dict get $opts -debugchannel] set opt_debugchannel [dict get $opts -debugchannel]
#-longname is placeholder - caller needs to set #-longname is placeholder - caller needs to set
set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname {} -system 0] set result [dict create -archive 0 -hidden 0 -longname [dict get $iteminfo name] -readonly 0 -shortname [dict get $iteminfo altname] -system 0 -fileattributes {} -raw {}]
if {$opt_debug} { if {$opt_debug} {
set dbg "iteminfo returned by find_file_open\n" set dbg "iteminfo returned by find_file_open\n"
append dbg [pdict -channel none iteminfo] append dbg [pdict -channel none iteminfo]
@ -592,23 +598,23 @@ namespace eval punk::du {
} }
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
if {"hidden" in $attrinfo} { foreach attr $attrinfo {
switch -- $attr {
hidden {
dict set result -hidden 1 dict set result -hidden 1
} }
if {"system" in $attrinfo} { system {
dict set result -system 1 dict set result -system 1
} }
if {"readonly" in $attrinfo} { readonly {
dict set result -readonly 1 dict set result -readonly 1
} }
dict set result -shortname [dict get $iteminfo altname] reparse_point {
dict set result -fileattributes $attrinfo
if {"reparse_point" in $attrinfo} {
#the twapi API splits this 32bit value for us #the twapi API splits this 32bit value for us
set low_word [dict get $iteminfo reserve0] set low_word [dict get $iteminfo reserve0]
set high_word [dict get $iteminfo reserve1] set high_word [dict get $iteminfo reserve1]
# 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 # 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
# 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 # 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
#+-+-+-+-+-----------------------+-------------------------------+ #+-+-+-+-+-----------------------+-------------------------------+
#|M|R|N|R| Reserved bits | Reparse tag value | #|M|R|N|R| Reserved bits | Reparse tag value |
#+-+-+-+-+-----------------------+-------------------------------+ #+-+-+-+-+-----------------------+-------------------------------+
@ -617,9 +623,13 @@ namespace eval punk::du {
if {[dict exists $win_reparse_tags_by_int $low_int]} { if {[dict exists $win_reparse_tags_by_int $low_int]} {
dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int] dict set result -reparseinfo [dict get $win_reparse_tags_by_int $low_int]
} else { } else {
dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex 0x[format %X $low_int] meaning "unknown reparse tag int:$low_int"] dict set result -reparseinfo [dict create tag "<UNKNOWN>" hex "0x[format %X $low_int]" meaning "unknown reparse tag int:$low_int"]
}
} }
} }
}
#dict set result -shortname [dict get $iteminfo altname]
dict set result -fileattributes $attrinfo
dict set result -raw $iteminfo dict set result -raw $iteminfo
return $result return $result
} }
@ -652,27 +662,337 @@ namespace eval punk::du {
catch {twapi::find_file_close $iterator} catch {twapi::find_file_close $iterator}
} }
} }
proc resolve_characterclass {cclass} {
#takes the inner value from a tcl square bracketed character class and converts to a list of characters.
#todo - we need to detect character class ranges such as [1-3] or [a-z] or [A-Z] and convert to the appropriate specific characters
#e.g a-c-3 -> a b c - 3
#e.g a-c-3-5 -> a b c - 3 4 5
#e.g a-c -> a b c
#e.g a- -> a -
#e.g -c-e -> - c d e
#the tcl character class does not support negation or intersection - so we can ignore those possibilities for now.
#in this context we do not need to support named character classes such as [:digit:]
set chars [list]
set i 0
set len [string length $cclass]
set accept_range 0
while {$i < $len} {
set ch [string index $cclass $i]
if {$ch eq "-"} {
if {$accept_range} {
set start [string index $cclass [expr {$i - 1}]]
set end [string index $cclass [expr {$i + 1}]]
if {$start eq "" || $end eq ""} {
#invalid range - treat - as literal
if {"-" ni $chars} {
lappend chars "-"
}
} else {
#we have a range - add all chars from previous char to next char
#range may be in either direction - e.g a-c or c-a but we don't care about the order of our result.
if {$start eq $end} {
#degenerate range - treat as single char
if {$start ni $chars} {
lappend chars $start
}
} else {
if {[scan $start %c] < [scan $end %c]} {
set c1 [scan $start %c]
set c2 [scan $end %c]
} else {
set c1 [scan $end %c]
set c2 [scan $start %c]
}
for {set c $c1} {$c <= $c2} {incr c} {
set char [format %c $c]
if {$char ni $chars} {
lappend chars $char
}
}
}
incr i ;#skip end char as it's already included in range
}
set accept_range 0
} else {
#we have a literal - add to list and allow for possible range if next char is also a -
if {"-" ni $chars} {
lappend chars "-"
}
set accept_range 1
}
} else { #we have a literal - add to list and allow for possible range if next char is also a -
if {$ch ni $chars} {
lappend chars $ch
}
set accept_range 1
}
incr i
}
return $chars
}
#return a list of broader windows API patterns that can retrieve the same set of files as the tcl glob, with as few calls as possible.
#first attempt - supports square brackets nested in braces and nested braces but will likely fail on braces within character classes.
# e.g {*[\{]*} is a valid tcl glob
#todo - write tests.
#should also support {*\{*} matching a file such as a{blah}b
#Note that despite windows defaulting to case-insensitive matching, we can have individual folders that are set to case-sensitive, or mounted case-sensitive filesystems such as WSL.
#So we need to preserve the case of the supplied glob and rely on post-filtering of results to achieve correct matching, rather than trying to convert to lowercase and relying on windows API case-insensitivity.
proc tclglob_equivalents {tclglob {case_sensitive_filesystem 0}} {
#windows API function in use is the FindFirstFile set of functions.
#these support wildcards * and ? *only*.
#examples of tcl globs that are not directly supported by windows API pattern matching - but could be converted to something that is supported
# {abc[1-3].txt} -> {abc?.txt}
# {abc[XY].txt} -> {abc?.text} - windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
# {S,t}* -> * we will need to convert to multiple patterns and pass them separately to the API, or convert to a single pattern * and do all filtering after the call.
# {{S,t}*.txt} -> {S*.txt} {T*.txt}
# *.{txt,log} -> {*.txt} {*.log}
# {\[abc\].txt} -> {[abc].txt} - remove escaping of special chars - windows API pattern matching doesn't support escaping but treats special chars as literals
set gchars [split $tclglob ""]
set winglob_list [list ""]
set tclregexp_list [list ""] ;#we will generate regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
set in_brackets 0
set esc_next 0
set brace_depth 0
set brace_content ""
set braced_alternatives [list]
set brace_is_normal 0
set cclass_content ""
foreach ch $gchars {
if {$esc_next} {
if {$in_brackets} {
append cclass_content $ch
continue
} elseif {$brace_depth} {
append brace_content $ch
continue
}
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set tclregexp_list [lmap re $tclregexp_list {append re [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]}]
set esc_next 0
continue
}
if {$ch eq "\{"} {
if {$brace_depth} {
#we have an opening brace char inside braces
#Let the brace processing handle it as it recurses.
incr brace_depth 1
append brace_content $ch
continue
}
incr brace_depth 1
set brace_content ""
} elseif {$ch eq "\}"} {
if {$brace_depth > 1} {
#we have a closing brace char inside braces
append brace_content $ch
incr brace_depth -1
continue
}
#process brace_content representing a list of alternatives
#handle list of alternatives - convert {txt,log} to *.txt;*.log
#set alternatives [split $brace_content ","]
lappend braced_alternatives $brace_content
set alternatives $braced_alternatives
set braced_alternatives [list]
set brace_content ""
incr brace_depth -1
set alt_winpatterns [list]
set alt_regexps [list]
foreach alt $alternatives {
set subresult [tclglob_equivalents $alt]
lappend alt_winpatterns {*}[dict get $subresult winglobs]
lappend alt_regexps {*}[dict get $subresult tclregexps]
}
set next_winglob_list [list]
set next_regexp_list [list]
foreach wg $winglob_list re $tclregexp_list {
#puts "wg: $wg"
#puts "re: $re"
foreach alt_wp $alt_winpatterns alt_re $alt_regexps {
#puts " alt_wp: $alt_wp"
#puts " alt_re: $alt_re"
lappend next_winglob_list "$wg$alt_wp"
set alt_re_no_caret [string range $alt_re 1 end]
lappend next_regexp_list "${re}${alt_re_no_caret}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\["} {
#windows API pattern matching doesn't support character classes but we can convert to ? which matches any single char
if {!$brace_depth} {
set in_brackets 1
} else {
#we have a [ char inside braces
#Let the brace processing handle it as it recurses.
#but set a flag so that opening and closing braces aren't processed as normal if they are inside the brackets.
set brace_is_normal 1
append brace_content $ch
}
} elseif {$ch eq "\]"} {
if {$brace_depth} {
#we have a ] char inside braces
#Let the brace processing hanele it as it recurses.
append brace_content $ch
continue
}
set in_brackets 0
set charlist [resolve_characterclass $cclass_content]
set cclass_content ""
set next_winglob_list [list]
set next_regexp_list [list]
foreach c $charlist {
#set winglob_list [lmap wg $winglob_list {append wg $c}]
foreach wg $winglob_list {
lappend next_winglob_list "$wg$c"
}
foreach re $tclregexp_list {
set c_escaped [regsub -all {([\*\.\+\^\$\(\)\|\{\}\[\]\\])} $c {\\\1}]
lappend next_regexp_list "${re}${c_escaped}"
}
}
set winglob_list $next_winglob_list
set tclregexp_list $next_regexp_list
} elseif {$ch eq "\\"} {
if {$in_brackets} {
append cclass_content $ch
#append to character class but also set esc_next so that next char is treated as literal even if it's a special char in character classes such as - or ] or \ itself.
set esc_next 1
continue
}
if {$brace_depth} {
#we have a \ char inside braces
#Let the brace processing handle it as it recurses.
append brace_content $ch
set esc_next 1
continue
}
set esc_next 1
continue
} else {
if {$in_brackets} {
append cclass_content $ch
continue
}
if {!$brace_depth} {
set winglob_list [lmap wg $winglob_list {append wg $ch}]
set re_ch [regsub -all {([\.\+\^\$\(\)\|\{\}\[\]\\])} $ch {\\\1}]
if {[string length $re_ch] == 1} {
switch -- $re_ch {
"?" {set re_ch "."}
"*" {set re_ch ".*"}
default {
#we could use the same mixed case filter here for both sensitive and insensitive filesystems,
#because the API filtering will already have done the restriction,
#and so a more permissive regex that matches both cases will still only match the results that the API call returns,
#which will be correct based on the case-sensitivity of the filesystem.
#It is only really necessary to be more restrictive in the parts of the regex that correspond to literal chars in the glob.
#ie in the parts of the original glob that were in square brackets.
if {!$case_sensitive_filesystem} {
# add character class of upper and lower for any literal chars, to ensure we match the case-insensitivity of the windows API pattern matching - as we are going to use these regexps to post-filter the results of the windows API call, to ensure we only return results that match the original tcl glob.
if {[string is upper $re_ch]} {
set re_ch [string cat "\[" $re_ch [string tolower $re_ch] "\]"]
} elseif {[string is lower $re_ch]} {
set re_ch [string cat "\[" [string toupper $re_ch] $re_ch "\]"]
} else {
#non-alpha char - no need to add case-insensitivity
}
}
}
}
}
set tclregexp_list [lmap re $tclregexp_list {append re $re_ch}]
} else {
#we have a literal char inside braces - add to current brace_content
if {$brace_depth == 1 && $ch eq ","} {
lappend braced_alternatives $brace_content
set brace_content ""
} else {
append brace_content $ch
}
}
}
}
#sanity check
if {[llength $winglob_list] != [llength $tclregexp_list]} {
error "punk::du::lib::tclglob_equivalents: internal error - winglob_list and tclregexp_list have different lengths: [llength $winglob_list] vs [llength $tclregexp_list]"
}
set tclregexp_list [lmap re $tclregexp_list {string cat ^ $re}]
return [dict create winglobs $winglob_list tclregexps $tclregexp_list]
}
#todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed? #todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed?
namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs namespace export attributes_twapi du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided du_dirlisting_tclvfs
# get listing without using unix-tools (may not be installed on the windows system) # get listing without using unix-tools (may not be installed on the windows system)
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function) # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
# This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance # This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance
#todo: consider running a thread to do a full dirlisting for the folderpath in the background when multiple patterns are generated from the supplied glob.
# we may generate multiple listing calls depending on the patterns supplied, so if the full listing returns before
#we have finished processing all patterns, we can use the full listing to avoid making further calls to the windows API for the same folder.
#For really large folders and a moderate number of patterns, this could be a significant performance improvement.
proc du_dirlisting_twapi {folderpath args} { proc du_dirlisting_twapi {folderpath args} {
set defaults [dict create\ set defaults [dict create\
-glob *\ -glob *\
-filedebug 0\ -filedebug 0\
-patterndebug 0\
-with_sizes 1\ -with_sizes 1\
-with_times 1\ -with_times 1\
] ]
set opts [dict merge $defaults $args] set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob] set tcl_glob [dict get $opts -glob]
#todo - detect whether folderpath is on a case-sensitive filesystem - if so we need to preserve case in our winglobs and tcl regexps, and not add the [Aa] style entries for case-insensitivity to the regexps.
set case_sensitive_filesystem 0 ;#todo - consider detecting this properly.
#Can be per-folder on windows depending on mount options and underlying filesystem - so we would need to detect this for each folder we process rather than just once at the start of the program.
#In practice leaving it as case_sensitve_filesystem 0 and generating regexps that match both cases for literal chars in the glob should still work correctly,
#as the windows API pattern matching will already filter based on the case-sensitivity of the filesystem,
#so we will only get results that match the case-sensitivity of the filesystem, and our more permissive regexps will still match those results correctly.
#Passing case_sensitive_filesystem 1 will generate regexps that match the case of the supplied glob, which may be more efficient for post-filtering if we are on a
#case-sensitive filesystem, but will still work correctly if we are on a case-insensitive filesystem.
#Note: we only use this to adjust the filtering regexps we generate from the tcl glob.
#The windows API pattern match will already filter based on the case-sensitivity of the filesystem
# so it's not strictly necessary for the regexps to match the case-sensitivity of the filesystem.
set globs_processed [tclglob_equivalents $tcl_glob]
#we also need to generate tcl 'string match' patterns from the tcl glob, to check the results of the windows API call against the original glob - as windows API pattern matching is not the same as tcl glob.
#temp
#set win_glob_list [list $tcl_glob]
set win_glob_list [dict get $globs_processed winglobs]
set tcl_regex_list [dict get $globs_processed tclregexps]
#review
# our glob is going to be passed to twapi::find_file_open - which uses windows api pattern matching - which is not the same as tcl glob.
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes] set opt_with_sizes [dict get $opts -with_sizes]
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_filedebug [dict get $opts -filedebug] ;#per file set opt_filedebug [dict get $opts -filedebug] ;#per file
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_patterndebug [dict get $opts -patterndebug]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set ftypes [list f d l] set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} { if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean - (f false vs f file!) #don't use string is boolean - (f false vs f file!)
@ -711,18 +1031,36 @@ namespace eval punk::du {
} }
} }
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- # -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
set errors [dict create] set errors [dict create]
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/ set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
# return it so it can be stored and tried as an alternative for problem paths # return it so it can be stored and tried as an alternative for problem paths
#puts stderr ">>> glob: $opt_glob"
#REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames foreach win_glob $win_glob_list tcl_re $tcl_regex_list {
if {$opt_patterndebug} {
puts stderr ">>>du_dirlisting_twapi winglob: $win_glob"
puts stderr ">>>du_dirlisting_twapi tclre : $tcl_re"
}
#REVIEW! windows api pattern match https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/ #https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too. #we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases. # using * all the time may be inefficient - so we might be able to avoid that in some cases.
try { try {
#glob of * will return dotfiles too on windows #glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field set iterator [twapi::find_file_open $folderpath/$win_glob -detail basic] ;# -detail full only adds data to the altname field
} on error args { } on error args {
try { try {
if {[string match "*denied*" $args]} { if {[string match "*denied*" $args]} {
@ -745,11 +1083,11 @@ namespace eval punk::du {
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review) #so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob #Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?) #also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} { #if {$win_glob ne "*" && [regexp {[?*]} $win_glob]} {}
if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} { if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} {
#looks like an ordinary no results for chosen glob #looks like an ordinary no results for chosen glob
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors] #return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
} continue
} }
@ -786,9 +1124,10 @@ namespace eval punk::du {
set parent [file dirname $folderpath] set parent [file dirname $folderpath]
set badtail [file tail $folderpath] set badtail [file tail $folderpath]
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames set iterator [twapi::find_file_open $parent/* -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} { while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name] set nm [dict get $iteminfo name]
puts stderr "du_dirlisting_twapi: data for $folderpath: name:'$nm' iteminfo:$iteminfo"
if {$nm eq $badtail} { if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname] set fixedtail [dict get $iteminfo altname]
break break
@ -808,7 +1147,7 @@ namespace eval punk::du {
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail] #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW. #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath [file join $parent $fixedtail] set fixedpath $parent/$fixedtail
append errmsg "retrying with with windows dos device path $fixedpath\n" append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg puts stderr $errmsg
@ -837,39 +1176,47 @@ namespace eval punk::du {
} }
} }
set dirs [list] #jjj
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set linkinfo [dict create]
set debuginfo [dict create]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
while {[twapi::find_file_next $iterator iteminfo]} { while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name] set nm [dict get $iteminfo name]
#recheck glob if {![regexp $tcl_re $nm]} {
#review!
if {![string match $opt_glob $nm]} {
continue continue
} }
if {$nm in {. ..}} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
#set attrinfo [decode_win_attributes [dict get $iteminfo attrs]] #set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
set ftype "" #set ftype ""
set do_sizes 0
set do_times 0
#attributes applicable to any classification #attributes applicable to any classification
set fullname [file_join_one $folderpath $nm] set fullname [file_join_one $folderpath $nm]
set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict set attrdict [Get_attributes_from_iteminfo -debug $opt_filedebug -debugchannel none $iteminfo] ;#-debugchannel none puts -debug key in the resulting dict
if {[dict get $attrdict -hidden]} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
set file_attributes [dict get $attrdict -fileattributes] set file_attributes [dict get $attrdict -fileattributes]
set is_reparse_point [expr {"reparse_point" in $file_attributes}]
set is_directory [expr {"directory" in $file_attributes}]
set linkdata [dict create] set linkdata [dict create]
# ----------------------------------------------------------- # -----------------------------------------------------------
#main classification #main classification
if {"reparse_point" in $file_attributes} { if {$is_reparse_point} {
#this concept doesn't correspond 1-to-1 with unix links #this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points #https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return #review - and see which if any actually belong in the links key of our return
@ -893,48 +1240,55 @@ namespace eval punk::du {
# #
#links are techically files too, whether they point to a file/dir or nothing. #links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname lappend links $fullname
set ftype "l" #set ftype "l"
if {"l" in $sized_types} {
set do_sizes 1
}
if {"l" in $timed_types} {
set do_times 1
}
dict set linkdata linktype reparse_point dict set linkdata linktype reparse_point
dict set linkdata reparseinfo [dict get $attrdict -reparseinfo] dict set linkdata reparseinfo [dict get $attrdict -reparseinfo]
if {"directory" ni $file_attributes} { if {"directory" ni $file_attributes} {
dict set linkdata target_type file dict set linkdata target_type file
} }
} }
if {"directory" in $file_attributes} { if {$is_directory} {
if {$nm in {. ..}} { #if {$nm in {. ..}} {
continue # continue
} #}
if {"reparse_point" ni $file_attributes} { if {!$is_reparse_point} {
lappend dirs $fullname lappend dirs $fullname
set ftype "d" #set ftype "d"
if {"d" in $sized_types} {
set do_sizes 1
}
if {"d" in $timed_types} {
set do_times 1
}
} else { } else {
#other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections #other mechanisms can't immediately classify a link as file vs directory - so we don't return this info in the main dirs/files collections
dict set linkdata target_type directory dict set linkdata target_type directory
} }
} }
if {"reparse_point" ni $file_attributes && "directory" ni $file_attributes} { if {!$is_reparse_point && !$is_directory} {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort? #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname lappend files $fullname
if {"f" in $sized_types} { if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size] lappend filesizes [dict get $iteminfo size]
set do_sizes 1
}
if {"f" in $timed_types} {
set do_times 1
} }
set ftype "f" #set ftype "f"
} }
# ----------------------------------------------------------- # -----------------------------------------------------------
if {[dict get $attrdict -hidden]} { if {$do_sizes} {
lappend flaggedhidden $fullname
}
if {[dict get $attrdict -system]} {
lappend flaggedsystem $fullname
}
if {[dict get $attrdict -readonly]} {
lappend flaggedreadonly $fullname
}
if {$ftype in $sized_types} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]] dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
} }
if {$ftype in $timed_types} { if {$do_times} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970) #convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds #We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict #but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
@ -947,20 +1301,18 @@ namespace eval punk::du {
if {[dict size $linkdata]} { if {[dict size $linkdata]} {
dict set linkinfo $fullname $linkdata dict set linkinfo $fullname $linkdata
} }
if {[dict exists $attrdict -debug]} {
dict set debuginfo $fullname [dict get $attrdict -debug]
}
} }
twapi::find_file_close $iterator twapi::find_file_close $iterator
set vfsmounts [get_vfsmounts_in_folder $folderpath] }
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts set effective_opts $opts
dict set effective_opts -with_times $timed_types dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types dict set effective_opts -with_sizes $sized_types
#also determine whether vfs. file system x is *much* faster than file attributes #also determine whether vfs. file system x is *much* faster than file attributes
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
return [list dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors] return [dict create dirs $dirs vfsmounts $vfsmounts links $links linkinfo $linkinfo files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts debuginfo $debuginfo errors $errors]
} }
proc get_vfsmounts_in_folder {folderpath} { proc get_vfsmounts_in_folder {folderpath} {
set vfsmounts [list] set vfsmounts [list]
@ -991,9 +1343,11 @@ namespace eval punk::du {
#work around the horrible tilde-expansion thing (not needed for tcl 9+) #work around the horrible tilde-expansion thing (not needed for tcl 9+)
proc file_join_one {base newtail} { proc file_join_one {base newtail} {
if {[string index $newtail 0] ne {~}} { if {[string index $newtail 0] ne {~}} {
return [file join $base $newtail] #return [file join $base $newtail]
return $base/$newtail
} }
return [file join $base ./$newtail] #return [file join $base ./$newtail]
return $base/./$newtail
} }
@ -1121,7 +1475,7 @@ namespace eval punk::du {
#ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now. #ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now.
#struct::set will affect order: tcl vs critcl give different ordering! #struct::set will affect order: tcl vs critcl give different ordering!
set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links] set files [punk::lib::struct_set_diff_unique [list {*}$hfiles {*}$files[unset files]] $links]
set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links [file join $folderpath .] [file join $folderpath ..]]] set dirs [punk::lib::struct_set_diff_unique [list {*}$hdirs {*}$dirs[unset dirs] ] [list {*}$links $folderpath/. $folderpath/..]]
#---- #----

32
src/vfs/_vfscommon.vfs/modules/punk/lib-0.1.6.tm

@ -132,6 +132,38 @@ tcl::namespace::eval punk::lib::check {
#These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author) #These are just a selection of bugs relevant to punk behaviour (or of specific interest to the author)
#Not any sort of comprehensive check of known tcl bugs. #Not any sort of comprehensive check of known tcl bugs.
#These are reported in warning output of 'help tcl' - or used for workarounds in some cases. #These are reported in warning output of 'help tcl' - or used for workarounds in some cases.
proc has_tclbug_caseinsensitiveglob_windows {} {
#https://core.tcl-lang.org/tcl/tktview/108904173c - not accepted
if {"windows" ne $::tcl_platform(platform)} {
set bug 0
} else {
set tmpdir [file tempdir]
set testfile [file join $tmpdir "bugtest"]
set fd [open $testfile w]
puts $fd test
close $fd
set globresult [glob -nocomplain -directory $tmpdir -types f -tail BUGTEST {BUGTES{T}} {[B]UGTEST} {\BUGTEST} BUGTES? BUGTEST*]
foreach r $globresult {
if {$r ne "bugtest"} {
set bug 1
break
}
}
return [dict create bug $bug bugref 108904173c description "glob with patterns on windows can return inconsistently cased results - apparently by design." level warning]
#possibly related anomaly is that a returned result with case that doesn't match that of the file in the underlying filesystem can't be normalized
# to the correct case without doing some sort of string manipulation on the result such as 'string range $f 0 end' to affect the internal representation.
}
}
#todo: it would be nice to test for the other portion of issue 108904173c - that on unix with a mounted case-insensitive filesystem we get other inconsistencies.
# but that would require some sort of setup to create a case-insensitive filesystem - perhaps using fuse or similar - which is a bit beyond the scope of this module,
# or at least checking for an existing mounted case-insensitive filesystem.
# A common case for this is when using WSL on windows - where the underlying filesystem is case-insensitive, but the WSL environment is unix-like.
# It may also be reasonably common to mount USB drives with case-insensitive filesystems on unix.
proc has_tclbug_regexp_emptystring {} { proc has_tclbug_regexp_emptystring {} {
#The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces #The regexp {} [...] trick - code in brackets only runs when non byte-compiled ie in traces
#This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic, #This was usable as a hack to create low-impact calls that only ran in an execution trace context - handy for debugger logic,

209
src/vfs/_vfscommon.vfs/modules/punk/nav/fs-0.1.0.tm

@ -50,7 +50,7 @@ package require punk::lib
package require punk::args package require punk::args
package require punk::ansi package require punk::ansi
package require punk::winpath package require punk::winpath
package require punk::du package require punk::du ;#required for testing if pattern is glob and also for the dirfiles_dict command which is used for listing and globbing.
package require commandstack package require commandstack
#*** !doctools #*** !doctools
#[item] [package {Tcl 8.6}] #[item] [package {Tcl 8.6}]
@ -157,6 +157,53 @@ tcl::namespace::eval punk::nav::fs {
#[list_begin definitions] #[list_begin definitions]
punk::args::define {
@id -id ::punk::nav::fs::d/
@cmd -name punk::nav::fs::d/ -help\
{List directories or directories and files in the current directory or in the
targets specified with the fileglob_or_target glob pattern(s).
If a single target is specified without glob characters, and it exists as a directory,
then the working directory is changed to that target and a listing of that directory
is returned. If the single target specified without glob characters does not exist as
a directory, then it is treated as a glob pattern and the listing is for the current
directory with results filtered to match fileglob_or_target.
If multiple targets or glob patterns are specified, then a separate listing is returned
for each fileglob_or_target pattern.
This function is provided via aliases as ./ and .// with v being inferred from the alias
name, and also as d/ with an explicit v argument.
The ./ and .// forms are more convenient for interactive use.
examples:
./ - list directories in current directory
.// - list directories and files in current directory
./ src/* - list directories in src
.// src/* - list directories and files in src
.// *.txt - list files in current directory with .txt extension
.// {t*[1-3].txt} - list files in current directory with t*1.txt or t*2.txt or t*3.txt name
(on a case-insensitive filesystem this would also match T*1.txt etc)
.// {[T]*[1-3].txt} - list files in current directory with [T]*[1-3].txt name
(glob chars treated as literals due to being in character-class brackets
This will match files beginning with a capital T and not lower case t
even on a case-insensitive filesystem.)
.// {{[t],d{e,d}}*} - list directories and files in current directory with names that match either of the following patterns:
{[t]*} - names beginning with t
{d{e,d}*} - names beginning with de or dd
(on a case-insensitive filesystem the first pattern would also match names beginning with T)
}
@values -min 1 -max -1 -type string
v -type string -choices {/ //} -help\
"
/ - list directories only
// - list directories and files
"
fileglob_or_target -type string -optional true -multiple true -help\
"A glob pattern as supported by Tcl's 'glob' command, to filter results.
If multiple patterns are supplied, then a listing for each pattern is returned.
If no patterns are supplied, then all items are listed."
}
#NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread. #NOTE - as we expect to run other apps (e.g Tk) in the same process, but possibly different threads - we should be careful about use of cd which is per-process not per-thread.
#As this function recurses and calls cd multiple times - it's not thread-safe. #As this function recurses and calls cd multiple times - it's not thread-safe.
#Another thread could theoretically cd whilst this is running. #Another thread could theoretically cd whilst this is running.
@ -262,12 +309,11 @@ tcl::namespace::eval punk::nav::fs {
#puts stdout "-->[ansistring VIEW $result]" #puts stdout "-->[ansistring VIEW $result]"
return $result return $result
} else { } else {
set atail [lassign $args cdtarget]
if {[llength $args] == 1} { if {[llength $args] == 1} {
set cdtarget [lindex $args 0] set cdtarget [lindex $args 0]
switch -exact -- $cdtarget { switch -exact -- $cdtarget {
. - ./ { . - ./ {
tailcall punk::nav::fs::d/ tailcall punk::nav::fs::d/ $v
} }
.. - ../ { .. - ../ {
if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} { if {$VIRTUAL_CWD eq "//zipfs:/" && ![string match //zipfs:/* [pwd]]} {
@ -301,9 +347,10 @@ tcl::namespace::eval punk::nav::fs {
if {[string range $cdtarget_copy 0 3] eq "//?/"} { if {[string range $cdtarget_copy 0 3] eq "//?/"} {
#handle dos device paths - convert to normal path for glob testing #handle dos device paths - convert to normal path for glob testing
set glob_test [string range $cdtarget_copy 3 end] set glob_test [string range $cdtarget_copy 3 end]
set cdtarget_is_glob [regexp {[*?]} $glob_test] set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $glob_test]
} else { } else {
set cdtarget_is_glob [regexp {[*?]} $cdtarget] set cdtarget_is_glob [punk::nav::fs::lib::is_fileglob $cdtarget]
#todo - review - we should be able to handle globs in dos device paths too - but we need to be careful to only test the glob chars in the part after the //?/ prefix - as the prefix itself contains chars that would be treated as globs if we tested the whole thing.
} }
if {!$cdtarget_is_glob} { if {!$cdtarget_is_glob} {
set cdtarget_file_type [file type $cdtarget] set cdtarget_file_type [file type $cdtarget]
@ -370,6 +417,7 @@ tcl::namespace::eval punk::nav::fs {
} }
set VIRTUAL_CWD $cdtarget set VIRTUAL_CWD $cdtarget
set curdir $cdtarget set curdir $cdtarget
tailcall punk::nav::fs::d/ $v
} else { } else {
set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget] set target [punk::path::normjoin $VIRTUAL_CWD $cdtarget]
if {[string match //zipfs:/* $VIRTUAL_CWD]} { if {[string match //zipfs:/* $VIRTUAL_CWD]} {
@ -379,9 +427,9 @@ tcl::namespace::eval punk::nav::fs {
} }
if {[file type $target] eq "directory"} { if {[file type $target] eq "directory"} {
set VIRTUAL_CWD $target set VIRTUAL_CWD $target
tailcall punk::nav::fs::d/ $v
} }
} }
tailcall punk::nav::fs::d/ $v
} }
set curdir $VIRTUAL_CWD set curdir $VIRTUAL_CWD
} else { } else {
@ -391,7 +439,6 @@ tcl::namespace::eval punk::nav::fs {
#globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.) #globchar somewhere in path - treated as literals except in final segment (for now. todo - make more like ns/ which accepts full path globbing with double ** etc.)
set searchspec [lindex $args 0]
set result "" set result ""
#set chunklist [list] #set chunklist [list]
@ -402,7 +449,9 @@ tcl::namespace::eval punk::nav::fs {
set this_result [dict create] set this_result [dict create]
foreach searchspec $args { foreach searchspec $args {
set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)] set path [path_to_absolute $searchspec $curdir $::tcl_platform(platform)]
set has_tailglob [expr {[regexp {[?*]} [file tail $path]]}] #we need to support the same glob chars that Tcl's 'glob' command accepts.
set has_tailglob [punk::nav::fs::lib::is_fileglob [file tail $path]]
#we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean #we have already done a 'cd' if only one unglobbed path was supplied - therefore any remaining non-glob tails must be tested for folderness vs fileness to see what they mean
#this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough. #this may be slightly surprising if user tries to exactly match both a directory name and a file both as single objects; because the dir will be listed (auto /* applied to it) - but is consistent enough.
#lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*) #lower level dirfiles or dirfiles_dict can be used to more precisely craft searches. ( d/ will treat dir the same as dir/*)
@ -605,7 +654,11 @@ tcl::namespace::eval punk::nav::fs {
set allow_nonportable [dict exists $received -nonportable] set allow_nonportable [dict exists $received -nonportable]
set curdir [pwd] set curdir [pwd]
set fullpath_list [list]
set fullpath_list [list] ;#list of full paths to create.
set existing_parent_list [list] ;#list of nearest existing parent for each supplied path (used for writability testing, and as cd point from which to run file mkdir)
#these 2 lists are built up in parallel and should have the same number of items as the supplied paths that pass the initial tests.
set error_paths [list] set error_paths [list]
foreach p $paths { foreach p $paths {
if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} { if {!$allow_nonportable && [punk::winpath::illegalname_test $p]} {
@ -618,11 +671,13 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $p "Path '$p' contains null character which is not allowed"] lappend error_paths [list $p "Path '$p' contains null character which is not allowed"]
continue continue
} }
set path1 [path_to_absolute $a1 $curdir $::tcl_platform(platform)] set fullpath [path_to_absolute $p $curdir $::tcl_platform(platform)]
#e.g can return something like //?/C:/test/illegalpath. which is not a valid path for mkdir. #if we called punk::winpath::illegalname_fix on this, it could return something like //?/C:/test/illegalpath. dos device paths don't seem to be valid paths for file mkdir.
set fullpath [file join $path1 {*}[lrange $args 1 end]]
#Some subpaths of the supplied paths to create may already exist. #Some subpaths of the supplied paths to create may already exist.
#we should test write permissions on the nearest existing parent of the supplied path to create, rather than just on the supplied path itself which may not exist at all. #we should test write permissions on the nearest existing parent of the supplied path to create,
#rather than just on the immediate parent segment of the supplied path itself which may not exist.
set fullpath [file normalize $fullpath]
set parent [file dirname $fullpath] set parent [file dirname $fullpath]
while {![file exists $parent]} { while {![file exists $parent]} {
set parent [file dirname $parent] set parent [file dirname $parent]
@ -632,6 +687,7 @@ tcl::namespace::eval punk::nav::fs {
lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"] lappend error_paths [list $fullpath "Cannot create directory '$fullpath' as parent '$parent' is not writable"]
continue continue
} }
lappend existing_parent_list $parent
lappend fullpath_list $fullpath lappend fullpath_list $fullpath
} }
if {[llength $fullpath_list] != [llength $paths]} { if {[llength $fullpath_list] != [llength $paths]} {
@ -646,9 +702,13 @@ tcl::namespace::eval punk::nav::fs {
set num_created 0 set num_created 0
set error_string "" set error_string ""
foreach fullpath $fullpath_list { foreach fullpath $fullpath_list existing_parent $existing_parent_list {
#calculate relative path from existing parent to fullpath, and cd to existing parent before running file mkdir - to allow creation of directories with non-portable names on windows (e.g reserved device names) by using their relative paths from the nearest existing parent directory, which should be able to be cd'd into without issue.
#set relative_path [file relative $fullpath $existing_parent]
#todo.
if {[catch {file mkdir $fullpath}]} { if {[catch {file mkdir $fullpath}]} {
set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted." set error_string "Failed to create directory '$fullpath' - processing of remaining paths aborted."
cd $curdir
break break
} }
incr num_created incr num_created
@ -656,7 +716,10 @@ tcl::namespace::eval punk::nav::fs {
if {$error_string ne ""} { if {$error_string ne ""} {
error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered." error "punk::nav::fs::d/new $error_string\n$num_created directories out of [llength $fullpath_list] were created successfully before the error was encountered."
} }
d/ $curdir
#display summaries of created directories (which may have already existed) by reusing d/ to get info on them.
set query_paths [lmap v $paths $v/*]
d/ / {*}$query_paths
} }
#todo use unknown to allow d/~c:/etc ?? #todo use unknown to allow d/~c:/etc ??
@ -666,7 +729,7 @@ tcl::namespace::eval punk::nav::fs {
if {![file isdirectory $target]} { if {![file isdirectory $target]} {
error "Folder $target not found" error "Folder $target not found"
} }
d/ $target d/ / $target
} }
@ -799,7 +862,9 @@ tcl::namespace::eval punk::nav::fs {
} }
set relativepath [expr {[file pathtype $searchspec] eq "relative"}] set relativepath [expr {[file pathtype $searchspec] eq "relative"}]
set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
#set has_tailglobs [regexp {[?*]} [file tail $searchspec]]
set has_tailglobs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent. #dirfiles_dict would handle simple cases of globs within paths anyway - but we need to explicitly set tailglob here in all branches so that next level doesn't need to do file vs dir checks to determine user intent.
#(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict) #(dir-listing vs file-info when no glob-chars present is inherently ambiguous so we test file vs dir to make an assumption - more explicit control via -tailglob can be done manually with dirfiles_dict)
@ -838,7 +903,7 @@ tcl::namespace::eval punk::nav::fs {
} }
puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location" puts "--> -searchbase:$searchbase searchspec:$searchspec -tailglob:$tailglob location:$location"
set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location] set contents [dirfiles_dict -searchbase $searchbase -tailglob $tailglob -with_times {f d l} -with_sizes {f d l} $location]
return [dirfiles_dict_as_lines -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents] return [dirfiles_dict_as_lines -listing // -stripbase $opt_stripbase -formatsizes $opt_formatsizes $contents]
} }
#todo - package as punk::nav::fs #todo - package as punk::nav::fs
@ -880,8 +945,8 @@ tcl::namespace::eval punk::nav::fs {
} }
proc dirfiles_dict {args} { proc dirfiles_dict {args} {
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict] set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict]
lassign [dict values $argd] leaders opts vals lassign [dict values $argd] leaders opts values
set searchspecs [dict values $vals] set searchspecs [dict values $values]
#puts stderr "searchspecs: $searchspecs [llength $searchspecs]" #puts stderr "searchspecs: $searchspecs [llength $searchspecs]"
#puts stdout "arglist: $opts" #puts stdout "arglist: $opts"
@ -925,7 +990,9 @@ tcl::namespace::eval punk::nav::fs {
} }
} }
"\uFFFF" { "\uFFFF" {
set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]] set searchtail_has_globs [punk::nav::fs::lib::is_fileglob [file tail $searchspec]]
#set searchtail_has_globs [regexp {[*?]} [file tail $searchspec]]
if {$searchtail_has_globs} { if {$searchtail_has_globs} {
if {$is_relativesearchspec} { if {$is_relativesearchspec} {
#set location [file dirname [file join $searchbase $searchspec]] #set location [file dirname [file join $searchbase $searchspec]]
@ -993,6 +1060,8 @@ tcl::namespace::eval punk::nav::fs {
} else { } else {
set next_opt_with_times [list -with_times $opt_with_times] set next_opt_with_times [list -with_times $opt_with_times]
} }
set ts1 [clock clicks -milliseconds]
if {$is_in_vfs} { if {$is_in_vfs} {
set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times] set listing [punk::du::lib::du_dirlisting_tclvfs $location -glob $match_contents {*}$next_opt_with_sizes {*}$next_opt_with_times]
} else { } else {
@ -1042,6 +1111,8 @@ tcl::namespace::eval punk::nav::fs {
} }
} }
} }
set ts2 [clock clicks -milliseconds]
set ts_listing [expr {$ts2 - $ts1}]
set dirs [dict get $listing dirs] set dirs [dict get $listing dirs]
set files [dict get $listing files] set files [dict get $listing files]
@ -1093,9 +1164,11 @@ tcl::namespace::eval punk::nav::fs {
set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden] set flaggedhidden [punk::lib::lunique_unordered $flaggedhidden]
} }
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#-----------------------------------------------------------------------------------------
set ts1 [clock milliseconds]
set dirs [lsort -dictionary $dirs] ;#todo - natsort
#foreach d $dirs { #foreach d $dirs {
# if {[lindex [file system $d] 0] eq "tclvfs"} { # if {[lindex [file system $d] 0] eq "tclvfs"} {
@ -1124,19 +1197,27 @@ tcl::namespace::eval punk::nav::fs {
set files $sorted_files set files $sorted_files
set filesizes $sorted_filesizes set filesizes $sorted_filesizes
set ts2 [clock milliseconds]
set ts_sorting [expr {$ts2 - $ts1}]
#-----------------------------------------------------------------------------------------
# -- --- # -- ---
#jmn #jmn
set ts1 [clock milliseconds]
foreach nm [list {*}$dirs {*}$files] { foreach nm [list {*}$dirs {*}$files] {
if {[punk::winpath::illegalname_test $nm]} { if {[punk::winpath::illegalname_test $nm]} {
lappend nonportable $nm lappend nonportable $nm
} }
} }
set ts2 [clock milliseconds]
set ts_nonportable_check [expr {$ts2 - $ts1}]
set timing_info [dict create nonportable_check ${ts_nonportable_check}ms sorting ${ts_sorting}ms listing ${ts_listing}ms]
set front_of_dict [dict create location $location searchbase $opt_searchbase] set front_of_dict [dict create location $location searchbase $opt_searchbase]
set listing [dict merge $front_of_dict $listing] set listing [dict merge $front_of_dict $listing]
set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes] set updated [dict create dirs $dirs files $files filesizes $filesizes nonportable $nonportable flaggedhidden $flaggedhidden underlayfiles $underlayfiles underlayfilesizes $underlayfilesizes timinginfo $timing_info]
return [dict merge $listing $updated] return [dict merge $listing $updated]
} }
@ -1144,13 +1225,13 @@ tcl::namespace::eval punk::nav::fs {
@id -id ::punk::nav::fs::dirfiles_dict_as_lines @id -id ::punk::nav::fs::dirfiles_dict_as_lines
-stripbase -default 0 -type boolean -stripbase -default 0 -type boolean
-formatsizes -default 1 -type boolean -formatsizes -default 1 -type boolean
-listing -default "/" -choices {/ // //} -listing -default "/" -choices {/ //}
@values -min 1 -max -1 -type dict -unnamed true @values -min 1 -max -1 -type dict -unnamed true
} }
#todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing? #todo - color key via repl-telemetry? help command? documentation? or add tag columns as done in namespace listing?
proc dirfiles_dict_as_lines {args} { proc dirfiles_dict_as_lines {args} {
set ts1 [clock milliseconds] #set ts1 [clock milliseconds]
package require overtype package require overtype
set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines] set argd [punk::args::parse $args withid ::punk::nav::fs::dirfiles_dict_as_lines]
lassign [dict values $argd] leaders opts vals lassign [dict values $argd] leaders opts vals
@ -1355,7 +1436,7 @@ tcl::namespace::eval punk::nav::fs {
#review - symlink to shortcut? hopefully will just work #review - symlink to shortcut? hopefully will just work
#classify as file or directory - fallback to file if unknown/undeterminable #classify as file or directory - fallback to file if unknown/undeterminable
set finfo_plus [list] set finfo_plus [list]
set ts2 [clock milliseconds] #set ts2 [clock milliseconds]
foreach fdict $finfo { foreach fdict $finfo {
set fname [dict get $fdict file] set fname [dict get $fdict file]
if {[file extension $fname] eq ".lnk"} { if {[file extension $fname] eq ".lnk"} {
@ -1440,8 +1521,8 @@ tcl::namespace::eval punk::nav::fs {
} }
unset finfo unset finfo
puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]" #puts stderr "dirfiles_dict_as_lines since ts2 [clock milliseconds] - $ts2 ms = [expr {[clock milliseconds] - $ts2}]"
puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]" #puts stderr "dirfiles_dict_as_lines since start [clock milliseconds] - $ts1 ms = [expr {[clock milliseconds] - $ts1}]"
#set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}] #set widest1 [punk::pipedata [list {*}$dirs ""] {lmap v $data {string length $v}} {tcl::mathfunc::max {*}$data}]
@ -1537,19 +1618,19 @@ tcl::namespace::eval punk::nav::fs {
#consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path #consider haskells approach of well-typed paths for cross-platform paths: https://hackage.haskell.org/package/path
#review: punk::winpath calls cygpath! #review: punk::winpath calls cygpath!
#review: file pathtype is platform dependant #review: file pathtype is platform dependant
proc path_to_absolute {path base platform} { proc path_to_absolute {subpath base platform} {
set ptype [file pathtype $path] set ptype [file pathtype $subpath]
if {$ptype eq "absolute"} { if {$ptype eq "absolute"} {
set path_absolute $path set path_absolute $subpath
} elseif {$ptype eq "volumerelative"} { } elseif {$ptype eq "volumerelative"} {
if {$platform eq "windows"} { if {$platform eq "windows"} {
#unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms) #unix looking paths like /c/users or /usr/local/etc are reported by tcl as volumerelative.. (as opposed to absolute on unix platforms)
if {[string index $path 0] eq "/"} { if {[string index $subpath 0] eq "/"} {
#this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here #this conversion should be an option for the ./ command - not built in as a default way of handling volumerelative paths here
#It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions. #It is more useful on windows to treat /usr/local as a wsl or mingw path - and may be reasonable for ./ - but is likely to surprise if put into utility functions.
#Todo - tidy up. #Todo - tidy up.
package require punk::unixywindows package require punk::unixywindows
set path_absolute [punk::unixywindows::towinpath $path] set path_absolute [punk::unixywindows::towinpath $subpath]
#puts stderr "winpath: $path" #puts stderr "winpath: $path"
} else { } else {
#todo handle volume-relative paths with volume specified c:etc c: #todo handle volume-relative paths with volume specified c:etc c:
@ -1558,21 +1639,25 @@ tcl::namespace::eval punk::nav::fs {
#The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives #The cwd of the process can get out of sync with what tcl thinks is the working directory when you swap drives
#Arguably if ...? #Arguably if ...?
#set path_absolute $base/$path #set path_absolute $base/$subpath
set path_absolute $path set path_absolute $subpath
} }
} else { } else {
# unknown what paths are reported as this on other platforms.. treat as absolute for now # unknown what paths are reported as this on other platforms.. treat as absolute for now
set path_absolute $path set path_absolute $subpath
} }
} else { } else {
set path_absolute $base/$path #e.g relative subpath=* base = c:/test -> c:/test/*
} #e.g relative subpath=../test base = c:/test -> c:/test/../test
if {$platform eq "windows"} { #e.g relative subpath=* base = //server/share/test -> //server/share/test/*
if {[punk::winpath::illegalname_test $path_absolute]} { set path_absolute $base/$subpath
set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present }
} #fixing up paths on windows here violates the principle of single responsibility - this function is supposed to be about converting to absolute paths - not fixing up windows path issues.
} #if {$platform eq "windows"} {
# if {[punk::winpath::illegalname_test $path_absolute]} {
# set path_absolute [punk::winpath::illegalname_fix $path_absolute] ;#add dos-device-prefix protection if not already present
# }
#}
return $path_absolute return $path_absolute
} }
proc strip_prefix_depth {path prefix} { proc strip_prefix_depth {path prefix} {
@ -1616,13 +1701,39 @@ tcl::namespace::eval punk::nav::fs::lib {
#[para] Secondary functions that are part of the API #[para] Secondary functions that are part of the API
#[list_begin definitions] #[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
punk::args::define {
@id -id ::punk::nav::fs::lib::is_fileglob
@cmd -name punk::nav::fs::lib::is_fileglob
@values -min 1 -max 1
path -type string -required true -help\
{String to test for being a glob pattern as recognised by the tcl 'glob' command.
If the string represents a path with multiple segments, only the final component
of the path will be tested for glob characters.
Glob patterns in this context are different to globs accepted by TCL's 'string match'.
A glob pattern is any string that contains unescaped * ? { } [ or ].
This will not detect mismatched unescaped braces or brackets.
Such a sequence will be treated as a glob pattern, even though it may not be a valid glob pattern.
}
}
proc is_fileglob {str} {
#a glob pattern is any string that contains unescaped * ? { } [ or ] (we will ignore the possibility of ] being used without a matching [ as that is likely to be rare and would be difficult to detect without a full glob parser)
set in_escape 0
set segments [file split $str]
set tail [lindex $segments end]
foreach c [split $tail ""] {
if {$in_escape} {
set in_escape 0
} else {
if {$c eq "\\"} {
set in_escape 1
} elseif {$c in [list * ? "\[" "\]" "{" "}" ]} {
return 1
}
}
}
return 0
}
#*** !doctools #*** !doctools

2
src/vfs/_vfscommon.vfs/modules/punk/path-0.1.0.tm

@ -357,7 +357,7 @@ namespace eval punk::path {
} }
} }
} }
puts "==>finalparts: '$finalparts'" #puts "==>finalparts: '$finalparts'"
# using join - {"" "" server share} -> //server/share and {a b} -> a/b # using join - {"" "" server share} -> //server/share and {a b} -> a/b
if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} { if {[llength $finalparts] == 1 && [lindex $finalparts 0] eq ""} {
#backtracking on unix-style path can end up with empty string as only member of finalparts #backtracking on unix-style path can end up with empty string as only member of finalparts

64
src/vfs/_vfscommon.vfs/modules/punk/winpath-0.1.0.tm

@ -114,6 +114,50 @@ namespace eval punk::winpath {
return $path return $path
} }
} }
proc illegal_char_map_to_doublewide {ch} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
set map [dict create \
"<" "\uFF1C" \
">" "\uFF1E" \
":" "\uFF1A" \
"\"" "\uFF02" \
"/" "\uFF0F" \
"\\" "\uFF3C" \
"|" "\uFF5C" \
"?" "\uFF1F" \
"*" "\uFF0A"]
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
proc illegal_char_map_to_ntfs {ch} {
#windows ntfs maps illegal chars to the PUA unicode range 0xF000-0xF0FF for characters that are illegal in windows API but can exist on NTFS or samba shares etc.
#see also: https://cygwin.com/cygwin-ug-net/using-specialnames.html#pathnames-specialchars
#see also: https://stackoverflow.com/questions/76738031/unicode-private-use-characters-in-ntfs-filenames#:~:text=map_dict%20=%20%7B%200xf009:'%5C,c%20return%20(output_name%2C%20mapped)
set map [dict create \
"<" "\uF03C" \
">" "\uF03E" \
":" "\uF03A" \
"\"" "\uF022" \
"/" "unknown" \
"\\" "\uF05c" \
"|" "\uF07C" \
"?" "\uF03F" \
"*" "\uF02A"]
#note that : is used for NTFS alternate data streams - but the character is still illegal in the main filename - so we will map it to a glyph in the PUA range for display purposes - but it will still need dos device syntax to be accessed via windows API.
if {$ch in [dict keys $map]} {
return [dict get $map $ch]
} else {
return $ch
}
}
#we don't validate that path is actually illegal because we don't know the full range of such names. #we don't validate that path is actually illegal because we don't know the full range of such names.
#The caller can apply this to any path. #The caller can apply this to any path.
#don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently) #don't test for platform here - needs to be callable from any platform for potential passing to windows (what usecase? 8.3 name is not always calculable independently)
@ -200,8 +244,15 @@ namespace eval punk::winpath {
set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"] set windows_reserved_names [list "CON" "PRN" "AUX" "NUL" "COM1" "COM2" "COM3" "COM4" "COM5" "COM6" "COM7" "COM8" "COM9" "LPT1" "LPT2" "LPT3" "LPT4" "LPT5" "LPT6" "LPT7" "LPT8" "LPT9"]
#we need to exclude things like path/.. path/. #we need to exclude things like path/.. path/.
foreach seg [file split $path] { set segments [file split $path]
if {$seg in [list . ..]} { if {[file pathtype $path] eq "absolute"} {
#absolute path - we can have a leading double slash for UNC or dos device paths - but these don't require the same checks as the rest of the segments.
set checksegments [lrange $segments 1 end]
} else {
set checksegments $segments
}
foreach seg $checksegments {
if {$seg in {. ..}} {
#review - what if there is a folder or file that actually has a name such as . or .. ? #review - what if there is a folder or file that actually has a name such as . or .. ?
#unlikely in normal use - but could done deliberately for bad reasons? #unlikely in normal use - but could done deliberately for bad reasons?
#We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem. #We are unable to check for it here anyway - as this command is intended for checking the path string - not the actual path on a filesystem.
@ -220,10 +271,17 @@ namespace eval punk::winpath {
#only check for actual space as other whitespace seems to work without being stripped #only check for actual space as other whitespace seems to work without being stripped
#trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph #trailing tab and trailing \n or \r seem to be creatable in windows with Tcl - map to some glyph
if {[string index $seg end] in [list " " "."]} { if {[string index $seg end] in {" " .}} {
#windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc) #windows API doesn't handle trailing dots or spaces (silently strips) - even though such files can be created on NTFS systems (or seen via samba etc)
return 1 return 1
} }
#set re {[<>:"/\\|?*\x01-\x1f]} review - integer values 1-31 are allowed in NTFS alternate data streams.
set re {[<>:"/\\|?*]}
if {[regexp $re $seg]} {
#windows API doesn't handle these characters in filenames - even though such files can be created on NTFS systems (or seen via samba etc)
return 1
}
} }
#glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph) #glob chars '* ?' are probably illegal.. but although x*y.txt and x?y.txt don't display properly (* ? replaced with some other glyph)
#- they seem to be readable from cmd and tclsh as is. #- they seem to be readable from cmd and tclsh as is.

Loading…
Cancel
Save