# -*- tcl -*-
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt
#
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem.
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository.
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# (C) 2024
#
# @@ Meta Begin
# Application fauxlink 0.1.0
# Meta platform     tcl
# Meta license      MIT
# @@ Meta End


# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# doctools header
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[manpage_begin fauxlink_module_fauxlink 0 0.1.0]
#[copyright "2024"]
#[titledesc {faux link application shortcuts}] [comment {-- Name section and table of contents description --}]
#[moddesc {fauxlink .fxlnk}] [comment {-- Description at end of page heading --}]
#[require fauxlink]
#[keywords symlink faux fake shortcut toml]
#[description]
#[para] A cross platform shortcut/symlink alternative.
#[para] Unapologetically ugly - but practical in certain circumstances.
#[para] A solution is required for application-driven filesystem links that survive cross platform moves as well as
#[para] archiving and packaging systems.
#[para] The target is specified in a minimally-encoded form in the filename itself - but still human readable.
#[para] format of name <nominalname>#<encodedtarget>.fxlnk
#[para] where <nominalname> can be empty - then the effective nominal name is the tail of the <encodedtarget>
#[para] The + symbol substitutes for forward-slashes.
#[para] Other chars can be encoded using url-like encoding - (but only up to %7E !)
#[para] We deliberately treat higher % sequences literally.
#[para] This means actual uri::urn encoded unicode sequences (e.g %E2%99%A5 [heart]) can remain literal for linking to urls.
#[para] e.g if an actual + or # is required in a filename or path segment they can be encoded as %2B & %23
#[para] e.g a link to a file file#A.txt in parent dir could be:
#[para] file%23A.txt#..+file%23A.txt.fxlnk
#[para] or equivalently (but obviously affecting sorting) #..+file%23A.txt.fxlnk
#[para] The <nominalname> can be unrelated to the actual target
#[para] e.g datafile.dat#..+file%23A.txt.fxlnk
#[para] This system has no filesystem support - and must be completely application driven.
#[para] This can be useful for example in application test packages which may be tarred or zipped and moved cross platform.
#[para] The target being fully specified in the name means the file doesn't have to be read for the target to be determined
#[para] Extensions to behaviour should be added in the file as text data in Toml format,
#[para] with custom data being under a single application-chosen table name
#[para] The toplevel Toml table [fauxlink] is reserved for core extensions to this system.
#[para] Aside from the 2 used for delimiting (+ #)
#[para] certain characters which might normally be allowed in filesystems are required to be encoded
#[para] e.g space and tab are required to be %20 %09
#[para] Others that require encoding are: * ? \ / | : ; " < >
#[para] The nul character in raw form, when detected, is always mapped away to the empty string - as very few filesystems support it.
#[para] Control characters and other punctuation are optional to encode.
#[para] Generally utf-8 should be used where possible and unicode characters left as is where possible on modern systems.
#[para] Where encoding of unicode is desired in the nominalname or encodedtarget portions it can be specified as %UXXXXXXXX
#[para] There must be between 1 and 8 X digits following the %U. Interpretation of chars following %U stops at the first non-hex character.
#[para] This means %Utest would not get any translation as there were no hex digits so it would come out as %Utest
#
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#https://learn.microsoft.com/en-us/troubleshoot/windows-client/networking/url-encoding-unc-paths-not-url-decoded
# ie "//server/c/Program files" works but "//server/c/Program%20Files" is now treated by windows as a literal path with %20 in it.
#Using fauxlink - a link would be:
# "my-program-files#++server+c+Program%20Files.fxlnk"
#If we needed the old-style literal %20 it would become
# "my-program-files#++server+c+Program%2520Files.fxlnk"
#
# The file:// scheme on windows supposedly *does* decode %xx (for use in a browser)
# e.g
# pfiles#file%3a++++localhost+c+Program%2520files
# The browser will work with literal spaces too though - so it could just as well be:
# pfiles#file%3a++++localhost+c+Program%20files
#windows may default to using explorer.exe instead of a browser for file:// urls though
#and explorer doesn't want the literal %20. It probably depends what API the file:// url is to be passed to?
#in a .url shortcut either literal space or %20 will work ie %xx values are decoded


#*** !doctools
#[section Overview]
#[para] overview of fauxlink
#[subsection Concepts]
#[para] -

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Requirements
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++

#*** !doctools
#[subsection dependencies]
#[para] packages used by fauxlink
#[list_begin itemized]

package require Tcl 8.6-
#*** !doctools
#[item] [package {Tcl 8.6-}]

# #package require frobz
# #*** !doctools
# #[item] [package {frobz}]

#*** !doctools
#[list_end]

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++

#*** !doctools
#[section API]

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# oo::class namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval fauxlink::class {
    #*** !doctools
    #[subsection {Namespace fauxlink::class}]
    #[para] class definitions
    if {[info commands [namespace current]::interface_sample1] eq ""} {
        #*** !doctools
        #[list_begin enumerated]

        # oo::class create interface_sample1 {
        #     #*** !doctools
        #     #[enum] CLASS [class interface_sample1]
        #     #[list_begin definitions]

        #     method test {arg1} {
        #         #*** !doctools
        #         #[call class::interface_sample1 [method test] [arg arg1]]
        #         #[para] test method
        #         puts "test: $arg1"
        #     }

        #     #*** !doctools
        #     #[list_end] [comment {-- end definitions interface_sample1}]
        # }

        #*** !doctools
        #[list_end] [comment {--- end class enumeration ---}]
    }
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# Base namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval fauxlink {
    namespace export {[a-z]*}; # Convention: export all lowercase

    #todo - enforce utf-8

    #literal unicode chars supported by modern filesystems - leave as is - REVIEW


    variable encode_map
    variable decode_map
    #most filesystems don't allow NULL - map to empty string

    #Make sure % is not in encode_map
    set encode_map [dict create\
        \x00 ""\
        { } %20\
        \t %09\
        + %2B\
        # %23\
        * %2A\
        ? %3F\
        \\ %5C\
        / %2F\
        | %7C\
        : %3A\
        {;} %3B\
        {"} %22\
        < %3C\
        > %3E\
    ]
    #above have some overlap with ctrl codes below.
    #no big deal as it's a dict

    #must_encode
    # + # * ? \ / | : ; " < > \t
    # also NUL to empty string

    # also ctrl chars 01 to 1F (1..31)
    # Each control char gets an uppercase-hex entry in encode_map and both
    # uppercase and lowercase hex entries in decode_map.
    for {set i 1} {$i < 32} {incr i} {
        set ch [format %c $i]
        set enc "%[format %02X $i]"
        set enc_lower [string tolower $enc]
        dict set encode_map $ch $enc
        dict set decode_map $enc $ch
        dict set decode_map $enc_lower $ch
    }

    # must_encode is the list of raw characters that may not appear unencoded
    # in the name or in a target segment (the keys of encode_map).
    variable must_encode
    set must_encode [dict keys $encode_map]

    #if they are in

    #decode map doesn't include
    # %00 (nul)
    # %2F "/"
    # %2f "/"
    # %7f (del)
    #we exclude the forward slash because we already have + for that - and multiple ways to specify it obscure intention.
    #
    set decode_map [dict merge $decode_map [dict create\
        %09 \t\
        %20 { }\
        %21 "!"\
        %22 {"}\
        %23 "#"\
        %24 "$"\
        %25 "%"\
        %26 "&"\
        %27 "'"\
        %28 "("\
        %29 ")"\
        %2A "*"\
        %2a "*"\
        %2B "+"\
        %2b "+"\
        %2C ","\
        %2c ","\
        %2D "-"\
        %2d "-"\
        %2E "."\
        %2e "."\
        %3A ":"\
        %3a ":"\
        %3B {;}\
        %3b {;}\
        %3D "="\
        %3C "<"\
        %3c "<"\
        %3d "="\
        %3E ">"\
        %3e ">"\
        %3F "?"\
        %3f "?"\
        %40 "@"\
        %5B "\["\
        %5b "\["\
        %5C "\\"\
        %5c "\\"\
        %5D "\]"\
        %5d "\]"\
        %5E "^"\
        %5e "^"\
        %60 "`"\
        %7B "{"\
        %7b "{"\
        %7C "|"\
        %7c "|"\
        %7D "}"\
        %7d "}"\
        %7E "~"\
        %7e "~"\
    ]]
    #Don't go above 7f
    #if we want to specify p

    #*** !doctools
    #[subsection {Namespace fauxlink}]
    #[para] Core API functions for fauxlink
    #[list_begin definitions]

    # Private helper (not exported - name does not match the lowercase export pattern).
    # Raises an error listing every raw character in text that is a member of must_encode.
    # Returns silently when text needs no encoding.
    #   text      - the name part or one target segment, still in encoded form
    #   errprefix - first part of the error message
    #   label     - text placed before "char <idx>" in each report (may be empty)
    proc _require_encoded {text errprefix label} {
        variable decode_map
        variable encode_map
        variable must_encode

        #Compare the mapped string itself rather than its length.
        #NUL maps to the empty string (-1) while other chars map to 3 chars (+2)
        #so a length comparison can miss a mix such as 2 NULs plus a space.
        if {[tcl::string::map $encode_map $text] eq $text} {
            return
        }
        set err $errprefix
        set idx 0
        foreach ch [split $text ""] {
            if {$ch in $must_encode} {
                set enc [dict get $encode_map $ch]
                if {[dict exists $decode_map $enc]} {
                    append err " ${label}char $idx should be encoded as $enc" \n
                } else {
                    append err " no %xx encoding available. Use %UXX if really required" \n
                }
            }
            incr idx
        }
        return -code error $err
    }

    proc resolve {link} {
        #*** !doctools
        #[call [fun resolve] [arg link]]
        #[para] Parse the file name of a fauxlink and return a dict with keys name and targetpath
        #[para] Only the tail of link is examined - the file is not opened or read
        #[para] An error is raised if the extension is not .fxlnk or .fauxlink, if there is no # delimiter,
        #[para] or if the name or a target segment contains a raw character that is required to be encoded
        variable decode_map

        set ftail [file tail $link]
        if {[file extension $ftail] ni [list .fxlnk .fauxlink]} {
            error "fauxlink::resolve refusing to process link $link - file extension must be .fxlnk or .fauxlink"
        }
        set linkspec [file rootname $ftail]
        # - any # or + within the target path or name should have been uri encoded as %23 and %2b
        if {[tcl::string::first # $linkspec] < 0} {
            error "fauxlink::resolve error. Link must contain a # (usually at start if name matches target)"
        }
        #only the 1st 2 parts of split on # are significant.
        #if there are more # chars present - the subsequent parts are effectively a comment

        #check namespec already has required chars encoded
        lassign [split $linkspec #] namespec targetspec
        #nothing should be changed - if there are unencoded chars that must be encoded it is an error
        _require_encoded $namespec "fauxlink::resolve invalid chars in name part (section prior to first #)" ""

        #see comments below regarding 2 rounds and ordering.
        set name [decode_unicode_escapes $namespec]
        set name [tcl::string::map $decode_map $name]

        #check each + delimited part of targetspec already has required chars encoded
        set s 0 ;#segment index
        set result_segments [list]
        foreach segment [split $targetspec +] {
            _require_encoded $segment "fauxlink::resolve invalid chars in targetpath (section following first #)" "segment $s "

            #2 rounds of substitution is possibly asking for trouble..
            #We allow anything in the resultant segments anyway (as %UXXXX... allows all)
            #so it's not so much about what can be encoded,
            # - but it makes it harder to reason about for users
            # In particular - if we map %XX first it makes %25 -> % substitution tricky
            # if the user requires a literal %UXXX - they can't do %25UXXX
            # the double sub would make it %UXXX -> somechar anyway.
            #we do unicode first - as a 2nd round of %XX substitutions is unlikely to interfere.
            #There is still the opportunity to use things like %U00000025 followed by hex-chars
            # and get some minor surprises, but using %U on ascii is unlikely to be done accidentally - REVIEW
            set segment [decode_unicode_escapes $segment]
            set segment [tcl::string::map $decode_map $segment]

            lappend result_segments $segment
            incr s
        }
        set targetpath [join $result_segments /]
        #an empty name part means the nominal name is the last target segment
        if {$name eq ""} {
            set name [lindex $result_segments end]
        }

        return [dict create name $name targetpath $targetpath]
    }

    #Retained only so the namespace variable still exists for any external reader.
    #decode_unicode_escapes no longer uses it as scratch space.
    variable map

    #default exclusion of / (%U2f and equivs)
    #this would allow obfuscation of intention - when we have + for that anyway
    proc decode_unicode_escapes {str {exclusions {/ \n \r \x00}}} {
        #*** !doctools
        #[call [fun decode_unicode_escapes] [arg str] [opt exclusions]]
        #[para] Replace each %U followed by 1 to 8 hex digits with the character having that code
        #[para] A %U with no hex digit directly after it is left untouched
        #[para] If the decoded character is a member of the exclusions list the original %U sequence is kept
        set ucstart [tcl::string::first %U $str 0]
        if {$ucstart < 0} {
            return $str
        }
        set max 8 ;#maximum number of hex digits consumed after %U
        set spans [list] ;#flat list of: start-index end-index hexdigits
        set strend [expr {[tcl::string::length $str]-1}]
        while {$ucstart >= 0} {
            set i [expr {$ucstart + 2}] ;#skip the %U
            set hex ""
            while {[tcl::string::length $hex] < $max && $i <= $strend} {
                set in [string index $str $i]
                if {![tcl::string::is xdigit -strict $in]} {
                    break
                }
                append hex $in
                incr i
            }
            if {$hex ne ""} {
                incr i -1 ;#index of the last hex digit consumed
                lappend spans $ucstart $i $hex
            }
            set ucstart [tcl::string::first %U $str $i]
        }

        set out ""
        set lastidx -1 ;#index of the last input char already copied or replaced
        foreach {s e hex} $spans {
            append out [string range $str $lastidx+1 $s-1]
            #NOTE(review): the value is passed to format %c without a range check - confirm desired behaviour above 10FFFF
            set sub [format %c 0x$hex]
            if {$sub in $exclusions} {
                append out %U$hex ;#put it back
            } else {
                append out $sub
            }
            set lastidx $e
        }
        #Copy whatever follows the last replaced span.
        #Using lastidx also covers the case where %U was present but never followed by hex digits
        #so the whole string comes back unchanged - e.g %Utest stays %Utest.
        append out [string range $str $lastidx+1 end]
        return $out
    }

    #Not yet implemented - the body is empty so the empty string is returned.
    proc link_as {name target} {

    }

    #proc sample1 {p1 args} {
    #    #*** !doctools
    #    #[call [fun sample1] [arg p1] [opt {?option value...?}]]
    #    #[para]Description of sample1
    #    return "ok"
    #}



    #*** !doctools
    #[list_end] [comment {--- end definitions namespace fauxlink ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++


# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# Secondary API namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval fauxlink::lib {
    namespace export {[a-z]*}; # Convention: export all lowercase
    namespace path [namespace parent]
    #*** !doctools
    #[subsection {Namespace fauxlink::lib}]
    #[para] Secondary functions that are part of the API
    #[list_begin definitions]

    #proc utility1 {p1 args} {
    #    #*** !doctools
    #    #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
    #    #[para]Description of utility1
    #    return 1
    #}



    #*** !doctools
    #[list_end] [comment {--- end definitions namespace fauxlink::lib ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++


# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section Internal]
namespace eval fauxlink::system {
    #*** !doctools
    #[subsection {Namespace fauxlink::system}]
    #[para] Internal functions that are not part of the API



}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Ready
package provide fauxlink [namespace eval fauxlink {
    variable pkg fauxlink
    variable version
    set version 0.1.0
}]
return

#*** !doctools
#[manpage_end]