diff --git a/src/bootsupport/modules/punk/ansi-0.1.1.tm b/src/bootsupport/modules/punk/ansi-0.1.1.tm index 6b04827d..3c20391f 100644 --- a/src/bootsupport/modules/punk/ansi-0.1.1.tm +++ b/src/bootsupport/modules/punk/ansi-0.1.1.tm @@ -6439,6 +6439,8 @@ tcl::namespace::eval punk::ansi::class { set o_gx0states [list] set o_splitindex [list] + #sha1 takes *much* longer to compute than md5 if tcllibc not available - otherwise it is generally faster + #we should fall back to md5 if no acceleration available. check for command sha1::sha1c ? set o_cksum_command [list sha1::sha1 -hex] diff --git a/src/bootsupport/modules/punk/args-0.2.tm b/src/bootsupport/modules/punk/args-0.2.tm index 7710fa00..ab1ca020 100644 --- a/src/bootsupport/modules/punk/args-0.2.tm +++ b/src/bootsupport/modules/punk/args-0.2.tm @@ -876,7 +876,7 @@ tcl::namespace::eval punk::args { set cache_key $args #ideally we would use a fast hash algorithm to produce a short key with low collision probability. - #something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. (sha1 often faster on modern cpus) + #something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. (sha1 often faster on modern cpus but terribly slow without an accelerator) #review - check if there is a built-into-tcl way to do this quickly #for now we will just key using the whole string #performance seems ok - memory usage probably not ideal diff --git a/src/bootsupport/modules/punk/mix/base-0.1.tm b/src/bootsupport/modules/punk/mix/base-0.1.tm index a4bc3c70..57f7f5a7 100644 --- a/src/bootsupport/modules/punk/mix/base-0.1.tm +++ b/src/bootsupport/modules/punk/mix/base-0.1.tm @@ -430,7 +430,16 @@ namespace eval punk::mix::base { #not just used by cksum_path. 
used by caller (e.g fill_relativecksums_from_base_and_relativepathdict via cksum_filter_opts) to determine what opt names passed through variable cksum_default_opts - set cksum_default_opts [dict create -cksum_content 1 -cksum_meta auto -cksum_acls 0 -cksum_usetar auto -cksum_algorithm sha1] + set default_hash sha1 ;#but fall back to md5 if either sha1 is unavailable or unaccelerated (pure tcl sha1 is way slower than pure tcl md5 - can take minutes on even moderate sized source files) + #sha1::Implementations is called inside the catch so that a sha1 package lacking that command also falls back to md5 instead of aborting the module load + if {![catch {package require sha1; ::sha1::Implementations} impls]} { + if {[lsearch -exact -not $impls tcl] < 0} { ;#nothing but pure-tcl is loaded. Do not test the list length: a sole entry may be the accelerated implementation (e.g critcl) + set default_hash md5 + } + } else { + set default_hash md5 + } + set cksum_default_opts [dict create -cksum_content 1 -cksum_meta auto -cksum_acls 0 -cksum_usetar auto -cksum_algorithm $default_hash] proc cksum_default_opts {} { variable cksum_default_opts return $cksum_default_opts @@ -438,9 +447,9 @@ namespace eval punk::mix::base { #crc::cksum is extremely slow in tcllib as at 2023 e.g 20x slower (no c implementation?) # - try builtin zlib crc instead? - #sha1 is performant - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration. + #sha1 is performant (when accelerator present) - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration. #adler32 is fastest for some larger files of a few MB but slower on small files (possibly due to Tcl-based file load?) 
- #sha1 as at 2023 seems a reasonable default + #sha1 as at 2023 seems a reasonable default - (but only if accelerator present) proc cksum_algorithms {} { variable sha3_implementation #sha2 is an alias for sha256 diff --git a/src/bootsupport/modules/textblock-0.1.3.tm b/src/bootsupport/modules/textblock-0.1.3.tm index d9858980..c89b3594 100644 --- a/src/bootsupport/modules/textblock-0.1.3.tm +++ b/src/bootsupport/modules/textblock-0.1.3.tm @@ -95,6 +95,7 @@ tcl::namespace::eval textblock { #NOTE sha1, although computationally more intensive, tends to be faster than md5 on modern cpus #(more likely to be optimised for modern cpu features?) #(This speed improvement may not apply for short strings) + #This is probably only true if tcllibc is available - pure-tcl sha1 is excruciatingly slow... variable use_hash ;#framecache set use_hash none ;#slightly faster but uglier layout for viewing frame_cache display diff --git a/src/modules/punk/ansi-999999.0a1.0.tm b/src/modules/punk/ansi-999999.0a1.0.tm index 2681dcf0..988574a7 100644 --- a/src/modules/punk/ansi-999999.0a1.0.tm +++ b/src/modules/punk/ansi-999999.0a1.0.tm @@ -6439,6 +6439,8 @@ tcl::namespace::eval punk::ansi::class { set o_gx0states [list] set o_splitindex [list] + #sha1 takes *much* longer to compute than md5 if tcllibc not available - otherwise it is generally faster + #we should fall back to md5 if no acceleration available. check for command sha1::sha1c ? set o_cksum_command [list sha1::sha1 -hex] diff --git a/src/modules/punk/args-999999.0a1.0.tm b/src/modules/punk/args-999999.0a1.0.tm index f3512a91..ca1d702e 100644 --- a/src/modules/punk/args-999999.0a1.0.tm +++ b/src/modules/punk/args-999999.0a1.0.tm @@ -876,7 +876,7 @@ tcl::namespace::eval punk::args { set cache_key $args #ideally we would use a fast hash algorithm to produce a short key with low collision probability. - #something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. 
(sha1 often faster on modern cpus) + #something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. (sha1 often faster on modern cpus but terribly slow without an accelerator) #review - check if there is a built-into-tcl way to do this quickly #for now we will just key using the whole string #performance seems ok - memory usage probably not ideal diff --git a/src/modules/punk/mix/base-0.1.tm b/src/modules/punk/mix/base-0.1.tm index a4bc3c70..57f7f5a7 100644 --- a/src/modules/punk/mix/base-0.1.tm +++ b/src/modules/punk/mix/base-0.1.tm @@ -430,7 +430,16 @@ namespace eval punk::mix::base { #not just used by cksum_path. used by caller (e.g fill_relativecksums_from_base_and_relativepathdict via cksum_filter_opts) to determine what opt names passed through variable cksum_default_opts - set cksum_default_opts [dict create -cksum_content 1 -cksum_meta auto -cksum_acls 0 -cksum_usetar auto -cksum_algorithm sha1] + set default_hash sha1 ;#but fall back to md5 if either sha1 is unavailable or unaccelerated (pure tcl sha1 is way slower than pure tcl md5 - can take minutes on even moderate sized source files) + #sha1::Implementations is called inside the catch so that a sha1 package lacking that command also falls back to md5 instead of aborting the module load + if {![catch {package require sha1; ::sha1::Implementations} impls]} { + if {[lsearch -exact -not $impls tcl] < 0} { ;#nothing but pure-tcl is loaded. Do not test the list length: a sole entry may be the accelerated implementation (e.g critcl) + set default_hash md5 + } + } else { + set default_hash md5 + } + set cksum_default_opts [dict create -cksum_content 1 -cksum_meta auto -cksum_acls 0 -cksum_usetar auto -cksum_algorithm $default_hash] proc cksum_default_opts {} { variable cksum_default_opts return $cksum_default_opts @@ -438,9 +447,9 @@ namespace eval punk::mix::base { #crc::cksum is extremely slow in tcllib as at 2023 e.g 20x slower (no c implementation?) 
# - try builtin zlib crc instead? - #sha1 is performant - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration. 
+ #sha1 is performant (when accelerator present) - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration. #adler32 is fastest for some larger files of a few MB but slower on small files (possibly due to Tcl-based file load?) - #sha1 as at 2023 seems a reasonable default + #sha1 as at 2023 seems a reasonable default - (but only if accelerator present) proc cksum_algorithms {} { variable sha3_implementation #sha2 is an alias for sha256 diff --git a/src/modules/textblock-999999.0a1.0.tm b/src/modules/textblock-999999.0a1.0.tm index 7ea386a0..39f1508e 100644 --- a/src/modules/textblock-999999.0a1.0.tm +++ b/src/modules/textblock-999999.0a1.0.tm @@ -95,6 +95,7 @@ tcl::namespace::eval textblock { #NOTE sha1, although computationally more intensive, tends to be faster than md5 on modern cpus #(more likely to be optimised for modern cpu features?) #(This speed improvement may not apply for short strings) + #This is probably only true if tcllibc is available - pure-tcl sha1 is excruciatingly slow... variable use_hash ;#framecache set use_hash none ;#slightly faster but uglier layout for viewing frame_cache display