#ideally we would use a fast hash algorithm to produce a short key with low collision probability.
#something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. (sha1 is often faster on modern cpus but terribly slow without an accelerator)
#review - check if there is a built-into-tcl way to do this quickly
#for now we will just key using the whole string
#performance seems ok - memory usage probably not ideal
#not just used by cksum_path. used by caller (e.g fill_relativecksums_from_base_and_relativepathdict via cksum_filter_opts) to determine what opt names passed through
#Select the default checksum algorithm and publish the default cksum option dict.
#sha1 is preferred, but only when the sha1 package loads AND reports an implementation other than
#the pure-Tcl one. Otherwise md5 is used: pure tcl sha1 is way slower than pure tcl md5 - it can take
#minutes on even moderate sized source files.
#Variables set here: default_hash, impls (only when the sha1 package loads), cksum_default_opts.
variable cksum_default_opts
set default_hash md5
if {![catch {package require sha1}]} {
    #If the implementation list cannot be queried, treat sha1 as unaccelerated rather than failing the load.
    if {[catch {::sha1::Implementations} impls]} {
        set impls [list]
    }
    #Look for any implementation other than 'tcl' instead of counting entries:
    #a list of length 1 cannot distinguish 'only pure tcl' from 'only an accelerator'.
    if {[llength [lsearch -all -inline -exact -not $impls tcl]] > 0} {
        set default_hash sha1
    }
}
#Single assignment of the defaults, using whichever algorithm was selected above.
set cksum_default_opts [dict create -cksum_content 1 -cksum_meta auto -cksum_acls 0 -cksum_usetar auto -cksum_algorithm $default_hash]
#crc::cksum is extremely slow in tcllib as at 2023 e.g 20x slower (no c implementation?)
# - try builtin zlib crc instead?
#sha1 is performant (when an accelerator is present) - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration.
#adler32 is fastest for some larger files of a few MB but slower on small files (possibly due to Tcl-based file load?)
#sha1 as at 2023 seems a reasonable default - (but only if an accelerator is present)
#ideally we would use a fast hash algorithm to produce a short key with low collision probability.
#something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. (sha1 is often faster on modern cpus but terribly slow without an accelerator)
#review - check if there is a built-into-tcl way to do this quickly
#for now we will just key using the whole string
#performance seems ok - memory usage probably not ideal
#not just used by cksum_path. used by caller (e.g fill_relativecksums_from_base_and_relativepathdict via cksum_filter_opts) to determine what opt names passed through
#NOTE(review): this section repeats the default-selection code that appears earlier in this file.
#Re-running it is harmless (it recomputes the same values) but one copy is probably a merge artifact - confirm and remove.
#
#Select the default checksum algorithm and publish the default cksum option dict.
#sha1 is preferred, but only when the sha1 package loads AND reports an implementation other than
#the pure-Tcl one. Otherwise md5 is used: pure tcl sha1 is way slower than pure tcl md5 - it can take
#minutes on even moderate sized source files.
#Variables set here: default_hash, impls (only when the sha1 package loads), cksum_default_opts.
variable cksum_default_opts
set default_hash md5
if {![catch {package require sha1}]} {
    #If the implementation list cannot be queried, treat sha1 as unaccelerated rather than failing the load.
    if {[catch {::sha1::Implementations} impls]} {
        set impls [list]
    }
    #Look for any implementation other than 'tcl' instead of counting entries:
    #a list of length 1 cannot distinguish 'only pure tcl' from 'only an accelerator'.
    if {[llength [lsearch -all -inline -exact -not $impls tcl]] > 0} {
        set default_hash sha1
    }
}
#Single assignment of the defaults, using whichever algorithm was selected above.
set cksum_default_opts [dict create -cksum_content 1 -cksum_meta auto -cksum_acls 0 -cksum_usetar auto -cksum_algorithm $default_hash]
#crc::cksum is extremely slow in tcllib as at 2023 e.g 20x slower (no c implementation?)
# - try builtin zlib crc instead?
#sha1 is performant (when an accelerator is present) - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration.
#adler32 is fastest for some larger files of a few MB but slower on small files (possibly due to Tcl-based file load?)
#sha1 as at 2023 seems a reasonable default - (but only if an accelerator is present)