set default_hash sha1 ;#but fall back to md5 if either sha1 is unavailable or unaccelerated (pure tcl sha1 is way slower than pure tcl md5 - can take minutes on even moderate sized source files)
set default_hash sha1 ;#but fall back to md5 if either sha1 is unavailable or unaccelerated (pure tcl sha1 is way slower than pure tcl md5 - can take minutes on even moderate sized source files)
set subdirs [glob -nocomplain -dir $current_source_dir -type d -tail *]
set subdirs [glob -nocomplain -dir $current_source_dir -type d -tail *]
@ -1895,7 +1922,7 @@ namespace eval punkcheck {
if {[llength $files_copied] || [llength $files_skipped]} {
if {[llength $files_copied] || [llength $files_skipped]} {
#puts stdout ">>>>>>>>>>>>>>>>>>>"
#puts stdout ">>>>>>>>>>>>>>>>>>>"
set saveresult [punkcheck::save_records_to_file $punkcheck_records $punkcheck_file]
set saveresult [punkcheck::save_records_to_file $punkcheck_records $punkcheck_file]
puts stdout "punkcheck::install [dict get $saveresult recordcount] records saved as [dict get $saveresult linecount] lines to $punkcheck_file copied: [llength $files_copied] skipped: [llength $files_skipped]"
puts stdout "\npunkcheck::install [dict get $saveresult recordcount] records saved as [dict get $saveresult linecount] lines to $punkcheck_file copied: [llength $files_copied] skipped: [llength $files_skipped]"
set default_hash sha1 ;#but fall back to md5 if either sha1 is unavailable or unaccelerated (pure tcl sha1 is way slower than pure tcl md5 - can take minutes on even moderate sized source files)
set default_hash sha1 ;#but fall back to md5 if either sha1 is unavailable or unaccelerated (pure tcl sha1 is way slower than pure tcl md5 - can take minutes on even moderate sized source files)
set subdirs [glob -nocomplain -dir $current_source_dir -type d -tail *]
set subdirs [glob -nocomplain -dir $current_source_dir -type d -tail *]
@ -1895,7 +1922,7 @@ namespace eval punkcheck {
if {[llength $files_copied] || [llength $files_skipped]} {
if {[llength $files_copied] || [llength $files_skipped]} {
#puts stdout ">>>>>>>>>>>>>>>>>>>"
#puts stdout ">>>>>>>>>>>>>>>>>>>"
set saveresult [punkcheck::save_records_to_file $punkcheck_records $punkcheck_file]
set saveresult [punkcheck::save_records_to_file $punkcheck_records $punkcheck_file]
puts stdout "punkcheck::install [dict get $saveresult recordcount] records saved as [dict get $saveresult linecount] lines to $punkcheck_file copied: [llength $files_copied] skipped: [llength $files_skipped]"
puts stdout "\npunkcheck::install [dict get $saveresult recordcount] records saved as [dict get $saveresult linecount] lines to $punkcheck_file copied: [llength $files_copied] skipped: [llength $files_skipped]"
#ideally we would use a fast hash algorithm to produce a short key with low collision probability.
#ideally we would use a fast hash algorithm to produce a short key with low collision probability.
#something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. (sha1 often faster on modern cpus)
#something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. (sha1 often faster on modern cpus but terribly slow without an accelerator)
#review - check if there is a built-into-tcl way to do this quickly
#review - check if there is a built-into-tcl way to do this quickly
#for now we will just key using the whole string
#for now we will just key using the whole string
#performance seems ok - memory usage probably not ideal
#performance seems ok - memory usage probably not ideal
#not just used by cksum_path. used by caller (e.g fill_relativecksums_from_base_and_relativepathdict via cksum_filter_opts) to determine what opt names passed through
#not just used by cksum_path. used by caller (e.g fill_relativecksums_from_base_and_relativepathdict via cksum_filter_opts) to determine what opt names passed through
variable cksum_default_opts
variable cksum_default_opts
set cksum_default_opts [dict create -cksum_content 1 -cksum_meta auto -cksum_acls 0 -cksum_usetar auto -cksum_algorithm sha1]
set default_hash sha1 ;#but fall back to md5 if either sha1 is unavailable or unaccelerated (pure tcl sha1 is way slower than pure tcl md5 - can take minutes on even moderate sized source files)
#crc::cksum is extremely slow in tcllib as at 2023 e.g 20x slower (no c implementation?)
#crc::cksum is extremely slow in tcllib as at 2023 e.g 20x slower (no c implementation?)
# - try builtin zlib crc instead?
# - try builtin zlib crc instead?
#sha1 is performant - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration.
#sha1 is performant (when accelerator present) - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration.
#adler32 is fastest for some larger files of a few MB but slower on small files (possibly due to Tcl-based file load?)
#adler32 is fastest for some larger files of a few MB but slower on small files (possibly due to Tcl-based file load?)
#sha1 as at 2023 seems a reasonable default
#sha1 as at 2023 seems a reasonable default - (but only if accelerator present)
set subdirs [glob -nocomplain -dir $current_source_dir -type d -tail *]
set subdirs [glob -nocomplain -dir $current_source_dir -type d -tail *]
@ -1874,7 +1922,7 @@ namespace eval punkcheck {
if {[llength $files_copied] || [llength $files_skipped]} {
if {[llength $files_copied] || [llength $files_skipped]} {
#puts stdout ">>>>>>>>>>>>>>>>>>>"
#puts stdout ">>>>>>>>>>>>>>>>>>>"
set saveresult [punkcheck::save_records_to_file $punkcheck_records $punkcheck_file]
set saveresult [punkcheck::save_records_to_file $punkcheck_records $punkcheck_file]
puts stdout "punkcheck::install [dict get $saveresult recordcount] records saved as [dict get $saveresult linecount] lines to $punkcheck_file copied: [llength $files_copied] skipped: [llength $files_skipped]"
puts stdout "\npunkcheck::install [dict get $saveresult recordcount] records saved as [dict get $saveresult linecount] lines to $punkcheck_file copied: [llength $files_copied] skipped: [llength $files_skipped]"
#ideally we would use a fast hash algorithm to produce a short key with low collision probability.
#ideally we would use a fast hash algorithm to produce a short key with low collision probability.
#something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. (sha1 often faster on modern cpus)
#something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. (sha1 often faster on modern cpus but terribly slow without an accelerator)
#review - check if there is a built-into-tcl way to do this quickly
#review - check if there is a built-into-tcl way to do this quickly
#for now we will just key using the whole string
#for now we will just key using the whole string
#performance seems ok - memory usage probably not ideal
#performance seems ok - memory usage probably not ideal
#not just used by cksum_path. used by caller (e.g fill_relativecksums_from_base_and_relativepathdict via cksum_filter_opts) to determine what opt names passed through
#not just used by cksum_path. used by caller (e.g fill_relativecksums_from_base_and_relativepathdict via cksum_filter_opts) to determine what opt names passed through
variable cksum_default_opts
variable cksum_default_opts
set cksum_default_opts [dict create -cksum_content 1 -cksum_meta auto -cksum_acls 0 -cksum_usetar auto -cksum_algorithm sha1]
set default_hash sha1 ;#but fall back to md5 if either sha1 is unavailable or unaccelerated (pure tcl sha1 is way slower than pure tcl md5 - can take minutes on even moderate sized source files)
#crc::cksum is extremely slow in tcllib as at 2023 e.g 20x slower (no c implementation?)
#crc::cksum is extremely slow in tcllib as at 2023 e.g 20x slower (no c implementation?)
# - try builtin zlib crc instead?
# - try builtin zlib crc instead?
#sha1 is performant - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration.
#sha1 is performant (when accelerator present) - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration.
#adler32 is fastest for some larger files of a few MB but slower on small files (possibly due to Tcl-based file load?)
#adler32 is fastest for some larger files of a few MB but slower on small files (possibly due to Tcl-based file load?)
#sha1 as at 2023 seems a reasonable default
#sha1 as at 2023 seems a reasonable default - (but only if accelerator present)
set subdirs [glob -nocomplain -dir $current_source_dir -type d -tail *]
set subdirs [glob -nocomplain -dir $current_source_dir -type d -tail *]
@ -1874,7 +1922,7 @@ namespace eval punkcheck {
if {[llength $files_copied] || [llength $files_skipped]} {
if {[llength $files_copied] || [llength $files_skipped]} {
#puts stdout ">>>>>>>>>>>>>>>>>>>"
#puts stdout ">>>>>>>>>>>>>>>>>>>"
set saveresult [punkcheck::save_records_to_file $punkcheck_records $punkcheck_file]
set saveresult [punkcheck::save_records_to_file $punkcheck_records $punkcheck_file]
puts stdout "punkcheck::install [dict get $saveresult recordcount] records saved as [dict get $saveresult linecount] lines to $punkcheck_file copied: [llength $files_copied] skipped: [llength $files_skipped]"
puts stdout "\npunkcheck::install [dict get $saveresult recordcount] records saved as [dict get $saveresult linecount] lines to $punkcheck_file copied: [llength $files_copied] skipped: [llength $files_skipped]"
#ideally we would use a fast hash algorithm to produce a short key with low collision probability.
#ideally we would use a fast hash algorithm to produce a short key with low collision probability.
#something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. (sha1 often faster on modern cpus)
#something like md5 would be ok (this is non cryptographic) - but md5 uses open and isn't usable by default in a safe interp. (sha1 often faster on modern cpus but terribly slow without an accelerator)
#review - check if there is a built-into-tcl way to do this quickly
#review - check if there is a built-into-tcl way to do this quickly
#for now we will just key using the whole string
#for now we will just key using the whole string
#performance seems ok - memory usage probably not ideal
#performance seems ok - memory usage probably not ideal
#not just used by cksum_path. used by caller (e.g fill_relativecksums_from_base_and_relativepathdict via cksum_filter_opts) to determine what opt names passed through
#not just used by cksum_path. used by caller (e.g fill_relativecksums_from_base_and_relativepathdict via cksum_filter_opts) to determine what opt names passed through
variable cksum_default_opts
variable cksum_default_opts
set cksum_default_opts [dict create -cksum_content 1 -cksum_meta auto -cksum_acls 0 -cksum_usetar auto -cksum_algorithm sha1]
set default_hash sha1 ;#but fall back to md5 if either sha1 is unavailable or unaccelerated (pure tcl sha1 is way slower than pure tcl md5 - can take minutes on even moderate sized source files)
#crc::cksum is extremely slow in tcllib as at 2023 e.g 20x slower (no c implementation?)
#crc::cksum is extremely slow in tcllib as at 2023 e.g 20x slower (no c implementation?)
# - try builtin zlib crc instead?
# - try builtin zlib crc instead?
#sha1 is performant - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration.
#sha1 is performant (when accelerator present) - and this is not being used in a cryptographic or adversarial context - so performance and practical unlikelihood of accidental collisions should be the main consideration.
#adler32 is fastest for some larger files of a few MB but slower on small files (possibly due to Tcl-based file load?)
#adler32 is fastest for some larger files of a few MB but slower on small files (possibly due to Tcl-based file load?)
#sha1 as at 2023 seems a reasonable default
#sha1 as at 2023 seems a reasonable default - (but only if accelerator present)