You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

605 lines
23 KiB

# -*- tcl -*-
# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'dev make' or bin/punkmake to update from <pkg>-buildversion.txt
# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm
#
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem.
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository.
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# (C) CMcC 2010
#
# @@ Meta Begin
# Application punk::trie 0.1.0
# Meta platform tcl
# Meta license <unspecified>
# @@ Meta End
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# doctools header
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[manpage_begin punkshell_module_punk::trie 0 0.1.0]
#[copyright "2010"]
#[titledesc {punk::trie API}] [comment {-- Name section and table of contents description --}]
#[moddesc {punk::trie}] [comment {-- Description at end of page heading --}]
#[require punk::trie]
#[keywords module datastructure trie]
#[description] tcl trie implementation courtesy of CmcC (tcl wiki)
#[para] -
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section Overview]
#[para] overview of punk::trie
#[subsection Concepts]
#[para] -
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Requirements
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[subsection dependencies]
#[para] packages used by punk::trie
#[list_begin itemized]
package require Tcl 8.6-
#*** !doctools
#[item] [package {Tcl 8.6}]
# #package require frobz
# #*** !doctools
# #[item] [package {frobz}]
#*** !doctools
#[list_end]
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section API]
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# oo::class namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# #tcl::namespace::eval punk::trie::class {
# #*** !doctools
# #[subsection {Namespace punk::trie::class}]
# #[para] class definitions
# #if {[tcl::info::commands [tcl::namespace::current]::interface_sample1] eq ""} {
# #*** !doctools
# #[list_begin enumerated]
#
# # oo::class create interface_sample1 {
# # #*** !doctools
# # #[enum] CLASS [class interface_sample1]
# # #[list_begin definitions]
#
# # method test {arg1} {
# # #*** !doctools
# # #[call class::interface_sample1 [method test] [arg arg1]]
# # #[para] test method
# # puts "test: $arg1"
# # }
#
# # #*** !doctools
# # #[list_end] [comment {-- end definitions interface_sample1}]
# # }
#
# #*** !doctools
# #[list_end] [comment {--- end class enumeration ---}]
# #}
# #}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# Base namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
tcl::namespace::eval punk::trie {
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase
#variable xyz
proc Dolog {lvl txt} {
#return "$lvl -- $txt"
#logger calls this in such a way that a straight uplevel can get us the vars/commands in messages substituted
set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] punk::trie '[uplevel [list subst $txt]]'"
puts stderr $msg
}
package require logger
logger::initNamespace ::punk::trie
foreach lvl [logger::levels] {
interp alias {} ::punk::trie::Log_$lvl {} ::punk::trie::Dolog $lvl
log::logproc $lvl ::punk::trie::Log_$lvl
}
#namespace path ::punk::trie::log
#*** !doctools
#[subsection {Namespace punk::trie}]
#[para] Core API functions for punk::trie
if {[tcl::info::commands [tcl::namespace::current]::trieclass] eq ""} {
#*** !doctools
#[list_begin enumerated]
oo::class create [tcl::namespace::current]::trieclass {
#*** !doctools
#[enum] CLASS [class trieclass]
#[list_begin definitions]
variable trie id
method matches {t what} {
#*** !doctools
#[call class::trieclass [method matches] [arg t] [arg what]]
#[para] search for longest prefix, return matching prefix, element and suffix
set matches {}
set wlen [string length $what]
foreach k [lsort -decreasing -dictionary [dict keys $t]] {
set klen [string length $k]
set match ""
for {set i 0} {$i < $klen
&& $i < $wlen
&& [string index $k $i] eq [string index $what $i]
} {incr i} {
append match [string index $k $i]
}
if {$match ne ""} {
lappend matches $match $k
}
}
#Debug.trie {matches: $what -> $matches}
::punk::trie::log::debug {matches: $what -> $matches}
if {[dict size $matches]} {
# find the longest matching prefix
set match [lindex [lsort -dictionary [dict keys $matches]] end]
set mel [dict get $matches $match]
set suffix [string range $what [string length $match] end]
return [list $match $mel $suffix]
} else {
return {} ;# no matches
}
}
# return next unique id if there's no proffered value
method id {value} {
if {$value} {
return $value
} else {
return [incr id]
}
}
# insert an element with a given optional value into trie
# along path given by $args (no need to specify)
method insert {what {value 0} args} {
if {[llength $args]} {
set t [dict get $trie {*}$args]
} else {
set t $trie
}
if {[dict exists $t $what]} {
#Debug.trie {$what is an exact match on path ($args $what)}
::punk::trie::log::debug {$what is an exact match on path ($args $what)}
if {[catch {dict size [dict get $trie {*}$args $what]} size]} {
# the match is a leaf - we're done
} else {
# the match is a dict - we have to add a null
dict set trie {*}$args $what "" [my id $value]
}
return ;# exact match - no change
}
# search for longest prefix
set match [my matches $t $what]
if {![llength $match]} {
;# no matching prefix - new element
#Debug.trie {no matching prefix of '$what' in $t - add it on path ($args $what)}
::punk::trie::log::debug {no matching prefix of '$what' in $t add it on path ($args $what)}
dict set trie {*}$args $what [my id $value]
return
}
lassign $match match mel suffix ;# prefix, element of match, suffix
if {$match ne $mel} {
# the matching element shares a prefix, but has a variant suffix
# it must be split
#Debug.trie {splitting '$mel' along '$match'}
::punk::trie::log::debug {splitting '$mel' along '$match'}
set melC [dict get $t $mel]
dict unset trie {*}$args $mel
dict set trie {*}$args $match [string range $mel [string length $match] end] $melC
}
if {[catch {dict size [dict get $trie {*}$args $match]} size]} {
# the match is a leaf - must be split
if {$match eq $mel} {
# the matching element shares a prefix, but has a variant suffix
# it must be split
#Debug.trie {splitting '$mel' along '$match'}
::punk::trie::log::debug {splitting '$mel' along '$match'}
set melC [dict get $t $mel]
dict unset trie {*}$args $mel
dict set trie {*}$args $match "" $melC
}
#Debug.trie {'$mel' is the longest prefix '$match' but was a leaf - insert '$suffix'}
::punk::trie::log::debug {'$mel' is the longest prefix '$match' but was a leaf - insert '$suffix'}
set melid [dict get $t $mel]
dict set trie {*}$args $match $suffix [my id $value]
} else {
# it's a dict - keep searching
#Debug.trie {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)}
::punk::trie::log::debug {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)}
my insert $suffix $value {*}$args $match
}
return
}
# find a path matching an element $what
# if the element's not found, return the nearest path
method find_path {what args} {
if {[llength $args]} {
set t [dict get $trie {*}$args]
} else {
set t $trie
}
if {[dict exists $t $what]} {
#Debug.trie {$what is an exact match on path ($args $what)}
return [list {*}$args $what] ;# exact match - no change
}
# search for longest prefix
set match [my matches $t $what]
if {![llength $match]} {
return $args
}
lassign $match match mel suffix ;# prefix, element of match, suffix
if {$match ne $mel} {
# the matching element shares a prefix, but has a variant suffix
# no match
return $args
}
if {[catch {dict size [dict get $trie {*}$args $match]} size] || $size == 0} {
# got to a non-matching leaf - no match
return $args
} else {
# it's a dict - keep searching
#Debug.trie {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)}
return [my find_path $suffix {*}$args $match]
}
}
# given a trie, which may have been modified by deletion,
# optimize it by removing empty nodes and coalescing singleton nodes
method optimize {args} {
if {[llength $args]} {
set t [dict get $trie {*}$args]
} else {
set t $trie
}
if {[catch {dict size $t} size]} {
#Debug.trie {optimize leaf '$t' along '$args'}
::punk::trie::log::debug {optimize leaf '$t' along '$args'}
# leaf - leave it
} else {
switch -- $size {
0 {
#Debug.trie {optimize empty dict ($t) along '$args'}
::punk::trie::log::debug {optimize empty dict ($t) along '$args'}
if {[llength $args]} {
dict unset trie {*}$args
}
}
1 {
#Debug.trie {optimize singleton dict ($t) along '$args'}
::punk::trie::log::debug {optimize singleton dict ($t) along '$args'}
lassign $t k v
if {[llength $args]} {
dict unset trie {*}$args
}
append args $k
if {[llength $v]} {
dict set trie {*}$args $v
}
my optimize {*}$args
}
default {
#Debug.trie {optimize dict ($t) along '$args'}
::punk::trie::log::debug {optimize dict ($t) along '$args'}
dict for {k v} $t {
my optimize {*}$args $k
}
}
}
}
}
# delete element $what from trie
method delete {what} {
set path [my find_path $what]
if {[join $path ""] eq $what} {
#Debug.trie {del '$what' along ($path) was [dict get $trie {*}$path]}
if {[catch {dict size [dict get $trie {*}$path]} size]} {
# got to a matching leaf - delete it
dict unset trie {*}$path
set path [lrange $path 0 end-1]
} else {
dict unset trie {*}$path ""
}
my optimize ;# remove empty and singleton elements
} else {
# nothing to delete, guess we're done
}
}
# find the value of element $what in trie,
# error if not found
method find_or_error {what} {
set path [my find_path $what]
if {[join $path ""] eq $what} {
if {[catch {dict size [dict get $trie {*}$path]} size]} {
# got to a matching leaf - done
return [dict get $trie {*}$path]
} else {
#JMN - what could be an exact match for a path, but not be in the trie itself
if {[dict exists $trie {*}$path ""]} {
return [dict get $trie {*}$path ""]
} else {
::punk::trie::log::debug {'$what' matches a path but is not a leaf}
error "'$what' not found"
}
}
} else {
error "'$what' not found"
}
}
#JMN - renamed original find to find_or_error
#prefer not to catch on result - but test for -1
method find {what} {
set path [my find_path $what]
if {[join $path ""] eq $what} {
#presumably we use catch and dict size to avoid llength shimmering large inner dicts to list rep
if {[catch {dict size [dict get $trie {*}$path]} size]} {
# got to a matching leaf - done
return [dict get $trie {*}$path]
} else {
#JMN - what could be an exact match for a path, but not be in the trie itself
if {[dict exists $trie {*}$path ""]} {
return [dict get $trie {*}$path ""]
} else {
::punk::trie::log::debug {'$what' matches a path but is not a leaf}
return -1
}
}
} else {
return -1
}
}
# dump the trie as a string
method dump {} {
return $trie
}
# return a string rep of the trie sorted in dict order
method order {{t {}}} {
if {![llength $t]} {
set t $trie
} elseif {[llength $t] == 1} {
return $t
}
set acc {}
foreach key [lsort -dictionary [dict keys $t]] {
lappend acc $key [my order [dict get $t $key]]
}
return $acc
}
# return the trie as a dict of names with values
method flatten {{t {}} {prefix ""}} {
if {![llength $t]} {
set t $trie
} elseif {[llength $t] == 1} {
return [list $prefix $t]
}
set acc {}
dict for {key val} $t {
lappend acc {*}[my flatten $val $prefix$key]
}
return $acc
}
#shortest possible string to identify an element in the trie using the same principle as tcl::prefix::match
#ie if a stored word is a prefix of any other words - it must be fully specified to identify itself.
#JMN - REVIEW - better algorithms?
#caller having retained all members can avoid flatten call
#by selecting a single 'which' known not to be in the trie (or empty string) - all idents can be returned.
#when all 'which' members are in the tree - scanning stops when they're all found
# - and a dict containing result and scanned keys is returned
# - result contains a dict with keys for each which member
# - scanned contains a dict of all words longer than our shortest which - (and a subset of words the same length)
method shortest_idents {which {allmembers {}}} {
set t $trie
if {![llength $allmembers]} {
set members [dict keys [my flatten]]
} else {
set members $allmembers
}
set len_members [lmap m $members {list [string length $m] $m}]
set longestfirst [lsort -index 0 -integer -decreasing $len_members]
set longestfirst [lmap v $longestfirst {lindex $v 1}]
set taken [dict create]
set scanned [dict create]
set result [dict create] ;#words in our which list - if found
foreach w $longestfirst {
set path [my find_path $w]
if {[dict exists $taken $w]} {
#whole word - no unique prefix
dict set scanned $w $w
if {$w in $which} {
#puts stderr "$w -> $w"
dict set result $w $w
if {[dict size $result] == [llength $which]} {
return [dict create result $result scanned $scanned]
}
}
continue
}
set acc ""
foreach p [lrange $path 0 end-1] {
dict set taken [append acc $p] 1 ;#no need to test first - just set even though may already be present
}
append acc [string index [lindex $path end] 0]
dict set scanned $w $acc ;#sorted by length - so no other can have this prefix - and no longer necessary
if {$w in $which} {
#puts stderr "$w -> $acc"
dict set result $w $acc
if {[dict size $result] == [llength $which]} {
return [dict create result $result scanned $scanned]
}
}
}
return [dict create result $result scanned $scanned]
}
# overwrite the trie
method set {t} {
set trie $t
}
constructor {args} {
set trie {}
set id 0
foreach a $args {
my insert $a
}
}
#*** !doctools
#[list_end] [comment {--- end definitions ---}]
}
#*** !doctools
#[list_end] [comment {--- end class enumeration ---}]
set testlist [list blah x black blacken]
proc test1 {} {
#JMN
#test that find_or_error of a path that isn't stored as a value returns an appropriate error
#(used to report couldn't find dict key "")
set t [punk::trie::trieclass new blah x black blacken]
if {[catch {$t find_or_error bla} errM]} {
puts stderr "should be error indicating 'bla' not found"
puts stderr "err during $t find bla\n$errM"
}
return $t
}
# oo::class create interface_sample1 {
# #*** !doctools
# #[enum] CLASS [class interface_sample1]
# #[list_begin definitions]
# method test {arg1} {
# #*** !doctools
# #[call class::interface_sample1 [method test] [arg arg1]]
# #[para] test method
# puts "test: $arg1"
# }
# #*** !doctools
# #[list_end] [comment {-- end definitions interface_sample1}]
# }
}
#proc sample1 {p1 n args} {
# #*** !doctools
# #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]]
# #[para]Description of sample1
# #[para] Arguments:
# # [list_begin arguments]
# # [arg_def tring p1] A description of string argument p1.
# # [arg_def integer n] A description of integer argument n.
# # [list_end]
# return "ok"
#}
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# Secondary API namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
tcl::namespace::eval punk::trie::lib {
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase
tcl::namespace::path [tcl::namespace::parent]
#*** !doctools
#[subsection {Namespace punk::trie::lib}]
#[para] Secondary functions that are part of the API
#[list_begin definitions]
#proc utility1 {p1 args} {
# #*** !doctools
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]]
# #[para]Description of utility1
# return 1
#}
#*** !doctools
#[list_end] [comment {--- end definitions namespace punk::trie::lib ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section Internal]
#tcl::namespace::eval punk::trie::system {
#*** !doctools
#[subsection {Namespace punk::trie::system}]
#[para] Internal functions that are not part of the API
#}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Ready
package provide punk::trie [tcl::namespace::eval punk::trie {
variable pkg punk::trie
variable version
set version 0.1.0
}]
return
#*** !doctools
#[manpage_end]