# -*- tcl -*- # Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'dev make' or bin/punkmake to update from -buildversion.txt # module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm # # Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. # Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # (C) CMcC 2010 # # @@ Meta Begin # Application punk::trie 0.1.0 # Meta platform tcl # Meta license # @@ Meta End # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # doctools header # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[manpage_begin punkshell_module_punk::trie 0 0.1.0] #[copyright "2010"] #[titledesc {punk::trie API}] [comment {-- Name section and table of contents description --}] #[moddesc {punk::trie}] [comment {-- Description at end of page heading --}] #[require punk::trie] #[keywords module datastructure trie] #[description] tcl trie implementation courtesy of CmcC (tcl wiki) #[para] - # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section Overview] #[para] overview of punk::trie #[subsection Concepts] #[para] - # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Requirements # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[subsection dependencies] #[para] packages used by punk::trie #[list_begin itemized] package require Tcl 8.6- #*** !doctools #[item] [package {Tcl 8.6}] # #package require frobz # #*** !doctools # #[item] [package {frobz}] #*** !doctools #[list_end] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section API] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # oo::class namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # #tcl::namespace::eval punk::trie::class { # #*** !doctools # #[subsection {Namespace punk::trie::class}] # #[para] class definitions # #if {[tcl::info::commands [tcl::namespace::current]::interface_sample1] eq ""} { # #*** !doctools # #[list_begin enumerated] # # # oo::class create interface_sample1 { # # #*** !doctools # # #[enum] CLASS [class interface_sample1] # # #[list_begin definitions] # # # method test {arg1} { # # #*** !doctools # # #[call class::interface_sample1 [method test] [arg arg1]] # # #[para] test method # # puts "test: $arg1" # # } # # # #*** !doctools # # #[list_end] [comment {-- end definitions interface_sample1}] # # } # # #*** !doctools # #[list_end] [comment {--- end class enumeration ---}] # #} # #} # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # Base namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ tcl::namespace::eval punk::trie { tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase #variable xyz proc Dolog {lvl txt} { #return "$lvl -- $txt" #logger calls this in such a way that a straight uplevel can get us the vars/commands in messages substituted set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] punk::trie '[uplevel [list subst $txt]]'" puts stderr $msg } package require logger logger::initNamespace ::punk::trie foreach lvl [logger::levels] { interp alias {} ::punk::trie::Log_$lvl {} ::punk::trie::Dolog $lvl log::logproc $lvl ::punk::trie::Log_$lvl } #namespace path ::punk::trie::log #*** !doctools #[subsection {Namespace punk::trie}] #[para] Core API functions for punk::trie if {[tcl::info::commands [tcl::namespace::current]::trieclass] eq ""} { #*** !doctools #[list_begin enumerated] oo::class create [tcl::namespace::current]::trieclass { #*** !doctools #[enum] CLASS [class trieclass] #[list_begin definitions] variable trie id method matches {t what} { #*** !doctools #[call class::trieclass [method matches] [arg t] [arg what]] #[para] search for longest prefix, return matching prefix, element and suffix set matches {} set wlen [string length $what] foreach k [lsort -decreasing -dictionary [dict keys $t]] { set klen [string length $k] set match "" for {set i 0} {$i < $klen && $i < $wlen && [string index $k $i] eq [string index $what $i] } {incr i} { append match [string index $k $i] } if {$match ne ""} { lappend matches $match $k } } #Debug.trie {matches: $what -> $matches} ::punk::trie::log::debug {matches: $what -> $matches} if {[dict size $matches]} { # find the longest matching prefix set match [lindex [lsort -dictionary [dict keys $matches]] end] set mel [dict get $matches $match] set suffix [string range $what [string length $match] end] return [list $match $mel $suffix] } else { return {} ;# no matches } } # return next unique id if there's no proffered value method id {value} { if {$value} { return $value } else { return [incr id] } } # insert an element with a given optional value into trie # along path given by $args (no need to specify) method insert {what {value 0} args} { if {[llength $args]} { set t [dict get $trie {*}$args] } else { set t $trie } if {[dict exists $t $what]} { #Debug.trie {$what is an exact match on path ($args $what)} ::punk::trie::log::debug {$what is an exact match on path ($args $what)} if {[catch {dict size [dict get $trie {*}$args $what]} size]} { # the match is a leaf - we're done } else { # the match is a dict - we have to add a null dict set trie {*}$args $what "" [my id $value] } return ;# exact match - no change } # search for longest prefix set match [my matches $t $what] if {![llength $match]} { ;# no matching prefix - new element #Debug.trie {no matching prefix of '$what' in $t - add it on path ($args $what)} ::punk::trie::log::debug {no matching prefix of '$what' in $t add it on path ($args $what)} dict set trie {*}$args $what [my id $value] return } lassign $match match mel suffix ;# prefix, element of match, suffix if {$match ne $mel} { # the matching element shares a prefix, but has a variant suffix # it must be split #Debug.trie {splitting '$mel' along '$match'} ::punk::trie::log::debug {splitting '$mel' along '$match'} set melC [dict get $t $mel] dict unset trie {*}$args $mel dict set trie {*}$args $match [string range $mel [string length $match] end] $melC } if {[catch {dict size [dict get $trie {*}$args $match]} size]} { # the match is a leaf - must be split if {$match eq $mel} { # the matching element shares a prefix, but has a variant suffix # it must be split #Debug.trie {splitting '$mel' along '$match'} ::punk::trie::log::debug {splitting '$mel' along '$match'} set melC [dict get $t $mel] dict unset trie {*}$args $mel dict set trie {*}$args $match "" $melC } #Debug.trie {'$mel' is the longest prefix '$match' but was a leaf - insert '$suffix'} ::punk::trie::log::debug {'$mel' is the longest prefix '$match' but was a leaf - insert '$suffix'} set melid [dict get $t $mel] dict set trie {*}$args $match $suffix [my id $value] } else { # it's a dict - keep searching #Debug.trie {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)} ::punk::trie::log::debug {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)} my insert $suffix $value {*}$args $match } return } # find a path matching an element $what # if the element's not found, return the nearest path method find_path {what args} { if {[llength $args]} { set t [dict get $trie {*}$args] } else { set t $trie } if {[dict exists $t $what]} { #Debug.trie {$what is an exact match on path ($args $what)} return [list {*}$args $what] ;# exact match - no change } # search for longest prefix set match [my matches $t $what] if {![llength $match]} { return $args } lassign $match match mel suffix ;# prefix, element of match, suffix if {$match ne $mel} { # the matching element shares a prefix, but has a variant suffix # no match return $args } if {[catch {dict size [dict get $trie {*}$args $match]} size] || $size == 0} { # got to a non-matching leaf - no match return $args } else { # it's a dict - keep searching #Debug.trie {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)} return [my find_path $suffix {*}$args $match] } } # given a trie, which may have been modified by deletion, # optimize it by removing empty nodes and coalescing singleton nodes method optimize {args} { if {[llength $args]} { set t [dict get $trie {*}$args] } else { set t $trie } if {[catch {dict size $t} size]} { #Debug.trie {optimize leaf '$t' along '$args'} ::punk::trie::log::debug {optimize leaf '$t' along '$args'} # leaf - leave it } else { switch -- $size { 0 { #Debug.trie {optimize empty dict ($t) along '$args'} ::punk::trie::log::debug {optimize empty dict ($t) along '$args'} if {[llength $args]} { dict unset trie {*}$args } } 1 { #Debug.trie {optimize singleton dict ($t) along '$args'} ::punk::trie::log::debug {optimize singleton dict ($t) along '$args'} lassign $t k v if {[llength $args]} { dict unset trie {*}$args } append args $k if {[llength $v]} { dict set trie {*}$args $v } my optimize {*}$args } default { #Debug.trie {optimize dict ($t) along '$args'} ::punk::trie::log::debug {optimize dict ($t) along '$args'} dict for {k v} $t { my optimize {*}$args $k } } } } } # delete element $what from trie method delete {what} { set path [my find_path $what] if {[join $path ""] eq $what} { #Debug.trie {del '$what' along ($path) was [dict get $trie {*}$path]} if {[catch {dict size [dict get $trie {*}$path]} size]} { # got to a matching leaf - delete it dict unset trie {*}$path set path [lrange $path 0 end-1] } else { dict unset trie {*}$path "" } my optimize ;# remove empty and singleton elements } else { # nothing to delete, guess we're done } } # find the value of element $what in trie, # error if not found method find_or_error {what} { set path [my find_path $what] if {[join $path ""] eq $what} { if {[catch {dict size [dict get $trie {*}$path]} size]} { # got to a matching leaf - done return [dict get $trie {*}$path] } else { #JMN - what could be an exact match for a path, but not be in the trie itself if {[dict exists $trie {*}$path ""]} { return [dict get $trie {*}$path ""] } else { ::punk::trie::log::debug {'$what' matches a path but is not a leaf} error "'$what' not found" } } } else { error "'$what' not found" } } #JMN - renamed original find to find_or_error #prefer not to catch on result - but test for -1 method find {what} { set path [my find_path $what] if {[join $path ""] eq $what} { #presumably we use catch and dict size to avoid llength shimmering large inner dicts to list rep if {[catch {dict size [dict get $trie {*}$path]} size]} { # got to a matching leaf - done return [dict get $trie {*}$path] } else { #JMN - what could be an exact match for a path, but not be in the trie itself if {[dict exists $trie {*}$path ""]} { return [dict get $trie {*}$path ""] } else { ::punk::trie::log::debug {'$what' matches a path but is not a leaf} return -1 } } } else { return -1 } } # dump the trie as a string method dump {} { return $trie } # return a string rep of the trie sorted in dict order method order {{t {}}} { if {![llength $t]} { set t $trie } elseif {[llength $t] == 1} { return $t } set acc {} foreach key [lsort -dictionary [dict keys $t]] { lappend acc $key [my order [dict get $t $key]] } return $acc } # return the trie as a dict of names with values method flatten {{t {}} {prefix ""}} { if {![llength $t]} { set t $trie } elseif {[llength $t] == 1} { return [list $prefix $t] } set acc {} dict for {key val} $t { lappend acc {*}[my flatten $val $prefix$key] } return $acc } #shortest possible string to identify an element in the trie using the same principle as tcl::prefix::match #ie if a stored word is a prefix of any other words - it must be fully specified to identify itself. #JMN - REVIEW - better algorithms? #caller having retained all members can avoid flatten call #by selecting a single 'which' known not to be in the trie (or empty string) - all idents can be returned. #when all 'which' members are in the tree - scanning stops when they're all found # - and a dict containing result and scanned keys is returned # - result contains a dict with keys for each which member # - scanned contains a dict of all words longer than our shortest which - (and a subset of words the same length) method shortest_idents {which {allmembers {}}} { set t $trie if {![llength $allmembers]} { set members [dict keys [my flatten]] } else { set members $allmembers } set len_members [lmap m $members {list [string length $m] $m}] set longestfirst [lsort -index 0 -integer -decreasing $len_members] set longestfirst [lmap v $longestfirst {lindex $v 1}] set taken [dict create] set scanned [dict create] set result [dict create] ;#words in our which list - if found foreach w $longestfirst { set path [my find_path $w] if {[dict exists $taken $w]} { #whole word - no unique prefix dict set scanned $w $w if {$w in $which} { #puts stderr "$w -> $w" dict set result $w $w if {[dict size $result] == [llength $which]} { return [dict create result $result scanned $scanned] } } continue } set acc "" foreach p [lrange $path 0 end-1] { dict set taken [append acc $p] 1 ;#no need to test first - just set even though may already be present } append acc [string index [lindex $path end] 0] dict set scanned $w $acc ;#sorted by length - so no other can have this prefix - and no longer necessary if {$w in $which} { #puts stderr "$w -> $acc" dict set result $w $acc if {[dict size $result] == [llength $which]} { return [dict create result $result scanned $scanned] } } } return [dict create result $result scanned $scanned] } # overwrite the trie method set {t} { set trie $t } constructor {args} { set trie {} set id 0 foreach a $args { my insert $a } } #*** !doctools #[list_end] [comment {--- end definitions ---}] } #*** !doctools #[list_end] [comment {--- end class enumeration ---}] set testlist [list blah x black blacken] proc test1 {} { #JMN #test that find_or_error of a path that isn't stored as a value returns an appropriate error #(used to report couldn't find dict key "") set t [punk::trie::trieclass new blah x black blacken] if {[catch {$t find_or_error bla} errM]} { puts stderr "should be error indicating 'bla' not found" puts stderr "err during $t find bla\n$errM" } return $t } # oo::class create interface_sample1 { # #*** !doctools # #[enum] CLASS [class interface_sample1] # #[list_begin definitions] # method test {arg1} { # #*** !doctools # #[call class::interface_sample1 [method test] [arg arg1]] # #[para] test method # puts "test: $arg1" # } # #*** !doctools # #[list_end] [comment {-- end definitions interface_sample1}] # } } #proc sample1 {p1 n args} { # #*** !doctools # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] # #[para]Description of sample1 # #[para] Arguments: # # [list_begin arguments] # # [arg_def tring p1] A description of string argument p1. # # [arg_def integer n] A description of integer argument n. # # [list_end] # return "ok" #} } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # Secondary API namespace # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ tcl::namespace::eval punk::trie::lib { tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase tcl::namespace::path [tcl::namespace::parent] #*** !doctools #[subsection {Namespace punk::trie::lib}] #[para] Secondary functions that are part of the API #[list_begin definitions] #proc utility1 {p1 args} { # #*** !doctools # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] # #[para]Description of utility1 # return 1 #} #*** !doctools #[list_end] [comment {--- end definitions namespace punk::trie::lib ---}] } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section Internal] #tcl::namespace::eval punk::trie::system { #*** !doctools #[subsection {Namespace punk::trie::system}] #[para] Internal functions that are not part of the API #} # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Ready package provide punk::trie [tcl::namespace::eval punk::trie { variable pkg punk::trie variable version set version 0.1.0 }] return #*** !doctools #[manpage_end]