From 3621745d687086331dccf052c2b2494fff37b9b1 Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Sun, 6 Apr 2025 12:52:45 +1000 Subject: [PATCH] update tomlish - datatructure fixes --- src/bootsupport/modules/dictn-0.1.1.tm | 349 +++ .../modules/include_modules.config | 1 + src/bootsupport/modules/test/tomlish-1.1.3.tm | Bin 41874 -> 47064 bytes src/bootsupport/modules/tomlish-1.1.4.tm | 2009 +++++++++-------- .../src/bootsupport/modules/dictn-0.1.1.tm | 349 +++ .../modules/include_modules.config | 1 + .../bootsupport/modules/test/tomlish-1.1.3.tm | Bin 41874 -> 47064 bytes .../src/bootsupport/modules/tomlish-1.1.4.tm | 2009 +++++++++-------- .../src/bootsupport/modules/dictn-0.1.1.tm | 349 +++ .../modules/include_modules.config | 1 + .../bootsupport/modules/test/tomlish-1.1.3.tm | Bin 41874 -> 47064 bytes .../src/bootsupport/modules/tomlish-1.1.4.tm | 2009 +++++++++-------- src/vendormodules/dictn-0.1.1.tm | 349 +++ src/vendormodules/include_modules.config | 1 + src/vendormodules/test/tomlish-1.1.3.tm | Bin 41874 -> 47064 bytes src/vendormodules/tomlish-1.1.4.tm | 2009 +++++++++-------- src/vfs/_vfscommon.vfs/modules/dictn-0.1.1.tm | 349 +++ .../modules/test/tomlish-1.1.3.tm | Bin 41874 -> 47064 bytes .../_vfscommon.vfs/modules/tomlish-1.1.4.tm | 2009 +++++++++-------- 19 files changed, 6839 insertions(+), 4955 deletions(-) create mode 100644 src/bootsupport/modules/dictn-0.1.1.tm create mode 100644 src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/dictn-0.1.1.tm create mode 100644 src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/dictn-0.1.1.tm create mode 100644 src/vendormodules/dictn-0.1.1.tm create mode 100644 src/vfs/_vfscommon.vfs/modules/dictn-0.1.1.tm diff --git a/src/bootsupport/modules/dictn-0.1.1.tm b/src/bootsupport/modules/dictn-0.1.1.tm new file mode 100644 index 00000000..c9ef87f2 --- /dev/null +++ b/src/bootsupport/modules/dictn-0.1.1.tm @@ -0,0 +1,349 @@ +# -*- tcl -*- +# Maintenance Instruction: 
leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt
+#
+# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem.
+# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository.
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+# (C) 2023
+#
+# @@ Meta Begin
+# Application dictn 0.1.1
+# Meta platform tcl
+# Meta license
+# @@ Meta End
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+## Requirements
+##e.g package require frobz
+
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+namespace eval dictn {
+    namespace export {[a-z]*} ;#only commands whose names start with a lowercase letter are exported
+    namespace ensemble create ;#exported commands become subcommands of a 'dictn' ensemble command
+}
+
+
+## ::dictn::append
+#This can of course 'ruin' a nested dict if applied to the wrong element
+# - i.e using the string op 'append' on an element that is itself a nested dict is analogous to the standard Tcl:
+# %set list {a b {c d}}
+# %append list x
+# a b {c d}x
+# IOW - don't do that unless you really know that's what you want.
+# dictn append dictVariable path ?string? - append string to the value found at the nested key path.
+proc ::dictn::append {dictvar path {value {}}} {
+    if {[llength $path] == 1} {
+        uplevel 1 [list dict append $dictvar {*}$path $value]
+    } else {
+        upvar 1 $dictvar dvar
+        #the target element must already exist - dict get raises an error otherwise
+        ::set str [dict get $dvar {*}$path]
+        ::append str $value ;#fully qualified: a bare 'append' would resolve to ::dictn::append in this namespace
+        dict set dvar {*}$path $str
+    }
+}
+# dictn create ?path value ...? - build a dict from path/value pairs; each path is a list of keys.
+proc ::dictn::create {args} {
+    ::set data {}
+    foreach {path val} $args {
+        dict set data {*}$path $val
+    }
+    return $data
+}
+# dictn exists dictValue path - result of 'dict exists' for the nested key path.
+proc ::dictn::exists {dictval path} {
+    return [dict exists $dictval {*}$path]
+}
+# dictn filter dictValue path filterType ?arg ...? - 'dict filter' applied to the sub-dict found at path.
+proc ::dictn::filter {dictval path filterType args} {
+    ::set sub [dict get $dictval {*}$path]
+    uplevel 1 [list dict filter $sub $filterType {*}$args] ;#caller's frame, so a 'script' filter sees the caller's variables and commands
+}
+# dictn for keyValVars dictValue path body - 'dict for' over the sub-dict found at path.
+proc ::dictn::for {keyvalvars dictval path body} {
+    ::set sub [dict get $dictval {*}$path]
+    uplevel 1 [list dict for $keyvalvars $sub $body] ;#body runs in the caller's frame, not inside the ::dictn namespace
+}
+# dictn get dictValue ?path? - value at the nested key path; with an empty path the whole dict is returned.
+proc ::dictn::get {dictval {path {}}} {
+    return [dict get $dictval {*}$path]
+}
+# dictn getdef dictValue path default - value at path, or default when absent. Relies on the 'dict getdef' subcommand.
+proc ::dictn::getdef {dictval path default} {
+    return [dict getdef $dictval {*}$path $default]
+}
+# dictn getwithdefault dictValue path default - same implementation as dictn getdef.
+proc ::dictn::getwithdefault {dictval path default} {
+    return [dict getdef $dictval {*}$path $default]
+}
+#dictn incr dictVariable path ?increment? - defined twice: via 'dict getdef' when available, else via dict exists/dict get.
+if {[info commands ::tcl::dict::getdef] ne ""} {
+    proc ::dictn::incr {dictvar path {increment {}} } {
+        if {$increment eq ""} {
+            ::set increment 1
+        }
+        if {[llength $path] == 1} {
+            uplevel 1 [list dict incr $dictvar {*}$path $increment]
+        } else {
+            upvar 1 $dictvar dvar
+            if {![::info exists dvar]} {
+                dict set dvar {*}$path $increment ;#no dict variable yet - create it holding just the increment
+            } else {
+                ::set newval [expr {[dict getdef $dvar {*}$path 0] + $increment}] ;#a missing element counts as 0
+                dict set dvar {*}$path $newval
+            }
+            return $dvar
+        }
+    }
+} else {
+    proc ::dictn::incr {dictvar path {increment {}} } {
+        if {$increment eq ""} {
+            ::set increment 1
+        }
+        if {[llength $path] == 1} {
+            uplevel 1 [list dict incr $dictvar {*}$path $increment]
+        } else {
+            upvar 1 $dictvar dvar
+            if {![::info exists dvar]} {
+                dict set dvar {*}$path $increment ;#no dict variable yet - create it holding just the increment
+            } else {
+                if {![dict exists $dvar {*}$path]} {
+                    ::set val 0
+                } else {
+                    ::set val [dict get $dvar {*}$path]
+                }
+                ::set newval [expr {$val + $increment}]
+                dict set dvar {*}$path $newval
+            }
+            return $dvar
+        }
+    }
+}
+# dictn info dictValue ?path? - 'dict info' for the whole dict, or for the sub-dict at path when one is given.
+proc ::dictn::info {dictval {path {}}} {
+    if {![string length $path]} {
+        return [dict info $dictval]
+    } else {
+        ::set sub [dict get $dictval {*}$path]
+        return [dict info $sub]
+    }
+}
+# dictn keys dictValue ?path? ?globPattern? - keys of the sub-dict at path, optionally limited to those matching the pattern.
+proc ::dictn::keys {dictval {path {}} {glob {}}} {
+    ::set sub [dict get $dictval {*}$path]
+    if {[string length $glob]} {
+        return [dict keys $sub $glob]
+    } else {
+        return [dict keys $sub]
+    }
+}
+# dictn lappend dictVariable path ?value ...? - append list elements to the value found at the nested key path.
+proc ::dictn::lappend {dictvar path args} {
+    if {[llength $path] == 1} {
+        uplevel 1 [list dict lappend $dictvar {*}$path {*}$args]
+    } else {
+        upvar 1 $dictvar dvar
+        #the target element must already exist - dict get raises an error otherwise
+        ::set list [dict get $dvar {*}$path]
+        ::lappend list {*}$args
+        dict set dvar {*}$path $list
+    }
+}
+# dictn merge - placeholder; always raises an error.
+proc ::dictn::merge {args} {
+    error "nested merge not yet supported"
+}
+
+#dictn remove dictionaryValue ?path ...?
+proc ::dictn::remove {dictval args} {
+    ::set basic [list] ;#buffer basic (1element path) removals to do in a single call.
+
+    foreach path $args {
+        if {[llength $path] == 1} {
+            ::lappend basic [lindex $path 0] ;#store the key itself, not its list-quoted form
+        } else {
+            #extract,modify,replace
+            ::set subpath [lrange $path 0 end-1]
+
+            ::set sub [dict get $dictval {*}$subpath]
+            ::set sub [dict remove $sub [lindex $path end]]
+
+            dict set dictval {*}$subpath $sub
+        }
+    }
+
+    if {[llength $basic]} {
+        return [dict remove $dictval {*}$basic]
+    } else {
+        return $dictval
+    }
+}
+
+#dictn replace dictionaryValue ?path value ...?
+proc ::dictn::replace {dictval args} {
+    ::set basic [list] ;#buffer basic (1element path) replacements to do in a single call.
+
+    foreach {path val} $args {
+        if {[llength $path] == 1} {
+            ::lappend basic [lindex $path 0] $val ;#store the key itself, not its list-quoted form
+        } else {
+            #extract,modify,replace
+            ::set subpath [lrange $path 0 end-1]
+
+            ::set sub [dict get $dictval {*}$subpath]
+            ::set sub [dict replace $sub [lindex $path end] $val]
+
+            dict set dictval {*}$subpath $sub
+        }
+    }
+
+
+    if {[llength $basic]} {
+        return [dict replace $dictval {*}$basic]
+    } else {
+        return $dictval
+    }
+}
+
+# dictn set dictVariable path value - 'dict set' with the key path given as a single list argument.
+proc ::dictn::set {dictvar path newval} {
+    upvar 1 $dictvar dvar
+    return [dict set dvar {*}$path $newval]
+}
+# dictn size dictValue ?path? - number of entries in the sub-dict found at path.
+proc ::dictn::size {dictval {path {}}} {
+    return [dict size [dict get $dictval {*}$path]]
+}
+# dictn unset dictVariable path - 'dict unset' with the key path given as a single list argument.
+proc ::dictn::unset {dictvar path} {
+    upvar 1 $dictvar dvar
+    return [dict unset dvar {*}$path]
+}
+# dictn update dictVariable path varName ?path varName ...? body - like 'dict update', with each key given as a nested path.
+proc ::dictn::update {dictvar args} {
+    ::set body [lindex $args end]
+    ::set maplist [lrange $args 0 end-1]
+
+    upvar 1 $dictvar dvar
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            uplevel 1 [list ::set $var [dict get $dvar {*}$path]]
+        }
+    }
+    #NOTE(review): an error raised by body is caught here and its message becomes the return value - it is not re-raised
+    catch {uplevel 1 $body} result
+    #write back - only paths that still exist are considered; a variable the body unset removes its entry
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            upvar 1 $var $var
+            if {![::info exists $var]} {
+                uplevel 1 [list dict unset $dictvar {*}$path]
+            } else {
+                uplevel 1 [list dict set $dictvar {*}$path [::set $var]]
+            }
+        }
+    }
+    return $result
+}
+
+#an experiment.
+proc ::dictn::Applyupdate {dictvar args} {
+    ::set body [lindex $args end]
+    ::set maplist [lrange $args 0 end-1]
+    #experimental variant of dictn update: body is run through 'apply' with a generated prologue. Name is outside the lowercase export pattern.
+    upvar 1 $dictvar dvar
+
+    ::set headscript ""
+    ::set i 0
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            #uplevel 1 [list set $var [dict get $dvar $path]]
+            ::lappend arglist $var
+            ::lappend vallist [dict get $dvar {*}$path]
+            ::append headscript [string map [list %i% $i %v% $var] {upvar 1 %v% %v%; set %v% [lindex $args %i%]} ]
+            ::append headscript \n
+            ::incr i
+        }
+    }
+
+    ::set body $headscript\r\n$body
+
+    puts stderr "BODY: $body"
+
+    #set result [apply [list args $body] {*}$vallist]
+    catch {apply [list args $body] {*}$vallist} result
+    #NOTE(review): the prologue's 'upvar 1' links to variables in this proc's frame, which is what the info exists test below inspects
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path] && [::info exists $var]} {
+            dict set dvar {*}$path [::set $var]
+        }
+    }
+    return $result
+}
+# dictn values dictValue ?path? ?globPattern? - values of the sub-dict at path, optionally limited to those matching the pattern.
+proc ::dictn::values {dictval {path {}} {glob {}}} {
+    ::set sub [dict get $dictval {*}$path]
+    if {[string length $glob]} {
+        return [dict values $sub $glob]
+    } else {
+        return [dict values $sub]
+    }
+}
+
+# Standard form:
+#'dictn with dictVariable path body'
+#
+# Extended form:
+#'dictn with dictVariable path arrayVariable body'
+#
+proc ::dictn::with {dictvar path args} {
+    if {[llength $args] == 1} {
+        ::set body [lindex $args 0]
+        return [uplevel 1 [list dict with $dictvar {*}$path $body]]
+    } else {
+        upvar 1 $dictvar dvar
+        ::lassign $args arrayname body
+        #expose the sub-dict at path as elements of the caller's array while body runs
+        upvar 1 $arrayname arr
+        array set arr [dict get $dvar {*}$path]
+        ::set prevkeys [array names arr]
+        #NOTE(review): an error raised by body is caught and its message returned as the result - it is not re-raised
+        catch {uplevel 1 $body} result
+
+        #write back into the dict variable itself: elements the body unset are removed, the rest are stored
+        foreach k $prevkeys {
+            if {![::info exists arr($k)]} {
+                dict unset dvar {*}$path $k
+            }
+        }
+        foreach k [array names arr] {
+            dict set dvar {*}$path $k $arr($k)
+        }
+
+        return $result
+    }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+## Ready
+package provide dictn [namespace eval dictn {
+    variable version
+    ::set version 0.1.1
+}]
+return
\ No newline at end of file
diff --git
a/src/bootsupport/modules/include_modules.config b/src/bootsupport/modules/include_modules.config index 247371ee..afd1e8f2 100644 --- a/src/bootsupport/modules/include_modules.config +++ b/src/bootsupport/modules/include_modules.config @@ -27,6 +27,7 @@ set bootsupport_modules [list\ src/vendormodules sha1\ src/vendormodules tomlish\ src/vendormodules test::tomlish\ + src/vendormodules dictn\ src/vendormodules textutil::adjust\ src/vendormodules textutil::repeat\ src/vendormodules textutil::split\ diff --git a/src/bootsupport/modules/test/tomlish-1.1.3.tm b/src/bootsupport/modules/test/tomlish-1.1.3.tm index ed5044a73e5fccdc0c437116e82de7c592c4f98a..8afb43d956b0212bfd728b88613392b2099993ea 100644 GIT binary patch delta 12790 zcmaKSbwHF|6YsKw(%s$Nl9Ee@G@^8ebeFJ5m()Xdhjc1}q;yCr-2x&KN(l(u6@A}N zUhlp8$L`ELGxN+jdt%O$#Gq6^74Kn*JCJ(mQ=f!jwoU^iI~fEtB+KSU{XPNJp#fp> zk&V>Msds{0m1=3Y8F`=Ke85J%SKxO5_48y@GbuG^9&g1d9#5vjf1N+#d(wRu$v!oD ztxU^V9^pE}+Tr_dl{1{Z1&+OjtFeLvjqEddlt5d80@8O%KQwgNS6yF>y^v*jVic_W znXI=_lF(Y6r1P=Oc8~Hn`kdarj%>XQWnH{#uTO>5RUyWBQu}AZn3dwKksp+$^-%9^ zui4~MZ6>d}VD&Q#uGBu3%ydRUDRLnPd3D@^$9k5U(k4$63VohYxSLrWXn`4 z>d4Dw8MP0KVhv*6BWFdUpv_uD`*iTM zOq95__av{+K<_v&ufa(F^fu0C*rlvav_i8wiG&sr(pLUNI8G;?@ z^p4&GgFc;3SOq?-w?bkiNUa?ex=9+&x%$QsVtkG9`5IYGfFq`+D$t@YeR51ppwT0R zjiH5Rsvra3eTkaiqOSl6^?x}&hjSor#l85Po#_(`3Tr#)Rhac(}K_g~B7OouJ zQkI4Dh4g%a$E}2UeTO24RnNlk)Pj_j!j->R1@Uh(JJyr+rYF2I6%415pAL|_H0FlS z@qmQr42mI9M8j)il6C9QN^YEWwsB^;3%6-Cw2z}sreB>jHNF>JbYGu3vEQn#t6Di$ zt`tPyvG*+Bve$XN#mBej4gI9@j@61?SP(s)+}o@|m(8)PSve|Vs5?$Q23NKj4x9At*KpjD&iCB@yoTqP-zpe&%g@K zmrFLdfo#N+=*48a%AKssb)V^;VNdSb;V}EMB?;M#+i@%k^n(Jmg8eX2;zAzBop0c4~9F ze=!FS$-1CVZr#n8yIWSME(N<#QE173pt{1-|jjz>XYgCEzOLKe%S+x=z= zq@srRzqY`887MEeh(jI11UBw^FnW{+JRsdTIPAWxc@6#8Qq9*q6y~RGztM+r)b)iA zg15w1b88Nn9LG&B&152nBiaB@QK7RB){KRu+Y2fR6L;KYG- zPE*$9rtW@XJ(o&qe#Fp) zx(YQ7(xTFcZ!>P;)Go@#4i zGC+q*-PCY@L}8QeAFP@P|B>_60_{8Vdx_m6t$G3l;-saCq0~?{m*w)8Qj;3iUja&P zTIfId&YZ94=LmWg^3OjL=IV 
zGd;$mWE87x>u%~)>&bGve1ebjHsHPyKdKa|fT>8)1FqM^3=4w-Fz!28R0mPR{dmY_QFKk|=v#zWTGY%xk|`JQJ(O4lM#im2s$ zJaimRNhWHrtFPs!AH3#huYeAI$f-Ye{CcmtDs(M&-=CqjhgPwEnwqU7)nA2YN6{&l z{F-tyh(iY5wt{oDa@*t8!O2c*`vF!*RS5-EQ4KS)gP0v@YgkBu%oe9C3S(Q)e)_bV#8>bPc zn_iu2gpps+1oUA29Zz#JKq7fsjW+)#hHlcjnnp)84mke6mIHJSE)N zPx%JTPVzL1h4i&UAYVq$?k~uP0&VRX(X3w1rd@AQ4Wetfet=6In`&{+_ssn-BpE*5 zZ*zJsw!iM#c{HtJM-L`7>FKv zCA2GmQ;Sitxt0pgBY>5KP>yBLzd!h3fr{BOaTw>tWixM|h31{MAZZL6YC%9Scu97f zb~I z>uY!r2o4Ou-P8Le<46BTR$p?DObA;UHmQEjwtr@w8c<^ln{`i+fc@M%z-2BGz->o` z9S_S{{?xJqIu-xXx{Ge&;$q@$WaemZZ{gquvmq9|CE;+haCPIbG;#N`vvIJ62mNo( zkN(H)lRsX57xkAD8wZ#b@FSN5?GN7KCMsH{+fkYS85Od*qnn$Bxvhow&xr2*%l*Hj zvHVXomVZWrV(#qj_&_4I0>L4^5HxpAk3!{IVBKW7k;{P3y@qfI!{23AE zZzuNdc5XJm`v~hlvS0n=`k(*!RyPBLk>Y^39S*kff3&Rs>`g$?7YhLA;Q<{Shy<=~ zE;bHU{|y^xwkHE>jVXT*p8GdWfBdHffA%Q`!tZ~F@)z;5A3yv@-|deeFi?K#8{IS! zHXx|@|HuA+`mmckL{}(WR#XBJ1R`Yufw*td?G+~iC>#j1n?CQb&W{yZU0y=O1UJDR zC*x{MMc7j{jx+)%kH!``gJm5N6ET(u(NNHr+Fx3gY@*YPz?}7%oFPo^>5jv>j?JX@ z%PpHg(daDUQl$UHjhMBh*?Gl~UIb2L@I2z>LZMAl;d1QyI%Jqe()~_VId)P%sJ8np zITm*aG}t=7Vc0B72}Oh+dGjesAQkOb(R6qPN|(^b!kfj0Sa*5yP+XbEp=(gOdSl5SsIzwSUiQ>XK-Srd3ZiruL>2^77ri?n}{;1FycO?vl@Sj_kM;j$<-j4 z9qh{;y2~r8YF6wwRR1jx{{(JCyDnT_VvKPV0jiC7w1rM)hU$$J*0!;3lCI3Uo8p)2 z$>>!6jhbJ0C#ntfV`4MVsK>a>Yc^Kzx?qyq40pCxNLufhB-5O@=j!Z^=uuK2eZ3S} zR^)>$pD3{+hhwTCb%EkW?SWn`_N?c|mMHLkPR%`$(^QlCbh>qHEO#v810$*RLmGt< z)u2_MLPK%*Nz`W>j@C$R)Ajt_#rzzq@&i&eY1H8BlbYKi^qzLU&L=%|ws}(?by%Mv zZtO98>EZ7&izvoIBCzaWjMVdDD(7J_gx6cm@zvP`VRH}zc}%dcFy9panbQEP6H}3) z?R(xjFQp{I%5HQa^sv=C&o&-BJ2T1kmU;wjGd%WLtcH4A2hI*Z#nt-3JnY~zZI#mA zT{9#3O{<5l02>#Z!`ZUOc5n$F#DJ&7^a`>gQNs?BJ@X?c@ZD(F+t7LYNOGlxhihlY zlo4%0U0ODMFkFR4G^^eiH9Ddxn@%`ZL;NFi@x&dLF?9%kFc&?L$l_dxjoa|dk!TUh zd_=cs&rs&EWF-tKdB8-nl`g7Lkw_}e;miQ1$GuOE`kbgFCky^1fl<$jcHxP{8`a!M zi_pl2cKlMs7{ezuPSK==?x~*}9zAYHr}=Co@$r*&Y(E8dSpV7MYTwKGGmPk0*3@0{ z=S8XWVI%|yNlLcMH4`{d)5mQJL>TxsyOwI)o!y%`t+j?Sbl3ax zpvOl^Yq3|lZ=C?dpTQhQ&&)ZsmMQ5`pUF;a8BUVYcL$mIbWX7->f?|m}j_(6=| 
zVZ=nnrwLpet*AA{t24qRbPBpijR<{eN2Y!DuIdRWb05X5w+j^+wPGJQ3byoR$E1Cp z8X7HgC$G0k_eme;xyR+J`FbH+)I&hlTjQKmMcsds?(jIP0c*O=^f7ow8Il9t;^oWi zsD|gA3~f?ceX(jcY;P&OvrarK(7Ke%AMG7Cw`=EJ(YVdOVcMXV<2dbIPPTrwpv_<> z4kbQr*&d33E~Y(k-Cu8cpi$|)cByK-^MLW%QCi@eJ%5qY2J?&()Ti$1SIo#IV$4j06M{yV=!NoG^)gTi)!Gzn1jtYgl3e z2fU|Y!xGC7M1bVJ{~+%xPZzgd&k11D@=a^C{5)+W>~@?gJZAk-x$3~LQ?q1KFhcQr^q}?!iTnxKX3Ho09Q@pjGPqF5 zIjgpzVX{jJd+NHRxJ(9xs8!F+SCyjG@YZvwbEvEKn6yfqF_^Ow6F~@YE{;}_hE*lJ zyQwAAhXtlC$$A2PZSZ$@9Crw6!JHsYkQ zg2f~!-C_<47EAOhnoBHjhbP}!@%cd0?*h$kZv&LqmR}uya*0@tXdhA-xGysDpzX?Q z%^);*r!01quZA%FGusO(DIYqdtvwIH)Tcq8Hb`mBZPey^A~zTF@WZSl_!x_8cV^CDv5L0g93J%;Ig` zw*mnVG|qj5;Uu&0)ld0}bPMS^P_+JyIlDTkE_!V(rG;>g5Q}#RUSN7hW-1>NeMGI! zFgcEF?{DegxL`X}3J2o?D}|P`@0QZygfuUB_lbEhA!@9L{dqXYVbP$u+ng-NI$vvo>`|^3t zYC1E4IS{1)rQl_?5dAlE6c6joc=ZpqU%%>5V-mB7{NT{4zPw4!f1P#FVOb8|K)L5B zECo5a0|MRuX9hrxZs)1LhX1a%f$cmOo}a*XaeYWGmwaw@Xj@NB4aZ`5UNi^MY}=@` z#ZcqM1l7^9C;m|FI=@-;aivdOrEl!$<+si@o!X=LUF;`MpLjpd)yS(+Z`<;vXRVoLjY0S%#}j_%mKlCuRo#J9SI!KWqP@sKHnZ?xat&~1iX1P>|Gjr@?zFcw?VLU7}l&gH*7h0Qf zTs|mOHI}C1rRgcYb$lYbn5azjZu&_67-#HEolSwwLr>%Tzi^GVSHqo3HA62=wL>gC z+-#a!6|0i#-I|6Ss+b|ODS`~!wq8!0l5x{@;k(G2()IV-Y4kzU?aeL61g?;7%5j!$ z`78^_uJ}pcBNN-+8 zbFoO0&{-o)GSO)qo5t+~@AD=iK#cYc^1kDA29~tL@ZR%-Nt9g za^D6$S>j1S){*Q**qRugz{AY8jO`^mn$z!1o^$px?7M{qFqAJqnv$Y$G2&>mi>TG?KvkYn^KGtrvMZ|A- zDMsL3+EcioBguD}9bB?$n2eZ)wc4lm6$ngZJrJInfc2+|1`!wc;hxV0$w3t|S=Kf8 zJQq)khESD+^zKf~FnnS~bLemuW)P3bfAlcl(!7+ZfQ|6;ia~1Ppbjobiwrrta7mKcM;vX1%;SidQdVm15|_>rv#_!o=6=sfJ-#n{F0WL1RsJ%W4ManH6HD=Y#_taCYQ;f>ySx@i+Xu8GYQN4&veF{Ba(;0NG54ugy zZ|$L)C#V=+k9H#Y6tcW1&Lb)6Y`OLjNb`deeSO;c_0!IGMr+Y?hwOQ1et_N>?|z^8 zIWdv8kFfu%4COwmu6!?lsXD#Y&?r&4j7BRa5zOu|l2BTdm85cfCuZ&t(RRWvu!jy; zm2pGI*c4umQRw=;yOi$&B`C?Anh|7^w3yeslnr(CR}ohOb}?2W`|xA=DK45`p;7Ih zoC@?~r0{=sPjl`mi)}{GwAeH4o-ie;lms=dk!;uui)FPxRZj%bKj47i=~tUv3#nl8 zeg;Ffau)g`PdGIL!1(;o^yo{%|BbD1*m_i5SA#qJbF(D{fyDo}9{qh%{k$E6Nx)HKLXO=R~+C*BE}QAq|C zUW{#57;$t}9ZF6p^Ejh^C6iB2&_)XDc>I<1C`^(3D+dchCIYDV5F`$`j`w++)rjJqO#Jz 
z^HI2pR$iJnQS(Sq5DiIJO@(P1&aM$DDnT$wx1@m*O)Qy3WZHy_NRn3xMFi*iDFTVTT z+%+{vjdHFdI-6HeE2t&rt)KORW+mA#YbvjVnh}$}v@G5riH)9M1-}Vu;~$-GF@Rc$8b)HNs`8Y5 zGkUi&Nh4xgM0;nD_ng3!I^&(q>y7>G!Ul4OtzyykvlXjMoM*9yyBb`m24(~nnre+2 zSo)b4OW#pT>!YZMvi-=fKt)HVwAo>12Z4Yl^zMuN)OUwJHZe!%n(a#$M11EMAxd*{ zZEBCM?e6l9zuz{p0kJ{hk^}hRUO;l$4f#Nwdo9?r=*YO}!cW+0So-&z1bwnYV;a>VOUzY!ZS?GE@iv-Tm3N=}1+IkS@ zOL2M@^FDMb&31#U?R%b=N*VgdE5GA0OYOYL$GesrJe7@v+q0}^(H|w6ejMuEvc1FTnn5j7TH2$OTuhQAC-B}hk8$7?9 zvNpnlkU-UpAQ0rH2mwn_p}&d`7DoTnTz(~~ySjR+u>ILzZcUsFpRN#@0>$^m)E)j8 zmAcQ6dGaFi1{G8~yPDaYUp`d7x`=UAox}Si!P=cX!5}6^NO^H#S8|-@{>}j#{6lC1 zF0di^t(t?Qy<_um=a>}%_{tRQBT6sNn!n4Qs{w6ViP=ZgXjC}%edB2Z7_qGk=H;Oy0On=sTXL|NQwE(h+RYkUO%9q6{ zZG=NVx7~$nhxqq0Lk#E=Jrzeh1&<$ebLB`B{(!p*HYg&|0W?T?9Y4?UtXv6FzNriC zO^>M2)QN_Z3ExSrKQj;P41DvR*@i@aSh*hBCMM4IY`BORr;)Fp%s`XCLOU)+%Q!P9 zCNw37T9{P-d8idU7Ovvp)W45(4Vhk7D$#`>H|~K z0!m#rF)Bp^Bfx>jqqPUmH42us z%+H>keCIKKvGWo+g?j|eF@!jIs{9+45wxMSlBY}&b)aI*t3RJb=>!kc8c)>5Og!0{ za$MgNv9r?2wH|U3EE6$U}YS;25Je$vxQ#yHV zL!HxJ?1zpaWWfEk&Oz46!a>5+HbHHXZ8OkL7Mf8Y9_}I?MmZGW!nk0C0ZBPO71~X% zB*if!Qv2f`zjjaX6}472d%D)($jK?go0lWzFeInD zZtsj-7d@<}1!3dNy3>`~zZdIE6BftwsMT3UDw?^e=U1rg`jBArjyLx6Grk)>N!H}0 zdgpNsTH!%jr|$b#rza%FFz zSAP_v$(TcQ`$Cfh?!m5?hB$s6OMl+W!)84|5Q&3$lT_6Yd59rkpOS{?(lpk-)`wLT z(`5d+!2hYJ;7bV2TvS~b#OlbwjJP(z^U@_}9Di@4ljJ36GfpmStg571j+@*6VBv%A z^ynOin#|eqJC6l^BlaU#lN}KW(%8OHbN{s3`}TqPj-|1ZEBmxGT&3YGZ0}b)SK*|j z*l_VoIxxc_E%qeeaJN_CKAAB{h9VOV_J6#b-}P+nhsQyHx|e7;bK645YspOVScB><+3hD`OlWzp zNISC1&|DIu6B`NyvIPbQ(F^aCVc+>7>Nxr$Ico_jW?I{vwE7m;(&}hLM(d$+4qP!# zbv_5f8wvtGzo^~E+K>3`78_)m(N^g4iMV&Ix2L{l?&;Y>Z4f<=Q3ptcbE-Kw9j*e8 zCbv|!eoQWJT}!5?x2rxzvT{1!x7b=MKHOM4SCy}R*KTO2p98^9N2@@tr11v06VyWNbqT-Nq$~9+q%VG-MS? 
zj;9h(7&km&fu>70Q~7ma+0VOL&OJAsyhvJkm8D7W1vBLE)kP6@tJSzKfslOrk_1Yc z3^RLY%JubDEGN0IYNI@MHD4_XLd8#JI;wKLgF-5$yTPantec#9+ zL>+7dgwV>;P^@DO^-HGSr7*EcM~{t>VnmW1?kM3M4TRRcf5rQC&tVEe2|Zo#-t!n( z?2<^?6iZ{5Nk7{vp5cxf?)ieBi>j__qK+A2ffDt776ZK#lfQ?~i%B%G8g9@=VgV^D z>ibkEvJmPSEvh|uF8HGzt>tHX)6y(OXE=FC?A=8~O_8_53{AR^vq!2hEcrb#Ejv$? zQ}R2zb~B**wTz3v)w(>V-M2(*fzgbl(}3i+SD!Z2)3}Oho$UzIzfDPsN7OfA$2Aq; z^gAtwrN$0#I&!oleZWFHl0t|l3P+*qhoc-PdM%vGO57paRkP)Q`AW}>T|S;H_+%1$ z>!f$$q~heuxeum~aDSSr_;-28JqfUk@OfoV$D$>a%+GsziSrJt?hgUSZw47Zbbfw| z955_54qVnh-D0?2d2p3{Sg$j2&vB&TyKAr*ttbw4F$6$S;ptcKu@=t63#KA;QLtbx z*QfPseRB%>@;wc(T>3cAbp|QO@y^(v8HsJ}@iVXN>o5K`qVY&eo~eb;OP9@oIkd%; z9TggBmVehO9MT4dEdT6%JU(Xw4sjGaCliM+z{fFX>hgqVRGGXA$NDvAhThP=q(j8p zr!g|(yS=jrSM3D;-^h^j_`o!d=80-^d|d`~7@2Gx}rR z@<|f@PdcMz{*jpuCj+llKc8@l5UIAI@{^D;)Kn#n+lNi((BzbZc5q+&(RnBhSEY;+ zo7*~are#3B0L?l%!|Vt54$)Y3E=EwXu&q$AlS^3LDnb@cP3=xcIB=Jo%$;NhZ0Y@Q zUZ&Xo7~$AQjc!UdZ`fdQ6;R$H?VZGD^A$PuMs?5@8{(sWe0HjBs$AREWAsQ!%Qr@i2WCdvTLjmw$Y!GY#pA&A?Jb)d4IZ=L|kYUwGI3pm7nGpK$Rt4pg)U<%q zGRa&GM^ThXGzUUYn~L;C>?4K)V~r?#Cw<3W#^wOEou7t6`a^h^s$z+gWmTjUJ1q+_ce3+QED#l^Jey=SkeA~%8t1) z4~J!}rearJTO-W5$zvNA77h{?F?G4Svb*?fXnk>q>J8&o@o5L}!vmJ=ynG6{G&=BTh+tS@}|IE!D0dp*JpcADO7a(EN4qGc}QI5H9nf-w$m*Rqn7e zmB00VU+YrC)YUPc%H(lRt)zTXGWM7!5-ZVWT44}jcdeVt()p9>5!$)*`aJ__?A%XI z(D^HaP3B##=MO^>ZE7cIHhI;ADOPaK=lE<>S{E0%&_#$3tu-868zEtV5ryvoG&Q>K zoNg33BV0?XzHjL5yB0?qpwk&G`zYpcyX`*6Y6x{@QFRVXat<|nZLy*2q-xWi$rgrC zUg$H8bA`S@m2S|~*!hvSIX~Pw<@uXIII|spuM7M~vYPI;S7hjp53uUlT(cwQmV!yl z3sTEqQ23ST(4A>Q5{mv#agPtH@+=hb!VPOWq za0CT79udf+p@l)8G^sHEi+lQs#Q66^g#=JP;(@`S5P{}M7Vs(p!k>21644AGEa2AaTBu2A<%Q1hm5b#T^8`4l19SC~P0@lL*%@P3B>1Y5- z3#!`T>@oq$MBEO6eyvq;_q z#XA6u2l`X-frvN?@BzV%%DFfiuouSyeoS=3YQ_tIFR5LIPg;pJ+CP7i_@|(~yHbWdWP=z(^mkGcW8%5)ez{0^{8KO(Fp= z(|Eyef3dmgNC0m-3-~SH&C}d;A@Cso4H=t(1khx#fK>&4vqIpNUnFX#ICxF)Cay0g z=s-dy3wTKAU-oB*6K7F?`-E>qzUJrvG>Zl7eE&B~0Cc3G0Z7>tU}_PVhzU^1mITX5 z0`b`~;4w*nI_ELi5d!;R0iM{R05v%*U;-)FLwW$sju&9bLjxpoDZmN8MB#SCKz1$~ 
z)nCA(e@E<30zu}+{vV7HSsn@1Ul^lX20s57{9oWk{{bNVboLjr=$11i+23#f3uJT) zbVQm*O7$1U=$518e;wV18~wrkjOH(7(Jdz=a{om0C#nbrTKbdy>E-7@|5ujzk4Mt`zD+xZuq z=$4yMC0GPh07d}@uc5pR0mg6&s=uzoZz)@9{i_@vPEPgL z&EYNORh@s7;bHfPz_UC;s=uy{ZW##Z{$l|6!5a71J-{vHM*V-3k$&9@0MEmTsQ##y z{wkK9-<25R`{c1xpKWXN9+vKfC;^ToXY?_19K@OS#q^wpkOw Vc6K5RWsoxHD=ln+^ZBXqe*m+y3oifw delta 9769 zcmZWvbySqw*B*xMmhO=5?gj~Ikj|mIONJ1Xj+gERY3YzI1xZCZ1PMVxy7}gM@9*C4 zt~-Cs+0Q=Dd1q#=v!AtN=8wS1Bj9=pQd}@ssNp^4rT78@2$W9*0*M3DvebZE3N_$D ziUfUOm7%3goG#){#8|{&dc5@`EnBuJ2 zg5OD0Fuy?^C6{-*MVAmrqH?&P8bX+lW{=n=C!Ex2s@|ycy^U3hEh8*AKkZ&-0sgKU z)jEr0|IHxpJVDYtg7@+Qnp1XCEVEjZS3R8JuUU9+S-7 z2IZnh7Uxr#uQP?8-JNr^hqR!@31l+A5wpl#@8v^g$gSTU9w4V8t>O=2!7yx2i(a&S zJ5?L@ZL|Qtsp7+W#k(_W%PaiF&P6j)cAIjI)gh~#u$)ojbB8Nbq0ZAqiXm6`Zi zjZImX8kEDrAi@#JcHFqZtqu$VO7+ZqzIryBkjgpM47susq4g@M3ge(qE{<|-i_V$6 zI3xXK8<0P-I0t?!T>!;`FaM7&zy5XTFWkGkZ8%mJ7qlUZNDq?lT9ft6IUiGApvaf> zdc1f-_#wz-Q-^Auweqq-D;YXScGGdzD-N+dlh3K>5z0x0Mi^2ut$t7f-g>t;EedLe z**mT4jUnBt)MSXy8P<`tn3647~858#+pS>?~YBhH~TX6?g z8tB6-S&YvE`kRM-Q1aRC`+nN#Z&vrf#Fxeu!aHx8)3-aB}9$8IRKp}#lsFn#4* zVpM#-SBhjGv=lwANf$^ZrZ7W@aWv2xW@@>-y@@m>Fp*s9oAJZ?kiQF~eU;C#?sUQK zhn}L53F_1B;DI zImbJ6A_QE`CR7Z~rdSqnE7INa&x3wp7C=0H@s#tP)mkz{35mIs3|mqRo|pB>3_*JU zn4?U~YWD|mbz@Rp)=fw)q+6YN!MBGqK;qu0#?#87I$IC5;R`kTM z+K|olpoHV@8N+8Cx2UJ70Z`IL=6DJ#^;GVhq@ZuDKSF;&-N^f_Vl@?&?_wA+GJ}_-j{BH2A~(EbmferqFwS!Rrd6 zy(A<)TwxO)x>h$Qbf3(}0n7 z%lT4#3>1o6?~=qUAFx+BP{I^uTQ+w~~&i;_C_2HE}npd;7rqx!LswVUGd) z4&B}_3WRq7fxItP7pkBK#ZrVP7G~_NmB;Ztx4}o_!E$f{XRU}*tTFhVs3%8i zqQ*nIX-_UgWRpB-m&0*7B2ulKZr|yx!Sg`bQi6s)=*Pm|T*Hzi#dt1?%nKc$lN0X< zbM2s?S8s_lRNxT^Kp+qbh@B$baOUIL_%0X(5~2fvcwt^ZM}gvrHN=P0!o$PD-^|L@ z*%{*E#o-0<^lDUbJOSfYR)!gxDsD`W!h=Auq#zIvps7Gj_|FM@7bklc$nOI{stX&A zX|@o8j9q!0QlJvO#zx~LFzq4$&E7x~o?@sVbm=2B#&zzbz_KWkS&2fx-5cYi2F3!| zsSoM=rBHs^tH~m4@!)H8>D9S!6o2vgVu~8>GDn;oR$&VN(we`5>WxOG$g^9QOKmL( z(PfxD6(?LrfGUL@p9iDW?!Q03_u$nDxI7pd=t7T zV@)CDkx9=Qxt2N5a?yrWU~gs5`Knbsmg#_issXM985R6}b%LPFSlxD!TU}PzDI1A< 
z)?WE}=L&@BuDw01%tcbvWw6>-bY@!uv%H%~#7Z$fwcf?~*+lbKCF=l$ps+G~@*Txa z^~M*T>SGKU<*1X&XJPF#V$Wlj#rtQeW-PIAwb!K0bmtv>3D)OEwoMR&FR{faj!G&j0>nys_pV&S~hI-sDx zynknME*(#p=c$j~_B)NQj=z2pQDBp>h~1TG*L8va2^cp}n4$TQB_bj0?$42fKmvdT zCpE$!cOP)KB!oUb!U75Iz+mb5k`&I%gcK-ogqZFz&aQTeSksV0u}lT2N<|UV;ynI~ zcirZ$c64Kps}ySogIH5jNZ(SaCyXaP)n1aDZSZWyuTcB>^?auc<9j7%<|DciKhAb$ zVne+SkIzM(OecV_f>n{E=r-mp_Ke&8!R?9z%io;R))QLD72Ij3#D_e!uWKA>We}em zHse-hBAmAV;7qw*tn(boY>h;uHTjFi8)S@k^+}7{*p1BAuHAz_gc=ThDOA+>y4Tr5 zmH^AZ4@L6SL$PvoCL(y|$MiC{{!lPB+pzQ7hjnp!bjxfzmD#q+P?u%RnR-K2m*I9k z%=aWE8Z%ILMQMTTUHBPcLfgEFbrxY_ETTNC@m;br!c}i!d9FG6CDr}Q##v*^febe%qmy0I2@q_Dfh{0%7`S7+UL*Op{ z_uC9_!L@a&v!<(5ky18Q6nI|{=_6{tu#6VbQM`l7_6SLmsrN{6O>;~R<6{oWu>aU1 z2t(1N$T!{X@VU{wg_D+goK4f*#znBY9~j}S5#VL>SFYX1JkXhZPD8%{|1s8k=9MD% zk&rFr^qcu_^Onu|XQCl55RF(`be8!EX|BB7gKO2iAO$9R-?k+o2TvyQs@;A1#3eWO z^QK#%5T~*E9$P1Z_v#o7D)=fCWV7N<6I$oFE)G%b>b7>%zfRI$kUH_%k=)vlboM>B zB1>?b$)dLZlGsCIMMfKkId{WOvY@)B6UC5K^4(S=DSbpnP{CC2ijq5_<7|6_^JOO< z`nT9uvCe4&X2z>wXT(EmQsh@6^e6SzNi}jUP`m|YYiZi@#nxkkV^?*?V!F~d5pxAe z9|?(jFc^lsro(r{H@jYmDYR=+4eugg^~v?^>~cq0BDS)*2!B3Dxwh|6w!$j5VhR%c za#3X2KBSXIsa!l4{YxU|*Yodfx|@%M!`{cIT!CDZd*nMUD@1%Oh>fjaoonkFkL^_p zbTJzfbnW|FzY~%aoURGQa1-P8{*yXjFDwKvrbpdJ6bkw$l1`gL4WBgghV8?FK%!_M z5Esx#|5qfvizb4msl_aD;Y1#V-o2(lc!Cx#3u1#cmgR@V1C}@I+`Y4QeNX;Zzk>RnW6$~*r#}TL~ zU@E4kKixolSr~bv#w7XLwb)^TkWqB)9Ck!|Ck6$iO)_F=TtFd{`e(5!`TNBC$`c*8 zE*CsLQnWppjPRUO{`9(T@KiQ!`Tdj0woW)B=8!UFdzJApukI< zNT4$Ta5X=;YlOPwh8xwN9*=olP!bD^X9soVPcTBM@pkf_D_u~yl6QjbK9R zfDQr){v)vJ4Ns8%6j*>NjtrV25XmiuPjE|*o67laOF;Un=ZL9gQ)Q_>ZrcJ!d_L_g z_h|WyME=uDbM#ll`pgNqC9L#}c(oi7#z8-QI&wpa-I2f7j#e*xF;7N z#AX*DTb8FT2MRFs6Do4O58J@IDVC~QG`?O2KThbCA>a#+G_KU96UfIf zPz1a{pki4bub=mF#%0-fPO_`fk#%Y$!TlMWpf-p1e0o82J#`^igAx_{$=PU9VsQut z&y}ux@bkiQ+_RXHNn2=P>)Ucuo7XXwT3hCtuR>c_XxFBta%K-ee)4cd|m3-;m@<$&hkP#-y}8G=z26@pEDLk%g+1stzsFG!!xSn@KkA(=3Q`hmG%jFK7wyr^>pH{D&9US zX)kS_UR2K1(Tc0AMRw*^`|Dn46b#-#+Lx{|0aGe+U`Cb-SV~c$eSCyT5&Zv?sJq5c 
z!#&E?a${JVAO!;i;)h8TFCa=SgTfB*t_7XacL5R9_Hbsv5w#`jWXr$@5+jcq8Ue(} z<+SDrbCqCFHy*|O*9NWi)>AQ4hr^6-rU@AS<;VCy5REd#%s`KcnfoSDwN>5TGBf24 zv85}ySp3_D!5Oje@>aRc=wgmx^u*E$jr#;U{HE%Rk%O60N^K-a)$^?gTLPb&WdGAq zxDz7d!z5cC($Q}=t2Nov`ItJ&1BsGM?MJgqh4#!%R{$$59rPN{2eBsgM6%0IsRzB> z{7rj0VC;U2>K$A*LuK+_EufYLGQ2@v{6Jjjru?44#l+*ytaeaNTjLFuC{i(j?@V*0 z?k?Q$V*5J-;>ETf8_dpO)8o6FdqVG%XPx`lB2UMK_le(^GX&(@g^CG-weE7%4L;-h zJxUz=AaR^@%Ro1XYRnQi$Zb(Qh2myJS&$;j4Sbx=BlM;Q9EJHSlCjC0E(N(tet$(TR z^lLwzR7YJQ?u~?9{YoV%Hj5Y{7?sqdhFMBA~|JrY&xW43rN z<<+7L6+@4G7Qz0!ESWFu&5QKm&~YD`IXO?W3f3O=_kttn!LNunc7KE$*w2dH6vMTD zw{7v1E= z?~rq#r?^Xv8#g&X3}K%k+et%D4?}sq=2uPka)_8zsBzoFcJthuVGf_LK`=qy}&ojJ-D( zqW+wZiBC~3c$AFpa(#A4agX_jYy%0Yq_B!n5I9UF1c(@nV9_KDR54(~!(yqihk;xe zXt%-xv=pd-khe&{v6U!rZ-Ig9ju&c}$R$%k4pVtAOc03gj}j7)^g@AJyL-EOL987i z{=W;zgY-2=SROF427gV3#i<@H=CuSAx}qArNN*U@P|y#v%*Dqw@NsHa<9b;0jo!La zoFPQBr+U&gKa<-nl79%l6U=Gzkj0LRw`@%-J=OgsV=uUFO`B~nJ?i6(Y z9%McTY}{eZqB!+-=$f}=8b<;ft`p5OE3)FcB(u;>FV$dz$fQcSaV1S{IeW(QzD_Qh z)VgiZdNvzv`!%Vre&Rk;3ayE>a%duL0uP;W2dJIkxLl7aIq1niZ=R^JT+p0YS2QTy zyMET(JvRDCt-aB^mSQ`xrBcSxrOvXWm9v(NUQ@&x_g=k_Z ztEZ^WQ)QfFX-~8RFqIiAgyt%q!(VP~Da3veX35Hl zU~g-(eiVM;%UcN!+cgPjEV^GB)GQj_P5|CLBL0&(V8gCdOa``#Ga?8?M+*XR1EJBJ zKrRFs3MrEk>*Y%-uqE*FP)EFdZI_0kk+nRH z-OHp3;Th=`9^njxI3P(%a*M0*U=CyB`$`Q$!Ini)GdwX~jLgTc2Io&>j&7Nw4yP%U zZW6hF&fp4e2AL%a>&~l$O5kCIhnl20zqkp>Pld}akNPrPLM%pn=fF-GWo_L*J>aQK zr?#1ZgD{@QlsWk;2`yo}xMH}uB6fd9;PPNdsAZB^Vm!l#mvij3UK^5^{yr@&l61Zh z6klhHtCCTukEDShe&y3sfZp`WHs_qwkwkyZD5KC14|)agu}U_2@YZI$e|x;qr4<*~ zJ5*<<_-TDlgs1k(?k^9e6IQ5uHhsdT-(G)-I^8wQGEJDlaJRBCnsYMdUtAza8z$bB zozAXXK+3eH{hC~AL6V}2P8;b)$T$353ff*2C=$Kkr3_j6JWS!(x$lzeShqZjgX6A@ zU}@goGN?6`L1NrU(5%)1^}0jP)tR*^dt^Ln zDZ6`*qIE%&>suZNhU3y5Q`X#6;i~{Ey^*9@j#<5Cbk`5|T&NFEt*^^oU0W!fVnRQj z{5aXRx%yG}ZLQuo%QftcWW?6_`0T3GJ?mHgilHIU|Os4b8pR zY<+q+Qix~_-y?B`d7f9`N!pj7-tBXpg! 
z{NlOl+sBZl=e7(nMwI4tThaD`(+_BF&0Dxb8hy_(C{~A9t6b{(nQ@}2W*759tS&}m za)A91K1Z!Q?3f5NSx`f6D*LUwcR*3r-2=3Sjow}PxyVhIsv9eTUueP|f|0cC_Eryt zEig+%r=Duq$%QOx=d(GSU#SSRAL+SGADo*ja@)bPTCCFAd(YF^&fS7KQ3Uw490cwu z$|kr*_LtdFMe#AR`1C_jS(#ar%9Hb{!3qeXA03;RR$pwXAm|hql43r`7yYP=Mo88P z#-)80Mc53E)Ynhfw=3TQ^VW+dv#*ocZVPH)*jp2DoAu@}Lnw|1O>35*M0wJ*=~-Hz zq=(KBi>Y^y$Pmd!yybp4%X#tbPbb`>I$MXQ?CiNLS20HLib== z+rQRnAPMiS6SXyCff!u+tG63Ns9*VW=6#u>HuDP7thqlew7H3z5I+rTfi$i)Dy4pU zo0mE3=99ph%gt}MsHe*Q1Zp#RcTQVc+RxXV&5^+QJen+AjClpK97WP|rl)HL<9I9C zmnP)%;92`|H7yVW6`)@q|a+#bceA_I<203BFfuCqv5)umD?b) zy_4J%J(5ip-&9P_Ni(E-`5Ttr9>+2Vy#v>*l7QR6TXpl!Qs7ZQ9(pN4I-eea`LQ!M z>>S`3$i!MYrkE}x*5(=zL>ozCO2V5PAnJSu36zVqzbc5l)F0S(@9Cj6jaRrXOCD|H z52mofbjqBfV|;|@P{e`hIseU&uA`A%d%spetzVEt|9yvJ-X zMVr94SS{Z!h_QsxVw=wu$k5t`naP z%r$6qUsT?A-HS&mykKM+3^~G$aGB#Hc-n9;J~i?wEEbO=is$&$4!wWZgtEc?36WV&VJ(b#Guq!A<+4uBHy__0!`QD^4{ z&fO9hF7WHsxMny|888L}#lG?uFsDfoQ{&wAwZEnl56bE-%%0QCSKK`L$;dkxzBzjK zDKr_O@q+*)p`0S{yW7~dQQT8Ks4lSWwoMvE-X9k@eJ~uB06xXSYOd;ZAlgH67oqGh zRgasQgVQ9HaTg2`)h7E+{VcVqxHV63q7sun@>2%CVBznqPyl_x{4|7UD> z`X)iXIV7U=!#b#hNIg$EJT*m1cLi?OgWuhZj zcgA8)_j{;s&)VliZ_h3lpox&}BdfdtX5)vzY0;j;m%=?a_zhK!bwoS*wo+CrUw64` zeLox-D%9+o^0a3g@5J^?B-3NC*=hM&!oQ|Y;D@x}mPk~!w~`ymj!L!LCThRKwDYX|66T!+QA?Ah8uN6|6NP})peD7R3LwI?& z;5{6NH*V1yM^z2oWqNRp}H_qS6nVS;tFa9udJ;XAgF{)~yiq?c1EBUm@j z@|xp9n&OqI2kt?%d09hi{lSRUg}h&=Xy3?5=_rPgpQI>E&7qon#Hwb%3_SD+=_JfU zWKJIF4Hn(#UuANh-231naU@+0pw%~Q^!R^l zWxuX7J#ki9{RQh=csv?(dw-Pl`?%)u@!nL-$7KhKkorKmXV!{_tRtRM0#J(pd|E=RouSX@+;e@* zgQeul&BKB0EsT;5sCf4p@$f4^%+7GYxNa=UZ2Z|yxYXd4gF9win;AoLdel~n!EEtC zR1FQQp7-Ja>Ev1ULS1e<}vGc4J*H- zVG1e)Q%^47umuV2cRBUD#nI^e1|E#74IBNDNmgK?!Q}W1)_-M$H8)6rh6FssKgDBAEdP@J_=4@Y6`a zo}@630t~W%@5o@LKRAT~wg>|gOxS>RItv(}{2MMaqXMH97{Ea~B{-kzw|jvF6VT3J z0gF-p4Sh-pfyE3;@b;fBYBo|pEt3UoM)R9t&Q1wT!5|~;U+^gaEyV!rvM9mVf4Z_c zv4N>97O(~VZ$>sJF>suQ0T^Ucg6A22yBe|uz|v2Dn}Ix-09OtRc#HElq!PshiePYo z>u*RchWh(Fb1o(LEB9|VjyNh1lgk2j<@pVdb49@!{C}CafNvfzcv|RBqC7fqoW}zG 
zD*P8D`28sFd`j@X$Zyv~J~voX6o4ym2bYV(7Ez$CKmsfz@s~*eNEGsdKTG`PVk%<; zorNr5Wl2D@SOmx@!UY(Lae#JZEI^|O13dgcw;Wha3dT@`?ZU{?Ff#ybAOl;Z0jv@( z@Xh~BB;a)kFSuIv_l{dB5&&Py0tli$DhM>!>29Uxs#4fLjw0ted2z_AVzpkBcO zHc!2rHiP=ZlaU|UQ8M6lLwST+*=+nSh)c@(z{Ohti=)lO- zfRq<9oHcONLQ4Ig8vI}MdA+~rSipw{T`H?WOa diff --git a/src/bootsupport/modules/tomlish-1.1.4.tm b/src/bootsupport/modules/tomlish-1.1.4.tm index 7a6d5205..33d5b912 100644 --- a/src/bootsupport/modules/tomlish-1.1.4.tm +++ b/src/bootsupport/modules/tomlish-1.1.4.tm @@ -153,15 +153,10 @@ namespace eval tomlish { } #review - if {[uplevel 1 [list info exists tablenames_seen]]} { - upvar tablenames_seen tablenames_seen + if {[uplevel 1 [list info exists tablenames_info]]} { + upvar tablenames_info tablenames_info } else { - set tablenames_seen [list] ;#list of lists - } - if {[uplevel 1 [list info exists tablenames_closed]]} { - upvar tablenames_closed tablenames_closed - } else { - set tablenames_closed [list] ;#list of lists + set tablenames_info [dict create] ;#keys are lists {parenttable subtable etc} corresponding to parenttable.subtable.etc } foreach sub [lrange $keyval_element 2 end] { @@ -207,13 +202,10 @@ namespace eval tomlish { ARRAY { #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! - set prev_tablenames_seen $tablenames_seen - set prev_tablenames_closed $tablenames_closed - set tablenames_seen [list] - set tablenames_closed [list] + set prev_tablenames_info $tablenames_info + set tablenames_info [dict create] set result [list type $type value [::tomlish::to_dict [list $found_sub]]] - set tablenames_seen $prev_tablenames_seen - set tablenames_closed $prev_tablenames_closed + set tablenames_info $prev_tablenames_info } MULTISTRING - MULTILITERAL { #review - mapping these to STRING might make some conversions harder? 
@@ -295,23 +287,66 @@ namespace eval tomlish { #[Data] #temps = [{cpu = 79.5, case = 72.0}] proc to_dict {tomlish} { + package require dictn #keep track of which tablenames have already been directly defined, # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. #we don't error out just because a previous tablename segment has already appeared. - ##variable tablenames_seen [list] - if {[uplevel 1 [list info exists tablenames_seen]]} { - upvar tablenames_seen tablenames_seen - } else { - set tablenames_seen [list] ;#list of lists - } - if {[uplevel 1 [list info exists tablenames_closed]]} { - upvar tablenames_closed tablenames_closed + + #Declaring, Creating, and Defining Tables + #https://github.com/toml-lang/toml/issues/795 + #(update - only Creating and Defining are relevant terminology) + + #review + #tablenames_info keys created, defined, createdby, definedby, closedby + + #consider the following 2 which are legal: + #[table] #'table' created, defined=open definedby={header table} + #x.y = 3 + #[table.x.z] #'table' defined=closed closedby={header table.x.z}, 'table.x' created, 'table.x.z' created defined=open definedby={header table.x.z} + #k= 22 + # #'table.x.z' defined=closed closedby={eof eof} + + #equivalent datastructure + + #[table] #'table' created, defined=open definedby={header table} + #[table.x] #'table' defined=closed closedby={header table.x}, 'table.x' created defined=open definedby={header table.x} + #y = 3 + #[table.x.z] #'table.x' defined=closed closedby={header table.x.z}, 'table.x.z' created defined=open definedby={header table.x.z} + #k=22 + + #illegal + #[table] #'table' created and defined=open + #x.y = 3 #'table.x' created first keyval pair defined=open definedby={keyval x.y = 3} + #[table.x.y.z] #'table' defined=closed, 'table.x' closed because 
parent 'table' closed?, 'table.x.y' cannot be created + #k = 22 + # + ## - we would fail on encountering table.x.y because only table and table.x are effectively tables - but that table.x is closed should be detected (?) + + #illegal + #[table] + #x.y = {p=3} + #[table.x.y.z] + #k = 22 + ## we should fail because y is an inline table which is closed to further entries + + #note: it is not safe to compare normalized tablenames using join! + # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + + + + if {[uplevel 1 [list info exists tablenames_info]]} { + upvar tablenames_info tablenames_info } else { - set tablenames_closed [list] ;#list of lists + set tablenames_info [dict create] ;#keyed on tablepath each of which is a list such as {config subgroup etc} (corresponding to config.subgroup.etc) } + log::info "---> to_dict processing '$tomlish'<<<" set items $tomlish @@ -354,7 +389,7 @@ namespace eval tomlish { #a.b.c = 1 #table_key_hierarchy -> a b - #leafkey -> c + #tleaf -> c if {[llength $dotted_key_hierarchy] == 0} { #empty?? probably invalid. review #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively @@ -362,10 +397,10 @@ namespace eval tomlish { } elseif {[llength $dotted_key_hierarchy] == 1} { #dottedkey is only a key - no table component set table_hierarchy [list] - set leafkey [lindex $dotted_key_hierarchy 0] + set tleaf [lindex $dotted_key_hierarchy 0] } else { set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] - set leafkey [lindex $dotted_key_hierarchy end] + set tleaf [lindex $dotted_key_hierarchy end] } #ensure empty tables are still represented in the datastructure @@ -380,143 +415,101 @@ namespace eval tomlish { } } #review? 
- if {[dict exists $datastructure {*}$table_hierarchy $leafkey]} { - error "Duplicate key '$table_hierarchy $leafkey'. The key already exists at this level in the toml data. The toml data is not valid." + if {[dict exists $datastructure {*}$table_hierarchy $tleaf]} { + error "Duplicate key '$table_hierarchy $tleaf'. The key already exists at this level in the toml data. The toml data is not valid." } #JMN test 2025 if {[llength $table_hierarchy]} { - lappend tablenames_seen $table_hierarchy + dictn incr tablenames_info [list $table_hierarchy seencount] } set keyval_dict [_get_keyval_value $item] if {![tomlish::dict::is_tomlish_typeval $keyval_dict]} { - lappend tablenames_seen [list {*}$table_hierarchy $leafkey] - lappend tablenames_closed [list {*}$table_hierarchy $leafkey] + set t [list {*}$table_hierarchy $tleaf] + dictn incr tablenames_info [list $t seencount] + dictn set tablenames_info [list $t closed] 1 #review - item is an ITABLE - we recurse here without datastructure context :/ #overwriting keys? todo ? - dict set datastructure {*}$table_hierarchy $leafkey $keyval_dict + dict set datastructure {*}$table_hierarchy $tleaf $keyval_dict } else { - dict set datastructure {*}$table_hierarchy $leafkey $keyval_dict + dict set datastructure {*}$table_hierarchy $tleaf $keyval_dict } + } + TABLEARRAY { + set tablename [lindex $item 1] + log::debug "---> to_dict processing item TABLENAME (name: $tablename): $item" + set norm_segments [::tomlish::utils::tablename_split $tablename true] ;#true to normalize + #we expect repeated tablearray entries - each adding a sub-object to the value, which is an array/list. 
+ } TABLE { set tablename [lindex $item 1] + log::debug "---> to_dict processing item TABLE (name: $tablename): $item" #set tablename [::tomlish::utils::tablename_trim $tablename] set norm_segments [::tomlish::utils::tablename_split $tablename true] ;#true to normalize - if {$norm_segments in $tablenames_seen} { - error "Table name '$tablename' has already been directly defined in the toml data. Invalid." - } - log::debug "---> to_dict processing item $tag (name: $tablename): $item" - set name_segments [::tomlish::utils::tablename_split $tablename] ;#unnormalized - set last_seg "" - #toml spec rule - all segments mst be non-empty - #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. - - set table_key_sublist [list] - - foreach normseg $norm_segments { - lappend table_key_sublist $normseg - if {[dict exists $datastructure {*}$table_key_sublist]} { - #It's ok for this key to already exist *if* it was defined by a previous tablename or equivalent - #and if this key is longer - - #consider the following 2 which are legal: - #[table] - #x.y = 3 - #[table.x.z] - #k= 22 - - #equivalent - - #[table] - #[table.x] - #y = 3 - #[table.x.z] - #k=22 - - #illegal - #[table] - #x.y = 3 - #[table.x.y.z] - #k = 22 - ## - we should fail on encountering table.x.y because only table and table.x are effectively tables - - #illegal - #[table] - #x.y = {p=3} - #[table.x.y.z] - #k = 22 - ## we should fail because y is an inline table which is closed to further entries - - - #note: it is not safe to compare normalized tablenames using join! - # e.g a.'b.c'.d is not the same as a.b.c.d - # instead compare {a b.c d} with {a b c d} - # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. 
- #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' - #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} - - set sublist_length [llength $table_key_sublist] - set found_testkey 0 - if {$table_key_sublist in $tablenames_seen} { - set found_testkey 1 - } else { - #see if it was defined by a longer entry - foreach seen_table_segments $tablenames_seen { - if {[llength $seen_table_segments] <= $sublist_length} { - continue - } - #each tablenames_seen entry is already a list of normalized segments - - #we could have [a.b.c.d] early on - # followed by [a.b] - which was still defined by the earlier one. + set T_DEFINED [dictn getdef $tablenames_info [list $norm_segments defined] NULL] + if {$T_DEFINED ne "NULL"} { + #our tablename e.g [a.b.c.d] declares a space to 'define' subkeys - but there has already been a definition space for this path + set msg "Table name $tablename has already been directly defined in the toml data. Invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] + error $msg + } - set seen_longer [lrange $seen_segments 0 [expr {$sublist_length -1}]] - puts stderr "testkey:'$table_key_sublist' vs seen_match:'$seen_longer'" - if {$table_key_sublist eq $seen_longer} { - set found_testkey 1 - } - } - } - if {$found_testkey == 0} { - #the raw unnormalized tablename might be ok to display in the error message, although it's not the actual dict keyset - set msg "key $table_key_sublist already exists in datastructure, but wasn't defined by a supertable." - append msg \n "tablenames_seen:" \n - foreach ts $tablenames_seen { - append msg " " $ts \n - } + set name_segments [::tomlish::utils::tablename_split $tablename 0] ;#unnormalized e.g ['a'."b".c.d] -> 'a' "b" c d + #results of tablename_split 0 are 'raw' - ie some segments may be enclosed in single or double quotes. 
+ + + set supertable [list] + ############## + # [a.b.c.d] + # norm_segments = {a b c d} + #check a {a b} {a b c} <---- supertables of a.b.c.d + ############## + foreach normseg [lrange $norm_segments 0 end-1] { + lappend supertable $normseg + if {![dictn exists $tablenames_info [list $supertable type]]} { + #supertable with this path doesn't yet exist + if {[dict exists $datastructure {*}$supertable]} { + #There is data though - so it must have been created as a keyval + set msg "Supertable [join $supertable .] of table name $tablename already has data - invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] error $msg + } else { + #here we 'create' it, but it's not being 'defined' ie we're not setting keyvals for it here + dictn set tablenames_info [list $supertable type] header + #ensure empty tables are still represented in the datastructure + dict set datastructure {*}$supertable [list] } - } - - } - - #ensure empty tables are still represented in the datastructure - set key_sublist [list] - foreach k $norm_segments { - lappend key_sublist $k - if {![dict exists $datastructure {*}$key_sublist]} { - dict set datastructure {*}$key_sublist [list] } else { - tomlish::log::notice "to_dict datastructure at (TABLE) subkey $key_sublist already had data: [dict get $datastructure {*}$key_sublist]" + #supertable has already been created - and maybe defined - but even if defined we can add subtables } } + #table [a.b.c.d] hasn't been defined - but may have been 'created' already by a longer tablename + # - or may have existing data from a keyval + if {![dictn exists $tablenames_info [list $norm_segments type]]} { + if {[dict exists $datastructure {*}$norm_segments]} { + set msg "Table name $tablename already has data - invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] + error $msg + } + #no data or previously created table + dictn set tablenames_info [list $norm_segments type] header - #We must do this after the key-collision 
test above! - lappend tablenames_seen $norm_segments - - + #We are 'defining' this table's keys and values here (even if empty) + dict set datastructure {*}$norm_segments [list] ;#ensure table still represented in datastructure even if we add no keyvals here + } + dictn set tablenames_info [list $norm_segments defined] open log::debug ">>> to_dict >>>>>>>>>>>>>>>>> normalized table key hierarchy : $norm_segments" #now add the contained elements foreach element [lrange $item 2 end] { set type [lindex $element 0] - log::debug "----> tododict processing $tag subitem $type processing contained element $element" + log::debug "----> todict processing $tag subitem $type processing contained element $element" switch -exact -- $type { DOTTEDKEY { set dkey_info [_get_dottedkey_info $element] @@ -547,14 +540,19 @@ namespace eval tomlish { puts stdout "to_dict>>> $keyval_dict" dict set datastructure {*}$norm_segments {*}$dkeys $leaf_key $keyval_dict #JMN 2025 - lappend tablenames_seen [list {*}$norm_segments {*}$dkeys] + #lappend tablenames_info [list {*}$norm_segments {*}$dkeys] + set tkey [list {*}$norm_segments {*}$dkeys] + dictn incr tablenames_info [list $tkey seencount] if {![tomlish::dict::is_tomlish_typeval $keyval_dict]} { #the value is either empty or or a dict structure with arbitrary (from-user-data) toplevel keys # inner structure will contain {type value } if all leaves are not empty ITABLES - lappend tablenames_seen [list {*}$norm_segments {*}$dkeys $leaf_key] + set tkey [list {*}$norm_segments {*}$dkeys $leaf_key] + #lappend tablenames_info [list {*}$norm_segments {*}$dkeys $leaf_key] + dictn incr tablenames_info [list $tkey seencount] #if the keyval_dict is not a simple type x value y - then it's an inline table ? #if so - we should add the path to the leaf_key as a closed table too - as it's not allowed to have more entries added. 
+ dictn set tablenames_info [list $tkey closed] 1 } } @@ -562,7 +560,7 @@ namespace eval tomlish { #ignore } default { - error "Sub element of type '$type' not understood in table context. Expected only KEY,DQKEY,SQKEY,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in table context. Expected only DOTTEDKEY,NEWLINE,COMMENT,WS" } } } @@ -1316,7 +1314,12 @@ namespace eval tomlish::encode { #NOTE - this DELIBERATELY does not validate the data, or process escapes etc #It encodes the tomlish records as they are. #ie it only produces toml shaped data from a tomlish list. + # #It is part of the roundtripability of data from toml to tomlish + #!! ie - it is not the place to do formatting of inline vs multiline !! + # That needs to be encoded in the tomlish data that is being passed in + # (e.g from_dict could make formatting decisions in the tomlish it produces) + # #e.g duplicate keys etc can exist in the toml output. #The to_dict from_dict (or any equivalent processor pair) is responsible for validation and conversion #back and forth of escape sequences where appropriate. @@ -1646,17 +1649,27 @@ namespace eval tomlish::decode { #pop_trigger_tokens: newline tablename endarray endinlinetable #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. 
set parentlevel [expr {$nest -1}] - set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like tentative_accum_squote need to do their own append switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote { + #should only apply within a multiliteral #### set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed #Without this - we would get extraneous empty list entries in the parent # - as the xxx-squote-space isn't a space level from the toml perspective # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + #assert prevstate always trailing-squote-space + #dev guardrail - remove? assertion lib? + switch -exact -- $prevstate { + trailing-squote-space { + } + default { + error "--- unexpected popped due to tentative_accum_squote but came from state '$prevstate' should have been trailing-squote-space" + } + } switch -- $tok { ' { - tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + tomlish::parse::set_token_waiting type single_squote value $tok complete 1 startindex [expr {$i -1}] } '' { #review - we should perhaps return double_squote instead? 
@@ -1669,74 +1682,51 @@ namespace eval tomlish::decode { tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] } '''' { - switch -exact -- $prevstate { - leading-squote-space { - error "---- 4 squotes from leading-squote-space - shouldn't get here" - #we should have emitted the triple and left the last for next loop + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata } - trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] - #todo integrate left squote with nest data at this level - set lastpart [lindex $v($parentlevel) end] - switch -- [lindex $lastpart 0] { - LITERALPART { - set newval "[lindex $lastpart 1]'" - set parentdata $v($parentlevel) - lset parentdata end [list LITERALPART $newval] - set v($parentlevel) $parentdata - } - NEWLINE { - lappend v($parentlevel) [list LITERALPART "'"] - } - MULTILITERAL { - #empty - lappend v($parentlevel) [list LITERALPART "'"] - } - default { - error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" - } - } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + MULTILITERAL { + #empty + lappend v($parentlevel) [list LITERALPART "'"] } default { - error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" } } } ''''' { - switch -exact -- $prevstate { - leading-squote-space { - error "---- 5 squotes from leading-squote-space - 
shouldn't get here" - #we should have emitted the triple and left the following squotes for next loop + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata } - trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] - #todo integrate left 2 squotes with nest data at this level - set lastpart [lindex $v($parentlevel) end] - switch -- [lindex $lastpart 0] { - LITERALPART { - set newval "[lindex $lastpart 1]''" - set parentdata $v($parentlevel) - lset parentdata end [list LITERALPART $newval] - set v($parentlevel) $parentdata - } - NEWLINE { - lappend v($parentlevel) [list LITERALPART "''"] - } - MULTILITERAL { - lappend v($parentlevel) [list LITERALPART "''"] - } - default { - error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" - } - } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "''"] + } + MULTILITERAL { + lappend v($parentlevel) [list LITERALPART "''"] } default { - error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" } } } } - puts stderr "tomlish::decode::toml ---- HERE squote_seq pop <$tok>" } triple_squote { #presumably popping multiliteral-space @@ -1763,7 +1753,119 @@ namespace eval tomlish::decode { lappend merged $part } default { - error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + error "---- triple_squote unhandled part type [lindex $part 0] unable 
to merge leveldata: $v($nest)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + tentative_accum_dquote { + #should only apply within a multistring + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the trailing-dquote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + #assert prevstate always trailing-dquote-space + #dev guardrail - remove? assertion lib? + switch -exact -- $prevstate { + trailing-dquote-space { + } + default { + error "--- unexpected popped due to tentative_accum_dquote but came from state '$prevstate' should have been trailing-dquote-space" + } + } + switch -- $tok { + {"} { + tomlish::parse::set_token_waiting type single_dquote value $tok complete 1 startindex [expr {$i -1}] + } + {""} { + #review - we should perhaps return double_dquote instead? 
+ #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_dquote value "" complete 1 startindex [expr {$i - 2}] + } + {"""} { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 3}] + } + {""""} { + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left dquote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + STRINGPART { + set newval "[lindex $lastpart 1]\"" + set parentdata $v($parentlevel) + lset parentdata end [list STRINGPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE - CONT - WS { + lappend v($parentlevel) [list STRINGPART {"}] + } + MULTISTRING { + #empty + lappend v($parentlevel) [list STRINGPART {"}] + } + default { + error "--- don't know how to integrate extra trailing dquote with data $v($parentlevel)" + } + } + } + {"""""} { + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 dquotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + STRINGPART { + set newval "[lindex $lastpart 1]\"\"" + set parentdata $v($parentlevel) + lset parentdata end [list STRINGPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE - CONT - WS { + lappend v($parentlevel) [list STRINGPART {""}] + } + MULTISTRING { + lappend v($parentlevel) [list STRINGPART {""}] + } + default { + error "--- don't know how to integrate extra trailing 2 dquotes with data $v($parentlevel)" + } + } + } + } + } + triple_dquote { + #presumably popping multistring-space + ::tomlish::log::debug "---- triple_dquote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + 
switch -exact -- [lindex $part 0] { + MULTISTRING { + lappend merged $part + } + STRINGPART { + if {$lasttype eq "STRINGPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + CONT - WS { + lappend merged $part + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. + lappend merged $part + } + default { + error "---- triple_dquote unhandled part type [lindex $part 0] unable to merge leveldata: $v($nest)" } } set lasttype [lindex $part 0] @@ -1809,15 +1911,12 @@ namespace eval tomlish::decode { endinlinetable { ::tomlish::log::debug "---- endinlinetable for last_space_action pop" } - endmultiquote { - ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" - } default { error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" } } if {$do_append_to_parent} { - #e.g squote_seq does it's own appends as necessary - so won't get here + #e.g tentative_accum_squote does it's own appends as necessary - so won't get here lappend v($parentlevel) [set v($nest)] } @@ -1831,8 +1930,8 @@ namespace eval tomlish::decode { switch -exact -- $tokenType { - squote_seq_begin { - #### + tentative_trigger_squote - tentative_trigger_dquote { + #### this startok will always be tentative_accum_squote/tentative_accum_dquote starting with one accumulated squote/dquote if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { lassign [dict get $transition_info starttok] starttok_type starttok_val set next_tokenType_known 1 @@ -1840,6 +1939,16 @@ namespace eval tomlish::decode { set tok $starttok_val } } + single_squote { + #JMN - REVIEW + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + triple_squote { + 
::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERALPART + } squotedkey { switch -exact -- $prevstate { table-space - itable-space { @@ -1849,6 +1958,9 @@ namespace eval tomlish::decode { #todo - check not something already waiting? tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } + triple_dquote { + set v($nest) [list MULTISTRING] ;#container for NEWLINE,STRINGPART,CONT + } dquotedkey { switch -exact -- $prevstate { table-space - itable-space { @@ -1858,7 +1970,7 @@ namespace eval tomlish::decode { #todo - check not something already waiting? tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } - XXXdquotedkey - XXXitablequotedkey { + XXXdquotedkey { #todo set v($nest) [list DQKEY $tok] ;#$tok is the keyname } @@ -1878,34 +1990,29 @@ namespace eval tomlish::decode { tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } } - startsquote { - #JMN - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "squotedkey" - set tok "" - } tablename { #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish # back to toml file will be identical. #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, - # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. 
+ # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names from + # a structural perspective. #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the # tomlish list? - set test_only [::tomlish::utils::tablename_trim $tok] - ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" + #set trimtable [::tomlish::utils::tablename_trim $tok] + #::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$trimtable'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. } tablearrayname { - set test_only [::tomlish::utils::tablename_trim $tok] - puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + #set trimtable [::tomlish::utils::tablename_trim $tok] + #::tomlish::log::debug "---- trimmed (but not normalized) tablearrayname: '$trimtable'" set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name } startarray { @@ -1914,14 +2021,6 @@ namespace eval tomlish::decode { startinlinetable { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. 
} - startmultiquote { - ::tomlish::log::debug "---- push trigger tokenType startmultiquote" - set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE - } - triple_squote { - ::tomlish::log::debug "---- push trigger tokenType triple_squote" - set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL - } default { error "---- push trigger tokenType '$tokenType' not yet implemented" } @@ -1931,11 +2030,11 @@ namespace eval tomlish::decode { #no space level change switch -exact -- $tokenType { squotedkey { - puts "---- squotedkey in state $prevstate (no space level change)" + #puts "---- squotedkey in state $prevstate (no space level change)" lappend v($nest) [list SQKEY $tok] } dquotedkey { - puts "---- dquotedkey in state $prevstate (no space level change)" + #puts "---- dquotedkey in state $prevstate (no space level change)" lappend v($nest) [list DQKEY $tok] } barekey { @@ -1960,29 +2059,46 @@ namespace eval tomlish::decode { startinlinetable { puts stderr "---- decode::toml error. 
did not expect startinlinetable without space level change (no space level change)" } - startquote { + single_dquote { switch -exact -- $newstate { string-state { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "string" set tok "" } - quoted-key { + dquoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "dquotedkey" set tok "" } - XXXitable-quoted-key { - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "itablequotedkey" - set tok "" + multistring-space { + lappend v($nest) [list STRINGPART {"}] + #may need to be joined on pop if there are neighbouring STRINGPARTS + } + default { + error "---- single_dquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + double_dquote { + #leading extra quotes - test: toml_multistring_startquote2 + switch -exact -- $prevstate { + itable-keyval-value-expected - keyval-value-expected { + puts stderr "tomlish::decode::toml double_dquote TEST" + #empty string + lappend v($nest) [list STRINGPART ""] + } + multistring-space { + #multistring-space to multistring-space + lappend v($nest) [list STRINGPART {""}] } default { - error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" } } + } - startsquote { + single_squote { switch -exact -- $newstate { literal-state { set next_tokenType_known 1 @@ -1995,41 +2111,17 @@ namespace eval tomlish::decode { set tok "" } multiliteral-space { - #false alarm squote returned from squote_seq pop + #false alarm squote returned from tentative_accum_squote pop ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" #(single squote - not terminating space) lappend v($nest) [list LITERALPART '] #may need to be joined on pop if there are neighbouring LITERALPARTs } default { 
- error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" + error "---- single_squote switch case not implemented for nextstate: $newstate (no space level change)" } } } - startmultiquote { - #review - puts stderr "---- got startmultiquote in state $prevstate (no space level change)" - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "stringpart" - set tok "" - } - endquote { - #nothing to do? - set tok "" - } - endsquote { - set tok "" - } - endmultiquote { - #JMN!! - set tok "" - } - string { - lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes - } - literal { - lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes - } double_squote { switch -exact -- $prevstate { keyval-value-expected { @@ -2044,6 +2136,19 @@ namespace eval tomlish::decode { } } } + enddquote { + #nothing to do? + set tok "" + } + endsquote { + set tok "" + } + string { + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes + } + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } multistring { #review lappend v($nest) [list MULTISTRING $tok] @@ -2056,11 +2161,9 @@ namespace eval tomlish::decode { } literalpart { lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly - } - itablequotedkey { - } untyped_value { + #would be better termed unclassified_value #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. 
if {$tok in {true false}} { set tag BOOL @@ -2238,7 +2341,7 @@ namespace eval tomlish::utils { #eg {dog."tater.man"} set sLen [tcl::string::length $tablename] set segments [list] - set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + set mode "preval" ;#5 modes: preval, quoted,litquoted, unquoted, postval #quoted is for double-quotes, litquoted is for single-quotes (string literal) set seg "" for {set i 0} {$i < $sLen} {incr i} { @@ -2249,139 +2352,166 @@ namespace eval tomlish::utils { set lastChar "" } + #todo - track\count backslashes properly + set c [tcl::string::index $tablename $i] + if {$c eq "\""} { + if {($lastChar eq "\\")} { + #not strictly correct - we could have had an even number prior-backslash sequence + #the toml spec would have us error out immediately on bsl in bad location - but we're + #trying to parse to unvalidated tomlish + set ctest escq + } else { + set ctest dq + } + } else { + set ctest [string map [list " " sp \t tab] $c] + } - if {$c eq "."} { - switch -exact -- $mode { - unquoted { - #dot marks end of segment. - lappend segments $seg - set seg "" - set mode "unknown" - } - quoted { - append seg $c - } - unknown { - lappend segments $seg - set seg "" - } - litquoted { - append seg $c - } - default { - #mode: syntax - #we got our dot. - the syntax mode is now satisfied. - set mode "unknown" + switch -- $ctest { + . { + switch -exact -- $mode { + preval { + error "tablename_split. dot not allowed - expecting a value" + } + unquoted { + #dot marks end of segment. + #if {![is_barekey $seg]} { + # error "tablename_split. 
dot not allowed - expecting a value" + #} + lappend segments $seg + set seg "" + set mode "preval" + } + quoted { + append seg $c + } + litquoted { + append seg $c + } + postval { + #got dot in an expected location + set mode "preval" + } } } - } elseif {($c eq "\"") && ($lastChar ne "\\")} { - if {$mode eq "unknown"} { - if {[tcl::string::trim $seg] ne ""} { - #we don't allow a quote in the middle of a bare key - error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" - } - set mode "quoted" - set seg "\"" - } elseif {$mode eq "unquoted"} { - append seg $c - } elseif {$mode eq "quoted"} { - append seg $c - - if {$normalize} { - lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] - } else { - lappend segments $seg + dq { + #unescaped dquote + switch -- $mode { + preval { + set mode "quoted" + set seg "\"" + } + unquoted { + #invalid in barekey - but we are after structure only + append seg $c + } + quoted { + append seg $c + if {$normalize} { + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] + } else { + lappend segments $seg + } + set seg "" + set mode "postval" ;#make sure we only accept a dot or end-of-data now. + } + litquoted { + append seg $c + } + postval { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } } - - set seg "" - set mode "syntax" ;#make sure we only accept a dot or end-of-data now. - } elseif {$mode eq "litquoted"} { - append seg $c - } elseif {$mode eq "syntax"} { - error "tablename_split. expected whitespace or dot, got double quote. 
tablename: '$tablename'" - } - } elseif {($c eq "\'")} { - if {$mode eq "unknown"} { - append seg $c - set mode "litquoted" - } elseif {$mode eq "unquoted"} { - #single quote inside e.g o'neill - append seg $c - } elseif {$mode eq "quoted"} { - append seg $c - - } elseif {$mode eq "litquoted"} { - append seg $c - #no normalization to do - lappend segments $seg - set seg "" - set mode "syntax" - } elseif {$mode eq "syntax"} { - error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" } - - } elseif {$c in [list " " \t]} { - if {$mode eq "syntax"} { - #ignore - } else { - append seg $c + ' { + switch -- $mode { + preval { + append seg $c + set mode "litquoted" + } + unquoted { + #single quote inside e.g o'neill - ultimately invalid - but we pass through here. + append seg $c + } + quoted { + append seg $c + } + litquoted { + append seg $c + #no normalization to do aside from stripping squotes + if {$normalize} { + lappend segments [tcl::string::range $seg 1 end-1] + } else { + lappend segments $seg + } + set seg "" + set mode "postval" + } + postval { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + } } - } else { - if {$mode eq "syntax"} { - error "tablename_split. Expected a dot separator. got '$c'. tablename: '$tablename'" + sp - tab { + switch -- $mode { + preval - postval { + #ignore + } + unquoted { + #terminates a barekey + lappend segments $seg + set seg "" + set mode "postval" + } + default { + #append to quoted or litquoted + append seg $c + } + } } - if {$mode eq "unknown"} { - set mode "unquoted" + default { + switch -- $mode { + preval { + set mode unquoted + append seg $c + } + postval { + error "tablename_split. Expected a dot separator. got '$c'. 
tablename: '$tablename'" + } + default { + append seg $c + } + } } - append seg $c } + if {$i == $sLen-1} { #end of data ::tomlish::log::debug "End of data: mode='$mode'" - #REVIEW - we can only end up in unquoted or syntax here? are other branches reachable? switch -exact -- $mode { - quoted { - if {$c ne "\""} { - error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" - } - if {$normalize} { - lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] - #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong - } else { - lappend segments $seg - } + preval { + error "tablename_split. Expected a value after last dot separator. tablename: '$tablename'" } - litquoted { - set trimmed_seg [tcl::string::trim $seg] - if {[tcl::string::index $trimmed_seg end] ne "\'"} { - error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" - } + unquoted { lappend segments $seg } - unquoted - unknown { - lappend segments $seg + quoted { + error "tablename_split. Expected a trailing double quote. tablename: '$tablename'" } - syntax { - #ok - segment already lappended + litquoted { + error "tablename_split. Expected a trailing single quote. tablename: '$tablename'" } - default { - lappend segments $seg + postval { + #ok - segment already lappended } } } } - foreach seg $segments { - set trimmed [tcl::string::trim $seg " \t"] - #note - we explicitly allow 'empty' quoted strings '' & "" - # (these are 'discouraged' but valid toml keys) - #if {$trimmed in [list "''" "\"\""]} { - # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" - #} - if {$trimmed eq "" } { - error "tablename_split. Empty segment found. 
tablename: '$tablename' segments [llength $segments] ($segments)" - } - } + + #note - we must allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + return $segments } @@ -2432,26 +2562,34 @@ namespace eval tomlish::utils { #- escape_string and unescape_string would not be reliably roundtrippable inverses anyway. #REVIEW - provide it anyway? When would it be desirable to use? - variable Bstring_control_map [list\ - \b {\b}\ - \n {\n}\ - \r {\r}\ - \" {\"}\ - \x1b {\e}\ - \\ "\\\\"\ - ] + variable Bstring_control_map [dict create] + dict set Bstring_control_map \b {\b} + dict set Bstring_control_map \n {\n} + dict set Bstring_control_map \r {\r} + dict set Bstring_control_map \" {\"} + #dict set Bstring_control_map \x1b {\e} ;#should presumably be only be a convenience for decode - going the other way we get \u001B + dict set Bstring_control_map \\ "\\\\" + #\e for \x1b seems like it might be included - v1.1?? hard to find current state of where toml is going :/ #for a Bstring (Basic string) tab is explicitly mentioned as not being one that must be escaped. - for {set cdec 0} {$cdec <= 8} {incr cdec} { + #8 = \b - already in list. 
+ #built the remainder whilst checking for entries already hardcoded above -in case more are added to the hardcoded list + for {set cdec 0} {$cdec <= 7} {incr cdec} { set hhhh [format %.4X $cdec] - lappend Bstring_control_map [format %c $cdec] \\u$hhhh + set char [format %c $cdec] + if {![dict exists $Bstring_control_map $char]} { + dict set Bstring_control_map $char \\u$hhhh + } } for {set cdec [expr {0x0A}]} {$cdec <= 0x1F} {incr cdec} { set hhhh [format %.4X $cdec] - lappend Bstring_control_map [format %c $cdec] \\u$hhhh + set char [format %c $cdec] + if {![dict exists $Bstring_control_map $char]} { + dict set Bstring_control_map $char \\u$hhhh + } } # \u007F = 127 - lappend Bstring_control_map [format %c 127] \\u007F + dict set Bstring_control_map [format %c 127] \\u007F #Note the inclusion of backslash in the list of controls makes this non idempotent - subsequent runs would keep encoding the backslashes! #escape only those chars that must be escaped in a Bstring (e.g not tab which can be literal or escaped) @@ -2474,6 +2612,7 @@ namespace eval tomlish::utils { # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn # it replaces \ with a single whitespace (trailing backslash) #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + #plus \e for \x1b? 
set buffer "" set buffer4 "" ;#buffer for 4 hex characters following a \u @@ -2558,12 +2697,13 @@ namespace eval tomlish::utils { set ctest [tcl::string::map {{"} dq} $c] switch -exact -- $ctest { dq { - set e "\\\"" - append buffer [subst -nocommand -novariable $e] + append buffer {"} } b - t - n - f - r { - set e "\\$c" - append buffer [subst -nocommand -novariable $e] + append buffer [subst -nocommand -novariable "\\$c"] + } + e { + append buffer \x1b } u { set unicode4_active 1 @@ -2578,8 +2718,7 @@ namespace eval tomlish::utils { #review - toml spec says all other escapes are reserved #and if they are used TOML should produce an error. #we leave detecting this for caller for now - REVIEW - append buffer "\\" - append buffer $c + append buffer "\\$c" } } } else { @@ -3003,7 +3142,7 @@ namespace eval tomlish::parse { # states: # table-space, itable-space, array-space # array-value-expected,keyval-value-expected,itable-keyval-value-expected, keyval-syntax, - # quoted-key, squoted-key + # dquoted-key, squoted-key # string-state, literal-state, multistring... # # notes: @@ -3039,6 +3178,12 @@ namespace eval tomlish::parse { variable stateMatrix set stateMatrix [dict create] + #--------------------------------------------------------- + #WARNING + #The stateMatrix implementation here is currently messy. + #The code is a mixture of declarative via the stateMatrix and imperative via switch statements during PUSH/POP/SAMESPACE transitions. + #This means the state behaviour has to be reasoned about by looking at both in conjunction. 
+ #--------------------------------------------------------- #xxx-space vs xxx-syntax inadequately documented - TODO @@ -3060,35 +3205,19 @@ namespace eval tomlish::parse { barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ dquotedkey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ - XXXstartquote "quoted-key"\ - XXXstartsquote "squoted-key"\ + XXXsingle_dquote "quoted-key"\ + XXXsingle_squote "squoted-key"\ comment "table-space"\ starttablename "tablename-state"\ starttablearrayname "tablearrayname-state"\ - startmultiquote "err-state"\ - endquote "err-state"\ + enddquote "err-state"\ + endsquote "err-state"\ comma "err-state"\ eof "end-state"\ equal "err-state"\ cr "err-lonecr"\ } - #itable-space/ curly-syntax : itables - dict set stateMatrix\ - itable-space {\ - whitespace "itable-space"\ - newline "itable-space"\ - barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - dquotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - endinlinetable "POPSPACE"\ - XXXstartquote "quoted-key"\ - XXXstartsquote {TOSTATE "squoted-key" comment "jn-testing"}\ - comma "err-state"\ - comment "itable-space"\ - eof "err-state"\ - } - #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}} dict set stateMatrix\ @@ -3113,26 +3242,19 @@ namespace eval tomlish::parse { dict set stateMatrix\ keyval-value-expected {\ whitespace "keyval-value-expected"\ - untyped_value {TOSTATE "keyval-tail" note ""}\ - startquote {TOSTATE "string-state" returnstate keyval-tail}\ - startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ - squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ - startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is 
triggered"}\ - double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ - triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ - startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ - startarray {PUSHSPACE array-space returnstate keyval-tail}\ - } - #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} - dict set stateMatrix\ - leading-squote-space {\ - squote_seq "POPSPACE"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + literal {TOSTATE "keyval-tail" note "required for empty literal at EOF"}\ + string {TOSTATE "keyval-tail" note "required for empty string at EOF"}\ + single_dquote {TOSTATE "string-state" returnstate keyval-tail}\ + triple_dquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + single_squote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ } - #dict set stateMatrix\ - # keyval-process-leading-squotes {\ - # startsquote "literal-state"\ - # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ - # } + #double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"} + + #2025 - no leading-squote-space - only trailing-squote-space. 
dict set stateMatrix\ keyval-tail {\ @@ -3142,81 +3264,106 @@ namespace eval tomlish::parse { eof "end-state"\ } + + #itable-space/ curly-syntax : itables + # x={y=1,} + dict set stateMatrix\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + dquotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + comma "err-state"\ + comment "itable-space"\ + eof "err-state"\ + } + #we don't get single_squote etc here - instead we get the resulting squotedkey token + + + # ??? review - something like this + # + # x={y =1,} dict set stateMatrix\ itable-keyval-syntax {\ - whitespace "itable-keyval-syntax"\ - barekey {PUSHSPACE "dottedkey-space"}\ - squotedkey {PUSHSPACE "dottedkey-space"}\ - dquotedkey {PUSHSPACE "dottedkey-space"}\ - equal "itable-keyval-value-expected"\ + whitespace {TOSTATE "itable-keyval-syntax"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + dquotedkey {PUSHSPACE "dottedkey-space"}\ + equal {TOSTATE "itable-keyval-value-expected"}\ newline "err-state"\ eof "err-state"\ } + + # x={y=1} + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + dict set stateMatrix\ itable-keyval-value-expected {\ whitespace "itable-keyval-value-expected"\ untyped_value {TOSTATE "itable-val-tail" note ""}\ - startquote {TOSTATE "string-state" returnstate itable-val-tail}\ - startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ - squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ - startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ - double_squote {TOSTATE "itable-val-tail" 
note "empty literal received when double squote occurs"}\ + single_dquote {TOSTATE "string-state" returnstate itable-val-tail}\ + triple_dquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + single_squote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ } - dict set stateMatrix\ - itable-keyval-space {\ - whitespace "itable-keyval-syntax"\ - equal {TOSTATE "itable-keyval-value-expected" note "required"}\ - } + #double_squote not currently generated by _start_squote_sequence - '' processed as single_squote to literal-state just like 'xxx' + # review + # double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"} + + + # x={y=1,z="x"} + #POPSPACE is transition from itable-keyval-space to parent itable-space dict set stateMatrix\ itable-val-tail {\ whitespace "itable-val-tail"\ endinlinetable "POPSPACE"\ comma "POPSPACE"\ - XXXnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ - newline "POPSPACE"\ + newline {TOSTATE "itable-val-tail" note "itable-space ??"}\ comment "itable-val-tail"\ eof "err-state"\ } - #dict set stateMatrix\ - # itable-quoted-key {\ - # whitespace "NA"\ - # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ - # newline "err-state"\ - # endquote "itable-keyval-syntax"\ - # } - #dict set stateMatrix\ - # itable-squoted-key {\ - # whitespace "NA"\ - # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ - # newline "err-state"\ - # endsquote "itable-keyval-syntax"\ - # } + # XXXnewline "POPSPACE" + # We shouldn't popspace on newline - as if there was no comma we need to stay in itable-val-tail + # This means the newline and subsequent whitespace, comments etc become part of the preceding dottedkey record + #e.g + # x = { + # j=1 + # 
#comment within dottedkey j record + # , # comment unattached + # #comment unattached + # k=2 , #comment unattached + # l=3 #comment within l record + # , m=4 + # #comment associated with m record + # + # #still associated with m record + # } + ## - This doesn't quite correspond to what a user might expect - but seems like a consistent mechanism. + #The awkwardness is because there is no way to put in a comment that doesn't consume a trailing comma + #so we cant do: j= 1 #comment for j1 , + # and have the trailing comma recognised. + # + # To associate: j= 1, #comment for j1 + # we would need some extra processing . (not popping until next key ? extra state itable-sep-tail?) REVIEW - worth doing? + # + # The same issue occurs with multiline arrays. The most natural assumption is that a comment on same line after a comma + # is 'associated' with the previous entry. + # + # These comment issues are independent of the data dictionary being generated for conversion to json etc - as the comments don't carry through anyway, + # but are a potential oddity for manipulating the intermediate tomlish structure whilst attempting to preserve 'associated' comments + # (e.g reordering records within an itable) + #The user's intention for 'associated' isn't always clear and the specs don't really guide on this. - - - #array-value-expected ? - dict set stateMatrix\ - XXXvalue-expected {\ - whitespace "value-expected"\ - untyped_value {"SAMESPACE" "" replay untyped_value}\ - startquote "string-state"\ - startsquote "literal-state"\ - triple_squote {PUSHSPACE "multiliteral-space"}\ - startmultiquote {PUSHSPACE "multistring-space"}\ - startinlinetable {PUSHSPACE itable-space}\ - startarray {PUSHSPACE array-space}\ - comment "err-state-value-expected-got-comment"\ - comma "err-state"\ - newline "err-state"\ - eof "err-state"\ - } - #note comment token should never be delivered to array-value-expected state? 
- #dottedkey-space is not (currently) used within [tablename] or [[tablearrayname]] #it is for keyval ie x.y.z = value @@ -3245,6 +3392,8 @@ namespace eval tomlish::parse { whitespace "dottedkey-space-tail" dotsep "dottedkey-space" equal "POPSPACE"\ + eof "err-state"\ + newline "err-state"\ } #-------------------------------------------------------------------------- @@ -3262,22 +3411,10 @@ namespace eval tomlish::parse { #toml spec looks like heading towards allowing newlines within inline tables #https://github.com/toml-lang/toml/issues/781 - #2025 - appears to be valid for 1.1 - which we are targeting. + #2025 - multiline itables appear to be valid for 1.1 - which we are targeting. #https://github.com/toml-lang/toml/blob/main/toml.md#inline-table #JMN2025 - #dict set stateMatrix\ - # curly-syntax {\ - # whitespace "curly-syntax"\ - # newline "curly-syntax"\ - # barekey {PUSHSPACE "itable-keyval-space"}\ - # itablequotedkey "itable-keyval-space"\ - # endinlinetable "POPSPACE"\ - # startquote "itable-quoted-key"\ - # comma "itable-space"\ - # comment "itable-space"\ - # eof "err-state"\ - # } #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES #We currently allow multiline ITABLES (also with comments) in the tokenizer. #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? 
@@ -3291,10 +3428,9 @@ namespace eval tomlish::parse { # untyped_value "SAMESPACE"\ # startarray {PUSHSPACE "array-space"}\ # endarray "POPSPACE"\ - # startmultiquote {PUSHSPACE multistring-space}\ # startinlinetable {PUSHSPACE itable-space}\ - # startquote "string-state"\ - # startsquote "literal-state"\ + # single_dquote "string-state"\ + # single_squote "literal-state"\ # triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ # comma "array-space"\ # comment "array-space"\ @@ -3305,15 +3441,16 @@ namespace eval tomlish::parse { set aspace [dict create] dict set aspace whitespace "array-space" dict set aspace newline "array-space" - dict set aspace untyped_value "SAMESPACE" + #dict set aspace untyped_value "SAMESPACE" + dict set aspace untyped_value "array-syntax" dict set aspace startarray {PUSHSPACE "array-space"} dict set aspace endarray "POPSPACE" - dict set aspace startmultiquote {PUSHSPACE multistring-space} + dict set aspace single_dquote {TOSTATE "string-state" returnstate array-syntax} + dict set aspace triple_dquote {PUSHSPACE "multistring-space" returnstate array-syntax} + dict set aspace single_squote {TOSTATE "literal-state" returnstate array-syntax} + dict set aspace triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax} dict set aspace startinlinetable {PUSHSPACE itable-space} - dict set aspace startquote "string-state" - dict set aspace startsquote "literal-state" - dict set aspace triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"} - dict set aspace comma "array-space" + #dict set aspace comma "array-space" dict set aspace comment "array-space" dict set aspace eof "err-state-array-space-got-eof" dict set stateMatrix array-space $aspace @@ -3329,26 +3466,16 @@ namespace eval tomlish::parse { #dict set asyntax untyped_value "SAMESPACE" #dict set asyntax startarray {PUSHSPACE array-space} dict set asyntax endarray "POPSPACE" - #dict set asyntax 
startmultiquote {PUSHSPACE multistring-space} - #dict set asyntax startquote "string-state" - #dict set asyntax startsquote "literal-state" + #dict set asyntax single_dquote "string-state" + #dict set asyntax single_squote "literal-state" dict set asyntax comma "array-space" dict set asyntax comment "array-syntax" dict set stateMatrix array-syntax $asyntax - #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space - dict set stateMatrix\ - quoted-key {\ - whitespace "NA"\ - dquotedkey {PUSHSPACE "keyval-space"}\ - newline "err-state"\ - endquote "keyval-syntax"\ - } - - #review + #dquotedkey is a token - dquoted-key is a state dict set stateMatrix\ dquoted-key {\ whitespace "NA"\ @@ -3367,7 +3494,7 @@ namespace eval tomlish::parse { string-state {\ whitespace "NA"\ string "string-state"\ - endquote "SAMESPACE"\ + enddquote "SAMESPACE"\ newline "err-state"\ eof "err-state"\ } @@ -3381,20 +3508,21 @@ namespace eval tomlish::parse { } - #dict set stateMatrix\ - # stringpart {\ - # continuation "SAMESPACE"\ - # endmultiquote "POPSPACE"\ - # eof "err-state"\ - # } dict set stateMatrix\ multistring-space {\ - whitespace "multistring-space"\ - continuation "multistring-space"\ - stringpart "multistring-space"\ - newline "multistring-space"\ - endmultiquote "POPSPACE"\ - eof "err-state"\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + tentative_trigger_dquote {PUSHSPACE "trailing-dquote-space" returnstate multistring-space starttok {tentative_accum_dquote {"}}}\ + single_dquote {TOSTATE multistring-space}\ + double_dquote {TOSTATE multistring-space}\ + triple_dquote {POPSPACE}\ + eof "err-state"\ + } + dict set stateMatrix\ + trailing-dquote-space { + tentative_accum_dquote "POPSPACE" } @@ -3402,19 +3530,19 @@ namespace eval tomlish::parse { #todo - treat sole cr as part of literalpart but crlf and lf as newline dict set stateMatrix\ multiliteral-space {\ - 
literalpart "multiliteral-space"\ - newline "multiliteral-space"\ - squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ - triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ - double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ - startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ - eof "err-premature-eof-in-multiliteral-space"\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + tentative_trigger_squote {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {tentative_accum_squote "'"}}\ + single_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: false alarm this squote is part of data"}\ + double_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + eof "err-premature-eof-in-multiliteral-space"\ } #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes dict set stateMatrix\ - trailing-squote-space {\ - squote_seq "POPSPACE"\ + trailing-squote-space { + tentative_accum_squote "POPSPACE" } @@ -3499,7 +3627,7 @@ namespace eval tomlish::parse { - + dict set stateMatrix\ end-state {} @@ -3557,14 +3685,13 @@ namespace eval tomlish::parse { dict set spacePushTransitions itable-keyval-space itable-keyval-syntax dict set spacePushTransitions array-space array-space dict set spacePushTransitions table-space tablename-state - dict set spacePushTransitions #itable-space itable-space + #dict set spacePushTransitions #itable-space itable-space #Pop 
to, next variable spacePopTransitions [dict create] dict set spacePopTransitions array-space array-syntax - #itable-space curly-syntax #itable-keyval-space itable-val-tail #review #we pop to keyval-space from dottedkey-space or from keyval-value-expected? we don't always want to go to keyval-tail @@ -3575,7 +3702,6 @@ namespace eval tomlish::parse { #JMN test #dict set spaceSameTransitions array-space array-syntax - #itable-space curly-syntax #itable-keyval-space itable-val-tail @@ -3611,6 +3737,8 @@ namespace eval tomlish::parse { ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { POPSPACE { + set popfromspace_info [spacestack peek] + set popfromspace_state [dict get $popfromspace_info state] spacestack pop set parent_info [spacestack peek] set type [dict get $parent_info type] @@ -3625,17 +3753,17 @@ namespace eval tomlish::parse { set existing [spacestack pop] dict unset existing returnstate spacestack push $existing ;#re-push modification - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected to stored returnstate $next <<---" } else { ### #review - do away with spacePopTransitions - which although useful to provide a default.. 
# - involve error-prone configurations distant to the main state transition configuration in stateMatrix if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" } else { set next $parentspace - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace<<---" } } set result $next @@ -3805,22 +3933,6 @@ namespace eval tomlish::parse { return $tokenType } - proc _shortcircuit_startquotesequence {} { - variable tok - variable i - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - set_tokenType "startquote" - incr i -1 - return -level 2 1 - } elseif {$toklen == 2} { - puts stderr "_shortcircuit_startquotesequence toklen 2" - set_tokenType "startquote" - set tok "\"" - incr i -2 - return -level 2 1 - } - } proc get_token_waiting {} { variable token_waiting @@ -3940,7 +4052,6 @@ namespace eval tomlish::parse { set slash_active 0 set quote 0 set c "" - set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { set lastChar [tcl::string::index $s [expr {$i - 1}]] @@ -3957,8 +4068,6 @@ namespace eval tomlish::parse { switch -exact -- $ctest { # { - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 @@ -3966,16 +4075,20 @@ namespace eval tomlish::parse { if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { + #for multiliteral, multistring - data and/or end incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } 
_start_squote_sequence { + #pseudo token beginning with underscore - never returned to state machine - review incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] ;#rewind (negative) - same as the squote arm + set_tokenType "single_dquote" return 1 } barekey { @@ -4003,7 +4116,7 @@ namespace eval tomlish::parse { append tok $c } default { - #dquotedkey, itablequotedkey, string,literal, multistring + #dquotedkey, string,literal, multistring append tok $c } } @@ -4015,7 +4128,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes#" + append tok "#" } multiliteral-space { set_tokenType "literalpart" @@ -4031,23 +4144,23 @@ namespace eval tomlish::parse { } lc { #left curly brace - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] ;#rewind (negative) - same as the squote arm + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { @@ -4059,7 +4172,7 @@ namespace eval tomlish::parse { } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename - starttablearrayname { #*bare* tablename can only contain letters,digits underscores @@ -4105,7 +4218,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\{" + append tok "\{" } multiliteral-space { set_tokenType "literalpart" @@ -4120,37 +4233,35 @@ namespace eval tomlish::parse { } rc { #right curly brace - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set 
slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey - comment { + string - dquotedkey - comment { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename - tablename { if {$had_slash} {append tok "\\"} @@ -4221,7 +4332,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\}" + append tok "\}" } multiliteral-space { set_tokenType "literalpart" ; #review @@ -4237,35 +4348,35 @@ namespace eval tomlish::parse { } lb { #left square bracket - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename { #change the tokenType @@ -4332,7 
+4443,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\[" + append tok "\[" } multiliteral-space { set_tokenType "literalpart" @@ -4350,37 +4461,35 @@ namespace eval tomlish::parse { } rb { #right square bracket - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } comment { if {$had_slash} {append tok "\\"} @@ -4428,16 +4537,6 @@ namespace eval tomlish::parse { } } } - XXXtablearraynames { - puts "rb @ tablearraynames ??" - #switch? - - #todo? - if {$had_slash} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex - return 1 - } default { incr i -1 return 1 @@ -4485,7 +4584,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\]" + append tok "\]" } multiliteral-space { set_tokenType "literalpart" @@ -4498,21 +4597,21 @@ namespace eval tomlish::parse { } } bsl { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
#backslash if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } whitespace { @@ -4529,9 +4628,7 @@ namespace eval tomlish::parse { append tok "\\" set slash_active 0 } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey - comment { + string - dquotedkey - comment { if {$slash_active} { set slash_active 0 append tok "\\\\" @@ -4545,7 +4642,6 @@ namespace eval tomlish::parse { set slash_active 0 append tok "\\\\" } else { - append tok $dquotes set slash_active 1 } } @@ -4575,10 +4671,6 @@ namespace eval tomlish::parse { set tok "\\\\" set slash_active 0 } else { - if {$dquotes ne ""} { - set_tokenType "stringpart" - set tok $dquotes - } set slash_active 1 } } @@ -4599,58 +4691,56 @@ namespace eval tomlish::parse { set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { - #short squote_seq tokens are returned if active during any other character + tentative_accum_squote { + #for within multiliteral + #short tentative_accum_squote tokens are returned if active upon receipt of any other character #longest allowable for leading/trailing are returned here #### set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote - switch -- $state { - leading-squote-space { - append tok $c - if {$existingtoklen > 2} { - error "tomlish tok error: squote_seq unexpected length $existingtoklen when another received" - } elseif {$existingtoklen == 2} { - return 1 ;#return tok ''' - } - } - trailing-squote-space { - append tok $c - if {$existingtoklen == 4} { - #maxlen to be an squote_seq is multisquote + 
2 = 5 - #return tok ''''' - return 1 - } - } - default { - error "tomlish tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" - } + #assert state = trailing-squote-space + append tok $c + if {$existingtoklen == 4} { + #maxlen to be a tentative_accum_squote is multisquote + 2 = 5 + #return tok with value ''''' + return 1 } } - whitespace { - #end whitespace - incr i -1 ;#reprocess sq + tentative_accum_dquote { + incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { - #temp token creatable only during value-expected or array-space + #pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space switch -- [tcl::string::length $tok] { 1 { + #no conclusion can yet be reached append tok $c } 2 { + #enter multiliteral #switch? append tok $c set_tokenType triple_squote return 1 } default { + #if there are more than 3 leading squotes we also enter multiliteral space and the subsequent ones are handled + #by the tentative_accum_squote check for ending sequence which can accept up to 5 and reintegrate the + #extra 1 or 2 squotes as data. 
error "tomlish unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" } } } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" + return 1 + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } literal { #slash_active always false #terminate the literal @@ -4663,7 +4753,7 @@ namespace eval tomlish::parse { # idea: end this literalpart (possibly 'temporarily') # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) - incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + incr i -1 ;#throw the "'" back to loop - will be added to a tentative_accum_squote token for later processing return 1 } XXXitablesquotedkey { @@ -4684,7 +4774,11 @@ namespace eval tomlish::parse { append tok $c } barekey { - #not clear why o'shennanigan shouldn't be a legal barekey - but it seems not to be. + #barekeys now support all sorts of unicode letter/number chars for other cultures + #but not punctuation - not even for those of Irish heritage who don't object + #to the anglicised form of some names. + # o'shenanigan seems to not be a legal barekey + #The Irish will have to use an earlier form Ó - which apparently many may prefer anyway. error "tomlish Unexpected single quote during barekey. 
[tomlish::parse::report_line]" } default { @@ -4693,63 +4787,69 @@ namespace eval tomlish::parse { } } else { switch -exact -- $state { - array-space { + array-space - keyval-value-expected - itable-keyval-value-expected { + #leading squote + #pseudo-token _start_squote_sequence ss not received by state machine + #This pseudotoken will trigger production of single_squote token or triple_squote token + #It currently doesn't trigger double_squote token + #(handle '' same as 'x' ie produce a single_squote and go into processing literal) + #review - producing double_squote for empty literal may be slightly more efficient. + #This token is not used to handle squote sequences *within* a multiliteral set_tokenType "_start_squote_sequence" set tok "'" } - itable-keyval-value-expected - keyval-value-expected { - set_tokenType "squote_seq_begin" + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up a tentative_accum_squote to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "tentative_trigger_squote" ;#trigger tentative_accum_squote set tok "'" return 1 } - table-space { - #tests: squotedkey.test - set_tokenType "squotedkey" - set tok "" - } - itable-space { - #tests: squotedkey_itable.test + table-space - itable-space { + #tests: squotedkey.test squotedkey_itable.test set_tokenType "squotedkey" set tok "" } - XXXitable-space { - #future - could there be multiline keys? - #this would allow arbitrary tcl dicts to be stored in toml + XXXtable-space - XXXitable-space { + #future - could there be multiline keys? MLLKEY, MLBKEY ? 
+ #this would (almost) allow arbitrary tcl dicts to be stored in toml (aside from escaping issues) #probably unlikely - as it's perhaps not very 'minimal' or ergonomic for config files - set_tokenType "squote_seq_begin" + #@2025 ABNF for toml mentions key, simple-key, unquoted-key, quoted-key and dotted-key + #where key is simple-key or dotted-key - no MLL or MLB components + #the spec states solution for arbitrary binary data is application specific involving encodings + #such as hex, base64 + set_tokenType "_start_squote_sequence" set tok "'" return 1 } tablename-state { #first char in tablename-state/tablearrayname-state - set_tokenType tablename + set_tokenType "tablename" append tok "'" } tablearrayname-state { - set_tokenType tablearrayname + set_tokenType "tablearrayname" append tok "'" } literal-state { + #shouldn't get here? review tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" - set_tokenType literal - incr -1 + set_tokenType "literal" + incr i -1 return 1 } multistring-space { - error "tomlish unimplemented - squote during state '$state'. [tomlish::parse::report_line]" - } - multiliteral-space { - #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row - #we are building up an squote_seq to determine if - #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines - #b) it is exactly ''' and we can terminate the whole multiliteral - #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space - set_tokenType "squote_seq_begin" - set tok "'" - return 1 + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "'" + #error "tomlish unimplemented - squote during state '$state'. 
[tomlish::parse::report_line]" } dottedkey-space { - set_tokenType squotedkey + set_tokenType "squotedkey" } default { error "tomlish unhandled squote during state '$state'. [tomlish::parse::report_line]" @@ -4765,44 +4865,50 @@ namespace eval tomlish::parse { if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote { incr i -1 return 1 } - startquotesequence { - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - append tok $c - } elseif {$toklen == 2} { - append tok $c - #switch vs set? - set_tokenType "startmultiquote" - return 1 - } else { - error "tomlish unexpected token length $toklen in 'startquotesequence'" - } - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" return 1 - - #set toklen [tcl::string::length $tok] - #switch -- $toklen { - # 1 { - # set_tokenType "startsquote" - # incr i -1 - # return 1 - # } - # 2 { - # set_tokenType "startsquote" - # incr i -2 - # return 1 - # } - # default { - # error "tomlish unexpected _start_squote_sequence length $toklen" - # } - #} + } + tentative_accum_dquote { + #within multistring + #short tentative_accum_dquote tokens are returned if active upon receipt of any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this dquote + #assert state = trailing-dquote-space (review: state name presumed by analogy with trailing-squote-space - confirm) + append tok $c + if {$existingtoklen == 4} { + #maxlen to be a tentative_accum_dquote is multidquote + 2 = 5 + #return tok with value """"" + return 1 + } + } + _start_dquote_sequence { + #pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { + #no conclusion can yet be reached + append tok $c + } + 2 { + #enter multistring + #switch? 
+ append tok $c + set_tokenType triple_dquote + return 1 + } + default { + #if there are more than 3 leading dquotes we also enter multistring space and the subsequent ones are handled + #by the tentative_accum_dquote check for ending sequence which can accept up to 5 and reintegrate the + #extra 1 or 2 dquotes as data. + error "tomlish unexpected token length [tcl::string::length $tok] in '_start_dquote_sequence'" + } + } } literal - literalpart { append tok $c @@ -4811,8 +4917,8 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" $c } else { - #unescaped quote always terminates a string? - set_token_waiting type endquote value "\"" complete 1 startindex $cindex + #unescaped quote always terminates a string + set_token_waiting type enddquote value "\"" complete 1 startindex $cindex return 1 } } @@ -4821,77 +4927,31 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" $c } else { - #incr i -1 - - if {$multi_dquote eq "\"\""} { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } + incr i -1 ;#throw the {"} back to loop - will be added to a tentative_accum_dquote token for later processing + return 1 } } whitespace { - switch -exact -- $state { - multistring-space { - #REVIEW - if {$had_slash} { - incr i -2 - return 1 - } else { - switch -- [tcl::string::length $multi_dquote] { - 2 { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] - set multi_dquote "" - return 1 - } - 1 { - incr i -2 - return 1 - } - 0 { - incr i -1 - return 1 - } - } - } - } - keyval-value-expected { - #end whitespace token and reprocess - incr i -1 - return 1 - - #if {$multi_dquote eq "\"\""} { - # set_token_waiting type startmultiquote value "\"\"\"" complete 1 - # set multi_dquote "" - # return 1 - #} else { - # #end whitespace token and reprocess - # incr i -1 - # return 1 - #} - } - table-space - 
itable-space { - incr i -1 - return 1 - } - default { - set_token_waiting type startquote value "\"" complete 1 startindex $cindex - return 1 - } + #assert: had_slash will only ever be true in multistring-space + if {$had_slash} { + incr i -2 + return 1 + } else { + #end whitespace token - throw dq back for reprocessing + incr i -1 + return 1 } } comment { if {$had_slash} {append tok "\\"} append tok $c } - XXXdquotedkey - XXXitablequotedkey { + XXXdquotedkey { if {$had_slash} { append tok "\\" append tok $c } else { - set_token_waiting type endquote value "\"" complete 1 startindex $cindex + set_token_waiting type enddquote value "\"" complete 1 startindex $cindex return 1 } } @@ -4901,7 +4961,7 @@ namespace eval tomlish::parse { append tok "\\" append tok $c } else { - #set_token_waiting type endsquote value "'" complete 1 + #set_token_waiting type enddquote value {"} complete 1 return 1 } } @@ -4924,64 +4984,40 @@ namespace eval tomlish::parse { #$slash_active not relevant when no tokenType #token is string only if we're expecting a value at this point switch -exact -- $state { - array-space { - #!? start looking for possible multistartquote - #set_tokenType startquote - #set tok $c - #return 1 - set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote - set tok $c - } - keyval-value-expected - itable-keyval-value-expected { - set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote - set tok $c + array-space - keyval-value-expected - itable-keyval-value-expected { + #leading dquote + #pseudo-token _start_dquote_sequence is not received by state machine + #This pseudotoken will trigger production of single_dquote token or triple_dquote token + #It currently doesn't trigger double_dquote token + #(handle "" same as "x" ie produce a single_dquote and go into processing string) + #review - producing double_dquote for empty string may be slightly more efficient. 
+ #This token is not used to handle dquote sequences once *within* a multistring + set_tokenType "_start_dquote_sequence" + set tok {"} } multistring-space { - #TODO - had_slash!!! - #REVIEW if {$had_slash} { set_tokenType "stringpart" set tok "\\\"" - set multi_dquote "" } else { - if {$multi_dquote eq "\"\""} { - tomlish::log::debug "- tokloop char dq ---> endmultiquote" - set_tokenType "endmultiquote" - set tok "\"\"\"" - return 1 - #set_token_waiting type endmultiquote value "\"\"\"" complete 1 - #set multi_dquote "" - #return 1 - } else { - append multi_dquote "\"" - } + #each stringpart is not necessarily started/ended with dquotes - but may contain up to 2 in a row + #we are building up a tentative_accum_dquote to determine if + #a) it is shorter than """ so belongs in a stringpart (either previous, subsequent or its own stringpart between newlines) + #b) it is exactly """ and we can terminate the whole multistring + #c) it is 4 or 5 dquotes where the first 1 or 2 belong in a stringpart and the trailing 3 terminate the space + set_tokenType "tentative_trigger_dquote" ;#trigger tentative_accum_dquote + set tok {"} + return 1 } } multiliteral-space { set_tokenType "literalpart" set tok "\"" } - XXXtable-space { - set_tokenType "startquote" - set tok $c - return 1 - } - XXXitable-space { - set_tokenType "startquote" - set tok $c - } table-space - itable-space { set_tokenType "dquotedkey" set tok "" } - tablename-state { - set_tokenType tablename - set tok $c - } - tablearrayname-state { - set_tokenType tablearrayname - set tok $c - } dottedkey-space { set_tokenType dquotedkey set tok "" @@ -4990,49 +5026,56 @@ namespace eval tomlish::parse { #set_tokenType dquote_seq_begin #set tok $c } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } default { - error "tomlish Unexpected quote during state '$state' [tomlish::parse::report_line]" + error "tomlish Unexpected dquote during 
state '$state' [tomlish::parse::report_line]" } } } } = { - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { - #assertion had_slash 0, multi_dquote "" + #assertion had_slash 0 append tok $c } - string - comment - dquotedkey - itablequotedkey { + string - comment - dquotedkey { #for these tokenTypes an = is just data. if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } whitespace { if {$state eq "multistring-space"} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen + incr i -1 return 1 } else { set_token_waiting type equal value = complete 1 startindex $cindex @@ -5063,7 +5106,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok ${dquotes}= + append tok = } multiliteral-space { set_tokenType "literalpart" @@ -5084,8 +5127,6 @@ namespace eval tomlish::parse { } cr { #REVIEW! - set dquotes $multi_dquote - set multi_dquote "" ;#!! # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
set slash_active 0 @@ -5098,16 +5139,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal { @@ -5159,8 +5202,6 @@ namespace eval tomlish::parse { } lf { # \n newline - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5171,16 +5212,19 @@ namespace eval tomlish::parse { append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { + #multiliteral or multistring incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal { @@ -5196,20 +5240,14 @@ namespace eval tomlish::parse { return 1 } stringpart { - if {$dquotes ne ""} { - append tok $dquotes + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] incr i -1 return 1 } else { - if {$had_slash} { - #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) - set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] - incr i -1 - return 1 - } else { - set_token_waiting type newline value lf complete 1 
startindex $cindex - return 1 - } + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 } } starttablename - tablename - tablearrayname - starttablearrayname { @@ -5236,20 +5274,13 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - #e.g one or 2 quotes just before nl - set_tokenType "stringpart" - set tok $dquotes - incr i -1 - return 1 - } set_tokenType "newline" set tok lf return 1 } } multiliteral-space { - #assert had_slash 0, multi_dquote "" + #assert had_slash 0 set_tokenType "newline" set tok "lf" return 1 @@ -5275,8 +5306,6 @@ namespace eval tomlish::parse { } } , { - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5287,39 +5316,40 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } comment - tablename - tablearrayname { if {$had_slash} {append tok "\\"} append tok , } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { #stringpart can have up to 2 quotes too if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } literal - literalpart - squotedkey { - #assert had_slash always 0, multi_dquote "" + #assert had_slash always 0 append tok $c } whitespace { if {$state eq "multistring-space"} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen + incr i -1 return 1 } else { set_token_waiting type comma value "," complete 1 startindex $cindex @@ -5338,10 +5368,10 @@ namespace eval tomlish::parse { 
set_tokenType "stringpart" set tok "" if {$had_slash} {append tok "\\"} - append tok "$dquotes," + append tok "," } multiliteral-space { - #assert had_slash 0, multi_dquote "" + #assert had_slash 0 set_tokenType "literalpart" set tok "," } @@ -5354,8 +5384,6 @@ namespace eval tomlish::parse { } } . { - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5366,42 +5394,45 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } comment - untyped_value { if {$had_slash} {append tok "\\"} append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } literal - literalpart - squotedkey { - #assert had_slash always 0, multi_dquote "" + #assert had_slash always 0 append tok $c } whitespace { switch -exact -- $state { multistring-space { - set backchars [expr {[tcl::string::length $dquotes] + 1}] + #review if {$had_slash} { - incr backchars 1 + incr i -2 + } else { + incr i -1 } - incr i -$backchars return 1 } xxxdottedkey-space { @@ -5444,7 +5475,7 @@ namespace eval tomlish::parse { set_tokenType "stringpart" set tok "" if {$had_slash} {append tok "\\"} - append tok "$dquotes." + append tok "." } multiliteral-space { set_tokenType "literalpart" @@ -5471,8 +5502,6 @@ namespace eval tomlish::parse { } " " { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
if {[tcl::string::length $tokenType]} { set had_slash $slash_active set slash_active 0 @@ -5483,16 +5512,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -5512,9 +5543,9 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok $dquotes$c + append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} { append tok "\\" } append tok $c } @@ -5526,8 +5557,7 @@ namespace eval tomlish::parse { incr i -2 return 1 } else { - #split into STRINGPART aaa WS " " - append tok $dquotes + #split into STRINGPART xxx WS " " incr i -1 return 1 } @@ -5537,15 +5567,7 @@ namespace eval tomlish::parse { } whitespace { if {$state eq "multistring-space"} { - if {$dquotes ne ""} { - #end whitespace token - #go back by the number of quotes plus this space char - set backchars [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backchars - return 1 - } else { - append tok $c - } + append tok $c } else { append tok $c } @@ -5588,12 +5610,6 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - set_tokenType "stringpart" - set tok $dquotes - incr i -1 - return 1 - } set_tokenType "whitespace" append tok $c } @@ -5613,9 +5629,6 @@ namespace eval tomlish::parse { } } tab { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
- if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) set slash_active 0 @@ -5626,12 +5639,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence + tentative_accum_squote - tentative_accum_dquote { + incr i -1 + return 1 } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -5662,7 +5681,6 @@ namespace eval tomlish::parse { return 1 } else { #split into STRINGPART aaa WS " " - append tok $dquotes incr i -1 return 1 } @@ -5706,15 +5724,8 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - set_tokenType stringpart - set tok $dquotes - incr i -1 - return 1 - } else { - set_tokenType whitespace - append tok $c - } + set_tokenType whitespace + append tok $c } } multiliteral-space { @@ -5732,16 +5743,31 @@ namespace eval tomlish::parse { #BOM (Byte Order Mark) - ignored by token consumer if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + tentative_accum_squote - tentative_accum_dquote { + incr i -1 + return 1 + } _start_squote_sequence { #assert - tok will be one or two squotes only + #A toml literal probably isn't allowed to contain this + #but we will parse and let the validator sort it out. 
incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart { append tok $c } + string - stringpart { + append tok $c + } default { + #state machine will generally not have entry to accept bom - let it crash set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex return 1 } @@ -5752,6 +5778,10 @@ namespace eval tomlish::parse { set_tokenType "literalpart" set tok $c } + multistring-space { + set_tokenType "stringpart" + set tok $c + } default { set_tokenType "bom" set tok "\uFEFF" @@ -5761,8 +5791,6 @@ namespace eval tomlish::parse { } } default { - set dquotes $multi_dquote - set multi_dquote "" ;#!! if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. @@ -5774,28 +5802,24 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } whitespace { if {$state eq "multistring-space"} { - if {$dquotes ne ""} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen - return 1 - } else { - incr i -1 - return 1 - } + incr i -1 + return 1 } else { #review incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. 
@@ -5815,7 +5839,7 @@ namespace eval tomlish::parse { return 1 } stringpart { - append tok $dquotes$c + append tok $c } default { #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname @@ -5835,22 +5859,12 @@ namespace eval tomlish::parse { error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } - XXXcurly-syntax { - puts stderr "curly-syntax - review" - if {[tomlish::utils::is_barekey $c]} { - set_tokenType "barekey" - append tok $c - } else { - error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" - } - } multistring-space { set_tokenType "stringpart" if {$had_slash} { - #assert - we don't get had_slash and dquotes at same time set tok \\$c } else { - set tok $dquotes$c + set tok $c } } multiliteral-space { @@ -5890,21 +5904,6 @@ namespace eval tomlish::parse { # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" #} switch -exact -- $tokenType { - startquotesequence { - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - #invalid - #eof with open string - error "tomlish eof reached without closing quote for string. [tomlish::parse::report_line]" - } elseif {$toklen == 2} { - #valid - #we ended in a double quote, not actually a startquoteseqence - effectively an empty string - switch_tokenType "startquote" - incr i -1 - #set_token_waiting type string value "" complete 1 - return 1 - } - } _start_squote_sequence { set toklen [tcl::string::length $tok] switch -- $toklen { @@ -5913,11 +5912,29 @@ namespace eval tomlish::parse { error "tomlish eof reached without closing single quote for string literal. 
[tomlish::parse::report_line]" } 2 { - #review - set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] set_tokenType "literal" set tok "" return 1 + + ##review + #set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + #set_tokenType "literal" + #set tok "" + #return 1 + } + } + } + _start_dquote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open string + error "tomlish eof reached without closing double quote for string. [tomlish::parse::report_line]" + } + 2 { + set_tokenType "string" + set tok "" + return 1 } } } @@ -6011,6 +6028,16 @@ namespace eval tomlish::dict { return $name } + proc _show_tablenames {tablenames_info} { + append msg \n "tablenames_info:" \n + dict for {tkey tinfo} $tablenames_info { + append msg " " "table: $tkey" \n + dict for {field finfo} $tinfo { + append msg " " "$field $finfo" \n + } + } + return $msg + } } tcl::namespace::eval tomlish::app { diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/dictn-0.1.1.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/dictn-0.1.1.tm new file mode 100644 index 00000000..c9ef87f2 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/dictn-0.1.1.tm @@ -0,0 +1,349 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. 
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+# (C) 2023
+#
+# @@ Meta Begin
+# Application dictn 0.1.1
+# Meta platform     tcl
+# Meta license
+# @@ Meta End
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+## Requirements
+##e.g package require frobz
+
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+namespace eval dictn {
+    namespace export {[a-z]*}
+    namespace ensemble create
+}
+
+#In every dictn command a 'path' is a Tcl list of keys addressing a nested element. e.g {a b c}
+#NOTE: inside these procs, commands whose names are shadowed by dictn (set append lappend incr info unset ...) must be called fully qualified (::set etc).
+## ::dictn::append
+#This can of course 'ruin' a nested dict if applied to the wrong element
+# - i.e using the string op 'append' on an element that is itself a nested dict is analogous to the standard Tcl:
+#  %set list {a b {c d}}
+#  %append list x
+#  a b {c d}x
+# IOW - don't do that unless you really know that's what you want.
+# For a multi-element path the addressed element must already exist (it is fetched with dict get).
+proc ::dictn::append {dictvar path {value {}}} {
+    if {[llength $path] == 1} {
+        uplevel 1 [list dict append $dictvar $path $value]
+    } else {
+        upvar 1 $dictvar dvar
+
+        ::set str [dict get $dvar {*}$path]
+        ::append str $value ;#must be ::append - a bare append resolves to ::dictn::append inside this namespace
+        dict set dvar {*}$path $str
+    }
+}
+#dictn create ?path value ...? - build a dict from path/value pairs.
+proc ::dictn::create {args} {
+    ::set data {}
+    foreach {path val} $args {
+        dict set data {*}$path $val
+    }
+    return $data
+}
+#dictn exists dictionaryValue path - result of 'dict exists' for the nested key path.
+proc ::dictn::exists {dictval path} {
+    return [dict exists $dictval {*}$path]
+}
+#dictn filter dictionaryValue path filterType arg ?arg ...? - 'dict filter' applied to the sub-dictionary at path.
+proc ::dictn::filter {dictval path filterType args} {
+    ::set sub [dict get $dictval {*}$path]
+    return [uplevel 1 [list dict filter $sub $filterType {*}$args]] ;#caller scope so a 'script' filter can see the caller's variables
+}
+#dictn for {keyVar valueVar} dictionaryValue path body - 'dict for' over the sub-dictionary at path.
+proc ::dictn::for {keyvalvars dictval path body} {
+    ::set sub [dict get $dictval {*}$path]
+    uplevel 1 [list dict for $keyvalvars $sub $body] ;#body and loop variables belong to the caller's scope, as with the builtin dict for
+}
+#dictn get dictionaryValue ?path? - value at path. An empty or omitted path returns the whole dict.
+proc ::dictn::get {dictval {path {}}} {
+    return [dict get $dictval {*}$path]
+}
+#dictn getdef dictionaryValue path default - value at path or default. Relies on 'dict getdef' being available (no fallback here).
+proc ::dictn::getdef {dictval path default} {
+    return [dict getdef $dictval {*}$path $default]
+}
+#dictn getwithdefault - same implementation as getdef.
+proc ::dictn::getwithdefault {dictval path default} {
+    return [dict getdef $dictval {*}$path $default]
+}
+#dictn incr dictVariable path ?increment? - implementation is selected at load time on availability of 'dict getdef'.
+if {[info commands ::tcl::dict::getdef] ne ""} {
+    proc ::dictn::incr {dictvar path {increment {}} } {
+        if {$increment eq ""} {
+            ::set increment 1
+        }
+        if {[llength $path] == 1} {
+            uplevel 1 [list dict incr $dictvar $path $increment]
+        } else {
+            upvar 1 $dictvar dvar
+            if {![::info exists dvar]} {
+                dict set dvar {*}$path $increment
+            } else {
+                ::set newval [expr {[dict getdef $dvar {*}$path 0] + $increment}]
+                dict set dvar {*}$path $newval
+            }
+            return $dvar
+        }
+    }
+} else {
+    proc ::dictn::incr {dictvar path {increment {}} } {
+        if {$increment eq ""} {
+            ::set increment 1
+        }
+        if {[llength $path] == 1} {
+            uplevel 1 [list dict incr $dictvar $path $increment]
+        } else {
+            upvar 1 $dictvar dvar
+            if {![::info exists dvar]} {
+                dict set dvar {*}$path $increment
+            } else {
+                if {![dict exists $dvar {*}$path]} {
+                    ::set val 0
+                } else {
+                    ::set val [dict get $dvar {*}$path]
+                }
+                ::set newval [expr {$val + $increment}]
+                dict set dvar {*}$path $newval
+            }
+            return $dvar
+        }
+    }
+}
+#dictn info dictionaryValue ?path? - 'dict info' for the dict itself (empty path) or the sub-dictionary at path.
+proc ::dictn::info {dictval {path {}}} {
+    if {![string length $path]} {
+        return [dict info $dictval]
+    } else {
+        ::set sub [dict get $dictval {*}$path]
+        return [dict info $sub]
+    }
+}
+#dictn keys dictionaryValue ?path? ?globPattern? - keys of the sub-dictionary at path.
+proc ::dictn::keys {dictval {path {}} {glob {}}} {
+    ::set sub [dict get $dictval {*}$path]
+    if {[string length $glob]} {
+        return [dict keys $sub $glob]
+    } else {
+        return [dict keys $sub]
+    }
+}
+#dictn lappend dictVariable path ?value ...? - a multi-element path must already exist (it is fetched with dict get).
+proc ::dictn::lappend {dictvar path args} {
+    if {[llength $path] == 1} {
+        uplevel 1 [list dict lappend $dictvar $path {*}$args]
+    } else {
+        upvar 1 $dictvar dvar
+
+        ::set list [dict get $dvar {*}$path]
+        ::lappend list {*}$args
+        dict set dvar {*}$path $list
+    }
+}
+#dictn merge - placeholder. Always raises an error.
+proc ::dictn::merge {args} {
+    error "nested merge not yet supported"
+}
+
+#dictn remove dictionaryValue ?path ...?
+proc ::dictn::remove {dictval args} {
+    ::set basic [list] ;#buffer basic (1element path) removals to do in a single call.
+
+    foreach path $args {
+        if {[llength $path] == 1} {
+            ::lappend basic $path
+        } else {
+            #extract,modify,replace
+            ::set subpath [lrange $path 0 end-1]
+
+            ::set sub [dict get $dictval {*}$subpath]
+            ::set sub [dict remove $sub [lindex $path end]]
+
+            dict set dictval {*}$subpath $sub
+        }
+    }
+
+    if {[llength $basic]} {
+        return [dict remove $dictval {*}$basic]
+    } else {
+        return $dictval
+    }
+}
+
+#dictn replace dictionaryValue ?path value ...? - returns a new dict value. The parent of a multi-element path must already exist.
+proc ::dictn::replace {dictval args} {
+    ::set basic [list] ;#buffer basic (1element path) replacements to do in a single call.
+
+    foreach {path val} $args {
+        if {[llength $path] == 1} {
+            ::lappend basic $path $val
+        } else {
+            #extract,modify,replace
+            ::set subpath [lrange $path 0 end-1]
+
+            ::set sub [dict get $dictval {*}$subpath]
+            ::set sub [dict replace $sub [lindex $path end] $val]
+
+            dict set dictval {*}$subpath $sub
+        }
+    }
+
+
+    if {[llength $basic]} {
+        return [dict replace $dictval {*}$basic]
+    } else {
+        return $dictval
+    }
+}
+
+#dictn set dictVariable path value - 'dict set' using the path list as the nested keys.
+proc ::dictn::set {dictvar path newval} {
+    upvar 1 $dictvar dvar
+    return [dict set dvar {*}$path $newval]
+}
+#dictn size dictionaryValue ?path? - number of entries in the sub-dictionary at path.
+proc ::dictn::size {dictval {path {}}} {
+    return [dict size [dict get $dictval {*}$path]]
+}
+#dictn unset dictVariable path - 'dict unset' using the path list as the nested keys.
+proc ::dictn::unset {dictvar path} {
+    upvar 1 $dictvar dvar
+    return [dict unset dvar {*}$path]
+}
+#dictn update dictVariable path varName ?path varName ...? body. NOTE: errors raised by body are caught and returned as the result.
+proc ::dictn::update {dictvar args} {
+    ::set body [lindex $args end]
+    ::set maplist [lrange $args 0 end-1]
+
+    upvar 1 $dictvar dvar
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            uplevel 1 [list set $var [dict get $dvar {*}$path]] ;#path is a key list and must be expanded
+        }
+    }
+
+    catch {uplevel 1 $body} result
+
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            upvar 1 $var $var
+            if {![::info exists $var]} {
+                uplevel 1 [list dict unset $dictvar {*}$path]
+            } else {
+                uplevel 1 [list dict set $dictvar {*}$path [::set $var]]
+            }
+        }
+    }
+    return $result
+}
+
+#an experiment. apply-based variant of update - it writes the generated body to stderr.
+proc ::dictn::Applyupdate {dictvar args} {
+    ::set body [lindex $args end]
+    ::set maplist [lrange $args 0 end-1]
+
+    upvar 1 $dictvar dvar
+
+    ::set headscript ""
+    ::set i 0
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            #uplevel 1 [list set $var [dict get $dvar $path]]
+            ::lappend arglist $var
+            ::lappend vallist [dict get $dvar {*}$path]
+            ::append headscript [string map [list %i% $i %v% $var] {upvar 1 %v% %v%; set %v% [lindex $args %i%]} ]
+            ::append headscript \n
+            ::incr i
+        }
+    }
+
+    ::set body $headscript\r\n$body
+
+    puts stderr "BODY: $body"
+
+    #set result [apply [list args $body] {*}$vallist]
+    catch {apply [list args $body] {*}$vallist} result
+
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path] && [::info exists $var]} {
+            dict set dvar {*}$path [::set $var]
+        }
+    }
+    return $result
+}
+#dictn values dictionaryValue ?path? ?globPattern? - values of the sub-dictionary at path.
+proc ::dictn::values {dictval {path {}} {glob {}}} {
+    ::set sub [dict get $dictval {*}$path]
+    if {[string length $glob]} {
+        return [dict values $sub $glob]
+    } else {
+        return [dict values $sub]
+    }
+}
+
+# Standard form:
+#'dictn with dictVariable path body'
+#
+# Extended form:
+#'dictn with dictVariable path arrayVariable body'
+# In the extended form the sub-dictionary is loaded into the caller's array, body runs in the caller, then the array is written back.
+proc ::dictn::with {dictvar path args} {
+    if {[llength $args] == 1} {
+        ::set body [lindex $args 0]
+        return [uplevel 1 [list dict with $dictvar {*}$path $body]]
+    } else {
+        upvar 1 $dictvar dvar
+        ::lassign $args arrayname body
+
+        upvar 1 $arrayname arr
+        array set arr [dict get $dvar {*}$path]
+        ::set prevkeys [array names arr]
+
+        catch {uplevel 1 $body} result
+
+        #write-back: keys removed from the array are removed from the dict, remaining array elements are stored.
+        foreach k $prevkeys {
+            if {![::info exists arr($k)]} {
+                dict unset dvar {*}$path $k ;#dict unset takes the variable name, not its value
+            }
+        }
+        foreach k [array names arr] {
+            dict set dvar {*}$path $k $arr($k) ;#dict set takes the variable name, not its value
+        }
+
+        return $result
+    }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+## Ready
+package provide dictn [namespace eval dictn {
+    variable version
+    ::set version 0.1.1
+}]
+return
\ No newline at end of file
diff --git
a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config index 247371ee..afd1e8f2 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/include_modules.config @@ -27,6 +27,7 @@ set bootsupport_modules [list\ src/vendormodules sha1\ src/vendormodules tomlish\ src/vendormodules test::tomlish\ + src/vendormodules dictn\ src/vendormodules textutil::adjust\ src/vendormodules textutil::repeat\ src/vendormodules textutil::split\ diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.3.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/test/tomlish-1.1.3.tm index ed5044a73e5fccdc0c437116e82de7c592c4f98a..8afb43d956b0212bfd728b88613392b2099993ea 100644 GIT binary patch delta 12790 zcmaKSbwHF|6YsKw(%s$Nl9Ee@G@^8ebeFJ5m()Xdhjc1}q;yCr-2x&KN(l(u6@A}N zUhlp8$L`ELGxN+jdt%O$#Gq6^74Kn*JCJ(mQ=f!jwoU^iI~fEtB+KSU{XPNJp#fp> zk&V>Msds{0m1=3Y8F`=Ke85J%SKxO5_48y@GbuG^9&g1d9#5vjf1N+#d(wRu$v!oD ztxU^V9^pE}+Tr_dl{1{Z1&+OjtFeLvjqEddlt5d80@8O%KQwgNS6yF>y^v*jVic_W znXI=_lF(Y6r1P=Oc8~Hn`kdarj%>XQWnH{#uTO>5RUyWBQu}AZn3dwKksp+$^-%9^ zui4~MZ6>d}VD&Q#uGBu3%ydRUDRLnPd3D@^$9k5U(k4$63VohYxSLrWXn`4 z>d4Dw8MP0KVhv*6BWFdUpv_uD`*iTM zOq95__av{+K<_v&ufa(F^fu0C*rlvav_i8wiG&sr(pLUNI8G;?@ z^p4&GgFc;3SOq?-w?bkiNUa?ex=9+&x%$QsVtkG9`5IYGfFq`+D$t@YeR51ppwT0R zjiH5Rsvra3eTkaiqOSl6^?x}&hjSor#l85Po#_(`3Tr#)Rhac(}K_g~B7OouJ zQkI4Dh4g%a$E}2UeTO24RnNlk)Pj_j!j->R1@Uh(JJyr+rYF2I6%415pAL|_H0FlS z@qmQr42mI9M8j)il6C9QN^YEWwsB^;3%6-Cw2z}sreB>jHNF>JbYGu3vEQn#t6Di$ zt`tPyvG*+Bve$XN#mBej4gI9@j@61?SP(s)+}o@|m(8)PSve|Vs5?$Q23NKj4x9At*KpjD&iCB@yoTqP-zpe&%g@K zmrFLdfo#N+=*48a%AKssb)V^;VNdSb;V}EMB?;M#+i@%k^n(Jmg8eX2;zAzBop0c4~9F 
ze=!FS$-1CVZr#n8yIWSME(N<#QE173pt{1-|jjz>XYgCEzOLKe%S+x=z= zq@srRzqY`887MEeh(jI11UBw^FnW{+JRsdTIPAWxc@6#8Qq9*q6y~RGztM+r)b)iA zg15w1b88Nn9LG&B&152nBiaB@QK7RB){KRu+Y2fR6L;KYG- zPE*$9rtW@XJ(o&qe#Fp) zx(YQ7(xTFcZ!>P;)Go@#4i zGC+q*-PCY@L}8QeAFP@P|B>_60_{8Vdx_m6t$G3l;-saCq0~?{m*w)8Qj;3iUja&P zTIfId&YZ94=LmWg^3OjL=IV zGd;$mWE87x>u%~)>&bGve1ebjHsHPyKdKa|fT>8)1FqM^3=4w-Fz!28R0mPR{dmY_QFKk|=v#zWTGY%xk|`JQJ(O4lM#im2s$ zJaimRNhWHrtFPs!AH3#huYeAI$f-Ye{CcmtDs(M&-=CqjhgPwEnwqU7)nA2YN6{&l z{F-tyh(iY5wt{oDa@*t8!O2c*`vF!*RS5-EQ4KS)gP0v@YgkBu%oe9C3S(Q)e)_bV#8>bPc zn_iu2gpps+1oUA29Zz#JKq7fsjW+)#hHlcjnnp)84mke6mIHJSE)N zPx%JTPVzL1h4i&UAYVq$?k~uP0&VRX(X3w1rd@AQ4Wetfet=6In`&{+_ssn-BpE*5 zZ*zJsw!iM#c{HtJM-L`7>FKv zCA2GmQ;Sitxt0pgBY>5KP>yBLzd!h3fr{BOaTw>tWixM|h31{MAZZL6YC%9Scu97f zb~I z>uY!r2o4Ou-P8Le<46BTR$p?DObA;UHmQEjwtr@w8c<^ln{`i+fc@M%z-2BGz->o` z9S_S{{?xJqIu-xXx{Ge&;$q@$WaemZZ{gquvmq9|CE;+haCPIbG;#N`vvIJ62mNo( zkN(H)lRsX57xkAD8wZ#b@FSN5?GN7KCMsH{+fkYS85Od*qnn$Bxvhow&xr2*%l*Hj zvHVXomVZWrV(#qj_&_4I0>L4^5HxpAk3!{IVBKW7k;{P3y@qfI!{23AE zZzuNdc5XJm`v~hlvS0n=`k(*!RyPBLk>Y^39S*kff3&Rs>`g$?7YhLA;Q<{Shy<=~ zE;bHU{|y^xwkHE>jVXT*p8GdWfBdHffA%Q`!tZ~F@)z;5A3yv@-|deeFi?K#8{IS! 
zHXx|@|HuA+`mmckL{}(WR#XBJ1R`Yufw*td?G+~iC>#j1n?CQb&W{yZU0y=O1UJDR zC*x{MMc7j{jx+)%kH!``gJm5N6ET(u(NNHr+Fx3gY@*YPz?}7%oFPo^>5jv>j?JX@ z%PpHg(daDUQl$UHjhMBh*?Gl~UIb2L@I2z>LZMAl;d1QyI%Jqe()~_VId)P%sJ8np zITm*aG}t=7Vc0B72}Oh+dGjesAQkOb(R6qPN|(^b!kfj0Sa*5yP+XbEp=(gOdSl5SsIzwSUiQ>XK-Srd3ZiruL>2^77ri?n}{;1FycO?vl@Sj_kM;j$<-j4 z9qh{;y2~r8YF6wwRR1jx{{(JCyDnT_VvKPV0jiC7w1rM)hU$$J*0!;3lCI3Uo8p)2 z$>>!6jhbJ0C#ntfV`4MVsK>a>Yc^Kzx?qyq40pCxNLufhB-5O@=j!Z^=uuK2eZ3S} zR^)>$pD3{+hhwTCb%EkW?SWn`_N?c|mMHLkPR%`$(^QlCbh>qHEO#v810$*RLmGt< z)u2_MLPK%*Nz`W>j@C$R)Ajt_#rzzq@&i&eY1H8BlbYKi^qzLU&L=%|ws}(?by%Mv zZtO98>EZ7&izvoIBCzaWjMVdDD(7J_gx6cm@zvP`VRH}zc}%dcFy9panbQEP6H}3) z?R(xjFQp{I%5HQa^sv=C&o&-BJ2T1kmU;wjGd%WLtcH4A2hI*Z#nt-3JnY~zZI#mA zT{9#3O{<5l02>#Z!`ZUOc5n$F#DJ&7^a`>gQNs?BJ@X?c@ZD(F+t7LYNOGlxhihlY zlo4%0U0ODMFkFR4G^^eiH9Ddxn@%`ZL;NFi@x&dLF?9%kFc&?L$l_dxjoa|dk!TUh zd_=cs&rs&EWF-tKdB8-nl`g7Lkw_}e;miQ1$GuOE`kbgFCky^1fl<$jcHxP{8`a!M zi_pl2cKlMs7{ezuPSK==?x~*}9zAYHr}=Co@$r*&Y(E8dSpV7MYTwKGGmPk0*3@0{ z=S8XWVI%|yNlLcMH4`{d)5mQJL>TxsyOwI)o!y%`t+j?Sbl3ax zpvOl^Yq3|lZ=C?dpTQhQ&&)ZsmMQ5`pUF;a8BUVYcL$mIbWX7->f?|m}j_(6=| zVZ=nnrwLpet*AA{t24qRbPBpijR<{eN2Y!DuIdRWb05X5w+j^+wPGJQ3byoR$E1Cp z8X7HgC$G0k_eme;xyR+J`FbH+)I&hlTjQKmMcsds?(jIP0c*O=^f7ow8Il9t;^oWi zsD|gA3~f?ceX(jcY;P&OvrarK(7Ke%AMG7Cw`=EJ(YVdOVcMXV<2dbIPPTrwpv_<> z4kbQr*&d33E~Y(k-Cu8cpi$|)cByK-^MLW%QCi@eJ%5qY2J?&()Ti$1SIo#IV$4j06M{yV=!NoG^)gTi)!Gzn1jtYgl3e z2fU|Y!xGC7M1bVJ{~+%xPZzgd&k11D@=a^C{5)+W>~@?gJZAk-x$3~LQ?q1KFhcQr^q}?!iTnxKX3Ho09Q@pjGPqF5 zIjgpzVX{jJd+NHRxJ(9xs8!F+SCyjG@YZvwbEvEKn6yfqF_^Ow6F~@YE{;}_hE*lJ zyQwAAhXtlC$$A2PZSZ$@9Crw6!JHsYkQ zg2f~!-C_<47EAOhnoBHjhbP}!@%cd0?*h$kZv&LqmR}uya*0@tXdhA-xGysDpzX?Q z%^);*r!01quZA%FGusO(DIYqdtvwIH)Tcq8Hb`mBZPey^A~zTF@WZSl_!x_8cV^CDv5L0g93J%;Ig` zw*mnVG|qj5;Uu&0)ld0}bPMS^P_+JyIlDTkE_!V(rG;>g5Q}#RUSN7hW-1>NeMGI! 
zFgcEF?{DegxL`X}3J2o?D}|P`@0QZygfuUB_lbEhA!@9L{dqXYVbP$u+ng-NI$vvo>`|^3t zYC1E4IS{1)rQl_?5dAlE6c6joc=ZpqU%%>5V-mB7{NT{4zPw4!f1P#FVOb8|K)L5B zECo5a0|MRuX9hrxZs)1LhX1a%f$cmOo}a*XaeYWGmwaw@Xj@NB4aZ`5UNi^MY}=@` z#ZcqM1l7^9C;m|FI=@-;aivdOrEl!$<+si@o!X=LUF;`MpLjpd)yS(+Z`<;vXRVoLjY0S%#}j_%mKlCuRo#J9SI!KWqP@sKHnZ?xat&~1iX1P>|Gjr@?zFcw?VLU7}l&gH*7h0Qf zTs|mOHI}C1rRgcYb$lYbn5azjZu&_67-#HEolSwwLr>%Tzi^GVSHqo3HA62=wL>gC z+-#a!6|0i#-I|6Ss+b|ODS`~!wq8!0l5x{@;k(G2()IV-Y4kzU?aeL61g?;7%5j!$ z`78^_uJ}pcBNN-+8 zbFoO0&{-o)GSO)qo5t+~@AD=iK#cYc^1kDA29~tL@ZR%-Nt9g za^D6$S>j1S){*Q**qRugz{AY8jO`^mn$z!1o^$px?7M{qFqAJqnv$Y$G2&>mi>TG?KvkYn^KGtrvMZ|A- zDMsL3+EcioBguD}9bB?$n2eZ)wc4lm6$ngZJrJInfc2+|1`!wc;hxV0$w3t|S=Kf8 zJQq)khESD+^zKf~FnnS~bLemuW)P3bfAlcl(!7+ZfQ|6;ia~1Ppbjobiwrrta7mKcM;vX1%;SidQdVm15|_>rv#_!o=6=sfJ-#n{F0WL1RsJ%W4ManH6HD=Y#_taCYQ;f>ySx@i+Xu8GYQN4&veF{Ba(;0NG54ugy zZ|$L)C#V=+k9H#Y6tcW1&Lb)6Y`OLjNb`deeSO;c_0!IGMr+Y?hwOQ1et_N>?|z^8 zIWdv8kFfu%4COwmu6!?lsXD#Y&?r&4j7BRa5zOu|l2BTdm85cfCuZ&t(RRWvu!jy; zm2pGI*c4umQRw=;yOi$&B`C?Anh|7^w3yeslnr(CR}ohOb}?2W`|xA=DK45`p;7Ih zoC@?~r0{=sPjl`mi)}{GwAeH4o-ie;lms=dk!;uui)FPxRZj%bKj47i=~tUv3#nl8 zeg;Ffau)g`PdGIL!1(;o^yo{%|BbD1*m_i5SA#qJbF(D{fyDo}9{qh%{k$E6Nx)HKLXO=R~+C*BE}QAq|C zUW{#57;$t}9ZF6p^Ejh^C6iB2&_)XDc>I<1C`^(3D+dchCIYDV5F`$`j`w++)rjJqO#Jz z^HI2pR$iJnQS(Sq5DiIJO@(P1&aM$DDnT$wx1@m*O)Qy3WZHy_NRn3xMFi*iDFTVTT z+%+{vjdHFdI-6HeE2t&rt)KORW+mA#YbvjVnh}$}v@G5riH)9M1-}Vu;~$-GF@Rc$8b)HNs`8Y5 zGkUi&Nh4xgM0;nD_ng3!I^&(q>y7>G!Ul4OtzyykvlXjMoM*9yyBb`m24(~nnre+2 zSo)b4OW#pT>!YZMvi-=fKt)HVwAo>12Z4Yl^zMuN)OUwJHZe!%n(a#$M11EMAxd*{ zZEBCM?e6l9zuz{p0kJ{hk^}hRUO;l$4f#Nwdo9?r=*YO}!cW+0So-&z1bwnYV;a>VOUzY!ZS?GE@iv-Tm3N=}1+IkS@ zOL2M@^FDMb&31#U?R%b=N*VgdE5GA0OYOYL$GesrJe7@v+q0}^(H|w6ejMuEvc1FTnn5j7TH2$OTuhQAC-B}hk8$7?9 zvNpnlkU-UpAQ0rH2mwn_p}&d`7DoTnTz(~~ySjR+u>ILzZcUsFpRN#@0>$^m)E)j8 zmAcQ6dGaFi1{G8~yPDaYUp`d7x`=UAox}Si!P=cX!5}6^NO^H#S8|-@{>}j#{6lC1 zF0di^t(t?Qy<_um=a>}%_{tRQBT6sNn!n4Qs{w6ViP=ZgXjC}%edB2Z7_qGk=H;Oy0On=sTXL|NQwE(h+RYkUO%9q6{ 
zZG=NVx7~$nhxqq0Lk#E=Jrzeh1&<$ebLB`B{(!p*HYg&|0W?T?9Y4?UtXv6FzNriC zO^>M2)QN_Z3ExSrKQj;P41DvR*@i@aSh*hBCMM4IY`BORr;)Fp%s`XCLOU)+%Q!P9 zCNw37T9{P-d8idU7Ovvp)W45(4Vhk7D$#`>H|~K z0!m#rF)Bp^Bfx>jqqPUmH42us z%+H>keCIKKvGWo+g?j|eF@!jIs{9+45wxMSlBY}&b)aI*t3RJb=>!kc8c)>5Og!0{ za$MgNv9r?2wH|U3EE6$U}YS;25Je$vxQ#yHV zL!HxJ?1zpaWWfEk&Oz46!a>5+HbHHXZ8OkL7Mf8Y9_}I?MmZGW!nk0C0ZBPO71~X% zB*if!Qv2f`zjjaX6}472d%D)($jK?go0lWzFeInD zZtsj-7d@<}1!3dNy3>`~zZdIE6BftwsMT3UDw?^e=U1rg`jBArjyLx6Grk)>N!H}0 zdgpNsTH!%jr|$b#rza%FFz zSAP_v$(TcQ`$Cfh?!m5?hB$s6OMl+W!)84|5Q&3$lT_6Yd59rkpOS{?(lpk-)`wLT z(`5d+!2hYJ;7bV2TvS~b#OlbwjJP(z^U@_}9Di@4ljJ36GfpmStg571j+@*6VBv%A z^ynOin#|eqJC6l^BlaU#lN}KW(%8OHbN{s3`}TqPj-|1ZEBmxGT&3YGZ0}b)SK*|j z*l_VoIxxc_E%qeeaJN_CKAAB{h9VOV_J6#b-}P+nhsQyHx|e7;bK645YspOVScB><+3hD`OlWzp zNISC1&|DIu6B`NyvIPbQ(F^aCVc+>7>Nxr$Ico_jW?I{vwE7m;(&}hLM(d$+4qP!# zbv_5f8wvtGzo^~E+K>3`78_)m(N^g4iMV&Ix2L{l?&;Y>Z4f<=Q3ptcbE-Kw9j*e8 zCbv|!eoQWJT}!5?x2rxzvT{1!x7b=MKHOM4SCy}R*KTO2p98^9N2@@tr11v06VyWNbqT-Nq$~9+q%VG-MS? zj;9h(7&km&fu>70Q~7ma+0VOL&OJAsyhvJkm8D7W1vBLE)kP6@tJSzKfslOrk_1Yc z3^RLY%JubDEGN0IYNI@MHD4_XLd8#JI;wKLgF-5$yTPantec#9+ zL>+7dgwV>;P^@DO^-HGSr7*EcM~{t>VnmW1?kM3M4TRRcf5rQC&tVEe2|Zo#-t!n( z?2<^?6iZ{5Nk7{vp5cxf?)ieBi>j__qK+A2ffDt776ZK#lfQ?~i%B%G8g9@=VgV^D z>ibkEvJmPSEvh|uF8HGzt>tHX)6y(OXE=FC?A=8~O_8_53{AR^vq!2hEcrb#Ejv$? 
zQ}R2zb~B**wTz3v)w(>V-M2(*fzgbl(}3i+SD!Z2)3}Oho$UzIzfDPsN7OfA$2Aq; z^gAtwrN$0#I&!oleZWFHl0t|l3P+*qhoc-PdM%vGO57paRkP)Q`AW}>T|S;H_+%1$ z>!f$$q~heuxeum~aDSSr_;-28JqfUk@OfoV$D$>a%+GsziSrJt?hgUSZw47Zbbfw| z955_54qVnh-D0?2d2p3{Sg$j2&vB&TyKAr*ttbw4F$6$S;ptcKu@=t63#KA;QLtbx z*QfPseRB%>@;wc(T>3cAbp|QO@y^(v8HsJ}@iVXN>o5K`qVY&eo~eb;OP9@oIkd%; z9TggBmVehO9MT4dEdT6%JU(Xw4sjGaCliM+z{fFX>hgqVRGGXA$NDvAhThP=q(j8p zr!g|(yS=jrSM3D;-^h^j_`o!d=80-^d|d`~7@2Gx}rR z@<|f@PdcMz{*jpuCj+llKc8@l5UIAI@{^D;)Kn#n+lNi((BzbZc5q+&(RnBhSEY;+ zo7*~are#3B0L?l%!|Vt54$)Y3E=EwXu&q$AlS^3LDnb@cP3=xcIB=Jo%$;NhZ0Y@Q zUZ&Xo7~$AQjc!UdZ`fdQ6;R$H?VZGD^A$PuMs?5@8{(sWe0HjBs$AREWAsQ!%Qr@i2WCdvTLjmw$Y!GY#pA&A?Jb)d4IZ=L|kYUwGI3pm7nGpK$Rt4pg)U<%q zGRa&GM^ThXGzUUYn~L;C>?4K)V~r?#Cw<3W#^wOEou7t6`a^h^s$z+gWmTjUJ1q+_ce3+QED#l^Jey=SkeA~%8t1) z4~J!}rearJTO-W5$zvNA77h{?F?G4Svb*?fXnk>q>J8&o@o5L}!vmJ=ynG6{G&=BTh+tS@}|IE!D0dp*JpcADO7a(EN4qGc}QI5H9nf-w$m*Rqn7e zmB00VU+YrC)YUPc%H(lRt)zTXGWM7!5-ZVWT44}jcdeVt()p9>5!$)*`aJ__?A%XI z(D^HaP3B##=MO^>ZE7cIHhI;ADOPaK=lE<>S{E0%&_#$3tu-868zEtV5ryvoG&Q>K zoNg33BV0?XzHjL5yB0?qpwk&G`zYpcyX`*6Y6x{@QFRVXat<|nZLy*2q-xWi$rgrC zUg$H8bA`S@m2S|~*!hvSIX~Pw<@uXIII|spuM7M~vYPI;S7hjp53uUlT(cwQmV!yl z3sTEqQ23ST(4A>Q5{mv#agPtH@+=hb!VPOWq za0CT79udf+p@l)8G^sHEi+lQs#Q66^g#=JP;(@`S5P{}M7Vs(p!k>21644AGEa2AaTBu2A<%Q1hm5b#T^8`4l19SC~P0@lL*%@P3B>1Y5- z3#!`T>@oq$MBEO6eyvq;_q z#XA6u2l`X-frvN?@BzV%%DFfiuouSyeoS=3YQ_tIFR5LIPg;pJ+CP7i_@|(~yHbWdWP=z(^mkGcW8%5)ez{0^{8KO(Fp= z(|Eyef3dmgNC0m-3-~SH&C}d;A@Cso4H=t(1khx#fK>&4vqIpNUnFX#ICxF)Cay0g z=s-dy3wTKAU-oB*6K7F?`-E>qzUJrvG>Zl7eE&B~0Cc3G0Z7>tU}_PVhzU^1mITX5 z0`b`~;4w*nI_ELi5d!;R0iM{R05v%*U;-)FLwW$sju&9bLjxpoDZmN8MB#SCKz1$~ z)nCA(e@E<30zu}+{vV7HSsn@1Ul^lX20s57{9oWk{{bNVboLjr=$11i+23#f3uJT) zbVQm*O7$1U=$518e;wV18~wrkjOH(7(Jdz=a{om0C#nbrTKbdy>E-7@|5ujzk4Mt`zD+xZuq z=$4yMC0GPh07d}@uc5pR0mg6&s=uzoZz)@9{i_@vPEPgL z&EYNORh@s7;bHfPz_UC;s=uy{ZW##Z{$l|6!5a71J-{vHM*V-3k$&9@0MEmTsQ##y z{wkK9-<25R`{c1xpKWXN9+vKfC;^ToXY?_19K@OS#q^wpkOw Vc6K5RWsoxHD=ln+^ZBXqe*m+y3oifw delta 9769 
zcmZWvbySqw*B*xMmhO=5?gj~Ikj|mIONJ1Xj+gERY3YzI1xZCZ1PMVxy7}gM@9*C4 zt~-Cs+0Q=Dd1q#=v!AtN=8wS1Bj9=pQd}@ssNp^4rT78@2$W9*0*M3DvebZE3N_$D ziUfUOm7%3goG#){#8|{&dc5@`EnBuJ2 zg5OD0Fuy?^C6{-*MVAmrqH?&P8bX+lW{=n=C!Ex2s@|ycy^U3hEh8*AKkZ&-0sgKU z)jEr0|IHxpJVDYtg7@+Qnp1XCEVEjZS3R8JuUU9+S-7 z2IZnh7Uxr#uQP?8-JNr^hqR!@31l+A5wpl#@8v^g$gSTU9w4V8t>O=2!7yx2i(a&S zJ5?L@ZL|Qtsp7+W#k(_W%PaiF&P6j)cAIjI)gh~#u$)ojbB8Nbq0ZAqiXm6`Zi zjZImX8kEDrAi@#JcHFqZtqu$VO7+ZqzIryBkjgpM47susq4g@M3ge(qE{<|-i_V$6 zI3xXK8<0P-I0t?!T>!;`FaM7&zy5XTFWkGkZ8%mJ7qlUZNDq?lT9ft6IUiGApvaf> zdc1f-_#wz-Q-^Auweqq-D;YXScGGdzD-N+dlh3K>5z0x0Mi^2ut$t7f-g>t;EedLe z**mT4jUnBt)MSXy8P<`tn3647~858#+pS>?~YBhH~TX6?g z8tB6-S&YvE`kRM-Q1aRC`+nN#Z&vrf#Fxeu!aHx8)3-aB}9$8IRKp}#lsFn#4* zVpM#-SBhjGv=lwANf$^ZrZ7W@aWv2xW@@>-y@@m>Fp*s9oAJZ?kiQF~eU;C#?sUQK zhn}L53F_1B;DI zImbJ6A_QE`CR7Z~rdSqnE7INa&x3wp7C=0H@s#tP)mkz{35mIs3|mqRo|pB>3_*JU zn4?U~YWD|mbz@Rp)=fw)q+6YN!MBGqK;qu0#?#87I$IC5;R`kTM z+K|olpoHV@8N+8Cx2UJ70Z`IL=6DJ#^;GVhq@ZuDKSF;&-N^f_Vl@?&?_wA+GJ}_-j{BH2A~(EbmferqFwS!Rrd6 zy(A<)TwxO)x>h$Qbf3(}0n7 z%lT4#3>1o6?~=qUAFx+BP{I^uTQ+w~~&i;_C_2HE}npd;7rqx!LswVUGd) z4&B}_3WRq7fxItP7pkBK#ZrVP7G~_NmB;Ztx4}o_!E$f{XRU}*tTFhVs3%8i zqQ*nIX-_UgWRpB-m&0*7B2ulKZr|yx!Sg`bQi6s)=*Pm|T*Hzi#dt1?%nKc$lN0X< zbM2s?S8s_lRNxT^Kp+qbh@B$baOUIL_%0X(5~2fvcwt^ZM}gvrHN=P0!o$PD-^|L@ z*%{*E#o-0<^lDUbJOSfYR)!gxDsD`W!h=Auq#zIvps7Gj_|FM@7bklc$nOI{stX&A zX|@o8j9q!0QlJvO#zx~LFzq4$&E7x~o?@sVbm=2B#&zzbz_KWkS&2fx-5cYi2F3!| zsSoM=rBHs^tH~m4@!)H8>D9S!6o2vgVu~8>GDn;oR$&VN(we`5>WxOG$g^9QOKmL( z(PfxD6(?LrfGUL@p9iDW?!Q03_u$nDxI7pd=t7T zV@)CDkx9=Qxt2N5a?yrWU~gs5`Knbsmg#_issXM985R6}b%LPFSlxD!TU}PzDI1A< z)?WE}=L&@BuDw01%tcbvWw6>-bY@!uv%H%~#7Z$fwcf?~*+lbKCF=l$ps+G~@*Txa z^~M*T>SGKU<*1X&XJPF#V$Wlj#rtQeW-PIAwb!K0bmtv>3D)OEwoMR&FR{faj!G&j0>nys_pV&S~hI-sDx zynknME*(#p=c$j~_B)NQj=z2pQDBp>h~1TG*L8va2^cp}n4$TQB_bj0?$42fKmvdT zCpE$!cOP)KB!oUb!U75Iz+mb5k`&I%gcK-ogqZFz&aQTeSksV0u}lT2N<|UV;ynI~ zcirZ$c64Kps}ySogIH5jNZ(SaCyXaP)n1aDZSZWyuTcB>^?auc<9j7%<|DciKhAb$ 
zVne+SkIzM(OecV_f>n{E=r-mp_Ke&8!R?9z%io;R))QLD72Ij3#D_e!uWKA>We}em zHse-hBAmAV;7qw*tn(boY>h;uHTjFi8)S@k^+}7{*p1BAuHAz_gc=ThDOA+>y4Tr5 zmH^AZ4@L6SL$PvoCL(y|$MiC{{!lPB+pzQ7hjnp!bjxfzmD#q+P?u%RnR-K2m*I9k z%=aWE8Z%ILMQMTTUHBPcLfgEFbrxY_ETTNC@m;br!c}i!d9FG6CDr}Q##v*^febe%qmy0I2@q_Dfh{0%7`S7+UL*Op{ z_uC9_!L@a&v!<(5ky18Q6nI|{=_6{tu#6VbQM`l7_6SLmsrN{6O>;~R<6{oWu>aU1 z2t(1N$T!{X@VU{wg_D+goK4f*#znBY9~j}S5#VL>SFYX1JkXhZPD8%{|1s8k=9MD% zk&rFr^qcu_^Onu|XQCl55RF(`be8!EX|BB7gKO2iAO$9R-?k+o2TvyQs@;A1#3eWO z^QK#%5T~*E9$P1Z_v#o7D)=fCWV7N<6I$oFE)G%b>b7>%zfRI$kUH_%k=)vlboM>B zB1>?b$)dLZlGsCIMMfKkId{WOvY@)B6UC5K^4(S=DSbpnP{CC2ijq5_<7|6_^JOO< z`nT9uvCe4&X2z>wXT(EmQsh@6^e6SzNi}jUP`m|YYiZi@#nxkkV^?*?V!F~d5pxAe z9|?(jFc^lsro(r{H@jYmDYR=+4eugg^~v?^>~cq0BDS)*2!B3Dxwh|6w!$j5VhR%c za#3X2KBSXIsa!l4{YxU|*Yodfx|@%M!`{cIT!CDZd*nMUD@1%Oh>fjaoonkFkL^_p zbTJzfbnW|FzY~%aoURGQa1-P8{*yXjFDwKvrbpdJ6bkw$l1`gL4WBgghV8?FK%!_M z5Esx#|5qfvizb4msl_aD;Y1#V-o2(lc!Cx#3u1#cmgR@V1C}@I+`Y4QeNX;Zzk>RnW6$~*r#}TL~ zU@E4kKixolSr~bv#w7XLwb)^TkWqB)9Ck!|Ck6$iO)_F=TtFd{`e(5!`TNBC$`c*8 zE*CsLQnWppjPRUO{`9(T@KiQ!`Tdj0woW)B=8!UFdzJApukI< zNT4$Ta5X=;YlOPwh8xwN9*=olP!bD^X9soVPcTBM@pkf_D_u~yl6QjbK9R zfDQr){v)vJ4Ns8%6j*>NjtrV25XmiuPjE|*o67laOF;Un=ZL9gQ)Q_>ZrcJ!d_L_g z_h|WyME=uDbM#ll`pgNqC9L#}c(oi7#z8-QI&wpa-I2f7j#e*xF;7N z#AX*DTb8FT2MRFs6Do4O58J@IDVC~QG`?O2KThbCA>a#+G_KU96UfIf zPz1a{pki4bub=mF#%0-fPO_`fk#%Y$!TlMWpf-p1e0o82J#`^igAx_{$=PU9VsQut z&y}ux@bkiQ+_RXHNn2=P>)Ucuo7XXwT3hCtuR>c_XxFBta%K-ee)4cd|m3-;m@<$&hkP#-y}8G=z26@pEDLk%g+1stzsFG!!xSn@KkA(=3Q`hmG%jFK7wyr^>pH{D&9US zX)kS_UR2K1(Tc0AMRw*^`|Dn46b#-#+Lx{|0aGe+U`Cb-SV~c$eSCyT5&Zv?sJq5c z!#&E?a${JVAO!;i;)h8TFCa=SgTfB*t_7XacL5R9_Hbsv5w#`jWXr$@5+jcq8Ue(} z<+SDrbCqCFHy*|O*9NWi)>AQ4hr^6-rU@AS<;VCy5REd#%s`KcnfoSDwN>5TGBf24 zv85}ySp3_D!5Oje@>aRc=wgmx^u*E$jr#;U{HE%Rk%O60N^K-a)$^?gTLPb&WdGAq zxDz7d!z5cC($Q}=t2Nov`ItJ&1BsGM?MJgqh4#!%R{$$59rPN{2eBsgM6%0IsRzB> z{7rj0VC;U2>K$A*LuK+_EufYLGQ2@v{6Jjjru?44#l+*ytaeaNTjLFuC{i(j?@V*0 z?k?Q$V*5J-;>ETf8_dpO)8o6FdqVG%XPx`lB2UMK_le(^GX&(@g^CG-weE7%4L;-h 
zJxUz=AaR^@%Ro1XYRnQi$Zb(Qh2myJS&$;j4Sbx=BlM;Q9EJHSlCjC0E(N(tet$(TR z^lLwzR7YJQ?u~?9{YoV%Hj5Y{7?sqdhFMBA~|JrY&xW43rN z<<+7L6+@4G7Qz0!ESWFu&5QKm&~YD`IXO?W3f3O=_kttn!LNunc7KE$*w2dH6vMTD zw{7v1E= z?~rq#r?^Xv8#g&X3}K%k+et%D4?}sq=2uPka)_8zsBzoFcJthuVGf_LK`=qy}&ojJ-D( zqW+wZiBC~3c$AFpa(#A4agX_jYy%0Yq_B!n5I9UF1c(@nV9_KDR54(~!(yqihk;xe zXt%-xv=pd-khe&{v6U!rZ-Ig9ju&c}$R$%k4pVtAOc03gj}j7)^g@AJyL-EOL987i z{=W;zgY-2=SROF427gV3#i<@H=CuSAx}qArNN*U@P|y#v%*Dqw@NsHa<9b;0jo!La zoFPQBr+U&gKa<-nl79%l6U=Gzkj0LRw`@%-J=OgsV=uUFO`B~nJ?i6(Y z9%McTY}{eZqB!+-=$f}=8b<;ft`p5OE3)FcB(u;>FV$dz$fQcSaV1S{IeW(QzD_Qh z)VgiZdNvzv`!%Vre&Rk;3ayE>a%duL0uP;W2dJIkxLl7aIq1niZ=R^JT+p0YS2QTy zyMET(JvRDCt-aB^mSQ`xrBcSxrOvXWm9v(NUQ@&x_g=k_Z ztEZ^WQ)QfFX-~8RFqIiAgyt%q!(VP~Da3veX35Hl zU~g-(eiVM;%UcN!+cgPjEV^GB)GQj_P5|CLBL0&(V8gCdOa``#Ga?8?M+*XR1EJBJ zKrRFs3MrEk>*Y%-uqE*FP)EFdZI_0kk+nRH z-OHp3;Th=`9^njxI3P(%a*M0*U=CyB`$`Q$!Ini)GdwX~jLgTc2Io&>j&7Nw4yP%U zZW6hF&fp4e2AL%a>&~l$O5kCIhnl20zqkp>Pld}akNPrPLM%pn=fF-GWo_L*J>aQK zr?#1ZgD{@QlsWk;2`yo}xMH}uB6fd9;PPNdsAZB^Vm!l#mvij3UK^5^{yr@&l61Zh z6klhHtCCTukEDShe&y3sfZp`WHs_qwkwkyZD5KC14|)agu}U_2@YZI$e|x;qr4<*~ zJ5*<<_-TDlgs1k(?k^9e6IQ5uHhsdT-(G)-I^8wQGEJDlaJRBCnsYMdUtAza8z$bB zozAXXK+3eH{hC~AL6V}2P8;b)$T$353ff*2C=$Kkr3_j6JWS!(x$lzeShqZjgX6A@ zU}@goGN?6`L1NrU(5%)1^}0jP)tR*^dt^Ln zDZ6`*qIE%&>suZNhU3y5Q`X#6;i~{Ey^*9@j#<5Cbk`5|T&NFEt*^^oU0W!fVnRQj z{5aXRx%yG}ZLQuo%QftcWW?6_`0T3GJ?mHgilHIU|Os4b8pR zY<+q+Qix~_-y?B`d7f9`N!pj7-tBXpg! 
z{NlOl+sBZl=e7(nMwI4tThaD`(+_BF&0Dxb8hy_(C{~A9t6b{(nQ@}2W*759tS&}m za)A91K1Z!Q?3f5NSx`f6D*LUwcR*3r-2=3Sjow}PxyVhIsv9eTUueP|f|0cC_Eryt zEig+%r=Duq$%QOx=d(GSU#SSRAL+SGADo*ja@)bPTCCFAd(YF^&fS7KQ3Uw490cwu z$|kr*_LtdFMe#AR`1C_jS(#ar%9Hb{!3qeXA03;RR$pwXAm|hql43r`7yYP=Mo88P z#-)80Mc53E)Ynhfw=3TQ^VW+dv#*ocZVPH)*jp2DoAu@}Lnw|1O>35*M0wJ*=~-Hz zq=(KBi>Y^y$Pmd!yybp4%X#tbPbb`>I$MXQ?CiNLS20HLib== z+rQRnAPMiS6SXyCff!u+tG63Ns9*VW=6#u>HuDP7thqlew7H3z5I+rTfi$i)Dy4pU zo0mE3=99ph%gt}MsHe*Q1Zp#RcTQVc+RxXV&5^+QJen+AjClpK97WP|rl)HL<9I9C zmnP)%;92`|H7yVW6`)@q|a+#bceA_I<203BFfuCqv5)umD?b) zy_4J%J(5ip-&9P_Ni(E-`5Ttr9>+2Vy#v>*l7QR6TXpl!Qs7ZQ9(pN4I-eea`LQ!M z>>S`3$i!MYrkE}x*5(=zL>ozCO2V5PAnJSu36zVqzbc5l)F0S(@9Cj6jaRrXOCD|H z52mofbjqBfV|;|@P{e`hIseU&uA`A%d%spetzVEt|9yvJ-X zMVr94SS{Z!h_QsxVw=wu$k5t`naP z%r$6qUsT?A-HS&mykKM+3^~G$aGB#Hc-n9;J~i?wEEbO=is$&$4!wWZgtEc?36WV&VJ(b#Guq!A<+4uBHy__0!`QD^4{ z&fO9hF7WHsxMny|888L}#lG?uFsDfoQ{&wAwZEnl56bE-%%0QCSKK`L$;dkxzBzjK zDKr_O@q+*)p`0S{yW7~dQQT8Ks4lSWwoMvE-X9k@eJ~uB06xXSYOd;ZAlgH67oqGh zRgasQgVQ9HaTg2`)h7E+{VcVqxHV63q7sun@>2%CVBznqPyl_x{4|7UD> z`X)iXIV7U=!#b#hNIg$EJT*m1cLi?OgWuhZj zcgA8)_j{;s&)VliZ_h3lpox&}BdfdtX5)vzY0;j;m%=?a_zhK!bwoS*wo+CrUw64` zeLox-D%9+o^0a3g@5J^?B-3NC*=hM&!oQ|Y;D@x}mPk~!w~`ymj!L!LCThRKwDYX|66T!+QA?Ah8uN6|6NP})peD7R3LwI?& z;5{6NH*V1yM^z2oWqNRp}H_qS6nVS;tFa9udJ;XAgF{)~yiq?c1EBUm@j z@|xp9n&OqI2kt?%d09hi{lSRUg}h&=Xy3?5=_rPgpQI>E&7qon#Hwb%3_SD+=_JfU zWKJIF4Hn(#UuANh-231naU@+0pw%~Q^!R^l zWxuX7J#ki9{RQh=csv?(dw-Pl`?%)u@!nL-$7KhKkorKmXV!{_tRtRM0#J(pd|E=RouSX@+;e@* zgQeul&BKB0EsT;5sCf4p@$f4^%+7GYxNa=UZ2Z|yxYXd4gF9win;AoLdel~n!EEtC zR1FQQp7-Ja>Ev1ULS1e<}vGc4J*H- zVG1e)Q%^47umuV2cRBUD#nI^e1|E#74IBNDNmgK?!Q}W1)_-M$H8)6rh6FssKgDBAEdP@J_=4@Y6`a zo}@630t~W%@5o@LKRAT~wg>|gOxS>RItv(}{2MMaqXMH97{Ea~B{-kzw|jvF6VT3J z0gF-p4Sh-pfyE3;@b;fBYBo|pEt3UoM)R9t&Q1wT!5|~;U+^gaEyV!rvM9mVf4Z_c zv4N>97O(~VZ$>sJF>suQ0T^Ucg6A22yBe|uz|v2Dn}Ix-09OtRc#HElq!PshiePYo z>u*RchWh(Fb1o(LEB9|VjyNh1lgk2j<@pVdb49@!{C}CafNvfzcv|RBqC7fqoW}zG 
zD*P8D`28sFd`j@X$Zyv~J~voX6o4ym2bYV(7Ez$CKmsfz@s~*eNEGsdKTG`PVk%<; zorNr5Wl2D@SOmx@!UY(Lae#JZEI^|O13dgcw;Wha3dT@`?ZU{?Ff#ybAOl;Z0jv@( z@Xh~BB;a)kFSuIv_l{dB5&&Py0tli$DhM>!>29Uxs#4fLjw0ted2z_AVzpkBcO zHc!2rHiP=ZlaU|UQ8M6lLwST+*=+nSh)c@(z{Ohti=)lO- zfRq<9oHcONLQ4Ig8vI}MdA+~rSipw{T`H?WOa diff --git a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.4.tm b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.4.tm index 7a6d5205..33d5b912 100644 --- a/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.4.tm +++ b/src/project_layouts/custom/_project/punk.project-0.1/src/bootsupport/modules/tomlish-1.1.4.tm @@ -153,15 +153,10 @@ namespace eval tomlish { } #review - if {[uplevel 1 [list info exists tablenames_seen]]} { - upvar tablenames_seen tablenames_seen + if {[uplevel 1 [list info exists tablenames_info]]} { + upvar tablenames_info tablenames_info } else { - set tablenames_seen [list] ;#list of lists - } - if {[uplevel 1 [list info exists tablenames_closed]]} { - upvar tablenames_closed tablenames_closed - } else { - set tablenames_closed [list] ;#list of lists + set tablenames_info [dict create] ;#keys are lists {parenttable subtable etc} corresponding to parenttable.subtable.etc } foreach sub [lrange $keyval_element 2 end] { @@ -207,13 +202,10 @@ namespace eval tomlish { ARRAY { #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! 
- set prev_tablenames_seen $tablenames_seen - set prev_tablenames_closed $tablenames_closed - set tablenames_seen [list] - set tablenames_closed [list] + set prev_tablenames_info $tablenames_info + set tablenames_info [dict create] set result [list type $type value [::tomlish::to_dict [list $found_sub]]] - set tablenames_seen $prev_tablenames_seen - set tablenames_closed $prev_tablenames_closed + set tablenames_info $prev_tablenames_info } MULTISTRING - MULTILITERAL { #review - mapping these to STRING might make some conversions harder? @@ -295,23 +287,66 @@ namespace eval tomlish { #[Data] #temps = [{cpu = 79.5, case = 72.0}] proc to_dict {tomlish} { + package require dictn #keep track of which tablenames have already been directly defined, # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. #we don't error out just because a previous tablename segment has already appeared. 
- ##variable tablenames_seen [list] - if {[uplevel 1 [list info exists tablenames_seen]]} { - upvar tablenames_seen tablenames_seen - } else { - set tablenames_seen [list] ;#list of lists - } - if {[uplevel 1 [list info exists tablenames_closed]]} { - upvar tablenames_closed tablenames_closed + + #Declaring, Creating, and Defining Tables + #https://github.com/toml-lang/toml/issues/795 + #(update - only Creating and Defining are relevant terminology) + + #review + #tablenames_info keys created, defined, createdby, definedby, closedby + + #consider the following 2 which are legal: + #[table] #'table' created, defined=open definedby={header table} + #x.y = 3 + #[table.x.z] #'table' defined=closed closedby={header table.x.z}, 'table.x' created, 'table.x.z' created defined=open definedby={header table.x.z} + #k= 22 + # #'table.x.z' defined=closed closedby={eof eof} + + #equivalent datastructure + + #[table] #'table' created, defined=open definedby={header table} + #[table.x] #'table' defined=closed closedby={header table.x}, 'table.x' created defined=open definedby={header table.x} + #y = 3 + #[table.x.z] #'table.x' defined=closed closedby={header table.x.z}, 'table.x.z' created defined=open definedby={header table.x.z} + #k=22 + + #illegal + #[table] #'table' created and defined=open + #x.y = 3 #'table.x' created first keyval pair defined=open definedby={keyval x.y = 3} + #[table.x.y.z] #'table' defined=closed, 'table.x' closed because parent 'table' closed?, 'table.x.y' cannot be created + #k = 22 + # + ## - we would fail on encountering table.x.y because only table and table.x are effectively tables - but that table.x is closed should be detected (?) + + #illegal + #[table] + #x.y = {p=3} + #[table.x.y.z] + #k = 22 + ## we should fail because y is an inline table which is closed to further entries + + #note: it is not safe to compare normalized tablenames using join! 
+ # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + + + + if {[uplevel 1 [list info exists tablenames_info]]} { + upvar tablenames_info tablenames_info } else { - set tablenames_closed [list] ;#list of lists + set tablenames_info [dict create] ;#keyed on tablepath each of which is a list such as {config subgroup etc} (corresponding to config.subgroup.etc) } + log::info "---> to_dict processing '$tomlish'<<<" set items $tomlish @@ -354,7 +389,7 @@ namespace eval tomlish { #a.b.c = 1 #table_key_hierarchy -> a b - #leafkey -> c + #tleaf -> c if {[llength $dotted_key_hierarchy] == 0} { #empty?? probably invalid. review #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively @@ -362,10 +397,10 @@ namespace eval tomlish { } elseif {[llength $dotted_key_hierarchy] == 1} { #dottedkey is only a key - no table component set table_hierarchy [list] - set leafkey [lindex $dotted_key_hierarchy 0] + set tleaf [lindex $dotted_key_hierarchy 0] } else { set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] - set leafkey [lindex $dotted_key_hierarchy end] + set tleaf [lindex $dotted_key_hierarchy end] } #ensure empty tables are still represented in the datastructure @@ -380,143 +415,101 @@ namespace eval tomlish { } } #review? - if {[dict exists $datastructure {*}$table_hierarchy $leafkey]} { - error "Duplicate key '$table_hierarchy $leafkey'. The key already exists at this level in the toml data. The toml data is not valid." + if {[dict exists $datastructure {*}$table_hierarchy $tleaf]} { + error "Duplicate key '$table_hierarchy $tleaf'. The key already exists at this level in the toml data. The toml data is not valid." 
} #JMN test 2025 if {[llength $table_hierarchy]} { - lappend tablenames_seen $table_hierarchy + dictn incr tablenames_info [list $table_hierarchy seencount] } set keyval_dict [_get_keyval_value $item] if {![tomlish::dict::is_tomlish_typeval $keyval_dict]} { - lappend tablenames_seen [list {*}$table_hierarchy $leafkey] - lappend tablenames_closed [list {*}$table_hierarchy $leafkey] + set t [list {*}$table_hierarchy $tleaf] + dictn incr tablenames_info [list $t seencount] + dictn set tablenames_info [list $t closed] 1 #review - item is an ITABLE - we recurse here without datastructure context :/ #overwriting keys? todo ? - dict set datastructure {*}$table_hierarchy $leafkey $keyval_dict + dict set datastructure {*}$table_hierarchy $tleaf $keyval_dict } else { - dict set datastructure {*}$table_hierarchy $leafkey $keyval_dict + dict set datastructure {*}$table_hierarchy $tleaf $keyval_dict } + } + TABLEARRAY { + set tablename [lindex $item 1] + log::debug "---> to_dict processing item TABLENAME (name: $tablename): $item" + set norm_segments [::tomlish::utils::tablename_split $tablename true] ;#true to normalize + #we expect repeated tablearray entries - each adding a sub-object to the value, which is an array/list. + } TABLE { set tablename [lindex $item 1] + log::debug "---> to_dict processing item TABLE (name: $tablename): $item" #set tablename [::tomlish::utils::tablename_trim $tablename] set norm_segments [::tomlish::utils::tablename_split $tablename true] ;#true to normalize - if {$norm_segments in $tablenames_seen} { - error "Table name '$tablename' has already been directly defined in the toml data. Invalid." - } - log::debug "---> to_dict processing item $tag (name: $tablename): $item" - set name_segments [::tomlish::utils::tablename_split $tablename] ;#unnormalized - set last_seg "" - #toml spec rule - all segments mst be non-empty - #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. 
- - set table_key_sublist [list] - - foreach normseg $norm_segments { - lappend table_key_sublist $normseg - if {[dict exists $datastructure {*}$table_key_sublist]} { - #It's ok for this key to already exist *if* it was defined by a previous tablename or equivalent - #and if this key is longer - - #consider the following 2 which are legal: - #[table] - #x.y = 3 - #[table.x.z] - #k= 22 - - #equivalent - - #[table] - #[table.x] - #y = 3 - #[table.x.z] - #k=22 - - #illegal - #[table] - #x.y = 3 - #[table.x.y.z] - #k = 22 - ## - we should fail on encountering table.x.y because only table and table.x are effectively tables - - #illegal - #[table] - #x.y = {p=3} - #[table.x.y.z] - #k = 22 - ## we should fail because y is an inline table which is closed to further entries - - - #note: it is not safe to compare normalized tablenames using join! - # e.g a.'b.c'.d is not the same as a.b.c.d - # instead compare {a b.c d} with {a b c d} - # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. - #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' - #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} - - set sublist_length [llength $table_key_sublist] - set found_testkey 0 - if {$table_key_sublist in $tablenames_seen} { - set found_testkey 1 - } else { - #see if it was defined by a longer entry - foreach seen_table_segments $tablenames_seen { - if {[llength $seen_table_segments] <= $sublist_length} { - continue - } - #each tablenames_seen entry is already a list of normalized segments - - #we could have [a.b.c.d] early on - # followed by [a.b] - which was still defined by the earlier one. + set T_DEFINED [dictn getdef $tablenames_info [list $norm_segments defined] NULL] + if {$T_DEFINED ne "NULL"} { + #our tablename e.g [a.b.c.d] declares a space to 'define' subkeys - but there has already been a definition space for this path + set msg "Table name $tablename has already been directly defined in the toml data. 
Invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] + error $msg + } - set seen_longer [lrange $seen_segments 0 [expr {$sublist_length -1}]] - puts stderr "testkey:'$table_key_sublist' vs seen_match:'$seen_longer'" - if {$table_key_sublist eq $seen_longer} { - set found_testkey 1 - } - } - } - if {$found_testkey == 0} { - #the raw unnormalized tablename might be ok to display in the error message, although it's not the actual dict keyset - set msg "key $table_key_sublist already exists in datastructure, but wasn't defined by a supertable." - append msg \n "tablenames_seen:" \n - foreach ts $tablenames_seen { - append msg " " $ts \n - } + set name_segments [::tomlish::utils::tablename_split $tablename 0] ;#unnormalized e.g ['a'."b".c.d] -> 'a' "b" c d + #results of tablename_split 0 are 'raw' - ie some segments may be enclosed in single or double quotes. + + + set supertable [list] + ############## + # [a.b.c.d] + # norm_segments = {a b c d} + #check a {a b} {a b c} <---- supertables of a.b.c.d + ############## + foreach normseg [lrange $norm_segments 0 end-1] { + lappend supertable $normseg + if {![dictn exists $tablenames_info [list $supertable type]]} { + #supertable with this path doesn't yet exist + if {[dict exists $datastructure {*}$supertable]} { + #There is data though - so it must have been created as a keyval + set msg "Supertable [join $supertable .] 
of table name $tablename already has data - invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] error $msg + } else { + #here we 'create' it, but it's not being 'defined' ie we're not setting keyvals for it here + dictn set tablenames_info [list $supertable type] header + #ensure empty tables are still represented in the datastructure + dict set datastructure {*}$supertable [list] } - } - - } - - #ensure empty tables are still represented in the datastructure - set key_sublist [list] - foreach k $norm_segments { - lappend key_sublist $k - if {![dict exists $datastructure {*}$key_sublist]} { - dict set datastructure {*}$key_sublist [list] } else { - tomlish::log::notice "to_dict datastructure at (TABLE) subkey $key_sublist already had data: [dict get $datastructure {*}$key_sublist]" + #supertable has already been created - and maybe defined - but even if defined we can add subtables } } + #table [a.b.c.d] hasn't been defined - but may have been 'created' already by a longer tablename + # - or may have existing data from a keyval + if {![dictn exists $tablenames_info [list $norm_segments type]]} { + if {[dict exists $datastructure {*}$norm_segments]} { + set msg "Table name $tablename already has data - invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] + error $msg + } + #no data or previously created table + dictn set tablenames_info [list $norm_segments type] header - #We must do this after the key-collision test above! 
- lappend tablenames_seen $norm_segments - - + #We are 'defining' this table's keys and values here (even if empty) + dict set datastructure {*}$norm_segments [list] ;#ensure table still represented in datastructure even if we add no keyvals here + } + dictn set tablenames_info [list $norm_segments defined] open log::debug ">>> to_dict >>>>>>>>>>>>>>>>> normalized table key hierarchy : $norm_segments" #now add the contained elements foreach element [lrange $item 2 end] { set type [lindex $element 0] - log::debug "----> tododict processing $tag subitem $type processing contained element $element" + log::debug "----> todict processing $tag subitem $type processing contained element $element" switch -exact -- $type { DOTTEDKEY { set dkey_info [_get_dottedkey_info $element] @@ -547,14 +540,19 @@ namespace eval tomlish { puts stdout "to_dict>>> $keyval_dict" dict set datastructure {*}$norm_segments {*}$dkeys $leaf_key $keyval_dict #JMN 2025 - lappend tablenames_seen [list {*}$norm_segments {*}$dkeys] + #lappend tablenames_info [list {*}$norm_segments {*}$dkeys] + set tkey [list {*}$norm_segments {*}$dkeys] + dictn incr tablenames_info [list $tkey seencount] if {![tomlish::dict::is_tomlish_typeval $keyval_dict]} { #the value is either empty or or a dict structure with arbitrary (from-user-data) toplevel keys # inner structure will contain {type value } if all leaves are not empty ITABLES - lappend tablenames_seen [list {*}$norm_segments {*}$dkeys $leaf_key] + set tkey [list {*}$norm_segments {*}$dkeys $leaf_key] + #lappend tablenames_info [list {*}$norm_segments {*}$dkeys $leaf_key] + dictn incr tablenames_info [list $tkey seencount] #if the keyval_dict is not a simple type x value y - then it's an inline table ? #if so - we should add the path to the leaf_key as a closed table too - as it's not allowed to have more entries added. 
+ dictn set tablenames_info [list $tkey closed] 1 } } @@ -562,7 +560,7 @@ namespace eval tomlish { #ignore } default { - error "Sub element of type '$type' not understood in table context. Expected only KEY,DQKEY,SQKEY,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in table context. Expected only DOTTEDKEY,NEWLINE,COMMENT,WS" } } } @@ -1316,7 +1314,12 @@ namespace eval tomlish::encode { #NOTE - this DELIBERATELY does not validate the data, or process escapes etc #It encodes the tomlish records as they are. #ie it only produces toml shaped data from a tomlish list. + # #It is part of the roundtripability of data from toml to tomlish + #!! ie - it is not the place to do formatting of inline vs multiline !! + # That needs to be encoded in the tomlish data that is being passed in + # (e.g from_dict could make formatting decisions in the tomlish it produces) + # #e.g duplicate keys etc can exist in the toml output. #The to_dict from_dict (or any equivalent processor pair) is responsible for validation and conversion #back and forth of escape sequences where appropriate. @@ -1646,17 +1649,27 @@ namespace eval tomlish::decode { #pop_trigger_tokens: newline tablename endarray endinlinetable #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. 
set parentlevel [expr {$nest -1}] - set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like tentative_accum_squote need to do their own append switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote { + #should only apply within a multiliteral #### set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed #Without this - we would get extraneous empty list entries in the parent # - as the xxx-squote-space isn't a space level from the toml perspective # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + #assert prevstate always trailing-squote-space + #dev guardrail - remove? assertion lib? + switch -exact -- $prevstate { + trailing-squote-space { + } + default { + error "--- unexpected popped due to tentative_accum_squote but came from state '$prevstate' should have been trailing-squote-space" + } + } switch -- $tok { ' { - tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + tomlish::parse::set_token_waiting type single_squote value $tok complete 1 startindex [expr {$i -1}] } '' { #review - we should perhaps return double_squote instead? 
@@ -1669,74 +1682,51 @@ namespace eval tomlish::decode { tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] } '''' { - switch -exact -- $prevstate { - leading-squote-space { - error "---- 4 squotes from leading-squote-space - shouldn't get here" - #we should have emitted the triple and left the last for next loop + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata } - trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] - #todo integrate left squote with nest data at this level - set lastpart [lindex $v($parentlevel) end] - switch -- [lindex $lastpart 0] { - LITERALPART { - set newval "[lindex $lastpart 1]'" - set parentdata $v($parentlevel) - lset parentdata end [list LITERALPART $newval] - set v($parentlevel) $parentdata - } - NEWLINE { - lappend v($parentlevel) [list LITERALPART "'"] - } - MULTILITERAL { - #empty - lappend v($parentlevel) [list LITERALPART "'"] - } - default { - error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" - } - } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + MULTILITERAL { + #empty + lappend v($parentlevel) [list LITERALPART "'"] } default { - error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" } } } ''''' { - switch -exact -- $prevstate { - leading-squote-space { - error "---- 5 squotes from leading-squote-space - 
shouldn't get here" - #we should have emitted the triple and left the following squotes for next loop + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata } - trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] - #todo integrate left 2 squotes with nest data at this level - set lastpart [lindex $v($parentlevel) end] - switch -- [lindex $lastpart 0] { - LITERALPART { - set newval "[lindex $lastpart 1]''" - set parentdata $v($parentlevel) - lset parentdata end [list LITERALPART $newval] - set v($parentlevel) $parentdata - } - NEWLINE { - lappend v($parentlevel) [list LITERALPART "''"] - } - MULTILITERAL { - lappend v($parentlevel) [list LITERALPART "''"] - } - default { - error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" - } - } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "''"] + } + MULTILITERAL { + lappend v($parentlevel) [list LITERALPART "''"] } default { - error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" } } } } - puts stderr "tomlish::decode::toml ---- HERE squote_seq pop <$tok>" } triple_squote { #presumably popping multiliteral-space @@ -1763,7 +1753,119 @@ namespace eval tomlish::decode { lappend merged $part } default { - error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + error "---- triple_squote unhandled part type [lindex $part 0] unable 
to merge leveldata: $v($nest)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + tentative_accum_dquote { + #should only apply within a multistring + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the trailing-dquote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + #assert prevstate always trailing-dquote-space + #dev guardrail - remove? assertion lib? + switch -exact -- $prevstate { + trailing-dquote-space { + } + default { + error "--- unexpected popped due to tentative_accum_dquote but came from state '$prevstate' should have been trailing-dquote-space" + } + } + switch -- $tok { + {"} { + tomlish::parse::set_token_waiting type single_dquote value $tok complete 1 startindex [expr {$i -1}] + } + {""} { + #review - we should perhaps return double_dquote instead? 
+ #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_dquote value "" complete 1 startindex [expr {$i - 2}] + } + {"""} { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 3}] + } + {""""} { + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left dquote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + STRINGPART { + set newval "[lindex $lastpart 1]\"" + set parentdata $v($parentlevel) + lset parentdata end [list STRINGPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE - CONT - WS { + lappend v($parentlevel) [list STRINGPART {"}] + } + MULTISTRING { + #empty + lappend v($parentlevel) [list STRINGPART {"}] + } + default { + error "--- don't know how to integrate extra trailing dquote with data $v($parentlevel)" + } + } + } + {"""""} { + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 dquotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + STRINGPART { + set newval "[lindex $lastpart 1]\"\"" + set parentdata $v($parentlevel) + lset parentdata end [list STRINGPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE - CONT - WS { + lappend v($parentlevel) [list STRINGPART {""}] + } + MULTISTRING { + lappend v($parentlevel) [list STRINGPART {""}] + } + default { + error "--- don't know how to integrate extra trailing 2 dquotes with data $v($parentlevel)" + } + } + } + } + } + triple_dquote { + #presumably popping multistring-space + ::tomlish::log::debug "---- triple_dquote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + 
switch -exact -- [lindex $part 0] { + MULTISTRING { + lappend merged $part + } + STRINGPART { + if {$lasttype eq "STRINGPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + CONT - WS { + lappend merged $part + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. + lappend merged $part + } + default { + error "---- triple_dquote unhandled part type [lindex $part 0] unable to merge leveldata: $v($nest)" } } set lasttype [lindex $part 0] @@ -1809,15 +1911,12 @@ namespace eval tomlish::decode { endinlinetable { ::tomlish::log::debug "---- endinlinetable for last_space_action pop" } - endmultiquote { - ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" - } default { error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" } } if {$do_append_to_parent} { - #e.g squote_seq does it's own appends as necessary - so won't get here + #e.g tentative_accum_squote does it's own appends as necessary - so won't get here lappend v($parentlevel) [set v($nest)] } @@ -1831,8 +1930,8 @@ namespace eval tomlish::decode { switch -exact -- $tokenType { - squote_seq_begin { - #### + tentative_trigger_squote - tentative_trigger_dquote { + #### this startok will always be tentative_accum_squote/tentative_accum_dquote starting with one accumulated squote/dquote if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { lassign [dict get $transition_info starttok] starttok_type starttok_val set next_tokenType_known 1 @@ -1840,6 +1939,16 @@ namespace eval tomlish::decode { set tok $starttok_val } } + single_squote { + #JMN - REVIEW + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + triple_squote { + 
::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERALPART + } squotedkey { switch -exact -- $prevstate { table-space - itable-space { @@ -1849,6 +1958,9 @@ namespace eval tomlish::decode { #todo - check not something already waiting? tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } + triple_dquote { + set v($nest) [list MULTISTRING] ;#container for NEWLINE,STRINGPART,CONT + } dquotedkey { switch -exact -- $prevstate { table-space - itable-space { @@ -1858,7 +1970,7 @@ namespace eval tomlish::decode { #todo - check not something already waiting? tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } - XXXdquotedkey - XXXitablequotedkey { + XXXdquotedkey { #todo set v($nest) [list DQKEY $tok] ;#$tok is the keyname } @@ -1878,34 +1990,29 @@ namespace eval tomlish::decode { tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } } - startsquote { - #JMN - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "squotedkey" - set tok "" - } tablename { #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish # back to toml file will be identical. #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, - # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. 
+ # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names from + # a structural perspective. #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the # tomlish list? - set test_only [::tomlish::utils::tablename_trim $tok] - ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" + #set trimtable [::tomlish::utils::tablename_trim $tok] + #::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$trimtable'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. } tablearrayname { - set test_only [::tomlish::utils::tablename_trim $tok] - puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + #set trimtable [::tomlish::utils::tablename_trim $tok] + #::tomlish::log::debug "---- trimmed (but not normalized) tablearrayname: '$trimtable'" set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name } startarray { @@ -1914,14 +2021,6 @@ namespace eval tomlish::decode { startinlinetable { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. 
} - startmultiquote { - ::tomlish::log::debug "---- push trigger tokenType startmultiquote" - set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE - } - triple_squote { - ::tomlish::log::debug "---- push trigger tokenType triple_squote" - set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL - } default { error "---- push trigger tokenType '$tokenType' not yet implemented" } @@ -1931,11 +2030,11 @@ namespace eval tomlish::decode { #no space level change switch -exact -- $tokenType { squotedkey { - puts "---- squotedkey in state $prevstate (no space level change)" + #puts "---- squotedkey in state $prevstate (no space level change)" lappend v($nest) [list SQKEY $tok] } dquotedkey { - puts "---- dquotedkey in state $prevstate (no space level change)" + #puts "---- dquotedkey in state $prevstate (no space level change)" lappend v($nest) [list DQKEY $tok] } barekey { @@ -1960,29 +2059,46 @@ namespace eval tomlish::decode { startinlinetable { puts stderr "---- decode::toml error. 
did not expect startinlinetable without space level change (no space level change)" } - startquote { + single_dquote { switch -exact -- $newstate { string-state { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "string" set tok "" } - quoted-key { + dquoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "dquotedkey" set tok "" } - XXXitable-quoted-key { - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "itablequotedkey" - set tok "" + multistring-space { + lappend v($nest) [list STRINGPART {"}] + #may need to be joined on pop if there are neighbouring STRINGPARTS + } + default { + error "---- single_dquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + double_dquote { + #leading extra quotes - test: toml_multistring_startquote2 + switch -exact -- $prevstate { + itable-keyval-value-expected - keyval-value-expected { + puts stderr "tomlish::decode::toml double_dquote TEST" + #empty string + lappend v($nest) [list STRINGPART ""] + } + multistring-space { + #multistring-space to multistring-space + lappend v($nest) [list STRINGPART {""}] } default { - error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" } } + } - startsquote { + single_squote { switch -exact -- $newstate { literal-state { set next_tokenType_known 1 @@ -1995,41 +2111,17 @@ namespace eval tomlish::decode { set tok "" } multiliteral-space { - #false alarm squote returned from squote_seq pop + #false alarm squote returned from tentative_accum_squote pop ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" #(single squote - not terminating space) lappend v($nest) [list LITERALPART '] #may need to be joined on pop if there are neighbouring LITERALPARTs } default { 
- error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" + error "---- single_squote switch case not implemented for nextstate: $newstate (no space level change)" } } } - startmultiquote { - #review - puts stderr "---- got startmultiquote in state $prevstate (no space level change)" - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "stringpart" - set tok "" - } - endquote { - #nothing to do? - set tok "" - } - endsquote { - set tok "" - } - endmultiquote { - #JMN!! - set tok "" - } - string { - lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes - } - literal { - lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes - } double_squote { switch -exact -- $prevstate { keyval-value-expected { @@ -2044,6 +2136,19 @@ namespace eval tomlish::decode { } } } + enddquote { + #nothing to do? + set tok "" + } + endsquote { + set tok "" + } + string { + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes + } + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } multistring { #review lappend v($nest) [list MULTISTRING $tok] @@ -2056,11 +2161,9 @@ namespace eval tomlish::decode { } literalpart { lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly - } - itablequotedkey { - } untyped_value { + #would be better termed unclassified_value #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. 
if {$tok in {true false}} { set tag BOOL @@ -2238,7 +2341,7 @@ namespace eval tomlish::utils { #eg {dog."tater.man"} set sLen [tcl::string::length $tablename] set segments [list] - set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + set mode "preval" ;#5 modes: preval, quoted,litquoted, unquoted, postval #quoted is for double-quotes, litquoted is for single-quotes (string literal) set seg "" for {set i 0} {$i < $sLen} {incr i} { @@ -2249,139 +2352,166 @@ namespace eval tomlish::utils { set lastChar "" } + #todo - track\count backslashes properly + set c [tcl::string::index $tablename $i] + if {$c eq "\""} { + if {($lastChar eq "\\")} { + #not strictly correct - we could have had an even number prior-backslash sequence + #the toml spec would have us error out immediately on bsl in bad location - but we're + #trying to parse to unvalidated tomlish + set ctest escq + } else { + set ctest dq + } + } else { + set ctest [string map [list " " sp \t tab] $c] + } - if {$c eq "."} { - switch -exact -- $mode { - unquoted { - #dot marks end of segment. - lappend segments $seg - set seg "" - set mode "unknown" - } - quoted { - append seg $c - } - unknown { - lappend segments $seg - set seg "" - } - litquoted { - append seg $c - } - default { - #mode: syntax - #we got our dot. - the syntax mode is now satisfied. - set mode "unknown" + switch -- $ctest { + . { + switch -exact -- $mode { + preval { + error "tablename_split. dot not allowed - expecting a value" + } + unquoted { + #dot marks end of segment. + #if {![is_barekey $seg]} { + # error "tablename_split. 
dot not allowed - expecting a value" + #} + lappend segments $seg + set seg "" + set mode "preval" + } + quoted { + append seg $c + } + litquoted { + append seg $c + } + postval { + #got dot in an expected location + set mode "preval" + } } } - } elseif {($c eq "\"") && ($lastChar ne "\\")} { - if {$mode eq "unknown"} { - if {[tcl::string::trim $seg] ne ""} { - #we don't allow a quote in the middle of a bare key - error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" - } - set mode "quoted" - set seg "\"" - } elseif {$mode eq "unquoted"} { - append seg $c - } elseif {$mode eq "quoted"} { - append seg $c - - if {$normalize} { - lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] - } else { - lappend segments $seg + dq { + #unescaped dquote + switch -- $mode { + preval { + set mode "quoted" + set seg "\"" + } + unquoted { + #invalid in barekey - but we are after structure only + append seg $c + } + quoted { + append seg $c + if {$normalize} { + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] + } else { + lappend segments $seg + } + set seg "" + set mode "postval" ;#make sure we only accept a dot or end-of-data now. + } + litquoted { + append seg $c + } + postval { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } } - - set seg "" - set mode "syntax" ;#make sure we only accept a dot or end-of-data now. - } elseif {$mode eq "litquoted"} { - append seg $c - } elseif {$mode eq "syntax"} { - error "tablename_split. expected whitespace or dot, got double quote. 
tablename: '$tablename'" - } - } elseif {($c eq "\'")} { - if {$mode eq "unknown"} { - append seg $c - set mode "litquoted" - } elseif {$mode eq "unquoted"} { - #single quote inside e.g o'neill - append seg $c - } elseif {$mode eq "quoted"} { - append seg $c - - } elseif {$mode eq "litquoted"} { - append seg $c - #no normalization to do - lappend segments $seg - set seg "" - set mode "syntax" - } elseif {$mode eq "syntax"} { - error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" } - - } elseif {$c in [list " " \t]} { - if {$mode eq "syntax"} { - #ignore - } else { - append seg $c + ' { + switch -- $mode { + preval { + append seg $c + set mode "litquoted" + } + unquoted { + #single quote inside e.g o'neill - ultimately invalid - but we pass through here. + append seg $c + } + quoted { + append seg $c + } + litquoted { + append seg $c + #no normalization to do aside from stripping squotes + if {$normalize} { + lappend segments [tcl::string::range $seg 1 end-1] + } else { + lappend segments $seg + } + set seg "" + set mode "postval" + } + postval { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + } } - } else { - if {$mode eq "syntax"} { - error "tablename_split. Expected a dot separator. got '$c'. tablename: '$tablename'" + sp - tab { + switch -- $mode { + preval - postval { + #ignore + } + unquoted { + #terminates a barekey + lappend segments $seg + set seg "" + set mode "postval" + } + default { + #append to quoted or litquoted + append seg $c + } + } } - if {$mode eq "unknown"} { - set mode "unquoted" + default { + switch -- $mode { + preval { + set mode unquoted + append seg $c + } + postval { + error "tablename_split. Expected a dot separator. got '$c'. 
tablename: '$tablename'" + } + default { + append seg $c + } + } } - append seg $c } + if {$i == $sLen-1} { #end of data ::tomlish::log::debug "End of data: mode='$mode'" - #REVIEW - we can only end up in unquoted or syntax here? are other branches reachable? switch -exact -- $mode { - quoted { - if {$c ne "\""} { - error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" - } - if {$normalize} { - lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] - #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong - } else { - lappend segments $seg - } + preval { + error "tablename_split. Expected a value after last dot separator. tablename: '$tablename'" } - litquoted { - set trimmed_seg [tcl::string::trim $seg] - if {[tcl::string::index $trimmed_seg end] ne "\'"} { - error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" - } + unquoted { lappend segments $seg } - unquoted - unknown { - lappend segments $seg + quoted { + error "tablename_split. Expected a trailing double quote. tablename: '$tablename'" } - syntax { - #ok - segment already lappended + litquoted { + error "tablename_split. Expected a trailing single quote. tablename: '$tablename'" } - default { - lappend segments $seg + postval { + #ok - segment already lappended } } } } - foreach seg $segments { - set trimmed [tcl::string::trim $seg " \t"] - #note - we explicitly allow 'empty' quoted strings '' & "" - # (these are 'discouraged' but valid toml keys) - #if {$trimmed in [list "''" "\"\""]} { - # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" - #} - if {$trimmed eq "" } { - error "tablename_split. Empty segment found. 
tablename: '$tablename' segments [llength $segments] ($segments)" - } - } + + #note - we must allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + return $segments } @@ -2432,26 +2562,34 @@ namespace eval tomlish::utils { #- escape_string and unescape_string would not be reliably roundtrippable inverses anyway. #REVIEW - provide it anyway? When would it be desirable to use? - variable Bstring_control_map [list\ - \b {\b}\ - \n {\n}\ - \r {\r}\ - \" {\"}\ - \x1b {\e}\ - \\ "\\\\"\ - ] + variable Bstring_control_map [dict create] + dict set Bstring_control_map \b {\b} + dict set Bstring_control_map \n {\n} + dict set Bstring_control_map \r {\r} + dict set Bstring_control_map \" {\"} + #dict set Bstring_control_map \x1b {\e} ;#should presumably be only be a convenience for decode - going the other way we get \u001B + dict set Bstring_control_map \\ "\\\\" + #\e for \x1b seems like it might be included - v1.1?? hard to find current state of where toml is going :/ #for a Bstring (Basic string) tab is explicitly mentioned as not being one that must be escaped. - for {set cdec 0} {$cdec <= 8} {incr cdec} { + #8 = \b - already in list. 
+ #built the remainder whilst checking for entries already hardcoded above -in case more are added to the hardcoded list + for {set cdec 0} {$cdec <= 7} {incr cdec} { set hhhh [format %.4X $cdec] - lappend Bstring_control_map [format %c $cdec] \\u$hhhh + set char [format %c $cdec] + if {![dict exists $Bstring_control_map $char]} { + dict set Bstring_control_map $char \\u$hhhh + } } for {set cdec [expr {0x0A}]} {$cdec <= 0x1F} {incr cdec} { set hhhh [format %.4X $cdec] - lappend Bstring_control_map [format %c $cdec] \\u$hhhh + set char [format %c $cdec] + if {![dict exists $Bstring_control_map $char]} { + dict set Bstring_control_map $char \\u$hhhh + } } # \u007F = 127 - lappend Bstring_control_map [format %c 127] \\u007F + dict set Bstring_control_map [format %c 127] \\u007F #Note the inclusion of backslash in the list of controls makes this non idempotent - subsequent runs would keep encoding the backslashes! #escape only those chars that must be escaped in a Bstring (e.g not tab which can be literal or escaped) @@ -2474,6 +2612,7 @@ namespace eval tomlish::utils { # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn # it replaces \ with a single whitespace (trailing backslash) #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + #plus \e for \x1b? 
set buffer "" set buffer4 "" ;#buffer for 4 hex characters following a \u @@ -2558,12 +2697,13 @@ namespace eval tomlish::utils { set ctest [tcl::string::map {{"} dq} $c] switch -exact -- $ctest { dq { - set e "\\\"" - append buffer [subst -nocommand -novariable $e] + append buffer {"} } b - t - n - f - r { - set e "\\$c" - append buffer [subst -nocommand -novariable $e] + append buffer [subst -nocommand -novariable "\\$c"] + } + e { + append buffer \x1b } u { set unicode4_active 1 @@ -2578,8 +2718,7 @@ namespace eval tomlish::utils { #review - toml spec says all other escapes are reserved #and if they are used TOML should produce an error. #we leave detecting this for caller for now - REVIEW - append buffer "\\" - append buffer $c + append buffer "\\$c" } } } else { @@ -3003,7 +3142,7 @@ namespace eval tomlish::parse { # states: # table-space, itable-space, array-space # array-value-expected,keyval-value-expected,itable-keyval-value-expected, keyval-syntax, - # quoted-key, squoted-key + # dquoted-key, squoted-key # string-state, literal-state, multistring... # # notes: @@ -3039,6 +3178,12 @@ namespace eval tomlish::parse { variable stateMatrix set stateMatrix [dict create] + #--------------------------------------------------------- + #WARNING + #The stateMatrix implementation here is currently messy. + #The code is a mixture of declarative via the stateMatrix and imperative via switch statements during PUSH/POP/SAMESPACE transitions. + #This means the state behaviour has to be reasoned about by looking at both in conjuction. 
+ #--------------------------------------------------------- #xxx-space vs xxx-syntax inadequately documented - TODO @@ -3060,35 +3205,19 @@ namespace eval tomlish::parse { barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ dquotedkey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ - XXXstartquote "quoted-key"\ - XXXstartsquote "squoted-key"\ + XXXsingle_dquote "quoted-key"\ + XXXsingle_squote "squoted-key"\ comment "table-space"\ starttablename "tablename-state"\ starttablearrayname "tablearrayname-state"\ - startmultiquote "err-state"\ - endquote "err-state"\ + enddquote "err-state"\ + endsquote "err-state"\ comma "err-state"\ eof "end-state"\ equal "err-state"\ cr "err-lonecr"\ } - #itable-space/ curly-syntax : itables - dict set stateMatrix\ - itable-space {\ - whitespace "itable-space"\ - newline "itable-space"\ - barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - dquotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - endinlinetable "POPSPACE"\ - XXXstartquote "quoted-key"\ - XXXstartsquote {TOSTATE "squoted-key" comment "jn-testing"}\ - comma "err-state"\ - comment "itable-space"\ - eof "err-state"\ - } - #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}} dict set stateMatrix\ @@ -3113,26 +3242,19 @@ namespace eval tomlish::parse { dict set stateMatrix\ keyval-value-expected {\ whitespace "keyval-value-expected"\ - untyped_value {TOSTATE "keyval-tail" note ""}\ - startquote {TOSTATE "string-state" returnstate keyval-tail}\ - startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ - squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ - startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is 
triggered"}\ - double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ - triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ - startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ - startarray {PUSHSPACE array-space returnstate keyval-tail}\ - } - #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} - dict set stateMatrix\ - leading-squote-space {\ - squote_seq "POPSPACE"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + literal {TOSTATE "keyval-tail" note "required for empty literal at EOF"}\ + string {TOSTATE "keyval-tail" note "required for empty string at EOF"}\ + single_dquote {TOSTATE "string-state" returnstate keyval-tail}\ + triple_dquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + single_squote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ } - #dict set stateMatrix\ - # keyval-process-leading-squotes {\ - # startsquote "literal-state"\ - # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ - # } + #double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"} + + #2025 - no leading-squote-space - only trailing-squote-space. 
dict set stateMatrix\ keyval-tail {\ @@ -3142,81 +3264,106 @@ namespace eval tomlish::parse { eof "end-state"\ } + + #itable-space/ curly-syntax : itables + # x={y=1,} + dict set stateMatrix\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + dquotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + comma "err-state"\ + comment "itable-space"\ + eof "err-state"\ + } + #we don't get single_squote etc here - instead we get the resulting squotedkey token + + + # ??? review - something like this + # + # x={y =1,} dict set stateMatrix\ itable-keyval-syntax {\ - whitespace "itable-keyval-syntax"\ - barekey {PUSHSPACE "dottedkey-space"}\ - squotedkey {PUSHSPACE "dottedkey-space"}\ - dquotedkey {PUSHSPACE "dottedkey-space"}\ - equal "itable-keyval-value-expected"\ + whitespace {TOSTATE "itable-keyval-syntax"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + dquotedkey {PUSHSPACE "dottedkey-space"}\ + equal {TOSTATE "itable-keyval-value-expected"}\ newline "err-state"\ eof "err-state"\ } + + # x={y=1} + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + dict set stateMatrix\ itable-keyval-value-expected {\ whitespace "itable-keyval-value-expected"\ untyped_value {TOSTATE "itable-val-tail" note ""}\ - startquote {TOSTATE "string-state" returnstate itable-val-tail}\ - startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ - squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ - startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ - double_squote {TOSTATE "itable-val-tail" 
note "empty literal received when double squote occurs"}\ + single_dquote {TOSTATE "string-state" returnstate itable-val-tail}\ + triple_dquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + single_squote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ } - dict set stateMatrix\ - itable-keyval-space {\ - whitespace "itable-keyval-syntax"\ - equal {TOSTATE "itable-keyval-value-expected" note "required"}\ - } + #double_squote not currently generated by _start_squote_sequence - '' processed as single_squote to literal-state just like 'xxx' + # review + # double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"} + + + # x={y=1,z="x"} + #POPSPACE is transition from itable-keyval-space to parent itable-space dict set stateMatrix\ itable-val-tail {\ whitespace "itable-val-tail"\ endinlinetable "POPSPACE"\ comma "POPSPACE"\ - XXXnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ - newline "POPSPACE"\ + newline {TOSTATE "itable-val-tail" note "itable-space ??"}\ comment "itable-val-tail"\ eof "err-state"\ } - #dict set stateMatrix\ - # itable-quoted-key {\ - # whitespace "NA"\ - # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ - # newline "err-state"\ - # endquote "itable-keyval-syntax"\ - # } - #dict set stateMatrix\ - # itable-squoted-key {\ - # whitespace "NA"\ - # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ - # newline "err-state"\ - # endsquote "itable-keyval-syntax"\ - # } + # XXXnewline "POPSPACE" + # We shouldn't popspace on newline - as if there was no comma we need to stay in itable-val-tail + # This means the newline and subsequent whitespace, comments etc become part of the preceeding dottedkey record + #e.g + # x = { + # j=1 + # 
#comment within dottedkey j record + # , # comment unattached + # #comment unattached + # k=2 , #comment unattached + # l=3 #comment within l record + # , m=4 + # #comment associated with m record + # + # #still associated with m record + # } + ## - This doesn't quite correspond to what a user might expect - but seems like a consistent mechanism. + #The awkwardness is because there is no way to put in a comment that doesn't consume a trailing comma + #so we cant do: j= 1 #comment for j1 , + # and have the trailing comma recognised. + # + # To associate: j= 1, #comment for j1 + # we would need some extra processing . (not popping until next key ? extra state itable-sep-tail?) REVIEW - worth doing? + # + # The same issue occurs with multiline arrays. The most natural assumption is that a comment on same line after a comma + # is 'associated' with the previous entry. + # + # These comment issues are independent of the data dictionary being generated for conversion to json etc - as the comments don't carry through anyway, + # but are a potential oddity for manipulating the intermediate tomlish structure whilst attempting to preserve 'associated' comments + # (e.g reordering records within an itable) + #The user's intention for 'associated' isn't always clear and the specs don't really guide on this. - - - #array-value-expected ? - dict set stateMatrix\ - XXXvalue-expected {\ - whitespace "value-expected"\ - untyped_value {"SAMESPACE" "" replay untyped_value}\ - startquote "string-state"\ - startsquote "literal-state"\ - triple_squote {PUSHSPACE "multiliteral-space"}\ - startmultiquote {PUSHSPACE "multistring-space"}\ - startinlinetable {PUSHSPACE itable-space}\ - startarray {PUSHSPACE array-space}\ - comment "err-state-value-expected-got-comment"\ - comma "err-state"\ - newline "err-state"\ - eof "err-state"\ - } - #note comment token should never be delivered to array-value-expected state? 
- #dottedkey-space is not (currently) used within [tablename] or [[tablearrayname]] #it is for keyval ie x.y.z = value @@ -3245,6 +3392,8 @@ namespace eval tomlish::parse { whitespace "dottedkey-space-tail" dotsep "dottedkey-space" equal "POPSPACE"\ + eof "err-state"\ + newline "err-state"\ } #-------------------------------------------------------------------------- @@ -3262,22 +3411,10 @@ namespace eval tomlish::parse { #toml spec looks like heading towards allowing newlines within inline tables #https://github.com/toml-lang/toml/issues/781 - #2025 - appears to be valid for 1.1 - which we are targeting. + #2025 - multiline itables appear to be valid for 1.1 - which we are targeting. #https://github.com/toml-lang/toml/blob/main/toml.md#inline-table #JMN2025 - #dict set stateMatrix\ - # curly-syntax {\ - # whitespace "curly-syntax"\ - # newline "curly-syntax"\ - # barekey {PUSHSPACE "itable-keyval-space"}\ - # itablequotedkey "itable-keyval-space"\ - # endinlinetable "POPSPACE"\ - # startquote "itable-quoted-key"\ - # comma "itable-space"\ - # comment "itable-space"\ - # eof "err-state"\ - # } #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES #We currently allow multiline ITABLES (also with comments) in the tokenizer. #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? 
@@ -3291,10 +3428,9 @@ namespace eval tomlish::parse { # untyped_value "SAMESPACE"\ # startarray {PUSHSPACE "array-space"}\ # endarray "POPSPACE"\ - # startmultiquote {PUSHSPACE multistring-space}\ # startinlinetable {PUSHSPACE itable-space}\ - # startquote "string-state"\ - # startsquote "literal-state"\ + # single_dquote "string-state"\ + # single_squote "literal-state"\ # triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ # comma "array-space"\ # comment "array-space"\ @@ -3305,15 +3441,16 @@ namespace eval tomlish::parse { set aspace [dict create] dict set aspace whitespace "array-space" dict set aspace newline "array-space" - dict set aspace untyped_value "SAMESPACE" + #dict set aspace untyped_value "SAMESPACE" + dict set aspace untyped_value "array-syntax" dict set aspace startarray {PUSHSPACE "array-space"} dict set aspace endarray "POPSPACE" - dict set aspace startmultiquote {PUSHSPACE multistring-space} + dict set aspace single_dquote {TOSTATE "string-state" returnstate array-syntax} + dict set aspace triple_dquote {PUSHSPACE "multistring-space" returnstate array-syntax} + dict set aspace single_squote {TOSTATE "literal-state" returnstate array-syntax} + dict set aspace triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax} dict set aspace startinlinetable {PUSHSPACE itable-space} - dict set aspace startquote "string-state" - dict set aspace startsquote "literal-state" - dict set aspace triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"} - dict set aspace comma "array-space" + #dict set aspace comma "array-space" dict set aspace comment "array-space" dict set aspace eof "err-state-array-space-got-eof" dict set stateMatrix array-space $aspace @@ -3329,26 +3466,16 @@ namespace eval tomlish::parse { #dict set asyntax untyped_value "SAMESPACE" #dict set asyntax startarray {PUSHSPACE array-space} dict set asyntax endarray "POPSPACE" - #dict set asyntax 
startmultiquote {PUSHSPACE multistring-space} - #dict set asyntax startquote "string-state" - #dict set asyntax startsquote "literal-state" + #dict set asyntax single_dquote "string-state" + #dict set asyntax single_squote "literal-state" dict set asyntax comma "array-space" dict set asyntax comment "array-syntax" dict set stateMatrix array-syntax $asyntax - #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space - dict set stateMatrix\ - quoted-key {\ - whitespace "NA"\ - dquotedkey {PUSHSPACE "keyval-space"}\ - newline "err-state"\ - endquote "keyval-syntax"\ - } - - #review + #dquotedkey is a token - dquoted-key is a state dict set stateMatrix\ dquoted-key {\ whitespace "NA"\ @@ -3367,7 +3494,7 @@ namespace eval tomlish::parse { string-state {\ whitespace "NA"\ string "string-state"\ - endquote "SAMESPACE"\ + enddquote "SAMESPACE"\ newline "err-state"\ eof "err-state"\ } @@ -3381,20 +3508,21 @@ namespace eval tomlish::parse { } - #dict set stateMatrix\ - # stringpart {\ - # continuation "SAMESPACE"\ - # endmultiquote "POPSPACE"\ - # eof "err-state"\ - # } dict set stateMatrix\ multistring-space {\ - whitespace "multistring-space"\ - continuation "multistring-space"\ - stringpart "multistring-space"\ - newline "multistring-space"\ - endmultiquote "POPSPACE"\ - eof "err-state"\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + tentative_trigger_dquote {PUSHSPACE "trailing-dquote-space" returnstate multistring-space starttok {tentative_accum_dquote {"}}}\ + single_dquote {TOSTATE multistring-space}\ + double_dquote {TOSTATE multistring-space}\ + triple_dquote {POPSPACE}\ + eof "err-state"\ + } + dict set stateMatrix\ + trailing-dquote-space { + tentative_accum_dquote "POPSPACE" } @@ -3402,19 +3530,19 @@ namespace eval tomlish::parse { #todo - treat sole cr as part of literalpart but crlf and lf as newline dict set stateMatrix\ multiliteral-space {\ - 
literalpart "multiliteral-space"\ - newline "multiliteral-space"\ - squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ - triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ - double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ - startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ - eof "err-premature-eof-in-multiliteral-space"\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + tentative_trigger_squote {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {tentative_accum_squote "'"}}\ + single_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: false alarm this squote is part of data"}\ + double_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + eof "err-premature-eof-in-multiliteral-space"\ } #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes dict set stateMatrix\ - trailing-squote-space {\ - squote_seq "POPSPACE"\ + trailing-squote-space { + tentative_accum_squote "POPSPACE" } @@ -3499,7 +3627,7 @@ namespace eval tomlish::parse { - + dict set stateMatrix\ end-state {} @@ -3557,14 +3685,13 @@ namespace eval tomlish::parse { dict set spacePushTransitions itable-keyval-space itable-keyval-syntax dict set spacePushTransitions array-space array-space dict set spacePushTransitions table-space tablename-state - dict set spacePushTransitions #itable-space itable-space + #dict set spacePushTransitions #itable-space itable-space #Pop 
to, next variable spacePopTransitions [dict create] dict set spacePopTransitions array-space array-syntax - #itable-space curly-syntax #itable-keyval-space itable-val-tail #review #we pop to keyval-space from dottedkey-space or from keyval-value-expected? we don't always want to go to keyval-tail @@ -3575,7 +3702,6 @@ namespace eval tomlish::parse { #JMN test #dict set spaceSameTransitions array-space array-syntax - #itable-space curly-syntax #itable-keyval-space itable-val-tail @@ -3611,6 +3737,8 @@ namespace eval tomlish::parse { ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { POPSPACE { + set popfromspace_info [spacestack peek] + set popfromspace_state [dict get $popfromspace_info state] spacestack pop set parent_info [spacestack peek] set type [dict get $parent_info type] @@ -3625,17 +3753,17 @@ namespace eval tomlish::parse { set existing [spacestack pop] dict unset existing returnstate spacestack push $existing ;#re-push modification - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected to stored returnstate $next <<---" } else { ### #review - do away with spacePopTransitions - which although useful to provide a default.. 
# - involve error-prone configurations distant to the main state transition configuration in stateMatrix if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" } else { set next $parentspace - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace<<---" } } set result $next @@ -3805,22 +3933,6 @@ namespace eval tomlish::parse { return $tokenType } - proc _shortcircuit_startquotesequence {} { - variable tok - variable i - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - set_tokenType "startquote" - incr i -1 - return -level 2 1 - } elseif {$toklen == 2} { - puts stderr "_shortcircuit_startquotesequence toklen 2" - set_tokenType "startquote" - set tok "\"" - incr i -2 - return -level 2 1 - } - } proc get_token_waiting {} { variable token_waiting @@ -3940,7 +4052,6 @@ namespace eval tomlish::parse { set slash_active 0 set quote 0 set c "" - set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { set lastChar [tcl::string::index $s [expr {$i - 1}]] @@ -3957,8 +4068,6 @@ namespace eval tomlish::parse { switch -exact -- $ctest { # { - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 @@ -3966,16 +4075,20 @@ namespace eval tomlish::parse { if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { + #for multiliteral, multistring - data and/or end incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } 
_start_squote_sequence { + #pseudo token beginning with underscore - never returned to state machine - review incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i [tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -4003,7 +4116,7 @@ namespace eval tomlish::parse { append tok $c } default { - #dquotedkey, itablequotedkey, string,literal, multistring + #dquotedkey, string,literal, multistring append tok $c } } @@ -4015,7 +4128,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes#" + append tok "#" } multiliteral-space { set_tokenType "literalpart" @@ -4031,23 +4144,23 @@ namespace eval tomlish::parse { } lc { #left curly brace - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i [tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { @@ -4059,7 +4172,7 @@ namespace eval tomlish::parse { } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename - starttablearrayname { #*bare* tablename can only contain letters,digits underscores @@ -4105,7 +4218,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\{" + append tok "\{" } multiliteral-space { set_tokenType "literalpart" @@ -4120,37 +4233,35 @@ namespace eval tomlish::parse { } rc { #right curly brace - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set 
slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey - comment { + string - dquotedkey - comment { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename - tablename { if {$had_slash} {append tok "\\"} @@ -4221,7 +4332,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\}" + append tok "\}" } multiliteral-space { set_tokenType "literalpart" ; #review @@ -4237,35 +4348,35 @@ namespace eval tomlish::parse { } lb { #left square bracket - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename { #change the tokenType @@ -4332,7 
+4443,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\[" + append tok "\[" } multiliteral-space { set_tokenType "literalpart" @@ -4350,37 +4461,35 @@ namespace eval tomlish::parse { } rb { #right square bracket - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } comment { if {$had_slash} {append tok "\\"} @@ -4428,16 +4537,6 @@ namespace eval tomlish::parse { } } } - XXXtablearraynames { - puts "rb @ tablearraynames ??" - #switch? - - #todo? - if {$had_slash} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex - return 1 - } default { incr i -1 return 1 @@ -4485,7 +4584,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\]" + append tok "\]" } multiliteral-space { set_tokenType "literalpart" @@ -4498,21 +4597,21 @@ namespace eval tomlish::parse { } } bsl { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
#backslash if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } whitespace { @@ -4529,9 +4628,7 @@ namespace eval tomlish::parse { append tok "\\" set slash_active 0 } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey - comment { + string - dquotedkey - comment { if {$slash_active} { set slash_active 0 append tok "\\\\" @@ -4545,7 +4642,6 @@ namespace eval tomlish::parse { set slash_active 0 append tok "\\\\" } else { - append tok $dquotes set slash_active 1 } } @@ -4575,10 +4671,6 @@ namespace eval tomlish::parse { set tok "\\\\" set slash_active 0 } else { - if {$dquotes ne ""} { - set_tokenType "stringpart" - set tok $dquotes - } set slash_active 1 } } @@ -4599,58 +4691,56 @@ namespace eval tomlish::parse { set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { - #short squote_seq tokens are returned if active during any other character + tentative_accum_squote { + #for within multiliteral + #short tentative_accum_squote tokens are returned if active upon receipt of any other character #longest allowable for leading/trailing are returned here #### set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote - switch -- $state { - leading-squote-space { - append tok $c - if {$existingtoklen > 2} { - error "tomlish tok error: squote_seq unexpected length $existingtoklen when another received" - } elseif {$existingtoklen == 2} { - return 1 ;#return tok ''' - } - } - trailing-squote-space { - append tok $c - if {$existingtoklen == 4} { - #maxlen to be an squote_seq is multisquote + 
2 = 5 - #return tok ''''' - return 1 - } - } - default { - error "tomlish tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" - } + #assert state = trailing-squote-space + append tok $c + if {$existingtoklen == 4} { + #maxlen to be a tentative_accum_squote is multisquote + 2 = 5 + #return tok with value ''''' + return 1 } } - whitespace { - #end whitespace - incr i -1 ;#reprocess sq + tentative_accum_dquote { + incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { - #temp token creatable only during value-expected or array-space + #pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space switch -- [tcl::string::length $tok] { 1 { + #no conclusion can yet be reached append tok $c } 2 { + #enter multiliteral #switch? append tok $c set_tokenType triple_squote return 1 } default { + #if there are more than 3 leading squotes we also enter multiliteral space and the subsequent ones are handled + #by the tentative_accum_squote check for ending sequence which can accept up to 5 and reintegrate the + #extra 1 or 2 squotes as data. 
error "tomlish unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" } } } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" + return 1 + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } literal { #slash_active always false #terminate the literal @@ -4663,7 +4753,7 @@ namespace eval tomlish::parse { # idea: end this literalpart (possibly 'temporarily') # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) - incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + incr i -1 ;#throw the "'" back to loop - will be added to a tentative_accum_squote token for later processing return 1 } XXXitablesquotedkey { @@ -4684,7 +4774,11 @@ namespace eval tomlish::parse { append tok $c } barekey { - #not clear why o'shennanigan shouldn't be a legal barekey - but it seems not to be. + #barekeys now support all sorts of unicode letter/number chars for other cultures + #but not punctuation - not even for those of Irish heritage who don't object + #to the anglicised form of some names. + # o'shenanigan seems to not be a legal barekey + #The Irish will have to use an earlier form Ó - which apparently many may prefer anyway. error "tomlish Unexpected single quote during barekey. 
[tomlish::parse::report_line]" } default { @@ -4693,63 +4787,69 @@ namespace eval tomlish::parse { } } else { switch -exact -- $state { - array-space { + array-space - keyval-value-expected - itable-keyval-value-expected { + #leading squote + #pseudo-token _start_squote_sequence ss not received by state machine + #This pseudotoken will trigger production of single_squote token or triple_squote token + #It currently doesn't trigger double_squote token + #(handle '' same as 'x' ie produce a single_squote and go into processing literal) + #review - producing double_squote for empty literal may be slightly more efficient. + #This token is not used to handle squote sequences *within* a multiliteral set_tokenType "_start_squote_sequence" set tok "'" } - itable-keyval-value-expected - keyval-value-expected { - set_tokenType "squote_seq_begin" + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up a tentative_accum_squote to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "tentative_trigger_squote" ;#trigger tentative_accum_squote set tok "'" return 1 } - table-space { - #tests: squotedkey.test - set_tokenType "squotedkey" - set tok "" - } - itable-space { - #tests: squotedkey_itable.test + table-space - itable-space { + #tests: squotedkey.test squotedkey_itable.test set_tokenType "squotedkey" set tok "" } - XXXitable-space { - #future - could there be multiline keys? - #this would allow arbitrary tcl dicts to be stored in toml + XXXtable-space - XXXitable-space { + #future - could there be multiline keys? MLLKEY, MLBKEY ? 
+ #this would (almost) allow arbitrary tcl dicts to be stored in toml (aside from escaping issues) #probably unlikely - as it's perhaps not very 'minimal' or ergonomic for config files - set_tokenType "squote_seq_begin" + #@2025 ABNF for toml mentions key, simple-key, unquoted-key, quoted-key and dotted-key + #where key is simple-key or dotted-key - no MLL or MLB components + #the spec states solution for arbitrary binary data is application specific involving encodings + #such as hex, base64 + set_tokenType "_start_squote_sequence" set tok "'" return 1 } tablename-state { #first char in tablename-state/tablearrayname-state - set_tokenType tablename + set_tokenType "tablename" append tok "'" } tablearrayname-state { - set_tokenType tablearrayname + set_tokenType "tablearrayname" append tok "'" } literal-state { + #shouldn't get here? review tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" - set_tokenType literal + set_tokenType "literal" incr -1 return 1 } multistring-space { - error "tomlish unimplemented - squote during state '$state'. [tomlish::parse::report_line]" - } - multiliteral-space { - #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row - #we are building up an squote_seq to determine if - #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines - #b) it is exactly ''' and we can terminate the whole multiliteral - #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space - set_tokenType "squote_seq_begin" - set tok "'" - return 1 + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "," + #error "tomlish unimplemented - squote during state '$state'. 
[tomlish::parse::report_line]" } dottedkey-space { - set_tokenType squotedkey + set_tokenType "squotedkey" } default { error "tomlish unhandled squote during state '$state'. [tomlish::parse::report_line]" @@ -4765,44 +4865,50 @@ namespace eval tomlish::parse { if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote { incr i -1 return 1 } - startquotesequence { - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - append tok $c - } elseif {$toklen == 2} { - append tok $c - #switch vs set? - set_tokenType "startmultiquote" - return 1 - } else { - error "tomlish unexpected token length $toklen in 'startquotesequence'" - } - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" return 1 - - #set toklen [tcl::string::length $tok] - #switch -- $toklen { - # 1 { - # set_tokenType "startsquote" - # incr i -1 - # return 1 - # } - # 2 { - # set_tokenType "startsquote" - # incr i -2 - # return 1 - # } - # default { - # error "tomlish unexpected _start_squote_sequence length $toklen" - # } - #} + } + tentative_accum_dquote { + #within multistring + #short tentative_accum_dquote tokens are returned if active upon receipt of any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + #assert state = trailing-squote-space + append tok $c + if {$existingtoklen == 4} { + #maxlen to be a tentative_accum_dquote is multidquote + 2 = 5 + #return tok with value """"" + return 1 + } + } + _start_dquote_sequence { + #pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { + #no conclusion can yet be reached + append tok $c + } + 2 { + #enter multistring + #switch? 
+ append tok $c + set_tokenType triple_dquote + return 1 + } + default { + #if there are more than 3 leading dquotes we also enter multistring space and the subsequent ones are handled + #by the tentative_accum_dquote check for ending sequence which can accept up to 5 and reintegrate the + #extra 1 or 2 dquotes as data. + error "tomlish unexpected token length [tcl::string::length $tok] in '_start_dquote_sequence'" + } + } } literal - literalpart { append tok $c @@ -4811,8 +4917,8 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" $c } else { - #unescaped quote always terminates a string? - set_token_waiting type endquote value "\"" complete 1 startindex $cindex + #unescaped quote always terminates a string + set_token_waiting type enddquote value "\"" complete 1 startindex $cindex return 1 } } @@ -4821,77 +4927,31 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" $c } else { - #incr i -1 - - if {$multi_dquote eq "\"\""} { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } + incr i -1 ;#throw the {"} back to loop - will be added to a tentative_accum_dquote token for later processing + return 1 } } whitespace { - switch -exact -- $state { - multistring-space { - #REVIEW - if {$had_slash} { - incr i -2 - return 1 - } else { - switch -- [tcl::string::length $multi_dquote] { - 2 { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] - set multi_dquote "" - return 1 - } - 1 { - incr i -2 - return 1 - } - 0 { - incr i -1 - return 1 - } - } - } - } - keyval-value-expected { - #end whitespace token and reprocess - incr i -1 - return 1 - - #if {$multi_dquote eq "\"\""} { - # set_token_waiting type startmultiquote value "\"\"\"" complete 1 - # set multi_dquote "" - # return 1 - #} else { - # #end whitespace token and reprocess - # incr i -1 - # return 1 - #} - } - table-space - 
itable-space { - incr i -1 - return 1 - } - default { - set_token_waiting type startquote value "\"" complete 1 startindex $cindex - return 1 - } + #assert: had_slash will only ever be true in multistring-space + if {$had_slash} { + incr i -2 + return 1 + } else { + #end whitespace token - throw dq back for reprocessing + incr i -1 + return 1 } } comment { if {$had_slash} {append tok "\\"} append tok $c } - XXXdquotedkey - XXXitablequotedkey { + XXXdquotedkey { if {$had_slash} { append tok "\\" append tok $c } else { - set_token_waiting type endquote value "\"" complete 1 startindex $cindex + set_token_waiting type enddquote value "\"" complete 1 startindex $cindex return 1 } } @@ -4901,7 +4961,7 @@ namespace eval tomlish::parse { append tok "\\" append tok $c } else { - #set_token_waiting type endsquote value "'" complete 1 + #set_token_waiting type enddquote value {"} complete 1 return 1 } } @@ -4924,64 +4984,40 @@ namespace eval tomlish::parse { #$slash_active not relevant when no tokenType #token is string only if we're expecting a value at this point switch -exact -- $state { - array-space { - #!? start looking for possible multistartquote - #set_tokenType startquote - #set tok $c - #return 1 - set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote - set tok $c - } - keyval-value-expected - itable-keyval-value-expected { - set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote - set tok $c + array-space - keyval-value-expected - itable-keyval-value-expected { + #leading dquote + #pseudo-token _start_squote_sequence ss not received by state machine + #This pseudotoken will trigger production of single_dquote token or triple_dquote token + #It currently doesn't trigger double_dquote token + #(handle "" same as "x" ie produce a single_dquote and go into processing string) + #review - producing double_dquote for empty string may be slightly more efficient. 
+ #This token is not used to handle dquote sequences once *within* a multistring + set_tokenType "_start_dquote_sequence" + set tok {"} } multistring-space { - #TODO - had_slash!!! - #REVIEW if {$had_slash} { set_tokenType "stringpart" set tok "\\\"" - set multi_dquote "" } else { - if {$multi_dquote eq "\"\""} { - tomlish::log::debug "- tokloop char dq ---> endmultiquote" - set_tokenType "endmultiquote" - set tok "\"\"\"" - return 1 - #set_token_waiting type endmultiquote value "\"\"\"" complete 1 - #set multi_dquote "" - #return 1 - } else { - append multi_dquote "\"" - } + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up a tentative_accum_squote to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "tentative_trigger_dquote" ;#trigger tentative_accum_dquote + set tok {"} + return 1 } } multiliteral-space { set_tokenType "literalpart" set tok "\"" } - XXXtable-space { - set_tokenType "startquote" - set tok $c - return 1 - } - XXXitable-space { - set_tokenType "startquote" - set tok $c - } table-space - itable-space { set_tokenType "dquotedkey" set tok "" } - tablename-state { - set_tokenType tablename - set tok $c - } - tablearrayname-state { - set_tokenType tablearrayname - set tok $c - } dottedkey-space { set_tokenType dquotedkey set tok "" @@ -4990,49 +5026,56 @@ namespace eval tomlish::parse { #set_tokenType dquote_seq_begin #set tok $c } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } default { - error "tomlish Unexpected quote during state '$state' [tomlish::parse::report_line]" + error "tomlish Unexpected dquote during 
state '$state' [tomlish::parse::report_line]" } } } } = { - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { - #assertion had_slash 0, multi_dquote "" + #assertion had_slash 0 append tok $c } - string - comment - dquotedkey - itablequotedkey { + string - comment - dquotedkey { #for these tokenTypes an = is just data. if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } whitespace { if {$state eq "multistring-space"} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen + incr i -1 return 1 } else { set_token_waiting type equal value = complete 1 startindex $cindex @@ -5063,7 +5106,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok ${dquotes}= + append tok = } multiliteral-space { set_tokenType "literalpart" @@ -5084,8 +5127,6 @@ namespace eval tomlish::parse { } cr { #REVIEW! - set dquotes $multi_dquote - set multi_dquote "" ;#!! # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
set slash_active 0 @@ -5098,16 +5139,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal { @@ -5159,8 +5202,6 @@ namespace eval tomlish::parse { } lf { # \n newline - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5171,16 +5212,19 @@ namespace eval tomlish::parse { append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { + #multiliteral or multistring incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal { @@ -5196,20 +5240,14 @@ namespace eval tomlish::parse { return 1 } stringpart { - if {$dquotes ne ""} { - append tok $dquotes + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] incr i -1 return 1 } else { - if {$had_slash} { - #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) - set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] - incr i -1 - return 1 - } else { - set_token_waiting type newline value lf complete 1 
startindex $cindex - return 1 - } + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 } } starttablename - tablename - tablearrayname - starttablearrayname { @@ -5236,20 +5274,13 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - #e.g one or 2 quotes just before nl - set_tokenType "stringpart" - set tok $dquotes - incr i -1 - return 1 - } set_tokenType "newline" set tok lf return 1 } } multiliteral-space { - #assert had_slash 0, multi_dquote "" + #assert had_slash 0 set_tokenType "newline" set tok "lf" return 1 @@ -5275,8 +5306,6 @@ namespace eval tomlish::parse { } } , { - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5287,39 +5316,40 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } comment - tablename - tablearrayname { if {$had_slash} {append tok "\\"} append tok , } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { #stringpart can have up to 2 quotes too if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } literal - literalpart - squotedkey { - #assert had_slash always 0, multi_dquote "" + #assert had_slash always 0 append tok $c } whitespace { if {$state eq "multistring-space"} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen + incr i -1 return 1 } else { set_token_waiting type comma value "," complete 1 startindex $cindex @@ -5338,10 +5368,10 @@ namespace eval tomlish::parse { 
set_tokenType "stringpart" set tok "" if {$had_slash} {append tok "\\"} - append tok "$dquotes," + append tok "," } multiliteral-space { - #assert had_slash 0, multi_dquote "" + #assert had_slash 0 set_tokenType "literalpart" set tok "," } @@ -5354,8 +5384,6 @@ namespace eval tomlish::parse { } } . { - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5366,42 +5394,45 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } comment - untyped_value { if {$had_slash} {append tok "\\"} append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } literal - literalpart - squotedkey { - #assert had_slash always 0, multi_dquote "" + #assert had_slash always 0 append tok $c } whitespace { switch -exact -- $state { multistring-space { - set backchars [expr {[tcl::string::length $dquotes] + 1}] + #review if {$had_slash} { - incr backchars 1 + incr i -2 + } else { + incr i -1 } - incr i -$backchars return 1 } xxxdottedkey-space { @@ -5444,7 +5475,7 @@ namespace eval tomlish::parse { set_tokenType "stringpart" set tok "" if {$had_slash} {append tok "\\"} - append tok "$dquotes." + append tok "." } multiliteral-space { set_tokenType "literalpart" @@ -5471,8 +5502,6 @@ namespace eval tomlish::parse { } " " { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
if {[tcl::string::length $tokenType]} { set had_slash $slash_active set slash_active 0 @@ -5483,16 +5512,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -5512,9 +5543,9 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok $dquotes$c + append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} { append tok "\\" } append tok $c } @@ -5526,8 +5557,7 @@ namespace eval tomlish::parse { incr i -2 return 1 } else { - #split into STRINGPART aaa WS " " - append tok $dquotes + #split into STRINGPART xxx WS " " incr i -1 return 1 } @@ -5537,15 +5567,7 @@ namespace eval tomlish::parse { } whitespace { if {$state eq "multistring-space"} { - if {$dquotes ne ""} { - #end whitespace token - #go back by the number of quotes plus this space char - set backchars [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backchars - return 1 - } else { - append tok $c - } + append tok $c } else { append tok $c } @@ -5588,12 +5610,6 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - set_tokenType "stringpart" - set tok $dquotes - incr i -1 - return 1 - } set_tokenType "whitespace" append tok $c } @@ -5613,9 +5629,6 @@ namespace eval tomlish::parse { } } tab { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
- if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) set slash_active 0 @@ -5626,12 +5639,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence + tentative_accum_squote - tentative_accum_dquote { + incr i -1 + return 1 } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -5662,7 +5681,6 @@ namespace eval tomlish::parse { return 1 } else { #split into STRINGPART aaa WS " " - append tok $dquotes incr i -1 return 1 } @@ -5706,15 +5724,8 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - set_tokenType stringpart - set tok $dquotes - incr i -1 - return 1 - } else { - set_tokenType whitespace - append tok $c - } + set_tokenType whitespace + append tok $c } } multiliteral-space { @@ -5732,16 +5743,31 @@ namespace eval tomlish::parse { #BOM (Byte Order Mark) - ignored by token consumer if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + tentative_accum_squote - tentative_accum_dquote { + incr i -1 + return 1 + } _start_squote_sequence { #assert - tok will be one or two squotes only + #A toml literal probably isn't allowed to contain this + #but we will parse and let the validator sort it out. 
incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart { append tok $c } + string - stringpart { + append tok $c + } default { + #state machine will generally not have entry to accept bom - let it crash set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex return 1 } @@ -5752,6 +5778,10 @@ namespace eval tomlish::parse { set_tokenType "literalpart" set tok $c } + multistring-space { + set_tokenType "stringpart" + set tok $c + } default { set_tokenType "bom" set tok "\uFEFF" @@ -5761,8 +5791,6 @@ namespace eval tomlish::parse { } } default { - set dquotes $multi_dquote - set multi_dquote "" ;#!! if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. @@ -5774,28 +5802,24 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } whitespace { if {$state eq "multistring-space"} { - if {$dquotes ne ""} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen - return 1 - } else { - incr i -1 - return 1 - } + incr i -1 + return 1 } else { #review incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. 
@@ -5815,7 +5839,7 @@ namespace eval tomlish::parse { return 1 } stringpart { - append tok $dquotes$c + append tok $c } default { #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname @@ -5835,22 +5859,12 @@ namespace eval tomlish::parse { error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } - XXXcurly-syntax { - puts stderr "curly-syntax - review" - if {[tomlish::utils::is_barekey $c]} { - set_tokenType "barekey" - append tok $c - } else { - error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" - } - } multistring-space { set_tokenType "stringpart" if {$had_slash} { - #assert - we don't get had_slash and dquotes at same time set tok \\$c } else { - set tok $dquotes$c + set tok $c } } multiliteral-space { @@ -5890,21 +5904,6 @@ namespace eval tomlish::parse { # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" #} switch -exact -- $tokenType { - startquotesequence { - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - #invalid - #eof with open string - error "tomlish eof reached without closing quote for string. [tomlish::parse::report_line]" - } elseif {$toklen == 2} { - #valid - #we ended in a double quote, not actually a startquoteseqence - effectively an empty string - switch_tokenType "startquote" - incr i -1 - #set_token_waiting type string value "" complete 1 - return 1 - } - } _start_squote_sequence { set toklen [tcl::string::length $tok] switch -- $toklen { @@ -5913,11 +5912,29 @@ namespace eval tomlish::parse { error "tomlish eof reached without closing single quote for string literal. 
[tomlish::parse::report_line]" } 2 { - #review - set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] set_tokenType "literal" set tok "" return 1 + + ##review + #set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + #set_tokenType "literal" + #set tok "" + #return 1 + } + } + } + _start_dquote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open string + error "tomlish eof reached without closing double quote for string. [tomlish::parse::report_line]" + } + 2 { + set_tokenType "string" + set tok "" + return 1 } } } @@ -6011,6 +6028,16 @@ namespace eval tomlish::dict { return $name } + proc _show_tablenames {tablenames_info} { + append msg \n "tablenames_info:" \n + dict for {tkey tinfo} $tablenames_info { + append msg " " "table: $tkey" \n + dict for {field finfo} $tinfo { + append msg " " "$field $finfo" \n + } + } + return $msg + } } tcl::namespace::eval tomlish::app { diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/dictn-0.1.1.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/dictn-0.1.1.tm new file mode 100644 index 00000000..c9ef87f2 --- /dev/null +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/dictn-0.1.1.tm @@ -0,0 +1,349 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. 
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+# (C) 2023
+#
+# @@ Meta Begin
+# Application dictn 0.1.1
+# Meta platform tcl
+# Meta license
+# @@ Meta End
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+## Requirements
+##e.g package require frobz
+
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+namespace eval dictn {
+    namespace export {[a-z]*}
+    namespace ensemble create
+}
+
+
+## ::dictn::append
+#This can of course 'ruin' a nested dict if applied to the wrong element
+# - i.e using the string op 'append' on an element that is itself a nested dict is analogous to the standard Tcl:
+#    %set list {a b {c d}}
+#    %append list x
+#    a b {c d}x
+# IOW - don't do that unless you really know that's what you want.
+# A single-element path is delegated to 'dict append'; a longer path is fetched, appended to and stored back.
+proc ::dictn::append {dictvar path {value {}}} {
+    if {[llength $path] == 1} {
+        uplevel 1 [list dict append $dictvar $path $value]
+    } else {
+        upvar 1 $dictvar dvar
+
+        ::set str [dict get $dvar {*}$path]
+        ::append str $value ;#must be qualified - a bare 'append' here resolves to ::dictn::append
+        dict set dvar {*}$path $str
+    }
+}
+
+proc ::dictn::create {args} {
+    ::set data {}
+    foreach {path val} $args {
+        dict set data {*}$path $val
+    }
+    return $data
+}
+
+proc ::dictn::exists {dictval path} {
+    return [dict exists $dictval {*}$path]
+}
+
+proc ::dictn::filter {dictval path filterType args} {
+    ::set sub [dict get $dictval {*}$path]
+    dict filter $sub $filterType {*}$args
+}
+
+proc ::dictn::for {keyvalvars dictval path body} {
+    ::set sub [dict get $dictval {*}$path]
+    uplevel 1 [list dict for $keyvalvars $sub $body] ;#run body in the caller's scope, as 'dict for' itself would
+}
+
+proc ::dictn::get {dictval {path {}}} {
+    return [dict get $dictval {*}$path]
+}
+
+proc ::dictn::getdef {dictval path default} {
+    return [dict getdef $dictval {*}$path $default]
+}
+
+proc ::dictn::getwithdefault {dictval path default} {
+    return [dict getdef $dictval {*}$path $default]
+}
+
+if {[info commands ::tcl::dict::getdef] ne ""} {
+    proc ::dictn::incr {dictvar path {increment {}} } {
+        if {$increment eq ""} {
+            ::set increment 1
+        }
+        if {[llength $path] == 1} {
+            uplevel 1 [list dict incr $dictvar $path $increment]
+        } else {
+            upvar 1 $dictvar dvar
+            if {![::info exists dvar]} {
+                dict set dvar {*}$path $increment
+            } else {
+                ::set newval [expr {[dict getdef $dvar {*}$path 0] + $increment}]
+                dict set dvar {*}$path $newval
+            }
+            return $dvar
+        }
+    }
+} else {
+    proc ::dictn::incr {dictvar path {increment {}} } {
+        if {$increment eq ""} {
+            ::set increment 1
+        }
+        if {[llength $path] == 1} {
+            uplevel 1 [list dict incr $dictvar $path $increment]
+        } else {
+            upvar 1 $dictvar dvar
+            if {![::info exists dvar]} {
+                dict set dvar {*}$path $increment
+            } else {
+                if {![dict exists $dvar {*}$path]} {
+                    ::set val 0
+                } else {
+                    ::set val [dict get $dvar {*}$path]
+                }
+                ::set newval [expr {$val + $increment}]
+                dict set dvar {*}$path $newval
+            }
+            return $dvar
+        }
+    }
+}
+
+proc ::dictn::info {dictval {path {}}} {
+    if {![string length $path]} {
+        return [dict info $dictval]
+    } else {
+        ::set sub [dict get $dictval {*}$path]
+        return [dict info $sub]
+    }
+}
+
+proc ::dictn::keys {dictval {path {}} {glob {}}} {
+    ::set sub [dict get $dictval {*}$path]
+    if {[string length $glob]} {
+        return [dict keys $sub $glob]
+    } else {
+        return [dict keys $sub]
+    }
+}
+
+proc ::dictn::lappend {dictvar path args} {
+    if {[llength $path] == 1} {
+        uplevel 1 [list dict lappend $dictvar $path {*}$args]
+    } else {
+        upvar 1 $dictvar dvar
+
+        ::set list [dict get $dvar {*}$path]
+        ::lappend list {*}$args
+        dict set dvar {*}$path $list
+    }
+}
+
+proc ::dictn::merge {args} {
+    error "nested merge not yet supported"
+}
+
+#dictn remove dictionaryValue ?path ...?
+proc ::dictn::remove {dictval args} {
+    ::set basic [list] ;#buffer basic (1element path) removals to do in a single call.
+
+    foreach path $args {
+        if {[llength $path] == 1} {
+            ::lappend basic $path
+        } else {
+            #extract,modify,replace
+            ::set subpath [lrange $path 0 end-1]
+
+            ::set sub [dict get $dictval {*}$subpath]
+            ::set sub [dict remove $sub [lindex $path end]]
+
+            dict set dictval {*}$subpath $sub
+        }
+    }
+
+    if {[llength $basic]} {
+        return [dict remove $dictval {*}$basic]
+    } else {
+        return $dictval
+    }
+}
+
+
+proc ::dictn::replace {dictval args} {
+    ::set basic [list] ;#buffer basic (1element path) replacements to do in a single call.
+
+    foreach {path val} $args {
+        if {[llength $path] == 1} {
+            ::lappend basic $path $val
+        } else {
+            #extract,modify,replace
+            ::set subpath [lrange $path 0 end-1]
+
+            ::set sub [dict get $dictval {*}$subpath]
+            ::set sub [dict replace $sub [lindex $path end] $val]
+
+            dict set dictval {*}$subpath $sub
+        }
+    }
+
+
+    if {[llength $basic]} {
+        return [dict replace $dictval {*}$basic]
+    } else {
+        return $dictval
+    }
+}
+
+
+proc ::dictn::set {dictvar path newval} {
+    upvar 1 $dictvar dvar
+    return [dict set dvar {*}$path $newval]
+}
+
+proc ::dictn::size {dictval {path {}}} {
+    return [dict size [dict get $dictval {*}$path]]
+}
+
+proc ::dictn::unset {dictvar path} {
+    upvar 1 $dictvar dvar
+    return [dict unset dvar {*}$path] ;#returns the updated dict value, as 'dict unset' does
+}
+
+proc ::dictn::update {dictvar args} {
+    ::set body [lindex $args end]
+    ::set maplist [lrange $args 0 end-1]
+
+    upvar 1 $dictvar dvar
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            uplevel 1 [list set $var [dict get $dvar {*}$path]] ;#expand path so nested paths resolve, matching the guard above
+        }
+    }
+
+    catch {uplevel 1 $body} result
+
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            upvar 1 $var $var
+            if {![::info exists $var]} {
+                uplevel 1 [list dict unset $dictvar {*}$path]
+            } else {
+                uplevel 1 [list dict set $dictvar {*}$path [::set $var]]
+            }
+        }
+    }
+    return $result
+}
+
+#an experiment.
+proc ::dictn::Applyupdate {dictvar args} {
+    ::set body [lindex $args end]
+    ::set maplist [lrange $args 0 end-1]
+
+    upvar 1 $dictvar dvar
+
+    ::set headscript ""
+    ::set i 0
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            #uplevel 1 [list set $var [dict get $dvar $path]]
+            ::lappend arglist $var
+            ::lappend vallist [dict get $dvar {*}$path]
+            ::append headscript [string map [list %i% $i %v% $var] {upvar 1 %v% %v%; set %v% [lindex $args %i%]} ]
+            ::append headscript \n
+            ::incr i
+        }
+    }
+
+    ::set body $headscript\r\n$body
+
+    puts stderr "BODY: $body"
+
+    #set result [apply [list args $body] {*}$vallist]
+    catch {apply [list args $body] {*}$vallist} result
+
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path] && [::info exists $var]} {
+            dict set dvar {*}$path [::set $var]
+        }
+    }
+    return $result
+}
+
+proc ::dictn::values {dictval {path {}} {glob {}}} {
+    ::set sub [dict get $dictval {*}$path]
+    if {[string length $glob]} {
+        return [dict values $sub $glob]
+    } else {
+        return [dict values $sub]
+    }
+}
+
+# Standard form:
+#'dictn with dictVariable path body'
+# - delegates to 'dict with' on the nested path, evaluated in the caller's scope.
+# Extended form:
+#'dictn with dictVariable path arrayVariable body'
+# - the dict at path is loaded into arrayVariable, body runs in the caller, then the array is written back to the dict.
+proc ::dictn::with {dictvar path args} {
+    if {[llength $args] == 1} {
+        ::set body [lindex $args 0]
+        return [uplevel 1 [list dict with $dictvar {*}$path $body]]
+    } else {
+        upvar 1 $dictvar dvar
+        ::lassign $args arrayname body
+
+        upvar 1 $arrayname arr
+        array set arr [dict get $dvar {*}$path]
+        ::set prevkeys [array names arr]
+
+        catch {uplevel 1 $body} result
+
+
+        foreach k $prevkeys {
+            if {![::info exists arr($k)]} {
+                dict unset dvar {*}$path $k ;#element was unset by body - 'dict unset' needs the variable name, not its value
+            }
+        }
+        foreach k [array names arr] {
+            dict set dvar {*}$path $k $arr($k) ;#write back through the variable name
+        }
+
+        return $result
+    }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+## Ready
+package provide dictn [namespace eval dictn {
+    variable version
+    ::set version 0.1.1
+}]
+return
\ No newline at end of file
diff --git
a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config index 247371ee..afd1e8f2 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/include_modules.config @@ -27,6 +27,7 @@ set bootsupport_modules [list\ src/vendormodules sha1\ src/vendormodules tomlish\ src/vendormodules test::tomlish\ + src/vendormodules dictn\ src/vendormodules textutil::adjust\ src/vendormodules textutil::repeat\ src/vendormodules textutil::split\ diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.3.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/test/tomlish-1.1.3.tm index ed5044a73e5fccdc0c437116e82de7c592c4f98a..8afb43d956b0212bfd728b88613392b2099993ea 100644 GIT binary patch delta 12790 zcmaKSbwHF|6YsKw(%s$Nl9Ee@G@^8ebeFJ5m()Xdhjc1}q;yCr-2x&KN(l(u6@A}N zUhlp8$L`ELGxN+jdt%O$#Gq6^74Kn*JCJ(mQ=f!jwoU^iI~fEtB+KSU{XPNJp#fp> zk&V>Msds{0m1=3Y8F`=Ke85J%SKxO5_48y@GbuG^9&g1d9#5vjf1N+#d(wRu$v!oD ztxU^V9^pE}+Tr_dl{1{Z1&+OjtFeLvjqEddlt5d80@8O%KQwgNS6yF>y^v*jVic_W znXI=_lF(Y6r1P=Oc8~Hn`kdarj%>XQWnH{#uTO>5RUyWBQu}AZn3dwKksp+$^-%9^ zui4~MZ6>d}VD&Q#uGBu3%ydRUDRLnPd3D@^$9k5U(k4$63VohYxSLrWXn`4 z>d4Dw8MP0KVhv*6BWFdUpv_uD`*iTM zOq95__av{+K<_v&ufa(F^fu0C*rlvav_i8wiG&sr(pLUNI8G;?@ z^p4&GgFc;3SOq?-w?bkiNUa?ex=9+&x%$QsVtkG9`5IYGfFq`+D$t@YeR51ppwT0R zjiH5Rsvra3eTkaiqOSl6^?x}&hjSor#l85Po#_(`3Tr#)Rhac(}K_g~B7OouJ zQkI4Dh4g%a$E}2UeTO24RnNlk)Pj_j!j->R1@Uh(JJyr+rYF2I6%415pAL|_H0FlS z@qmQr42mI9M8j)il6C9QN^YEWwsB^;3%6-Cw2z}sreB>jHNF>JbYGu3vEQn#t6Di$ zt`tPyvG*+Bve$XN#mBej4gI9@j@61?SP(s)+}o@|m(8)PSve|Vs5?$Q23NKj4x9At*KpjD&iCB@yoTqP-zpe&%g@K zmrFLdfo#N+=*48a%AKssb)V^;VNdSb;V}EMB?;M#+i@%k^n(Jmg8eX2;zAzBop0c4~9F 
ze=!FS$-1CVZr#n8yIWSME(N<#QE173pt{1-|jjz>XYgCEzOLKe%S+x=z= zq@srRzqY`887MEeh(jI11UBw^FnW{+JRsdTIPAWxc@6#8Qq9*q6y~RGztM+r)b)iA zg15w1b88Nn9LG&B&152nBiaB@QK7RB){KRu+Y2fR6L;KYG- zPE*$9rtW@XJ(o&qe#Fp) zx(YQ7(xTFcZ!>P;)Go@#4i zGC+q*-PCY@L}8QeAFP@P|B>_60_{8Vdx_m6t$G3l;-saCq0~?{m*w)8Qj;3iUja&P zTIfId&YZ94=LmWg^3OjL=IV zGd;$mWE87x>u%~)>&bGve1ebjHsHPyKdKa|fT>8)1FqM^3=4w-Fz!28R0mPR{dmY_QFKk|=v#zWTGY%xk|`JQJ(O4lM#im2s$ zJaimRNhWHrtFPs!AH3#huYeAI$f-Ye{CcmtDs(M&-=CqjhgPwEnwqU7)nA2YN6{&l z{F-tyh(iY5wt{oDa@*t8!O2c*`vF!*RS5-EQ4KS)gP0v@YgkBu%oe9C3S(Q)e)_bV#8>bPc zn_iu2gpps+1oUA29Zz#JKq7fsjW+)#hHlcjnnp)84mke6mIHJSE)N zPx%JTPVzL1h4i&UAYVq$?k~uP0&VRX(X3w1rd@AQ4Wetfet=6In`&{+_ssn-BpE*5 zZ*zJsw!iM#c{HtJM-L`7>FKv zCA2GmQ;Sitxt0pgBY>5KP>yBLzd!h3fr{BOaTw>tWixM|h31{MAZZL6YC%9Scu97f zb~I z>uY!r2o4Ou-P8Le<46BTR$p?DObA;UHmQEjwtr@w8c<^ln{`i+fc@M%z-2BGz->o` z9S_S{{?xJqIu-xXx{Ge&;$q@$WaemZZ{gquvmq9|CE;+haCPIbG;#N`vvIJ62mNo( zkN(H)lRsX57xkAD8wZ#b@FSN5?GN7KCMsH{+fkYS85Od*qnn$Bxvhow&xr2*%l*Hj zvHVXomVZWrV(#qj_&_4I0>L4^5HxpAk3!{IVBKW7k;{P3y@qfI!{23AE zZzuNdc5XJm`v~hlvS0n=`k(*!RyPBLk>Y^39S*kff3&Rs>`g$?7YhLA;Q<{Shy<=~ zE;bHU{|y^xwkHE>jVXT*p8GdWfBdHffA%Q`!tZ~F@)z;5A3yv@-|deeFi?K#8{IS! 
zHXx|@|HuA+`mmckL{}(WR#XBJ1R`Yufw*td?G+~iC>#j1n?CQb&W{yZU0y=O1UJDR zC*x{MMc7j{jx+)%kH!``gJm5N6ET(u(NNHr+Fx3gY@*YPz?}7%oFPo^>5jv>j?JX@ z%PpHg(daDUQl$UHjhMBh*?Gl~UIb2L@I2z>LZMAl;d1QyI%Jqe()~_VId)P%sJ8np zITm*aG}t=7Vc0B72}Oh+dGjesAQkOb(R6qPN|(^b!kfj0Sa*5yP+XbEp=(gOdSl5SsIzwSUiQ>XK-Srd3ZiruL>2^77ri?n}{;1FycO?vl@Sj_kM;j$<-j4 z9qh{;y2~r8YF6wwRR1jx{{(JCyDnT_VvKPV0jiC7w1rM)hU$$J*0!;3lCI3Uo8p)2 z$>>!6jhbJ0C#ntfV`4MVsK>a>Yc^Kzx?qyq40pCxNLufhB-5O@=j!Z^=uuK2eZ3S} zR^)>$pD3{+hhwTCb%EkW?SWn`_N?c|mMHLkPR%`$(^QlCbh>qHEO#v810$*RLmGt< z)u2_MLPK%*Nz`W>j@C$R)Ajt_#rzzq@&i&eY1H8BlbYKi^qzLU&L=%|ws}(?by%Mv zZtO98>EZ7&izvoIBCzaWjMVdDD(7J_gx6cm@zvP`VRH}zc}%dcFy9panbQEP6H}3) z?R(xjFQp{I%5HQa^sv=C&o&-BJ2T1kmU;wjGd%WLtcH4A2hI*Z#nt-3JnY~zZI#mA zT{9#3O{<5l02>#Z!`ZUOc5n$F#DJ&7^a`>gQNs?BJ@X?c@ZD(F+t7LYNOGlxhihlY zlo4%0U0ODMFkFR4G^^eiH9Ddxn@%`ZL;NFi@x&dLF?9%kFc&?L$l_dxjoa|dk!TUh zd_=cs&rs&EWF-tKdB8-nl`g7Lkw_}e;miQ1$GuOE`kbgFCky^1fl<$jcHxP{8`a!M zi_pl2cKlMs7{ezuPSK==?x~*}9zAYHr}=Co@$r*&Y(E8dSpV7MYTwKGGmPk0*3@0{ z=S8XWVI%|yNlLcMH4`{d)5mQJL>TxsyOwI)o!y%`t+j?Sbl3ax zpvOl^Yq3|lZ=C?dpTQhQ&&)ZsmMQ5`pUF;a8BUVYcL$mIbWX7->f?|m}j_(6=| zVZ=nnrwLpet*AA{t24qRbPBpijR<{eN2Y!DuIdRWb05X5w+j^+wPGJQ3byoR$E1Cp z8X7HgC$G0k_eme;xyR+J`FbH+)I&hlTjQKmMcsds?(jIP0c*O=^f7ow8Il9t;^oWi zsD|gA3~f?ceX(jcY;P&OvrarK(7Ke%AMG7Cw`=EJ(YVdOVcMXV<2dbIPPTrwpv_<> z4kbQr*&d33E~Y(k-Cu8cpi$|)cByK-^MLW%QCi@eJ%5qY2J?&()Ti$1SIo#IV$4j06M{yV=!NoG^)gTi)!Gzn1jtYgl3e z2fU|Y!xGC7M1bVJ{~+%xPZzgd&k11D@=a^C{5)+W>~@?gJZAk-x$3~LQ?q1KFhcQr^q}?!iTnxKX3Ho09Q@pjGPqF5 zIjgpzVX{jJd+NHRxJ(9xs8!F+SCyjG@YZvwbEvEKn6yfqF_^Ow6F~@YE{;}_hE*lJ zyQwAAhXtlC$$A2PZSZ$@9Crw6!JHsYkQ zg2f~!-C_<47EAOhnoBHjhbP}!@%cd0?*h$kZv&LqmR}uya*0@tXdhA-xGysDpzX?Q z%^);*r!01quZA%FGusO(DIYqdtvwIH)Tcq8Hb`mBZPey^A~zTF@WZSl_!x_8cV^CDv5L0g93J%;Ig` zw*mnVG|qj5;Uu&0)ld0}bPMS^P_+JyIlDTkE_!V(rG;>g5Q}#RUSN7hW-1>NeMGI! 
zFgcEF?{DegxL`X}3J2o?D}|P`@0QZygfuUB_lbEhA!@9L{dqXYVbP$u+ng-NI$vvo>`|^3t zYC1E4IS{1)rQl_?5dAlE6c6joc=ZpqU%%>5V-mB7{NT{4zPw4!f1P#FVOb8|K)L5B zECo5a0|MRuX9hrxZs)1LhX1a%f$cmOo}a*XaeYWGmwaw@Xj@NB4aZ`5UNi^MY}=@` z#ZcqM1l7^9C;m|FI=@-;aivdOrEl!$<+si@o!X=LUF;`MpLjpd)yS(+Z`<;vXRVoLjY0S%#}j_%mKlCuRo#J9SI!KWqP@sKHnZ?xat&~1iX1P>|Gjr@?zFcw?VLU7}l&gH*7h0Qf zTs|mOHI}C1rRgcYb$lYbn5azjZu&_67-#HEolSwwLr>%Tzi^GVSHqo3HA62=wL>gC z+-#a!6|0i#-I|6Ss+b|ODS`~!wq8!0l5x{@;k(G2()IV-Y4kzU?aeL61g?;7%5j!$ z`78^_uJ}pcBNN-+8 zbFoO0&{-o)GSO)qo5t+~@AD=iK#cYc^1kDA29~tL@ZR%-Nt9g za^D6$S>j1S){*Q**qRugz{AY8jO`^mn$z!1o^$px?7M{qFqAJqnv$Y$G2&>mi>TG?KvkYn^KGtrvMZ|A- zDMsL3+EcioBguD}9bB?$n2eZ)wc4lm6$ngZJrJInfc2+|1`!wc;hxV0$w3t|S=Kf8 zJQq)khESD+^zKf~FnnS~bLemuW)P3bfAlcl(!7+ZfQ|6;ia~1Ppbjobiwrrta7mKcM;vX1%;SidQdVm15|_>rv#_!o=6=sfJ-#n{F0WL1RsJ%W4ManH6HD=Y#_taCYQ;f>ySx@i+Xu8GYQN4&veF{Ba(;0NG54ugy zZ|$L)C#V=+k9H#Y6tcW1&Lb)6Y`OLjNb`deeSO;c_0!IGMr+Y?hwOQ1et_N>?|z^8 zIWdv8kFfu%4COwmu6!?lsXD#Y&?r&4j7BRa5zOu|l2BTdm85cfCuZ&t(RRWvu!jy; zm2pGI*c4umQRw=;yOi$&B`C?Anh|7^w3yeslnr(CR}ohOb}?2W`|xA=DK45`p;7Ih zoC@?~r0{=sPjl`mi)}{GwAeH4o-ie;lms=dk!;uui)FPxRZj%bKj47i=~tUv3#nl8 zeg;Ffau)g`PdGIL!1(;o^yo{%|BbD1*m_i5SA#qJbF(D{fyDo}9{qh%{k$E6Nx)HKLXO=R~+C*BE}QAq|C zUW{#57;$t}9ZF6p^Ejh^C6iB2&_)XDc>I<1C`^(3D+dchCIYDV5F`$`j`w++)rjJqO#Jz z^HI2pR$iJnQS(Sq5DiIJO@(P1&aM$DDnT$wx1@m*O)Qy3WZHy_NRn3xMFi*iDFTVTT z+%+{vjdHFdI-6HeE2t&rt)KORW+mA#YbvjVnh}$}v@G5riH)9M1-}Vu;~$-GF@Rc$8b)HNs`8Y5 zGkUi&Nh4xgM0;nD_ng3!I^&(q>y7>G!Ul4OtzyykvlXjMoM*9yyBb`m24(~nnre+2 zSo)b4OW#pT>!YZMvi-=fKt)HVwAo>12Z4Yl^zMuN)OUwJHZe!%n(a#$M11EMAxd*{ zZEBCM?e6l9zuz{p0kJ{hk^}hRUO;l$4f#Nwdo9?r=*YO}!cW+0So-&z1bwnYV;a>VOUzY!ZS?GE@iv-Tm3N=}1+IkS@ zOL2M@^FDMb&31#U?R%b=N*VgdE5GA0OYOYL$GesrJe7@v+q0}^(H|w6ejMuEvc1FTnn5j7TH2$OTuhQAC-B}hk8$7?9 zvNpnlkU-UpAQ0rH2mwn_p}&d`7DoTnTz(~~ySjR+u>ILzZcUsFpRN#@0>$^m)E)j8 zmAcQ6dGaFi1{G8~yPDaYUp`d7x`=UAox}Si!P=cX!5}6^NO^H#S8|-@{>}j#{6lC1 zF0di^t(t?Qy<_um=a>}%_{tRQBT6sNn!n4Qs{w6ViP=ZgXjC}%edB2Z7_qGk=H;Oy0On=sTXL|NQwE(h+RYkUO%9q6{ 
zZG=NVx7~$nhxqq0Lk#E=Jrzeh1&<$ebLB`B{(!p*HYg&|0W?T?9Y4?UtXv6FzNriC zO^>M2)QN_Z3ExSrKQj;P41DvR*@i@aSh*hBCMM4IY`BORr;)Fp%s`XCLOU)+%Q!P9 zCNw37T9{P-d8idU7Ovvp)W45(4Vhk7D$#`>H|~K z0!m#rF)Bp^Bfx>jqqPUmH42us z%+H>keCIKKvGWo+g?j|eF@!jIs{9+45wxMSlBY}&b)aI*t3RJb=>!kc8c)>5Og!0{ za$MgNv9r?2wH|U3EE6$U}YS;25Je$vxQ#yHV zL!HxJ?1zpaWWfEk&Oz46!a>5+HbHHXZ8OkL7Mf8Y9_}I?MmZGW!nk0C0ZBPO71~X% zB*if!Qv2f`zjjaX6}472d%D)($jK?go0lWzFeInD zZtsj-7d@<}1!3dNy3>`~zZdIE6BftwsMT3UDw?^e=U1rg`jBArjyLx6Grk)>N!H}0 zdgpNsTH!%jr|$b#rza%FFz zSAP_v$(TcQ`$Cfh?!m5?hB$s6OMl+W!)84|5Q&3$lT_6Yd59rkpOS{?(lpk-)`wLT z(`5d+!2hYJ;7bV2TvS~b#OlbwjJP(z^U@_}9Di@4ljJ36GfpmStg571j+@*6VBv%A z^ynOin#|eqJC6l^BlaU#lN}KW(%8OHbN{s3`}TqPj-|1ZEBmxGT&3YGZ0}b)SK*|j z*l_VoIxxc_E%qeeaJN_CKAAB{h9VOV_J6#b-}P+nhsQyHx|e7;bK645YspOVScB><+3hD`OlWzp zNISC1&|DIu6B`NyvIPbQ(F^aCVc+>7>Nxr$Ico_jW?I{vwE7m;(&}hLM(d$+4qP!# zbv_5f8wvtGzo^~E+K>3`78_)m(N^g4iMV&Ix2L{l?&;Y>Z4f<=Q3ptcbE-Kw9j*e8 zCbv|!eoQWJT}!5?x2rxzvT{1!x7b=MKHOM4SCy}R*KTO2p98^9N2@@tr11v06VyWNbqT-Nq$~9+q%VG-MS? zj;9h(7&km&fu>70Q~7ma+0VOL&OJAsyhvJkm8D7W1vBLE)kP6@tJSzKfslOrk_1Yc z3^RLY%JubDEGN0IYNI@MHD4_XLd8#JI;wKLgF-5$yTPantec#9+ zL>+7dgwV>;P^@DO^-HGSr7*EcM~{t>VnmW1?kM3M4TRRcf5rQC&tVEe2|Zo#-t!n( z?2<^?6iZ{5Nk7{vp5cxf?)ieBi>j__qK+A2ffDt776ZK#lfQ?~i%B%G8g9@=VgV^D z>ibkEvJmPSEvh|uF8HGzt>tHX)6y(OXE=FC?A=8~O_8_53{AR^vq!2hEcrb#Ejv$? 
zQ}R2zb~B**wTz3v)w(>V-M2(*fzgbl(}3i+SD!Z2)3}Oho$UzIzfDPsN7OfA$2Aq; z^gAtwrN$0#I&!oleZWFHl0t|l3P+*qhoc-PdM%vGO57paRkP)Q`AW}>T|S;H_+%1$ z>!f$$q~heuxeum~aDSSr_;-28JqfUk@OfoV$D$>a%+GsziSrJt?hgUSZw47Zbbfw| z955_54qVnh-D0?2d2p3{Sg$j2&vB&TyKAr*ttbw4F$6$S;ptcKu@=t63#KA;QLtbx z*QfPseRB%>@;wc(T>3cAbp|QO@y^(v8HsJ}@iVXN>o5K`qVY&eo~eb;OP9@oIkd%; z9TggBmVehO9MT4dEdT6%JU(Xw4sjGaCliM+z{fFX>hgqVRGGXA$NDvAhThP=q(j8p zr!g|(yS=jrSM3D;-^h^j_`o!d=80-^d|d`~7@2Gx}rR z@<|f@PdcMz{*jpuCj+llKc8@l5UIAI@{^D;)Kn#n+lNi((BzbZc5q+&(RnBhSEY;+ zo7*~are#3B0L?l%!|Vt54$)Y3E=EwXu&q$AlS^3LDnb@cP3=xcIB=Jo%$;NhZ0Y@Q zUZ&Xo7~$AQjc!UdZ`fdQ6;R$H?VZGD^A$PuMs?5@8{(sWe0HjBs$AREWAsQ!%Qr@i2WCdvTLjmw$Y!GY#pA&A?Jb)d4IZ=L|kYUwGI3pm7nGpK$Rt4pg)U<%q zGRa&GM^ThXGzUUYn~L;C>?4K)V~r?#Cw<3W#^wOEou7t6`a^h^s$z+gWmTjUJ1q+_ce3+QED#l^Jey=SkeA~%8t1) z4~J!}rearJTO-W5$zvNA77h{?F?G4Svb*?fXnk>q>J8&o@o5L}!vmJ=ynG6{G&=BTh+tS@}|IE!D0dp*JpcADO7a(EN4qGc}QI5H9nf-w$m*Rqn7e zmB00VU+YrC)YUPc%H(lRt)zTXGWM7!5-ZVWT44}jcdeVt()p9>5!$)*`aJ__?A%XI z(D^HaP3B##=MO^>ZE7cIHhI;ADOPaK=lE<>S{E0%&_#$3tu-868zEtV5ryvoG&Q>K zoNg33BV0?XzHjL5yB0?qpwk&G`zYpcyX`*6Y6x{@QFRVXat<|nZLy*2q-xWi$rgrC zUg$H8bA`S@m2S|~*!hvSIX~Pw<@uXIII|spuM7M~vYPI;S7hjp53uUlT(cwQmV!yl z3sTEqQ23ST(4A>Q5{mv#agPtH@+=hb!VPOWq za0CT79udf+p@l)8G^sHEi+lQs#Q66^g#=JP;(@`S5P{}M7Vs(p!k>21644AGEa2AaTBu2A<%Q1hm5b#T^8`4l19SC~P0@lL*%@P3B>1Y5- z3#!`T>@oq$MBEO6eyvq;_q z#XA6u2l`X-frvN?@BzV%%DFfiuouSyeoS=3YQ_tIFR5LIPg;pJ+CP7i_@|(~yHbWdWP=z(^mkGcW8%5)ez{0^{8KO(Fp= z(|Eyef3dmgNC0m-3-~SH&C}d;A@Cso4H=t(1khx#fK>&4vqIpNUnFX#ICxF)Cay0g z=s-dy3wTKAU-oB*6K7F?`-E>qzUJrvG>Zl7eE&B~0Cc3G0Z7>tU}_PVhzU^1mITX5 z0`b`~;4w*nI_ELi5d!;R0iM{R05v%*U;-)FLwW$sju&9bLjxpoDZmN8MB#SCKz1$~ z)nCA(e@E<30zu}+{vV7HSsn@1Ul^lX20s57{9oWk{{bNVboLjr=$11i+23#f3uJT) zbVQm*O7$1U=$518e;wV18~wrkjOH(7(Jdz=a{om0C#nbrTKbdy>E-7@|5ujzk4Mt`zD+xZuq z=$4yMC0GPh07d}@uc5pR0mg6&s=uzoZz)@9{i_@vPEPgL z&EYNORh@s7;bHfPz_UC;s=uy{ZW##Z{$l|6!5a71J-{vHM*V-3k$&9@0MEmTsQ##y z{wkK9-<25R`{c1xpKWXN9+vKfC;^ToXY?_19K@OS#q^wpkOw Vc6K5RWsoxHD=ln+^ZBXqe*m+y3oifw delta 9769 
zcmZWvbySqw*B*xMmhO=5?gj~Ikj|mIONJ1Xj+gERY3YzI1xZCZ1PMVxy7}gM@9*C4 zt~-Cs+0Q=Dd1q#=v!AtN=8wS1Bj9=pQd}@ssNp^4rT78@2$W9*0*M3DvebZE3N_$D ziUfUOm7%3goG#){#8|{&dc5@`EnBuJ2 zg5OD0Fuy?^C6{-*MVAmrqH?&P8bX+lW{=n=C!Ex2s@|ycy^U3hEh8*AKkZ&-0sgKU z)jEr0|IHxpJVDYtg7@+Qnp1XCEVEjZS3R8JuUU9+S-7 z2IZnh7Uxr#uQP?8-JNr^hqR!@31l+A5wpl#@8v^g$gSTU9w4V8t>O=2!7yx2i(a&S zJ5?L@ZL|Qtsp7+W#k(_W%PaiF&P6j)cAIjI)gh~#u$)ojbB8Nbq0ZAqiXm6`Zi zjZImX8kEDrAi@#JcHFqZtqu$VO7+ZqzIryBkjgpM47susq4g@M3ge(qE{<|-i_V$6 zI3xXK8<0P-I0t?!T>!;`FaM7&zy5XTFWkGkZ8%mJ7qlUZNDq?lT9ft6IUiGApvaf> zdc1f-_#wz-Q-^Auweqq-D;YXScGGdzD-N+dlh3K>5z0x0Mi^2ut$t7f-g>t;EedLe z**mT4jUnBt)MSXy8P<`tn3647~858#+pS>?~YBhH~TX6?g z8tB6-S&YvE`kRM-Q1aRC`+nN#Z&vrf#Fxeu!aHx8)3-aB}9$8IRKp}#lsFn#4* zVpM#-SBhjGv=lwANf$^ZrZ7W@aWv2xW@@>-y@@m>Fp*s9oAJZ?kiQF~eU;C#?sUQK zhn}L53F_1B;DI zImbJ6A_QE`CR7Z~rdSqnE7INa&x3wp7C=0H@s#tP)mkz{35mIs3|mqRo|pB>3_*JU zn4?U~YWD|mbz@Rp)=fw)q+6YN!MBGqK;qu0#?#87I$IC5;R`kTM z+K|olpoHV@8N+8Cx2UJ70Z`IL=6DJ#^;GVhq@ZuDKSF;&-N^f_Vl@?&?_wA+GJ}_-j{BH2A~(EbmferqFwS!Rrd6 zy(A<)TwxO)x>h$Qbf3(}0n7 z%lT4#3>1o6?~=qUAFx+BP{I^uTQ+w~~&i;_C_2HE}npd;7rqx!LswVUGd) z4&B}_3WRq7fxItP7pkBK#ZrVP7G~_NmB;Ztx4}o_!E$f{XRU}*tTFhVs3%8i zqQ*nIX-_UgWRpB-m&0*7B2ulKZr|yx!Sg`bQi6s)=*Pm|T*Hzi#dt1?%nKc$lN0X< zbM2s?S8s_lRNxT^Kp+qbh@B$baOUIL_%0X(5~2fvcwt^ZM}gvrHN=P0!o$PD-^|L@ z*%{*E#o-0<^lDUbJOSfYR)!gxDsD`W!h=Auq#zIvps7Gj_|FM@7bklc$nOI{stX&A zX|@o8j9q!0QlJvO#zx~LFzq4$&E7x~o?@sVbm=2B#&zzbz_KWkS&2fx-5cYi2F3!| zsSoM=rBHs^tH~m4@!)H8>D9S!6o2vgVu~8>GDn;oR$&VN(we`5>WxOG$g^9QOKmL( z(PfxD6(?LrfGUL@p9iDW?!Q03_u$nDxI7pd=t7T zV@)CDkx9=Qxt2N5a?yrWU~gs5`Knbsmg#_issXM985R6}b%LPFSlxD!TU}PzDI1A< z)?WE}=L&@BuDw01%tcbvWw6>-bY@!uv%H%~#7Z$fwcf?~*+lbKCF=l$ps+G~@*Txa z^~M*T>SGKU<*1X&XJPF#V$Wlj#rtQeW-PIAwb!K0bmtv>3D)OEwoMR&FR{faj!G&j0>nys_pV&S~hI-sDx zynknME*(#p=c$j~_B)NQj=z2pQDBp>h~1TG*L8va2^cp}n4$TQB_bj0?$42fKmvdT zCpE$!cOP)KB!oUb!U75Iz+mb5k`&I%gcK-ogqZFz&aQTeSksV0u}lT2N<|UV;ynI~ zcirZ$c64Kps}ySogIH5jNZ(SaCyXaP)n1aDZSZWyuTcB>^?auc<9j7%<|DciKhAb$ 
zVne+SkIzM(OecV_f>n{E=r-mp_Ke&8!R?9z%io;R))QLD72Ij3#D_e!uWKA>We}em zHse-hBAmAV;7qw*tn(boY>h;uHTjFi8)S@k^+}7{*p1BAuHAz_gc=ThDOA+>y4Tr5 zmH^AZ4@L6SL$PvoCL(y|$MiC{{!lPB+pzQ7hjnp!bjxfzmD#q+P?u%RnR-K2m*I9k z%=aWE8Z%ILMQMTTUHBPcLfgEFbrxY_ETTNC@m;br!c}i!d9FG6CDr}Q##v*^febe%qmy0I2@q_Dfh{0%7`S7+UL*Op{ z_uC9_!L@a&v!<(5ky18Q6nI|{=_6{tu#6VbQM`l7_6SLmsrN{6O>;~R<6{oWu>aU1 z2t(1N$T!{X@VU{wg_D+goK4f*#znBY9~j}S5#VL>SFYX1JkXhZPD8%{|1s8k=9MD% zk&rFr^qcu_^Onu|XQCl55RF(`be8!EX|BB7gKO2iAO$9R-?k+o2TvyQs@;A1#3eWO z^QK#%5T~*E9$P1Z_v#o7D)=fCWV7N<6I$oFE)G%b>b7>%zfRI$kUH_%k=)vlboM>B zB1>?b$)dLZlGsCIMMfKkId{WOvY@)B6UC5K^4(S=DSbpnP{CC2ijq5_<7|6_^JOO< z`nT9uvCe4&X2z>wXT(EmQsh@6^e6SzNi}jUP`m|YYiZi@#nxkkV^?*?V!F~d5pxAe z9|?(jFc^lsro(r{H@jYmDYR=+4eugg^~v?^>~cq0BDS)*2!B3Dxwh|6w!$j5VhR%c za#3X2KBSXIsa!l4{YxU|*Yodfx|@%M!`{cIT!CDZd*nMUD@1%Oh>fjaoonkFkL^_p zbTJzfbnW|FzY~%aoURGQa1-P8{*yXjFDwKvrbpdJ6bkw$l1`gL4WBgghV8?FK%!_M z5Esx#|5qfvizb4msl_aD;Y1#V-o2(lc!Cx#3u1#cmgR@V1C}@I+`Y4QeNX;Zzk>RnW6$~*r#}TL~ zU@E4kKixolSr~bv#w7XLwb)^TkWqB)9Ck!|Ck6$iO)_F=TtFd{`e(5!`TNBC$`c*8 zE*CsLQnWppjPRUO{`9(T@KiQ!`Tdj0woW)B=8!UFdzJApukI< zNT4$Ta5X=;YlOPwh8xwN9*=olP!bD^X9soVPcTBM@pkf_D_u~yl6QjbK9R zfDQr){v)vJ4Ns8%6j*>NjtrV25XmiuPjE|*o67laOF;Un=ZL9gQ)Q_>ZrcJ!d_L_g z_h|WyME=uDbM#ll`pgNqC9L#}c(oi7#z8-QI&wpa-I2f7j#e*xF;7N z#AX*DTb8FT2MRFs6Do4O58J@IDVC~QG`?O2KThbCA>a#+G_KU96UfIf zPz1a{pki4bub=mF#%0-fPO_`fk#%Y$!TlMWpf-p1e0o82J#`^igAx_{$=PU9VsQut z&y}ux@bkiQ+_RXHNn2=P>)Ucuo7XXwT3hCtuR>c_XxFBta%K-ee)4cd|m3-;m@<$&hkP#-y}8G=z26@pEDLk%g+1stzsFG!!xSn@KkA(=3Q`hmG%jFK7wyr^>pH{D&9US zX)kS_UR2K1(Tc0AMRw*^`|Dn46b#-#+Lx{|0aGe+U`Cb-SV~c$eSCyT5&Zv?sJq5c z!#&E?a${JVAO!;i;)h8TFCa=SgTfB*t_7XacL5R9_Hbsv5w#`jWXr$@5+jcq8Ue(} z<+SDrbCqCFHy*|O*9NWi)>AQ4hr^6-rU@AS<;VCy5REd#%s`KcnfoSDwN>5TGBf24 zv85}ySp3_D!5Oje@>aRc=wgmx^u*E$jr#;U{HE%Rk%O60N^K-a)$^?gTLPb&WdGAq zxDz7d!z5cC($Q}=t2Nov`ItJ&1BsGM?MJgqh4#!%R{$$59rPN{2eBsgM6%0IsRzB> z{7rj0VC;U2>K$A*LuK+_EufYLGQ2@v{6Jjjru?44#l+*ytaeaNTjLFuC{i(j?@V*0 z?k?Q$V*5J-;>ETf8_dpO)8o6FdqVG%XPx`lB2UMK_le(^GX&(@g^CG-weE7%4L;-h 
zJxUz=AaR^@%Ro1XYRnQi$Zb(Qh2myJS&$;j4Sbx=BlM;Q9EJHSlCjC0E(N(tet$(TR z^lLwzR7YJQ?u~?9{YoV%Hj5Y{7?sqdhFMBA~|JrY&xW43rN z<<+7L6+@4G7Qz0!ESWFu&5QKm&~YD`IXO?W3f3O=_kttn!LNunc7KE$*w2dH6vMTD zw{7v1E= z?~rq#r?^Xv8#g&X3}K%k+et%D4?}sq=2uPka)_8zsBzoFcJthuVGf_LK`=qy}&ojJ-D( zqW+wZiBC~3c$AFpa(#A4agX_jYy%0Yq_B!n5I9UF1c(@nV9_KDR54(~!(yqihk;xe zXt%-xv=pd-khe&{v6U!rZ-Ig9ju&c}$R$%k4pVtAOc03gj}j7)^g@AJyL-EOL987i z{=W;zgY-2=SROF427gV3#i<@H=CuSAx}qArNN*U@P|y#v%*Dqw@NsHa<9b;0jo!La zoFPQBr+U&gKa<-nl79%l6U=Gzkj0LRw`@%-J=OgsV=uUFO`B~nJ?i6(Y z9%McTY}{eZqB!+-=$f}=8b<;ft`p5OE3)FcB(u;>FV$dz$fQcSaV1S{IeW(QzD_Qh z)VgiZdNvzv`!%Vre&Rk;3ayE>a%duL0uP;W2dJIkxLl7aIq1niZ=R^JT+p0YS2QTy zyMET(JvRDCt-aB^mSQ`xrBcSxrOvXWm9v(NUQ@&x_g=k_Z ztEZ^WQ)QfFX-~8RFqIiAgyt%q!(VP~Da3veX35Hl zU~g-(eiVM;%UcN!+cgPjEV^GB)GQj_P5|CLBL0&(V8gCdOa``#Ga?8?M+*XR1EJBJ zKrRFs3MrEk>*Y%-uqE*FP)EFdZI_0kk+nRH z-OHp3;Th=`9^njxI3P(%a*M0*U=CyB`$`Q$!Ini)GdwX~jLgTc2Io&>j&7Nw4yP%U zZW6hF&fp4e2AL%a>&~l$O5kCIhnl20zqkp>Pld}akNPrPLM%pn=fF-GWo_L*J>aQK zr?#1ZgD{@QlsWk;2`yo}xMH}uB6fd9;PPNdsAZB^Vm!l#mvij3UK^5^{yr@&l61Zh z6klhHtCCTukEDShe&y3sfZp`WHs_qwkwkyZD5KC14|)agu}U_2@YZI$e|x;qr4<*~ zJ5*<<_-TDlgs1k(?k^9e6IQ5uHhsdT-(G)-I^8wQGEJDlaJRBCnsYMdUtAza8z$bB zozAXXK+3eH{hC~AL6V}2P8;b)$T$353ff*2C=$Kkr3_j6JWS!(x$lzeShqZjgX6A@ zU}@goGN?6`L1NrU(5%)1^}0jP)tR*^dt^Ln zDZ6`*qIE%&>suZNhU3y5Q`X#6;i~{Ey^*9@j#<5Cbk`5|T&NFEt*^^oU0W!fVnRQj z{5aXRx%yG}ZLQuo%QftcWW?6_`0T3GJ?mHgilHIU|Os4b8pR zY<+q+Qix~_-y?B`d7f9`N!pj7-tBXpg! 
z{NlOl+sBZl=e7(nMwI4tThaD`(+_BF&0Dxb8hy_(C{~A9t6b{(nQ@}2W*759tS&}m za)A91K1Z!Q?3f5NSx`f6D*LUwcR*3r-2=3Sjow}PxyVhIsv9eTUueP|f|0cC_Eryt zEig+%r=Duq$%QOx=d(GSU#SSRAL+SGADo*ja@)bPTCCFAd(YF^&fS7KQ3Uw490cwu z$|kr*_LtdFMe#AR`1C_jS(#ar%9Hb{!3qeXA03;RR$pwXAm|hql43r`7yYP=Mo88P z#-)80Mc53E)Ynhfw=3TQ^VW+dv#*ocZVPH)*jp2DoAu@}Lnw|1O>35*M0wJ*=~-Hz zq=(KBi>Y^y$Pmd!yybp4%X#tbPbb`>I$MXQ?CiNLS20HLib== z+rQRnAPMiS6SXyCff!u+tG63Ns9*VW=6#u>HuDP7thqlew7H3z5I+rTfi$i)Dy4pU zo0mE3=99ph%gt}MsHe*Q1Zp#RcTQVc+RxXV&5^+QJen+AjClpK97WP|rl)HL<9I9C zmnP)%;92`|H7yVW6`)@q|a+#bceA_I<203BFfuCqv5)umD?b) zy_4J%J(5ip-&9P_Ni(E-`5Ttr9>+2Vy#v>*l7QR6TXpl!Qs7ZQ9(pN4I-eea`LQ!M z>>S`3$i!MYrkE}x*5(=zL>ozCO2V5PAnJSu36zVqzbc5l)F0S(@9Cj6jaRrXOCD|H z52mofbjqBfV|;|@P{e`hIseU&uA`A%d%spetzVEt|9yvJ-X zMVr94SS{Z!h_QsxVw=wu$k5t`naP z%r$6qUsT?A-HS&mykKM+3^~G$aGB#Hc-n9;J~i?wEEbO=is$&$4!wWZgtEc?36WV&VJ(b#Guq!A<+4uBHy__0!`QD^4{ z&fO9hF7WHsxMny|888L}#lG?uFsDfoQ{&wAwZEnl56bE-%%0QCSKK`L$;dkxzBzjK zDKr_O@q+*)p`0S{yW7~dQQT8Ks4lSWwoMvE-X9k@eJ~uB06xXSYOd;ZAlgH67oqGh zRgasQgVQ9HaTg2`)h7E+{VcVqxHV63q7sun@>2%CVBznqPyl_x{4|7UD> z`X)iXIV7U=!#b#hNIg$EJT*m1cLi?OgWuhZj zcgA8)_j{;s&)VliZ_h3lpox&}BdfdtX5)vzY0;j;m%=?a_zhK!bwoS*wo+CrUw64` zeLox-D%9+o^0a3g@5J^?B-3NC*=hM&!oQ|Y;D@x}mPk~!w~`ymj!L!LCThRKwDYX|66T!+QA?Ah8uN6|6NP})peD7R3LwI?& z;5{6NH*V1yM^z2oWqNRp}H_qS6nVS;tFa9udJ;XAgF{)~yiq?c1EBUm@j z@|xp9n&OqI2kt?%d09hi{lSRUg}h&=Xy3?5=_rPgpQI>E&7qon#Hwb%3_SD+=_JfU zWKJIF4Hn(#UuANh-231naU@+0pw%~Q^!R^l zWxuX7J#ki9{RQh=csv?(dw-Pl`?%)u@!nL-$7KhKkorKmXV!{_tRtRM0#J(pd|E=RouSX@+;e@* zgQeul&BKB0EsT;5sCf4p@$f4^%+7GYxNa=UZ2Z|yxYXd4gF9win;AoLdel~n!EEtC zR1FQQp7-Ja>Ev1ULS1e<}vGc4J*H- zVG1e)Q%^47umuV2cRBUD#nI^e1|E#74IBNDNmgK?!Q}W1)_-M$H8)6rh6FssKgDBAEdP@J_=4@Y6`a zo}@630t~W%@5o@LKRAT~wg>|gOxS>RItv(}{2MMaqXMH97{Ea~B{-kzw|jvF6VT3J z0gF-p4Sh-pfyE3;@b;fBYBo|pEt3UoM)R9t&Q1wT!5|~;U+^gaEyV!rvM9mVf4Z_c zv4N>97O(~VZ$>sJF>suQ0T^Ucg6A22yBe|uz|v2Dn}Ix-09OtRc#HElq!PshiePYo z>u*RchWh(Fb1o(LEB9|VjyNh1lgk2j<@pVdb49@!{C}CafNvfzcv|RBqC7fqoW}zG 
zD*P8D`28sFd`j@X$Zyv~J~voX6o4ym2bYV(7Ez$CKmsfz@s~*eNEGsdKTG`PVk%<; zorNr5Wl2D@SOmx@!UY(Lae#JZEI^|O13dgcw;Wha3dT@`?ZU{?Ff#ybAOl;Z0jv@( z@Xh~BB;a)kFSuIv_l{dB5&&Py0tli$DhM>!>29Uxs#4fLjw0ted2z_AVzpkBcO zHc!2rHiP=ZlaU|UQ8M6lLwST+*=+nSh)c@(z{Ohti=)lO- zfRq<9oHcONLQ4Ig8vI}MdA+~rSipw{T`H?WOa diff --git a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.4.tm b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.4.tm index 7a6d5205..33d5b912 100644 --- a/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.4.tm +++ b/src/project_layouts/custom/_project/punk.shell-0.1/src/bootsupport/modules/tomlish-1.1.4.tm @@ -153,15 +153,10 @@ namespace eval tomlish { } #review - if {[uplevel 1 [list info exists tablenames_seen]]} { - upvar tablenames_seen tablenames_seen + if {[uplevel 1 [list info exists tablenames_info]]} { + upvar tablenames_info tablenames_info } else { - set tablenames_seen [list] ;#list of lists - } - if {[uplevel 1 [list info exists tablenames_closed]]} { - upvar tablenames_closed tablenames_closed - } else { - set tablenames_closed [list] ;#list of lists + set tablenames_info [dict create] ;#keys are lists {parenttable subtable etc} corresponding to parenttable.subtable.etc } foreach sub [lrange $keyval_element 2 end] { @@ -207,13 +202,10 @@ namespace eval tomlish { ARRAY { #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! 
- set prev_tablenames_seen $tablenames_seen - set prev_tablenames_closed $tablenames_closed - set tablenames_seen [list] - set tablenames_closed [list] + set prev_tablenames_info $tablenames_info + set tablenames_info [dict create] set result [list type $type value [::tomlish::to_dict [list $found_sub]]] - set tablenames_seen $prev_tablenames_seen - set tablenames_closed $prev_tablenames_closed + set tablenames_info $prev_tablenames_info } MULTISTRING - MULTILITERAL { #review - mapping these to STRING might make some conversions harder? @@ -295,23 +287,66 @@ namespace eval tomlish { #[Data] #temps = [{cpu = 79.5, case = 72.0}] proc to_dict {tomlish} { + package require dictn #keep track of which tablenames have already been directly defined, # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. #we don't error out just because a previous tablename segment has already appeared. 
- ##variable tablenames_seen [list] - if {[uplevel 1 [list info exists tablenames_seen]]} { - upvar tablenames_seen tablenames_seen - } else { - set tablenames_seen [list] ;#list of lists - } - if {[uplevel 1 [list info exists tablenames_closed]]} { - upvar tablenames_closed tablenames_closed + + #Declaring, Creating, and Defining Tables + #https://github.com/toml-lang/toml/issues/795 + #(update - only Creating and Defining are relevant terminology) + + #review + #tablenames_info keys created, defined, createdby, definedby, closedby + + #consider the following 2 which are legal: + #[table] #'table' created, defined=open definedby={header table} + #x.y = 3 + #[table.x.z] #'table' defined=closed closedby={header table.x.z}, 'table.x' created, 'table.x.z' created defined=open definedby={header table.x.z} + #k= 22 + # #'table.x.z' defined=closed closedby={eof eof} + + #equivalent datastructure + + #[table] #'table' created, defined=open definedby={header table} + #[table.x] #'table' defined=closed closedby={header table.x}, 'table.x' created defined=open definedby={header table.x} + #y = 3 + #[table.x.z] #'table.x' defined=closed closedby={header table.x.z}, 'table.x.z' created defined=open definedby={header table.x.z} + #k=22 + + #illegal + #[table] #'table' created and defined=open + #x.y = 3 #'table.x' created first keyval pair defined=open definedby={keyval x.y = 3} + #[table.x.y.z] #'table' defined=closed, 'table.x' closed because parent 'table' closed?, 'table.x.y' cannot be created + #k = 22 + # + ## - we would fail on encountering table.x.y because only table and table.x are effectively tables - but that table.x is closed should be detected (?) + + #illegal + #[table] + #x.y = {p=3} + #[table.x.y.z] + #k = 22 + ## we should fail because y is an inline table which is closed to further entries + + #note: it is not safe to compare normalized tablenames using join! 
+ # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + + + + if {[uplevel 1 [list info exists tablenames_info]]} { + upvar tablenames_info tablenames_info } else { - set tablenames_closed [list] ;#list of lists + set tablenames_info [dict create] ;#keyed on tablepath each of which is a list such as {config subgroup etc} (corresponding to config.subgroup.etc) } + log::info "---> to_dict processing '$tomlish'<<<" set items $tomlish @@ -354,7 +389,7 @@ namespace eval tomlish { #a.b.c = 1 #table_key_hierarchy -> a b - #leafkey -> c + #tleaf -> c if {[llength $dotted_key_hierarchy] == 0} { #empty?? probably invalid. review #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively @@ -362,10 +397,10 @@ namespace eval tomlish { } elseif {[llength $dotted_key_hierarchy] == 1} { #dottedkey is only a key - no table component set table_hierarchy [list] - set leafkey [lindex $dotted_key_hierarchy 0] + set tleaf [lindex $dotted_key_hierarchy 0] } else { set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] - set leafkey [lindex $dotted_key_hierarchy end] + set tleaf [lindex $dotted_key_hierarchy end] } #ensure empty tables are still represented in the datastructure @@ -380,143 +415,101 @@ namespace eval tomlish { } } #review? - if {[dict exists $datastructure {*}$table_hierarchy $leafkey]} { - error "Duplicate key '$table_hierarchy $leafkey'. The key already exists at this level in the toml data. The toml data is not valid." + if {[dict exists $datastructure {*}$table_hierarchy $tleaf]} { + error "Duplicate key '$table_hierarchy $tleaf'. The key already exists at this level in the toml data. The toml data is not valid." 
} #JMN test 2025 if {[llength $table_hierarchy]} { - lappend tablenames_seen $table_hierarchy + dictn incr tablenames_info [list $table_hierarchy seencount] } set keyval_dict [_get_keyval_value $item] if {![tomlish::dict::is_tomlish_typeval $keyval_dict]} { - lappend tablenames_seen [list {*}$table_hierarchy $leafkey] - lappend tablenames_closed [list {*}$table_hierarchy $leafkey] + set t [list {*}$table_hierarchy $tleaf] + dictn incr tablenames_info [list $t seencount] + dictn set tablenames_info [list $t closed] 1 #review - item is an ITABLE - we recurse here without datastructure context :/ #overwriting keys? todo ? - dict set datastructure {*}$table_hierarchy $leafkey $keyval_dict + dict set datastructure {*}$table_hierarchy $tleaf $keyval_dict } else { - dict set datastructure {*}$table_hierarchy $leafkey $keyval_dict + dict set datastructure {*}$table_hierarchy $tleaf $keyval_dict } + } + TABLEARRAY { + set tablename [lindex $item 1] + log::debug "---> to_dict processing item TABLENAME (name: $tablename): $item" + set norm_segments [::tomlish::utils::tablename_split $tablename true] ;#true to normalize + #we expect repeated tablearray entries - each adding a sub-object to the value, which is an array/list. + } TABLE { set tablename [lindex $item 1] + log::debug "---> to_dict processing item TABLE (name: $tablename): $item" #set tablename [::tomlish::utils::tablename_trim $tablename] set norm_segments [::tomlish::utils::tablename_split $tablename true] ;#true to normalize - if {$norm_segments in $tablenames_seen} { - error "Table name '$tablename' has already been directly defined in the toml data. Invalid." - } - log::debug "---> to_dict processing item $tag (name: $tablename): $item" - set name_segments [::tomlish::utils::tablename_split $tablename] ;#unnormalized - set last_seg "" - #toml spec rule - all segments mst be non-empty - #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. 
- - set table_key_sublist [list] - - foreach normseg $norm_segments { - lappend table_key_sublist $normseg - if {[dict exists $datastructure {*}$table_key_sublist]} { - #It's ok for this key to already exist *if* it was defined by a previous tablename or equivalent - #and if this key is longer - - #consider the following 2 which are legal: - #[table] - #x.y = 3 - #[table.x.z] - #k= 22 - - #equivalent - - #[table] - #[table.x] - #y = 3 - #[table.x.z] - #k=22 - - #illegal - #[table] - #x.y = 3 - #[table.x.y.z] - #k = 22 - ## - we should fail on encountering table.x.y because only table and table.x are effectively tables - - #illegal - #[table] - #x.y = {p=3} - #[table.x.y.z] - #k = 22 - ## we should fail because y is an inline table which is closed to further entries - - - #note: it is not safe to compare normalized tablenames using join! - # e.g a.'b.c'.d is not the same as a.b.c.d - # instead compare {a b.c d} with {a b c d} - # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. - #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' - #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} - - set sublist_length [llength $table_key_sublist] - set found_testkey 0 - if {$table_key_sublist in $tablenames_seen} { - set found_testkey 1 - } else { - #see if it was defined by a longer entry - foreach seen_table_segments $tablenames_seen { - if {[llength $seen_table_segments] <= $sublist_length} { - continue - } - #each tablenames_seen entry is already a list of normalized segments - - #we could have [a.b.c.d] early on - # followed by [a.b] - which was still defined by the earlier one. + set T_DEFINED [dictn getdef $tablenames_info [list $norm_segments defined] NULL] + if {$T_DEFINED ne "NULL"} { + #our tablename e.g [a.b.c.d] declares a space to 'define' subkeys - but there has already been a definition space for this path + set msg "Table name $tablename has already been directly defined in the toml data. 
Invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] + error $msg + } - set seen_longer [lrange $seen_segments 0 [expr {$sublist_length -1}]] - puts stderr "testkey:'$table_key_sublist' vs seen_match:'$seen_longer'" - if {$table_key_sublist eq $seen_longer} { - set found_testkey 1 - } - } - } - if {$found_testkey == 0} { - #the raw unnormalized tablename might be ok to display in the error message, although it's not the actual dict keyset - set msg "key $table_key_sublist already exists in datastructure, but wasn't defined by a supertable." - append msg \n "tablenames_seen:" \n - foreach ts $tablenames_seen { - append msg " " $ts \n - } + set name_segments [::tomlish::utils::tablename_split $tablename 0] ;#unnormalized e.g ['a'."b".c.d] -> 'a' "b" c d + #results of tablename_split 0 are 'raw' - ie some segments may be enclosed in single or double quotes. + + + set supertable [list] + ############## + # [a.b.c.d] + # norm_segments = {a b c d} + #check a {a b} {a b c} <---- supertables of a.b.c.d + ############## + foreach normseg [lrange $norm_segments 0 end-1] { + lappend supertable $normseg + if {![dictn exists $tablenames_info [list $supertable type]]} { + #supertable with this path doesn't yet exist + if {[dict exists $datastructure {*}$supertable]} { + #There is data though - so it must have been created as a keyval + set msg "Supertable [join $supertable .] 
of table name $tablename already has data - invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] error $msg + } else { + #here we 'create' it, but it's not being 'defined' ie we're not setting keyvals for it here + dictn set tablenames_info [list $supertable type] header + #ensure empty tables are still represented in the datastructure + dict set datastructure {*}$supertable [list] } - } - - } - - #ensure empty tables are still represented in the datastructure - set key_sublist [list] - foreach k $norm_segments { - lappend key_sublist $k - if {![dict exists $datastructure {*}$key_sublist]} { - dict set datastructure {*}$key_sublist [list] } else { - tomlish::log::notice "to_dict datastructure at (TABLE) subkey $key_sublist already had data: [dict get $datastructure {*}$key_sublist]" + #supertable has already been created - and maybe defined - but even if defined we can add subtables } } + #table [a.b.c.d] hasn't been defined - but may have been 'created' already by a longer tablename + # - or may have existing data from a keyval + if {![dictn exists $tablenames_info [list $norm_segments type]]} { + if {[dict exists $datastructure {*}$norm_segments]} { + set msg "Table name $tablename already has data - invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] + error $msg + } + #no data or previously created table + dictn set tablenames_info [list $norm_segments type] header - #We must do this after the key-collision test above! 
- lappend tablenames_seen $norm_segments - - + #We are 'defining' this table's keys and values here (even if empty) + dict set datastructure {*}$norm_segments [list] ;#ensure table still represented in datastructure even if we add no keyvals here + } + dictn set tablenames_info [list $norm_segments defined] open log::debug ">>> to_dict >>>>>>>>>>>>>>>>> normalized table key hierarchy : $norm_segments" #now add the contained elements foreach element [lrange $item 2 end] { set type [lindex $element 0] - log::debug "----> tododict processing $tag subitem $type processing contained element $element" + log::debug "----> todict processing $tag subitem $type processing contained element $element" switch -exact -- $type { DOTTEDKEY { set dkey_info [_get_dottedkey_info $element] @@ -547,14 +540,19 @@ namespace eval tomlish { puts stdout "to_dict>>> $keyval_dict" dict set datastructure {*}$norm_segments {*}$dkeys $leaf_key $keyval_dict #JMN 2025 - lappend tablenames_seen [list {*}$norm_segments {*}$dkeys] + #lappend tablenames_info [list {*}$norm_segments {*}$dkeys] + set tkey [list {*}$norm_segments {*}$dkeys] + dictn incr tablenames_info [list $tkey seencount] if {![tomlish::dict::is_tomlish_typeval $keyval_dict]} { #the value is either empty or or a dict structure with arbitrary (from-user-data) toplevel keys # inner structure will contain {type value } if all leaves are not empty ITABLES - lappend tablenames_seen [list {*}$norm_segments {*}$dkeys $leaf_key] + set tkey [list {*}$norm_segments {*}$dkeys $leaf_key] + #lappend tablenames_info [list {*}$norm_segments {*}$dkeys $leaf_key] + dictn incr tablenames_info [list $tkey seencount] #if the keyval_dict is not a simple type x value y - then it's an inline table ? #if so - we should add the path to the leaf_key as a closed table too - as it's not allowed to have more entries added. 
+ dictn set tablenames_info [list $tkey closed] 1 } } @@ -562,7 +560,7 @@ namespace eval tomlish { #ignore } default { - error "Sub element of type '$type' not understood in table context. Expected only KEY,DQKEY,SQKEY,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in table context. Expected only DOTTEDKEY,NEWLINE,COMMENT,WS" } } } @@ -1316,7 +1314,12 @@ namespace eval tomlish::encode { #NOTE - this DELIBERATELY does not validate the data, or process escapes etc #It encodes the tomlish records as they are. #ie it only produces toml shaped data from a tomlish list. + # #It is part of the roundtripability of data from toml to tomlish + #!! ie - it is not the place to do formatting of inline vs multiline !! + # That needs to be encoded in the tomlish data that is being passed in + # (e.g from_dict could make formatting decisions in the tomlish it produces) + # #e.g duplicate keys etc can exist in the toml output. #The to_dict from_dict (or any equivalent processor pair) is responsible for validation and conversion #back and forth of escape sequences where appropriate. @@ -1646,17 +1649,27 @@ namespace eval tomlish::decode { #pop_trigger_tokens: newline tablename endarray endinlinetable #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. 
set parentlevel [expr {$nest -1}] - set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like tentative_accum_squote need to do their own append switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote { + #should only apply within a multiliteral #### set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed #Without this - we would get extraneous empty list entries in the parent # - as the xxx-squote-space isn't a space level from the toml perspective # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + #assert prevstate always trailing-squote-space + #dev guardrail - remove? assertion lib? + switch -exact -- $prevstate { + trailing-squote-space { + } + default { + error "--- unexpected popped due to tentative_accum_squote but came from state '$prevstate' should have been trailing-squote-space" + } + } switch -- $tok { ' { - tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + tomlish::parse::set_token_waiting type single_squote value $tok complete 1 startindex [expr {$i -1}] } '' { #review - we should perhaps return double_squote instead? 
@@ -1669,74 +1682,51 @@ namespace eval tomlish::decode { tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] } '''' { - switch -exact -- $prevstate { - leading-squote-space { - error "---- 4 squotes from leading-squote-space - shouldn't get here" - #we should have emitted the triple and left the last for next loop + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata } - trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] - #todo integrate left squote with nest data at this level - set lastpart [lindex $v($parentlevel) end] - switch -- [lindex $lastpart 0] { - LITERALPART { - set newval "[lindex $lastpart 1]'" - set parentdata $v($parentlevel) - lset parentdata end [list LITERALPART $newval] - set v($parentlevel) $parentdata - } - NEWLINE { - lappend v($parentlevel) [list LITERALPART "'"] - } - MULTILITERAL { - #empty - lappend v($parentlevel) [list LITERALPART "'"] - } - default { - error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" - } - } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + MULTILITERAL { + #empty + lappend v($parentlevel) [list LITERALPART "'"] } default { - error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" } } } ''''' { - switch -exact -- $prevstate { - leading-squote-space { - error "---- 5 squotes from leading-squote-space - 
shouldn't get here" - #we should have emitted the triple and left the following squotes for next loop + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata } - trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] - #todo integrate left 2 squotes with nest data at this level - set lastpart [lindex $v($parentlevel) end] - switch -- [lindex $lastpart 0] { - LITERALPART { - set newval "[lindex $lastpart 1]''" - set parentdata $v($parentlevel) - lset parentdata end [list LITERALPART $newval] - set v($parentlevel) $parentdata - } - NEWLINE { - lappend v($parentlevel) [list LITERALPART "''"] - } - MULTILITERAL { - lappend v($parentlevel) [list LITERALPART "''"] - } - default { - error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" - } - } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "''"] + } + MULTILITERAL { + lappend v($parentlevel) [list LITERALPART "''"] } default { - error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" } } } } - puts stderr "tomlish::decode::toml ---- HERE squote_seq pop <$tok>" } triple_squote { #presumably popping multiliteral-space @@ -1763,7 +1753,119 @@ namespace eval tomlish::decode { lappend merged $part } default { - error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + error "---- triple_squote unhandled part type [lindex $part 0] unable 
to merge leveldata: $v($nest)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + tentative_accum_dquote { + #should only apply within a multistring + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the trailing-dquote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + #assert prevstate always trailing-dquote-space + #dev guardrail - remove? assertion lib? + switch -exact -- $prevstate { + trailing-dquote-space { + } + default { + error "--- unexpected popped due to tentative_accum_dquote but came from state '$prevstate' should have been trailing-dquote-space" + } + } + switch -- $tok { + {"} { + tomlish::parse::set_token_waiting type single_dquote value $tok complete 1 startindex [expr {$i -1}] + } + {""} { + #review - we should perhaps return double_dquote instead? 
+ #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_dquote value "" complete 1 startindex [expr {$i - 2}] + } + {"""} { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 3}] + } + {""""} { + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left dquote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + STRINGPART { + set newval "[lindex $lastpart 1]\"" + set parentdata $v($parentlevel) + lset parentdata end [list STRINGPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE - CONT - WS { + lappend v($parentlevel) [list STRINGPART {"}] + } + MULTISTRING { + #empty + lappend v($parentlevel) [list STRINGPART {"}] + } + default { + error "--- don't know how to integrate extra trailing dquote with data $v($parentlevel)" + } + } + } + {"""""} { + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 dquotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + STRINGPART { + set newval "[lindex $lastpart 1]\"\"" + set parentdata $v($parentlevel) + lset parentdata end [list STRINGPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE - CONT - WS { + lappend v($parentlevel) [list STRINGPART {""}] + } + MULTISTRING { + lappend v($parentlevel) [list STRINGPART {""}] + } + default { + error "--- don't know how to integrate extra trailing 2 dquotes with data $v($parentlevel)" + } + } + } + } + } + triple_dquote { + #presumably popping multistring-space + ::tomlish::log::debug "---- triple_dquote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + 
switch -exact -- [lindex $part 0] { + MULTISTRING { + lappend merged $part + } + STRINGPART { + if {$lasttype eq "STRINGPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + CONT - WS { + lappend merged $part + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. + lappend merged $part + } + default { + error "---- triple_dquote unhandled part type [lindex $part 0] unable to merge leveldata: $v($nest)" } } set lasttype [lindex $part 0] @@ -1809,15 +1911,12 @@ namespace eval tomlish::decode { endinlinetable { ::tomlish::log::debug "---- endinlinetable for last_space_action pop" } - endmultiquote { - ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" - } default { error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" } } if {$do_append_to_parent} { - #e.g squote_seq does it's own appends as necessary - so won't get here + #e.g tentative_accum_squote does it's own appends as necessary - so won't get here lappend v($parentlevel) [set v($nest)] } @@ -1831,8 +1930,8 @@ namespace eval tomlish::decode { switch -exact -- $tokenType { - squote_seq_begin { - #### + tentative_trigger_squote - tentative_trigger_dquote { + #### this startok will always be tentative_accum_squote/tentative_accum_dquote starting with one accumulated squote/dquote if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { lassign [dict get $transition_info starttok] starttok_type starttok_val set next_tokenType_known 1 @@ -1840,6 +1939,16 @@ namespace eval tomlish::decode { set tok $starttok_val } } + single_squote { + #JMN - REVIEW + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + triple_squote { + 
::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERALPART + } squotedkey { switch -exact -- $prevstate { table-space - itable-space { @@ -1849,6 +1958,9 @@ namespace eval tomlish::decode { #todo - check not something already waiting? tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } + triple_dquote { + set v($nest) [list MULTISTRING] ;#container for NEWLINE,STRINGPART,CONT + } dquotedkey { switch -exact -- $prevstate { table-space - itable-space { @@ -1858,7 +1970,7 @@ namespace eval tomlish::decode { #todo - check not something already waiting? tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } - XXXdquotedkey - XXXitablequotedkey { + XXXdquotedkey { #todo set v($nest) [list DQKEY $tok] ;#$tok is the keyname } @@ -1878,34 +1990,29 @@ namespace eval tomlish::decode { tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } } - startsquote { - #JMN - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "squotedkey" - set tok "" - } tablename { #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish # back to toml file will be identical. #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, - # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. 
+ # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names from + # a structural perspective. #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the # tomlish list? - set test_only [::tomlish::utils::tablename_trim $tok] - ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" + #set trimtable [::tomlish::utils::tablename_trim $tok] + #::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$trimtable'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. } tablearrayname { - set test_only [::tomlish::utils::tablename_trim $tok] - puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + #set trimtable [::tomlish::utils::tablename_trim $tok] + #::tomlish::log::debug "---- trimmed (but not normalized) tablearrayname: '$trimtable'" set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name } startarray { @@ -1914,14 +2021,6 @@ namespace eval tomlish::decode { startinlinetable { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. 
} - startmultiquote { - ::tomlish::log::debug "---- push trigger tokenType startmultiquote" - set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE - } - triple_squote { - ::tomlish::log::debug "---- push trigger tokenType triple_squote" - set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL - } default { error "---- push trigger tokenType '$tokenType' not yet implemented" } @@ -1931,11 +2030,11 @@ namespace eval tomlish::decode { #no space level change switch -exact -- $tokenType { squotedkey { - puts "---- squotedkey in state $prevstate (no space level change)" + #puts "---- squotedkey in state $prevstate (no space level change)" lappend v($nest) [list SQKEY $tok] } dquotedkey { - puts "---- dquotedkey in state $prevstate (no space level change)" + #puts "---- dquotedkey in state $prevstate (no space level change)" lappend v($nest) [list DQKEY $tok] } barekey { @@ -1960,29 +2059,46 @@ namespace eval tomlish::decode { startinlinetable { puts stderr "---- decode::toml error. 
did not expect startinlinetable without space level change (no space level change)" } - startquote { + single_dquote { switch -exact -- $newstate { string-state { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "string" set tok "" } - quoted-key { + dquoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "dquotedkey" set tok "" } - XXXitable-quoted-key { - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "itablequotedkey" - set tok "" + multistring-space { + lappend v($nest) [list STRINGPART {"}] + #may need to be joined on pop if there are neighbouring STRINGPARTS + } + default { + error "---- single_dquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + double_dquote { + #leading extra quotes - test: toml_multistring_startquote2 + switch -exact -- $prevstate { + itable-keyval-value-expected - keyval-value-expected { + puts stderr "tomlish::decode::toml double_dquote TEST" + #empty string + lappend v($nest) [list STRINGPART ""] + } + multistring-space { + #multistring-space to multistring-space + lappend v($nest) [list STRINGPART {""}] } default { - error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" } } + } - startsquote { + single_squote { switch -exact -- $newstate { literal-state { set next_tokenType_known 1 @@ -1995,41 +2111,17 @@ namespace eval tomlish::decode { set tok "" } multiliteral-space { - #false alarm squote returned from squote_seq pop + #false alarm squote returned from tentative_accum_squote pop ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" #(single squote - not terminating space) lappend v($nest) [list LITERALPART '] #may need to be joined on pop if there are neighbouring LITERALPARTs } default { 
- error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" + error "---- single_squote switch case not implemented for nextstate: $newstate (no space level change)" } } } - startmultiquote { - #review - puts stderr "---- got startmultiquote in state $prevstate (no space level change)" - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "stringpart" - set tok "" - } - endquote { - #nothing to do? - set tok "" - } - endsquote { - set tok "" - } - endmultiquote { - #JMN!! - set tok "" - } - string { - lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes - } - literal { - lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes - } double_squote { switch -exact -- $prevstate { keyval-value-expected { @@ -2044,6 +2136,19 @@ namespace eval tomlish::decode { } } } + enddquote { + #nothing to do? + set tok "" + } + endsquote { + set tok "" + } + string { + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes + } + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } multistring { #review lappend v($nest) [list MULTISTRING $tok] @@ -2056,11 +2161,9 @@ namespace eval tomlish::decode { } literalpart { lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly - } - itablequotedkey { - } untyped_value { + #would be better termed unclassified_value #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. 
if {$tok in {true false}} { set tag BOOL @@ -2238,7 +2341,7 @@ namespace eval tomlish::utils { #eg {dog."tater.man"} set sLen [tcl::string::length $tablename] set segments [list] - set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + set mode "preval" ;#5 modes: preval, quoted,litquoted, unquoted, postval #quoted is for double-quotes, litquoted is for single-quotes (string literal) set seg "" for {set i 0} {$i < $sLen} {incr i} { @@ -2249,139 +2352,166 @@ namespace eval tomlish::utils { set lastChar "" } + #todo - track\count backslashes properly + set c [tcl::string::index $tablename $i] + if {$c eq "\""} { + if {($lastChar eq "\\")} { + #not strictly correct - we could have had an even number prior-backslash sequence + #the toml spec would have us error out immediately on bsl in bad location - but we're + #trying to parse to unvalidated tomlish + set ctest escq + } else { + set ctest dq + } + } else { + set ctest [string map [list " " sp \t tab] $c] + } - if {$c eq "."} { - switch -exact -- $mode { - unquoted { - #dot marks end of segment. - lappend segments $seg - set seg "" - set mode "unknown" - } - quoted { - append seg $c - } - unknown { - lappend segments $seg - set seg "" - } - litquoted { - append seg $c - } - default { - #mode: syntax - #we got our dot. - the syntax mode is now satisfied. - set mode "unknown" + switch -- $ctest { + . { + switch -exact -- $mode { + preval { + error "tablename_split. dot not allowed - expecting a value" + } + unquoted { + #dot marks end of segment. + #if {![is_barekey $seg]} { + # error "tablename_split. 
dot not allowed - expecting a value" + #} + lappend segments $seg + set seg "" + set mode "preval" + } + quoted { + append seg $c + } + litquoted { + append seg $c + } + postval { + #got dot in an expected location + set mode "preval" + } } } - } elseif {($c eq "\"") && ($lastChar ne "\\")} { - if {$mode eq "unknown"} { - if {[tcl::string::trim $seg] ne ""} { - #we don't allow a quote in the middle of a bare key - error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" - } - set mode "quoted" - set seg "\"" - } elseif {$mode eq "unquoted"} { - append seg $c - } elseif {$mode eq "quoted"} { - append seg $c - - if {$normalize} { - lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] - } else { - lappend segments $seg + dq { + #unescaped dquote + switch -- $mode { + preval { + set mode "quoted" + set seg "\"" + } + unquoted { + #invalid in barekey - but we are after structure only + append seg $c + } + quoted { + append seg $c + if {$normalize} { + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] + } else { + lappend segments $seg + } + set seg "" + set mode "postval" ;#make sure we only accept a dot or end-of-data now. + } + litquoted { + append seg $c + } + postval { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } } - - set seg "" - set mode "syntax" ;#make sure we only accept a dot or end-of-data now. - } elseif {$mode eq "litquoted"} { - append seg $c - } elseif {$mode eq "syntax"} { - error "tablename_split. expected whitespace or dot, got double quote. 
tablename: '$tablename'" - } - } elseif {($c eq "\'")} { - if {$mode eq "unknown"} { - append seg $c - set mode "litquoted" - } elseif {$mode eq "unquoted"} { - #single quote inside e.g o'neill - append seg $c - } elseif {$mode eq "quoted"} { - append seg $c - - } elseif {$mode eq "litquoted"} { - append seg $c - #no normalization to do - lappend segments $seg - set seg "" - set mode "syntax" - } elseif {$mode eq "syntax"} { - error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" } - - } elseif {$c in [list " " \t]} { - if {$mode eq "syntax"} { - #ignore - } else { - append seg $c + ' { + switch -- $mode { + preval { + append seg $c + set mode "litquoted" + } + unquoted { + #single quote inside e.g o'neill - ultimately invalid - but we pass through here. + append seg $c + } + quoted { + append seg $c + } + litquoted { + append seg $c + #no normalization to do aside from stripping squotes + if {$normalize} { + lappend segments [tcl::string::range $seg 1 end-1] + } else { + lappend segments $seg + } + set seg "" + set mode "postval" + } + postval { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + } } - } else { - if {$mode eq "syntax"} { - error "tablename_split. Expected a dot separator. got '$c'. tablename: '$tablename'" + sp - tab { + switch -- $mode { + preval - postval { + #ignore + } + unquoted { + #terminates a barekey + lappend segments $seg + set seg "" + set mode "postval" + } + default { + #append to quoted or litquoted + append seg $c + } + } } - if {$mode eq "unknown"} { - set mode "unquoted" + default { + switch -- $mode { + preval { + set mode unquoted + append seg $c + } + postval { + error "tablename_split. Expected a dot separator. got '$c'. 
tablename: '$tablename'" + } + default { + append seg $c + } + } } - append seg $c } + if {$i == $sLen-1} { #end of data ::tomlish::log::debug "End of data: mode='$mode'" - #REVIEW - we can only end up in unquoted or syntax here? are other branches reachable? switch -exact -- $mode { - quoted { - if {$c ne "\""} { - error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" - } - if {$normalize} { - lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] - #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong - } else { - lappend segments $seg - } + preval { + error "tablename_split. Expected a value after last dot separator. tablename: '$tablename'" } - litquoted { - set trimmed_seg [tcl::string::trim $seg] - if {[tcl::string::index $trimmed_seg end] ne "\'"} { - error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" - } + unquoted { lappend segments $seg } - unquoted - unknown { - lappend segments $seg + quoted { + error "tablename_split. Expected a trailing double quote. tablename: '$tablename'" } - syntax { - #ok - segment already lappended + litquoted { + error "tablename_split. Expected a trailing single quote. tablename: '$tablename'" } - default { - lappend segments $seg + postval { + #ok - segment already lappended } } } } - foreach seg $segments { - set trimmed [tcl::string::trim $seg " \t"] - #note - we explicitly allow 'empty' quoted strings '' & "" - # (these are 'discouraged' but valid toml keys) - #if {$trimmed in [list "''" "\"\""]} { - # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" - #} - if {$trimmed eq "" } { - error "tablename_split. Empty segment found. 
tablename: '$tablename' segments [llength $segments] ($segments)" - } - } + + #note - we must allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + return $segments } @@ -2432,26 +2562,34 @@ namespace eval tomlish::utils { #- escape_string and unescape_string would not be reliably roundtrippable inverses anyway. #REVIEW - provide it anyway? When would it be desirable to use? - variable Bstring_control_map [list\ - \b {\b}\ - \n {\n}\ - \r {\r}\ - \" {\"}\ - \x1b {\e}\ - \\ "\\\\"\ - ] + variable Bstring_control_map [dict create] + dict set Bstring_control_map \b {\b} + dict set Bstring_control_map \n {\n} + dict set Bstring_control_map \r {\r} + dict set Bstring_control_map \" {\"} + #dict set Bstring_control_map \x1b {\e} ;#should presumably be only be a convenience for decode - going the other way we get \u001B + dict set Bstring_control_map \\ "\\\\" + #\e for \x1b seems like it might be included - v1.1?? hard to find current state of where toml is going :/ #for a Bstring (Basic string) tab is explicitly mentioned as not being one that must be escaped. - for {set cdec 0} {$cdec <= 8} {incr cdec} { + #8 = \b - already in list. 
+ #built the remainder whilst checking for entries already hardcoded above -in case more are added to the hardcoded list + for {set cdec 0} {$cdec <= 7} {incr cdec} { set hhhh [format %.4X $cdec] - lappend Bstring_control_map [format %c $cdec] \\u$hhhh + set char [format %c $cdec] + if {![dict exists $Bstring_control_map $char]} { + dict set Bstring_control_map $char \\u$hhhh + } } for {set cdec [expr {0x0A}]} {$cdec <= 0x1F} {incr cdec} { set hhhh [format %.4X $cdec] - lappend Bstring_control_map [format %c $cdec] \\u$hhhh + set char [format %c $cdec] + if {![dict exists $Bstring_control_map $char]} { + dict set Bstring_control_map $char \\u$hhhh + } } # \u007F = 127 - lappend Bstring_control_map [format %c 127] \\u007F + dict set Bstring_control_map [format %c 127] \\u007F #Note the inclusion of backslash in the list of controls makes this non idempotent - subsequent runs would keep encoding the backslashes! #escape only those chars that must be escaped in a Bstring (e.g not tab which can be literal or escaped) @@ -2474,6 +2612,7 @@ namespace eval tomlish::utils { # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn # it replaces \ with a single whitespace (trailing backslash) #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + #plus \e for \x1b? 
set buffer "" set buffer4 "" ;#buffer for 4 hex characters following a \u @@ -2558,12 +2697,13 @@ namespace eval tomlish::utils { set ctest [tcl::string::map {{"} dq} $c] switch -exact -- $ctest { dq { - set e "\\\"" - append buffer [subst -nocommand -novariable $e] + append buffer {"} } b - t - n - f - r { - set e "\\$c" - append buffer [subst -nocommand -novariable $e] + append buffer [subst -nocommand -novariable "\\$c"] + } + e { + append buffer \x1b } u { set unicode4_active 1 @@ -2578,8 +2718,7 @@ namespace eval tomlish::utils { #review - toml spec says all other escapes are reserved #and if they are used TOML should produce an error. #we leave detecting this for caller for now - REVIEW - append buffer "\\" - append buffer $c + append buffer "\\$c" } } } else { @@ -3003,7 +3142,7 @@ namespace eval tomlish::parse { # states: # table-space, itable-space, array-space # array-value-expected,keyval-value-expected,itable-keyval-value-expected, keyval-syntax, - # quoted-key, squoted-key + # dquoted-key, squoted-key # string-state, literal-state, multistring... # # notes: @@ -3039,6 +3178,12 @@ namespace eval tomlish::parse { variable stateMatrix set stateMatrix [dict create] + #--------------------------------------------------------- + #WARNING + #The stateMatrix implementation here is currently messy. + #The code is a mixture of declarative via the stateMatrix and imperative via switch statements during PUSH/POP/SAMESPACE transitions. + #This means the state behaviour has to be reasoned about by looking at both in conjuction. 
+ #--------------------------------------------------------- #xxx-space vs xxx-syntax inadequately documented - TODO @@ -3060,35 +3205,19 @@ namespace eval tomlish::parse { barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ dquotedkey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ - XXXstartquote "quoted-key"\ - XXXstartsquote "squoted-key"\ + XXXsingle_dquote "quoted-key"\ + XXXsingle_squote "squoted-key"\ comment "table-space"\ starttablename "tablename-state"\ starttablearrayname "tablearrayname-state"\ - startmultiquote "err-state"\ - endquote "err-state"\ + enddquote "err-state"\ + endsquote "err-state"\ comma "err-state"\ eof "end-state"\ equal "err-state"\ cr "err-lonecr"\ } - #itable-space/ curly-syntax : itables - dict set stateMatrix\ - itable-space {\ - whitespace "itable-space"\ - newline "itable-space"\ - barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - dquotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - endinlinetable "POPSPACE"\ - XXXstartquote "quoted-key"\ - XXXstartsquote {TOSTATE "squoted-key" comment "jn-testing"}\ - comma "err-state"\ - comment "itable-space"\ - eof "err-state"\ - } - #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}} dict set stateMatrix\ @@ -3113,26 +3242,19 @@ namespace eval tomlish::parse { dict set stateMatrix\ keyval-value-expected {\ whitespace "keyval-value-expected"\ - untyped_value {TOSTATE "keyval-tail" note ""}\ - startquote {TOSTATE "string-state" returnstate keyval-tail}\ - startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ - squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ - startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is 
triggered"}\ - double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ - triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ - startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ - startarray {PUSHSPACE array-space returnstate keyval-tail}\ - } - #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} - dict set stateMatrix\ - leading-squote-space {\ - squote_seq "POPSPACE"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + literal {TOSTATE "keyval-tail" note "required for empty literal at EOF"}\ + string {TOSTATE "keyval-tail" note "required for empty string at EOF"}\ + single_dquote {TOSTATE "string-state" returnstate keyval-tail}\ + triple_dquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + single_squote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ } - #dict set stateMatrix\ - # keyval-process-leading-squotes {\ - # startsquote "literal-state"\ - # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ - # } + #double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"} + + #2025 - no leading-squote-space - only trailing-squote-space. 
dict set stateMatrix\ keyval-tail {\ @@ -3142,81 +3264,106 @@ namespace eval tomlish::parse { eof "end-state"\ } + + #itable-space/ curly-syntax : itables + # x={y=1,} + dict set stateMatrix\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + dquotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + comma "err-state"\ + comment "itable-space"\ + eof "err-state"\ + } + #we don't get single_squote etc here - instead we get the resulting squotedkey token + + + # ??? review - something like this + # + # x={y =1,} dict set stateMatrix\ itable-keyval-syntax {\ - whitespace "itable-keyval-syntax"\ - barekey {PUSHSPACE "dottedkey-space"}\ - squotedkey {PUSHSPACE "dottedkey-space"}\ - dquotedkey {PUSHSPACE "dottedkey-space"}\ - equal "itable-keyval-value-expected"\ + whitespace {TOSTATE "itable-keyval-syntax"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + dquotedkey {PUSHSPACE "dottedkey-space"}\ + equal {TOSTATE "itable-keyval-value-expected"}\ newline "err-state"\ eof "err-state"\ } + + # x={y=1} + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + dict set stateMatrix\ itable-keyval-value-expected {\ whitespace "itable-keyval-value-expected"\ untyped_value {TOSTATE "itable-val-tail" note ""}\ - startquote {TOSTATE "string-state" returnstate itable-val-tail}\ - startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ - squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ - startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ - double_squote {TOSTATE "itable-val-tail" 
note "empty literal received when double squote occurs"}\ + single_dquote {TOSTATE "string-state" returnstate itable-val-tail}\ + triple_dquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + single_squote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ } - dict set stateMatrix\ - itable-keyval-space {\ - whitespace "itable-keyval-syntax"\ - equal {TOSTATE "itable-keyval-value-expected" note "required"}\ - } + #double_squote not currently generated by _start_squote_sequence - '' processed as single_squote to literal-state just like 'xxx' + # review + # double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"} + + + # x={y=1,z="x"} + #POPSPACE is transition from itable-keyval-space to parent itable-space dict set stateMatrix\ itable-val-tail {\ whitespace "itable-val-tail"\ endinlinetable "POPSPACE"\ comma "POPSPACE"\ - XXXnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ - newline "POPSPACE"\ + newline {TOSTATE "itable-val-tail" note "itable-space ??"}\ comment "itable-val-tail"\ eof "err-state"\ } - #dict set stateMatrix\ - # itable-quoted-key {\ - # whitespace "NA"\ - # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ - # newline "err-state"\ - # endquote "itable-keyval-syntax"\ - # } - #dict set stateMatrix\ - # itable-squoted-key {\ - # whitespace "NA"\ - # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ - # newline "err-state"\ - # endsquote "itable-keyval-syntax"\ - # } + # XXXnewline "POPSPACE" + # We shouldn't popspace on newline - as if there was no comma we need to stay in itable-val-tail + # This means the newline and subsequent whitespace, comments etc become part of the preceeding dottedkey record + #e.g + # x = { + # j=1 + # 
#comment within dottedkey j record + # , # comment unattached + # #comment unattached + # k=2 , #comment unattached + # l=3 #comment within l record + # , m=4 + # #comment associated with m record + # + # #still associated with m record + # } + ## - This doesn't quite correspond to what a user might expect - but seems like a consistent mechanism. + #The awkwardness is because there is no way to put in a comment that doesn't consume a trailing comma + #so we cant do: j= 1 #comment for j1 , + # and have the trailing comma recognised. + # + # To associate: j= 1, #comment for j1 + # we would need some extra processing . (not popping until next key ? extra state itable-sep-tail?) REVIEW - worth doing? + # + # The same issue occurs with multiline arrays. The most natural assumption is that a comment on same line after a comma + # is 'associated' with the previous entry. + # + # These comment issues are independent of the data dictionary being generated for conversion to json etc - as the comments don't carry through anyway, + # but are a potential oddity for manipulating the intermediate tomlish structure whilst attempting to preserve 'associated' comments + # (e.g reordering records within an itable) + #The user's intention for 'associated' isn't always clear and the specs don't really guide on this. - - - #array-value-expected ? - dict set stateMatrix\ - XXXvalue-expected {\ - whitespace "value-expected"\ - untyped_value {"SAMESPACE" "" replay untyped_value}\ - startquote "string-state"\ - startsquote "literal-state"\ - triple_squote {PUSHSPACE "multiliteral-space"}\ - startmultiquote {PUSHSPACE "multistring-space"}\ - startinlinetable {PUSHSPACE itable-space}\ - startarray {PUSHSPACE array-space}\ - comment "err-state-value-expected-got-comment"\ - comma "err-state"\ - newline "err-state"\ - eof "err-state"\ - } - #note comment token should never be delivered to array-value-expected state? 
- #dottedkey-space is not (currently) used within [tablename] or [[tablearrayname]] #it is for keyval ie x.y.z = value @@ -3245,6 +3392,8 @@ namespace eval tomlish::parse { whitespace "dottedkey-space-tail" dotsep "dottedkey-space" equal "POPSPACE"\ + eof "err-state"\ + newline "err-state"\ } #-------------------------------------------------------------------------- @@ -3262,22 +3411,10 @@ namespace eval tomlish::parse { #toml spec looks like heading towards allowing newlines within inline tables #https://github.com/toml-lang/toml/issues/781 - #2025 - appears to be valid for 1.1 - which we are targeting. + #2025 - multiline itables appear to be valid for 1.1 - which we are targeting. #https://github.com/toml-lang/toml/blob/main/toml.md#inline-table #JMN2025 - #dict set stateMatrix\ - # curly-syntax {\ - # whitespace "curly-syntax"\ - # newline "curly-syntax"\ - # barekey {PUSHSPACE "itable-keyval-space"}\ - # itablequotedkey "itable-keyval-space"\ - # endinlinetable "POPSPACE"\ - # startquote "itable-quoted-key"\ - # comma "itable-space"\ - # comment "itable-space"\ - # eof "err-state"\ - # } #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES #We currently allow multiline ITABLES (also with comments) in the tokenizer. #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? 
@@ -3291,10 +3428,9 @@ namespace eval tomlish::parse { # untyped_value "SAMESPACE"\ # startarray {PUSHSPACE "array-space"}\ # endarray "POPSPACE"\ - # startmultiquote {PUSHSPACE multistring-space}\ # startinlinetable {PUSHSPACE itable-space}\ - # startquote "string-state"\ - # startsquote "literal-state"\ + # single_dquote "string-state"\ + # single_squote "literal-state"\ # triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ # comma "array-space"\ # comment "array-space"\ @@ -3305,15 +3441,16 @@ namespace eval tomlish::parse { set aspace [dict create] dict set aspace whitespace "array-space" dict set aspace newline "array-space" - dict set aspace untyped_value "SAMESPACE" + #dict set aspace untyped_value "SAMESPACE" + dict set aspace untyped_value "array-syntax" dict set aspace startarray {PUSHSPACE "array-space"} dict set aspace endarray "POPSPACE" - dict set aspace startmultiquote {PUSHSPACE multistring-space} + dict set aspace single_dquote {TOSTATE "string-state" returnstate array-syntax} + dict set aspace triple_dquote {PUSHSPACE "multistring-space" returnstate array-syntax} + dict set aspace single_squote {TOSTATE "literal-state" returnstate array-syntax} + dict set aspace triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax} dict set aspace startinlinetable {PUSHSPACE itable-space} - dict set aspace startquote "string-state" - dict set aspace startsquote "literal-state" - dict set aspace triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"} - dict set aspace comma "array-space" + #dict set aspace comma "array-space" dict set aspace comment "array-space" dict set aspace eof "err-state-array-space-got-eof" dict set stateMatrix array-space $aspace @@ -3329,26 +3466,16 @@ namespace eval tomlish::parse { #dict set asyntax untyped_value "SAMESPACE" #dict set asyntax startarray {PUSHSPACE array-space} dict set asyntax endarray "POPSPACE" - #dict set asyntax 
startmultiquote {PUSHSPACE multistring-space} - #dict set asyntax startquote "string-state" - #dict set asyntax startsquote "literal-state" + #dict set asyntax single_dquote "string-state" + #dict set asyntax single_squote "literal-state" dict set asyntax comma "array-space" dict set asyntax comment "array-syntax" dict set stateMatrix array-syntax $asyntax - #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space - dict set stateMatrix\ - quoted-key {\ - whitespace "NA"\ - dquotedkey {PUSHSPACE "keyval-space"}\ - newline "err-state"\ - endquote "keyval-syntax"\ - } - - #review + #dquotedkey is a token - dquoted-key is a state dict set stateMatrix\ dquoted-key {\ whitespace "NA"\ @@ -3367,7 +3494,7 @@ namespace eval tomlish::parse { string-state {\ whitespace "NA"\ string "string-state"\ - endquote "SAMESPACE"\ + enddquote "SAMESPACE"\ newline "err-state"\ eof "err-state"\ } @@ -3381,20 +3508,21 @@ namespace eval tomlish::parse { } - #dict set stateMatrix\ - # stringpart {\ - # continuation "SAMESPACE"\ - # endmultiquote "POPSPACE"\ - # eof "err-state"\ - # } dict set stateMatrix\ multistring-space {\ - whitespace "multistring-space"\ - continuation "multistring-space"\ - stringpart "multistring-space"\ - newline "multistring-space"\ - endmultiquote "POPSPACE"\ - eof "err-state"\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + tentative_trigger_dquote {PUSHSPACE "trailing-dquote-space" returnstate multistring-space starttok {tentative_accum_dquote {"}}}\ + single_dquote {TOSTATE multistring-space}\ + double_dquote {TOSTATE multistring-space}\ + triple_dquote {POPSPACE}\ + eof "err-state"\ + } + dict set stateMatrix\ + trailing-dquote-space { + tentative_accum_dquote "POPSPACE" } @@ -3402,19 +3530,19 @@ namespace eval tomlish::parse { #todo - treat sole cr as part of literalpart but crlf and lf as newline dict set stateMatrix\ multiliteral-space {\ - 
literalpart "multiliteral-space"\ - newline "multiliteral-space"\ - squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ - triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ - double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ - startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ - eof "err-premature-eof-in-multiliteral-space"\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + tentative_trigger_squote {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {tentative_accum_squote "'"}}\ + single_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: false alarm this squote is part of data"}\ + double_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + eof "err-premature-eof-in-multiliteral-space"\ } #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes dict set stateMatrix\ - trailing-squote-space {\ - squote_seq "POPSPACE"\ + trailing-squote-space { + tentative_accum_squote "POPSPACE" } @@ -3499,7 +3627,7 @@ namespace eval tomlish::parse { - + dict set stateMatrix\ end-state {} @@ -3557,14 +3685,13 @@ namespace eval tomlish::parse { dict set spacePushTransitions itable-keyval-space itable-keyval-syntax dict set spacePushTransitions array-space array-space dict set spacePushTransitions table-space tablename-state - dict set spacePushTransitions #itable-space itable-space + #dict set spacePushTransitions #itable-space itable-space #Pop 
to, next variable spacePopTransitions [dict create] dict set spacePopTransitions array-space array-syntax - #itable-space curly-syntax #itable-keyval-space itable-val-tail #review #we pop to keyval-space from dottedkey-space or from keyval-value-expected? we don't always want to go to keyval-tail @@ -3575,7 +3702,6 @@ namespace eval tomlish::parse { #JMN test #dict set spaceSameTransitions array-space array-syntax - #itable-space curly-syntax #itable-keyval-space itable-val-tail @@ -3611,6 +3737,8 @@ namespace eval tomlish::parse { ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { POPSPACE { + set popfromspace_info [spacestack peek] + set popfromspace_state [dict get $popfromspace_info state] spacestack pop set parent_info [spacestack peek] set type [dict get $parent_info type] @@ -3625,17 +3753,17 @@ namespace eval tomlish::parse { set existing [spacestack pop] dict unset existing returnstate spacestack push $existing ;#re-push modification - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected to stored returnstate $next <<---" } else { ### #review - do away with spacePopTransitions - which although useful to provide a default.. 
# - involve error-prone configurations distant to the main state transition configuration in stateMatrix if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" } else { set next $parentspace - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace<<---" } } set result $next @@ -3805,22 +3933,6 @@ namespace eval tomlish::parse { return $tokenType } - proc _shortcircuit_startquotesequence {} { - variable tok - variable i - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - set_tokenType "startquote" - incr i -1 - return -level 2 1 - } elseif {$toklen == 2} { - puts stderr "_shortcircuit_startquotesequence toklen 2" - set_tokenType "startquote" - set tok "\"" - incr i -2 - return -level 2 1 - } - } proc get_token_waiting {} { variable token_waiting @@ -3940,7 +4052,6 @@ namespace eval tomlish::parse { set slash_active 0 set quote 0 set c "" - set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { set lastChar [tcl::string::index $s [expr {$i - 1}]] @@ -3957,8 +4068,6 @@ namespace eval tomlish::parse { switch -exact -- $ctest { # { - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 @@ -3966,16 +4075,20 @@ namespace eval tomlish::parse { if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { + #for multiliteral, multistring - data and/or end incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } 
_start_squote_sequence { + #pseudo token beginning with underscore - never returned to state machine - review incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i [tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -4003,7 +4116,7 @@ namespace eval tomlish::parse { append tok $c } default { - #dquotedkey, itablequotedkey, string,literal, multistring + #dquotedkey, string,literal, multistring append tok $c } } @@ -4015,7 +4128,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes#" + append tok "#" } multiliteral-space { set_tokenType "literalpart" @@ -4031,23 +4144,23 @@ namespace eval tomlish::parse { } lc { #left curly brace - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i [tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { @@ -4059,7 +4172,7 @@ namespace eval tomlish::parse { } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename - starttablearrayname { #*bare* tablename can only contain letters,digits underscores @@ -4105,7 +4218,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\{" + append tok "\{" } multiliteral-space { set_tokenType "literalpart" @@ -4120,37 +4233,35 @@ namespace eval tomlish::parse { } rc { #right curly brace - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set 
slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey - comment { + string - dquotedkey - comment { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename - tablename { if {$had_slash} {append tok "\\"} @@ -4221,7 +4332,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\}" + append tok "\}" } multiliteral-space { set_tokenType "literalpart" ; #review @@ -4237,35 +4348,35 @@ namespace eval tomlish::parse { } lb { #left square bracket - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename { #change the tokenType @@ -4332,7 
+4443,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\[" + append tok "\[" } multiliteral-space { set_tokenType "literalpart" @@ -4350,37 +4461,35 @@ namespace eval tomlish::parse { } rb { #right square bracket - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } comment { if {$had_slash} {append tok "\\"} @@ -4428,16 +4537,6 @@ namespace eval tomlish::parse { } } } - XXXtablearraynames { - puts "rb @ tablearraynames ??" - #switch? - - #todo? - if {$had_slash} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex - return 1 - } default { incr i -1 return 1 @@ -4485,7 +4584,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\]" + append tok "\]" } multiliteral-space { set_tokenType "literalpart" @@ -4498,21 +4597,21 @@ namespace eval tomlish::parse { } } bsl { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
#backslash if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } whitespace { @@ -4529,9 +4628,7 @@ namespace eval tomlish::parse { append tok "\\" set slash_active 0 } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey - comment { + string - dquotedkey - comment { if {$slash_active} { set slash_active 0 append tok "\\\\" @@ -4545,7 +4642,6 @@ namespace eval tomlish::parse { set slash_active 0 append tok "\\\\" } else { - append tok $dquotes set slash_active 1 } } @@ -4575,10 +4671,6 @@ namespace eval tomlish::parse { set tok "\\\\" set slash_active 0 } else { - if {$dquotes ne ""} { - set_tokenType "stringpart" - set tok $dquotes - } set slash_active 1 } } @@ -4599,58 +4691,56 @@ namespace eval tomlish::parse { set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { - #short squote_seq tokens are returned if active during any other character + tentative_accum_squote { + #for within multiliteral + #short tentative_accum_squote tokens are returned if active upon receipt of any other character #longest allowable for leading/trailing are returned here #### set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote - switch -- $state { - leading-squote-space { - append tok $c - if {$existingtoklen > 2} { - error "tomlish tok error: squote_seq unexpected length $existingtoklen when another received" - } elseif {$existingtoklen == 2} { - return 1 ;#return tok ''' - } - } - trailing-squote-space { - append tok $c - if {$existingtoklen == 4} { - #maxlen to be an squote_seq is multisquote + 
2 = 5 - #return tok ''''' - return 1 - } - } - default { - error "tomlish tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" - } + #assert state = trailing-squote-space + append tok $c + if {$existingtoklen == 4} { + #maxlen to be a tentative_accum_squote is multisquote + 2 = 5 + #return tok with value ''''' + return 1 } } - whitespace { - #end whitespace - incr i -1 ;#reprocess sq + tentative_accum_dquote { + incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { - #temp token creatable only during value-expected or array-space + #pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space switch -- [tcl::string::length $tok] { 1 { + #no conclusion can yet be reached append tok $c } 2 { + #enter multiliteral #switch? append tok $c set_tokenType triple_squote return 1 } default { + #if there are more than 3 leading squotes we also enter multiliteral space and the subsequent ones are handled + #by the tentative_accum_squote check for ending sequence which can accept up to 5 and reintegrate the + #extra 1 or 2 squotes as data. 
error "tomlish unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" } } } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" + return 1 + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } literal { #slash_active always false #terminate the literal @@ -4663,7 +4753,7 @@ namespace eval tomlish::parse { # idea: end this literalpart (possibly 'temporarily') # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) - incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + incr i -1 ;#throw the "'" back to loop - will be added to a tentative_accum_squote token for later processing return 1 } XXXitablesquotedkey { @@ -4684,7 +4774,11 @@ namespace eval tomlish::parse { append tok $c } barekey { - #not clear why o'shennanigan shouldn't be a legal barekey - but it seems not to be. + #barekeys now support all sorts of unicode letter/number chars for other cultures + #but not punctuation - not even for those of Irish heritage who don't object + #to the anglicised form of some names. + # o'shenanigan seems to not be a legal barekey + #The Irish will have to use an earlier form Ó - which apparently many may prefer anyway. error "tomlish Unexpected single quote during barekey. 
[tomlish::parse::report_line]" } default { @@ -4693,63 +4787,69 @@ namespace eval tomlish::parse { } } else { switch -exact -- $state { - array-space { + array-space - keyval-value-expected - itable-keyval-value-expected { + #leading squote + #pseudo-token _start_squote_sequence ss not received by state machine + #This pseudotoken will trigger production of single_squote token or triple_squote token + #It currently doesn't trigger double_squote token + #(handle '' same as 'x' ie produce a single_squote and go into processing literal) + #review - producing double_squote for empty literal may be slightly more efficient. + #This token is not used to handle squote sequences *within* a multiliteral set_tokenType "_start_squote_sequence" set tok "'" } - itable-keyval-value-expected - keyval-value-expected { - set_tokenType "squote_seq_begin" + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up a tentative_accum_squote to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "tentative_trigger_squote" ;#trigger tentative_accum_squote set tok "'" return 1 } - table-space { - #tests: squotedkey.test - set_tokenType "squotedkey" - set tok "" - } - itable-space { - #tests: squotedkey_itable.test + table-space - itable-space { + #tests: squotedkey.test squotedkey_itable.test set_tokenType "squotedkey" set tok "" } - XXXitable-space { - #future - could there be multiline keys? - #this would allow arbitrary tcl dicts to be stored in toml + XXXtable-space - XXXitable-space { + #future - could there be multiline keys? MLLKEY, MLBKEY ? 
+ #this would (almost) allow arbitrary tcl dicts to be stored in toml (aside from escaping issues) #probably unlikely - as it's perhaps not very 'minimal' or ergonomic for config files - set_tokenType "squote_seq_begin" + #@2025 ABNF for toml mentions key, simple-key, unquoted-key, quoted-key and dotted-key + #where key is simple-key or dotted-key - no MLL or MLB components + #the spec states solution for arbitrary binary data is application specific involving encodings + #such as hex, base64 + set_tokenType "_start_squote_sequence" set tok "'" return 1 } tablename-state { #first char in tablename-state/tablearrayname-state - set_tokenType tablename + set_tokenType "tablename" append tok "'" } tablearrayname-state { - set_tokenType tablearrayname + set_tokenType "tablearrayname" append tok "'" } literal-state { + #shouldn't get here? review tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" - set_tokenType literal + set_tokenType "literal" incr -1 return 1 } multistring-space { - error "tomlish unimplemented - squote during state '$state'. [tomlish::parse::report_line]" - } - multiliteral-space { - #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row - #we are building up an squote_seq to determine if - #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines - #b) it is exactly ''' and we can terminate the whole multiliteral - #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space - set_tokenType "squote_seq_begin" - set tok "'" - return 1 + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "," + #error "tomlish unimplemented - squote during state '$state'. 
[tomlish::parse::report_line]" } dottedkey-space { - set_tokenType squotedkey + set_tokenType "squotedkey" } default { error "tomlish unhandled squote during state '$state'. [tomlish::parse::report_line]" @@ -4765,44 +4865,50 @@ namespace eval tomlish::parse { if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote { incr i -1 return 1 } - startquotesequence { - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - append tok $c - } elseif {$toklen == 2} { - append tok $c - #switch vs set? - set_tokenType "startmultiquote" - return 1 - } else { - error "tomlish unexpected token length $toklen in 'startquotesequence'" - } - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" return 1 - - #set toklen [tcl::string::length $tok] - #switch -- $toklen { - # 1 { - # set_tokenType "startsquote" - # incr i -1 - # return 1 - # } - # 2 { - # set_tokenType "startsquote" - # incr i -2 - # return 1 - # } - # default { - # error "tomlish unexpected _start_squote_sequence length $toklen" - # } - #} + } + tentative_accum_dquote { + #within multistring + #short tentative_accum_dquote tokens are returned if active upon receipt of any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + #assert state = trailing-squote-space + append tok $c + if {$existingtoklen == 4} { + #maxlen to be a tentative_accum_dquote is multidquote + 2 = 5 + #return tok with value """"" + return 1 + } + } + _start_dquote_sequence { + #pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { + #no conclusion can yet be reached + append tok $c + } + 2 { + #enter multistring + #switch? 
+ append tok $c + set_tokenType triple_dquote + return 1 + } + default { + #if there are more than 3 leading dquotes we also enter multistring space and the subsequent ones are handled + #by the tentative_accum_dquote check for ending sequence which can accept up to 5 and reintegrate the + #extra 1 or 2 dquotes as data. + error "tomlish unexpected token length [tcl::string::length $tok] in '_start_dquote_sequence'" + } + } } literal - literalpart { append tok $c @@ -4811,8 +4917,8 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" $c } else { - #unescaped quote always terminates a string? - set_token_waiting type endquote value "\"" complete 1 startindex $cindex + #unescaped quote always terminates a string + set_token_waiting type enddquote value "\"" complete 1 startindex $cindex return 1 } } @@ -4821,77 +4927,31 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" $c } else { - #incr i -1 - - if {$multi_dquote eq "\"\""} { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } + incr i -1 ;#throw the {"} back to loop - will be added to a tentative_accum_dquote token for later processing + return 1 } } whitespace { - switch -exact -- $state { - multistring-space { - #REVIEW - if {$had_slash} { - incr i -2 - return 1 - } else { - switch -- [tcl::string::length $multi_dquote] { - 2 { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] - set multi_dquote "" - return 1 - } - 1 { - incr i -2 - return 1 - } - 0 { - incr i -1 - return 1 - } - } - } - } - keyval-value-expected { - #end whitespace token and reprocess - incr i -1 - return 1 - - #if {$multi_dquote eq "\"\""} { - # set_token_waiting type startmultiquote value "\"\"\"" complete 1 - # set multi_dquote "" - # return 1 - #} else { - # #end whitespace token and reprocess - # incr i -1 - # return 1 - #} - } - table-space - 
itable-space { - incr i -1 - return 1 - } - default { - set_token_waiting type startquote value "\"" complete 1 startindex $cindex - return 1 - } + #assert: had_slash will only ever be true in multistring-space + if {$had_slash} { + incr i -2 + return 1 + } else { + #end whitespace token - throw dq back for reprocessing + incr i -1 + return 1 } } comment { if {$had_slash} {append tok "\\"} append tok $c } - XXXdquotedkey - XXXitablequotedkey { + XXXdquotedkey { if {$had_slash} { append tok "\\" append tok $c } else { - set_token_waiting type endquote value "\"" complete 1 startindex $cindex + set_token_waiting type enddquote value "\"" complete 1 startindex $cindex return 1 } } @@ -4901,7 +4961,7 @@ namespace eval tomlish::parse { append tok "\\" append tok $c } else { - #set_token_waiting type endsquote value "'" complete 1 + #set_token_waiting type enddquote value {"} complete 1 return 1 } } @@ -4924,64 +4984,40 @@ namespace eval tomlish::parse { #$slash_active not relevant when no tokenType #token is string only if we're expecting a value at this point switch -exact -- $state { - array-space { - #!? start looking for possible multistartquote - #set_tokenType startquote - #set tok $c - #return 1 - set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote - set tok $c - } - keyval-value-expected - itable-keyval-value-expected { - set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote - set tok $c + array-space - keyval-value-expected - itable-keyval-value-expected { + #leading dquote + #pseudo-token _start_squote_sequence ss not received by state machine + #This pseudotoken will trigger production of single_dquote token or triple_dquote token + #It currently doesn't trigger double_dquote token + #(handle "" same as "x" ie produce a single_dquote and go into processing string) + #review - producing double_dquote for empty string may be slightly more efficient. 
+ #This token is not used to handle dquote sequences once *within* a multistring + set_tokenType "_start_dquote_sequence" + set tok {"} } multistring-space { - #TODO - had_slash!!! - #REVIEW if {$had_slash} { set_tokenType "stringpart" set tok "\\\"" - set multi_dquote "" } else { - if {$multi_dquote eq "\"\""} { - tomlish::log::debug "- tokloop char dq ---> endmultiquote" - set_tokenType "endmultiquote" - set tok "\"\"\"" - return 1 - #set_token_waiting type endmultiquote value "\"\"\"" complete 1 - #set multi_dquote "" - #return 1 - } else { - append multi_dquote "\"" - } + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up a tentative_accum_squote to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "tentative_trigger_dquote" ;#trigger tentative_accum_dquote + set tok {"} + return 1 } } multiliteral-space { set_tokenType "literalpart" set tok "\"" } - XXXtable-space { - set_tokenType "startquote" - set tok $c - return 1 - } - XXXitable-space { - set_tokenType "startquote" - set tok $c - } table-space - itable-space { set_tokenType "dquotedkey" set tok "" } - tablename-state { - set_tokenType tablename - set tok $c - } - tablearrayname-state { - set_tokenType tablearrayname - set tok $c - } dottedkey-space { set_tokenType dquotedkey set tok "" @@ -4990,49 +5026,56 @@ namespace eval tomlish::parse { #set_tokenType dquote_seq_begin #set tok $c } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } default { - error "tomlish Unexpected quote during state '$state' [tomlish::parse::report_line]" + error "tomlish Unexpected dquote during 
state '$state' [tomlish::parse::report_line]" } } } } = { - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { - #assertion had_slash 0, multi_dquote "" + #assertion had_slash 0 append tok $c } - string - comment - dquotedkey - itablequotedkey { + string - comment - dquotedkey { #for these tokenTypes an = is just data. if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } whitespace { if {$state eq "multistring-space"} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen + incr i -1 return 1 } else { set_token_waiting type equal value = complete 1 startindex $cindex @@ -5063,7 +5106,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok ${dquotes}= + append tok = } multiliteral-space { set_tokenType "literalpart" @@ -5084,8 +5127,6 @@ namespace eval tomlish::parse { } cr { #REVIEW! - set dquotes $multi_dquote - set multi_dquote "" ;#!! # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
set slash_active 0 @@ -5098,16 +5139,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal { @@ -5159,8 +5202,6 @@ namespace eval tomlish::parse { } lf { # \n newline - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5171,16 +5212,19 @@ namespace eval tomlish::parse { append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { + #multiliteral or multistring incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal { @@ -5196,20 +5240,14 @@ namespace eval tomlish::parse { return 1 } stringpart { - if {$dquotes ne ""} { - append tok $dquotes + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] incr i -1 return 1 } else { - if {$had_slash} { - #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) - set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] - incr i -1 - return 1 - } else { - set_token_waiting type newline value lf complete 1 
startindex $cindex - return 1 - } + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 } } starttablename - tablename - tablearrayname - starttablearrayname { @@ -5236,20 +5274,13 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - #e.g one or 2 quotes just before nl - set_tokenType "stringpart" - set tok $dquotes - incr i -1 - return 1 - } set_tokenType "newline" set tok lf return 1 } } multiliteral-space { - #assert had_slash 0, multi_dquote "" + #assert had_slash 0 set_tokenType "newline" set tok "lf" return 1 @@ -5275,8 +5306,6 @@ namespace eval tomlish::parse { } } , { - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5287,39 +5316,40 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } comment - tablename - tablearrayname { if {$had_slash} {append tok "\\"} append tok , } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { #stringpart can have up to 2 quotes too if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } literal - literalpart - squotedkey { - #assert had_slash always 0, multi_dquote "" + #assert had_slash always 0 append tok $c } whitespace { if {$state eq "multistring-space"} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen + incr i -1 return 1 } else { set_token_waiting type comma value "," complete 1 startindex $cindex @@ -5338,10 +5368,10 @@ namespace eval tomlish::parse { 
set_tokenType "stringpart" set tok "" if {$had_slash} {append tok "\\"} - append tok "$dquotes," + append tok "," } multiliteral-space { - #assert had_slash 0, multi_dquote "" + #assert had_slash 0 set_tokenType "literalpart" set tok "," } @@ -5354,8 +5384,6 @@ namespace eval tomlish::parse { } } . { - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5366,42 +5394,45 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } comment - untyped_value { if {$had_slash} {append tok "\\"} append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } literal - literalpart - squotedkey { - #assert had_slash always 0, multi_dquote "" + #assert had_slash always 0 append tok $c } whitespace { switch -exact -- $state { multistring-space { - set backchars [expr {[tcl::string::length $dquotes] + 1}] + #review if {$had_slash} { - incr backchars 1 + incr i -2 + } else { + incr i -1 } - incr i -$backchars return 1 } xxxdottedkey-space { @@ -5444,7 +5475,7 @@ namespace eval tomlish::parse { set_tokenType "stringpart" set tok "" if {$had_slash} {append tok "\\"} - append tok "$dquotes." + append tok "." } multiliteral-space { set_tokenType "literalpart" @@ -5471,8 +5502,6 @@ namespace eval tomlish::parse { } " " { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
if {[tcl::string::length $tokenType]} { set had_slash $slash_active set slash_active 0 @@ -5483,16 +5512,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -5512,9 +5543,9 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok $dquotes$c + append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} { append tok "\\" } append tok $c } @@ -5526,8 +5557,7 @@ namespace eval tomlish::parse { incr i -2 return 1 } else { - #split into STRINGPART aaa WS " " - append tok $dquotes + #split into STRINGPART xxx WS " " incr i -1 return 1 } @@ -5537,15 +5567,7 @@ namespace eval tomlish::parse { } whitespace { if {$state eq "multistring-space"} { - if {$dquotes ne ""} { - #end whitespace token - #go back by the number of quotes plus this space char - set backchars [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backchars - return 1 - } else { - append tok $c - } + append tok $c } else { append tok $c } @@ -5588,12 +5610,6 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - set_tokenType "stringpart" - set tok $dquotes - incr i -1 - return 1 - } set_tokenType "whitespace" append tok $c } @@ -5613,9 +5629,6 @@ namespace eval tomlish::parse { } } tab { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
- if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) set slash_active 0 @@ -5626,12 +5639,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence + tentative_accum_squote - tentative_accum_dquote { + incr i -1 + return 1 } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -5662,7 +5681,6 @@ namespace eval tomlish::parse { return 1 } else { #split into STRINGPART aaa WS " " - append tok $dquotes incr i -1 return 1 } @@ -5706,15 +5724,8 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - set_tokenType stringpart - set tok $dquotes - incr i -1 - return 1 - } else { - set_tokenType whitespace - append tok $c - } + set_tokenType whitespace + append tok $c } } multiliteral-space { @@ -5732,16 +5743,31 @@ namespace eval tomlish::parse { #BOM (Byte Order Mark) - ignored by token consumer if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + tentative_accum_squote - tentative_accum_dquote { + incr i -1 + return 1 + } _start_squote_sequence { #assert - tok will be one or two squotes only + #A toml literal probably isn't allowed to contain this + #but we will parse and let the validator sort it out. 
incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart { append tok $c } + string - stringpart { + append tok $c + } default { + #state machine will generally not have entry to accept bom - let it crash set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex return 1 } @@ -5752,6 +5778,10 @@ namespace eval tomlish::parse { set_tokenType "literalpart" set tok $c } + multistring-space { + set_tokenType "stringpart" + set tok $c + } default { set_tokenType "bom" set tok "\uFEFF" @@ -5761,8 +5791,6 @@ namespace eval tomlish::parse { } } default { - set dquotes $multi_dquote - set multi_dquote "" ;#!! if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. @@ -5774,28 +5802,24 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } whitespace { if {$state eq "multistring-space"} { - if {$dquotes ne ""} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen - return 1 - } else { - incr i -1 - return 1 - } + incr i -1 + return 1 } else { #review incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. 
@@ -5815,7 +5839,7 @@ namespace eval tomlish::parse { return 1 } stringpart { - append tok $dquotes$c + append tok $c } default { #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname @@ -5835,22 +5859,12 @@ namespace eval tomlish::parse { error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } - XXXcurly-syntax { - puts stderr "curly-syntax - review" - if {[tomlish::utils::is_barekey $c]} { - set_tokenType "barekey" - append tok $c - } else { - error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" - } - } multistring-space { set_tokenType "stringpart" if {$had_slash} { - #assert - we don't get had_slash and dquotes at same time set tok \\$c } else { - set tok $dquotes$c + set tok $c } } multiliteral-space { @@ -5890,21 +5904,6 @@ namespace eval tomlish::parse { # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" #} switch -exact -- $tokenType { - startquotesequence { - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - #invalid - #eof with open string - error "tomlish eof reached without closing quote for string. [tomlish::parse::report_line]" - } elseif {$toklen == 2} { - #valid - #we ended in a double quote, not actually a startquoteseqence - effectively an empty string - switch_tokenType "startquote" - incr i -1 - #set_token_waiting type string value "" complete 1 - return 1 - } - } _start_squote_sequence { set toklen [tcl::string::length $tok] switch -- $toklen { @@ -5913,11 +5912,29 @@ namespace eval tomlish::parse { error "tomlish eof reached without closing single quote for string literal. 
[tomlish::parse::report_line]" } 2 { - #review - set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] set_tokenType "literal" set tok "" return 1 + + ##review + #set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + #set_tokenType "literal" + #set tok "" + #return 1 + } + } + } + _start_dquote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open string + error "tomlish eof reached without closing double quote for string. [tomlish::parse::report_line]" + } + 2 { + set_tokenType "string" + set tok "" + return 1 } } } @@ -6011,6 +6028,16 @@ namespace eval tomlish::dict { return $name } + proc _show_tablenames {tablenames_info} { + append msg \n "tablenames_info:" \n + dict for {tkey tinfo} $tablenames_info { + append msg " " "table: $tkey" \n + dict for {field finfo} $tinfo { + append msg " " "$field $finfo" \n + } + } + return $msg + } } tcl::namespace::eval tomlish::app { diff --git a/src/vendormodules/dictn-0.1.1.tm b/src/vendormodules/dictn-0.1.1.tm new file mode 100644 index 00000000..c9ef87f2 --- /dev/null +++ b/src/vendormodules/dictn-0.1.1.tm @@ -0,0 +1,349 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. 
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+# (C) 2023
+#
+# @@ Meta Begin
+# Application dictn 0.1.1
+# Meta platform tcl
+# Meta license
+# @@ Meta End
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+## Requirements
+##e.g package require frobz
+
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+#dictn - 'dict' style operations where each key argument is a *path*: a list of keys
+# leading into a nested dict.
+#
+#NOTE for maintainers: procs in this namespace are named after core commands
+# (set, append, lappend, incr, info, for, unset, update ...). Inside the proc bodies an
+# unqualified command name resolves to the dictn version first, so the core commands
+# must be called fully qualified (::set, ::append, ::info ...).
+namespace eval dictn {
+    namespace export {[a-z]*}
+    namespace ensemble create
+}
+
+
+## ::dictn::append
+#This can of course 'ruin' a nested dict if applied to the wrong element
+# - i.e using the string op 'append' on an element that is itself a nested dict is analogous to the standard Tcl:
+# %set list {a b {c d}}
+# %append list x
+# a b {c d}x
+# IOW - don't do that unless you really know that's what you want.
+#
+#Appends the string 'value' to the element at 'path' in the dict variable 'dictvar'.
+#Returns the updated dictionary value.
+proc ::dictn::append {dictvar path {value {}}} {
+    if {[llength $path] == 1} {
+        #pass the key itself (not the 1-element list) so keys containing whitespace work
+        uplevel 1 [list dict append $dictvar [lindex $path 0] $value]
+    } else {
+        upvar 1 $dictvar dvar
+
+        ::set str [dict get $dvar {*}$path]
+        #must be ::append - plain 'append' would resolve to ::dictn::append here
+        ::append str $value
+        dict set dvar {*}$path $str
+    }
+}
+
+#Builds a nested dict from path/value pairs and returns it.
+proc ::dictn::create {args} {
+    ::set data {}
+    foreach {path val} $args {
+        dict set data {*}$path $val
+    }
+    return $data
+}
+
+#Returns the result of 'dict exists' for the given path.
+proc ::dictn::exists {dictval path} {
+    return [dict exists $dictval {*}$path]
+}
+
+#Applies 'dict filter' to the sub-dictionary found at 'path'.
+#Evaluated in the caller's frame so that 'script' filters can see the caller's variables.
+proc ::dictn::filter {dictval path filterType args} {
+    ::set sub [dict get $dictval {*}$path]
+    return [uplevel 1 [list dict filter $sub $filterType {*}$args]]
+}
+
+#Runs 'dict for' over the sub-dictionary found at 'path'.
+#Evaluated in the caller's frame so that the body can see the caller's variables.
+proc ::dictn::for {keyvalvars dictval path body} {
+    ::set sub [dict get $dictval {*}$path]
+    uplevel 1 [list dict for $keyvalvars $sub $body]
+}
+
+#Returns the value at 'path' (the whole dictionary when path is empty).
+proc ::dictn::get {dictval {path {}}} {
+    return [dict get $dictval {*}$path]
+}
+
+#Returns the value at 'path', or 'default' when the path does not exist.
+#'dict getdef' is not available in every Tcl release - fall back to exists/get when absent.
+if {[::info commands ::tcl::dict::getdef] ne ""} {
+    proc ::dictn::getdef {dictval path default} {
+        return [dict getdef $dictval {*}$path $default]
+    }
+} else {
+    proc ::dictn::getdef {dictval path default} {
+        if {[dict exists $dictval {*}$path]} {
+            return [dict get $dictval {*}$path]
+        }
+        return $default
+    }
+}
+
+#Alias of getdef.
+proc ::dictn::getwithdefault {dictval path default} {
+    return [::dictn::getdef $dictval $path $default]
+}
+
+#Adds 'increment' (default 1) to the value at 'path' in the dict variable 'dictvar'.
+#A missing element is treated as 0. Returns the updated dictionary value.
+proc ::dictn::incr {dictvar path {increment {}} } {
+    if {$increment eq ""} {
+        ::set increment 1
+    }
+    if {[llength $path] == 1} {
+        uplevel 1 [list dict incr $dictvar [lindex $path 0] $increment]
+    } else {
+        upvar 1 $dictvar dvar
+        if {![::info exists dvar]} {
+            dict set dvar {*}$path $increment
+        } else {
+            ::set newval [expr {[::dictn::getdef $dvar $path 0] + $increment}]
+            dict set dvar {*}$path $newval
+        }
+        return $dvar
+    }
+}
+
+#Returns 'dict info' for the dictionary (or the sub-dictionary at 'path').
+proc ::dictn::info {dictval {path {}}} {
+    if {![string length $path]} {
+        return [dict info $dictval]
+    } else {
+        ::set sub [dict get $dictval {*}$path]
+        return [dict info $sub]
+    }
+}
+
+#Returns the keys of the sub-dictionary at 'path', optionally restricted by a glob pattern.
+proc ::dictn::keys {dictval {path {}} {glob {}}} {
+    ::set sub [dict get $dictval {*}$path]
+    if {[string length $glob]} {
+        return [dict keys $sub $glob]
+    } else {
+        return [dict keys $sub]
+    }
+}
+
+#List-appends the remaining arguments to the element at 'path' in the dict variable 'dictvar'.
+#Returns the updated dictionary value.
+proc ::dictn::lappend {dictvar path args} {
+    if {[llength $path] == 1} {
+        uplevel 1 [list dict lappend $dictvar [lindex $path 0] {*}$args]
+    } else {
+        upvar 1 $dictvar dvar
+
+        ::set list [dict get $dvar {*}$path]
+        ::lappend list {*}$args
+        dict set dvar {*}$path $list
+    }
+}
+
+proc ::dictn::merge {args} {
+    error "nested merge not yet supported"
+}
+
+#dictn remove dictionaryValue ?path ...?
+#Returns a copy of the dictionary with the element at each path removed.
+proc ::dictn::remove {dictval args} {
+    ::set basic [list] ;#buffer basic (1element path) removals to do in a single call.
+
+    foreach path $args {
+        if {[llength $path] == 1} {
+            ::lappend basic [lindex $path 0]
+        } else {
+            #extract,modify,replace
+            ::set subpath [lrange $path 0 end-1]
+
+            ::set sub [dict get $dictval {*}$subpath]
+            ::set sub [dict remove $sub [lindex $path end]]
+
+            dict set dictval {*}$subpath $sub
+        }
+    }
+
+    if {[llength $basic]} {
+        return [dict remove $dictval {*}$basic]
+    } else {
+        return $dictval
+    }
+}
+
+
+#dictn replace dictionaryValue ?path value ...?
+#Returns a copy of the dictionary with the element at each path set to the paired value.
+proc ::dictn::replace {dictval args} {
+    ::set basic [list] ;#buffer basic (1element path) replacements to do in a single call.
+
+    foreach {path val} $args {
+        if {[llength $path] == 1} {
+            ::lappend basic [lindex $path 0] $val
+        } else {
+            #extract,modify,replace
+            ::set subpath [lrange $path 0 end-1]
+
+            ::set sub [dict get $dictval {*}$subpath]
+            ::set sub [dict replace $sub [lindex $path end] $val]
+
+            dict set dictval {*}$subpath $sub
+        }
+    }
+
+
+    if {[llength $basic]} {
+        return [dict replace $dictval {*}$basic]
+    } else {
+        return $dictval
+    }
+}
+
+
+#Sets the element at 'path' in the dict variable 'dictvar'. Returns the updated dictionary value.
+proc ::dictn::set {dictvar path newval} {
+    upvar 1 $dictvar dvar
+    return [dict set dvar {*}$path $newval]
+}
+
+#Returns the number of entries in the sub-dictionary at 'path'.
+proc ::dictn::size {dictval {path {}}} {
+    return [dict size [dict get $dictval {*}$path]]
+}
+
+#Removes the element at 'path' from the dict variable 'dictvar'. Returns the updated dictionary value.
+proc ::dictn::unset {dictvar path} {
+    upvar 1 $dictvar dvar
+    return [dict unset dvar {*}$path]
+}
+
+#dictn update dictVariable path varName ?path varName ...? body
+#Maps each existing path to a variable in the caller, runs body in the caller,
+#then writes the variables back (an unset variable removes the element).
+#Only paths that exist in the dictionary are mapped and written back.
+#NOTE(review): an error raised by body is caught and its message is returned as the
+# result rather than being re-raised - confirm whether that is intended.
+proc ::dictn::update {dictvar args} {
+    ::set body [lindex $args end]
+    ::set maplist [lrange $args 0 end-1]
+
+    upvar 1 $dictvar dvar
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            uplevel 1 [list ::set $var [dict get $dvar {*}$path]]
+        }
+    }
+
+    catch {uplevel 1 $body} result
+
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            #query the caller's variable via uplevel - linking it locally with upvar
+            #would fail when its name matches one of this proc's own locals
+            if {[uplevel 1 [list ::info exists $var]]} {
+                dict set dvar {*}$path [uplevel 1 [list ::set $var]]
+            } else {
+                dict unset dvar {*}$path
+            }
+        }
+    }
+    return $result
+}
+
+#an experiment.
+proc ::dictn::Applyupdate {dictvar args} {
+    ::set body [lindex $args end]
+    ::set maplist [lrange $args 0 end-1]
+
+    upvar 1 $dictvar dvar
+
+    ::set headscript ""
+    ::set vallist [list] ;#must exist even when no path matches
+    ::set i 0
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            #uplevel 1 [list set $var [dict get $dvar $path]]
+            ::lappend vallist [dict get $dvar {*}$path]
+            ::append headscript [string map [list %i% $i %v% $var] {upvar 1 %v% %v%; set %v% [lindex $args %i%]} ]
+            ::append headscript \n
+            ::incr i
+        }
+    }
+
+    ::set body $headscript\r\n$body
+
+    puts stderr "BODY: $body"
+
+    #set result [apply [list args $body] {*}$vallist]
+    catch {apply [list args $body] {*}$vallist} result
+
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path] && [::info exists $var]} {
+            dict set dvar {*}$path [::set $var]
+        }
+    }
+    return $result
+}
+
+#Returns the values of the sub-dictionary at 'path', optionally restricted by a glob pattern.
+proc ::dictn::values {dictval {path {}} {glob {}}} {
+    ::set sub [dict get $dictval {*}$path]
+    if {[string length $glob]} {
+        return [dict values $sub $glob]
+    } else {
+        return [dict values $sub]
+    }
+}
+
+# Standard form:
+#'dictn with dictVariable path body'
+#
+# Extended form:
+#'dictn with dictVariable path arrayVariable body'
+#
+#In the extended form the sub-dictionary at 'path' is loaded into the caller's array,
+#body is run in the caller, and the array contents are written back to the dict variable.
+#NOTE(review): as with 'update', an error raised by body is caught and returned as the result.
+proc ::dictn::with {dictvar path args} {
+    if {[llength $args] == 1} {
+        ::set body [lindex $args 0]
+        return [uplevel 1 [list dict with $dictvar {*}$path $body]]
+    } else {
+        upvar 1 $dictvar dvar
+        ::lassign $args arrayname body
+
+        upvar 1 $arrayname arr
+        array set arr [dict get $dvar {*}$path]
+        ::set prevkeys [array names arr]
+
+        catch {uplevel 1 $body} result
+
+
+        #dict set/unset take the variable NAME (dvar), not its value
+        foreach k $prevkeys {
+            if {![::info exists arr($k)]} {
+                dict unset dvar {*}$path $k
+            }
+        }
+        foreach k [array names arr] {
+            dict set dvar {*}$path $k $arr($k)
+        }
+
+        return $result
+    }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+## Ready
+package provide dictn [namespace eval dictn {
+    variable version
+    ::set version 0.1.1
+}]
+return
\ No newline at end of file
diff --git
a/src/vendormodules/include_modules.config b/src/vendormodules/include_modules.config index 895bda28..a9c143af 100644 --- a/src/vendormodules/include_modules.config +++ b/src/vendormodules/include_modules.config @@ -12,6 +12,7 @@ set local_modules [list\ c:/repo/jn/tclmodules/tablelist/modules tablelist_tile\ c:/repo/jn/tclmodules/tomlish/modules tomlish\ c:/repo/jn/tclmodules/tomlish/modules test::tomlish\ + c:/repo/jn/tclmodules/dictn/modules dictn\ ] set fossil_modules [dict create\ diff --git a/src/vendormodules/test/tomlish-1.1.3.tm b/src/vendormodules/test/tomlish-1.1.3.tm index ed5044a73e5fccdc0c437116e82de7c592c4f98a..8afb43d956b0212bfd728b88613392b2099993ea 100644 GIT binary patch delta 12790 zcmaKSbwHF|6YsKw(%s$Nl9Ee@G@^8ebeFJ5m()Xdhjc1}q;yCr-2x&KN(l(u6@A}N zUhlp8$L`ELGxN+jdt%O$#Gq6^74Kn*JCJ(mQ=f!jwoU^iI~fEtB+KSU{XPNJp#fp> zk&V>Msds{0m1=3Y8F`=Ke85J%SKxO5_48y@GbuG^9&g1d9#5vjf1N+#d(wRu$v!oD ztxU^V9^pE}+Tr_dl{1{Z1&+OjtFeLvjqEddlt5d80@8O%KQwgNS6yF>y^v*jVic_W znXI=_lF(Y6r1P=Oc8~Hn`kdarj%>XQWnH{#uTO>5RUyWBQu}AZn3dwKksp+$^-%9^ zui4~MZ6>d}VD&Q#uGBu3%ydRUDRLnPd3D@^$9k5U(k4$63VohYxSLrWXn`4 z>d4Dw8MP0KVhv*6BWFdUpv_uD`*iTM zOq95__av{+K<_v&ufa(F^fu0C*rlvav_i8wiG&sr(pLUNI8G;?@ z^p4&GgFc;3SOq?-w?bkiNUa?ex=9+&x%$QsVtkG9`5IYGfFq`+D$t@YeR51ppwT0R zjiH5Rsvra3eTkaiqOSl6^?x}&hjSor#l85Po#_(`3Tr#)Rhac(}K_g~B7OouJ zQkI4Dh4g%a$E}2UeTO24RnNlk)Pj_j!j->R1@Uh(JJyr+rYF2I6%415pAL|_H0FlS z@qmQr42mI9M8j)il6C9QN^YEWwsB^;3%6-Cw2z}sreB>jHNF>JbYGu3vEQn#t6Di$ zt`tPyvG*+Bve$XN#mBej4gI9@j@61?SP(s)+}o@|m(8)PSve|Vs5?$Q23NKj4x9At*KpjD&iCB@yoTqP-zpe&%g@K zmrFLdfo#N+=*48a%AKssb)V^;VNdSb;V}EMB?;M#+i@%k^n(Jmg8eX2;zAzBop0c4~9F ze=!FS$-1CVZr#n8yIWSME(N<#QE173pt{1-|jjz>XYgCEzOLKe%S+x=z= zq@srRzqY`887MEeh(jI11UBw^FnW{+JRsdTIPAWxc@6#8Qq9*q6y~RGztM+r)b)iA zg15w1b88Nn9LG&B&152nBiaB@QK7RB){KRu+Y2fR6L;KYG- zPE*$9rtW@XJ(o&qe#Fp) zx(YQ7(xTFcZ!>P;)Go@#4i zGC+q*-PCY@L}8QeAFP@P|B>_60_{8Vdx_m6t$G3l;-saCq0~?{m*w)8Qj;3iUja&P zTIfId&YZ94=LmWg^3OjL=IV 
zGd;$mWE87x>u%~)>&bGve1ebjHsHPyKdKa|fT>8)1FqM^3=4w-Fz!28R0mPR{dmY_QFKk|=v#zWTGY%xk|`JQJ(O4lM#im2s$ zJaimRNhWHrtFPs!AH3#huYeAI$f-Ye{CcmtDs(M&-=CqjhgPwEnwqU7)nA2YN6{&l z{F-tyh(iY5wt{oDa@*t8!O2c*`vF!*RS5-EQ4KS)gP0v@YgkBu%oe9C3S(Q)e)_bV#8>bPc zn_iu2gpps+1oUA29Zz#JKq7fsjW+)#hHlcjnnp)84mke6mIHJSE)N zPx%JTPVzL1h4i&UAYVq$?k~uP0&VRX(X3w1rd@AQ4Wetfet=6In`&{+_ssn-BpE*5 zZ*zJsw!iM#c{HtJM-L`7>FKv zCA2GmQ;Sitxt0pgBY>5KP>yBLzd!h3fr{BOaTw>tWixM|h31{MAZZL6YC%9Scu97f zb~I z>uY!r2o4Ou-P8Le<46BTR$p?DObA;UHmQEjwtr@w8c<^ln{`i+fc@M%z-2BGz->o` z9S_S{{?xJqIu-xXx{Ge&;$q@$WaemZZ{gquvmq9|CE;+haCPIbG;#N`vvIJ62mNo( zkN(H)lRsX57xkAD8wZ#b@FSN5?GN7KCMsH{+fkYS85Od*qnn$Bxvhow&xr2*%l*Hj zvHVXomVZWrV(#qj_&_4I0>L4^5HxpAk3!{IVBKW7k;{P3y@qfI!{23AE zZzuNdc5XJm`v~hlvS0n=`k(*!RyPBLk>Y^39S*kff3&Rs>`g$?7YhLA;Q<{Shy<=~ zE;bHU{|y^xwkHE>jVXT*p8GdWfBdHffA%Q`!tZ~F@)z;5A3yv@-|deeFi?K#8{IS! zHXx|@|HuA+`mmckL{}(WR#XBJ1R`Yufw*td?G+~iC>#j1n?CQb&W{yZU0y=O1UJDR zC*x{MMc7j{jx+)%kH!``gJm5N6ET(u(NNHr+Fx3gY@*YPz?}7%oFPo^>5jv>j?JX@ z%PpHg(daDUQl$UHjhMBh*?Gl~UIb2L@I2z>LZMAl;d1QyI%Jqe()~_VId)P%sJ8np zITm*aG}t=7Vc0B72}Oh+dGjesAQkOb(R6qPN|(^b!kfj0Sa*5yP+XbEp=(gOdSl5SsIzwSUiQ>XK-Srd3ZiruL>2^77ri?n}{;1FycO?vl@Sj_kM;j$<-j4 z9qh{;y2~r8YF6wwRR1jx{{(JCyDnT_VvKPV0jiC7w1rM)hU$$J*0!;3lCI3Uo8p)2 z$>>!6jhbJ0C#ntfV`4MVsK>a>Yc^Kzx?qyq40pCxNLufhB-5O@=j!Z^=uuK2eZ3S} zR^)>$pD3{+hhwTCb%EkW?SWn`_N?c|mMHLkPR%`$(^QlCbh>qHEO#v810$*RLmGt< z)u2_MLPK%*Nz`W>j@C$R)Ajt_#rzzq@&i&eY1H8BlbYKi^qzLU&L=%|ws}(?by%Mv zZtO98>EZ7&izvoIBCzaWjMVdDD(7J_gx6cm@zvP`VRH}zc}%dcFy9panbQEP6H}3) z?R(xjFQp{I%5HQa^sv=C&o&-BJ2T1kmU;wjGd%WLtcH4A2hI*Z#nt-3JnY~zZI#mA zT{9#3O{<5l02>#Z!`ZUOc5n$F#DJ&7^a`>gQNs?BJ@X?c@ZD(F+t7LYNOGlxhihlY zlo4%0U0ODMFkFR4G^^eiH9Ddxn@%`ZL;NFi@x&dLF?9%kFc&?L$l_dxjoa|dk!TUh zd_=cs&rs&EWF-tKdB8-nl`g7Lkw_}e;miQ1$GuOE`kbgFCky^1fl<$jcHxP{8`a!M zi_pl2cKlMs7{ezuPSK==?x~*}9zAYHr}=Co@$r*&Y(E8dSpV7MYTwKGGmPk0*3@0{ z=S8XWVI%|yNlLcMH4`{d)5mQJL>TxsyOwI)o!y%`t+j?Sbl3ax zpvOl^Yq3|lZ=C?dpTQhQ&&)ZsmMQ5`pUF;a8BUVYcL$mIbWX7->f?|m}j_(6=| 
zVZ=nnrwLpet*AA{t24qRbPBpijR<{eN2Y!DuIdRWb05X5w+j^+wPGJQ3byoR$E1Cp z8X7HgC$G0k_eme;xyR+J`FbH+)I&hlTjQKmMcsds?(jIP0c*O=^f7ow8Il9t;^oWi zsD|gA3~f?ceX(jcY;P&OvrarK(7Ke%AMG7Cw`=EJ(YVdOVcMXV<2dbIPPTrwpv_<> z4kbQr*&d33E~Y(k-Cu8cpi$|)cByK-^MLW%QCi@eJ%5qY2J?&()Ti$1SIo#IV$4j06M{yV=!NoG^)gTi)!Gzn1jtYgl3e z2fU|Y!xGC7M1bVJ{~+%xPZzgd&k11D@=a^C{5)+W>~@?gJZAk-x$3~LQ?q1KFhcQr^q}?!iTnxKX3Ho09Q@pjGPqF5 zIjgpzVX{jJd+NHRxJ(9xs8!F+SCyjG@YZvwbEvEKn6yfqF_^Ow6F~@YE{;}_hE*lJ zyQwAAhXtlC$$A2PZSZ$@9Crw6!JHsYkQ zg2f~!-C_<47EAOhnoBHjhbP}!@%cd0?*h$kZv&LqmR}uya*0@tXdhA-xGysDpzX?Q z%^);*r!01quZA%FGusO(DIYqdtvwIH)Tcq8Hb`mBZPey^A~zTF@WZSl_!x_8cV^CDv5L0g93J%;Ig` zw*mnVG|qj5;Uu&0)ld0}bPMS^P_+JyIlDTkE_!V(rG;>g5Q}#RUSN7hW-1>NeMGI! zFgcEF?{DegxL`X}3J2o?D}|P`@0QZygfuUB_lbEhA!@9L{dqXYVbP$u+ng-NI$vvo>`|^3t zYC1E4IS{1)rQl_?5dAlE6c6joc=ZpqU%%>5V-mB7{NT{4zPw4!f1P#FVOb8|K)L5B zECo5a0|MRuX9hrxZs)1LhX1a%f$cmOo}a*XaeYWGmwaw@Xj@NB4aZ`5UNi^MY}=@` z#ZcqM1l7^9C;m|FI=@-;aivdOrEl!$<+si@o!X=LUF;`MpLjpd)yS(+Z`<;vXRVoLjY0S%#}j_%mKlCuRo#J9SI!KWqP@sKHnZ?xat&~1iX1P>|Gjr@?zFcw?VLU7}l&gH*7h0Qf zTs|mOHI}C1rRgcYb$lYbn5azjZu&_67-#HEolSwwLr>%Tzi^GVSHqo3HA62=wL>gC z+-#a!6|0i#-I|6Ss+b|ODS`~!wq8!0l5x{@;k(G2()IV-Y4kzU?aeL61g?;7%5j!$ z`78^_uJ}pcBNN-+8 zbFoO0&{-o)GSO)qo5t+~@AD=iK#cYc^1kDA29~tL@ZR%-Nt9g za^D6$S>j1S){*Q**qRugz{AY8jO`^mn$z!1o^$px?7M{qFqAJqnv$Y$G2&>mi>TG?KvkYn^KGtrvMZ|A- zDMsL3+EcioBguD}9bB?$n2eZ)wc4lm6$ngZJrJInfc2+|1`!wc;hxV0$w3t|S=Kf8 zJQq)khESD+^zKf~FnnS~bLemuW)P3bfAlcl(!7+ZfQ|6;ia~1Ppbjobiwrrta7mKcM;vX1%;SidQdVm15|_>rv#_!o=6=sfJ-#n{F0WL1RsJ%W4ManH6HD=Y#_taCYQ;f>ySx@i+Xu8GYQN4&veF{Ba(;0NG54ugy zZ|$L)C#V=+k9H#Y6tcW1&Lb)6Y`OLjNb`deeSO;c_0!IGMr+Y?hwOQ1et_N>?|z^8 zIWdv8kFfu%4COwmu6!?lsXD#Y&?r&4j7BRa5zOu|l2BTdm85cfCuZ&t(RRWvu!jy; zm2pGI*c4umQRw=;yOi$&B`C?Anh|7^w3yeslnr(CR}ohOb}?2W`|xA=DK45`p;7Ih zoC@?~r0{=sPjl`mi)}{GwAeH4o-ie;lms=dk!;uui)FPxRZj%bKj47i=~tUv3#nl8 zeg;Ffau)g`PdGIL!1(;o^yo{%|BbD1*m_i5SA#qJbF(D{fyDo}9{qh%{k$E6Nx)HKLXO=R~+C*BE}QAq|C zUW{#57;$t}9ZF6p^Ejh^C6iB2&_)XDc>I<1C`^(3D+dchCIYDV5F`$`j`w++)rjJqO#Jz 
z^HI2pR$iJnQS(Sq5DiIJO@(P1&aM$DDnT$wx1@m*O)Qy3WZHy_NRn3xMFi*iDFTVTT z+%+{vjdHFdI-6HeE2t&rt)KORW+mA#YbvjVnh}$}v@G5riH)9M1-}Vu;~$-GF@Rc$8b)HNs`8Y5 zGkUi&Nh4xgM0;nD_ng3!I^&(q>y7>G!Ul4OtzyykvlXjMoM*9yyBb`m24(~nnre+2 zSo)b4OW#pT>!YZMvi-=fKt)HVwAo>12Z4Yl^zMuN)OUwJHZe!%n(a#$M11EMAxd*{ zZEBCM?e6l9zuz{p0kJ{hk^}hRUO;l$4f#Nwdo9?r=*YO}!cW+0So-&z1bwnYV;a>VOUzY!ZS?GE@iv-Tm3N=}1+IkS@ zOL2M@^FDMb&31#U?R%b=N*VgdE5GA0OYOYL$GesrJe7@v+q0}^(H|w6ejMuEvc1FTnn5j7TH2$OTuhQAC-B}hk8$7?9 zvNpnlkU-UpAQ0rH2mwn_p}&d`7DoTnTz(~~ySjR+u>ILzZcUsFpRN#@0>$^m)E)j8 zmAcQ6dGaFi1{G8~yPDaYUp`d7x`=UAox}Si!P=cX!5}6^NO^H#S8|-@{>}j#{6lC1 zF0di^t(t?Qy<_um=a>}%_{tRQBT6sNn!n4Qs{w6ViP=ZgXjC}%edB2Z7_qGk=H;Oy0On=sTXL|NQwE(h+RYkUO%9q6{ zZG=NVx7~$nhxqq0Lk#E=Jrzeh1&<$ebLB`B{(!p*HYg&|0W?T?9Y4?UtXv6FzNriC zO^>M2)QN_Z3ExSrKQj;P41DvR*@i@aSh*hBCMM4IY`BORr;)Fp%s`XCLOU)+%Q!P9 zCNw37T9{P-d8idU7Ovvp)W45(4Vhk7D$#`>H|~K z0!m#rF)Bp^Bfx>jqqPUmH42us z%+H>keCIKKvGWo+g?j|eF@!jIs{9+45wxMSlBY}&b)aI*t3RJb=>!kc8c)>5Og!0{ za$MgNv9r?2wH|U3EE6$U}YS;25Je$vxQ#yHV zL!HxJ?1zpaWWfEk&Oz46!a>5+HbHHXZ8OkL7Mf8Y9_}I?MmZGW!nk0C0ZBPO71~X% zB*if!Qv2f`zjjaX6}472d%D)($jK?go0lWzFeInD zZtsj-7d@<}1!3dNy3>`~zZdIE6BftwsMT3UDw?^e=U1rg`jBArjyLx6Grk)>N!H}0 zdgpNsTH!%jr|$b#rza%FFz zSAP_v$(TcQ`$Cfh?!m5?hB$s6OMl+W!)84|5Q&3$lT_6Yd59rkpOS{?(lpk-)`wLT z(`5d+!2hYJ;7bV2TvS~b#OlbwjJP(z^U@_}9Di@4ljJ36GfpmStg571j+@*6VBv%A z^ynOin#|eqJC6l^BlaU#lN}KW(%8OHbN{s3`}TqPj-|1ZEBmxGT&3YGZ0}b)SK*|j z*l_VoIxxc_E%qeeaJN_CKAAB{h9VOV_J6#b-}P+nhsQyHx|e7;bK645YspOVScB><+3hD`OlWzp zNISC1&|DIu6B`NyvIPbQ(F^aCVc+>7>Nxr$Ico_jW?I{vwE7m;(&}hLM(d$+4qP!# zbv_5f8wvtGzo^~E+K>3`78_)m(N^g4iMV&Ix2L{l?&;Y>Z4f<=Q3ptcbE-Kw9j*e8 zCbv|!eoQWJT}!5?x2rxzvT{1!x7b=MKHOM4SCy}R*KTO2p98^9N2@@tr11v06VyWNbqT-Nq$~9+q%VG-MS? 
zj;9h(7&km&fu>70Q~7ma+0VOL&OJAsyhvJkm8D7W1vBLE)kP6@tJSzKfslOrk_1Yc z3^RLY%JubDEGN0IYNI@MHD4_XLd8#JI;wKLgF-5$yTPantec#9+ zL>+7dgwV>;P^@DO^-HGSr7*EcM~{t>VnmW1?kM3M4TRRcf5rQC&tVEe2|Zo#-t!n( z?2<^?6iZ{5Nk7{vp5cxf?)ieBi>j__qK+A2ffDt776ZK#lfQ?~i%B%G8g9@=VgV^D z>ibkEvJmPSEvh|uF8HGzt>tHX)6y(OXE=FC?A=8~O_8_53{AR^vq!2hEcrb#Ejv$? zQ}R2zb~B**wTz3v)w(>V-M2(*fzgbl(}3i+SD!Z2)3}Oho$UzIzfDPsN7OfA$2Aq; z^gAtwrN$0#I&!oleZWFHl0t|l3P+*qhoc-PdM%vGO57paRkP)Q`AW}>T|S;H_+%1$ z>!f$$q~heuxeum~aDSSr_;-28JqfUk@OfoV$D$>a%+GsziSrJt?hgUSZw47Zbbfw| z955_54qVnh-D0?2d2p3{Sg$j2&vB&TyKAr*ttbw4F$6$S;ptcKu@=t63#KA;QLtbx z*QfPseRB%>@;wc(T>3cAbp|QO@y^(v8HsJ}@iVXN>o5K`qVY&eo~eb;OP9@oIkd%; z9TggBmVehO9MT4dEdT6%JU(Xw4sjGaCliM+z{fFX>hgqVRGGXA$NDvAhThP=q(j8p zr!g|(yS=jrSM3D;-^h^j_`o!d=80-^d|d`~7@2Gx}rR z@<|f@PdcMz{*jpuCj+llKc8@l5UIAI@{^D;)Kn#n+lNi((BzbZc5q+&(RnBhSEY;+ zo7*~are#3B0L?l%!|Vt54$)Y3E=EwXu&q$AlS^3LDnb@cP3=xcIB=Jo%$;NhZ0Y@Q zUZ&Xo7~$AQjc!UdZ`fdQ6;R$H?VZGD^A$PuMs?5@8{(sWe0HjBs$AREWAsQ!%Qr@i2WCdvTLjmw$Y!GY#pA&A?Jb)d4IZ=L|kYUwGI3pm7nGpK$Rt4pg)U<%q zGRa&GM^ThXGzUUYn~L;C>?4K)V~r?#Cw<3W#^wOEou7t6`a^h^s$z+gWmTjUJ1q+_ce3+QED#l^Jey=SkeA~%8t1) z4~J!}rearJTO-W5$zvNA77h{?F?G4Svb*?fXnk>q>J8&o@o5L}!vmJ=ynG6{G&=BTh+tS@}|IE!D0dp*JpcADO7a(EN4qGc}QI5H9nf-w$m*Rqn7e zmB00VU+YrC)YUPc%H(lRt)zTXGWM7!5-ZVWT44}jcdeVt()p9>5!$)*`aJ__?A%XI z(D^HaP3B##=MO^>ZE7cIHhI;ADOPaK=lE<>S{E0%&_#$3tu-868zEtV5ryvoG&Q>K zoNg33BV0?XzHjL5yB0?qpwk&G`zYpcyX`*6Y6x{@QFRVXat<|nZLy*2q-xWi$rgrC zUg$H8bA`S@m2S|~*!hvSIX~Pw<@uXIII|spuM7M~vYPI;S7hjp53uUlT(cwQmV!yl z3sTEqQ23ST(4A>Q5{mv#agPtH@+=hb!VPOWq za0CT79udf+p@l)8G^sHEi+lQs#Q66^g#=JP;(@`S5P{}M7Vs(p!k>21644AGEa2AaTBu2A<%Q1hm5b#T^8`4l19SC~P0@lL*%@P3B>1Y5- z3#!`T>@oq$MBEO6eyvq;_q z#XA6u2l`X-frvN?@BzV%%DFfiuouSyeoS=3YQ_tIFR5LIPg;pJ+CP7i_@|(~yHbWdWP=z(^mkGcW8%5)ez{0^{8KO(Fp= z(|Eyef3dmgNC0m-3-~SH&C}d;A@Cso4H=t(1khx#fK>&4vqIpNUnFX#ICxF)Cay0g z=s-dy3wTKAU-oB*6K7F?`-E>qzUJrvG>Zl7eE&B~0Cc3G0Z7>tU}_PVhzU^1mITX5 z0`b`~;4w*nI_ELi5d!;R0iM{R05v%*U;-)FLwW$sju&9bLjxpoDZmN8MB#SCKz1$~ 
z)nCA(e@E<30zu}+{vV7HSsn@1Ul^lX20s57{9oWk{{bNVboLjr=$11i+23#f3uJT) zbVQm*O7$1U=$518e;wV18~wrkjOH(7(Jdz=a{om0C#nbrTKbdy>E-7@|5ujzk4Mt`zD+xZuq z=$4yMC0GPh07d}@uc5pR0mg6&s=uzoZz)@9{i_@vPEPgL z&EYNORh@s7;bHfPz_UC;s=uy{ZW##Z{$l|6!5a71J-{vHM*V-3k$&9@0MEmTsQ##y z{wkK9-<25R`{c1xpKWXN9+vKfC;^ToXY?_19K@OS#q^wpkOw Vc6K5RWsoxHD=ln+^ZBXqe*m+y3oifw delta 9769 zcmZWvbySqw*B*xMmhO=5?gj~Ikj|mIONJ1Xj+gERY3YzI1xZCZ1PMVxy7}gM@9*C4 zt~-Cs+0Q=Dd1q#=v!AtN=8wS1Bj9=pQd}@ssNp^4rT78@2$W9*0*M3DvebZE3N_$D ziUfUOm7%3goG#){#8|{&dc5@`EnBuJ2 zg5OD0Fuy?^C6{-*MVAmrqH?&P8bX+lW{=n=C!Ex2s@|ycy^U3hEh8*AKkZ&-0sgKU z)jEr0|IHxpJVDYtg7@+Qnp1XCEVEjZS3R8JuUU9+S-7 z2IZnh7Uxr#uQP?8-JNr^hqR!@31l+A5wpl#@8v^g$gSTU9w4V8t>O=2!7yx2i(a&S zJ5?L@ZL|Qtsp7+W#k(_W%PaiF&P6j)cAIjI)gh~#u$)ojbB8Nbq0ZAqiXm6`Zi zjZImX8kEDrAi@#JcHFqZtqu$VO7+ZqzIryBkjgpM47susq4g@M3ge(qE{<|-i_V$6 zI3xXK8<0P-I0t?!T>!;`FaM7&zy5XTFWkGkZ8%mJ7qlUZNDq?lT9ft6IUiGApvaf> zdc1f-_#wz-Q-^Auweqq-D;YXScGGdzD-N+dlh3K>5z0x0Mi^2ut$t7f-g>t;EedLe z**mT4jUnBt)MSXy8P<`tn3647~858#+pS>?~YBhH~TX6?g z8tB6-S&YvE`kRM-Q1aRC`+nN#Z&vrf#Fxeu!aHx8)3-aB}9$8IRKp}#lsFn#4* zVpM#-SBhjGv=lwANf$^ZrZ7W@aWv2xW@@>-y@@m>Fp*s9oAJZ?kiQF~eU;C#?sUQK zhn}L53F_1B;DI zImbJ6A_QE`CR7Z~rdSqnE7INa&x3wp7C=0H@s#tP)mkz{35mIs3|mqRo|pB>3_*JU zn4?U~YWD|mbz@Rp)=fw)q+6YN!MBGqK;qu0#?#87I$IC5;R`kTM z+K|olpoHV@8N+8Cx2UJ70Z`IL=6DJ#^;GVhq@ZuDKSF;&-N^f_Vl@?&?_wA+GJ}_-j{BH2A~(EbmferqFwS!Rrd6 zy(A<)TwxO)x>h$Qbf3(}0n7 z%lT4#3>1o6?~=qUAFx+BP{I^uTQ+w~~&i;_C_2HE}npd;7rqx!LswVUGd) z4&B}_3WRq7fxItP7pkBK#ZrVP7G~_NmB;Ztx4}o_!E$f{XRU}*tTFhVs3%8i zqQ*nIX-_UgWRpB-m&0*7B2ulKZr|yx!Sg`bQi6s)=*Pm|T*Hzi#dt1?%nKc$lN0X< zbM2s?S8s_lRNxT^Kp+qbh@B$baOUIL_%0X(5~2fvcwt^ZM}gvrHN=P0!o$PD-^|L@ z*%{*E#o-0<^lDUbJOSfYR)!gxDsD`W!h=Auq#zIvps7Gj_|FM@7bklc$nOI{stX&A zX|@o8j9q!0QlJvO#zx~LFzq4$&E7x~o?@sVbm=2B#&zzbz_KWkS&2fx-5cYi2F3!| zsSoM=rBHs^tH~m4@!)H8>D9S!6o2vgVu~8>GDn;oR$&VN(we`5>WxOG$g^9QOKmL( z(PfxD6(?LrfGUL@p9iDW?!Q03_u$nDxI7pd=t7T zV@)CDkx9=Qxt2N5a?yrWU~gs5`Knbsmg#_issXM985R6}b%LPFSlxD!TU}PzDI1A< 
z)?WE}=L&@BuDw01%tcbvWw6>-bY@!uv%H%~#7Z$fwcf?~*+lbKCF=l$ps+G~@*Txa z^~M*T>SGKU<*1X&XJPF#V$Wlj#rtQeW-PIAwb!K0bmtv>3D)OEwoMR&FR{faj!G&j0>nys_pV&S~hI-sDx zynknME*(#p=c$j~_B)NQj=z2pQDBp>h~1TG*L8va2^cp}n4$TQB_bj0?$42fKmvdT zCpE$!cOP)KB!oUb!U75Iz+mb5k`&I%gcK-ogqZFz&aQTeSksV0u}lT2N<|UV;ynI~ zcirZ$c64Kps}ySogIH5jNZ(SaCyXaP)n1aDZSZWyuTcB>^?auc<9j7%<|DciKhAb$ zVne+SkIzM(OecV_f>n{E=r-mp_Ke&8!R?9z%io;R))QLD72Ij3#D_e!uWKA>We}em zHse-hBAmAV;7qw*tn(boY>h;uHTjFi8)S@k^+}7{*p1BAuHAz_gc=ThDOA+>y4Tr5 zmH^AZ4@L6SL$PvoCL(y|$MiC{{!lPB+pzQ7hjnp!bjxfzmD#q+P?u%RnR-K2m*I9k z%=aWE8Z%ILMQMTTUHBPcLfgEFbrxY_ETTNC@m;br!c}i!d9FG6CDr}Q##v*^febe%qmy0I2@q_Dfh{0%7`S7+UL*Op{ z_uC9_!L@a&v!<(5ky18Q6nI|{=_6{tu#6VbQM`l7_6SLmsrN{6O>;~R<6{oWu>aU1 z2t(1N$T!{X@VU{wg_D+goK4f*#znBY9~j}S5#VL>SFYX1JkXhZPD8%{|1s8k=9MD% zk&rFr^qcu_^Onu|XQCl55RF(`be8!EX|BB7gKO2iAO$9R-?k+o2TvyQs@;A1#3eWO z^QK#%5T~*E9$P1Z_v#o7D)=fCWV7N<6I$oFE)G%b>b7>%zfRI$kUH_%k=)vlboM>B zB1>?b$)dLZlGsCIMMfKkId{WOvY@)B6UC5K^4(S=DSbpnP{CC2ijq5_<7|6_^JOO< z`nT9uvCe4&X2z>wXT(EmQsh@6^e6SzNi}jUP`m|YYiZi@#nxkkV^?*?V!F~d5pxAe z9|?(jFc^lsro(r{H@jYmDYR=+4eugg^~v?^>~cq0BDS)*2!B3Dxwh|6w!$j5VhR%c za#3X2KBSXIsa!l4{YxU|*Yodfx|@%M!`{cIT!CDZd*nMUD@1%Oh>fjaoonkFkL^_p zbTJzfbnW|FzY~%aoURGQa1-P8{*yXjFDwKvrbpdJ6bkw$l1`gL4WBgghV8?FK%!_M z5Esx#|5qfvizb4msl_aD;Y1#V-o2(lc!Cx#3u1#cmgR@V1C}@I+`Y4QeNX;Zzk>RnW6$~*r#}TL~ zU@E4kKixolSr~bv#w7XLwb)^TkWqB)9Ck!|Ck6$iO)_F=TtFd{`e(5!`TNBC$`c*8 zE*CsLQnWppjPRUO{`9(T@KiQ!`Tdj0woW)B=8!UFdzJApukI< zNT4$Ta5X=;YlOPwh8xwN9*=olP!bD^X9soVPcTBM@pkf_D_u~yl6QjbK9R zfDQr){v)vJ4Ns8%6j*>NjtrV25XmiuPjE|*o67laOF;Un=ZL9gQ)Q_>ZrcJ!d_L_g z_h|WyME=uDbM#ll`pgNqC9L#}c(oi7#z8-QI&wpa-I2f7j#e*xF;7N z#AX*DTb8FT2MRFs6Do4O58J@IDVC~QG`?O2KThbCA>a#+G_KU96UfIf zPz1a{pki4bub=mF#%0-fPO_`fk#%Y$!TlMWpf-p1e0o82J#`^igAx_{$=PU9VsQut z&y}ux@bkiQ+_RXHNn2=P>)Ucuo7XXwT3hCtuR>c_XxFBta%K-ee)4cd|m3-;m@<$&hkP#-y}8G=z26@pEDLk%g+1stzsFG!!xSn@KkA(=3Q`hmG%jFK7wyr^>pH{D&9US zX)kS_UR2K1(Tc0AMRw*^`|Dn46b#-#+Lx{|0aGe+U`Cb-SV~c$eSCyT5&Zv?sJq5c 
z!#&E?a${JVAO!;i;)h8TFCa=SgTfB*t_7XacL5R9_Hbsv5w#`jWXr$@5+jcq8Ue(} z<+SDrbCqCFHy*|O*9NWi)>AQ4hr^6-rU@AS<;VCy5REd#%s`KcnfoSDwN>5TGBf24 zv85}ySp3_D!5Oje@>aRc=wgmx^u*E$jr#;U{HE%Rk%O60N^K-a)$^?gTLPb&WdGAq zxDz7d!z5cC($Q}=t2Nov`ItJ&1BsGM?MJgqh4#!%R{$$59rPN{2eBsgM6%0IsRzB> z{7rj0VC;U2>K$A*LuK+_EufYLGQ2@v{6Jjjru?44#l+*ytaeaNTjLFuC{i(j?@V*0 z?k?Q$V*5J-;>ETf8_dpO)8o6FdqVG%XPx`lB2UMK_le(^GX&(@g^CG-weE7%4L;-h zJxUz=AaR^@%Ro1XYRnQi$Zb(Qh2myJS&$;j4Sbx=BlM;Q9EJHSlCjC0E(N(tet$(TR z^lLwzR7YJQ?u~?9{YoV%Hj5Y{7?sqdhFMBA~|JrY&xW43rN z<<+7L6+@4G7Qz0!ESWFu&5QKm&~YD`IXO?W3f3O=_kttn!LNunc7KE$*w2dH6vMTD zw{7v1E= z?~rq#r?^Xv8#g&X3}K%k+et%D4?}sq=2uPka)_8zsBzoFcJthuVGf_LK`=qy}&ojJ-D( zqW+wZiBC~3c$AFpa(#A4agX_jYy%0Yq_B!n5I9UF1c(@nV9_KDR54(~!(yqihk;xe zXt%-xv=pd-khe&{v6U!rZ-Ig9ju&c}$R$%k4pVtAOc03gj}j7)^g@AJyL-EOL987i z{=W;zgY-2=SROF427gV3#i<@H=CuSAx}qArNN*U@P|y#v%*Dqw@NsHa<9b;0jo!La zoFPQBr+U&gKa<-nl79%l6U=Gzkj0LRw`@%-J=OgsV=uUFO`B~nJ?i6(Y z9%McTY}{eZqB!+-=$f}=8b<;ft`p5OE3)FcB(u;>FV$dz$fQcSaV1S{IeW(QzD_Qh z)VgiZdNvzv`!%Vre&Rk;3ayE>a%duL0uP;W2dJIkxLl7aIq1niZ=R^JT+p0YS2QTy zyMET(JvRDCt-aB^mSQ`xrBcSxrOvXWm9v(NUQ@&x_g=k_Z ztEZ^WQ)QfFX-~8RFqIiAgyt%q!(VP~Da3veX35Hl zU~g-(eiVM;%UcN!+cgPjEV^GB)GQj_P5|CLBL0&(V8gCdOa``#Ga?8?M+*XR1EJBJ zKrRFs3MrEk>*Y%-uqE*FP)EFdZI_0kk+nRH z-OHp3;Th=`9^njxI3P(%a*M0*U=CyB`$`Q$!Ini)GdwX~jLgTc2Io&>j&7Nw4yP%U zZW6hF&fp4e2AL%a>&~l$O5kCIhnl20zqkp>Pld}akNPrPLM%pn=fF-GWo_L*J>aQK zr?#1ZgD{@QlsWk;2`yo}xMH}uB6fd9;PPNdsAZB^Vm!l#mvij3UK^5^{yr@&l61Zh z6klhHtCCTukEDShe&y3sfZp`WHs_qwkwkyZD5KC14|)agu}U_2@YZI$e|x;qr4<*~ zJ5*<<_-TDlgs1k(?k^9e6IQ5uHhsdT-(G)-I^8wQGEJDlaJRBCnsYMdUtAza8z$bB zozAXXK+3eH{hC~AL6V}2P8;b)$T$353ff*2C=$Kkr3_j6JWS!(x$lzeShqZjgX6A@ zU}@goGN?6`L1NrU(5%)1^}0jP)tR*^dt^Ln zDZ6`*qIE%&>suZNhU3y5Q`X#6;i~{Ey^*9@j#<5Cbk`5|T&NFEt*^^oU0W!fVnRQj z{5aXRx%yG}ZLQuo%QftcWW?6_`0T3GJ?mHgilHIU|Os4b8pR zY<+q+Qix~_-y?B`d7f9`N!pj7-tBXpg! 
z{NlOl+sBZl=e7(nMwI4tThaD`(+_BF&0Dxb8hy_(C{~A9t6b{(nQ@}2W*759tS&}m za)A91K1Z!Q?3f5NSx`f6D*LUwcR*3r-2=3Sjow}PxyVhIsv9eTUueP|f|0cC_Eryt zEig+%r=Duq$%QOx=d(GSU#SSRAL+SGADo*ja@)bPTCCFAd(YF^&fS7KQ3Uw490cwu z$|kr*_LtdFMe#AR`1C_jS(#ar%9Hb{!3qeXA03;RR$pwXAm|hql43r`7yYP=Mo88P z#-)80Mc53E)Ynhfw=3TQ^VW+dv#*ocZVPH)*jp2DoAu@}Lnw|1O>35*M0wJ*=~-Hz zq=(KBi>Y^y$Pmd!yybp4%X#tbPbb`>I$MXQ?CiNLS20HLib== z+rQRnAPMiS6SXyCff!u+tG63Ns9*VW=6#u>HuDP7thqlew7H3z5I+rTfi$i)Dy4pU zo0mE3=99ph%gt}MsHe*Q1Zp#RcTQVc+RxXV&5^+QJen+AjClpK97WP|rl)HL<9I9C zmnP)%;92`|H7yVW6`)@q|a+#bceA_I<203BFfuCqv5)umD?b) zy_4J%J(5ip-&9P_Ni(E-`5Ttr9>+2Vy#v>*l7QR6TXpl!Qs7ZQ9(pN4I-eea`LQ!M z>>S`3$i!MYrkE}x*5(=zL>ozCO2V5PAnJSu36zVqzbc5l)F0S(@9Cj6jaRrXOCD|H z52mofbjqBfV|;|@P{e`hIseU&uA`A%d%spetzVEt|9yvJ-X zMVr94SS{Z!h_QsxVw=wu$k5t`naP z%r$6qUsT?A-HS&mykKM+3^~G$aGB#Hc-n9;J~i?wEEbO=is$&$4!wWZgtEc?36WV&VJ(b#Guq!A<+4uBHy__0!`QD^4{ z&fO9hF7WHsxMny|888L}#lG?uFsDfoQ{&wAwZEnl56bE-%%0QCSKK`L$;dkxzBzjK zDKr_O@q+*)p`0S{yW7~dQQT8Ks4lSWwoMvE-X9k@eJ~uB06xXSYOd;ZAlgH67oqGh zRgasQgVQ9HaTg2`)h7E+{VcVqxHV63q7sun@>2%CVBznqPyl_x{4|7UD> z`X)iXIV7U=!#b#hNIg$EJT*m1cLi?OgWuhZj zcgA8)_j{;s&)VliZ_h3lpox&}BdfdtX5)vzY0;j;m%=?a_zhK!bwoS*wo+CrUw64` zeLox-D%9+o^0a3g@5J^?B-3NC*=hM&!oQ|Y;D@x}mPk~!w~`ymj!L!LCThRKwDYX|66T!+QA?Ah8uN6|6NP})peD7R3LwI?& z;5{6NH*V1yM^z2oWqNRp}H_qS6nVS;tFa9udJ;XAgF{)~yiq?c1EBUm@j z@|xp9n&OqI2kt?%d09hi{lSRUg}h&=Xy3?5=_rPgpQI>E&7qon#Hwb%3_SD+=_JfU zWKJIF4Hn(#UuANh-231naU@+0pw%~Q^!R^l zWxuX7J#ki9{RQh=csv?(dw-Pl`?%)u@!nL-$7KhKkorKmXV!{_tRtRM0#J(pd|E=RouSX@+;e@* zgQeul&BKB0EsT;5sCf4p@$f4^%+7GYxNa=UZ2Z|yxYXd4gF9win;AoLdel~n!EEtC zR1FQQp7-Ja>Ev1ULS1e<}vGc4J*H- zVG1e)Q%^47umuV2cRBUD#nI^e1|E#74IBNDNmgK?!Q}W1)_-M$H8)6rh6FssKgDBAEdP@J_=4@Y6`a zo}@630t~W%@5o@LKRAT~wg>|gOxS>RItv(}{2MMaqXMH97{Ea~B{-kzw|jvF6VT3J z0gF-p4Sh-pfyE3;@b;fBYBo|pEt3UoM)R9t&Q1wT!5|~;U+^gaEyV!rvM9mVf4Z_c zv4N>97O(~VZ$>sJF>suQ0T^Ucg6A22yBe|uz|v2Dn}Ix-09OtRc#HElq!PshiePYo z>u*RchWh(Fb1o(LEB9|VjyNh1lgk2j<@pVdb49@!{C}CafNvfzcv|RBqC7fqoW}zG 
zD*P8D`28sFd`j@X$Zyv~J~voX6o4ym2bYV(7Ez$CKmsfz@s~*eNEGsdKTG`PVk%<; zorNr5Wl2D@SOmx@!UY(Lae#JZEI^|O13dgcw;Wha3dT@`?ZU{?Ff#ybAOl;Z0jv@( z@Xh~BB;a)kFSuIv_l{dB5&&Py0tli$DhM>!>29Uxs#4fLjw0ted2z_AVzpkBcO zHc!2rHiP=ZlaU|UQ8M6lLwST+*=+nSh)c@(z{Ohti=)lO- zfRq<9oHcONLQ4Ig8vI}MdA+~rSipw{T`H?WOa diff --git a/src/vendormodules/tomlish-1.1.4.tm b/src/vendormodules/tomlish-1.1.4.tm index 7a6d5205..33d5b912 100644 --- a/src/vendormodules/tomlish-1.1.4.tm +++ b/src/vendormodules/tomlish-1.1.4.tm @@ -153,15 +153,10 @@ namespace eval tomlish { } #review - if {[uplevel 1 [list info exists tablenames_seen]]} { - upvar tablenames_seen tablenames_seen + if {[uplevel 1 [list info exists tablenames_info]]} { + upvar tablenames_info tablenames_info } else { - set tablenames_seen [list] ;#list of lists - } - if {[uplevel 1 [list info exists tablenames_closed]]} { - upvar tablenames_closed tablenames_closed - } else { - set tablenames_closed [list] ;#list of lists + set tablenames_info [dict create] ;#keys are lists {parenttable subtable etc} corresponding to parenttable.subtable.etc } foreach sub [lrange $keyval_element 2 end] { @@ -207,13 +202,10 @@ namespace eval tomlish { ARRAY { #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! - set prev_tablenames_seen $tablenames_seen - set prev_tablenames_closed $tablenames_closed - set tablenames_seen [list] - set tablenames_closed [list] + set prev_tablenames_info $tablenames_info + set tablenames_info [dict create] set result [list type $type value [::tomlish::to_dict [list $found_sub]]] - set tablenames_seen $prev_tablenames_seen - set tablenames_closed $prev_tablenames_closed + set tablenames_info $prev_tablenames_info } MULTISTRING - MULTILITERAL { #review - mapping these to STRING might make some conversions harder? 
@@ -295,23 +287,66 @@ namespace eval tomlish { #[Data] #temps = [{cpu = 79.5, case = 72.0}] proc to_dict {tomlish} { + package require dictn #keep track of which tablenames have already been directly defined, # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. #we don't error out just because a previous tablename segment has already appeared. - ##variable tablenames_seen [list] - if {[uplevel 1 [list info exists tablenames_seen]]} { - upvar tablenames_seen tablenames_seen - } else { - set tablenames_seen [list] ;#list of lists - } - if {[uplevel 1 [list info exists tablenames_closed]]} { - upvar tablenames_closed tablenames_closed + + #Declaring, Creating, and Defining Tables + #https://github.com/toml-lang/toml/issues/795 + #(update - only Creating and Defining are relevant terminology) + + #review + #tablenames_info keys created, defined, createdby, definedby, closedby + + #consider the following 2 which are legal: + #[table] #'table' created, defined=open definedby={header table} + #x.y = 3 + #[table.x.z] #'table' defined=closed closedby={header table.x.z}, 'table.x' created, 'table.x.z' created defined=open definedby={header table.x.z} + #k= 22 + # #'table.x.z' defined=closed closedby={eof eof} + + #equivalent datastructure + + #[table] #'table' created, defined=open definedby={header table} + #[table.x] #'table' defined=closed closedby={header table.x}, 'table.x' created defined=open definedby={header table.x} + #y = 3 + #[table.x.z] #'table.x' defined=closed closedby={header table.x.z}, 'table.x.z' created defined=open definedby={header table.x.z} + #k=22 + + #illegal + #[table] #'table' created and defined=open + #x.y = 3 #'table.x' created first keyval pair defined=open definedby={keyval x.y = 3} + #[table.x.y.z] #'table' defined=closed, 'table.x' closed because 
parent 'table' closed?, 'table.x.y' cannot be created + #k = 22 + # + ## - we would fail on encountering table.x.y because only table and table.x are effectively tables - but that table.x is closed should be detected (?) + + #illegal + #[table] + #x.y = {p=3} + #[table.x.y.z] + #k = 22 + ## we should fail because y is an inline table which is closed to further entries + + #note: it is not safe to compare normalized tablenames using join! + # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + + + + if {[uplevel 1 [list info exists tablenames_info]]} { + upvar tablenames_info tablenames_info } else { - set tablenames_closed [list] ;#list of lists + set tablenames_info [dict create] ;#keyed on tablepath each of which is a list such as {config subgroup etc} (corresponding to config.subgroup.etc) } + log::info "---> to_dict processing '$tomlish'<<<" set items $tomlish @@ -354,7 +389,7 @@ namespace eval tomlish { #a.b.c = 1 #table_key_hierarchy -> a b - #leafkey -> c + #tleaf -> c if {[llength $dotted_key_hierarchy] == 0} { #empty?? probably invalid. review #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively @@ -362,10 +397,10 @@ namespace eval tomlish { } elseif {[llength $dotted_key_hierarchy] == 1} { #dottedkey is only a key - no table component set table_hierarchy [list] - set leafkey [lindex $dotted_key_hierarchy 0] + set tleaf [lindex $dotted_key_hierarchy 0] } else { set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] - set leafkey [lindex $dotted_key_hierarchy end] + set tleaf [lindex $dotted_key_hierarchy end] } #ensure empty tables are still represented in the datastructure @@ -380,143 +415,101 @@ namespace eval tomlish { } } #review? 
- if {[dict exists $datastructure {*}$table_hierarchy $leafkey]} { - error "Duplicate key '$table_hierarchy $leafkey'. The key already exists at this level in the toml data. The toml data is not valid." + if {[dict exists $datastructure {*}$table_hierarchy $tleaf]} { + error "Duplicate key '$table_hierarchy $tleaf'. The key already exists at this level in the toml data. The toml data is not valid." } #JMN test 2025 if {[llength $table_hierarchy]} { - lappend tablenames_seen $table_hierarchy + dictn incr tablenames_info [list $table_hierarchy seencount] } set keyval_dict [_get_keyval_value $item] if {![tomlish::dict::is_tomlish_typeval $keyval_dict]} { - lappend tablenames_seen [list {*}$table_hierarchy $leafkey] - lappend tablenames_closed [list {*}$table_hierarchy $leafkey] + set t [list {*}$table_hierarchy $tleaf] + dictn incr tablenames_info [list $t seencount] + dictn set tablenames_info [list $t closed] 1 #review - item is an ITABLE - we recurse here without datastructure context :/ #overwriting keys? todo ? - dict set datastructure {*}$table_hierarchy $leafkey $keyval_dict + dict set datastructure {*}$table_hierarchy $tleaf $keyval_dict } else { - dict set datastructure {*}$table_hierarchy $leafkey $keyval_dict + dict set datastructure {*}$table_hierarchy $tleaf $keyval_dict } + } + TABLEARRAY { + set tablename [lindex $item 1] + log::debug "---> to_dict processing item TABLENAME (name: $tablename): $item" + set norm_segments [::tomlish::utils::tablename_split $tablename true] ;#true to normalize + #we expect repeated tablearray entries - each adding a sub-object to the value, which is an array/list. 
+ } TABLE { set tablename [lindex $item 1] + log::debug "---> to_dict processing item TABLE (name: $tablename): $item" #set tablename [::tomlish::utils::tablename_trim $tablename] set norm_segments [::tomlish::utils::tablename_split $tablename true] ;#true to normalize - if {$norm_segments in $tablenames_seen} { - error "Table name '$tablename' has already been directly defined in the toml data. Invalid." - } - log::debug "---> to_dict processing item $tag (name: $tablename): $item" - set name_segments [::tomlish::utils::tablename_split $tablename] ;#unnormalized - set last_seg "" - #toml spec rule - all segments mst be non-empty - #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. - - set table_key_sublist [list] - - foreach normseg $norm_segments { - lappend table_key_sublist $normseg - if {[dict exists $datastructure {*}$table_key_sublist]} { - #It's ok for this key to already exist *if* it was defined by a previous tablename or equivalent - #and if this key is longer - - #consider the following 2 which are legal: - #[table] - #x.y = 3 - #[table.x.z] - #k= 22 - - #equivalent - - #[table] - #[table.x] - #y = 3 - #[table.x.z] - #k=22 - - #illegal - #[table] - #x.y = 3 - #[table.x.y.z] - #k = 22 - ## - we should fail on encountering table.x.y because only table and table.x are effectively tables - - #illegal - #[table] - #x.y = {p=3} - #[table.x.y.z] - #k = 22 - ## we should fail because y is an inline table which is closed to further entries - - - #note: it is not safe to compare normalized tablenames using join! - # e.g a.'b.c'.d is not the same as a.b.c.d - # instead compare {a b.c d} with {a b c d} - # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. 
- #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' - #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} - - set sublist_length [llength $table_key_sublist] - set found_testkey 0 - if {$table_key_sublist in $tablenames_seen} { - set found_testkey 1 - } else { - #see if it was defined by a longer entry - foreach seen_table_segments $tablenames_seen { - if {[llength $seen_table_segments] <= $sublist_length} { - continue - } - #each tablenames_seen entry is already a list of normalized segments - - #we could have [a.b.c.d] early on - # followed by [a.b] - which was still defined by the earlier one. + set T_DEFINED [dictn getdef $tablenames_info [list $norm_segments defined] NULL] + if {$T_DEFINED ne "NULL"} { + #our tablename e.g [a.b.c.d] declares a space to 'define' subkeys - but there has already been a definition space for this path + set msg "Table name $tablename has already been directly defined in the toml data. Invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] + error $msg + } - set seen_longer [lrange $seen_segments 0 [expr {$sublist_length -1}]] - puts stderr "testkey:'$table_key_sublist' vs seen_match:'$seen_longer'" - if {$table_key_sublist eq $seen_longer} { - set found_testkey 1 - } - } - } - if {$found_testkey == 0} { - #the raw unnormalized tablename might be ok to display in the error message, although it's not the actual dict keyset - set msg "key $table_key_sublist already exists in datastructure, but wasn't defined by a supertable." - append msg \n "tablenames_seen:" \n - foreach ts $tablenames_seen { - append msg " " $ts \n - } + set name_segments [::tomlish::utils::tablename_split $tablename 0] ;#unnormalized e.g ['a'."b".c.d] -> 'a' "b" c d + #results of tablename_split 0 are 'raw' - ie some segments may be enclosed in single or double quotes. 
+ + + set supertable [list] + ############## + # [a.b.c.d] + # norm_segments = {a b c d} + #check a {a b} {a b c} <---- supertables of a.b.c.d + ############## + foreach normseg [lrange $norm_segments 0 end-1] { + lappend supertable $normseg + if {![dictn exists $tablenames_info [list $supertable type]]} { + #supertable with this path doesn't yet exist + if {[dict exists $datastructure {*}$supertable]} { + #There is data though - so it must have been created as a keyval + set msg "Supertable [join $supertable .] of table name $tablename already has data - invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] error $msg + } else { + #here we 'create' it, but it's not being 'defined' ie we're not setting keyvals for it here + dictn set tablenames_info [list $supertable type] header + #ensure empty tables are still represented in the datastructure + dict set datastructure {*}$supertable [list] } - } - - } - - #ensure empty tables are still represented in the datastructure - set key_sublist [list] - foreach k $norm_segments { - lappend key_sublist $k - if {![dict exists $datastructure {*}$key_sublist]} { - dict set datastructure {*}$key_sublist [list] } else { - tomlish::log::notice "to_dict datastructure at (TABLE) subkey $key_sublist already had data: [dict get $datastructure {*}$key_sublist]" + #supertable has already been created - and maybe defined - but even if defined we can add subtables } } + #table [a.b.c.d] hasn't been defined - but may have been 'created' already by a longer tablename + # - or may have existing data from a keyval + if {![dictn exists $tablenames_info [list $norm_segments type]]} { + if {[dict exists $datastructure {*}$norm_segments]} { + set msg "Table name $tablename already has data - invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] + error $msg + } + #no data or previously created table + dictn set tablenames_info [list $norm_segments type] header - #We must do this after the key-collision 
test above! - lappend tablenames_seen $norm_segments - - + #We are 'defining' this table's keys and values here (even if empty) + dict set datastructure {*}$norm_segments [list] ;#ensure table still represented in datastructure even if we add no keyvals here + } + dictn set tablenames_info [list $norm_segments defined] open log::debug ">>> to_dict >>>>>>>>>>>>>>>>> normalized table key hierarchy : $norm_segments" #now add the contained elements foreach element [lrange $item 2 end] { set type [lindex $element 0] - log::debug "----> tododict processing $tag subitem $type processing contained element $element" + log::debug "----> todict processing $tag subitem $type processing contained element $element" switch -exact -- $type { DOTTEDKEY { set dkey_info [_get_dottedkey_info $element] @@ -547,14 +540,19 @@ namespace eval tomlish { puts stdout "to_dict>>> $keyval_dict" dict set datastructure {*}$norm_segments {*}$dkeys $leaf_key $keyval_dict #JMN 2025 - lappend tablenames_seen [list {*}$norm_segments {*}$dkeys] + #lappend tablenames_info [list {*}$norm_segments {*}$dkeys] + set tkey [list {*}$norm_segments {*}$dkeys] + dictn incr tablenames_info [list $tkey seencount] if {![tomlish::dict::is_tomlish_typeval $keyval_dict]} { #the value is either empty or or a dict structure with arbitrary (from-user-data) toplevel keys # inner structure will contain {type value } if all leaves are not empty ITABLES - lappend tablenames_seen [list {*}$norm_segments {*}$dkeys $leaf_key] + set tkey [list {*}$norm_segments {*}$dkeys $leaf_key] + #lappend tablenames_info [list {*}$norm_segments {*}$dkeys $leaf_key] + dictn incr tablenames_info [list $tkey seencount] #if the keyval_dict is not a simple type x value y - then it's an inline table ? #if so - we should add the path to the leaf_key as a closed table too - as it's not allowed to have more entries added. 
+ dictn set tablenames_info [list $tkey closed] 1 } } @@ -562,7 +560,7 @@ namespace eval tomlish { #ignore } default { - error "Sub element of type '$type' not understood in table context. Expected only KEY,DQKEY,SQKEY,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in table context. Expected only DOTTEDKEY,NEWLINE,COMMENT,WS" } } } @@ -1316,7 +1314,12 @@ namespace eval tomlish::encode { #NOTE - this DELIBERATELY does not validate the data, or process escapes etc #It encodes the tomlish records as they are. #ie it only produces toml shaped data from a tomlish list. + # #It is part of the roundtripability of data from toml to tomlish + #!! ie - it is not the place to do formatting of inline vs multiline !! + # That needs to be encoded in the tomlish data that is being passed in + # (e.g from_dict could make formatting decisions in the tomlish it produces) + # #e.g duplicate keys etc can exist in the toml output. #The to_dict from_dict (or any equivalent processor pair) is responsible for validation and conversion #back and forth of escape sequences where appropriate. @@ -1646,17 +1649,27 @@ namespace eval tomlish::decode { #pop_trigger_tokens: newline tablename endarray endinlinetable #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. 
set parentlevel [expr {$nest -1}] - set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like tentative_accum_squote need to do their own append switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote { + #should only apply within a multiliteral #### set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed #Without this - we would get extraneous empty list entries in the parent # - as the xxx-squote-space isn't a space level from the toml perspective # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + #assert prevstate always trailing-squote-space + #dev guardrail - remove? assertion lib? + switch -exact -- $prevstate { + trailing-squote-space { + } + default { + error "--- unexpected popped due to tentative_accum_squote but came from state '$prevstate' should have been trailing-squote-space" + } + } switch -- $tok { ' { - tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + tomlish::parse::set_token_waiting type single_squote value $tok complete 1 startindex [expr {$i -1}] } '' { #review - we should perhaps return double_squote instead? 
@@ -1669,74 +1682,51 @@ namespace eval tomlish::decode { tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] } '''' { - switch -exact -- $prevstate { - leading-squote-space { - error "---- 4 squotes from leading-squote-space - shouldn't get here" - #we should have emitted the triple and left the last for next loop + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata } - trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] - #todo integrate left squote with nest data at this level - set lastpart [lindex $v($parentlevel) end] - switch -- [lindex $lastpart 0] { - LITERALPART { - set newval "[lindex $lastpart 1]'" - set parentdata $v($parentlevel) - lset parentdata end [list LITERALPART $newval] - set v($parentlevel) $parentdata - } - NEWLINE { - lappend v($parentlevel) [list LITERALPART "'"] - } - MULTILITERAL { - #empty - lappend v($parentlevel) [list LITERALPART "'"] - } - default { - error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" - } - } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + MULTILITERAL { + #empty + lappend v($parentlevel) [list LITERALPART "'"] } default { - error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" } } } ''''' { - switch -exact -- $prevstate { - leading-squote-space { - error "---- 5 squotes from leading-squote-space - 
shouldn't get here" - #we should have emitted the triple and left the following squotes for next loop + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata } - trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] - #todo integrate left 2 squotes with nest data at this level - set lastpart [lindex $v($parentlevel) end] - switch -- [lindex $lastpart 0] { - LITERALPART { - set newval "[lindex $lastpart 1]''" - set parentdata $v($parentlevel) - lset parentdata end [list LITERALPART $newval] - set v($parentlevel) $parentdata - } - NEWLINE { - lappend v($parentlevel) [list LITERALPART "''"] - } - MULTILITERAL { - lappend v($parentlevel) [list LITERALPART "''"] - } - default { - error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" - } - } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "''"] + } + MULTILITERAL { + lappend v($parentlevel) [list LITERALPART "''"] } default { - error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" } } } } - puts stderr "tomlish::decode::toml ---- HERE squote_seq pop <$tok>" } triple_squote { #presumably popping multiliteral-space @@ -1763,7 +1753,119 @@ namespace eval tomlish::decode { lappend merged $part } default { - error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + error "---- triple_squote unhandled part type [lindex $part 0] unable 
to merge leveldata: $v($nest)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + tentative_accum_dquote { + #should only apply within a multistring + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the trailing-dquote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + #assert prevstate always trailing-dquote-space + #dev guardrail - remove? assertion lib? + switch -exact -- $prevstate { + trailing-dquote-space { + } + default { + error "--- unexpected popped due to tentative_accum_dquote but came from state '$prevstate' should have been trailing-dquote-space" + } + } + switch -- $tok { + {"} { + tomlish::parse::set_token_waiting type single_dquote value $tok complete 1 startindex [expr {$i -1}] + } + {""} { + #review - we should perhaps return double_dquote instead? 
+ #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_dquote value "" complete 1 startindex [expr {$i - 2}] + } + {"""} { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 3}] + } + {""""} { + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left dquote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + STRINGPART { + set newval "[lindex $lastpart 1]\"" + set parentdata $v($parentlevel) + lset parentdata end [list STRINGPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE - CONT - WS { + lappend v($parentlevel) [list STRINGPART {"}] + } + MULTISTRING { + #empty + lappend v($parentlevel) [list STRINGPART {"}] + } + default { + error "--- don't know how to integrate extra trailing dquote with data $v($parentlevel)" + } + } + } + {"""""} { + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 dquotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + STRINGPART { + set newval "[lindex $lastpart 1]\"\"" + set parentdata $v($parentlevel) + lset parentdata end [list STRINGPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE - CONT - WS { + lappend v($parentlevel) [list STRINGPART {""}] + } + MULTISTRING { + lappend v($parentlevel) [list STRINGPART {""}] + } + default { + error "--- don't know how to integrate extra trailing 2 dquotes with data $v($parentlevel)" + } + } + } + } + } + triple_dquote { + #presumably popping multistring-space + ::tomlish::log::debug "---- triple_dquote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + 
switch -exact -- [lindex $part 0] { + MULTISTRING { + lappend merged $part + } + STRINGPART { + if {$lasttype eq "STRINGPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + CONT - WS { + lappend merged $part + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multistrings - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. + lappend merged $part + } + default { + error "---- triple_dquote unhandled part type [lindex $part 0] unable to merge leveldata: $v($nest)" } } set lasttype [lindex $part 0] @@ -1809,15 +1911,12 @@ namespace eval tomlish::decode { endinlinetable { ::tomlish::log::debug "---- endinlinetable for last_space_action pop" } - endmultiquote { - ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" - } default { error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" } } if {$do_append_to_parent} { - #e.g squote_seq does it's own appends as necessary - so won't get here + #e.g tentative_accum_squote does its own appends as necessary - so won't get here lappend v($parentlevel) [set v($nest)] } @@ -1831,8 +1930,8 @@ namespace eval tomlish::decode { switch -exact -- $tokenType { - squote_seq_begin { - #### + tentative_trigger_squote - tentative_trigger_dquote { + #### this starttok will always be tentative_accum_squote/tentative_accum_dquote starting with one accumulated squote/dquote if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { lassign [dict get $transition_info starttok] starttok_type starttok_val set next_tokenType_known 1 @@ -1840,6 +1939,16 @@ namespace eval tomlish::decode { set tok $starttok_val } } + single_squote { + #JMN - REVIEW + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + triple_squote { + 
::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERALPART + } squotedkey { switch -exact -- $prevstate { table-space - itable-space { @@ -1849,6 +1958,9 @@ namespace eval tomlish::decode { #todo - check not something already waiting? tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } + triple_dquote { + set v($nest) [list MULTISTRING] ;#container for NEWLINE,STRINGPART,CONT + } dquotedkey { switch -exact -- $prevstate { table-space - itable-space { @@ -1858,7 +1970,7 @@ namespace eval tomlish::decode { #todo - check not something already waiting? tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } - XXXdquotedkey - XXXitablequotedkey { + XXXdquotedkey { #todo set v($nest) [list DQKEY $tok] ;#$tok is the keyname } @@ -1878,34 +1990,29 @@ namespace eval tomlish::decode { tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } } - startsquote { - #JMN - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "squotedkey" - set tok "" - } tablename { #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish # back to toml file will be identical. #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, - # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. 
+ # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names from + # a structural perspective. #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the # tomlish list? - set test_only [::tomlish::utils::tablename_trim $tok] - ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" + #set trimtable [::tomlish::utils::tablename_trim $tok] + #::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$trimtable'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. } tablearrayname { - set test_only [::tomlish::utils::tablename_trim $tok] - puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + #set trimtable [::tomlish::utils::tablename_trim $tok] + #::tomlish::log::debug "---- trimmed (but not normalized) tablearrayname: '$trimtable'" set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name } startarray { @@ -1914,14 +2021,6 @@ namespace eval tomlish::decode { startinlinetable { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. 
} - startmultiquote { - ::tomlish::log::debug "---- push trigger tokenType startmultiquote" - set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE - } - triple_squote { - ::tomlish::log::debug "---- push trigger tokenType triple_squote" - set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL - } default { error "---- push trigger tokenType '$tokenType' not yet implemented" } @@ -1931,11 +2030,11 @@ namespace eval tomlish::decode { #no space level change switch -exact -- $tokenType { squotedkey { - puts "---- squotedkey in state $prevstate (no space level change)" + #puts "---- squotedkey in state $prevstate (no space level change)" lappend v($nest) [list SQKEY $tok] } dquotedkey { - puts "---- dquotedkey in state $prevstate (no space level change)" + #puts "---- dquotedkey in state $prevstate (no space level change)" lappend v($nest) [list DQKEY $tok] } barekey { @@ -1960,29 +2059,46 @@ namespace eval tomlish::decode { startinlinetable { puts stderr "---- decode::toml error. 
did not expect startinlinetable without space level change (no space level change)" } - startquote { + single_dquote { switch -exact -- $newstate { string-state { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "string" set tok "" } - quoted-key { + dquoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "dquotedkey" set tok "" } - XXXitable-quoted-key { - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "itablequotedkey" - set tok "" + multistring-space { + lappend v($nest) [list STRINGPART {"}] + #may need to be joined on pop if there are neighbouring STRINGPARTS + } + default { + error "---- single_dquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + double_dquote { + #leading extra quotes - test: toml_multistring_startquote2 + switch -exact -- $prevstate { + itable-keyval-value-expected - keyval-value-expected { + puts stderr "tomlish::decode::toml double_dquote TEST" + #empty string + lappend v($nest) [list STRINGPART ""] + } + multistring-space { + #multistring-space to multistring-space + lappend v($nest) [list STRINGPART {""}] } default { - error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" } } + } - startsquote { + single_squote { switch -exact -- $newstate { literal-state { set next_tokenType_known 1 @@ -1995,41 +2111,17 @@ namespace eval tomlish::decode { set tok "" } multiliteral-space { - #false alarm squote returned from squote_seq pop + #false alarm squote returned from tentative_accum_squote pop ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" #(single squote - not terminating space) lappend v($nest) [list LITERALPART '] #may need to be joined on pop if there are neighbouring LITERALPARTs } default { 
- error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" + error "---- single_squote switch case not implemented for nextstate: $newstate (no space level change)" } } } - startmultiquote { - #review - puts stderr "---- got startmultiquote in state $prevstate (no space level change)" - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "stringpart" - set tok "" - } - endquote { - #nothing to do? - set tok "" - } - endsquote { - set tok "" - } - endmultiquote { - #JMN!! - set tok "" - } - string { - lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes - } - literal { - lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes - } double_squote { switch -exact -- $prevstate { keyval-value-expected { @@ -2044,6 +2136,19 @@ namespace eval tomlish::decode { } } } + enddquote { + #nothing to do? + set tok "" + } + endsquote { + set tok "" + } + string { + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes + } + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } multistring { #review lappend v($nest) [list MULTISTRING $tok] @@ -2056,11 +2161,9 @@ namespace eval tomlish::decode { } literalpart { lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly - } - itablequotedkey { - } untyped_value { + #would be better termed unclassified_value #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. 
if {$tok in {true false}} { set tag BOOL @@ -2238,7 +2341,7 @@ namespace eval tomlish::utils { #eg {dog."tater.man"} set sLen [tcl::string::length $tablename] set segments [list] - set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + set mode "preval" ;#5 modes: preval, quoted,litquoted, unquoted, postval #quoted is for double-quotes, litquoted is for single-quotes (string literal) set seg "" for {set i 0} {$i < $sLen} {incr i} { @@ -2249,139 +2352,166 @@ namespace eval tomlish::utils { set lastChar "" } + #todo - track\count backslashes properly + set c [tcl::string::index $tablename $i] + if {$c eq "\""} { + if {($lastChar eq "\\")} { + #not strictly correct - we could have had an even number prior-backslash sequence + #the toml spec would have us error out immediately on bsl in bad location - but we're + #trying to parse to unvalidated tomlish + set ctest escq + } else { + set ctest dq + } + } else { + set ctest [string map [list " " sp \t tab] $c] + } - if {$c eq "."} { - switch -exact -- $mode { - unquoted { - #dot marks end of segment. - lappend segments $seg - set seg "" - set mode "unknown" - } - quoted { - append seg $c - } - unknown { - lappend segments $seg - set seg "" - } - litquoted { - append seg $c - } - default { - #mode: syntax - #we got our dot. - the syntax mode is now satisfied. - set mode "unknown" + switch -- $ctest { + . { + switch -exact -- $mode { + preval { + error "tablename_split. dot not allowed - expecting a value" + } + unquoted { + #dot marks end of segment. + #if {![is_barekey $seg]} { + # error "tablename_split. 
dot not allowed - expecting a value" + #} + lappend segments $seg + set seg "" + set mode "preval" + } + quoted { + append seg $c + } + litquoted { + append seg $c + } + postval { + #got dot in an expected location + set mode "preval" + } } } - } elseif {($c eq "\"") && ($lastChar ne "\\")} { - if {$mode eq "unknown"} { - if {[tcl::string::trim $seg] ne ""} { - #we don't allow a quote in the middle of a bare key - error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" - } - set mode "quoted" - set seg "\"" - } elseif {$mode eq "unquoted"} { - append seg $c - } elseif {$mode eq "quoted"} { - append seg $c - - if {$normalize} { - lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] - } else { - lappend segments $seg + dq { + #unescaped dquote + switch -- $mode { + preval { + set mode "quoted" + set seg "\"" + } + unquoted { + #invalid in barekey - but we are after structure only + append seg $c + } + quoted { + append seg $c + if {$normalize} { + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] + } else { + lappend segments $seg + } + set seg "" + set mode "postval" ;#make sure we only accept a dot or end-of-data now. + } + litquoted { + append seg $c + } + postval { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } } - - set seg "" - set mode "syntax" ;#make sure we only accept a dot or end-of-data now. - } elseif {$mode eq "litquoted"} { - append seg $c - } elseif {$mode eq "syntax"} { - error "tablename_split. expected whitespace or dot, got double quote. 
tablename: '$tablename'" - } - } elseif {($c eq "\'")} { - if {$mode eq "unknown"} { - append seg $c - set mode "litquoted" - } elseif {$mode eq "unquoted"} { - #single quote inside e.g o'neill - append seg $c - } elseif {$mode eq "quoted"} { - append seg $c - - } elseif {$mode eq "litquoted"} { - append seg $c - #no normalization to do - lappend segments $seg - set seg "" - set mode "syntax" - } elseif {$mode eq "syntax"} { - error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" } - - } elseif {$c in [list " " \t]} { - if {$mode eq "syntax"} { - #ignore - } else { - append seg $c + ' { + switch -- $mode { + preval { + append seg $c + set mode "litquoted" + } + unquoted { + #single quote inside e.g o'neill - ultimately invalid - but we pass through here. + append seg $c + } + quoted { + append seg $c + } + litquoted { + append seg $c + #no normalization to do aside from stripping squotes + if {$normalize} { + lappend segments [tcl::string::range $seg 1 end-1] + } else { + lappend segments $seg + } + set seg "" + set mode "postval" + } + postval { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + } } - } else { - if {$mode eq "syntax"} { - error "tablename_split. Expected a dot separator. got '$c'. tablename: '$tablename'" + sp - tab { + switch -- $mode { + preval - postval { + #ignore + } + unquoted { + #terminates a barekey + lappend segments $seg + set seg "" + set mode "postval" + } + default { + #append to quoted or litquoted + append seg $c + } + } } - if {$mode eq "unknown"} { - set mode "unquoted" + default { + switch -- $mode { + preval { + set mode unquoted + append seg $c + } + postval { + error "tablename_split. Expected a dot separator. got '$c'. 
tablename: '$tablename'" + } + default { + append seg $c + } + } } - append seg $c } + if {$i == $sLen-1} { #end of data ::tomlish::log::debug "End of data: mode='$mode'" - #REVIEW - we can only end up in unquoted or syntax here? are other branches reachable? switch -exact -- $mode { - quoted { - if {$c ne "\""} { - error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" - } - if {$normalize} { - lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] - #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong - } else { - lappend segments $seg - } + preval { + error "tablename_split. Expected a value after last dot separator. tablename: '$tablename'" } - litquoted { - set trimmed_seg [tcl::string::trim $seg] - if {[tcl::string::index $trimmed_seg end] ne "\'"} { - error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" - } + unquoted { lappend segments $seg } - unquoted - unknown { - lappend segments $seg + quoted { + error "tablename_split. Expected a trailing double quote. tablename: '$tablename'" } - syntax { - #ok - segment already lappended + litquoted { + error "tablename_split. Expected a trailing single quote. tablename: '$tablename'" } - default { - lappend segments $seg + postval { + #ok - segment already lappended } } } } - foreach seg $segments { - set trimmed [tcl::string::trim $seg " \t"] - #note - we explicitly allow 'empty' quoted strings '' & "" - # (these are 'discouraged' but valid toml keys) - #if {$trimmed in [list "''" "\"\""]} { - # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" - #} - if {$trimmed eq "" } { - error "tablename_split. Empty segment found. 
tablename: '$tablename' segments [llength $segments] ($segments)" - } - } + + #note - we must allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + return $segments } @@ -2432,26 +2562,34 @@ namespace eval tomlish::utils { #- escape_string and unescape_string would not be reliably roundtrippable inverses anyway. #REVIEW - provide it anyway? When would it be desirable to use? - variable Bstring_control_map [list\ - \b {\b}\ - \n {\n}\ - \r {\r}\ - \" {\"}\ - \x1b {\e}\ - \\ "\\\\"\ - ] + variable Bstring_control_map [dict create] + dict set Bstring_control_map \b {\b} + dict set Bstring_control_map \n {\n} + dict set Bstring_control_map \r {\r} + dict set Bstring_control_map \" {\"} + #dict set Bstring_control_map \x1b {\e} ;#should presumably be only be a convenience for decode - going the other way we get \u001B + dict set Bstring_control_map \\ "\\\\" + #\e for \x1b seems like it might be included - v1.1?? hard to find current state of where toml is going :/ #for a Bstring (Basic string) tab is explicitly mentioned as not being one that must be escaped. - for {set cdec 0} {$cdec <= 8} {incr cdec} { + #8 = \b - already in list. 
+ #built the remainder whilst checking for entries already hardcoded above -in case more are added to the hardcoded list + for {set cdec 0} {$cdec <= 7} {incr cdec} { set hhhh [format %.4X $cdec] - lappend Bstring_control_map [format %c $cdec] \\u$hhhh + set char [format %c $cdec] + if {![dict exists $Bstring_control_map $char]} { + dict set Bstring_control_map $char \\u$hhhh + } } for {set cdec [expr {0x0A}]} {$cdec <= 0x1F} {incr cdec} { set hhhh [format %.4X $cdec] - lappend Bstring_control_map [format %c $cdec] \\u$hhhh + set char [format %c $cdec] + if {![dict exists $Bstring_control_map $char]} { + dict set Bstring_control_map $char \\u$hhhh + } } # \u007F = 127 - lappend Bstring_control_map [format %c 127] \\u007F + dict set Bstring_control_map [format %c 127] \\u007F #Note the inclusion of backslash in the list of controls makes this non idempotent - subsequent runs would keep encoding the backslashes! #escape only those chars that must be escaped in a Bstring (e.g not tab which can be literal or escaped) @@ -2474,6 +2612,7 @@ namespace eval tomlish::utils { # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn # it replaces \ with a single whitespace (trailing backslash) #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + #plus \e for \x1b? 
set buffer "" set buffer4 "" ;#buffer for 4 hex characters following a \u @@ -2558,12 +2697,13 @@ namespace eval tomlish::utils { set ctest [tcl::string::map {{"} dq} $c] switch -exact -- $ctest { dq { - set e "\\\"" - append buffer [subst -nocommand -novariable $e] + append buffer {"} } b - t - n - f - r { - set e "\\$c" - append buffer [subst -nocommand -novariable $e] + append buffer [subst -nocommand -novariable "\\$c"] + } + e { + append buffer \x1b } u { set unicode4_active 1 @@ -2578,8 +2718,7 @@ namespace eval tomlish::utils { #review - toml spec says all other escapes are reserved #and if they are used TOML should produce an error. #we leave detecting this for caller for now - REVIEW - append buffer "\\" - append buffer $c + append buffer "\\$c" } } } else { @@ -3003,7 +3142,7 @@ namespace eval tomlish::parse { # states: # table-space, itable-space, array-space # array-value-expected,keyval-value-expected,itable-keyval-value-expected, keyval-syntax, - # quoted-key, squoted-key + # dquoted-key, squoted-key # string-state, literal-state, multistring... # # notes: @@ -3039,6 +3178,12 @@ namespace eval tomlish::parse { variable stateMatrix set stateMatrix [dict create] + #--------------------------------------------------------- + #WARNING + #The stateMatrix implementation here is currently messy. + #The code is a mixture of declarative via the stateMatrix and imperative via switch statements during PUSH/POP/SAMESPACE transitions. + #This means the state behaviour has to be reasoned about by looking at both in conjuction. 
+ #--------------------------------------------------------- #xxx-space vs xxx-syntax inadequately documented - TODO @@ -3060,35 +3205,19 @@ namespace eval tomlish::parse { barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ dquotedkey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ - XXXstartquote "quoted-key"\ - XXXstartsquote "squoted-key"\ + XXXsingle_dquote "quoted-key"\ + XXXsingle_squote "squoted-key"\ comment "table-space"\ starttablename "tablename-state"\ starttablearrayname "tablearrayname-state"\ - startmultiquote "err-state"\ - endquote "err-state"\ + enddquote "err-state"\ + endsquote "err-state"\ comma "err-state"\ eof "end-state"\ equal "err-state"\ cr "err-lonecr"\ } - #itable-space/ curly-syntax : itables - dict set stateMatrix\ - itable-space {\ - whitespace "itable-space"\ - newline "itable-space"\ - barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - dquotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - endinlinetable "POPSPACE"\ - XXXstartquote "quoted-key"\ - XXXstartsquote {TOSTATE "squoted-key" comment "jn-testing"}\ - comma "err-state"\ - comment "itable-space"\ - eof "err-state"\ - } - #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}} dict set stateMatrix\ @@ -3113,26 +3242,19 @@ namespace eval tomlish::parse { dict set stateMatrix\ keyval-value-expected {\ whitespace "keyval-value-expected"\ - untyped_value {TOSTATE "keyval-tail" note ""}\ - startquote {TOSTATE "string-state" returnstate keyval-tail}\ - startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ - squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ - startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is 
triggered"}\ - double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ - triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ - startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ - startarray {PUSHSPACE array-space returnstate keyval-tail}\ - } - #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} - dict set stateMatrix\ - leading-squote-space {\ - squote_seq "POPSPACE"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + literal {TOSTATE "keyval-tail" note "required for empty literal at EOF"}\ + string {TOSTATE "keyval-tail" note "required for empty string at EOF"}\ + single_dquote {TOSTATE "string-state" returnstate keyval-tail}\ + triple_dquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + single_squote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ } - #dict set stateMatrix\ - # keyval-process-leading-squotes {\ - # startsquote "literal-state"\ - # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ - # } + #double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"} + + #2025 - no leading-squote-space - only trailing-squote-space. 
dict set stateMatrix\ keyval-tail {\ @@ -3142,81 +3264,106 @@ namespace eval tomlish::parse { eof "end-state"\ } + + #itable-space/ curly-syntax : itables + # x={y=1,} + dict set stateMatrix\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + dquotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + comma "err-state"\ + comment "itable-space"\ + eof "err-state"\ + } + #we don't get single_squote etc here - instead we get the resulting squotedkey token + + + # ??? review - something like this + # + # x={y =1,} dict set stateMatrix\ itable-keyval-syntax {\ - whitespace "itable-keyval-syntax"\ - barekey {PUSHSPACE "dottedkey-space"}\ - squotedkey {PUSHSPACE "dottedkey-space"}\ - dquotedkey {PUSHSPACE "dottedkey-space"}\ - equal "itable-keyval-value-expected"\ + whitespace {TOSTATE "itable-keyval-syntax"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + dquotedkey {PUSHSPACE "dottedkey-space"}\ + equal {TOSTATE "itable-keyval-value-expected"}\ newline "err-state"\ eof "err-state"\ } + + # x={y=1} + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + dict set stateMatrix\ itable-keyval-value-expected {\ whitespace "itable-keyval-value-expected"\ untyped_value {TOSTATE "itable-val-tail" note ""}\ - startquote {TOSTATE "string-state" returnstate itable-val-tail}\ - startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ - squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ - startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ - double_squote {TOSTATE "itable-val-tail" 
note "empty literal received when double squote occurs"}\ + single_dquote {TOSTATE "string-state" returnstate itable-val-tail}\ + triple_dquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + single_squote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ } - dict set stateMatrix\ - itable-keyval-space {\ - whitespace "itable-keyval-syntax"\ - equal {TOSTATE "itable-keyval-value-expected" note "required"}\ - } + #double_squote not currently generated by _start_squote_sequence - '' processed as single_squote to literal-state just like 'xxx' + # review + # double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"} + + + # x={y=1,z="x"} + #POPSPACE is transition from itable-keyval-space to parent itable-space dict set stateMatrix\ itable-val-tail {\ whitespace "itable-val-tail"\ endinlinetable "POPSPACE"\ comma "POPSPACE"\ - XXXnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ - newline "POPSPACE"\ + newline {TOSTATE "itable-val-tail" note "itable-space ??"}\ comment "itable-val-tail"\ eof "err-state"\ } - #dict set stateMatrix\ - # itable-quoted-key {\ - # whitespace "NA"\ - # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ - # newline "err-state"\ - # endquote "itable-keyval-syntax"\ - # } - #dict set stateMatrix\ - # itable-squoted-key {\ - # whitespace "NA"\ - # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ - # newline "err-state"\ - # endsquote "itable-keyval-syntax"\ - # } + # XXXnewline "POPSPACE" + # We shouldn't popspace on newline - as if there was no comma we need to stay in itable-val-tail + # This means the newline and subsequent whitespace, comments etc become part of the preceeding dottedkey record + #e.g + # x = { + # j=1 + # 
#comment within dottedkey j record + # , # comment unattached + # #comment unattached + # k=2 , #comment unattached + # l=3 #comment within l record + # , m=4 + # #comment associated with m record + # + # #still associated with m record + # } + ## - This doesn't quite correspond to what a user might expect - but seems like a consistent mechanism. + #The awkwardness is because there is no way to put in a comment that doesn't consume a trailing comma + #so we cant do: j= 1 #comment for j1 , + # and have the trailing comma recognised. + # + # To associate: j= 1, #comment for j1 + # we would need some extra processing . (not popping until next key ? extra state itable-sep-tail?) REVIEW - worth doing? + # + # The same issue occurs with multiline arrays. The most natural assumption is that a comment on same line after a comma + # is 'associated' with the previous entry. + # + # These comment issues are independent of the data dictionary being generated for conversion to json etc - as the comments don't carry through anyway, + # but are a potential oddity for manipulating the intermediate tomlish structure whilst attempting to preserve 'associated' comments + # (e.g reordering records within an itable) + #The user's intention for 'associated' isn't always clear and the specs don't really guide on this. - - - #array-value-expected ? - dict set stateMatrix\ - XXXvalue-expected {\ - whitespace "value-expected"\ - untyped_value {"SAMESPACE" "" replay untyped_value}\ - startquote "string-state"\ - startsquote "literal-state"\ - triple_squote {PUSHSPACE "multiliteral-space"}\ - startmultiquote {PUSHSPACE "multistring-space"}\ - startinlinetable {PUSHSPACE itable-space}\ - startarray {PUSHSPACE array-space}\ - comment "err-state-value-expected-got-comment"\ - comma "err-state"\ - newline "err-state"\ - eof "err-state"\ - } - #note comment token should never be delivered to array-value-expected state? 
- #dottedkey-space is not (currently) used within [tablename] or [[tablearrayname]] #it is for keyval ie x.y.z = value @@ -3245,6 +3392,8 @@ namespace eval tomlish::parse { whitespace "dottedkey-space-tail" dotsep "dottedkey-space" equal "POPSPACE"\ + eof "err-state"\ + newline "err-state"\ } #-------------------------------------------------------------------------- @@ -3262,22 +3411,10 @@ namespace eval tomlish::parse { #toml spec looks like heading towards allowing newlines within inline tables #https://github.com/toml-lang/toml/issues/781 - #2025 - appears to be valid for 1.1 - which we are targeting. + #2025 - multiline itables appear to be valid for 1.1 - which we are targeting. #https://github.com/toml-lang/toml/blob/main/toml.md#inline-table #JMN2025 - #dict set stateMatrix\ - # curly-syntax {\ - # whitespace "curly-syntax"\ - # newline "curly-syntax"\ - # barekey {PUSHSPACE "itable-keyval-space"}\ - # itablequotedkey "itable-keyval-space"\ - # endinlinetable "POPSPACE"\ - # startquote "itable-quoted-key"\ - # comma "itable-space"\ - # comment "itable-space"\ - # eof "err-state"\ - # } #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES #We currently allow multiline ITABLES (also with comments) in the tokenizer. #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? 
@@ -3291,10 +3428,9 @@ namespace eval tomlish::parse { # untyped_value "SAMESPACE"\ # startarray {PUSHSPACE "array-space"}\ # endarray "POPSPACE"\ - # startmultiquote {PUSHSPACE multistring-space}\ # startinlinetable {PUSHSPACE itable-space}\ - # startquote "string-state"\ - # startsquote "literal-state"\ + # single_dquote "string-state"\ + # single_squote "literal-state"\ # triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ # comma "array-space"\ # comment "array-space"\ @@ -3305,15 +3441,16 @@ namespace eval tomlish::parse { set aspace [dict create] dict set aspace whitespace "array-space" dict set aspace newline "array-space" - dict set aspace untyped_value "SAMESPACE" + #dict set aspace untyped_value "SAMESPACE" + dict set aspace untyped_value "array-syntax" dict set aspace startarray {PUSHSPACE "array-space"} dict set aspace endarray "POPSPACE" - dict set aspace startmultiquote {PUSHSPACE multistring-space} + dict set aspace single_dquote {TOSTATE "string-state" returnstate array-syntax} + dict set aspace triple_dquote {PUSHSPACE "multistring-space" returnstate array-syntax} + dict set aspace single_squote {TOSTATE "literal-state" returnstate array-syntax} + dict set aspace triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax} dict set aspace startinlinetable {PUSHSPACE itable-space} - dict set aspace startquote "string-state" - dict set aspace startsquote "literal-state" - dict set aspace triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"} - dict set aspace comma "array-space" + #dict set aspace comma "array-space" dict set aspace comment "array-space" dict set aspace eof "err-state-array-space-got-eof" dict set stateMatrix array-space $aspace @@ -3329,26 +3466,16 @@ namespace eval tomlish::parse { #dict set asyntax untyped_value "SAMESPACE" #dict set asyntax startarray {PUSHSPACE array-space} dict set asyntax endarray "POPSPACE" - #dict set asyntax 
startmultiquote {PUSHSPACE multistring-space} - #dict set asyntax startquote "string-state" - #dict set asyntax startsquote "literal-state" + #dict set asyntax single_dquote "string-state" + #dict set asyntax single_squote "literal-state" dict set asyntax comma "array-space" dict set asyntax comment "array-syntax" dict set stateMatrix array-syntax $asyntax - #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space - dict set stateMatrix\ - quoted-key {\ - whitespace "NA"\ - dquotedkey {PUSHSPACE "keyval-space"}\ - newline "err-state"\ - endquote "keyval-syntax"\ - } - - #review + #dquotedkey is a token - dquoted-key is a state dict set stateMatrix\ dquoted-key {\ whitespace "NA"\ @@ -3367,7 +3494,7 @@ namespace eval tomlish::parse { string-state {\ whitespace "NA"\ string "string-state"\ - endquote "SAMESPACE"\ + enddquote "SAMESPACE"\ newline "err-state"\ eof "err-state"\ } @@ -3381,20 +3508,21 @@ namespace eval tomlish::parse { } - #dict set stateMatrix\ - # stringpart {\ - # continuation "SAMESPACE"\ - # endmultiquote "POPSPACE"\ - # eof "err-state"\ - # } dict set stateMatrix\ multistring-space {\ - whitespace "multistring-space"\ - continuation "multistring-space"\ - stringpart "multistring-space"\ - newline "multistring-space"\ - endmultiquote "POPSPACE"\ - eof "err-state"\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + tentative_trigger_dquote {PUSHSPACE "trailing-dquote-space" returnstate multistring-space starttok {tentative_accum_dquote {"}}}\ + single_dquote {TOSTATE multistring-space}\ + double_dquote {TOSTATE multistring-space}\ + triple_dquote {POPSPACE}\ + eof "err-state"\ + } + dict set stateMatrix\ + trailing-dquote-space { + tentative_accum_dquote "POPSPACE" } @@ -3402,19 +3530,19 @@ namespace eval tomlish::parse { #todo - treat sole cr as part of literalpart but crlf and lf as newline dict set stateMatrix\ multiliteral-space {\ - 
literalpart "multiliteral-space"\ - newline "multiliteral-space"\ - squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ - triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ - double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ - startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ - eof "err-premature-eof-in-multiliteral-space"\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + tentative_trigger_squote {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {tentative_accum_squote "'"}}\ + single_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: false alarm this squote is part of data"}\ + double_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + eof "err-premature-eof-in-multiliteral-space"\ } #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes dict set stateMatrix\ - trailing-squote-space {\ - squote_seq "POPSPACE"\ + trailing-squote-space { + tentative_accum_squote "POPSPACE" } @@ -3499,7 +3627,7 @@ namespace eval tomlish::parse { - + dict set stateMatrix\ end-state {} @@ -3557,14 +3685,13 @@ namespace eval tomlish::parse { dict set spacePushTransitions itable-keyval-space itable-keyval-syntax dict set spacePushTransitions array-space array-space dict set spacePushTransitions table-space tablename-state - dict set spacePushTransitions #itable-space itable-space + #dict set spacePushTransitions #itable-space itable-space #Pop 
to, next variable spacePopTransitions [dict create] dict set spacePopTransitions array-space array-syntax - #itable-space curly-syntax #itable-keyval-space itable-val-tail #review #we pop to keyval-space from dottedkey-space or from keyval-value-expected? we don't always want to go to keyval-tail @@ -3575,7 +3702,6 @@ namespace eval tomlish::parse { #JMN test #dict set spaceSameTransitions array-space array-syntax - #itable-space curly-syntax #itable-keyval-space itable-val-tail @@ -3611,6 +3737,8 @@ namespace eval tomlish::parse { ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { POPSPACE { + set popfromspace_info [spacestack peek] + set popfromspace_state [dict get $popfromspace_info state] spacestack pop set parent_info [spacestack peek] set type [dict get $parent_info type] @@ -3625,17 +3753,17 @@ namespace eval tomlish::parse { set existing [spacestack pop] dict unset existing returnstate spacestack push $existing ;#re-push modification - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected to stored returnstate $next <<---" } else { ### #review - do away with spacePopTransitions - which although useful to provide a default.. 
# - involve error-prone configurations distant to the main state transition configuration in stateMatrix if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" } else { set next $parentspace - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace<<---" } } set result $next @@ -3805,22 +3933,6 @@ namespace eval tomlish::parse { return $tokenType } - proc _shortcircuit_startquotesequence {} { - variable tok - variable i - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - set_tokenType "startquote" - incr i -1 - return -level 2 1 - } elseif {$toklen == 2} { - puts stderr "_shortcircuit_startquotesequence toklen 2" - set_tokenType "startquote" - set tok "\"" - incr i -2 - return -level 2 1 - } - } proc get_token_waiting {} { variable token_waiting @@ -3940,7 +4052,6 @@ namespace eval tomlish::parse { set slash_active 0 set quote 0 set c "" - set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { set lastChar [tcl::string::index $s [expr {$i - 1}]] @@ -3957,8 +4068,6 @@ namespace eval tomlish::parse { switch -exact -- $ctest { # { - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 @@ -3966,16 +4075,20 @@ namespace eval tomlish::parse { if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { + #for multiliteral, multistring - data and/or end incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } 
_start_squote_sequence { + #pseudo token beginning with underscore - never returned to state machine - review incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -4003,7 +4116,7 @@ namespace eval tomlish::parse { append tok $c } default { - #dquotedkey, itablequotedkey, string,literal, multistring + #dquotedkey, string,literal, multistring append tok $c } } @@ -4015,7 +4128,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes#" + append tok "#" } multiliteral-space { set_tokenType "literalpart" @@ -4031,23 +4144,23 @@ namespace eval tomlish::parse { } lc { #left curly brace - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { @@ -4059,7 +4172,7 @@ namespace eval tomlish::parse { } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename - starttablearrayname { #*bare* tablename can only contain letters,digits underscores @@ -4105,7 +4218,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\{" + append tok "\{" } multiliteral-space { set_tokenType "literalpart" @@ -4120,37 +4233,35 @@ namespace eval tomlish::parse { } rc { #right curly brace - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set
slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey - comment { + string - dquotedkey - comment { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename - tablename { if {$had_slash} {append tok "\\"} @@ -4221,7 +4332,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\}" + append tok "\}" } multiliteral-space { set_tokenType "literalpart" ; #review @@ -4237,35 +4348,35 @@ namespace eval tomlish::parse { } lb { #left square bracket - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename { #change the tokenType @@ -4332,7 
+4443,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\[" + append tok "\[" } multiliteral-space { set_tokenType "literalpart" @@ -4350,37 +4461,35 @@ namespace eval tomlish::parse { } rb { #right square bracket - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } comment { if {$had_slash} {append tok "\\"} @@ -4428,16 +4537,6 @@ namespace eval tomlish::parse { } } } - XXXtablearraynames { - puts "rb @ tablearraynames ??" - #switch? - - #todo? - if {$had_slash} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex - return 1 - } default { incr i -1 return 1 @@ -4485,7 +4584,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\]" + append tok "\]" } multiliteral-space { set_tokenType "literalpart" @@ -4498,21 +4597,21 @@ namespace eval tomlish::parse { } } bsl { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
#backslash if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } whitespace { @@ -4529,9 +4628,7 @@ namespace eval tomlish::parse { append tok "\\" set slash_active 0 } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey - comment { + string - dquotedkey - comment { if {$slash_active} { set slash_active 0 append tok "\\\\" @@ -4545,7 +4642,6 @@ namespace eval tomlish::parse { set slash_active 0 append tok "\\\\" } else { - append tok $dquotes set slash_active 1 } } @@ -4575,10 +4671,6 @@ namespace eval tomlish::parse { set tok "\\\\" set slash_active 0 } else { - if {$dquotes ne ""} { - set_tokenType "stringpart" - set tok $dquotes - } set slash_active 1 } } @@ -4599,58 +4691,56 @@ namespace eval tomlish::parse { set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { - #short squote_seq tokens are returned if active during any other character + tentative_accum_squote { + #for within multiliteral + #short tentative_accum_squote tokens are returned if active upon receipt of any other character #longest allowable for leading/trailing are returned here #### set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote - switch -- $state { - leading-squote-space { - append tok $c - if {$existingtoklen > 2} { - error "tomlish tok error: squote_seq unexpected length $existingtoklen when another received" - } elseif {$existingtoklen == 2} { - return 1 ;#return tok ''' - } - } - trailing-squote-space { - append tok $c - if {$existingtoklen == 4} { - #maxlen to be an squote_seq is multisquote + 
2 = 5 - #return tok ''''' - return 1 - } - } - default { - error "tomlish tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" - } + #assert state = trailing-squote-space + append tok $c + if {$existingtoklen == 4} { + #maxlen to be a tentative_accum_squote is multisquote + 2 = 5 + #return tok with value ''''' + return 1 } } - whitespace { - #end whitespace - incr i -1 ;#reprocess sq + tentative_accum_dquote { + incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { - #temp token creatable only during value-expected or array-space + #pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space switch -- [tcl::string::length $tok] { 1 { + #no conclusion can yet be reached append tok $c } 2 { + #enter multiliteral #switch? append tok $c set_tokenType triple_squote return 1 } default { + #if there are more than 3 leading squotes we also enter multiliteral space and the subsequent ones are handled + #by the tentative_accum_squote check for ending sequence which can accept up to 5 and reintegrate the + #extra 1 or 2 squotes as data. 
error "tomlish unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" } } } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" + return 1 + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } literal { #slash_active always false #terminate the literal @@ -4663,7 +4753,7 @@ namespace eval tomlish::parse { # idea: end this literalpart (possibly 'temporarily') # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) - incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + incr i -1 ;#throw the "'" back to loop - will be added to a tentative_accum_squote token for later processing return 1 } XXXitablesquotedkey { @@ -4684,7 +4774,11 @@ namespace eval tomlish::parse { append tok $c } barekey { - #not clear why o'shennanigan shouldn't be a legal barekey - but it seems not to be. + #barekeys now support all sorts of unicode letter/number chars for other cultures + #but not punctuation - not even for those of Irish heritage who don't object + #to the anglicised form of some names. + # o'shenanigan seems to not be a legal barekey + #The Irish will have to use an earlier form Ó - which apparently many may prefer anyway. error "tomlish Unexpected single quote during barekey. 
[tomlish::parse::report_line]" } default { @@ -4693,63 +4787,69 @@ namespace eval tomlish::parse { } } else { switch -exact -- $state { - array-space { + array-space - keyval-value-expected - itable-keyval-value-expected { + #leading squote + #pseudo-token _start_squote_sequence ss not received by state machine + #This pseudotoken will trigger production of single_squote token or triple_squote token + #It currently doesn't trigger double_squote token + #(handle '' same as 'x' ie produce a single_squote and go into processing literal) + #review - producing double_squote for empty literal may be slightly more efficient. + #This token is not used to handle squote sequences *within* a multiliteral set_tokenType "_start_squote_sequence" set tok "'" } - itable-keyval-value-expected - keyval-value-expected { - set_tokenType "squote_seq_begin" + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up a tentative_accum_squote to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "tentative_trigger_squote" ;#trigger tentative_accum_squote set tok "'" return 1 } - table-space { - #tests: squotedkey.test - set_tokenType "squotedkey" - set tok "" - } - itable-space { - #tests: squotedkey_itable.test + table-space - itable-space { + #tests: squotedkey.test squotedkey_itable.test set_tokenType "squotedkey" set tok "" } - XXXitable-space { - #future - could there be multiline keys? - #this would allow arbitrary tcl dicts to be stored in toml + XXXtable-space - XXXitable-space { + #future - could there be multiline keys? MLLKEY, MLBKEY ? 
+ #this would (almost) allow arbitrary tcl dicts to be stored in toml (aside from escaping issues) #probably unlikely - as it's perhaps not very 'minimal' or ergonomic for config files - set_tokenType "squote_seq_begin" + #@2025 ABNF for toml mentions key, simple-key, unquoted-key, quoted-key and dotted-key + #where key is simple-key or dotted-key - no MLL or MLB components + #the spec states solution for arbitrary binary data is application specific involving encodings + #such as hex, base64 + set_tokenType "_start_squote_sequence" set tok "'" return 1 } tablename-state { #first char in tablename-state/tablearrayname-state - set_tokenType tablename + set_tokenType "tablename" append tok "'" } tablearrayname-state { - set_tokenType tablearrayname + set_tokenType "tablearrayname" append tok "'" } literal-state { + #shouldn't get here? review tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" - set_tokenType literal + set_tokenType "literal" incr -1 return 1 } multistring-space { - error "tomlish unimplemented - squote during state '$state'. [tomlish::parse::report_line]" - } - multiliteral-space { - #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row - #we are building up an squote_seq to determine if - #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines - #b) it is exactly ''' and we can terminate the whole multiliteral - #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space - set_tokenType "squote_seq_begin" - set tok "'" - return 1 + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "," + #error "tomlish unimplemented - squote during state '$state'. 
[tomlish::parse::report_line]" } dottedkey-space { - set_tokenType squotedkey + set_tokenType "squotedkey" } default { error "tomlish unhandled squote during state '$state'. [tomlish::parse::report_line]" @@ -4765,44 +4865,50 @@ namespace eval tomlish::parse { if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote { incr i -1 return 1 } - startquotesequence { - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - append tok $c - } elseif {$toklen == 2} { - append tok $c - #switch vs set? - set_tokenType "startmultiquote" - return 1 - } else { - error "tomlish unexpected token length $toklen in 'startquotesequence'" - } - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" return 1 - - #set toklen [tcl::string::length $tok] - #switch -- $toklen { - # 1 { - # set_tokenType "startsquote" - # incr i -1 - # return 1 - # } - # 2 { - # set_tokenType "startsquote" - # incr i -2 - # return 1 - # } - # default { - # error "tomlish unexpected _start_squote_sequence length $toklen" - # } - #} + } + tentative_accum_dquote { + #within multistring + #short tentative_accum_dquote tokens are returned if active upon receipt of any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + #assert state = trailing-squote-space + append tok $c + if {$existingtoklen == 4} { + #maxlen to be a tentative_accum_dquote is multidquote + 2 = 5 + #return tok with value """"" + return 1 + } + } + _start_dquote_sequence { + #pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { + #no conclusion can yet be reached + append tok $c + } + 2 { + #enter multistring + #switch? 
+ append tok $c + set_tokenType triple_dquote + return 1 + } + default { + #if there are more than 3 leading dquotes we also enter multistring space and the subsequent ones are handled + #by the tentative_accum_dquote check for ending sequence which can accept up to 5 and reintegrate the + #extra 1 or 2 dquotes as data. + error "tomlish unexpected token length [tcl::string::length $tok] in '_start_dquote_sequence'" + } + } } literal - literalpart { append tok $c @@ -4811,8 +4917,8 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" $c } else { - #unescaped quote always terminates a string? - set_token_waiting type endquote value "\"" complete 1 startindex $cindex + #unescaped quote always terminates a string + set_token_waiting type enddquote value "\"" complete 1 startindex $cindex return 1 } } @@ -4821,77 +4927,31 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" $c } else { - #incr i -1 - - if {$multi_dquote eq "\"\""} { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } + incr i -1 ;#throw the {"} back to loop - will be added to a tentative_accum_dquote token for later processing + return 1 } } whitespace { - switch -exact -- $state { - multistring-space { - #REVIEW - if {$had_slash} { - incr i -2 - return 1 - } else { - switch -- [tcl::string::length $multi_dquote] { - 2 { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] - set multi_dquote "" - return 1 - } - 1 { - incr i -2 - return 1 - } - 0 { - incr i -1 - return 1 - } - } - } - } - keyval-value-expected { - #end whitespace token and reprocess - incr i -1 - return 1 - - #if {$multi_dquote eq "\"\""} { - # set_token_waiting type startmultiquote value "\"\"\"" complete 1 - # set multi_dquote "" - # return 1 - #} else { - # #end whitespace token and reprocess - # incr i -1 - # return 1 - #} - } - table-space - 
itable-space { - incr i -1 - return 1 - } - default { - set_token_waiting type startquote value "\"" complete 1 startindex $cindex - return 1 - } + #assert: had_slash will only ever be true in multistring-space + if {$had_slash} { + incr i -2 + return 1 + } else { + #end whitespace token - throw dq back for reprocessing + incr i -1 + return 1 } } comment { if {$had_slash} {append tok "\\"} append tok $c } - XXXdquotedkey - XXXitablequotedkey { + XXXdquotedkey { if {$had_slash} { append tok "\\" append tok $c } else { - set_token_waiting type endquote value "\"" complete 1 startindex $cindex + set_token_waiting type enddquote value "\"" complete 1 startindex $cindex return 1 } } @@ -4901,7 +4961,7 @@ namespace eval tomlish::parse { append tok "\\" append tok $c } else { - #set_token_waiting type endsquote value "'" complete 1 + #set_token_waiting type enddquote value {"} complete 1 return 1 } } @@ -4924,64 +4984,40 @@ namespace eval tomlish::parse { #$slash_active not relevant when no tokenType #token is string only if we're expecting a value at this point switch -exact -- $state { - array-space { - #!? start looking for possible multistartquote - #set_tokenType startquote - #set tok $c - #return 1 - set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote - set tok $c - } - keyval-value-expected - itable-keyval-value-expected { - set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote - set tok $c + array-space - keyval-value-expected - itable-keyval-value-expected { + #leading dquote + #pseudo-token _start_squote_sequence ss not received by state machine + #This pseudotoken will trigger production of single_dquote token or triple_dquote token + #It currently doesn't trigger double_dquote token + #(handle "" same as "x" ie produce a single_dquote and go into processing string) + #review - producing double_dquote for empty string may be slightly more efficient. 
+ #This token is not used to handle dquote sequences once *within* a multistring + set_tokenType "_start_dquote_sequence" + set tok {"} } multistring-space { - #TODO - had_slash!!! - #REVIEW if {$had_slash} { set_tokenType "stringpart" set tok "\\\"" - set multi_dquote "" } else { - if {$multi_dquote eq "\"\""} { - tomlish::log::debug "- tokloop char dq ---> endmultiquote" - set_tokenType "endmultiquote" - set tok "\"\"\"" - return 1 - #set_token_waiting type endmultiquote value "\"\"\"" complete 1 - #set multi_dquote "" - #return 1 - } else { - append multi_dquote "\"" - } + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up a tentative_accum_squote to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "tentative_trigger_dquote" ;#trigger tentative_accum_dquote + set tok {"} + return 1 } } multiliteral-space { set_tokenType "literalpart" set tok "\"" } - XXXtable-space { - set_tokenType "startquote" - set tok $c - return 1 - } - XXXitable-space { - set_tokenType "startquote" - set tok $c - } table-space - itable-space { set_tokenType "dquotedkey" set tok "" } - tablename-state { - set_tokenType tablename - set tok $c - } - tablearrayname-state { - set_tokenType tablearrayname - set tok $c - } dottedkey-space { set_tokenType dquotedkey set tok "" @@ -4990,49 +5026,56 @@ namespace eval tomlish::parse { #set_tokenType dquote_seq_begin #set tok $c } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } default { - error "tomlish Unexpected quote during state '$state' [tomlish::parse::report_line]" + error "tomlish Unexpected dquote during 
state '$state' [tomlish::parse::report_line]" } } } } = { - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { - #assertion had_slash 0, multi_dquote "" + #assertion had_slash 0 append tok $c } - string - comment - dquotedkey - itablequotedkey { + string - comment - dquotedkey { #for these tokenTypes an = is just data. if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } whitespace { if {$state eq "multistring-space"} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen + incr i -1 return 1 } else { set_token_waiting type equal value = complete 1 startindex $cindex @@ -5063,7 +5106,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok ${dquotes}= + append tok = } multiliteral-space { set_tokenType "literalpart" @@ -5084,8 +5127,6 @@ namespace eval tomlish::parse { } cr { #REVIEW! - set dquotes $multi_dquote - set multi_dquote "" ;#!! # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
set slash_active 0 @@ -5098,16 +5139,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal { @@ -5159,8 +5202,6 @@ namespace eval tomlish::parse { } lf { # \n newline - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5171,16 +5212,19 @@ namespace eval tomlish::parse { append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { + #multiliteral or multistring incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal { @@ -5196,20 +5240,14 @@ namespace eval tomlish::parse { return 1 } stringpart { - if {$dquotes ne ""} { - append tok $dquotes + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] incr i -1 return 1 } else { - if {$had_slash} { - #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) - set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] - incr i -1 - return 1 - } else { - set_token_waiting type newline value lf complete 1 
startindex $cindex - return 1 - } + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 } } starttablename - tablename - tablearrayname - starttablearrayname { @@ -5236,20 +5274,13 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - #e.g one or 2 quotes just before nl - set_tokenType "stringpart" - set tok $dquotes - incr i -1 - return 1 - } set_tokenType "newline" set tok lf return 1 } } multiliteral-space { - #assert had_slash 0, multi_dquote "" + #assert had_slash 0 set_tokenType "newline" set tok "lf" return 1 @@ -5275,8 +5306,6 @@ namespace eval tomlish::parse { } } , { - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5287,39 +5316,40 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } comment - tablename - tablearrayname { if {$had_slash} {append tok "\\"} append tok , } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { #stringpart can have up to 2 quotes too if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } literal - literalpart - squotedkey { - #assert had_slash always 0, multi_dquote "" + #assert had_slash always 0 append tok $c } whitespace { if {$state eq "multistring-space"} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen + incr i -1 return 1 } else { set_token_waiting type comma value "," complete 1 startindex $cindex @@ -5338,10 +5368,10 @@ namespace eval tomlish::parse { 
set_tokenType "stringpart" set tok "" if {$had_slash} {append tok "\\"} - append tok "$dquotes," + append tok "," } multiliteral-space { - #assert had_slash 0, multi_dquote "" + #assert had_slash 0 set_tokenType "literalpart" set tok "," } @@ -5354,8 +5384,6 @@ namespace eval tomlish::parse { } } . { - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5366,42 +5394,45 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } comment - untyped_value { if {$had_slash} {append tok "\\"} append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } literal - literalpart - squotedkey { - #assert had_slash always 0, multi_dquote "" + #assert had_slash always 0 append tok $c } whitespace { switch -exact -- $state { multistring-space { - set backchars [expr {[tcl::string::length $dquotes] + 1}] + #review if {$had_slash} { - incr backchars 1 + incr i -2 + } else { + incr i -1 } - incr i -$backchars return 1 } xxxdottedkey-space { @@ -5444,7 +5475,7 @@ namespace eval tomlish::parse { set_tokenType "stringpart" set tok "" if {$had_slash} {append tok "\\"} - append tok "$dquotes." + append tok "." } multiliteral-space { set_tokenType "literalpart" @@ -5471,8 +5502,6 @@ namespace eval tomlish::parse { } " " { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
if {[tcl::string::length $tokenType]} { set had_slash $slash_active set slash_active 0 @@ -5483,16 +5512,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -5512,9 +5543,9 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok $dquotes$c + append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} { append tok "\\" } append tok $c } @@ -5526,8 +5557,7 @@ namespace eval tomlish::parse { incr i -2 return 1 } else { - #split into STRINGPART aaa WS " " - append tok $dquotes + #split into STRINGPART xxx WS " " incr i -1 return 1 } @@ -5537,15 +5567,7 @@ namespace eval tomlish::parse { } whitespace { if {$state eq "multistring-space"} { - if {$dquotes ne ""} { - #end whitespace token - #go back by the number of quotes plus this space char - set backchars [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backchars - return 1 - } else { - append tok $c - } + append tok $c } else { append tok $c } @@ -5588,12 +5610,6 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - set_tokenType "stringpart" - set tok $dquotes - incr i -1 - return 1 - } set_tokenType "whitespace" append tok $c } @@ -5613,9 +5629,6 @@ namespace eval tomlish::parse { } } tab { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
- if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) set slash_active 0 @@ -5626,12 +5639,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence + tentative_accum_squote - tentative_accum_dquote { + incr i -1 + return 1 } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -5662,7 +5681,6 @@ namespace eval tomlish::parse { return 1 } else { #split into STRINGPART aaa WS " " - append tok $dquotes incr i -1 return 1 } @@ -5706,15 +5724,8 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - set_tokenType stringpart - set tok $dquotes - incr i -1 - return 1 - } else { - set_tokenType whitespace - append tok $c - } + set_tokenType whitespace + append tok $c } } multiliteral-space { @@ -5732,16 +5743,31 @@ namespace eval tomlish::parse { #BOM (Byte Order Mark) - ignored by token consumer if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + tentative_accum_squote - tentative_accum_dquote { + incr i -1 + return 1 + } _start_squote_sequence { #assert - tok will be one or two squotes only + #A toml literal probably isn't allowed to contain this + #but we will parse and let the validator sort it out. 
incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart { append tok $c } + string - stringpart { + append tok $c + } default { + #state machine will generally not have entry to accept bom - let it crash set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex return 1 } @@ -5752,6 +5778,10 @@ namespace eval tomlish::parse { set_tokenType "literalpart" set tok $c } + multistring-space { + set_tokenType "stringpart" + set tok $c + } default { set_tokenType "bom" set tok "\uFEFF" @@ -5761,8 +5791,6 @@ namespace eval tomlish::parse { } } default { - set dquotes $multi_dquote - set multi_dquote "" ;#!! if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. @@ -5774,28 +5802,24 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } whitespace { if {$state eq "multistring-space"} { - if {$dquotes ne ""} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen - return 1 - } else { - incr i -1 - return 1 - } + incr i -1 + return 1 } else { #review incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. 
@@ -5815,7 +5839,7 @@ namespace eval tomlish::parse { return 1 } stringpart { - append tok $dquotes$c + append tok $c } default { #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname @@ -5835,22 +5859,12 @@ namespace eval tomlish::parse { error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } - XXXcurly-syntax { - puts stderr "curly-syntax - review" - if {[tomlish::utils::is_barekey $c]} { - set_tokenType "barekey" - append tok $c - } else { - error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" - } - } multistring-space { set_tokenType "stringpart" if {$had_slash} { - #assert - we don't get had_slash and dquotes at same time set tok \\$c } else { - set tok $dquotes$c + set tok $c } } multiliteral-space { @@ -5890,21 +5904,6 @@ namespace eval tomlish::parse { # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" #} switch -exact -- $tokenType { - startquotesequence { - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - #invalid - #eof with open string - error "tomlish eof reached without closing quote for string. [tomlish::parse::report_line]" - } elseif {$toklen == 2} { - #valid - #we ended in a double quote, not actually a startquoteseqence - effectively an empty string - switch_tokenType "startquote" - incr i -1 - #set_token_waiting type string value "" complete 1 - return 1 - } - } _start_squote_sequence { set toklen [tcl::string::length $tok] switch -- $toklen { @@ -5913,11 +5912,29 @@ namespace eval tomlish::parse { error "tomlish eof reached without closing single quote for string literal. 
[tomlish::parse::report_line]" } 2 { - #review - set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] set_tokenType "literal" set tok "" return 1 + + ##review + #set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + #set_tokenType "literal" + #set tok "" + #return 1 + } + } + } + _start_dquote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open string + error "tomlish eof reached without closing double quote for string. [tomlish::parse::report_line]" + } + 2 { + set_tokenType "string" + set tok "" + return 1 } } } @@ -6011,6 +6028,16 @@ namespace eval tomlish::dict { return $name } + proc _show_tablenames {tablenames_info} { + append msg \n "tablenames_info:" \n + dict for {tkey tinfo} $tablenames_info { + append msg " " "table: $tkey" \n + dict for {field finfo} $tinfo { + append msg " " "$field $finfo" \n + } + } + return $msg + } } tcl::namespace::eval tomlish::app { diff --git a/src/vfs/_vfscommon.vfs/modules/dictn-0.1.1.tm b/src/vfs/_vfscommon.vfs/modules/dictn-0.1.1.tm new file mode 100644 index 00000000..c9ef87f2 --- /dev/null +++ b/src/vfs/_vfscommon.vfs/modules/dictn-0.1.1.tm @@ -0,0 +1,349 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. 
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+# (C) 2023
+#
+# @@ Meta Begin
+# Application dictn 0.1.1
+# Meta platform tcl
+# Meta license
+# @@ Meta End
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+## Requirements
+##e.g package require frobz
+
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+# dictn: an ensemble mirroring the 'dict' subcommands, where each key argument
+# is a 'path' - a list of nested keys expanded with {*} into the dict call.
+# Many proc names here shadow core commands (set, append, incr, info, ...),
+# so the core versions are called fully qualified (::set, ::append, ...).
+namespace eval dictn {
+    namespace export {[a-z]*}
+    namespace ensemble create
+}
+
+
+## ::dictn::append
+#This can of course 'ruin' a nested dict if applied to the wrong element
+# - i.e using the string op 'append' on an element that is itself a nested dict is analogous to the standard Tcl:
+# %set list {a b {c d}}
+# %append list x
+# a b {c d}x
+# IOW - don't do that unless you really know that's what you want.
+#
+# dictvar - name of the dict variable in the caller
+# path    - list of nested keys
+# value   - string to append (default empty)
+# A single-element path is delegated to 'dict append' in the caller's frame.
+# A longer path is read with 'dict get' (so it must already exist), extended and written back.
+proc ::dictn::append {dictvar path {value {}}} {
+    if {[llength $path] == 1} {
+        uplevel 1 [list dict append $dictvar $path $value]
+    } else {
+        upvar 1 $dictvar dvar
+
+        ::set str [dict get $dvar {*}$path]
+        #must be ::append - a bare 'append' here resolves to ::dictn::append itself
+        ::append str $value
+        dict set dvar {*}$path $str
+    }
+}
+
+# Builds a dict from alternating path/value arguments; each path is a list of nested keys.
+proc ::dictn::create {args} {
+    ::set data {}
+    foreach {path val} $args {
+        dict set data {*}$path $val
+    }
+    return $data
+}
+
+# Returns the result of 'dict exists' for the nested key path.
+proc ::dictn::exists {dictval path} {
+    return [dict exists $dictval {*}$path]
+}
+
+# Applies 'dict filter' to the sub-dict found at path.
+# Evaluated with uplevel so that a 'script' filter body (and its key/value variables)
+# runs in the caller's frame rather than inside this helper.
+proc ::dictn::filter {dictval path filterType args} {
+    ::set sub [dict get $dictval {*}$path]
+    return [uplevel 1 [list dict filter $sub $filterType {*}$args]]
+}
+
+# Iterates 'dict for' over the sub-dict found at path.
+# Evaluated with uplevel so the loop variables and body live in the caller's frame,
+# letting the body read and write the caller's variables.
+proc ::dictn::for {keyvalvars dictval path body} {
+    ::set sub [dict get $dictval {*}$path]
+    uplevel 1 [list dict for $keyvalvars $sub $body]
+}
+
+# Returns the value at path; with the default empty path the whole dict value is returned.
+proc ::dictn::get {dictval {path {}}} {
+    return [dict get $dictval {*}$path]
+}
+
+# Returns the value at path, or default when the path is absent.
+# NOTE(review): relies on 'dict getdef' - unlike ::dictn::incr below there is no fallback
+# for interpreters where ::tcl::dict::getdef is missing.
+proc ::dictn::getdef {dictval path default} {
+    return [dict getdef $dictval {*}$path $default]
+}
+
+# Same implementation as ::dictn::getdef, under the alternative name.
+proc ::dictn::getwithdefault {dictval path default} {
+    return [dict getdef $dictval {*}$path $default]
+}
+
+# ::dictn::incr dictvar path ?increment?
+# An empty increment is treated as 1.
+# A single-element path is delegated to 'dict incr' in the caller's frame.
+# For a longer path a missing value counts as 0, and the updated dict value is returned.
+# Two definitions: the first uses 'dict getdef' when the interpreter provides it,
+# the second performs the existence check by hand.
+if {[info commands ::tcl::dict::getdef] ne ""} {
+    proc ::dictn::incr {dictvar path {increment {}} } {
+        if {$increment eq ""} {
+            ::set increment 1
+        }
+        if {[llength $path] == 1} {
+            uplevel 1 [list dict incr $dictvar $path $increment]
+        } else {
+            upvar 1 $dictvar dvar
+            if {![::info exists dvar]} {
+                dict set dvar {*}$path $increment
+            } else {
+                ::set newval [expr {[dict getdef $dvar {*}$path 0] + $increment}]
+                dict set dvar {*}$path $newval
+            }
+            return $dvar
+        }
+    }
+} else {
+    proc ::dictn::incr {dictvar path {increment {}} } {
+        if {$increment eq ""} {
+            ::set increment 1
+        }
+        if {[llength $path] == 1} {
+            uplevel 1 [list dict incr $dictvar $path $increment]
+        } else {
+            upvar 1 $dictvar dvar
+            if {![::info exists dvar]} {
+                dict set dvar {*}$path $increment
+            } else {
+                if {![dict exists $dvar {*}$path]} {
+                    ::set val 0
+                } else {
+                    ::set val [dict get $dvar {*}$path]
+                }
+                ::set newval [expr {$val + $increment}]
+                dict set dvar {*}$path $newval
+            }
+            return $dvar
+        }
+    }
+}
+
+# 'dict info' for the whole dict (empty path) or for the sub-dict at path.
+proc ::dictn::info {dictval {path {}}} {
+    if {![string length $path]} {
+        return [dict info $dictval]
+    } else {
+        ::set sub [dict get $dictval {*}$path]
+        return [dict info $sub]
+    }
+}
+
+# Keys of the sub-dict at path, optionally restricted by a glob pattern.
+proc ::dictn::keys {dictval {path {}} {glob {}}} {
+    ::set sub [dict get $dictval {*}$path]
+    if {[string length $glob]} {
+        return [dict keys $sub $glob]
+    } else {
+        return [dict keys $sub]
+    }
+}
+
+# List-appends args to the value at path in the caller's dict variable.
+# A single-element path is delegated to 'dict lappend'; a longer path must already exist.
+proc ::dictn::lappend {dictvar path args} {
+    if {[llength $path] == 1} {
+        uplevel 1 [list dict lappend $dictvar $path {*}$args]
+    } else {
+        upvar 1 $dictvar dvar
+
+        ::set list [dict get $dvar {*}$path]
+        ::lappend list {*}$args
+        dict set dvar {*}$path $list
+    }
+}
+
+# Placeholder - always raises an error.
+proc ::dictn::merge {args} {
+    error "nested merge not yet supported"
+}
+
+#dictn remove dictionaryValue ?path ...?
+# Returns a copy of dictval with each given path removed.
+# Nested paths are handled one at a time (extract parent, remove leaf, store parent back).
+proc ::dictn::remove {dictval args} {
+    ::set basic [list] ;#buffer basic (1element path) removals to do in a single call.
+
+    foreach path $args {
+        if {[llength $path] == 1} {
+            ::lappend basic $path
+        } else {
+            #extract,modify,replace
+            ::set subpath [lrange $path 0 end-1]
+
+            ::set sub [dict get $dictval {*}$subpath]
+            ::set sub [dict remove $sub [lindex $path end]]
+
+            dict set dictval {*}$subpath $sub
+        }
+    }
+
+    if {[llength $basic]} {
+        return [dict remove $dictval {*}$basic]
+    } else {
+        return $dictval
+    }
+}
+
+
+# dictn replace dictionaryValue ?path value ...?
+# Returns a copy of dictval with each path set to the paired value.
+# The parent of a nested path must already exist (it is read with 'dict get').
+proc ::dictn::replace {dictval args} {
+    ::set basic [list] ;#buffer basic (1element path) replacements to do in a single call.
+
+    foreach {path val} $args {
+        if {[llength $path] == 1} {
+            ::lappend basic $path $val
+        } else {
+            #extract,modify,replace
+            ::set subpath [lrange $path 0 end-1]
+
+            ::set sub [dict get $dictval {*}$subpath]
+            ::set sub [dict replace $sub [lindex $path end] $val]
+
+            dict set dictval {*}$subpath $sub
+        }
+    }
+
+
+    if {[llength $basic]} {
+        return [dict replace $dictval {*}$basic]
+    } else {
+        return $dictval
+    }
+}
+
+
+# Sets the value at path in the caller's dict variable; returns the updated dict value.
+proc ::dictn::set {dictvar path newval} {
+    upvar 1 $dictvar dvar
+    return [dict set dvar {*}$path $newval]
+}
+
+# Number of entries in the sub-dict at path (whole dict for the default empty path).
+proc ::dictn::size {dictval {path {}}} {
+    return [dict size [dict get $dictval {*}$path]]
+}
+
+# Removes the entry at path from the caller's dict variable; returns the updated dict value.
+proc ::dictn::unset {dictvar path} {
+    upvar 1 $dictvar dvar
+    return [dict unset dvar {*}$path]
+}
+
+# dictn update dictVariable path varName ?path varName ...? body
+# For each path that exists, the caller's variable varName is set to the value at that path,
+# then body is evaluated in the caller's frame. Afterwards each such path is written back
+# from its variable, or removed from the dict if the variable no longer exists.
+# Errors raised by body are caught; the caught result/message is returned.
+proc ::dictn::update {dictvar args} {
+    ::set body [lindex $args end]
+    ::set maplist [lrange $args 0 end-1]
+
+    upvar 1 $dictvar dvar
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            #path is a key list - it must be expanded, as in the 'dict exists' test above
+            uplevel 1 [list set $var [dict get $dvar {*}$path]]
+        }
+    }
+
+    catch {uplevel 1 $body} result
+
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            upvar 1 $var $var
+            if {![::info exists $var]} {
+                uplevel 1 [list dict unset $dictvar {*}$path]
+            } else {
+                uplevel 1 [list dict set $dictvar {*}$path [::set $var]]
+            }
+        }
+    }
+    return $result
+}
+
+#an experiment.
+# Experimental variant of update built on 'apply'.
+# For each path that exists in the dict, a script prefix is generated that links the named
+# variable (upvar 1, i.e. into this proc's frame) and sets it from the lambda's args.
+# The prefix plus body is run through 'apply', with errors caught and returned as the result.
+# Variables that still exist afterwards are copied back into the dict.
+# Writes the assembled body to stderr as debug output.
+proc ::dictn::Applyupdate {dictvar args} {
+    ::set body [lindex $args end]
+    ::set maplist [lrange $args 0 end-1]
+
+    upvar 1 $dictvar dvar
+
+    ::set headscript ""
+    ::set i 0
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path]} {
+            #uplevel 1 [list set $var [dict get $dvar $path]]
+            ::lappend arglist $var
+            ::lappend vallist [dict get $dvar {*}$path]
+            ::append headscript [string map [list %i% $i %v% $var] {upvar 1 %v% %v%; set %v% [lindex $args %i%]} ]
+            ::append headscript \n
+            ::incr i
+        }
+    }
+
+    ::set body $headscript\r\n$body
+
+    puts stderr "BODY: $body"
+
+    #set result [apply [list args $body] {*}$vallist]
+    catch {apply [list args $body] {*}$vallist} result
+
+    foreach {path var} $maplist {
+        if {[dict exists $dvar {*}$path] && [::info exists $var]} {
+            dict set dvar {*}$path [::set $var]
+        }
+    }
+    return $result
+}
+
+# Values of the sub-dict at path, optionally restricted by a glob pattern.
+proc ::dictn::values {dictval {path {}} {glob {}}} {
+    ::set sub [dict get $dictval {*}$path]
+    if {[string length $glob]} {
+        return [dict values $sub $glob]
+    } else {
+        return [dict values $sub]
+    }
+}
+
+# Standard form:
+#'dictn with dictVariable path body'
+#
+# Extended form:
+#'dictn with dictVariable path arrayVariable body'
+#
+# Standard form delegates to 'dict with' in the caller's frame.
+# Extended form loads the sub-dict at path into the caller's array, evaluates body in the
+# caller's frame (errors are caught and returned as the result), then writes the array back:
+# keys that were loaded but no longer exist are removed, all current array elements are stored.
+proc ::dictn::with {dictvar path args} {
+    if {[llength $args] == 1} {
+        ::set body [lindex $args 0]
+        return [uplevel 1 [list dict with $dictvar {*}$path $body]]
+    } else {
+        upvar 1 $dictvar dvar
+        ::lassign $args arrayname body
+
+        upvar 1 $arrayname arr
+        array set arr [dict get $dvar {*}$path]
+        ::set prevkeys [array names arr]
+
+        catch {uplevel 1 $body} result
+
+
+        #dict unset/dict set take the variable NAME (dvar), not its value
+        foreach k $prevkeys {
+            if {![::info exists arr($k)]} {
+                dict unset dvar {*}$path $k
+            }
+        }
+        foreach k [array names arr] {
+            dict set dvar {*}$path $k $arr($k)
+        }
+
+        return $result
+    }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
+## Ready
+package provide dictn [namespace eval dictn {
+    variable version
+    ::set version 0.1.1
+}]
+return
\ No newline at end of file
diff --git
a/src/vfs/_vfscommon.vfs/modules/test/tomlish-1.1.3.tm b/src/vfs/_vfscommon.vfs/modules/test/tomlish-1.1.3.tm index ed5044a73e5fccdc0c437116e82de7c592c4f98a..8afb43d956b0212bfd728b88613392b2099993ea 100644 GIT binary patch delta 12790 zcmaKSbwHF|6YsKw(%s$Nl9Ee@G@^8ebeFJ5m()Xdhjc1}q;yCr-2x&KN(l(u6@A}N zUhlp8$L`ELGxN+jdt%O$#Gq6^74Kn*JCJ(mQ=f!jwoU^iI~fEtB+KSU{XPNJp#fp> zk&V>Msds{0m1=3Y8F`=Ke85J%SKxO5_48y@GbuG^9&g1d9#5vjf1N+#d(wRu$v!oD ztxU^V9^pE}+Tr_dl{1{Z1&+OjtFeLvjqEddlt5d80@8O%KQwgNS6yF>y^v*jVic_W znXI=_lF(Y6r1P=Oc8~Hn`kdarj%>XQWnH{#uTO>5RUyWBQu}AZn3dwKksp+$^-%9^ zui4~MZ6>d}VD&Q#uGBu3%ydRUDRLnPd3D@^$9k5U(k4$63VohYxSLrWXn`4 z>d4Dw8MP0KVhv*6BWFdUpv_uD`*iTM zOq95__av{+K<_v&ufa(F^fu0C*rlvav_i8wiG&sr(pLUNI8G;?@ z^p4&GgFc;3SOq?-w?bkiNUa?ex=9+&x%$QsVtkG9`5IYGfFq`+D$t@YeR51ppwT0R zjiH5Rsvra3eTkaiqOSl6^?x}&hjSor#l85Po#_(`3Tr#)Rhac(}K_g~B7OouJ zQkI4Dh4g%a$E}2UeTO24RnNlk)Pj_j!j->R1@Uh(JJyr+rYF2I6%415pAL|_H0FlS z@qmQr42mI9M8j)il6C9QN^YEWwsB^;3%6-Cw2z}sreB>jHNF>JbYGu3vEQn#t6Di$ zt`tPyvG*+Bve$XN#mBej4gI9@j@61?SP(s)+}o@|m(8)PSve|Vs5?$Q23NKj4x9At*KpjD&iCB@yoTqP-zpe&%g@K zmrFLdfo#N+=*48a%AKssb)V^;VNdSb;V}EMB?;M#+i@%k^n(Jmg8eX2;zAzBop0c4~9F ze=!FS$-1CVZr#n8yIWSME(N<#QE173pt{1-|jjz>XYgCEzOLKe%S+x=z= zq@srRzqY`887MEeh(jI11UBw^FnW{+JRsdTIPAWxc@6#8Qq9*q6y~RGztM+r)b)iA zg15w1b88Nn9LG&B&152nBiaB@QK7RB){KRu+Y2fR6L;KYG- zPE*$9rtW@XJ(o&qe#Fp) zx(YQ7(xTFcZ!>P;)Go@#4i zGC+q*-PCY@L}8QeAFP@P|B>_60_{8Vdx_m6t$G3l;-saCq0~?{m*w)8Qj;3iUja&P zTIfId&YZ94=LmWg^3OjL=IV zGd;$mWE87x>u%~)>&bGve1ebjHsHPyKdKa|fT>8)1FqM^3=4w-Fz!28R0mPR{dmY_QFKk|=v#zWTGY%xk|`JQJ(O4lM#im2s$ zJaimRNhWHrtFPs!AH3#huYeAI$f-Ye{CcmtDs(M&-=CqjhgPwEnwqU7)nA2YN6{&l z{F-tyh(iY5wt{oDa@*t8!O2c*`vF!*RS5-EQ4KS)gP0v@YgkBu%oe9C3S(Q)e)_bV#8>bPc zn_iu2gpps+1oUA29Zz#JKq7fsjW+)#hHlcjnnp)84mke6mIHJSE)N zPx%JTPVzL1h4i&UAYVq$?k~uP0&VRX(X3w1rd@AQ4Wetfet=6In`&{+_ssn-BpE*5 zZ*zJsw!iM#c{HtJM-L`7>FKv zCA2GmQ;Sitxt0pgBY>5KP>yBLzd!h3fr{BOaTw>tWixM|h31{MAZZL6YC%9Scu97f zb~I z>uY!r2o4Ou-P8Le<46BTR$p?DObA;UHmQEjwtr@w8c<^ln{`i+fc@M%z-2BGz->o` 
z9S_S{{?xJqIu-xXx{Ge&;$q@$WaemZZ{gquvmq9|CE;+haCPIbG;#N`vvIJ62mNo( zkN(H)lRsX57xkAD8wZ#b@FSN5?GN7KCMsH{+fkYS85Od*qnn$Bxvhow&xr2*%l*Hj zvHVXomVZWrV(#qj_&_4I0>L4^5HxpAk3!{IVBKW7k;{P3y@qfI!{23AE zZzuNdc5XJm`v~hlvS0n=`k(*!RyPBLk>Y^39S*kff3&Rs>`g$?7YhLA;Q<{Shy<=~ zE;bHU{|y^xwkHE>jVXT*p8GdWfBdHffA%Q`!tZ~F@)z;5A3yv@-|deeFi?K#8{IS! zHXx|@|HuA+`mmckL{}(WR#XBJ1R`Yufw*td?G+~iC>#j1n?CQb&W{yZU0y=O1UJDR zC*x{MMc7j{jx+)%kH!``gJm5N6ET(u(NNHr+Fx3gY@*YPz?}7%oFPo^>5jv>j?JX@ z%PpHg(daDUQl$UHjhMBh*?Gl~UIb2L@I2z>LZMAl;d1QyI%Jqe()~_VId)P%sJ8np zITm*aG}t=7Vc0B72}Oh+dGjesAQkOb(R6qPN|(^b!kfj0Sa*5yP+XbEp=(gOdSl5SsIzwSUiQ>XK-Srd3ZiruL>2^77ri?n}{;1FycO?vl@Sj_kM;j$<-j4 z9qh{;y2~r8YF6wwRR1jx{{(JCyDnT_VvKPV0jiC7w1rM)hU$$J*0!;3lCI3Uo8p)2 z$>>!6jhbJ0C#ntfV`4MVsK>a>Yc^Kzx?qyq40pCxNLufhB-5O@=j!Z^=uuK2eZ3S} zR^)>$pD3{+hhwTCb%EkW?SWn`_N?c|mMHLkPR%`$(^QlCbh>qHEO#v810$*RLmGt< z)u2_MLPK%*Nz`W>j@C$R)Ajt_#rzzq@&i&eY1H8BlbYKi^qzLU&L=%|ws}(?by%Mv zZtO98>EZ7&izvoIBCzaWjMVdDD(7J_gx6cm@zvP`VRH}zc}%dcFy9panbQEP6H}3) z?R(xjFQp{I%5HQa^sv=C&o&-BJ2T1kmU;wjGd%WLtcH4A2hI*Z#nt-3JnY~zZI#mA zT{9#3O{<5l02>#Z!`ZUOc5n$F#DJ&7^a`>gQNs?BJ@X?c@ZD(F+t7LYNOGlxhihlY zlo4%0U0ODMFkFR4G^^eiH9Ddxn@%`ZL;NFi@x&dLF?9%kFc&?L$l_dxjoa|dk!TUh zd_=cs&rs&EWF-tKdB8-nl`g7Lkw_}e;miQ1$GuOE`kbgFCky^1fl<$jcHxP{8`a!M zi_pl2cKlMs7{ezuPSK==?x~*}9zAYHr}=Co@$r*&Y(E8dSpV7MYTwKGGmPk0*3@0{ z=S8XWVI%|yNlLcMH4`{d)5mQJL>TxsyOwI)o!y%`t+j?Sbl3ax zpvOl^Yq3|lZ=C?dpTQhQ&&)ZsmMQ5`pUF;a8BUVYcL$mIbWX7->f?|m}j_(6=| zVZ=nnrwLpet*AA{t24qRbPBpijR<{eN2Y!DuIdRWb05X5w+j^+wPGJQ3byoR$E1Cp z8X7HgC$G0k_eme;xyR+J`FbH+)I&hlTjQKmMcsds?(jIP0c*O=^f7ow8Il9t;^oWi zsD|gA3~f?ceX(jcY;P&OvrarK(7Ke%AMG7Cw`=EJ(YVdOVcMXV<2dbIPPTrwpv_<> z4kbQr*&d33E~Y(k-Cu8cpi$|)cByK-^MLW%QCi@eJ%5qY2J?&()Ti$1SIo#IV$4j06M{yV=!NoG^)gTi)!Gzn1jtYgl3e z2fU|Y!xGC7M1bVJ{~+%xPZzgd&k11D@=a^C{5)+W>~@?gJZAk-x$3~LQ?q1KFhcQr^q}?!iTnxKX3Ho09Q@pjGPqF5 zIjgpzVX{jJd+NHRxJ(9xs8!F+SCyjG@YZvwbEvEKn6yfqF_^Ow6F~@YE{;}_hE*lJ zyQwAAhXtlC$$A2PZSZ$@9Crw6!JHsYkQ zg2f~!-C_<47EAOhnoBHjhbP}!@%cd0?*h$kZv&LqmR}uya*0@tXdhA-xGysDpzX?Q 
z%^);*r!01quZA%FGusO(DIYqdtvwIH)Tcq8Hb`mBZPey^A~zTF@WZSl_!x_8cV^CDv5L0g93J%;Ig` zw*mnVG|qj5;Uu&0)ld0}bPMS^P_+JyIlDTkE_!V(rG;>g5Q}#RUSN7hW-1>NeMGI! zFgcEF?{DegxL`X}3J2o?D}|P`@0QZygfuUB_lbEhA!@9L{dqXYVbP$u+ng-NI$vvo>`|^3t zYC1E4IS{1)rQl_?5dAlE6c6joc=ZpqU%%>5V-mB7{NT{4zPw4!f1P#FVOb8|K)L5B zECo5a0|MRuX9hrxZs)1LhX1a%f$cmOo}a*XaeYWGmwaw@Xj@NB4aZ`5UNi^MY}=@` z#ZcqM1l7^9C;m|FI=@-;aivdOrEl!$<+si@o!X=LUF;`MpLjpd)yS(+Z`<;vXRVoLjY0S%#}j_%mKlCuRo#J9SI!KWqP@sKHnZ?xat&~1iX1P>|Gjr@?zFcw?VLU7}l&gH*7h0Qf zTs|mOHI}C1rRgcYb$lYbn5azjZu&_67-#HEolSwwLr>%Tzi^GVSHqo3HA62=wL>gC z+-#a!6|0i#-I|6Ss+b|ODS`~!wq8!0l5x{@;k(G2()IV-Y4kzU?aeL61g?;7%5j!$ z`78^_uJ}pcBNN-+8 zbFoO0&{-o)GSO)qo5t+~@AD=iK#cYc^1kDA29~tL@ZR%-Nt9g za^D6$S>j1S){*Q**qRugz{AY8jO`^mn$z!1o^$px?7M{qFqAJqnv$Y$G2&>mi>TG?KvkYn^KGtrvMZ|A- zDMsL3+EcioBguD}9bB?$n2eZ)wc4lm6$ngZJrJInfc2+|1`!wc;hxV0$w3t|S=Kf8 zJQq)khESD+^zKf~FnnS~bLemuW)P3bfAlcl(!7+ZfQ|6;ia~1Ppbjobiwrrta7mKcM;vX1%;SidQdVm15|_>rv#_!o=6=sfJ-#n{F0WL1RsJ%W4ManH6HD=Y#_taCYQ;f>ySx@i+Xu8GYQN4&veF{Ba(;0NG54ugy zZ|$L)C#V=+k9H#Y6tcW1&Lb)6Y`OLjNb`deeSO;c_0!IGMr+Y?hwOQ1et_N>?|z^8 zIWdv8kFfu%4COwmu6!?lsXD#Y&?r&4j7BRa5zOu|l2BTdm85cfCuZ&t(RRWvu!jy; zm2pGI*c4umQRw=;yOi$&B`C?Anh|7^w3yeslnr(CR}ohOb}?2W`|xA=DK45`p;7Ih zoC@?~r0{=sPjl`mi)}{GwAeH4o-ie;lms=dk!;uui)FPxRZj%bKj47i=~tUv3#nl8 zeg;Ffau)g`PdGIL!1(;o^yo{%|BbD1*m_i5SA#qJbF(D{fyDo}9{qh%{k$E6Nx)HKLXO=R~+C*BE}QAq|C zUW{#57;$t}9ZF6p^Ejh^C6iB2&_)XDc>I<1C`^(3D+dchCIYDV5F`$`j`w++)rjJqO#Jz z^HI2pR$iJnQS(Sq5DiIJO@(P1&aM$DDnT$wx1@m*O)Qy3WZHy_NRn3xMFi*iDFTVTT z+%+{vjdHFdI-6HeE2t&rt)KORW+mA#YbvjVnh}$}v@G5riH)9M1-}Vu;~$-GF@Rc$8b)HNs`8Y5 zGkUi&Nh4xgM0;nD_ng3!I^&(q>y7>G!Ul4OtzyykvlXjMoM*9yyBb`m24(~nnre+2 zSo)b4OW#pT>!YZMvi-=fKt)HVwAo>12Z4Yl^zMuN)OUwJHZe!%n(a#$M11EMAxd*{ zZEBCM?e6l9zuz{p0kJ{hk^}hRUO;l$4f#Nwdo9?r=*YO}!cW+0So-&z1bwnYV;a>VOUzY!ZS?GE@iv-Tm3N=}1+IkS@ zOL2M@^FDMb&31#U?R%b=N*VgdE5GA0OYOYL$GesrJe7@v+q0}^(H|w6ejMuEvc1FTnn5j7TH2$OTuhQAC-B}hk8$7?9 zvNpnlkU-UpAQ0rH2mwn_p}&d`7DoTnTz(~~ySjR+u>ILzZcUsFpRN#@0>$^m)E)j8 
zmAcQ6dGaFi1{G8~yPDaYUp`d7x`=UAox}Si!P=cX!5}6^NO^H#S8|-@{>}j#{6lC1 zF0di^t(t?Qy<_um=a>}%_{tRQBT6sNn!n4Qs{w6ViP=ZgXjC}%edB2Z7_qGk=H;Oy0On=sTXL|NQwE(h+RYkUO%9q6{ zZG=NVx7~$nhxqq0Lk#E=Jrzeh1&<$ebLB`B{(!p*HYg&|0W?T?9Y4?UtXv6FzNriC zO^>M2)QN_Z3ExSrKQj;P41DvR*@i@aSh*hBCMM4IY`BORr;)Fp%s`XCLOU)+%Q!P9 zCNw37T9{P-d8idU7Ovvp)W45(4Vhk7D$#`>H|~K z0!m#rF)Bp^Bfx>jqqPUmH42us z%+H>keCIKKvGWo+g?j|eF@!jIs{9+45wxMSlBY}&b)aI*t3RJb=>!kc8c)>5Og!0{ za$MgNv9r?2wH|U3EE6$U}YS;25Je$vxQ#yHV zL!HxJ?1zpaWWfEk&Oz46!a>5+HbHHXZ8OkL7Mf8Y9_}I?MmZGW!nk0C0ZBPO71~X% zB*if!Qv2f`zjjaX6}472d%D)($jK?go0lWzFeInD zZtsj-7d@<}1!3dNy3>`~zZdIE6BftwsMT3UDw?^e=U1rg`jBArjyLx6Grk)>N!H}0 zdgpNsTH!%jr|$b#rza%FFz zSAP_v$(TcQ`$Cfh?!m5?hB$s6OMl+W!)84|5Q&3$lT_6Yd59rkpOS{?(lpk-)`wLT z(`5d+!2hYJ;7bV2TvS~b#OlbwjJP(z^U@_}9Di@4ljJ36GfpmStg571j+@*6VBv%A z^ynOin#|eqJC6l^BlaU#lN}KW(%8OHbN{s3`}TqPj-|1ZEBmxGT&3YGZ0}b)SK*|j z*l_VoIxxc_E%qeeaJN_CKAAB{h9VOV_J6#b-}P+nhsQyHx|e7;bK645YspOVScB><+3hD`OlWzp zNISC1&|DIu6B`NyvIPbQ(F^aCVc+>7>Nxr$Ico_jW?I{vwE7m;(&}hLM(d$+4qP!# zbv_5f8wvtGzo^~E+K>3`78_)m(N^g4iMV&Ix2L{l?&;Y>Z4f<=Q3ptcbE-Kw9j*e8 zCbv|!eoQWJT}!5?x2rxzvT{1!x7b=MKHOM4SCy}R*KTO2p98^9N2@@tr11v06VyWNbqT-Nq$~9+q%VG-MS? zj;9h(7&km&fu>70Q~7ma+0VOL&OJAsyhvJkm8D7W1vBLE)kP6@tJSzKfslOrk_1Yc z3^RLY%JubDEGN0IYNI@MHD4_XLd8#JI;wKLgF-5$yTPantec#9+ zL>+7dgwV>;P^@DO^-HGSr7*EcM~{t>VnmW1?kM3M4TRRcf5rQC&tVEe2|Zo#-t!n( z?2<^?6iZ{5Nk7{vp5cxf?)ieBi>j__qK+A2ffDt776ZK#lfQ?~i%B%G8g9@=VgV^D z>ibkEvJmPSEvh|uF8HGzt>tHX)6y(OXE=FC?A=8~O_8_53{AR^vq!2hEcrb#Ejv$? 
zQ}R2zb~B**wTz3v)w(>V-M2(*fzgbl(}3i+SD!Z2)3}Oho$UzIzfDPsN7OfA$2Aq; z^gAtwrN$0#I&!oleZWFHl0t|l3P+*qhoc-PdM%vGO57paRkP)Q`AW}>T|S;H_+%1$ z>!f$$q~heuxeum~aDSSr_;-28JqfUk@OfoV$D$>a%+GsziSrJt?hgUSZw47Zbbfw| z955_54qVnh-D0?2d2p3{Sg$j2&vB&TyKAr*ttbw4F$6$S;ptcKu@=t63#KA;QLtbx z*QfPseRB%>@;wc(T>3cAbp|QO@y^(v8HsJ}@iVXN>o5K`qVY&eo~eb;OP9@oIkd%; z9TggBmVehO9MT4dEdT6%JU(Xw4sjGaCliM+z{fFX>hgqVRGGXA$NDvAhThP=q(j8p zr!g|(yS=jrSM3D;-^h^j_`o!d=80-^d|d`~7@2Gx}rR z@<|f@PdcMz{*jpuCj+llKc8@l5UIAI@{^D;)Kn#n+lNi((BzbZc5q+&(RnBhSEY;+ zo7*~are#3B0L?l%!|Vt54$)Y3E=EwXu&q$AlS^3LDnb@cP3=xcIB=Jo%$;NhZ0Y@Q zUZ&Xo7~$AQjc!UdZ`fdQ6;R$H?VZGD^A$PuMs?5@8{(sWe0HjBs$AREWAsQ!%Qr@i2WCdvTLjmw$Y!GY#pA&A?Jb)d4IZ=L|kYUwGI3pm7nGpK$Rt4pg)U<%q zGRa&GM^ThXGzUUYn~L;C>?4K)V~r?#Cw<3W#^wOEou7t6`a^h^s$z+gWmTjUJ1q+_ce3+QED#l^Jey=SkeA~%8t1) z4~J!}rearJTO-W5$zvNA77h{?F?G4Svb*?fXnk>q>J8&o@o5L}!vmJ=ynG6{G&=BTh+tS@}|IE!D0dp*JpcADO7a(EN4qGc}QI5H9nf-w$m*Rqn7e zmB00VU+YrC)YUPc%H(lRt)zTXGWM7!5-ZVWT44}jcdeVt()p9>5!$)*`aJ__?A%XI z(D^HaP3B##=MO^>ZE7cIHhI;ADOPaK=lE<>S{E0%&_#$3tu-868zEtV5ryvoG&Q>K zoNg33BV0?XzHjL5yB0?qpwk&G`zYpcyX`*6Y6x{@QFRVXat<|nZLy*2q-xWi$rgrC zUg$H8bA`S@m2S|~*!hvSIX~Pw<@uXIII|spuM7M~vYPI;S7hjp53uUlT(cwQmV!yl z3sTEqQ23ST(4A>Q5{mv#agPtH@+=hb!VPOWq za0CT79udf+p@l)8G^sHEi+lQs#Q66^g#=JP;(@`S5P{}M7Vs(p!k>21644AGEa2AaTBu2A<%Q1hm5b#T^8`4l19SC~P0@lL*%@P3B>1Y5- z3#!`T>@oq$MBEO6eyvq;_q z#XA6u2l`X-frvN?@BzV%%DFfiuouSyeoS=3YQ_tIFR5LIPg;pJ+CP7i_@|(~yHbWdWP=z(^mkGcW8%5)ez{0^{8KO(Fp= z(|Eyef3dmgNC0m-3-~SH&C}d;A@Cso4H=t(1khx#fK>&4vqIpNUnFX#ICxF)Cay0g z=s-dy3wTKAU-oB*6K7F?`-E>qzUJrvG>Zl7eE&B~0Cc3G0Z7>tU}_PVhzU^1mITX5 z0`b`~;4w*nI_ELi5d!;R0iM{R05v%*U;-)FLwW$sju&9bLjxpoDZmN8MB#SCKz1$~ z)nCA(e@E<30zu}+{vV7HSsn@1Ul^lX20s57{9oWk{{bNVboLjr=$11i+23#f3uJT) zbVQm*O7$1U=$518e;wV18~wrkjOH(7(Jdz=a{om0C#nbrTKbdy>E-7@|5ujzk4Mt`zD+xZuq z=$4yMC0GPh07d}@uc5pR0mg6&s=uzoZz)@9{i_@vPEPgL z&EYNORh@s7;bHfPz_UC;s=uy{ZW##Z{$l|6!5a71J-{vHM*V-3k$&9@0MEmTsQ##y z{wkK9-<25R`{c1xpKWXN9+vKfC;^ToXY?_19K@OS#q^wpkOw Vc6K5RWsoxHD=ln+^ZBXqe*m+y3oifw delta 9769 
zcmZWvbySqw*B*xMmhO=5?gj~Ikj|mIONJ1Xj+gERY3YzI1xZCZ1PMVxy7}gM@9*C4 zt~-Cs+0Q=Dd1q#=v!AtN=8wS1Bj9=pQd}@ssNp^4rT78@2$W9*0*M3DvebZE3N_$D ziUfUOm7%3goG#){#8|{&dc5@`EnBuJ2 zg5OD0Fuy?^C6{-*MVAmrqH?&P8bX+lW{=n=C!Ex2s@|ycy^U3hEh8*AKkZ&-0sgKU z)jEr0|IHxpJVDYtg7@+Qnp1XCEVEjZS3R8JuUU9+S-7 z2IZnh7Uxr#uQP?8-JNr^hqR!@31l+A5wpl#@8v^g$gSTU9w4V8t>O=2!7yx2i(a&S zJ5?L@ZL|Qtsp7+W#k(_W%PaiF&P6j)cAIjI)gh~#u$)ojbB8Nbq0ZAqiXm6`Zi zjZImX8kEDrAi@#JcHFqZtqu$VO7+ZqzIryBkjgpM47susq4g@M3ge(qE{<|-i_V$6 zI3xXK8<0P-I0t?!T>!;`FaM7&zy5XTFWkGkZ8%mJ7qlUZNDq?lT9ft6IUiGApvaf> zdc1f-_#wz-Q-^Auweqq-D;YXScGGdzD-N+dlh3K>5z0x0Mi^2ut$t7f-g>t;EedLe z**mT4jUnBt)MSXy8P<`tn3647~858#+pS>?~YBhH~TX6?g z8tB6-S&YvE`kRM-Q1aRC`+nN#Z&vrf#Fxeu!aHx8)3-aB}9$8IRKp}#lsFn#4* zVpM#-SBhjGv=lwANf$^ZrZ7W@aWv2xW@@>-y@@m>Fp*s9oAJZ?kiQF~eU;C#?sUQK zhn}L53F_1B;DI zImbJ6A_QE`CR7Z~rdSqnE7INa&x3wp7C=0H@s#tP)mkz{35mIs3|mqRo|pB>3_*JU zn4?U~YWD|mbz@Rp)=fw)q+6YN!MBGqK;qu0#?#87I$IC5;R`kTM z+K|olpoHV@8N+8Cx2UJ70Z`IL=6DJ#^;GVhq@ZuDKSF;&-N^f_Vl@?&?_wA+GJ}_-j{BH2A~(EbmferqFwS!Rrd6 zy(A<)TwxO)x>h$Qbf3(}0n7 z%lT4#3>1o6?~=qUAFx+BP{I^uTQ+w~~&i;_C_2HE}npd;7rqx!LswVUGd) z4&B}_3WRq7fxItP7pkBK#ZrVP7G~_NmB;Ztx4}o_!E$f{XRU}*tTFhVs3%8i zqQ*nIX-_UgWRpB-m&0*7B2ulKZr|yx!Sg`bQi6s)=*Pm|T*Hzi#dt1?%nKc$lN0X< zbM2s?S8s_lRNxT^Kp+qbh@B$baOUIL_%0X(5~2fvcwt^ZM}gvrHN=P0!o$PD-^|L@ z*%{*E#o-0<^lDUbJOSfYR)!gxDsD`W!h=Auq#zIvps7Gj_|FM@7bklc$nOI{stX&A zX|@o8j9q!0QlJvO#zx~LFzq4$&E7x~o?@sVbm=2B#&zzbz_KWkS&2fx-5cYi2F3!| zsSoM=rBHs^tH~m4@!)H8>D9S!6o2vgVu~8>GDn;oR$&VN(we`5>WxOG$g^9QOKmL( z(PfxD6(?LrfGUL@p9iDW?!Q03_u$nDxI7pd=t7T zV@)CDkx9=Qxt2N5a?yrWU~gs5`Knbsmg#_issXM985R6}b%LPFSlxD!TU}PzDI1A< z)?WE}=L&@BuDw01%tcbvWw6>-bY@!uv%H%~#7Z$fwcf?~*+lbKCF=l$ps+G~@*Txa z^~M*T>SGKU<*1X&XJPF#V$Wlj#rtQeW-PIAwb!K0bmtv>3D)OEwoMR&FR{faj!G&j0>nys_pV&S~hI-sDx zynknME*(#p=c$j~_B)NQj=z2pQDBp>h~1TG*L8va2^cp}n4$TQB_bj0?$42fKmvdT zCpE$!cOP)KB!oUb!U75Iz+mb5k`&I%gcK-ogqZFz&aQTeSksV0u}lT2N<|UV;ynI~ zcirZ$c64Kps}ySogIH5jNZ(SaCyXaP)n1aDZSZWyuTcB>^?auc<9j7%<|DciKhAb$ 
zVne+SkIzM(OecV_f>n{E=r-mp_Ke&8!R?9z%io;R))QLD72Ij3#D_e!uWKA>We}em zHse-hBAmAV;7qw*tn(boY>h;uHTjFi8)S@k^+}7{*p1BAuHAz_gc=ThDOA+>y4Tr5 zmH^AZ4@L6SL$PvoCL(y|$MiC{{!lPB+pzQ7hjnp!bjxfzmD#q+P?u%RnR-K2m*I9k z%=aWE8Z%ILMQMTTUHBPcLfgEFbrxY_ETTNC@m;br!c}i!d9FG6CDr}Q##v*^febe%qmy0I2@q_Dfh{0%7`S7+UL*Op{ z_uC9_!L@a&v!<(5ky18Q6nI|{=_6{tu#6VbQM`l7_6SLmsrN{6O>;~R<6{oWu>aU1 z2t(1N$T!{X@VU{wg_D+goK4f*#znBY9~j}S5#VL>SFYX1JkXhZPD8%{|1s8k=9MD% zk&rFr^qcu_^Onu|XQCl55RF(`be8!EX|BB7gKO2iAO$9R-?k+o2TvyQs@;A1#3eWO z^QK#%5T~*E9$P1Z_v#o7D)=fCWV7N<6I$oFE)G%b>b7>%zfRI$kUH_%k=)vlboM>B zB1>?b$)dLZlGsCIMMfKkId{WOvY@)B6UC5K^4(S=DSbpnP{CC2ijq5_<7|6_^JOO< z`nT9uvCe4&X2z>wXT(EmQsh@6^e6SzNi}jUP`m|YYiZi@#nxkkV^?*?V!F~d5pxAe z9|?(jFc^lsro(r{H@jYmDYR=+4eugg^~v?^>~cq0BDS)*2!B3Dxwh|6w!$j5VhR%c za#3X2KBSXIsa!l4{YxU|*Yodfx|@%M!`{cIT!CDZd*nMUD@1%Oh>fjaoonkFkL^_p zbTJzfbnW|FzY~%aoURGQa1-P8{*yXjFDwKvrbpdJ6bkw$l1`gL4WBgghV8?FK%!_M z5Esx#|5qfvizb4msl_aD;Y1#V-o2(lc!Cx#3u1#cmgR@V1C}@I+`Y4QeNX;Zzk>RnW6$~*r#}TL~ zU@E4kKixolSr~bv#w7XLwb)^TkWqB)9Ck!|Ck6$iO)_F=TtFd{`e(5!`TNBC$`c*8 zE*CsLQnWppjPRUO{`9(T@KiQ!`Tdj0woW)B=8!UFdzJApukI< zNT4$Ta5X=;YlOPwh8xwN9*=olP!bD^X9soVPcTBM@pkf_D_u~yl6QjbK9R zfDQr){v)vJ4Ns8%6j*>NjtrV25XmiuPjE|*o67laOF;Un=ZL9gQ)Q_>ZrcJ!d_L_g z_h|WyME=uDbM#ll`pgNqC9L#}c(oi7#z8-QI&wpa-I2f7j#e*xF;7N z#AX*DTb8FT2MRFs6Do4O58J@IDVC~QG`?O2KThbCA>a#+G_KU96UfIf zPz1a{pki4bub=mF#%0-fPO_`fk#%Y$!TlMWpf-p1e0o82J#`^igAx_{$=PU9VsQut z&y}ux@bkiQ+_RXHNn2=P>)Ucuo7XXwT3hCtuR>c_XxFBta%K-ee)4cd|m3-;m@<$&hkP#-y}8G=z26@pEDLk%g+1stzsFG!!xSn@KkA(=3Q`hmG%jFK7wyr^>pH{D&9US zX)kS_UR2K1(Tc0AMRw*^`|Dn46b#-#+Lx{|0aGe+U`Cb-SV~c$eSCyT5&Zv?sJq5c z!#&E?a${JVAO!;i;)h8TFCa=SgTfB*t_7XacL5R9_Hbsv5w#`jWXr$@5+jcq8Ue(} z<+SDrbCqCFHy*|O*9NWi)>AQ4hr^6-rU@AS<;VCy5REd#%s`KcnfoSDwN>5TGBf24 zv85}ySp3_D!5Oje@>aRc=wgmx^u*E$jr#;U{HE%Rk%O60N^K-a)$^?gTLPb&WdGAq zxDz7d!z5cC($Q}=t2Nov`ItJ&1BsGM?MJgqh4#!%R{$$59rPN{2eBsgM6%0IsRzB> z{7rj0VC;U2>K$A*LuK+_EufYLGQ2@v{6Jjjru?44#l+*ytaeaNTjLFuC{i(j?@V*0 z?k?Q$V*5J-;>ETf8_dpO)8o6FdqVG%XPx`lB2UMK_le(^GX&(@g^CG-weE7%4L;-h 
zJxUz=AaR^@%Ro1XYRnQi$Zb(Qh2myJS&$;j4Sbx=BlM;Q9EJHSlCjC0E(N(tet$(TR z^lLwzR7YJQ?u~?9{YoV%Hj5Y{7?sqdhFMBA~|JrY&xW43rN z<<+7L6+@4G7Qz0!ESWFu&5QKm&~YD`IXO?W3f3O=_kttn!LNunc7KE$*w2dH6vMTD zw{7v1E= z?~rq#r?^Xv8#g&X3}K%k+et%D4?}sq=2uPka)_8zsBzoFcJthuVGf_LK`=qy}&ojJ-D( zqW+wZiBC~3c$AFpa(#A4agX_jYy%0Yq_B!n5I9UF1c(@nV9_KDR54(~!(yqihk;xe zXt%-xv=pd-khe&{v6U!rZ-Ig9ju&c}$R$%k4pVtAOc03gj}j7)^g@AJyL-EOL987i z{=W;zgY-2=SROF427gV3#i<@H=CuSAx}qArNN*U@P|y#v%*Dqw@NsHa<9b;0jo!La zoFPQBr+U&gKa<-nl79%l6U=Gzkj0LRw`@%-J=OgsV=uUFO`B~nJ?i6(Y z9%McTY}{eZqB!+-=$f}=8b<;ft`p5OE3)FcB(u;>FV$dz$fQcSaV1S{IeW(QzD_Qh z)VgiZdNvzv`!%Vre&Rk;3ayE>a%duL0uP;W2dJIkxLl7aIq1niZ=R^JT+p0YS2QTy zyMET(JvRDCt-aB^mSQ`xrBcSxrOvXWm9v(NUQ@&x_g=k_Z ztEZ^WQ)QfFX-~8RFqIiAgyt%q!(VP~Da3veX35Hl zU~g-(eiVM;%UcN!+cgPjEV^GB)GQj_P5|CLBL0&(V8gCdOa``#Ga?8?M+*XR1EJBJ zKrRFs3MrEk>*Y%-uqE*FP)EFdZI_0kk+nRH z-OHp3;Th=`9^njxI3P(%a*M0*U=CyB`$`Q$!Ini)GdwX~jLgTc2Io&>j&7Nw4yP%U zZW6hF&fp4e2AL%a>&~l$O5kCIhnl20zqkp>Pld}akNPrPLM%pn=fF-GWo_L*J>aQK zr?#1ZgD{@QlsWk;2`yo}xMH}uB6fd9;PPNdsAZB^Vm!l#mvij3UK^5^{yr@&l61Zh z6klhHtCCTukEDShe&y3sfZp`WHs_qwkwkyZD5KC14|)agu}U_2@YZI$e|x;qr4<*~ zJ5*<<_-TDlgs1k(?k^9e6IQ5uHhsdT-(G)-I^8wQGEJDlaJRBCnsYMdUtAza8z$bB zozAXXK+3eH{hC~AL6V}2P8;b)$T$353ff*2C=$Kkr3_j6JWS!(x$lzeShqZjgX6A@ zU}@goGN?6`L1NrU(5%)1^}0jP)tR*^dt^Ln zDZ6`*qIE%&>suZNhU3y5Q`X#6;i~{Ey^*9@j#<5Cbk`5|T&NFEt*^^oU0W!fVnRQj z{5aXRx%yG}ZLQuo%QftcWW?6_`0T3GJ?mHgilHIU|Os4b8pR zY<+q+Qix~_-y?B`d7f9`N!pj7-tBXpg! 
z{NlOl+sBZl=e7(nMwI4tThaD`(+_BF&0Dxb8hy_(C{~A9t6b{(nQ@}2W*759tS&}m za)A91K1Z!Q?3f5NSx`f6D*LUwcR*3r-2=3Sjow}PxyVhIsv9eTUueP|f|0cC_Eryt zEig+%r=Duq$%QOx=d(GSU#SSRAL+SGADo*ja@)bPTCCFAd(YF^&fS7KQ3Uw490cwu z$|kr*_LtdFMe#AR`1C_jS(#ar%9Hb{!3qeXA03;RR$pwXAm|hql43r`7yYP=Mo88P z#-)80Mc53E)Ynhfw=3TQ^VW+dv#*ocZVPH)*jp2DoAu@}Lnw|1O>35*M0wJ*=~-Hz zq=(KBi>Y^y$Pmd!yybp4%X#tbPbb`>I$MXQ?CiNLS20HLib== z+rQRnAPMiS6SXyCff!u+tG63Ns9*VW=6#u>HuDP7thqlew7H3z5I+rTfi$i)Dy4pU zo0mE3=99ph%gt}MsHe*Q1Zp#RcTQVc+RxXV&5^+QJen+AjClpK97WP|rl)HL<9I9C zmnP)%;92`|H7yVW6`)@q|a+#bceA_I<203BFfuCqv5)umD?b) zy_4J%J(5ip-&9P_Ni(E-`5Ttr9>+2Vy#v>*l7QR6TXpl!Qs7ZQ9(pN4I-eea`LQ!M z>>S`3$i!MYrkE}x*5(=zL>ozCO2V5PAnJSu36zVqzbc5l)F0S(@9Cj6jaRrXOCD|H z52mofbjqBfV|;|@P{e`hIseU&uA`A%d%spetzVEt|9yvJ-X zMVr94SS{Z!h_QsxVw=wu$k5t`naP z%r$6qUsT?A-HS&mykKM+3^~G$aGB#Hc-n9;J~i?wEEbO=is$&$4!wWZgtEc?36WV&VJ(b#Guq!A<+4uBHy__0!`QD^4{ z&fO9hF7WHsxMny|888L}#lG?uFsDfoQ{&wAwZEnl56bE-%%0QCSKK`L$;dkxzBzjK zDKr_O@q+*)p`0S{yW7~dQQT8Ks4lSWwoMvE-X9k@eJ~uB06xXSYOd;ZAlgH67oqGh zRgasQgVQ9HaTg2`)h7E+{VcVqxHV63q7sun@>2%CVBznqPyl_x{4|7UD> z`X)iXIV7U=!#b#hNIg$EJT*m1cLi?OgWuhZj zcgA8)_j{;s&)VliZ_h3lpox&}BdfdtX5)vzY0;j;m%=?a_zhK!bwoS*wo+CrUw64` zeLox-D%9+o^0a3g@5J^?B-3NC*=hM&!oQ|Y;D@x}mPk~!w~`ymj!L!LCThRKwDYX|66T!+QA?Ah8uN6|6NP})peD7R3LwI?& z;5{6NH*V1yM^z2oWqNRp}H_qS6nVS;tFa9udJ;XAgF{)~yiq?c1EBUm@j z@|xp9n&OqI2kt?%d09hi{lSRUg}h&=Xy3?5=_rPgpQI>E&7qon#Hwb%3_SD+=_JfU zWKJIF4Hn(#UuANh-231naU@+0pw%~Q^!R^l zWxuX7J#ki9{RQh=csv?(dw-Pl`?%)u@!nL-$7KhKkorKmXV!{_tRtRM0#J(pd|E=RouSX@+;e@* zgQeul&BKB0EsT;5sCf4p@$f4^%+7GYxNa=UZ2Z|yxYXd4gF9win;AoLdel~n!EEtC zR1FQQp7-Ja>Ev1ULS1e<}vGc4J*H- zVG1e)Q%^47umuV2cRBUD#nI^e1|E#74IBNDNmgK?!Q}W1)_-M$H8)6rh6FssKgDBAEdP@J_=4@Y6`a zo}@630t~W%@5o@LKRAT~wg>|gOxS>RItv(}{2MMaqXMH97{Ea~B{-kzw|jvF6VT3J z0gF-p4Sh-pfyE3;@b;fBYBo|pEt3UoM)R9t&Q1wT!5|~;U+^gaEyV!rvM9mVf4Z_c zv4N>97O(~VZ$>sJF>suQ0T^Ucg6A22yBe|uz|v2Dn}Ix-09OtRc#HElq!PshiePYo z>u*RchWh(Fb1o(LEB9|VjyNh1lgk2j<@pVdb49@!{C}CafNvfzcv|RBqC7fqoW}zG 
zD*P8D`28sFd`j@X$Zyv~J~voX6o4ym2bYV(7Ez$CKmsfz@s~*eNEGsdKTG`PVk%<; zorNr5Wl2D@SOmx@!UY(Lae#JZEI^|O13dgcw;Wha3dT@`?ZU{?Ff#ybAOl;Z0jv@( z@Xh~BB;a)kFSuIv_l{dB5&&Py0tli$DhM>!>29Uxs#4fLjw0ted2z_AVzpkBcO zHc!2rHiP=ZlaU|UQ8M6lLwST+*=+nSh)c@(z{Ohti=)lO- zfRq<9oHcONLQ4Ig8vI}MdA+~rSipw{T`H?WOa diff --git a/src/vfs/_vfscommon.vfs/modules/tomlish-1.1.4.tm b/src/vfs/_vfscommon.vfs/modules/tomlish-1.1.4.tm index 7a6d5205..33d5b912 100644 --- a/src/vfs/_vfscommon.vfs/modules/tomlish-1.1.4.tm +++ b/src/vfs/_vfscommon.vfs/modules/tomlish-1.1.4.tm @@ -153,15 +153,10 @@ namespace eval tomlish { } #review - if {[uplevel 1 [list info exists tablenames_seen]]} { - upvar tablenames_seen tablenames_seen + if {[uplevel 1 [list info exists tablenames_info]]} { + upvar tablenames_info tablenames_info } else { - set tablenames_seen [list] ;#list of lists - } - if {[uplevel 1 [list info exists tablenames_closed]]} { - upvar tablenames_closed tablenames_closed - } else { - set tablenames_closed [list] ;#list of lists + set tablenames_info [dict create] ;#keys are lists {parenttable subtable etc} corresponding to parenttable.subtable.etc } foreach sub [lrange $keyval_element 2 end] { @@ -207,13 +202,10 @@ namespace eval tomlish { ARRAY { #we need to recurse to get the corresponding dict for the contained item(s) #pass in the whole $found_sub - not just the $value! - set prev_tablenames_seen $tablenames_seen - set prev_tablenames_closed $tablenames_closed - set tablenames_seen [list] - set tablenames_closed [list] + set prev_tablenames_info $tablenames_info + set tablenames_info [dict create] set result [list type $type value [::tomlish::to_dict [list $found_sub]]] - set tablenames_seen $prev_tablenames_seen - set tablenames_closed $prev_tablenames_closed + set tablenames_info $prev_tablenames_info } MULTISTRING - MULTILITERAL { #review - mapping these to STRING might make some conversions harder? 
@@ -295,23 +287,66 @@ namespace eval tomlish { #[Data] #temps = [{cpu = 79.5, case = 72.0}] proc to_dict {tomlish} { + package require dictn #keep track of which tablenames have already been directly defined, # so we can raise an error to satisfy the toml rule: 'You cannot define any key or table more than once. Doing so is invalid' #Note that [a] and then [a.b] is ok if there are no subkey conflicts - so we are only tracking complete tablenames here. #we don't error out just because a previous tablename segment has already appeared. - ##variable tablenames_seen [list] - if {[uplevel 1 [list info exists tablenames_seen]]} { - upvar tablenames_seen tablenames_seen - } else { - set tablenames_seen [list] ;#list of lists - } - if {[uplevel 1 [list info exists tablenames_closed]]} { - upvar tablenames_closed tablenames_closed + + #Declaring, Creating, and Defining Tables + #https://github.com/toml-lang/toml/issues/795 + #(update - only Creating and Defining are relevant terminology) + + #review + #tablenames_info keys created, defined, createdby, definedby, closedby + + #consider the following 2 which are legal: + #[table] #'table' created, defined=open definedby={header table} + #x.y = 3 + #[table.x.z] #'table' defined=closed closedby={header table.x.z}, 'table.x' created, 'table.x.z' created defined=open definedby={header table.x.z} + #k= 22 + # #'table.x.z' defined=closed closedby={eof eof} + + #equivalent datastructure + + #[table] #'table' created, defined=open definedby={header table} + #[table.x] #'table' defined=closed closedby={header table.x}, 'table.x' created defined=open definedby={header table.x} + #y = 3 + #[table.x.z] #'table.x' defined=closed closedby={header table.x.z}, 'table.x.z' created defined=open definedby={header table.x.z} + #k=22 + + #illegal + #[table] #'table' created and defined=open + #x.y = 3 #'table.x' created first keyval pair defined=open definedby={keyval x.y = 3} + #[table.x.y.z] #'table' defined=closed, 'table.x' closed because 
parent 'table' closed?, 'table.x.y' cannot be created + #k = 22 + # + ## - we would fail on encountering table.x.y because only table and table.x are effectively tables - but that table.x is closed should be detected (?) + + #illegal + #[table] + #x.y = {p=3} + #[table.x.y.z] + #k = 22 + ## we should fail because y is an inline table which is closed to further entries + + #note: it is not safe to compare normalized tablenames using join! + # e.g a.'b.c'.d is not the same as a.b.c.d + # instead compare {a b.c d} with {a b c d} + # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. + #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' + #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} + + + + if {[uplevel 1 [list info exists tablenames_info]]} { + upvar tablenames_info tablenames_info } else { - set tablenames_closed [list] ;#list of lists + set tablenames_info [dict create] ;#keyed on tablepath each of which is a list such as {config subgroup etc} (corresponding to config.subgroup.etc) } + log::info "---> to_dict processing '$tomlish'<<<" set items $tomlish @@ -354,7 +389,7 @@ namespace eval tomlish { #a.b.c = 1 #table_key_hierarchy -> a b - #leafkey -> c + #tleaf -> c if {[llength $dotted_key_hierarchy] == 0} { #empty?? probably invalid. review #This is different to '' = 1 or ''.'' = 1 which have lengths 1 and 2 respectively @@ -362,10 +397,10 @@ namespace eval tomlish { } elseif {[llength $dotted_key_hierarchy] == 1} { #dottedkey is only a key - no table component set table_hierarchy [list] - set leafkey [lindex $dotted_key_hierarchy 0] + set tleaf [lindex $dotted_key_hierarchy 0] } else { set table_hierarchy [lrange $dotted_key_hierarchy 0 end-1] - set leafkey [lindex $dotted_key_hierarchy end] + set tleaf [lindex $dotted_key_hierarchy end] } #ensure empty tables are still represented in the datastructure @@ -380,143 +415,101 @@ namespace eval tomlish { } } #review? 
- if {[dict exists $datastructure {*}$table_hierarchy $leafkey]} { - error "Duplicate key '$table_hierarchy $leafkey'. The key already exists at this level in the toml data. The toml data is not valid." + if {[dict exists $datastructure {*}$table_hierarchy $tleaf]} { + error "Duplicate key '$table_hierarchy $tleaf'. The key already exists at this level in the toml data. The toml data is not valid." } #JMN test 2025 if {[llength $table_hierarchy]} { - lappend tablenames_seen $table_hierarchy + dictn incr tablenames_info [list $table_hierarchy seencount] } set keyval_dict [_get_keyval_value $item] if {![tomlish::dict::is_tomlish_typeval $keyval_dict]} { - lappend tablenames_seen [list {*}$table_hierarchy $leafkey] - lappend tablenames_closed [list {*}$table_hierarchy $leafkey] + set t [list {*}$table_hierarchy $tleaf] + dictn incr tablenames_info [list $t seencount] + dictn set tablenames_info [list $t closed] 1 #review - item is an ITABLE - we recurse here without datastructure context :/ #overwriting keys? todo ? - dict set datastructure {*}$table_hierarchy $leafkey $keyval_dict + dict set datastructure {*}$table_hierarchy $tleaf $keyval_dict } else { - dict set datastructure {*}$table_hierarchy $leafkey $keyval_dict + dict set datastructure {*}$table_hierarchy $tleaf $keyval_dict } + } + TABLEARRAY { + set tablename [lindex $item 1] + log::debug "---> to_dict processing item TABLENAME (name: $tablename): $item" + set norm_segments [::tomlish::utils::tablename_split $tablename true] ;#true to normalize + #we expect repeated tablearray entries - each adding a sub-object to the value, which is an array/list. 
+ } TABLE { set tablename [lindex $item 1] + log::debug "---> to_dict processing item TABLE (name: $tablename): $item" #set tablename [::tomlish::utils::tablename_trim $tablename] set norm_segments [::tomlish::utils::tablename_split $tablename true] ;#true to normalize - if {$norm_segments in $tablenames_seen} { - error "Table name '$tablename' has already been directly defined in the toml data. Invalid." - } - log::debug "---> to_dict processing item $tag (name: $tablename): $item" - set name_segments [::tomlish::utils::tablename_split $tablename] ;#unnormalized - set last_seg "" - #toml spec rule - all segments mst be non-empty - #note that the results of tablename_split are 'raw' - ie some segments may be enclosed in single or double quotes. - - set table_key_sublist [list] - - foreach normseg $norm_segments { - lappend table_key_sublist $normseg - if {[dict exists $datastructure {*}$table_key_sublist]} { - #It's ok for this key to already exist *if* it was defined by a previous tablename or equivalent - #and if this key is longer - - #consider the following 2 which are legal: - #[table] - #x.y = 3 - #[table.x.z] - #k= 22 - - #equivalent - - #[table] - #[table.x] - #y = 3 - #[table.x.z] - #k=22 - - #illegal - #[table] - #x.y = 3 - #[table.x.y.z] - #k = 22 - ## - we should fail on encountering table.x.y because only table and table.x are effectively tables - - #illegal - #[table] - #x.y = {p=3} - #[table.x.y.z] - #k = 22 - ## we should fail because y is an inline table which is closed to further entries - - - #note: it is not safe to compare normalized tablenames using join! - # e.g a.'b.c'.d is not the same as a.b.c.d - # instead compare {a b.c d} with {a b c d} - # Here is an example where the number of keys is the same, but they must be compared as a list, not a joined string. 
- #'a.b'.'c.d.e' vs 'a.b.c'.'d.e' - #we need to normalize the tablenames seen so that {"x\ty"} matches {"xy"} - - set sublist_length [llength $table_key_sublist] - set found_testkey 0 - if {$table_key_sublist in $tablenames_seen} { - set found_testkey 1 - } else { - #see if it was defined by a longer entry - foreach seen_table_segments $tablenames_seen { - if {[llength $seen_table_segments] <= $sublist_length} { - continue - } - #each tablenames_seen entry is already a list of normalized segments - - #we could have [a.b.c.d] early on - # followed by [a.b] - which was still defined by the earlier one. + set T_DEFINED [dictn getdef $tablenames_info [list $norm_segments defined] NULL] + if {$T_DEFINED ne "NULL"} { + #our tablename e.g [a.b.c.d] declares a space to 'define' subkeys - but there has already been a definition space for this path + set msg "Table name $tablename has already been directly defined in the toml data. Invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] + error $msg + } - set seen_longer [lrange $seen_segments 0 [expr {$sublist_length -1}]] - puts stderr "testkey:'$table_key_sublist' vs seen_match:'$seen_longer'" - if {$table_key_sublist eq $seen_longer} { - set found_testkey 1 - } - } - } - if {$found_testkey == 0} { - #the raw unnormalized tablename might be ok to display in the error message, although it's not the actual dict keyset - set msg "key $table_key_sublist already exists in datastructure, but wasn't defined by a supertable." - append msg \n "tablenames_seen:" \n - foreach ts $tablenames_seen { - append msg " " $ts \n - } + set name_segments [::tomlish::utils::tablename_split $tablename 0] ;#unnormalized e.g ['a'."b".c.d] -> 'a' "b" c d + #results of tablename_split 0 are 'raw' - ie some segments may be enclosed in single or double quotes. 
+ + + set supertable [list] + ############## + # [a.b.c.d] + # norm_segments = {a b c d} + #check a {a b} {a b c} <---- supertables of a.b.c.d + ############## + foreach normseg [lrange $norm_segments 0 end-1] { + lappend supertable $normseg + if {![dictn exists $tablenames_info [list $supertable type]]} { + #supertable with this path doesn't yet exist + if {[dict exists $datastructure {*}$supertable]} { + #There is data though - so it must have been created as a keyval + set msg "Supertable [join $supertable .] of table name $tablename already has data - invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] error $msg + } else { + #here we 'create' it, but it's not being 'defined' ie we're not setting keyvals for it here + dictn set tablenames_info [list $supertable type] header + #ensure empty tables are still represented in the datastructure + dict set datastructure {*}$supertable [list] } - } - - } - - #ensure empty tables are still represented in the datastructure - set key_sublist [list] - foreach k $norm_segments { - lappend key_sublist $k - if {![dict exists $datastructure {*}$key_sublist]} { - dict set datastructure {*}$key_sublist [list] } else { - tomlish::log::notice "to_dict datastructure at (TABLE) subkey $key_sublist already had data: [dict get $datastructure {*}$key_sublist]" + #supertable has already been created - and maybe defined - but even if defined we can add subtables } } + #table [a.b.c.d] hasn't been defined - but may have been 'created' already by a longer tablename + # - or may have existing data from a keyval + if {![dictn exists $tablenames_info [list $norm_segments type]]} { + if {[dict exists $datastructure {*}$norm_segments]} { + set msg "Table name $tablename already has data - invalid" + append msg \n [tomlish::dict::_show_tablenames $tablenames_info] + error $msg + } + #no data or previously created table + dictn set tablenames_info [list $norm_segments type] header - #We must do this after the key-collision 
test above! - lappend tablenames_seen $norm_segments - - + #We are 'defining' this table's keys and values here (even if empty) + dict set datastructure {*}$norm_segments [list] ;#ensure table still represented in datastructure even if we add no keyvals here + } + dictn set tablenames_info [list $norm_segments defined] open log::debug ">>> to_dict >>>>>>>>>>>>>>>>> normalized table key hierarchy : $norm_segments" #now add the contained elements foreach element [lrange $item 2 end] { set type [lindex $element 0] - log::debug "----> tododict processing $tag subitem $type processing contained element $element" + log::debug "----> todict processing $tag subitem $type processing contained element $element" switch -exact -- $type { DOTTEDKEY { set dkey_info [_get_dottedkey_info $element] @@ -547,14 +540,19 @@ namespace eval tomlish { puts stdout "to_dict>>> $keyval_dict" dict set datastructure {*}$norm_segments {*}$dkeys $leaf_key $keyval_dict #JMN 2025 - lappend tablenames_seen [list {*}$norm_segments {*}$dkeys] + #lappend tablenames_info [list {*}$norm_segments {*}$dkeys] + set tkey [list {*}$norm_segments {*}$dkeys] + dictn incr tablenames_info [list $tkey seencount] if {![tomlish::dict::is_tomlish_typeval $keyval_dict]} { #the value is either empty or or a dict structure with arbitrary (from-user-data) toplevel keys # inner structure will contain {type value } if all leaves are not empty ITABLES - lappend tablenames_seen [list {*}$norm_segments {*}$dkeys $leaf_key] + set tkey [list {*}$norm_segments {*}$dkeys $leaf_key] + #lappend tablenames_info [list {*}$norm_segments {*}$dkeys $leaf_key] + dictn incr tablenames_info [list $tkey seencount] #if the keyval_dict is not a simple type x value y - then it's an inline table ? #if so - we should add the path to the leaf_key as a closed table too - as it's not allowed to have more entries added. 
+ dictn set tablenames_info [list $tkey closed] 1 } } @@ -562,7 +560,7 @@ namespace eval tomlish { #ignore } default { - error "Sub element of type '$type' not understood in table context. Expected only KEY,DQKEY,SQKEY,NEWLINE,COMMENT,WS" + error "Sub element of type '$type' not understood in table context. Expected only DOTTEDKEY,NEWLINE,COMMENT,WS" } } } @@ -1316,7 +1314,12 @@ namespace eval tomlish::encode { #NOTE - this DELIBERATELY does not validate the data, or process escapes etc #It encodes the tomlish records as they are. #ie it only produces toml shaped data from a tomlish list. + # #It is part of the roundtripability of data from toml to tomlish + #!! ie - it is not the place to do formatting of inline vs multiline !! + # That needs to be encoded in the tomlish data that is being passed in + # (e.g from_dict could make formatting decisions in the tomlish it produces) + # #e.g duplicate keys etc can exist in the toml output. #The to_dict from_dict (or any equivalent processor pair) is responsible for validation and conversion #back and forth of escape sequences where appropriate. @@ -1646,17 +1649,27 @@ namespace eval tomlish::decode { #pop_trigger_tokens: newline tablename endarray endinlinetable #note a token is a pop trigger depending on context. e.g first newline during keyval is a pop trigger. 
set parentlevel [expr {$nest -1}] - set do_append_to_parent 1 ;#most tokens will leave this alone - but some like squote_seq need to do their own append + set do_append_to_parent 1 ;#most tokens will leave this alone - but some like tentative_accum_squote need to do their own append switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote { + #should only apply within a multiliteral #### set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed #Without this - we would get extraneous empty list entries in the parent # - as the xxx-squote-space isn't a space level from the toml perspective # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + #assert prevstate always trailing-squote-space + #dev guardrail - remove? assertion lib? + switch -exact -- $prevstate { + trailing-squote-space { + } + default { + error "--- unexpected popped due to tentative_accum_squote but came from state '$prevstate' should have been trailing-squote-space" + } + } switch -- $tok { ' { - tomlish::parse::set_token_waiting type startsquote value $tok complete 1 startindex [expr {$i -1}] + tomlish::parse::set_token_waiting type single_squote value $tok complete 1 startindex [expr {$i -1}] } '' { #review - we should perhaps return double_squote instead? 
@@ -1669,74 +1682,51 @@ namespace eval tomlish::decode { tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 3}] } '''' { - switch -exact -- $prevstate { - leading-squote-space { - error "---- 4 squotes from leading-squote-space - shouldn't get here" - #we should have emitted the triple and left the last for next loop + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left squote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]'" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata } - trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i - 4}] - #todo integrate left squote with nest data at this level - set lastpart [lindex $v($parentlevel) end] - switch -- [lindex $lastpart 0] { - LITERALPART { - set newval "[lindex $lastpart 1]'" - set parentdata $v($parentlevel) - lset parentdata end [list LITERALPART $newval] - set v($parentlevel) $parentdata - } - NEWLINE { - lappend v($parentlevel) [list LITERALPART "'"] - } - MULTILITERAL { - #empty - lappend v($parentlevel) [list LITERALPART "'"] - } - default { - error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" - } - } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "'"] + } + MULTILITERAL { + #empty + lappend v($parentlevel) [list LITERALPART "'"] } default { - error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + error "--- don't know how to integrate extra trailing squote with data $v($parentlevel)" } } } ''''' { - switch -exact -- $prevstate { - leading-squote-space { - error "---- 5 squotes from leading-squote-space - 
shouldn't get here" - #we should have emitted the triple and left the following squotes for next loop + tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 squotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + LITERALPART { + set newval "[lindex $lastpart 1]''" + set parentdata $v($parentlevel) + lset parentdata end [list LITERALPART $newval] + set v($parentlevel) $parentdata } - trailing-squote-space { - tomlish::parse::set_token_waiting type triple_squote value $tok complete 1 startindex [expr {$i-5}] - #todo integrate left 2 squotes with nest data at this level - set lastpart [lindex $v($parentlevel) end] - switch -- [lindex $lastpart 0] { - LITERALPART { - set newval "[lindex $lastpart 1]''" - set parentdata $v($parentlevel) - lset parentdata end [list LITERALPART $newval] - set v($parentlevel) $parentdata - } - NEWLINE { - lappend v($parentlevel) [list LITERALPART "''"] - } - MULTILITERAL { - lappend v($parentlevel) [list LITERALPART "''"] - } - default { - error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" - } - } + NEWLINE { + lappend v($parentlevel) [list LITERALPART "''"] + } + MULTILITERAL { + lappend v($parentlevel) [list LITERALPART "''"] } default { - error "--- unexpected popped due to squote_seq but came from state '$prevstate' should have been leading-squote-space or trailing-squote-space" + error "--- don't know how to integrate extra trailing 2 squotes with data $v($parentlevel)" } } } } - puts stderr "tomlish::decode::toml ---- HERE squote_seq pop <$tok>" } triple_squote { #presumably popping multiliteral-space @@ -1763,7 +1753,119 @@ namespace eval tomlish::decode { lappend merged $part } default { - error "---- triple_squote unhandled part type [lindex $part 0] unable to merge leveldata: $v($next)" + error "---- triple_squote unhandled part type [lindex $part 0] unable 
to merge leveldata: $v($nest)" + } + } + set lasttype [lindex $part 0] + } + set v($nest) $merged + } + tentative_accum_dquote { + #should only apply within a multistring + #### + set do_append_to_parent 0 ;#mark false to indicate we will do our own appends if needed + #Without this - we would get extraneous empty list entries in the parent + # - as the trailing-dquote-space isn't a space level from the toml perspective + # - the use of a space is to give us a hook here to (possibly) integrate extra quotes into the parent space when we pop + #assert prevstate always trailing-dquote-space + #dev guardrail - remove? assertion lib? + switch -exact -- $prevstate { + trailing-dquote-space { + } + default { + error "--- unexpected popped due to tentative_accum_dquote but came from state '$prevstate' should have been trailing-dquote-space" + } + } + switch -- $tok { + {"} { + tomlish::parse::set_token_waiting type single_dquote value $tok complete 1 startindex [expr {$i -1}] + } + {""} { + #review - we should perhaps return double_dquote instead? 
+ #tomlish::parse::set_token_waiting type literal value "" complete 1 + tomlish::parse::set_token_waiting type double_dquote value "" complete 1 startindex [expr {$i - 2}] + } + {"""} { + #### + #if already an eof in token_waiting - set_token_waiting will insert before it + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 3}] + } + {""""} { + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i - 4}] + #todo integrate left dquote with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + STRINGPART { + set newval "[lindex $lastpart 1]\"" + set parentdata $v($parentlevel) + lset parentdata end [list STRINGPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE - CONT - WS { + lappend v($parentlevel) [list STRINGPART {"}] + } + MULTISTRING { + #empty + lappend v($parentlevel) [list STRINGPART {"}] + } + default { + error "--- don't know how to integrate extra trailing dquote with data $v($parentlevel)" + } + } + } + {"""""} { + tomlish::parse::set_token_waiting type triple_dquote value $tok complete 1 startindex [expr {$i-5}] + #todo integrate left 2 dquotes with nest data at this level + set lastpart [lindex $v($parentlevel) end] + switch -- [lindex $lastpart 0] { + STRINGPART { + set newval "[lindex $lastpart 1]\"\"" + set parentdata $v($parentlevel) + lset parentdata end [list STRINGPART $newval] + set v($parentlevel) $parentdata + } + NEWLINE - CONT - WS { + lappend v($parentlevel) [list STRINGPART {""}] + } + MULTISTRING { + lappend v($parentlevel) [list STRINGPART {""}] + } + default { + error "--- don't know how to integrate extra trailing 2 dquotes with data $v($parentlevel)" + } + } + } + } + } + triple_dquote { + #presumably popping multistring-space + ::tomlish::log::debug "---- triple_dquote for last_space_action pop leveldata: $v($nest)" + set merged [list] + set lasttype "" + foreach part $v($nest) { + 
switch -exact -- [lindex $part 0] { + MULTISTRING { + lappend merged $part + } + STRINGPART { + if {$lasttype eq "STRINGPART"} { + set prevpart [lindex $merged end] + lset prevpart 1 [lindex $prevpart 1][lindex $part 1] + lset merged end $prevpart + } else { + lappend merged $part + } + } + CONT - WS { + lappend merged $part + } + NEWLINE { + #note that even though first newline ultimately gets stripped from multiliterals - that isn't done here + #we still need the first one for roundtripping. The datastructure stage is where it gets stripped. + lappend merged $part + } + default { + error "---- triple_dquote unhandled part type [lindex $part 0] unable to merge leveldata: $v($nest)" } } set lasttype [lindex $part 0] @@ -1809,15 +1911,12 @@ namespace eval tomlish::decode { endinlinetable { ::tomlish::log::debug "---- endinlinetable for last_space_action pop" } - endmultiquote { - ::tomlish::log::debug "---- endmultiquote for last_space_action 'pop'" - } default { error "---- unexpected tokenType '$tokenType' for last_space_action 'pop'" } } if {$do_append_to_parent} { - #e.g squote_seq does it's own appends as necessary - so won't get here + #e.g tentative_accum_squote does it's own appends as necessary - so won't get here lappend v($parentlevel) [set v($nest)] } @@ -1831,8 +1930,8 @@ namespace eval tomlish::decode { switch -exact -- $tokenType { - squote_seq_begin { - #### + tentative_trigger_squote - tentative_trigger_dquote { + #### this startok will always be tentative_accum_squote/tentative_accum_dquote starting with one accumulated squote/dquote if {[dict exists $transition_info starttok] && [dict get $transition_info starttok] ne ""} { lassign [dict get $transition_info starttok] starttok_type starttok_val set next_tokenType_known 1 @@ -1840,6 +1939,16 @@ namespace eval tomlish::decode { set tok $starttok_val } } + single_squote { + #JMN - REVIEW + set next_tokenType_known 1 + ::tomlish::parse::set_tokenType "squotedkey" + set tok "" + } + triple_squote { + 
::tomlish::log::debug "---- push trigger tokenType triple_squote" + set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERALPART + } squotedkey { switch -exact -- $prevstate { table-space - itable-space { @@ -1849,6 +1958,9 @@ namespace eval tomlish::decode { #todo - check not something already waiting? tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } + triple_dquote { + set v($nest) [list MULTISTRING] ;#container for NEWLINE,STRINGPART,CONT + } dquotedkey { switch -exact -- $prevstate { table-space - itable-space { @@ -1858,7 +1970,7 @@ namespace eval tomlish::decode { #todo - check not something already waiting? tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } - XXXdquotedkey - XXXitablequotedkey { + XXXdquotedkey { #todo set v($nest) [list DQKEY $tok] ;#$tok is the keyname } @@ -1878,34 +1990,29 @@ namespace eval tomlish::decode { tomlish::parse::set_token_waiting type $tokenType value $tok complete 1 startindex [expr {$i -[tcl::string::length $tok]}] ;#re-submit token in the newly pushed space } } - startsquote { - #JMN - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "squotedkey" - set tok "" - } tablename { #note: we do not use the output of tomlish::tablename_trim to produce a tablename for storage in the tomlish list! #The tomlish list is intended to preserve all whitespace (and comments) - so a roundtrip from toml file to tomlish # back to toml file will be identical. #It is up to the datastructure stage to normalize and interpret tomlish for programmatic access. # we call tablename_trim here only to to validate that the tablename data is well-formed at the outermost level, - # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names. 
+ # so we can raise an error at this point rather than create a tomlish list with obviously invalid table names from + # a structural perspective. #todo - review! It's arguable that we should not do any validation here, and just store even incorrect raw tablenames, # so that the tomlish list is more useful for say a toml editor. Consider adding an 'err' tag to the appropriate place in the # tomlish list? - set test_only [::tomlish::utils::tablename_trim $tok] - ::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$test_only'" + #set trimtable [::tomlish::utils::tablename_trim $tok] + #::tomlish::log::debug "---- trimmed (but not normalized) tablename: '$trimtable'" set v($nest) [list TABLE $tok] ;#$tok is the *raw* table name #note also that equivalent tablenames may have different toml representations even after being trimmed! #e.g ["x\t\t"] & ["x "] (tab escapes vs literals) #These will show as above in the tomlish list, but should normalize to the same tablename when used as keys by the datastructure stage. } tablearrayname { - set test_only [::tomlish::utils::tablename_trim $tok] - puts stdout "trimmed (but not normalized) tablearrayname: '$test_only'" + #set trimtable [::tomlish::utils::tablename_trim $tok] + #::tomlish::log::debug "---- trimmed (but not normalized) tablearrayname: '$trimtable'" set v($nest) [list TABLEARRAY $tok] ;#$tok is the *raw* tablearray name } startarray { @@ -1914,14 +2021,6 @@ namespace eval tomlish::decode { startinlinetable { set v($nest) [list ITABLE] ;#$tok is just the opening curly brace - don't output. 
} - startmultiquote { - ::tomlish::log::debug "---- push trigger tokenType startmultiquote" - set v($nest) [list MULTISTRING] ;#container for STRINGPART, WS, CONT, NEWLINE - } - triple_squote { - ::tomlish::log::debug "---- push trigger tokenType triple_squote" - set v($nest) [list MULTILITERAL] ;#container for NEWLINE,LITERAL - } default { error "---- push trigger tokenType '$tokenType' not yet implemented" } @@ -1931,11 +2030,11 @@ namespace eval tomlish::decode { #no space level change switch -exact -- $tokenType { squotedkey { - puts "---- squotedkey in state $prevstate (no space level change)" + #puts "---- squotedkey in state $prevstate (no space level change)" lappend v($nest) [list SQKEY $tok] } dquotedkey { - puts "---- dquotedkey in state $prevstate (no space level change)" + #puts "---- dquotedkey in state $prevstate (no space level change)" lappend v($nest) [list DQKEY $tok] } barekey { @@ -1960,29 +2059,46 @@ namespace eval tomlish::decode { startinlinetable { puts stderr "---- decode::toml error. 
did not expect startinlinetable without space level change (no space level change)" } - startquote { + single_dquote { switch -exact -- $newstate { string-state { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "string" set tok "" } - quoted-key { + dquoted-key { set next_tokenType_known 1 ::tomlish::parse::set_tokenType "dquotedkey" set tok "" } - XXXitable-quoted-key { - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "itablequotedkey" - set tok "" + multistring-space { + lappend v($nest) [list STRINGPART {"}] + #may need to be joined on pop if there are neighbouring STRINGPARTS + } + default { + error "---- single_dquote switch case not implemented for nextstate: $newstate (no space level change)" + } + } + } + double_dquote { + #leading extra quotes - test: toml_multistring_startquote2 + switch -exact -- $prevstate { + itable-keyval-value-expected - keyval-value-expected { + puts stderr "tomlish::decode::toml double_dquote TEST" + #empty string + lappend v($nest) [list STRINGPART ""] + } + multistring-space { + #multistring-space to multistring-space + lappend v($nest) [list STRINGPART {""}] } default { - error "---- startquote switch case not implemented for nextstate: $newstate (no space level change)" + error "--- unhandled tokenType '$tokenType' when transitioning from state $prevstate to $newstate [::tomlish::parse::report_line] (no space level change)" } } + } - startsquote { + single_squote { switch -exact -- $newstate { literal-state { set next_tokenType_known 1 @@ -1995,41 +2111,17 @@ namespace eval tomlish::decode { set tok "" } multiliteral-space { - #false alarm squote returned from squote_seq pop + #false alarm squote returned from tentative_accum_squote pop ::tomlish::log::debug "---- adding lone squote to own LITERALPART nextstate: $newstate (no space level change)" #(single squote - not terminating space) lappend v($nest) [list LITERALPART '] #may need to be joined on pop if there are neighbouring LITERALPARTs } default { 
- error "---- startsquote switch case not implemented for nextstate: $newstate (no space level change)" + error "---- single_squote switch case not implemented for nextstate: $newstate (no space level change)" } } } - startmultiquote { - #review - puts stderr "---- got startmultiquote in state $prevstate (no space level change)" - set next_tokenType_known 1 - ::tomlish::parse::set_tokenType "stringpart" - set tok "" - } - endquote { - #nothing to do? - set tok "" - } - endsquote { - set tok "" - } - endmultiquote { - #JMN!! - set tok "" - } - string { - lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes - } - literal { - lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes - } double_squote { switch -exact -- $prevstate { keyval-value-expected { @@ -2044,6 +2136,19 @@ namespace eval tomlish::decode { } } } + enddquote { + #nothing to do? + set tok "" + } + endsquote { + set tok "" + } + string { + lappend v($nest) [list STRING $tok] ;#directly wrapped in dquotes + } + literal { + lappend v($nest) [list LITERAL $tok] ;#directly wrapped in squotes + } multistring { #review lappend v($nest) [list MULTISTRING $tok] @@ -2056,11 +2161,9 @@ namespace eval tomlish::decode { } literalpart { lappend v($nest) [list LITERALPART $tok] ;#will not get wrapped in squotes directly - } - itablequotedkey { - } untyped_value { + #would be better termed unclassified_value #we can't determine the type of unquoted values (int,float,datetime,bool) until the entire token was read. 
if {$tok in {true false}} { set tag BOOL @@ -2238,7 +2341,7 @@ namespace eval tomlish::utils { #eg {dog."tater.man"} set sLen [tcl::string::length $tablename] set segments [list] - set mode "unknown" ;#5 modes: unknown, quoted,litquoted, unquoted, syntax + set mode "preval" ;#5 modes: preval, quoted,litquoted, unquoted, postval #quoted is for double-quotes, litquoted is for single-quotes (string literal) set seg "" for {set i 0} {$i < $sLen} {incr i} { @@ -2249,139 +2352,166 @@ namespace eval tomlish::utils { set lastChar "" } + #todo - track\count backslashes properly + set c [tcl::string::index $tablename $i] + if {$c eq "\""} { + if {($lastChar eq "\\")} { + #not strictly correct - we could have had an even number prior-backslash sequence + #the toml spec would have us error out immediately on bsl in bad location - but we're + #trying to parse to unvalidated tomlish + set ctest escq + } else { + set ctest dq + } + } else { + set ctest [string map [list " " sp \t tab] $c] + } - if {$c eq "."} { - switch -exact -- $mode { - unquoted { - #dot marks end of segment. - lappend segments $seg - set seg "" - set mode "unknown" - } - quoted { - append seg $c - } - unknown { - lappend segments $seg - set seg "" - } - litquoted { - append seg $c - } - default { - #mode: syntax - #we got our dot. - the syntax mode is now satisfied. - set mode "unknown" + switch -- $ctest { + . { + switch -exact -- $mode { + preval { + error "tablename_split. dot not allowed - expecting a value" + } + unquoted { + #dot marks end of segment. + #if {![is_barekey $seg]} { + # error "tablename_split. 
dot not allowed - expecting a value" + #} + lappend segments $seg + set seg "" + set mode "preval" + } + quoted { + append seg $c + } + litquoted { + append seg $c + } + postval { + #got dot in an expected location + set mode "preval" + } } } - } elseif {($c eq "\"") && ($lastChar ne "\\")} { - if {$mode eq "unknown"} { - if {[tcl::string::trim $seg] ne ""} { - #we don't allow a quote in the middle of a bare key - error "tablename_split. character '\"' invalid at this point in tablename. tablename: '$tablename'" - } - set mode "quoted" - set seg "\"" - } elseif {$mode eq "unquoted"} { - append seg $c - } elseif {$mode eq "quoted"} { - append seg $c - - if {$normalize} { - lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] - } else { - lappend segments $seg + dq { + #unescaped dquote + switch -- $mode { + preval { + set mode "quoted" + set seg "\"" + } + unquoted { + #invalid in barekey - but we are after structure only + append seg $c + } + quoted { + append seg $c + if {$normalize} { + lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] + } else { + lappend segments $seg + } + set seg "" + set mode "postval" ;#make sure we only accept a dot or end-of-data now. + } + litquoted { + append seg $c + } + postval { + error "tablename_split. expected whitespace or dot, got double quote. tablename: '$tablename'" + } } - - set seg "" - set mode "syntax" ;#make sure we only accept a dot or end-of-data now. - } elseif {$mode eq "litquoted"} { - append seg $c - } elseif {$mode eq "syntax"} { - error "tablename_split. expected whitespace or dot, got double quote. 
tablename: '$tablename'" - } - } elseif {($c eq "\'")} { - if {$mode eq "unknown"} { - append seg $c - set mode "litquoted" - } elseif {$mode eq "unquoted"} { - #single quote inside e.g o'neill - append seg $c - } elseif {$mode eq "quoted"} { - append seg $c - - } elseif {$mode eq "litquoted"} { - append seg $c - #no normalization to do - lappend segments $seg - set seg "" - set mode "syntax" - } elseif {$mode eq "syntax"} { - error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" } - - } elseif {$c in [list " " \t]} { - if {$mode eq "syntax"} { - #ignore - } else { - append seg $c + ' { + switch -- $mode { + preval { + append seg $c + set mode "litquoted" + } + unquoted { + #single quote inside e.g o'neill - ultimately invalid - but we pass through here. + append seg $c + } + quoted { + append seg $c + } + litquoted { + append seg $c + #no normalization to do aside from stripping squotes + if {$normalize} { + lappend segments [tcl::string::range $seg 1 end-1] + } else { + lappend segments $seg + } + set seg "" + set mode "postval" + } + postval { + error "tablename_split. expected whitespace or dot, got single quote. tablename: '$tablename'" + } + } } - } else { - if {$mode eq "syntax"} { - error "tablename_split. Expected a dot separator. got '$c'. tablename: '$tablename'" + sp - tab { + switch -- $mode { + preval - postval { + #ignore + } + unquoted { + #terminates a barekey + lappend segments $seg + set seg "" + set mode "postval" + } + default { + #append to quoted or litquoted + append seg $c + } + } } - if {$mode eq "unknown"} { - set mode "unquoted" + default { + switch -- $mode { + preval { + set mode unquoted + append seg $c + } + postval { + error "tablename_split. Expected a dot separator. got '$c'. 
tablename: '$tablename'" + } + default { + append seg $c + } + } } - append seg $c } + if {$i == $sLen-1} { #end of data ::tomlish::log::debug "End of data: mode='$mode'" - #REVIEW - we can only end up in unquoted or syntax here? are other branches reachable? switch -exact -- $mode { - quoted { - if {$c ne "\""} { - error "tablename_split. missing closing double-quote in a segment. tablename: '$tablename'" - } - if {$normalize} { - lappend segments [::tomlish::utils::unescape_string [tcl::string::range $seg 1 end-1]] - #lappend segments [subst -nocommands -novariables [::string range $seg 1 end-1]] ;#wrong - } else { - lappend segments $seg - } + preval { + error "tablename_split. Expected a value after last dot separator. tablename: '$tablename'" } - litquoted { - set trimmed_seg [tcl::string::trim $seg] - if {[tcl::string::index $trimmed_seg end] ne "\'"} { - error "tablename_split. missing closing single-quote in a segment. tablename: '$tablename'" - } + unquoted { lappend segments $seg } - unquoted - unknown { - lappend segments $seg + quoted { + error "tablename_split. Expected a trailing double quote. tablename: '$tablename'" } - syntax { - #ok - segment already lappended + litquoted { + error "tablename_split. Expected a trailing single quote. tablename: '$tablename'" } - default { - lappend segments $seg + postval { + #ok - segment already lappended } } } } - foreach seg $segments { - set trimmed [tcl::string::trim $seg " \t"] - #note - we explicitly allow 'empty' quoted strings '' & "" - # (these are 'discouraged' but valid toml keys) - #if {$trimmed in [list "''" "\"\""]} { - # puts stderr "tablename_split. warning - Empty quoted string as tablename segment" - #} - if {$trimmed eq "" } { - error "tablename_split. Empty segment found. 
tablename: '$tablename' segments [llength $segments] ($segments)" - } - } + + #note - we must allow 'empty' quoted strings '' & "" + # (these are 'discouraged' but valid toml keys) + return $segments } @@ -2432,26 +2562,34 @@ namespace eval tomlish::utils { #- escape_string and unescape_string would not be reliably roundtrippable inverses anyway. #REVIEW - provide it anyway? When would it be desirable to use? - variable Bstring_control_map [list\ - \b {\b}\ - \n {\n}\ - \r {\r}\ - \" {\"}\ - \x1b {\e}\ - \\ "\\\\"\ - ] + variable Bstring_control_map [dict create] + dict set Bstring_control_map \b {\b} + dict set Bstring_control_map \n {\n} + dict set Bstring_control_map \r {\r} + dict set Bstring_control_map \" {\"} + #dict set Bstring_control_map \x1b {\e} ;#should presumably be only be a convenience for decode - going the other way we get \u001B + dict set Bstring_control_map \\ "\\\\" + #\e for \x1b seems like it might be included - v1.1?? hard to find current state of where toml is going :/ #for a Bstring (Basic string) tab is explicitly mentioned as not being one that must be escaped. - for {set cdec 0} {$cdec <= 8} {incr cdec} { + #8 = \b - already in list. 
+ #built the remainder whilst checking for entries already hardcoded above -in case more are added to the hardcoded list + for {set cdec 0} {$cdec <= 7} {incr cdec} { set hhhh [format %.4X $cdec] - lappend Bstring_control_map [format %c $cdec] \\u$hhhh + set char [format %c $cdec] + if {![dict exists $Bstring_control_map $char]} { + dict set Bstring_control_map $char \\u$hhhh + } } for {set cdec [expr {0x0A}]} {$cdec <= 0x1F} {incr cdec} { set hhhh [format %.4X $cdec] - lappend Bstring_control_map [format %c $cdec] \\u$hhhh + set char [format %c $cdec] + if {![dict exists $Bstring_control_map $char]} { + dict set Bstring_control_map $char \\u$hhhh + } } # \u007F = 127 - lappend Bstring_control_map [format %c 127] \\u007F + dict set Bstring_control_map [format %c 127] \\u007F #Note the inclusion of backslash in the list of controls makes this non idempotent - subsequent runs would keep encoding the backslashes! #escape only those chars that must be escaped in a Bstring (e.g not tab which can be literal or escaped) @@ -2474,6 +2612,7 @@ namespace eval tomlish::utils { # it recognizes other escapes which aren't approprite e.g \xhh and octal \nnn # it replaces \ with a single whitespace (trailing backslash) #This means we shouldn't use 'subst' on the whole string, but instead substitute only the toml-specified escapes (\r \n \b \t \f \\ \" \uhhhh & \Uhhhhhhhh + #plus \e for \x1b? 
set buffer "" set buffer4 "" ;#buffer for 4 hex characters following a \u @@ -2558,12 +2697,13 @@ namespace eval tomlish::utils { set ctest [tcl::string::map {{"} dq} $c] switch -exact -- $ctest { dq { - set e "\\\"" - append buffer [subst -nocommand -novariable $e] + append buffer {"} } b - t - n - f - r { - set e "\\$c" - append buffer [subst -nocommand -novariable $e] + append buffer [subst -nocommand -novariable "\\$c"] + } + e { + append buffer \x1b } u { set unicode4_active 1 @@ -2578,8 +2718,7 @@ namespace eval tomlish::utils { #review - toml spec says all other escapes are reserved #and if they are used TOML should produce an error. #we leave detecting this for caller for now - REVIEW - append buffer "\\" - append buffer $c + append buffer "\\$c" } } } else { @@ -3003,7 +3142,7 @@ namespace eval tomlish::parse { # states: # table-space, itable-space, array-space # array-value-expected,keyval-value-expected,itable-keyval-value-expected, keyval-syntax, - # quoted-key, squoted-key + # dquoted-key, squoted-key # string-state, literal-state, multistring... # # notes: @@ -3039,6 +3178,12 @@ namespace eval tomlish::parse { variable stateMatrix set stateMatrix [dict create] + #--------------------------------------------------------- + #WARNING + #The stateMatrix implementation here is currently messy. + #The code is a mixture of declarative via the stateMatrix and imperative via switch statements during PUSH/POP/SAMESPACE transitions. + #This means the state behaviour has to be reasoned about by looking at both in conjunction.
+ #--------------------------------------------------------- #xxx-space vs xxx-syntax inadequately documented - TODO @@ -3060,35 +3205,19 @@ namespace eval tomlish::parse { barekey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ squotedkey {PUSHSPACE "keyval-space" state "keyval-syntax" note ""}\ dquotedkey {PUSHSPACE "keyval-space" state "keyval-syntax"}\ - XXXstartquote "quoted-key"\ - XXXstartsquote "squoted-key"\ + XXXsingle_dquote "quoted-key"\ + XXXsingle_squote "squoted-key"\ comment "table-space"\ starttablename "tablename-state"\ starttablearrayname "tablearrayname-state"\ - startmultiquote "err-state"\ - endquote "err-state"\ + enddquote "err-state"\ + endsquote "err-state"\ comma "err-state"\ eof "end-state"\ equal "err-state"\ cr "err-lonecr"\ } - #itable-space/ curly-syntax : itables - dict set stateMatrix\ - itable-space {\ - whitespace "itable-space"\ - newline "itable-space"\ - barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - dquotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ - endinlinetable "POPSPACE"\ - XXXstartquote "quoted-key"\ - XXXstartsquote {TOSTATE "squoted-key" comment "jn-testing"}\ - comma "err-state"\ - comment "itable-space"\ - eof "err-state"\ - } - #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-space starttok {squote_seq "'"}} dict set stateMatrix\ @@ -3113,26 +3242,19 @@ namespace eval tomlish::parse { dict set stateMatrix\ keyval-value-expected {\ whitespace "keyval-value-expected"\ - untyped_value {TOSTATE "keyval-tail" note ""}\ - startquote {TOSTATE "string-state" returnstate keyval-tail}\ - startmultiquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ - squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-value-expected starttok {squote_seq "'"}}\ - startsquote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is 
triggered"}\ - double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"}\ - triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ - startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ - startarray {PUSHSPACE array-space returnstate keyval-tail}\ - } - #squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate keyval-process-leading-squotes starttok {squote_seq "'"}} - dict set stateMatrix\ - leading-squote-space {\ - squote_seq "POPSPACE"\ + untyped_value {TOSTATE "keyval-tail" note ""}\ + literal {TOSTATE "keyval-tail" note "required for empty literal at EOF"}\ + string {TOSTATE "keyval-tail" note "required for empty string at EOF"}\ + single_dquote {TOSTATE "string-state" returnstate keyval-tail}\ + triple_dquote {PUSHSPACE "multistring-space" returnstate keyval-tail}\ + single_squote {TOSTATE "literal-state" returnstate keyval-tail note "usual way a literal is triggered"}\ + triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ + startinlinetable {PUSHSPACE itable-space returnstate keyval-tail}\ + startarray {PUSHSPACE array-space returnstate keyval-tail}\ } - #dict set stateMatrix\ - # keyval-process-leading-squotes {\ - # startsquote "literal-state"\ - # triple_squote {PUSHSPACE "multiliteral-space" returnstate keyval-tail}\ - # } + #double_squote {TOSTATE "keyval-tail" note "empty literal received when double squote occurs"} + + #2025 - no leading-squote-space - only trailing-squote-space. 
dict set stateMatrix\ keyval-tail {\ @@ -3142,81 +3264,106 @@ namespace eval tomlish::parse { eof "end-state"\ } + + #itable-space/ curly-syntax : itables + # x={y=1,} + dict set stateMatrix\ + itable-space {\ + whitespace "itable-space"\ + newline "itable-space"\ + barekey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + squotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + dquotedkey {PUSHSPACE "itable-keyval-space" state "itable-keyval-syntax"}\ + endinlinetable "POPSPACE"\ + comma "err-state"\ + comment "itable-space"\ + eof "err-state"\ + } + #we don't get single_squote etc here - instead we get the resulting squotedkey token + + + # ??? review - something like this + # + # x={y =1,} dict set stateMatrix\ itable-keyval-syntax {\ - whitespace "itable-keyval-syntax"\ - barekey {PUSHSPACE "dottedkey-space"}\ - squotedkey {PUSHSPACE "dottedkey-space"}\ - dquotedkey {PUSHSPACE "dottedkey-space"}\ - equal "itable-keyval-value-expected"\ + whitespace {TOSTATE "itable-keyval-syntax"}\ + barekey {PUSHSPACE "dottedkey-space"}\ + squotedkey {PUSHSPACE "dottedkey-space"}\ + dquotedkey {PUSHSPACE "dottedkey-space"}\ + equal {TOSTATE "itable-keyval-value-expected"}\ newline "err-state"\ eof "err-state"\ } + + # x={y=1} + dict set stateMatrix\ + itable-keyval-space {\ + whitespace "itable-keyval-syntax"\ + equal {TOSTATE "itable-keyval-value-expected" note "required"}\ + } + dict set stateMatrix\ itable-keyval-value-expected {\ whitespace "itable-keyval-value-expected"\ untyped_value {TOSTATE "itable-val-tail" note ""}\ - startquote {TOSTATE "string-state" returnstate itable-val-tail}\ - startmultiquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ - squote_seq_begin {PUSHSPACE "leading-squote-space" returnstate itable-keyval-value-expected starttok {squote_seq "'"}}\ - startsquote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ - double_squote {TOSTATE "itable-val-tail" 
note "empty literal received when double squote occurs"}\ + single_dquote {TOSTATE "string-state" returnstate itable-val-tail}\ + triple_dquote {PUSHSPACE "multistring-space" returnstate itable-val-tail}\ + single_squote {TOSTATE "literal-state" returnstate itable-val-tail note "usual way a literal is triggered"}\ triple_squote {PUSHSPACE "multiliteral-space" returnstate itable-val-tail}\ startinlinetable {PUSHSPACE "itable-space" returnstate itable-val-tail}\ startarray {PUSHSPACE "array-space" returnstate itable-val-tail}\ } - dict set stateMatrix\ - itable-keyval-space {\ - whitespace "itable-keyval-syntax"\ - equal {TOSTATE "itable-keyval-value-expected" note "required"}\ - } + #double_squote not currently generated by _start_squote_sequence - '' processed as single_squote to literal-state just like 'xxx' + # review + # double_squote {TOSTATE "itable-val-tail" note "empty literal received when double squote occurs"} + + + # x={y=1,z="x"} + #POPSPACE is transition from itable-keyval-space to parent itable-space dict set stateMatrix\ itable-val-tail {\ whitespace "itable-val-tail"\ endinlinetable "POPSPACE"\ comma "POPSPACE"\ - XXXnewline {TOSTATE "itable-val-tail" note "itable-space ??"}\ - newline "POPSPACE"\ + newline {TOSTATE "itable-val-tail" note "itable-space ??"}\ comment "itable-val-tail"\ eof "err-state"\ } - #dict set stateMatrix\ - # itable-quoted-key {\ - # whitespace "NA"\ - # itablequotedkey {PUSHSPACE "itable-keyval-space"}\ - # newline "err-state"\ - # endquote "itable-keyval-syntax"\ - # } - #dict set stateMatrix\ - # itable-squoted-key {\ - # whitespace "NA"\ - # itablesquotedkey {PUSHSPACE "itable-keyval-space"}\ - # newline "err-state"\ - # endsquote "itable-keyval-syntax"\ - # } + # XXXnewline "POPSPACE" + # We shouldn't popspace on newline - as if there was no comma we need to stay in itable-val-tail + # This means the newline and subsequent whitespace, comments etc become part of the preceeding dottedkey record + #e.g + # x = { + # j=1 + # 
#comment within dottedkey j record + # , # comment unattached + # #comment unattached + # k=2 , #comment unattached + # l=3 #comment within l record + # , m=4 + # #comment associated with m record + # + # #still associated with m record + # } + ## - This doesn't quite correspond to what a user might expect - but seems like a consistent mechanism. + #The awkwardness is because there is no way to put in a comment that doesn't consume a trailing comma + #so we cant do: j= 1 #comment for j1 , + # and have the trailing comma recognised. + # + # To associate: j= 1, #comment for j1 + # we would need some extra processing . (not popping until next key ? extra state itable-sep-tail?) REVIEW - worth doing? + # + # The same issue occurs with multiline arrays. The most natural assumption is that a comment on same line after a comma + # is 'associated' with the previous entry. + # + # These comment issues are independent of the data dictionary being generated for conversion to json etc - as the comments don't carry through anyway, + # but are a potential oddity for manipulating the intermediate tomlish structure whilst attempting to preserve 'associated' comments + # (e.g reordering records within an itable) + #The user's intention for 'associated' isn't always clear and the specs don't really guide on this. - - - #array-value-expected ? - dict set stateMatrix\ - XXXvalue-expected {\ - whitespace "value-expected"\ - untyped_value {"SAMESPACE" "" replay untyped_value}\ - startquote "string-state"\ - startsquote "literal-state"\ - triple_squote {PUSHSPACE "multiliteral-space"}\ - startmultiquote {PUSHSPACE "multistring-space"}\ - startinlinetable {PUSHSPACE itable-space}\ - startarray {PUSHSPACE array-space}\ - comment "err-state-value-expected-got-comment"\ - comma "err-state"\ - newline "err-state"\ - eof "err-state"\ - } - #note comment token should never be delivered to array-value-expected state? 
- #dottedkey-space is not (currently) used within [tablename] or [[tablearrayname]] #it is for keyval ie x.y.z = value @@ -3245,6 +3392,8 @@ namespace eval tomlish::parse { whitespace "dottedkey-space-tail" dotsep "dottedkey-space" equal "POPSPACE"\ + eof "err-state"\ + newline "err-state"\ } #-------------------------------------------------------------------------- @@ -3262,22 +3411,10 @@ namespace eval tomlish::parse { #toml spec looks like heading towards allowing newlines within inline tables #https://github.com/toml-lang/toml/issues/781 - #2025 - appears to be valid for 1.1 - which we are targeting. + #2025 - multiline itables appear to be valid for 1.1 - which we are targeting. #https://github.com/toml-lang/toml/blob/main/toml.md#inline-table #JMN2025 - #dict set stateMatrix\ - # curly-syntax {\ - # whitespace "curly-syntax"\ - # newline "curly-syntax"\ - # barekey {PUSHSPACE "itable-keyval-space"}\ - # itablequotedkey "itable-keyval-space"\ - # endinlinetable "POPSPACE"\ - # startquote "itable-quoted-key"\ - # comma "itable-space"\ - # comment "itable-space"\ - # eof "err-state"\ - # } #review comment "err-state" vs comment "itable-space" - see if TOML 1.1 comes out and allows comments in multiline ITABLES #We currently allow multiline ITABLES (also with comments) in the tokenizer. #if we want to disallow as per TOML 1.0 - we should do so when attempting to get structure? 
@@ -3291,10 +3428,9 @@ namespace eval tomlish::parse { # untyped_value "SAMESPACE"\ # startarray {PUSHSPACE "array-space"}\ # endarray "POPSPACE"\ - # startmultiquote {PUSHSPACE multistring-space}\ # startinlinetable {PUSHSPACE itable-space}\ - # startquote "string-state"\ - # startsquote "literal-state"\ + # single_dquote "string-state"\ + # single_squote "literal-state"\ # triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"}\ # comma "array-space"\ # comment "array-space"\ @@ -3305,15 +3441,16 @@ namespace eval tomlish::parse { set aspace [dict create] dict set aspace whitespace "array-space" dict set aspace newline "array-space" - dict set aspace untyped_value "SAMESPACE" + #dict set aspace untyped_value "SAMESPACE" + dict set aspace untyped_value "array-syntax" dict set aspace startarray {PUSHSPACE "array-space"} dict set aspace endarray "POPSPACE" - dict set aspace startmultiquote {PUSHSPACE multistring-space} + dict set aspace single_dquote {TOSTATE "string-state" returnstate array-syntax} + dict set aspace triple_dquote {PUSHSPACE "multistring-space" returnstate array-syntax} + dict set aspace single_squote {TOSTATE "literal-state" returnstate array-syntax} + dict set aspace triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax} dict set aspace startinlinetable {PUSHSPACE itable-space} - dict set aspace startquote "string-state" - dict set aspace startsquote "literal-state" - dict set aspace triple_squote {PUSHSPACE "multiliteral-space" returnstate array-syntax note "seems ok 2024"} - dict set aspace comma "array-space" + #dict set aspace comma "array-space" dict set aspace comment "array-space" dict set aspace eof "err-state-array-space-got-eof" dict set stateMatrix array-space $aspace @@ -3329,26 +3466,16 @@ namespace eval tomlish::parse { #dict set asyntax untyped_value "SAMESPACE" #dict set asyntax startarray {PUSHSPACE array-space} dict set asyntax endarray "POPSPACE" - #dict set asyntax 
startmultiquote {PUSHSPACE multistring-space} - #dict set asyntax startquote "string-state" - #dict set asyntax startsquote "literal-state" + #dict set asyntax single_dquote "string-state" + #dict set asyntax single_squote "literal-state" dict set asyntax comma "array-space" dict set asyntax comment "array-syntax" dict set stateMatrix array-syntax $asyntax - #quoted-key & squoted-key need to PUSHSPACE from own token to keyval-space - dict set stateMatrix\ - quoted-key {\ - whitespace "NA"\ - dquotedkey {PUSHSPACE "keyval-space"}\ - newline "err-state"\ - endquote "keyval-syntax"\ - } - - #review + #dquotedkey is a token - dquoted-key is a state dict set stateMatrix\ dquoted-key {\ whitespace "NA"\ @@ -3367,7 +3494,7 @@ namespace eval tomlish::parse { string-state {\ whitespace "NA"\ string "string-state"\ - endquote "SAMESPACE"\ + enddquote "SAMESPACE"\ newline "err-state"\ eof "err-state"\ } @@ -3381,20 +3508,21 @@ namespace eval tomlish::parse { } - #dict set stateMatrix\ - # stringpart {\ - # continuation "SAMESPACE"\ - # endmultiquote "POPSPACE"\ - # eof "err-state"\ - # } dict set stateMatrix\ multistring-space {\ - whitespace "multistring-space"\ - continuation "multistring-space"\ - stringpart "multistring-space"\ - newline "multistring-space"\ - endmultiquote "POPSPACE"\ - eof "err-state"\ + whitespace "multistring-space"\ + continuation "multistring-space"\ + stringpart "multistring-space"\ + newline "multistring-space"\ + tentative_trigger_dquote {PUSHSPACE "trailing-dquote-space" returnstate multistring-space starttok {tentative_accum_dquote {"}}}\ + single_dquote {TOSTATE multistring-space}\ + double_dquote {TOSTATE multistring-space}\ + triple_dquote {POPSPACE}\ + eof "err-state"\ + } + dict set stateMatrix\ + trailing-dquote-space { + tentative_accum_dquote "POPSPACE" } @@ -3402,19 +3530,19 @@ namespace eval tomlish::parse { #todo - treat sole cr as part of literalpart but crlf and lf as newline dict set stateMatrix\ multiliteral-space {\ - 
literalpart "multiliteral-space"\ - newline "multiliteral-space"\ - squote_seq_begin {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {squote_seq "'"}}\ - triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ - double_squote {TOSTATE multiliteral-space note "short squote_seq: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ - startsquote {TOSTATE multiliteral-space note "short squote_seq: same as double_squote - false alarm"}\ - eof "err-premature-eof-in-multiliteral-space"\ + literalpart "multiliteral-space"\ + newline "multiliteral-space"\ + tentative_trigger_squote {PUSHSPACE "trailing-squote-space" returnstate multiliteral-space starttok {tentative_accum_squote "'"}}\ + single_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: false alarm this squote is part of data"}\ + double_squote {TOSTATE multiliteral-space note "short tentative_accum_squote: can occur anywhere in the space e.g emitted at end when 5 squotes occur"}\ + triple_squote {POPSPACE note "on popping - we do any necessary concatenation of LITERALPART items due to squote processing"}\ + eof "err-premature-eof-in-multiliteral-space"\ } #trailing because we are looking for possible terminating ''' - but must accept '''' or ''''' and re-integrate the 1st one or 2 extra squotes dict set stateMatrix\ - trailing-squote-space {\ - squote_seq "POPSPACE"\ + trailing-squote-space { + tentative_accum_squote "POPSPACE" } @@ -3499,7 +3627,7 @@ namespace eval tomlish::parse { - + dict set stateMatrix\ end-state {} @@ -3557,14 +3685,13 @@ namespace eval tomlish::parse { dict set spacePushTransitions itable-keyval-space itable-keyval-syntax dict set spacePushTransitions array-space array-space dict set spacePushTransitions table-space tablename-state - dict set spacePushTransitions #itable-space itable-space + #dict set spacePushTransitions #itable-space itable-space #Pop 
to, next variable spacePopTransitions [dict create] dict set spacePopTransitions array-space array-syntax - #itable-space curly-syntax #itable-keyval-space itable-val-tail #review #we pop to keyval-space from dottedkey-space or from keyval-value-expected? we don't always want to go to keyval-tail @@ -3575,7 +3702,6 @@ namespace eval tomlish::parse { #JMN test #dict set spaceSameTransitions array-space array-syntax - #itable-space curly-syntax #itable-keyval-space itable-val-tail @@ -3611,6 +3737,8 @@ namespace eval tomlish::parse { ::tomlish::log::debug "--->> goNextState tokentype:$tokentype tok:$tok currentstate:$currentstate : transition_to = $transition_to" switch -exact -- [lindex $transition_to 0] { POPSPACE { + set popfromspace_info [spacestack peek] + set popfromspace_state [dict get $popfromspace_info state] spacestack pop set parent_info [spacestack peek] set type [dict get $parent_info type] @@ -3625,17 +3753,17 @@ namespace eval tomlish::parse { set existing [spacestack pop] dict unset existing returnstate spacestack push $existing ;#re-push modification - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected to stored returnstate $next <<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected to stored returnstate $next <<---" } else { ### #review - do away with spacePopTransitions - which although useful to provide a default.. 
# - involve error-prone configurations distant to the main state transition configuration in stateMatrix if {[dict exists $::tomlish::parse::spacePopTransitions $parentspace]} { set next [dict get $::tomlish::parse::spacePopTransitions $parentspace] - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace redirected state to $next (spacePopTransitions)<<---" } else { set next $parentspace - ::tomlish::log::info "--->> POPSPACE transition to parent space $parentspace<<---" + ::tomlish::log::info "--->> POPSPACE transition from $popfromspace_state to parent space $parentspace<<---" } } set result $next @@ -3805,22 +3933,6 @@ namespace eval tomlish::parse { return $tokenType } - proc _shortcircuit_startquotesequence {} { - variable tok - variable i - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - set_tokenType "startquote" - incr i -1 - return -level 2 1 - } elseif {$toklen == 2} { - puts stderr "_shortcircuit_startquotesequence toklen 2" - set_tokenType "startquote" - set tok "\"" - incr i -2 - return -level 2 1 - } - } proc get_token_waiting {} { variable token_waiting @@ -3940,7 +4052,6 @@ namespace eval tomlish::parse { set slash_active 0 set quote 0 set c "" - set multi_dquote "" for {} {$i < $sLen} {} { if {$i > 0} { set lastChar [tcl::string::index $s [expr {$i - 1}]] @@ -3957,8 +4068,6 @@ namespace eval tomlish::parse { switch -exact -- $ctest { # { - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 @@ -3966,16 +4075,20 @@ namespace eval tomlish::parse { if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { + #for multiliteral, multistring - data and/or end incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } 
_start_squote_sequence { + #pseudo token beginning with underscore - never returned to state machine - review incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i [tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -4003,7 +4116,7 @@ namespace eval tomlish::parse { append tok $c } default { - #dquotedkey, itablequotedkey, string,literal, multistring + #dquotedkey, string,literal, multistring append tok $c } } @@ -4015,7 +4128,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes#" + append tok "#" } multiliteral-space { set_tokenType "literalpart" @@ -4031,23 +4144,23 @@ namespace eval tomlish::parse { } lc { #left curly brace - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i [tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { @@ -4059,7 +4172,7 @@ namespace eval tomlish::parse { } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename - starttablearrayname { #*bare* tablename can only contain letters,digits underscores @@ -4105,7 +4218,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\{" + append tok "\{" } multiliteral-space { set_tokenType "literalpart" @@ -4120,37 +4233,35 @@ namespace eval tomlish::parse { } rc { #right curly brace - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set 
slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey - comment { + string - dquotedkey - comment { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename - tablename { if {$had_slash} {append tok "\\"} @@ -4221,7 +4332,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\}" + append tok "\}" } multiliteral-space { set_tokenType "literalpart" ; #review @@ -4237,35 +4348,35 @@ namespace eval tomlish::parse { } lb { #left square bracket - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } starttablename { #change the tokenType @@ -4332,7 
+4443,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\[" + append tok "\[" } multiliteral-space { set_tokenType "literalpart" @@ -4350,37 +4461,35 @@ namespace eval tomlish::parse { } rb { #right square bracket - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { append tok $c } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } comment { if {$had_slash} {append tok "\\"} @@ -4428,16 +4537,6 @@ namespace eval tomlish::parse { } } } - XXXtablearraynames { - puts "rb @ tablearraynames ??" - #switch? - - #todo? - if {$had_slash} {append tok "\\"} - #invalid! - but leave for datastructure loading stage to catch - set_token_waiting type endtablearrayname value "" complete 1 startindex $cindex - return 1 - } default { incr i -1 return 1 @@ -4485,7 +4584,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok "$dquotes\]" + append tok "\]" } multiliteral-space { set_tokenType "literalpart" @@ -4498,21 +4597,21 @@ namespace eval tomlish::parse { } } bsl { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
#backslash if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } whitespace { @@ -4529,9 +4628,7 @@ namespace eval tomlish::parse { append tok "\\" set slash_active 0 } - XXXitablesquotedkey { - } - string - dquotedkey - itablequotedkey - comment { + string - dquotedkey - comment { if {$slash_active} { set slash_active 0 append tok "\\\\" @@ -4545,7 +4642,6 @@ namespace eval tomlish::parse { set slash_active 0 append tok "\\\\" } else { - append tok $dquotes set slash_active 1 } } @@ -4575,10 +4671,6 @@ namespace eval tomlish::parse { set tok "\\\\" set slash_active 0 } else { - if {$dquotes ne ""} { - set_tokenType "stringpart" - set tok $dquotes - } set slash_active 1 } } @@ -4599,58 +4691,56 @@ namespace eval tomlish::parse { set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { - #short squote_seq tokens are returned if active during any other character + tentative_accum_squote { + #for within multiliteral + #short tentative_accum_squote tokens are returned if active upon receipt of any other character #longest allowable for leading/trailing are returned here #### set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote - switch -- $state { - leading-squote-space { - append tok $c - if {$existingtoklen > 2} { - error "tomlish tok error: squote_seq unexpected length $existingtoklen when another received" - } elseif {$existingtoklen == 2} { - return 1 ;#return tok ''' - } - } - trailing-squote-space { - append tok $c - if {$existingtoklen == 4} { - #maxlen to be an squote_seq is multisquote + 
2 = 5 - #return tok ''''' - return 1 - } - } - default { - error "tomlish tok error: squote_seq in unexpected state '$state' - expected leading-squote-space or trailing-squote-space" - } + #assert state = trailing-squote-space + append tok $c + if {$existingtoklen == 4} { + #maxlen to be a tentative_accum_squote is multisquote + 2 = 5 + #return tok with value ''''' + return 1 } } - whitespace { - #end whitespace - incr i -1 ;#reprocess sq + tentative_accum_dquote { + incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { - #temp token creatable only during value-expected or array-space + #pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space switch -- [tcl::string::length $tok] { 1 { + #no conclusion can yet be reached append tok $c } 2 { + #enter multiliteral #switch? append tok $c set_tokenType triple_squote return 1 } default { + #if there are more than 3 leading squotes we also enter multiliteral space and the subsequent ones are handled + #by the tentative_accum_squote check for ending sequence which can accept up to 5 and reintegrate the + #extra 1 or 2 squotes as data. 
error "tomlish unexpected token length [tcl::string::length $tok] in '_start_squote_sequence'" } } } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" + return 1 + } + whitespace { + #end whitespace + incr i -1 ;#reprocess sq + return 1 + } literal { #slash_active always false #terminate the literal @@ -4663,7 +4753,7 @@ namespace eval tomlish::parse { # idea: end this literalpart (possibly 'temporarily') # let the sq be reprocessed in the multiliteral-space to push an end-multiliteral-sequence to state stack # upon popping end-multiliteral-sequence - stitch quotes back into this literalpart's token (if either too short - or a long ending sequence as shown above) - incr i -1 ;#throw the "'" back to loop - will be added to an squote_seq token for later processing + incr i -1 ;#throw the "'" back to loop - will be added to a tentative_accum_squote token for later processing return 1 } XXXitablesquotedkey { @@ -4684,7 +4774,11 @@ namespace eval tomlish::parse { append tok $c } barekey { - #not clear why o'shennanigan shouldn't be a legal barekey - but it seems not to be. + #barekeys now support all sorts of unicode letter/number chars for other cultures + #but not punctuation - not even for those of Irish heritage who don't object + #to the anglicised form of some names. + # o'shenanigan seems to not be a legal barekey + #The Irish will have to use an earlier form Ó - which apparently many may prefer anyway. error "tomlish Unexpected single quote during barekey. 
[tomlish::parse::report_line]" } default { @@ -4693,63 +4787,69 @@ namespace eval tomlish::parse { } } else { switch -exact -- $state { - array-space { + array-space - keyval-value-expected - itable-keyval-value-expected { + #leading squote + #pseudo-token _start_squote_sequence ss not received by state machine + #This pseudotoken will trigger production of single_squote token or triple_squote token + #It currently doesn't trigger double_squote token + #(handle '' same as 'x' ie produce a single_squote and go into processing literal) + #review - producing double_squote for empty literal may be slightly more efficient. + #This token is not used to handle squote sequences *within* a multiliteral set_tokenType "_start_squote_sequence" set tok "'" } - itable-keyval-value-expected - keyval-value-expected { - set_tokenType "squote_seq_begin" + multiliteral-space { + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up a tentative_accum_squote to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "tentative_trigger_squote" ;#trigger tentative_accum_squote set tok "'" return 1 } - table-space { - #tests: squotedkey.test - set_tokenType "squotedkey" - set tok "" - } - itable-space { - #tests: squotedkey_itable.test + table-space - itable-space { + #tests: squotedkey.test squotedkey_itable.test set_tokenType "squotedkey" set tok "" } - XXXitable-space { - #future - could there be multiline keys? - #this would allow arbitrary tcl dicts to be stored in toml + XXXtable-space - XXXitable-space { + #future - could there be multiline keys? MLLKEY, MLBKEY ? 
+ #this would (almost) allow arbitrary tcl dicts to be stored in toml (aside from escaping issues) #probably unlikely - as it's perhaps not very 'minimal' or ergonomic for config files - set_tokenType "squote_seq_begin" + #@2025 ABNF for toml mentions key, simple-key, unquoted-key, quoted-key and dotted-key + #where key is simple-key or dotted-key - no MLL or MLB components + #the spec states solution for arbitrary binary data is application specific involving encodings + #such as hex, base64 + set_tokenType "_start_squote_sequence" set tok "'" return 1 } tablename-state { #first char in tablename-state/tablearrayname-state - set_tokenType tablename + set_tokenType "tablename" append tok "'" } tablearrayname-state { - set_tokenType tablearrayname + set_tokenType "tablearrayname" append tok "'" } literal-state { + #shouldn't get here? review tomlish::log::debug "- tokloop sq during literal-state with no tokentype - empty literal?" - set_tokenType literal + set_tokenType "literal" incr -1 return 1 } multistring-space { - error "tomlish unimplemented - squote during state '$state'. [tomlish::parse::report_line]" - } - multiliteral-space { - #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row - #we are building up an squote_seq to determine if - #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines - #b) it is exactly ''' and we can terminate the whole multiliteral - #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space - set_tokenType "squote_seq_begin" - set tok "'" - return 1 + set_tokenType "stringpart" + set tok "" + if {$had_slash} {append tok "\\"} + append tok "," + #error "tomlish unimplemented - squote during state '$state'. 
[tomlish::parse::report_line]" } dottedkey-space { - set_tokenType squotedkey + set_tokenType "squotedkey" } default { error "tomlish unhandled squote during state '$state'. [tomlish::parse::report_line]" @@ -4765,44 +4865,50 @@ namespace eval tomlish::parse { if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote { incr i -1 return 1 } - startquotesequence { - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - append tok $c - } elseif {$toklen == 2} { - append tok $c - #switch vs set? - set_tokenType "startmultiquote" - return 1 - } else { - error "tomlish unexpected token length $toklen in 'startquotesequence'" - } - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" return 1 - - #set toklen [tcl::string::length $tok] - #switch -- $toklen { - # 1 { - # set_tokenType "startsquote" - # incr i -1 - # return 1 - # } - # 2 { - # set_tokenType "startsquote" - # incr i -2 - # return 1 - # } - # default { - # error "tomlish unexpected _start_squote_sequence length $toklen" - # } - #} + } + tentative_accum_dquote { + #within multistring + #short tentative_accum_dquote tokens are returned if active upon receipt of any other character + #longest allowable for leading/trailing are returned here + #### + set existingtoklen [tcl::string::length $tok] ;#toklen prior to this squote + #assert state = trailing-squote-space + append tok $c + if {$existingtoklen == 4} { + #maxlen to be a tentative_accum_dquote is multidquote + 2 = 5 + #return tok with value """"" + return 1 + } + } + _start_dquote_sequence { + #pseudo/temp token creatable during keyval-value-expected itable-keyval-value-expected or array-space + switch -- [tcl::string::length $tok] { + 1 { + #no conclusion can yet be reached + append tok $c + } + 2 { + #enter multistring + #switch? 
+ append tok $c + set_tokenType triple_dquote + return 1 + } + default { + #if there are more than 3 leading dquotes we also enter multistring space and the subsequent ones are handled + #by the tentative_accum_dquote check for ending sequence which can accept up to 5 and reintegrate the + #extra 1 or 2 dquotes as data. + error "tomlish unexpected token length [tcl::string::length $tok] in '_start_dquote_sequence'" + } + } } literal - literalpart { append tok $c @@ -4811,8 +4917,8 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" $c } else { - #unescaped quote always terminates a string? - set_token_waiting type endquote value "\"" complete 1 startindex $cindex + #unescaped quote always terminates a string + set_token_waiting type enddquote value "\"" complete 1 startindex $cindex return 1 } } @@ -4821,77 +4927,31 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" $c } else { - #incr i -1 - - if {$multi_dquote eq "\"\""} { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex -2}] - set multi_dquote "" - return 1 - } else { - append multi_dquote "\"" - } + incr i -1 ;#throw the {"} back to loop - will be added to a tentative_accum_dquote token for later processing + return 1 } } whitespace { - switch -exact -- $state { - multistring-space { - #REVIEW - if {$had_slash} { - incr i -2 - return 1 - } else { - switch -- [tcl::string::length $multi_dquote] { - 2 { - set_token_waiting type endmultiquote value "\"\"\"" complete 1 startindex [expr {$cindex-2}] - set multi_dquote "" - return 1 - } - 1 { - incr i -2 - return 1 - } - 0 { - incr i -1 - return 1 - } - } - } - } - keyval-value-expected { - #end whitespace token and reprocess - incr i -1 - return 1 - - #if {$multi_dquote eq "\"\""} { - # set_token_waiting type startmultiquote value "\"\"\"" complete 1 - # set multi_dquote "" - # return 1 - #} else { - # #end whitespace token and reprocess - # incr i -1 - # return 1 - #} - } - table-space - 
itable-space { - incr i -1 - return 1 - } - default { - set_token_waiting type startquote value "\"" complete 1 startindex $cindex - return 1 - } + #assert: had_slash will only ever be true in multistring-space + if {$had_slash} { + incr i -2 + return 1 + } else { + #end whitespace token - throw dq back for reprocessing + incr i -1 + return 1 } } comment { if {$had_slash} {append tok "\\"} append tok $c } - XXXdquotedkey - XXXitablequotedkey { + XXXdquotedkey { if {$had_slash} { append tok "\\" append tok $c } else { - set_token_waiting type endquote value "\"" complete 1 startindex $cindex + set_token_waiting type enddquote value "\"" complete 1 startindex $cindex return 1 } } @@ -4901,7 +4961,7 @@ namespace eval tomlish::parse { append tok "\\" append tok $c } else { - #set_token_waiting type endsquote value "'" complete 1 + #set_token_waiting type enddquote value {"} complete 1 return 1 } } @@ -4924,64 +4984,40 @@ namespace eval tomlish::parse { #$slash_active not relevant when no tokenType #token is string only if we're expecting a value at this point switch -exact -- $state { - array-space { - #!? start looking for possible multistartquote - #set_tokenType startquote - #set tok $c - #return 1 - set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote - set tok $c - } - keyval-value-expected - itable-keyval-value-expected { - set_tokenType "startquotesequence" ;#one or more quotes in a row - either startquote or multistartquote - set tok $c + array-space - keyval-value-expected - itable-keyval-value-expected { + #leading dquote + #pseudo-token _start_squote_sequence ss not received by state machine + #This pseudotoken will trigger production of single_dquote token or triple_dquote token + #It currently doesn't trigger double_dquote token + #(handle "" same as "x" ie produce a single_dquote and go into processing string) + #review - producing double_dquote for empty string may be slightly more efficient. 
+ #This token is not used to handle dquote sequences once *within* a multistring + set_tokenType "_start_dquote_sequence" + set tok {"} } multistring-space { - #TODO - had_slash!!! - #REVIEW if {$had_slash} { set_tokenType "stringpart" set tok "\\\"" - set multi_dquote "" } else { - if {$multi_dquote eq "\"\""} { - tomlish::log::debug "- tokloop char dq ---> endmultiquote" - set_tokenType "endmultiquote" - set tok "\"\"\"" - return 1 - #set_token_waiting type endmultiquote value "\"\"\"" complete 1 - #set multi_dquote "" - #return 1 - } else { - append multi_dquote "\"" - } + #each literalpart is not necessarily started/ended with squotes - but may contain up to 2 in a row + #we are building up a tentative_accum_squote to determine if + #a) it is shorter than ''' so belongs in a literalpart (either previous, subsequent or it's own literalpart between newlines + #b) it is exactly ''' and we can terminate the whole multiliteral + #c) it is 4 or 5 squotes where the first 1 or 2 beling in a literalpart and the trailing 3 terminate the space + set_tokenType "tentative_trigger_dquote" ;#trigger tentative_accum_dquote + set tok {"} + return 1 } } multiliteral-space { set_tokenType "literalpart" set tok "\"" } - XXXtable-space { - set_tokenType "startquote" - set tok $c - return 1 - } - XXXitable-space { - set_tokenType "startquote" - set tok $c - } table-space - itable-space { set_tokenType "dquotedkey" set tok "" } - tablename-state { - set_tokenType tablename - set tok $c - } - tablearrayname-state { - set_tokenType tablearrayname - set tok $c - } dottedkey-space { set_tokenType dquotedkey set tok "" @@ -4990,49 +5026,56 @@ namespace eval tomlish::parse { #set_tokenType dquote_seq_begin #set tok $c } + tablename-state { + set_tokenType tablename + set tok $c + } + tablearrayname-state { + set_tokenType tablearrayname + set tok $c + } default { - error "tomlish Unexpected quote during state '$state' [tomlish::parse::report_line]" + error "tomlish Unexpected dquote during 
state '$state' [tomlish::parse::report_line]" } } } } = { - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart - squotedkey { - #assertion had_slash 0, multi_dquote "" + #assertion had_slash 0 append tok $c } - string - comment - dquotedkey - itablequotedkey { + string - comment - dquotedkey { #for these tokenTypes an = is just data. if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } whitespace { if {$state eq "multistring-space"} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen + incr i -1 return 1 } else { set_token_waiting type equal value = complete 1 startindex $cindex @@ -5063,7 +5106,7 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok ${dquotes}= + append tok = } multiliteral-space { set_tokenType "literalpart" @@ -5084,8 +5127,6 @@ namespace eval tomlish::parse { } cr { #REVIEW! - set dquotes $multi_dquote - set multi_dquote "" ;#!! # \r carriage return if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. 
set slash_active 0 @@ -5098,16 +5139,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal { @@ -5159,8 +5202,6 @@ namespace eval tomlish::parse { } lf { # \n newline - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5171,16 +5212,19 @@ namespace eval tomlish::parse { append tok lf ;#assert we should now have tok "crlf" - as a previous cr is the only way to have an incomplete newline tok return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { + #multiliteral or multistring incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal { @@ -5196,20 +5240,14 @@ namespace eval tomlish::parse { return 1 } stringpart { - if {$dquotes ne ""} { - append tok $dquotes + if {$had_slash} { + #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) + set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] incr i -1 return 1 } else { - if {$had_slash} { - #emit the stringpart (return 1), queue the continuation, go back 1 to reprocess the lf (incr i -1) - set_token_waiting type continuation value \\ complete 1 startindex [expr {$cindex-1}] - incr i -1 - return 1 - } else { - set_token_waiting type newline value lf complete 1 
startindex $cindex - return 1 - } + set_token_waiting type newline value lf complete 1 startindex $cindex + return 1 } } starttablename - tablename - tablearrayname - starttablearrayname { @@ -5236,20 +5274,13 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - #e.g one or 2 quotes just before nl - set_tokenType "stringpart" - set tok $dquotes - incr i -1 - return 1 - } set_tokenType "newline" set tok lf return 1 } } multiliteral-space { - #assert had_slash 0, multi_dquote "" + #assert had_slash 0 set_tokenType "newline" set tok "lf" return 1 @@ -5275,8 +5306,6 @@ namespace eval tomlish::parse { } } , { - set dquotes $multi_dquote - set multi_dquote "" set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5287,39 +5316,40 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } comment - tablename - tablearrayname { if {$had_slash} {append tok "\\"} append tok , } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { #stringpart can have up to 2 quotes too if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } literal - literalpart - squotedkey { - #assert had_slash always 0, multi_dquote "" + #assert had_slash always 0 append tok $c } whitespace { if {$state eq "multistring-space"} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen + incr i -1 return 1 } else { set_token_waiting type comma value "," complete 1 startindex $cindex @@ -5338,10 +5368,10 @@ namespace eval tomlish::parse { 
set_tokenType "stringpart" set tok "" if {$had_slash} {append tok "\\"} - append tok "$dquotes," + append tok "," } multiliteral-space { - #assert had_slash 0, multi_dquote "" + #assert had_slash 0 set_tokenType "literalpart" set tok "," } @@ -5354,8 +5384,6 @@ namespace eval tomlish::parse { } } . { - set dquotes $multi_dquote - set multi_dquote "" ;#!! set had_slash $slash_active set slash_active 0 if {[tcl::string::length $tokenType]} { @@ -5366,42 +5394,45 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } comment - untyped_value { if {$had_slash} {append tok "\\"} append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} {append tok "\\"} append tok $c } stringpart { if {$had_slash} {append tok "\\"} - append tok $dquotes$c + append tok $c } literal - literalpart - squotedkey { - #assert had_slash always 0, multi_dquote "" + #assert had_slash always 0 append tok $c } whitespace { switch -exact -- $state { multistring-space { - set backchars [expr {[tcl::string::length $dquotes] + 1}] + #review if {$had_slash} { - incr backchars 1 + incr i -2 + } else { + incr i -1 } - incr i -$backchars return 1 } xxxdottedkey-space { @@ -5444,7 +5475,7 @@ namespace eval tomlish::parse { set_tokenType "stringpart" set tok "" if {$had_slash} {append tok "\\"} - append tok "$dquotes." + append tok "." } multiliteral-space { set_tokenType "literalpart" @@ -5471,8 +5502,6 @@ namespace eval tomlish::parse { } " " { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
if {[tcl::string::length $tokenType]} { set had_slash $slash_active set slash_active 0 @@ -5483,16 +5512,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -5512,9 +5543,9 @@ namespace eval tomlish::parse { if {$had_slash} { append tok "\\" } - append tok $dquotes$c + append tok $c } - string - dquotedkey - itablequotedkey { + string - dquotedkey { if {$had_slash} { append tok "\\" } append tok $c } @@ -5526,8 +5557,7 @@ namespace eval tomlish::parse { incr i -2 return 1 } else { - #split into STRINGPART aaa WS " " - append tok $dquotes + #split into STRINGPART xxx WS " " incr i -1 return 1 } @@ -5537,15 +5567,7 @@ namespace eval tomlish::parse { } whitespace { if {$state eq "multistring-space"} { - if {$dquotes ne ""} { - #end whitespace token - #go back by the number of quotes plus this space char - set backchars [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backchars - return 1 - } else { - append tok $c - } + append tok $c } else { append tok $c } @@ -5588,12 +5610,6 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - set_tokenType "stringpart" - set tok $dquotes - incr i -1 - return 1 - } set_tokenType "whitespace" append tok $c } @@ -5613,9 +5629,6 @@ namespace eval tomlish::parse { } } tab { - set dquotes $multi_dquote - set multi_dquote "" ;#!! 
- if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out (?review) set slash_active 0 @@ -5626,12 +5639,18 @@ namespace eval tomlish::parse { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence + tentative_accum_squote - tentative_accum_dquote { + incr i -1 + return 1 } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } barekey { @@ -5662,7 +5681,6 @@ namespace eval tomlish::parse { return 1 } else { #split into STRINGPART aaa WS " " - append tok $dquotes incr i -1 return 1 } @@ -5706,15 +5724,8 @@ namespace eval tomlish::parse { incr i -1 return 1 } else { - if {$dquotes ne ""} { - set_tokenType stringpart - set tok $dquotes - incr i -1 - return 1 - } else { - set_tokenType whitespace - append tok $c - } + set_tokenType whitespace + append tok $c } } multiliteral-space { @@ -5732,16 +5743,31 @@ namespace eval tomlish::parse { #BOM (Byte Order Mark) - ignored by token consumer if {[tcl::string::length $tokenType]} { switch -exact -- $tokenType { + tentative_accum_squote - tentative_accum_dquote { + incr i -1 + return 1 + } _start_squote_sequence { #assert - tok will be one or two squotes only + #A toml literal probably isn't allowed to contain this + #but we will parse and let the validator sort it out. 
incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } literal - literalpart { append tok $c } + string - stringpart { + append tok $c + } default { + #state machine will generally not have entry to accept bom - let it crash set_token_waiting type bom value "\uFEFF" complete 1 startindex $cindex return 1 } @@ -5752,6 +5778,10 @@ namespace eval tomlish::parse { set_tokenType "literalpart" set tok $c } + multistring-space { + set_tokenType "stringpart" + set tok $c + } default { set_tokenType "bom" set tok "\uFEFF" @@ -5761,8 +5791,6 @@ namespace eval tomlish::parse { } } default { - set dquotes $multi_dquote - set multi_dquote "" ;#!! if {[tcl::string::length $tokenType]} { if {$slash_active} {append tok "\\"} ;#if tokentype not appropriate for \, we would already have errored out. @@ -5774,28 +5802,24 @@ namespace eval tomlish::parse { incr i -1 return 1 } - squote_seq { + tentative_accum_squote - tentative_accum_dquote { incr i -1 return 1 } - startquotesequence { - _shortcircuit_startquotesequence - } _start_squote_sequence { incr i -[tcl::string::length $tok] - set_tokenType "startsquote" + set_tokenType "single_squote" + return 1 + } + _start_dquote_sequence { + incr i -[tcl::string::length $tok] + set_tokenType "single_dquote" return 1 } whitespace { if {$state eq "multistring-space"} { - if {$dquotes ne ""} { - set backlen [expr {[tcl::string::length $dquotes] + 1}] - incr i -$backlen - return 1 - } else { - incr i -1 - return 1 - } + incr i -1 + return 1 } else { #review incr i -1 ;#We don't have a full token to add to the token_waiting dict - so leave this char for next run. 
@@ -5815,7 +5839,7 @@ namespace eval tomlish::parse { return 1 } stringpart { - append tok $dquotes$c + append tok $c } default { #e.g comment/string/literal/literalpart/untyped_value/starttablename/starttablearrayname/tablename/tablearrayname @@ -5835,22 +5859,12 @@ namespace eval tomlish::parse { error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" } } - XXXcurly-syntax { - puts stderr "curly-syntax - review" - if {[tomlish::utils::is_barekey $c]} { - set_tokenType "barekey" - append tok $c - } else { - error "tomlish Unexpected char $c ([tomlish::utils::nonprintable_to_slashu $c]) whilst no active tokenType. [tomlish::parse::report_line]" - } - } multistring-space { set_tokenType "stringpart" if {$had_slash} { - #assert - we don't get had_slash and dquotes at same time set tok \\$c } else { - set tok $dquotes$c + set tok $c } } multiliteral-space { @@ -5890,21 +5904,6 @@ namespace eval tomlish::parse { # error "Reached end of data whilst tokenType = '$tokenType'. INVALID" #} switch -exact -- $tokenType { - startquotesequence { - set toklen [tcl::string::length $tok] - if {$toklen == 1} { - #invalid - #eof with open string - error "tomlish eof reached without closing quote for string. [tomlish::parse::report_line]" - } elseif {$toklen == 2} { - #valid - #we ended in a double quote, not actually a startquoteseqence - effectively an empty string - switch_tokenType "startquote" - incr i -1 - #set_token_waiting type string value "" complete 1 - return 1 - } - } _start_squote_sequence { set toklen [tcl::string::length $tok] switch -- $toklen { @@ -5913,11 +5912,29 @@ namespace eval tomlish::parse { error "tomlish eof reached without closing single quote for string literal. 
[tomlish::parse::report_line]" } 2 { - #review - set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] set_tokenType "literal" set tok "" return 1 + + ##review + #set_token_waiting type endsquote value "'" complete 1 startindex [expr {$cindex -1}] + #set_tokenType "literal" + #set tok "" + #return 1 + } + } + } + _start_dquote_sequence { + set toklen [tcl::string::length $tok] + switch -- $toklen { + 1 { + #invalid eof with open string + error "tomlish eof reached without closing double quote for string. [tomlish::parse::report_line]" + } + 2 { + set_tokenType "string" + set tok "" + return 1 } } } @@ -6011,6 +6028,16 @@ namespace eval tomlish::dict { return $name } + proc _show_tablenames {tablenames_info} { + append msg \n "tablenames_info:" \n + dict for {tkey tinfo} $tablenames_info { + append msg " " "table: $tkey" \n + dict for {field finfo} $tinfo { + append msg " " "$field $finfo" \n + } + } + return $msg + } } tcl::namespace::eval tomlish::app {