diff --git a/src/vendormodules/test/tomlish-1.1.5.tm b/src/vendormodules/test/tomlish-1.1.5.tm index 35de5e70..3ae60d42 100644 Binary files a/src/vendormodules/test/tomlish-1.1.5.tm and b/src/vendormodules/test/tomlish-1.1.5.tm differ diff --git a/src/vendormodules/tomlish-1.1.6.tm b/src/vendormodules/tomlish-1.1.6.tm index a562545a..7abbaeae 100644 --- a/src/vendormodules/tomlish-1.1.6.tm +++ b/src/vendormodules/tomlish-1.1.6.tm @@ -265,7 +265,7 @@ namespace eval tomlish { #note that a barekey/dquotedkey won't occur directly inside a barekey/dquotedkey #DDDD switch -exact -- [lindex $sub 0] { - STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL - TABLE - ARRAY - ITABLE { + STRING - LITERAL - MULTISTRING - MULTILITERAL - INT - FLOAT - BOOL - DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL - TIME-TZ - TABLE - ARRAY - ITABLE { lappend values $sub lappend value_posns $posn } @@ -311,18 +311,16 @@ namespace eval tomlish { lassign [lindex $values 0] type_d1 value_d1 lassign [lindex $values 1] type_d2 value_d2 #DDDD - if {$type_d1 ne "DATE-LOCAL" || $type_d2 ni {DATETIME TIME-LOCAL}} { + if {$type_d1 ne "DATE-LOCAL" || $type_d2 ni {TIME-TZ TIME-LOCAL}} { #we reuse DATETIME tag for standalone time with tz offset (or zZ) error "tomlish KEY in 2 parts does not appear to be datetime '$keyval_element'" } if {$type_d2 eq "TIME-LOCAL"} { set type DATETIME-LOCAL - } else { - #extra check that 2nd part is actually a time - if {![tomlish::utils::is_timepart $value_d2]} { - error "tomlish KEY in 2 parts does not appear to be datetime. (part 2 not a time value) '$keyval_element'" - } + } elseif {$type_d2 eq "TIME-TZ"} { set type DATETIME + } else { + error "tomlish KEY in 2 parts does not appear to be datetime. (part 2 not a time value) '$keyval_element'" } set value "${value_d1}T${value_d2}" } @@ -332,6 +330,10 @@ namespace eval tomlish { } set sub_tablenames_info [dict create] switch -exact -- $type { + TIME-TZ { + #This is only valid in tomlish following a DATE-LOCAL + error "tomlish type TIME-TZ was not preceeded by DATE-LOCAL in keyval '$keyval_element'" + } INT - FLOAT - BOOL - DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL { #DDDD #simple (non-container, no-substitution) datatype @@ -383,8 +385,8 @@ namespace eval tomlish { } - proc to_dict {tomlish} { - tomlish::dict::from_tomlish $tomlish + proc to_dict {tomlish {returnextra 0}} { + tomlish::dict::from_tomlish $tomlish $returnextra } @@ -437,7 +439,8 @@ namespace eval tomlish { #and even format it with a reasonable indent so that proper CONT and WS entries are made (?) REVIEW # #TODO - set tomlpart "x=\"\"\"\\\n" + #set tomlpart "x=\"\"\"\\\n" ;#no need for continuation + set tomlpart "x=\"\"\"\n" append tomlpart [tomlish::utils::rawstring_to_MultiBstring_with_escaped_controls $val] append tomlpart "\"\"\"" set tomlish [tomlish::from_toml $tomlpart] @@ -519,6 +522,10 @@ namespace eval tomlish { lappend result DOTTEDKEY [list $K_PART {WS { }}] = {WS { }} $sublist {NEWLINE lf} } else { if {$vinfo ne ""} { + if {![tomlish::utils::string_is_dict $vinfo]} { + #e.g tomlish::dict::from_tomlish was called with return_extra 1 + return -code error -errorcode {TOMLISH SYNTAX INVALIDDICT} "tomlish::_from_dictval Supplied dict is not a valid format for converting to tomlish" ;#review + } #set result [list DOTTEDKEY [list [list KEY $k]] = ] #set records [list ITABLE] @@ -645,6 +652,10 @@ namespace eval tomlish { } } else { if {$vinfo ne ""} { + if {![tomlish::utils::string_is_dict $vinfo]} { + #e.g tomlish::dict::from_tomlish was called with return_extra 1 + return -code error -errorcode {TOMLISH SYNTAX INVALIDDICT} "tomlish::_from_dictval Supplied dict is not a valid format for converting to tomlish" ;#review + } set lastidx [expr {[dict size $vinfo] -1}] set dictidx 0 set sub [list] @@ -1522,30 +1533,28 @@ namespace eval tomlish { #DDDD if {[::tomlish::utils::is_float $tok]} { set tag FLOAT - } elseif {[::tomlish::utils::is_localtime $tok]} { + } elseif {[::tomlish::utils::is_time-local $tok]} { set tag TIME-LOCAL } elseif {[::tomlish::utils::is_timepart $tok]} { - #Note we must allow lone timepart here (not just is_localtime which doesn't allow tz offsets) in case it followed a previous localdate - set tag DATETIME ;#?? review standalone time with tz - no specific tag - only allowed as followup value from DATETIME-LOCAL - } elseif {[::tomlish::utils::is_datepart $tok]} { + ###################################### + #Note we must allow lone timepart here (not just is_time-local which doesn't allow tz offsets) in case it followed a previous localdate + #set tag DATETIME ;#PLACEHOLDER tag - review standalone time with tz - no specific tag - only allowed as followup value from DATE-LOCAL + set tag TIME-TZ + #This will become a DATETIME or a DATETIME-LOCAL (or will error) + ###################################### + } elseif {[::tomlish::utils::is_date-local $tok]} { set tag DATE-LOCAL - } elseif {[::tomlish::utils::is_datetime $tok]} { + } elseif {[::tomlish::utils::is_date_or_time_or_datetime $tok]} { #not just a date or just a time #could be either local or have tz offset #DDDD JJJ set norm [string map {" " T} $tok];#prob unneeded - we won't get here if there was a space - would arrive as 2 separate tokens review. lassign [split $norm T] dp tp - if {[::tomlish::utils::is_localtime $tp]} { + if {[::tomlish::utils::is_time-local $tp]} { set tag DATETIME-LOCAL } else { set tag DATETIME } - } elseif {[::tomlish::utils::is_datetime X$tok] || [::tomlish::utils::is_timepart X$tok]} { - # obsolete - #Note we must allow lone timepart here (not just is_localtime which doesn't allow tz offsets) in case it followed a previous localdate - #e.g x= 2025-01-01 02:34Z - #The dict::from_tomlish validation will catch an invalid standaline timepart, or combine with leading date if applicable. - set tag DATETIME } else { error "---- Unable to interpret '$tok' as Boolean, Integer, Float or Datetime as per the toml specs. [tomlish::parse::report_line] (no space level change)" } @@ -1662,6 +1671,433 @@ namespace eval tomlish { } + #return TOMLISH { value} from new and existing typeval dicts of form {type value value} but + # some such as MULTISTRING can be of form { ...} + # + #Don't validate here - validate in tomlish::dict::path::setleaf + proc _update_tomlish_typeval_convert_to_new_from_existing {new existing} { + #we deliberately don't support container types that can contain comments e.g ARRAY, ITABLE, DOTTEDKEY + #This is also not for higher level constructs such as TABLE, TABLEARRAY + if {!([tomlish::dict::is_typeval $target] && [tomlish::dict_is_typveval $source])} { + error "_update_tomlish_typeval_convert_to: target and source must be of form {type value are contained in the table + foreach tr $tablechildren { + set tr_type [lindex $tr 0] + switch -- $tr_type { + NEWLINE - WS - COMMENT { + lappend updated_tablechildren $tr + } + DOTTEDKEY { + #review + #UUU + set dktomlish [list TOMLISH $tr] + set dkdict [::tomlish::to_dict $dktomlish] + set newdktomlish [update_tomlish_from_dict $dktomlish $subd] + set newrecords [lrange $newdktomlish 1 end];#strip TOMLISH + lappend updated_tablechildren {*}$newrecords + } + default { + error "update_tomlish_from_dict: unexpected table record type $tr_type" + } + } + } + + #todo - add leaves from subd that weren't in the tablechildren list + #ordering? + + lappend output_tomlish [list {*}[lrange $tomlish_record 0 1] {*}$updated_tablechildren] + } + DOTTEDKEY { + #We don't have to check toml table rules regarding created/defined here as dict::from_tomlish has already ensured correctness + #UUU + set dkinfo [tomlish::get_dottedkey_info $tomlish_record] ;#e.g keys {j { k} l} keys_raw {j {' k'} l} + set keys [dict get $dkinfo keys] + set dk_refpath [lmap k $keys {string cat @@ $k}] + + set kvinfo [tomlish::_get_keyval_value $tomlish_record] + set existing_typeval [dict get $kvinfo result] + if {[tomlish::dict::is_typeval $existing_typeval] && [dict get $existing_typeval type] ne "ARRAY"} { + #leaf in supplied tomlish - source dict must also be leaf (invalid to rewrite a branch) + #e.g + #DOTTEDKEY {{KEY j} DOTSEP {SQKEY { k}} DOTSEP {KEY l}} = {INT 0} {WS { }} {COMMENT comment} {NEWLINE lf} + #existing_typeval: {type INT value 0} + #e.g + #DOTTEDKEY {{KEY j} DOTSEP {SQKEY { k}} DOTSEP {KEY l}} = {MULTISTRING {WS { }} {STRINGPART x} {WS { }}} {WS { }} {COMMENT comment} {NEWLINE lf} + #existing_typeval: {type MULTISTRING value { x }} + + #see if source dict has a simple typeval to set + set new_typeval [tomlish::dict::path::get $d $dk_refpath] + if {![tomlish::dict::is_typeval $new_typeval]} { + error "update_tomlish_from_dict - update dictionary has non-leaf data at path $dk_refpath - cannot set" + } + #update if type matches. Todo - flag -allowtypechange ? + set e_type [dict get $existing_typeval type] + set n_type [dict get $new_typeval type] + if {$e_type ne $n_type} { + error "update_tomlish_from_dict - cannot change type $e_type to $n_type at path $dk_refpath" + } + #-start 3 to begin search after = + set valindex [lsearch -start 3 -index 0 $tomlish_record $e_type] + if {$valindex == -1} { + error "update_tomlish_from_dict - unexpected error - failed to find $e_type in record $tomlish_record" + } + set rawval [dict get $new_typeval value] + switch -- $e_type { + MULTISTRING { + #UUU + set newval [tomlish::utils::rawstring_to_MultiBstring_with_escaped_controls $rawval] + set toml "" + append toml "x=\"\"\"" \n + append toml "$newval\"\"\"" \n + set tomlish [lrange [tomlish::from_toml $toml] 1 end] ;#remove TOMLISH keyword + #assert tomlish is a list with a single element + #e.g {DOTTEDKEY {{KEY x}} = {MULTISTRING {NEWLINE lf} {STRINGPART aaa}} {NEWLINE lf}} + set dklist [lindex $tomlish 0] + set msrecord [lindex $dklist 3] + #e.g + #MULTISTRING {NEWLINE lf} {STRINGPART aaa} + + #error "update_tomlish_from_dict MULTISTRING update unimplemented. Todo" + lset tomlish_record $valindex $msrecord + } + MULTILITERAL { + set toml "" + append toml "x='''" \n + append toml "$rawval'''" \n + set tomlish [lrange [tomlish::from_toml $toml] 1 end] ;#remove TOMLISH keyword + set dklist [lindex $tomlish 0] + set msrecord [lindex $dklist 3] + lset tomlish_record $valindex $msrecord + } + default { + switch -- $e_type { + STRING { + #review + set newval [tomlish::utils::rawstring_to_Bstring_with_escaped_controls $rawval] + } + default { + set newval $rawval + } + } + lset tomlish_record $valindex [list $e_type $newval] + } + } + + } elseif {[tomlish::dict::is_typeval $existing_typeval] && [dict get $existing_typeval type] eq "ARRAY"} { + #e.g + #DOTTEDKEY {{KEY a}} = {ARRAY {INT 1} SEP {INT 2} SEP {INT 3}} + #DOTTEDKEY {{KEY a} {WS { }}} = {WS { }} {ARRAY {INT 1} {WS { }} SEP {INT 2} {WS { }} SEP {INT 3}} {WS { }} + #existing_typeval: {type ARRAY value {{type INT value 1} {type INT value 2} {type INT value 3}}} + + #= is always at index 2 (any preceding whitespace is attached to keylist) + set valindex [lsearch -start 3 -index 0 $tomlish_record ARRAY] + if {$valindex == -1} { + error "update_tomlish_from_dict - unexpected error - failed to find ARRAY in record $tomlish_record" + } + + set existing_arraytomlish [lindex $tomlish_record $valindex] + puts "update_tomlish_from_dict: existing_arraytomlish: $existing_arraytomlish" + set subd [tomlish::dict::path::get $d $dk_refpath] + #set existing_items [tomlish::dict::from_tomlish $tomlish_record] ;#utilise fragment processing of dict::from_tomlish - to produce a LIST + #we expect the subdict structure to be something like: + # {type ARRAY value {{type INT value 1} {type INT value 2}}} + # or with untagged subdicts (ITABLE in tomlish) + # {type ARRAY value {{x {type INT value 1}} {type INT value 2}}} + + + #we can only have one ARRAY record - so we can use lset + set newsubrecord_itable [update_tomlish_from_dict [list $existing_arraytomlish] $subd] + lset tomlish_record $valindex [lindex $newsubrecord_itable 0] ;#passed in a single element tomlish list - expect only one back + + } elseif {[tomlish::dict::is_typeval_dict $existing_typeval]} { + #Not actually a {type value } structure. + #sub dict (ITABLE) + #e.g + #DOTTEDKEY {{KEY j} DOTSEP {SQKEY { k}} DOTSEP {KEY l}} = {ITABLE {DOTTEDKEY {{KEY q}} = {INT 1}}} {WS { }} {COMMENT comment} {NEWLINE lf} + #DOTTEDKEY {{KEY x} {WS { }}} = {WS { }} {ITABLE {WS { }} {DOTTEDKEY {{KEY j}} = {INT 1} {WS { }} SEP} {WS { }} {DOTTEDKEY {{KEY k} {WS { }}} = {WS { }} {INT 333}}} {WS { }} {COMMENT {test }} + #existingvaldata: {q {type INT value 1}} + set subd [tomlish::dict::path::get $d $dk_refpath] + #= is always at index 2 (any preceding whitespace is attached to keylist) + set valindex [lsearch -start 3 -index 0 $tomlish_record ITABLE] + if {$valindex == -1} { + error "update_tomlish_from_dict - unexpected error - failed to find ITABLE in record $tomlish_record" + } + #we can only have one ITABLE record - so we can use lset + + set itablerecord [lindex $tomlish_record $valindex] + puts "update_tomlish_from_dict: existing_itabletomlish: $itablerecord" + set newsubrecord_itable [update_tomlish_from_dict [list $itablerecord] $subd] + lset tomlish_record $valindex [lindex $newsubrecord_itable 0] + } else { + #unreachable? - dict::from_tomlish didn't object. + error "update_tomlish_from_dict: Unexpected data in DOTTEDKEY record: $existing_typeval" + } + lappend output_tomlish $tomlish_record + } + ARRAY { + #UUU + #fragment recursion + puts "update_tomlish_from_dict: process ARRAY fragment" + puts "tomlish:\n$tomlish" + puts "updatedict:\n$d" + set source_d_elements [tomlish::dict::path::get $d {[]}] + + set updated_arraychildren [list] + set arrayrecord $tomlish_record + set arraychildren [lrange $arrayrecord 1 end] ;#includes WS, SEP, NEWLINE, COMMENT + set arridx 0 + set childidx 0 + foreach arrchild $arraychildren { + set arrchild_type [lindex $arrchild 0] + switch -- $arrchild_type { + SEP { + #we don't check for proper SEP interspersal here, presuming well-formed tomlish - review + lappend updated_arraychildren $arrchild + } + NEWLINE - WS - COMMENT { + lappend updated_arraychildren $arrchild + } + default { + #updatables + #review - type changes from existing value?? + set sourcedata [lindex $source_d_elements $arridx] + switch -- $arrchild_type { + STRING - LITERAL - FLOAT - INT - DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL { + #basic types - no recursion needed + #REVIEW - change of type? flag to allow/disallow? + if {![tomlish::dict::is_typeval $sourcedata]} { + error "update_tomlish_from_dict - update dictionary has non-leaf data at path \[$arridx\] - cannot set" + } + set newval [dict get $sourcedata value] + set newtype [dict get $sourcedata type] + if {$newtype eq "STRING"} { + set newval [tomlish::utils::rawstring_to_Bstring_with_escaped_controls $newval] + } + lappend updated_arraychildren [list $newtype $newval] + } + MULTISTRING { + #no need to recurse + puts stderr "multistring within array update - unimplemented" + } + MULTILITERAL { + #no need to recurse + puts stderr "multiliteral within array update - unimplemented" + } + ITABLE - ARRAY { + #recurse + puts stderr "update $tomlish_type within array" + set nextd [tomlish::dict::path::get $d $arridx] + set subrecord_tomlish [list $arrchild] + set newsubrecord_tomlish [update_tomlish_from_dict $subrecord_tomlish $nextd] + lappend updated_arraychildren {*}$newsubrecord_tomlish + } + default { + error "update_tomlish_from_dict: unexpected array child record type $arrchild_type" + } + } + incr arridx ;#only increment array index for updatables + } + } + } + + lappend output_tomlish [list ARRAY {*}$updated_arraychildren] + } + ITABLE { + #fragment recursion target + #ITABLE {DOTTEDKEY {{KEY j}} = {INT 1}} + #ITABLE {WS { }} {DOTTEDKEY {{KEY j}} = {INT 1} {WS { }} SEP} {WS { }} {DOTTEDKEY {{KEY k} {WS { }}} = {WS { }} {INT 333}} + #ITABLE {NEWLINE lf} {DOTTEDKEY {{KEY j} {WS { }}} = {WS { }} {INT 1} SEP} {WS { }} {COMMENT test} {NEWLINE lf} {WS { }} {DOTTEDKEY {{KEY k}} = {WS { }} {INT 2} {NEWLINE lf}} + puts "update_tomlish_from_dict: process ITABLE fragment" + puts "tomlish:\n$tomlish" + puts "updatedict:\n$d" + set updated_itablechildren [list] + set itablechildren [lrange $tomlish_record 1 end] ;#includes WS, NEWLINE, COMMENT (possibly SEP - though it may be attached to DOTTEDKEY record REVIEW) + #we only expect DOTTEDKEY records for data items within ITABLE + foreach itablechild $tomlish_record { + set itablechild_type [lindex $itablechild 0] + switch -- $itablechild_type { + SEP { + #REVIEW + #we don't necessarily expect a SEP *directly* within ITABLE records as currently when they're created by tomlish::from_toml + #it attaches them (along with intervening WS, COMMENTs) to each DOTTEDKEY record + #This feels somewhat misaligned with ARRAY - where we have no choice but to have SEP, and COMMENTs independent of the array elements. + #Attaching COMMENTs, SEP to the previous DOTTEDKEY has some merit - but perhaps consistency with ARRAY would be preferable. + #This may change - but in any case it should probably be valid/handled gracefully either way. + lappend updated_itablechildren $itablechild + } + COMMENT - WS - NEWLINE { + lappend updated_itablechildren $itablechild + } + DOTTEDKEY { + puts stderr "update dottedkey in itable: tomlish:[list $itablechild] d:$d" + set updatedtomlish [update_tomlish_from_dict [list $itablechild] $d] + set newrecord [lindex $updatedtomlish 0] + lappend updated_itablechildren $newrecord + } + } + } + + lappend output_tomlish [list ITABLE {*}$updated_itablechildren] + } + default { + error "update_tomlish_from_dict: Unexpected toplevel type $tomlish_type record: $tomlish_record" + } + } + } + return $output_tomlish + } + #*** !doctools #[list_end] [comment {--- end definitions namespace tomlish ---}] @@ -1713,7 +2149,7 @@ namespace eval tomlish::build { } proc DATETIME {str} { - if {[::tomlish::utils::is_datetime $str]} { + if {[::tomlish::utils::is_date_or_time_or_datetime $str]} { return [list DATETIME $str] } else { error "Unable to interpret '$str' as Toml datetime. Check your input, or check that tomlish is able to handle all Toml datetimes properly [::tomlish::parse::report_line]" @@ -2052,6 +2488,103 @@ namespace eval tomlish::utils { } #------------------------------------------------------------------------------ + #subset of jq syntax for get/set operations on dicts + # no filters or multiple targets + # meant for 'leaf' queries + proc jq_to_path {jq} { + set jq [string trim $jq] ;#don't tokenize any leading/trailing whitespace + set path [list] + set in_arr 0 + set in_dq 0 + set tok "" + set bsl 0 + foreach c [split $jq ""] { + if {$c eq "\\"} { + if {$bsl} { + set bsl 0 + set c "\\" + } else { + set bsl 1 + continue + } + } else { + if {$bsl} { + set c "\\$c" + set bsl 0 + } + } + if {$in_arr} { + switch -- $c { + {]} { + set in_arr 0 + lappend path $tok + set tok "" + } + default { + append tok $c + } + } + } elseif {$in_dq} { + if {$c eq "\""} { + set in_dq 0 + #append tok "\"" + lappend path $tok + set tok "" + } else { + append tok $c + } + } else { + switch -- $c { + . { + if {$tok ne ""} { + lappend path $tok + } + set tok "@@" + } + {[} { + if {$tok ne ""} { + lappend path $tok + } + set in_arr 1 + set tok "" + } + {"} { + if {$tok eq "@@"} { + #set tok "@@\"" + set in_dq 1 + } else { + append tok "\"" + } + } + default { + append tok $c + } + } + } + } + if {$tok ne ""} { + lappend path $tok + } + return $path + } + proc path_to_jq {path} { + set jq "" + foreach p $path { + if {[string match @@* $p]} { + set key [string range $p 2 end] + if {![tomlish::utils::is_barekey $key]} { + set key [subst -nocommands -novariables $key] + set key "\"[tomlish::utils::rawstring_to_Bstring_with_escaped_controls $key]\"" + } + append jq ".$key" + } else { + append jq {[} $p {]} + } + } + return $jq + } + + #basic generic quote matching for single and double quotes #note for example that {[o'malley]} will return sq - as the single quote is not closed or wrapped in double quotes @@ -2249,16 +2782,78 @@ namespace eval tomlish::utils { return [string map $map $str] } - proc rawstring_is_valid_tomlstring {str} { - #controls are allowed in this direction dict -> toml (they get quoted) + #anything is valid in this direction ?? review + #proc rawstring_is_valid_tomlstring {str} { + # #controls are allowed in this direction dict -> toml (they get quoted) + + # #check any existing escapes are valid + # if {[catch { + # unescape_string $str + # } errM]} { + # return 0 + # } + # return 1 + #} + - #check any existing escapes are valid + #REVIEW - easier way to validate? regex? + #This is not used for the parsing of toml to tomlish, + # but can be used to validate for updating via dict e.g when setting with tomlish::dict::path::setleaf + proc inner_MultiBstring_is_valid_toml {str} { + set without_literal_backslashes [string map [list "\\\\" ""] $str] + #replace only escaped dquotes - use a placeholder - we don't want unescaped runs of dquotes merging. + set without_escaped_dquotes [string map [list "\\\"" ""] $without_literal_backslashes] + + if {[string first "\"\"\"" $without_escaped_dquotes] != -1} { + return 0 + } + #assert - all remaining backslashes are escapes + + #strip remaining dquotes + set dquoteless [string map [list "\"" ""] $without_escaped_dquotes] + #puts stderr "dquoteless: $dquoteless" + + #check any remaining escapes are valid if {[catch { - unescape_string $str + #don't use the returned value - just check it + unescape_string $without_literal_backslashes } errM]} { return 0 } - return 1 + + + variable Bstring_control_map + #remove backslash from control map - we are happy with the remaining escapes (varying length) + set testmap [dict remove $Bstring_control_map "\\" \r \n] + set testval [string map $testmap $dquoteless] + #if they differ - there were raw controls + return [expr {$testval eq $dquoteless}] + } + proc inner_Bstring_is_valid_toml {str} { + set without_literal_backslashes [string map [list "\\\\" ""] $str] + #replace only escaped dquotes - use a placeholder - we don't want unescaped runs of dquotes merging. + set without_escaped_dquotes [string map [list "\\\"" ""] $without_literal_backslashes] + + #plain Bstring can't have unescaped dquotes at tall + if {[string first "\"" $without_escaped_dquotes] != -1} { + return 0 + } + #assert - all remaining backslashes are escapes + + #check any remaining escapes are valid + if {[catch { + #don't use the returned value - just check it + unescape_string $without_literal_backslashes + } errM]} { + return 0 + } + + variable Bstring_control_map + #remove backslash from control map - we are happy with the remaining escapes (varying length) + set testmap [dict remove $Bstring_control_map "\\"] + set testval [string map $testmap $without_escaped_dquotes] + #if they differ - there were raw controls + return [expr {$testval eq $without_escaped_dquotes}] } proc rawstring_is_valid_literal {str} { @@ -2850,48 +3445,9 @@ namespace eval tomlish::utils { } } - proc is_datepart {str} { - set matches [regexp -all {[0-9\-]} $str] - if {[tcl::string::length $str] != $matches} { - return 0 - } - #seems to require yyyy-mm-dd (e.g not allowing just yyyy-mm) - if {![regexp {^([0-9]{4})-([0-9]{2})-([0-9]{2})$} $str _match y m d]} { - return 0 - } - if {$m > 12 || $m == 0} { - return 0 - } - switch -- [expr {$m}] { - 1 - 3 - 5 - 7 - 8 - 10 - 12 { - if {$d > 31 || $d == 0} { - return 0 - } - } - 2 { - if {$d > 29 || $d == 0} { - return 0 - } - if {$d == 29} { - #leapyear check - if {[catch {clock scan $str -format %Y-%m-%d} errM]} { - return 0 - } - } - } - 4 - 6 - 9 - 11 { - if {$d > 30 || $d == 0} { - return 0 - } - } - } - return 1 - } - proc is_localdate {str} { - is_datepart $str - } #allow only hh:mm:ss or hh:mm (no subseconds) + #return 2 when missing seconds proc _is_hms_or_hm_time {val} { set numchars [tcl::string::length $val] if {[regexp -all {[0-9:]} $val] != $numchars} { @@ -2908,6 +3464,7 @@ namespace eval tomlish::utils { if {$hr > 23 || $min > 59} { return 0 } + return 2 ;#missing seconds indicator (can still be used as boolean for true in tcl if we don't care whether hh::mm::ss or hh:mm } elseif {[llength $hms_cparts] == 3} { lassign $hms_cparts hr min sec if {[string length $hr] != 2 || [string length $min] != 2 || [string length $sec] !=2} { @@ -2917,10 +3474,10 @@ namespace eval tomlish::utils { if {$hr > 23 || $min > 59 || $sec > 60} { return 0 } + return 1 } else { return 0 } - return 1 } proc is_timepart {str} { #validate the part after the T (or space) @@ -2946,6 +3503,11 @@ namespace eval tomlish::utils { } if {[llength $dotparts] == 2} { lassign $dotparts hms tail + if {[_is_hms_or_hm_time $hms] == 2} { + #If we have a dot - assume hh::mm::ss required + #toml spec is unclear on this but hh:mm. doesn't seem sensible - REVIEW + return 0 + } #validate tail - which might have +- offset if {[string index $tail end] ni {z Z}} { #from hh:mm:??. @@ -2954,14 +3516,21 @@ namespace eval tomlish::utils { if {![string is digit -strict $fraction]} { return 0 } - if {![_is_hms_or_hm_time $offset]} { + if {[_is_hms_or_hm_time $offset] != 2} { + #RFC3339 indicates offset can be specified as hh:mm or Z - not hh:mm:ss + return 0 + } + } else { + #tail has no +/-, only valid if fraction digits + #toml-test invalid/datetime/second-trailing-dot + if {![string is digit -strict $tail]} { return 0 } } } else { set tail [string range $tail 0 end-1] #expect tail nnnn (from hh:mm::ss.nnnnZ) - #had a dot and a zZ - no other offset valid (?) + #had a dot and a zZ if {![string is digit -strict $tail]} { return 0 } @@ -2970,8 +3539,10 @@ namespace eval tomlish::utils { } else { #no dot (fraction of second) if {[regexp {(.*)[+-](.*)} $str _match hms offset]} { - #validate offset - if {![_is_hms_or_hm_time $offset]} { + #validate offset + #offset of +Z or -Z not valid + if {[_is_hms_or_hm_time $offset] != 2} { + #offset is not of required form hh:mm return 0 } } else { @@ -2994,7 +3565,45 @@ namespace eval tomlish::utils { return 0 } } - proc is_localtime {str} { + + proc is_date-local {str} { + set matches [regexp -all {[0-9\-]} $str] + if {[tcl::string::length $str] != $matches} { + return 0 + } + #seems to require yyyy-mm-dd (e.g not allowing just yyyy-mm) + if {![regexp {^([0-9]{4})-([0-9]{2})-([0-9]{2})$} $str _match y m d]} { + return 0 + } + if {$m > 12 || $m == 0} { + return 0 + } + switch -- [expr {$m}] { + 1 - 3 - 5 - 7 - 8 - 10 - 12 { + if {$d > 31 || $d == 0} { + return 0 + } + } + 2 { + if {$d > 29 || $d == 0} { + return 0 + } + if {$d == 29} { + #leapyear check + if {[catch {clock scan $str -format %Y-%m-%d} errM]} { + return 0 + } + } + } + 4 - 6 - 9 - 11 { + if {$d > 30 || $d == 0} { + return 0 + } + } + } + return 1 + } + proc is_time-local {str} { #time of day without any relation to a specific day or any offset or timezone set numchars [tcl::string::length $str] if {[regexp -all {[0-9\.:]} $str] == $numchars} { @@ -3023,9 +3632,26 @@ namespace eval tomlish::utils { return 0 } } - - #review + proc is_datetime-local {str} { + set norm [string map {" " T} $str] + lassign [split $norm T] dp tp + if {$dp eq "" || $tp eq ""} {return 0} + if {![is_date-local $dp]} {return 0} + if {![is_timepart $tp]} {return 0} + if {![is_time-local $tp]} {return 0} + return 1 + } proc is_datetime {str} { + set norm [string map {" " T} $str] + lassign [split $norm T] dp tp + if {$dp eq "" || $tp eq ""} {return 0} + if {![is_date-local $dp]} {return 0} + if {![is_timepart $tp]} {return 0} + if {[is_time-local $tp]} {return 0} + return 1 + } + #review + proc is_date_or_time_or_datetime {str} { #Essentially RFC3339 formatted date-time - but: #1) allowing seconds to be omitted (:00 assumed) #2) T may be replaced with a single space character TODO - parser support for space in datetime! @@ -3073,7 +3699,7 @@ namespace eval tomlish::utils { if {[string first T $str] > -1} { lassign [split $str T] datepart timepart - if {![is_datepart $datepart]} { + if {![is_date-local $datepart]} { return 0 } if {![is_timepart $timepart]} { @@ -3083,7 +3709,7 @@ namespace eval tomlish::utils { #either a datepart or a localtime #spec: "If you include only the time portion of an RFC 3339 formatted date-time, it will represent that time of day # without any relation to a specific day or any offset or timezone." - if {!([is_datepart $str] || [is_localtime $str])} { + if {!([is_date-local $str] || [is_time-local $str])} { return 0 } } @@ -6029,7 +6655,7 @@ namespace eval tomlish::huddle { set h [huddle::json::json2huddle parse $json] } proc from_dict {d} { - + error "tomlish::huddle::from_dict unimplemented" } #raw - strings must already be processed into values suitable for json e.g surrogate pair escaping @@ -6625,8 +7251,40 @@ namespace eval tomlish::dict { set testtype integer set dval [expr {$dval}] ;#convert e.g 0xDEADBEEF to 3735928559 } - DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL - FLOAT - BOOL { - #DDDD + FLOAT - BOOL { + set testtype [string tolower $dtype] + } + DATE-LOCAL { + set testtype date-local + } + TIME-LOCAL { + if {[tomlish::utils::_is_hms_or_hm_time $dval] == 2} { + #add seconds for sending to json + set dval "${dval}:00" + } + set testtype time-local + } + DATETIME - DATETIME-LOCAL { + #we expect it to be basically well formed here - this is not validation - just adding possible missing seconds + if {![regexp {([tT\ ])} $dval _ dsep]} { + return -code error -errorcode {TOJSON SYNTAX INVALIDDATE} "Unable to process $dtype '$dval' - missing RFC3339 separator space or T" + } + lassign [split $dval $dsep] dp tail + + #toml allows HH:MM without seconds - but we need to add seconds 00 when passing to external systems + if {![tomlish::utils::is_time-local $tail]} { + #there is some offset component. We aren't checking its syntax here (presumed done when dict building) + regexp {([\+\-zZ])} $tail _ tsep ;#keep tsep for rebuilding + lassign [split $tail $tsep] tp offset ;#offset may be empty if z or Z + } else { + set tp $tail + set tsep "" + set offset "" + } + if {[tomlish::utils::_is_hms_or_hm_time $tp] == 2} { + #need to add seconds + set dval "${dp}${dsep}${tp}:00${tsep}${offset}" + } set testtype [string tolower $dtype] } STRING - MULTISTRING { @@ -6644,10 +7302,6 @@ namespace eval tomlish::dict { #} set dval [tomlish::utils::rawstring_to_jsonstring $dval] } - MULTILITERAL { - #todo - escape newlines for json? - set testtype string - } default { error "convert_typeval_to_tomltest unhandled type $dtype" } @@ -6882,7 +7536,7 @@ namespace eval tomlish::dict { lappend dottedtables_defined $dottedsuper_refpath #ensure empty tables are still represented in the datastructure - tomlish::dict::path::set_endpoint datastructure $dottedsuper_refpath {} ;#set to empty subdict + tomlish::dict::path::setleaf datastructure $dottedsuper_refpath {} 0;#set to empty subdict } else { #added for fixed assumption set ttype [dict get $tablenames_info $dottedsuper_refpath ttype] @@ -6935,7 +7589,7 @@ namespace eval tomlish::dict { #'create' the table dict set tablenames_info $dottedkey_refpath ttype dottedkey_table #don't actually set 'defined' here.. use the end of TABLE record to close them off by looking at this list - tomlish::dict::path::set_endpoint datastructure $dottedkey_refpath {} + tomlish::dict::path::setleaf datastructure $dottedkey_refpath {} 0 lappend dottedtables_defined $dottedkey_refpath # @@ -6994,7 +7648,7 @@ namespace eval tomlish::dict { #or the result from parsing an arbitrary dict from an inline table - which could theoretically look the same at the topmost level #punk::dict::is_typeval can distinguish tomlish::log::debug "_process_tomlish_dottedkey>>> context:$context_refpath dottedkey $dottedkeyname kv: $keyval_dict" - tomlish::dict::path::set_endpoint datastructure $fullkey_refpath $keyval_dict + tomlish::dict::path::setleaf datastructure $fullkey_refpath $keyval_dict 0 #remove ? #if {![tomlish::dict::is_typeval $keyval_dict]} { @@ -7015,8 +7669,17 @@ namespace eval tomlish::dict { #} return [dict create dottedtables_defined $dottedtables_defined] } + + #tomlish::dict::from_tomlish is a *basic* programmatic datastructure for accessing the data. # produce a dictionary of keys and values from a tomlish tagged list. + # ---------------------------------------------------------------- + # NOTE: + # can instead produce a list if passed an ARRAY at toplevel + # can produce a single value if passed a MULTISTRING or MULTILIST at toplevel + # These are fragments of tomlish used in recursive calls. + # Such fragments don't represent valid tomlish that can be converted to a toml doc. + # ---------------------------------------------------------------- # dict::from_tomlish is primarily for read access to toml data. #Extraneous (not within quoted sections) whitespace and comments are not preserved in this structure, # so a roundtrip from toml to this datastructure and back to toml will lose whitespace formatting and comments. @@ -7036,7 +7699,7 @@ namespace eval tomlish::dict { # versus #[Data] #temps = [{cpu = 79.5, case = 72.0}] - proc from_tomlish {tomlish} { + proc from_tomlish {tomlish {returnextra 0}} { package require dictn #keep track of which tablenames have already been directly defined, @@ -7099,13 +7762,17 @@ namespace eval tomlish::dict { #value is a dict with keys: ttype, tdefined } + if {![string is list $tomlish]} { + error "tomlish::dict::from_tomlish Supplied value for tomlish does not appear to be a tomlish list. Use tomlish::from_toml to get a tomlish list from toml." + } + log::info "---> dict::from_tomlish processing '$tomlish'<<<" set items $tomlish foreach lst $items { if {[lindex $lst 0] ni $::tomlish::tags} { - error "supplied list does not appear to be toml parsed into a tomlish tagged list. Run tomlish::decode::toml on the raw toml data to produce a tomlish list" + error "tomlish::dict::from_tomlish supplied list does not appear to be toml parsed into a tomlish tagged list. Run tomlish::decode::toml on the raw toml data to produce a tomlish list" } } @@ -7121,12 +7788,13 @@ namespace eval tomlish::dict { #puts "...> item:'$item' tag:'$tag'" switch -exact -- $tag { KEY - DQKEY - SQKEY - INT - FLOAT - DATETIME - DATETIME-LOCAL - DATE-LOCAL - TIME-LOCAL - STRING - LITERAL { - #why would we get individual key item as opposed to DOTTEDKEY? + #we don't require invalid tomlish fragments with these keys in our direct recursion + #(we do support ARRAY, MULTISTING, and MULTILITERAL tomlish fragments below) error "tomlish::dict::from_tomlish error: invalid tag: $tag. At the toplevel, from_tomlish can only process WS NEWLINE COMMENT and compound elements DOTTEDKEY TABLE TABLEARRAY ITABLE MULTILITERAL MULTISTRING" } DOTTEDKEY { - #toplevel dotted key - set dkinfo [_process_tomlish_dottedkey $item] + #toplevel dotted key empty context_refpath + set dkinfo [_process_tomlish_dottedkey $item {}] lappend dottedtables_defined {*}[dict get $dkinfo dottedtables_defined] #at any level - we don't expect any more DOTTEDKEY records in a tomlish structure after TABLE or TABLEARRAY are encountered #as those records should encapsulate their own dottedkeys @@ -7221,7 +7889,7 @@ namespace eval tomlish::dict { dict set tablenames_info $tablearray_refpath ttype header_tablearray #dict set datastructure {*}$norm_segments [list type ARRAY value {}] #create array along with empty array-item at position zero - tomlish::dict::path::set_endpoint datastructure $tablearray_refpath [list type ARRAY value {{}}] + tomlish::dict::path::setleaf datastructure $tablearray_refpath [list type ARRAY value {{}}] 0 set arrayitem_refpath [list {*}$tablearray_refpath 0] #set ARRAY_ELEMENTS [list] } else { @@ -7375,7 +8043,7 @@ namespace eval tomlish::dict { dict set tablenames_info $refpath ttype unknown_header #ensure empty tables are still represented in the datastructure #dict set datastructure {*}$supertable [list] - tomlish::dict::path::set_endpoint datastructure $refpath {} + tomlish::dict::path::setleaf datastructure $refpath {} 0 } else { #supertable has already been created - and may be defined - but even if defined we can add subtables unless it is of type itable if {[dict get $tablenames_info $refpath ttype] eq "header_tablearray"} { @@ -7420,7 +8088,7 @@ namespace eval tomlish::dict { #We are 'defining' this table's keys and values here (even if empty) #dict set datastructure {*}$norm_segments [list] ;#ensure table still represented in datastructure even if we add no keyvals here - tomlish::dict::path::set_endpoint datastructure $table_refpath {} ;#ensure table still represented in datastructure even if we add no keyvals here + tomlish::dict::path::setleaf datastructure $table_refpath {} 0;#ensure table still represented in datastructure even if we add no keyvals here } else { if {[dict get $tablenames_info $table_refpath ttype] eq "header_tablearray"} { #e.g tomltest invalid/table/duplicate-table-array2 @@ -7492,6 +8160,7 @@ namespace eval tomlish::dict { } } ARRAY { + #invalid at toplevel of a 'complete' tomlish structure - but we support it here for recursive fragment processing #arrays in toml are allowed to contain mixtures of types set datastructure [list] log::debug "--> processing array: $item" @@ -7540,6 +8209,8 @@ namespace eval tomlish::dict { } } MULTILITERAL { + #Not for toplevel of complete tomlish - (recursive fragment processing) + #triple squoted string #first newline stripped only if it is the very first element #(ie *immediately* following the opening delims) @@ -7583,6 +8254,7 @@ namespace eval tomlish::dict { set datastructure $stringvalue } MULTISTRING { + #Not for toplevel of complete tomlish - (recursive fragment processing) #triple dquoted string log::debug "---> tomlish::dict::from_tomlish processing multistring: $item" set stringvalue "" @@ -7696,82 +8368,394 @@ namespace eval tomlish::dict { } } } - return $datastructure + if {!$returnextra} { + return $datastructure + } else { + return [dict create datastructure $datastructure tablenames_info $tablenames_info] + } + } +} +namespace eval tomlish::path { + namespace export {[a-z]*}; # Convention: export all lowercase + + set test_tomlish [tomlish::from_toml { } #comment {z=1} {x.y=2 #xy2} {[[shop.product]] #product1} {x=[ #array1} {11 #val1} {, 12 #val2} {]} {[unrelated.' etc ']} {a.b={c=666}} {a.x={}} {[[shop.product]]} {x="test"} {[shop]} {name="myshop"}] + + proc get {tomlish {path {}}} { + if {$path eq ""} { + return $tomlish + } + if {[string index $path 0] in [list . "\["]} { + set path [tomlish::utils::jq_to_path $path] + } + + #at the cost of some performance, sanity check that the tomlish is valid + if {[catch {tomlish::to_dict $tomlish} d]} { + error "tomlish::path::get error supplied tomlish is malformed\nerrmsg: $d" + } + #since we have the dict - test the path is valid + if {![tomlish::dict::path::exists $d $path]} { + error "tomlish::path::get - path \"$path\" not found in tomlish $tomlish" + } + + if {[lindex $tomlish 0] eq "TOMLISH"} { + set tomlish [lrange $tomlish 1 end] + } + ::set pathsofar [list] + ::set tomlitems [list] ;#reducing set. 2 element list {keypath itemlist} + foreach record $tomlish { + lappend tomlitems [list {} [list $record]] ;#root records + } + + ::set dictsubpath [list] ;#reset at every index encounter? + foreach p $path { + ::lappend pathsofar $p + set sublist [list] + if {[string range $p 0 1] eq "@@"} { + set realsearchkey [string range $p 2 end] + lappend dictsubpath $realsearchkey + foreach path_items $tomlitems { + lassign $path_items subpath tlist + lappend subpath $realsearchkey + foreach item $tlist { + set tp [lindex $item 0] + switch -- $tp { + WS - NEWLINE - COMMENT { + } + DOTTEDKEY { + #can occur at toplevel (before others) or within other elements + set keyinfo [tomlish::get_dottedkey_info $item] + set keys_raw [dict get $keyinfo keys_raw] + puts stderr "subpath:$subpath -->DOTTEDKEY keys_raw: $keys_raw" + #may not be enough keys_raw for subpath - but there could be further ITABLES to continue the dict further + set prefixparts [lrange $keys_raw 0 [llength $subpath]-1] + set is_kmatch 1 ;#default assumption only + foreach dsub $subpath kpart $prefixparts { + if {$dsub ne $kpart} { + set is_kmatch 0 + } + } + if {$is_kmatch} { + if {[llength $keys_raw] == [llength $subpath]} { + set subpath [list] + #e.g {DOTTEDKEY {{KEY xxx}} = {WS { }} {STRING blah}} + lappend sublist [list $subpath [lrange $item 3 end]] + } else { + lappend sublist [list $subpath [list $item]] + } + } + } + ITABLE { + #subelement only + set itablechildren [lrange $item 1 end] + puts stderr "subpath:$subpath -->ITABLE records: $itablechildren" + set nextpath [lmap v $subpath {string cat @@ $v}] + set results [tomlish::path::get $itablechildren $nextpath] + set subpath [list] + puts "--> lappending [list $subpath $results]" + lappend sublist [list $subpath $results] + } + TABLEARRAY { + #toplevel only + set fulltablename [lindex $item 1] + set normalise 1 + set tparts [tomlish::toml::tablename_split $fulltablename $normalise] + if {[llength $tparts] < [llength $subpath]} {continue} ;#not enough parts to satisfy current subpath query + set prefixparts [lrange $tparts 0 [llength $subpath]-1] + set is_tmatch 1 ;#default assumption only + foreach dsub $subpath tpart $prefixparts { + if {$dsub ne $tpart} { + set is_tmatch 0 + } + } + #TODO reference arrays + if {$is_tmatch} { + if {[llength $tparts] == [llength $subpath]} { + set subpath [list] + lappend sublist [list $subpath [lrange $item 2 end]] + } else { + #TODO + set subpath 0 + lappend sublist [list $subpath [list $item]] ;#add entire TABLE line + } + } + } + TABLE { + #toplevel only + set fulltablename [lindex $item 1] + set normalise 1 + set tparts [tomlish::toml::tablename_split $fulltablename $normalise] + if {[llength $tparts] < [llength $subpath]} {continue} ;#not enough parts to satisfy current subpath query + set prefixparts [lrange $tparts 0 [llength $subpath]-1] + set is_tmatch 1 ;#default assumption only + foreach dsub $subpath tpart $prefixparts { + if {$dsub ne $tpart} { + set is_tmatch 0 + } + } + if {$is_tmatch} { + if {[llength $tparts] == [llength $subpath]} { + set subpath [list] + lappend sublist [list $subpath [lrange $item 2 end]] + } else { + #leave subpath + lappend sublist [list $subpath [list $item]] ;#add entire TABLE line + } + } + } + ARRAY { + #subelement only + } + + } + } + } + } else { + #index + #will never occur at toplevel (dict::path::exists already ruled it out) + foreach path_items $toml_items { + lassign $path_items subpath $tlist + set tp [lindex $tlist 0] + switch -- $tp { + ARRAY { + } + } + } + } + #temp + puts stdout "pathsofar: $pathsofar" + puts stdout [punk::lib::showdict -roottype list $sublist] + set tomlitems $sublist + } + + #REVIEW + if {[llength $tomlitems] == 1} { + return [lindex $tomlitems 0 1] + } + set result [list] + foreach i $tomlitems { + lappend result [lindex $i 1] + } + return $result + #return [lindex $tomlitems 1] } + } namespace eval tomlish::dict::path { - #access tomlish dict structure + + #access tomlish dict structure namespace export {[a-z]*}; # Convention: export all lowercase - #access with path such as: @@k @@k 0 @@k end where dict keys marked with @@ and plain values are list indices into in {type ARRAY value } - #leaf elements returned as structured {type value } + #access with path such as: @@k @@k 0 @@k end where dict keys marked with @@ and plain values are list indices into in {type ARRAY value } + #leaf elements returned as structured {type value } proc get {dictval {path {}}} { if {$path eq ""} { return $dictval } + if {[string index $path 0] in [list . "\["]} { + set path [tomlish::utils::jq_to_path $path] + } + ::set data $dictval ::set pathsofar [list] + ::set i 0 foreach p $path { ::lappend pathsofar $p if {[string range $p 0 1] eq "@@"} { + #dict key ::set data [dict get $data [string range $p 2 end]] } else { - if {![tomlish::dict::is_typeval $data]} { - error "tomlish::dict::path::get error bad path $path. Attempt to access table as array at subpath $pathsofar." - } - if {[dict get $data type] ne "ARRAY"} { - error "tomlish::dict::get error bad path $path. Subpath $pathsofar is not an array." + #ARRAY or raw list index + if {[llength $pathsofar] > 1 && [string trim [lindex $pathsofar $i-1]] eq ""} { + #previous path was query for entire list - result is a raw list, not a dict + if {[string trim $p] eq ""} { + #review - multiple {[]} in a row in the path is pretty suspicious - raise error + error "tomlish::dict::path::get error - multiple empty indices in a row not supported" + } + ::set data [lindex $data $p] + } else { + if {![tomlish::dict::is_typeval $data]} { + error "tomlish::dict::path::get error bad path $path. Attempt to access table or other value as array at subpath $pathsofar." + } + if {[dict get $data type] ne "ARRAY"} { + error "tomlish::dict::get error bad path $path. Subpath $pathsofar is not an array." + } + ::set arrdata [dict get $data value] + #when $p is empty string (or whitespace) - lindex returns entire list (or empty list) + # - this corresponds to jq: {[]} or path {""} + ::set data [lindex $arrdata $p] } - ::set arrdata [dict get $data value] - ::set data [lindex $arrdata $p] } + incr i } return $data } + proc exists {dictval path} { + #completely empty path considered to exist - review + if {[string index $path 0] in [list . {[}]} { + set path [tomlish::utils::jq_to_path $path] + } ::set data $dictval ::set pathsofar [list] ::set exists 1 + ::set i 0 foreach p $path { ::lappend pathsofar $p if {[string range $p 0 1] eq "@@"} { + #dict key ::set k [string range $p 2 end] if {![dict exists $data $k]} { return 0 } ::set data [dict get $data $k] } else { - if {![tomlish::dict::is_typeval $data]} { - return 0 + #ARRAY or raw list index + if {[llength $pathsofar] > 1 && [string trim [lindex $pathsofar $i-1]] eq ""} { + #previous path was query for entire list - result is not a dict + if {[string trim $p] eq ""} { + #review - multiple {[]} in a row in the path is pretty suspicious - raise error + error "tomlish::dict::path::exists error - multiple empty indices in a row not supported" + #or just leave data as is? + } else { + ::set intp [tomlish::system::lindex_resolve_basic $data $p] + if {$intp == -1} { + return 0 + } + ::set data [lindex $data $p] + } + } else { + if {![tomlish::dict::is_typeval $data]} { + return 0 + } + if {[dict get $data type] ne "ARRAY"} { + return 0 + } + #special case for empty path syntax {jq: [] path: ""} meaning retrieve all elements in list + ::set arrdata [dict get $data value] + if {[string trim $p] eq ""} { + #we have confirmed above it is an ARRAY - we consider an empty list to exist. + #UUU + ::set data $arrdata + } else { + #for 'exists' we need to avoid lindex returning empty string for out of bounds + ::set intp [tomlish::system::lindex_resolve_basic $arrdata $p] ;#handle index math (end-1 etc) + if {$intp == -1} { + #out of bounds + return 0 + } + ::set data [lindex $arrdata $p] + } } - if {[dict get $data type] ne "ARRAY"} { - return 0 + } + incr i + } + return $exists + } + + + #raise error for invalid + proc validate_typeval {typeval} { + set valtype [dict get $typeval type] + set rawval [dict get $typeval value] + switch -- $valtype { + INT { + if {![tomlish::utils::is_int $rawval]} { + return -code error -errorcode {TOML TYPE NOT_INT} "validate_typeval value is not a valid toml int: '$rawval'" } - ::set arrdata [dict get $data value] - ::set intp [tomlish::system::lindex_resolve_basic $arrdata $p] ;#handle index math (end-1 etc) - if {$intp == -1} { - #out of bounds - return 0 + } + BOOL { + #toml only accepts lower case true and false + #review + if {$rawval ni {true false}} { + return -code error -errorcode {TOML TYPE NOT_INT} "validate_typeval value is not a valid toml boolean (true|false): '$rawval'" + } + } + FLOAT { + if {![tomlish::utils::is_float $rawval]} { + return -code error -errorcode {TOML TYPE NOT_INT} "validate_typeval value is not a valid toml float: '$rawval'" + } + } + DATETIME { + #review - accept even when more specific types apply? + if {![tomlish::utils::is_datetime]} { + return -code error -errorcode {TOML TYPE NOT_DATETIME} "validate_typeval value is not a valid toml datetime: '$rawval'" + } + } + DATETIME-LOCAL { + if {![tomlish::utils::is_datetime-local]} { + return -code error -errorcode {TOML TYPE NOT_DATETIME-LOCAL} "validate_typeval value is not a valid toml datetime-local: '$rawval'" + } + } + DATE-LOCAL { + if {![tomlish::utils::is_date-local]} { + return -code error -errorcode {TOML TYPE NOT_DATE-LOCAL} "validate_typeval value is not a valid toml date-local: '$rawval'" + } + } + TIME-LOCAL { + if {![tomlish::utils::is_time-local]} { + return -code error -errorcode {TOML TYPE NOT_TIME-LOCAL} "validate_typeval value is not a valid toml time-local: '$rawval'" + } + } + ARRAY { + if {$rawval eq ""} { + return + } + foreach el $rawval { + validate_typeval $el + } + } + STRING { + if {![tomlish::utils::inner_Bstring_is_valid_toml $rawval]} { + return -code error -errorcode {TOML TYPE NOT_BSTRING} "validate_typeval value is not a valid toml basic string: '$rawval'" } - ::set data [lindex $arrdata $p] + } + MULTISTRING { + #multistring as a single value + #UUU + if {![tomlish::utils::inner_MultiBstring_is_valid_toml $rawval]} { + return -code error -errorcode {TOML TYPE NOT_MLBSTRING} "validate_typeval value is not a valid toml multistring: '$rawval'" + } + } + LITERAL { + #todo? + } + MULTILITERAL { + #? + } + default { + return -code error -errorcode {TOML TYPE UNRECOGNISED} "validate_typeval does not recognise type '$valtype'" } } - return $exists } #a restricted analogy of 'dictn set' - #set 'endpoints' - don't create intermediate paths + #set 'leaf' values only - don't create intermediate paths # can replace an existing dict with another dict # can create a key when key at tail end of path is a key (ie @@keyname, not index) # can replace an existing {type value value } # with added restriction that if is ARRAY the new must also be ARRAY - proc set_endpoint {dictvariable path value} { + + package require struct::list + proc setleaf {dictvariable path value {validate 1}} { + if {[string index $path 0] in [list . {[}]} { + set path [tomlish::utils::jq_to_path $path] + } + upvar $dictvariable dict_being_edited + if {![info exists dict_being_edited]} { + error "tomlish::dict::path::setleaf error - supplied value for 'dictvariable' doesn't seem to be the name of an existing variable" + } ::set data $dict_being_edited ::set pathsofar [list] if {!([tomlish::dict::is_typeval $value] || [tomlish::dict::is_typeval_dict $value 0])} { #failed check of supplied value as basic type, or a sub-dict structure (not checking arrays) - error "tomlish::dict::path::set_endpoint error - value must already be in the tomlish form {type value } or be a dict with such forms as leaves" + error "tomlish::dict::path::setleaf error - value must already be in the tomlish form {type value } or be a dict with such forms as leaves" + } + if {$validate && [tomlish::dict::is_typeval $value]} { + #validate value element of $value is correct for type element + if {[catch {validate_typeval $value} errM]} { + return -code error -errorcode {TOMLISH VALIDATION TYPEFAIL} $errM + } } foreach p $path { ::lappend pathsofar $p @@ -7783,28 +8767,28 @@ namespace eval tomlish::dict::path { #} ::set varname v[incr v] - if {$pathsofar eq $path} { - #see if endpoint of the path given already exists + if {[struct::list equal $pathsofar $path]} { + #see if leaf of the path given already exists if {[dict exists $data $k]} { ::set endpoint [dict get $data $k] if {[tomlish::dict::is_typeval $endpoint]} { set existing_tp [dict get $endpoint type] if {![tomlish::dict::is_typeval $value]} { - error "tomlish::dict::path::set_endpoint error Unable to overwrite subpath '$pathsofar' which is of type $existing_tp with sub-dict. Supplied value not {type value value val } with sub-dict: $value" + error "tomlish::dict::path::setleaf error path '$path'. Cannot overwrite {type val } with sub-dict: $value" } switch -- [dict get $endpoint type] { ARRAY { #disallow overwriting array - unless given value is an ARRAY? REVIEW if {[dict get $value type] ne "ARRAY"} { - error "tomlish::dict::path::set_endpoint error bad path '$path'. Cannot overwrite array with non-array: $value" + error "tomlish::dict::path::setleaf error bad path '$path'. Cannot overwrite array with non-array: $value" } } default { @@ -7855,9 +8839,9 @@ namespace eval tomlish::dict::path { } } } else { - #endpoint is a typeval dict not a plain typeval - only allow overwrite with a typeval dict + #leaf is a typeval dict not a plain typeval - only allow overwrite with a typeval dict if {![tomlish::dict::is_typeval_dict $value 0]} { - error "tomlish::dict::path::set_endpoint error path '$path'. Cannot overwrite sub-dict (size: [dict size $endpoint]) with non sub-dict: $value" + error "tomlish::dict::path::setleaf error path '$path'. Cannot overwrite sub-dict (size: [dict size $endpoint]) with non sub-dict: $value" } } ::set $varname $value @@ -7867,7 +8851,7 @@ namespace eval tomlish::dict::path { ::set arrdata [dict get $data value] set idx [tomlish::system::lindex_resolve_basic $arrdata $p] if {$idx == -1} { - error "tomlish::dict::path::set_endpoint error bad path '$path'. No existing element at $p" + error "tomlish::dict::path::setleaf error bad path '$path'. No existing element at $p" } ::set data [lindex $arrdata $p] ::set $varname $data @@ -7897,7 +8881,7 @@ namespace eval tomlish::dict::path { if {[string match @@* $k]} { #dict key #dict set $nextvarname $k $newval - set_endpoint $nextvarname [list $k] $newval + setleaf $nextvarname [list $k] $newval 0 } else { #list index ::set nextarr [dict get $nextval value] @@ -7913,6 +8897,9 @@ namespace eval tomlish::dict::path { #path must be to a {type ARRAY value } #REVIEW - how to lappend to deep mixed dict/array structure without rewriting whole datastructure? proc lappend {dictvariable path args} { + if {[string index $path 0] in [list . {[}]} { + set path [tomlish::utils::jq_to_path $path] + } upvar $dictvariable dict_being_edited ::set data $dict_being_edited ::set pathsofar [list] @@ -7933,7 +8920,7 @@ namespace eval tomlish::dict::path { } ::set varname v[incr v] - if {$pathsofar eq $path} { + if {[struct::list equal $pathsofar $path]} { #see if endpoint of the path given is an ARRAY ::set endpoint [dict get $data $k] if {![tomlish::dict::is_typeval $endpoint]} { @@ -7961,7 +8948,7 @@ namespace eval tomlish::dict::path { error "tomlish::dict::path::lappend error bad path $path. Subpath $pathsofar is not an array." } ::set varname v[incr v] - if {$pathsofar eq $path} { + if {[struct::list equal $pathsofar $path]} { if {[dict get $data type] ne "ARRAY"} { error "tomlish::dict::path::lappend error bad path $path. Parent path is not an array." } @@ -8160,6 +9147,8 @@ tcl::namespace::eval tomlish::app { #review chan configure $ch_input -translation lf + chan configure $ch_output -translation lf + if {[catch { set json [read $ch_input] }]} { @@ -8291,6 +9280,25 @@ namespace eval tomlish::system { } } + #supports *safe* ultra basic offset expressions as used by lindex etc, but without the 'end' features + #safe in that we don't evaluate the expression as a string. + proc offset_expr {expression} { + #required for tcl < 8.7 range command (lseq not available) + set expression [tcl::string::map {_ {}} $expression] + if {[tcl::string::is integer -strict $expression]} { + return [expr {$expression}] + } + if {[regexp {(.*)([+-])(.*)} $expression _match a op b] && [tcl::string::is integer -strict $a] && [tcl::string::is integer -strict $b]} { + if {$op eq "-"} { + return [expr {$a - $b}] + } else { + return [expr {$a + $b}] + } + } else { + error "bad expression '$expression': must be integer?\[+-\]integer?" + } + } + if {[info commands ::lseq] ne ""} { #tcl 8.7+ lseq significantly faster, especially for larger ranges #The internal rep can be an 'arithseries' with no string representation