You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
266 lines
8.4 KiB
266 lines
8.4 KiB
# -*- tcl -*- |
|
# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'dev make' or bin/punkmake to update from <pkg>-buildversion.txt |
|
# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm |
|
# |
|
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
|
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# (C) 2024 DasBrain |
|
# http://paste.tclers.tk/5977 |
|
# |
|
# @@ Meta Begin |
|
# Application punk::cesu 999999.0a1.0 |
|
# Meta platform tcl |
|
# Meta license <unknown> |
|
# @@ Meta End |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# doctools header |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#*** !doctools |
|
#[manpage_begin punkshell_module_punk::cesu 0 999999.0a1.0] |
|
#[copyright "2024"] |
|
#[titledesc {CESU compatibility ehcoding scheme for utf-16: 8-Bit (CESU-8) ??}] [comment {-- Name section and table of contents description --}] |
|
#[moddesc {CESU experimental}] [comment {-- Description at end of page heading --}] |
|
#[require punk::cesu] |
|
#[keywords module cesu encoding compatibility experimental unofficial] |
|
#[description] |
|
#[para] experimental |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section Overview] |
|
#[para] overview of punk::cesu |
|
#[subsection Concepts] |
|
#[para] cesu-8 may be mistaken for utf-8 if no supplementary chars present. |
|
#[para] see: https://www.unicode.org/reports/tr26/tr26-4.html |
|
#[para] Particulary note discouragement of use especially in external interchange. |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Requirements |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[subsection dependencies] |
|
#[para] packages used by punk::cesu |
|
#[list_begin itemized] |
|
|
|
package require Tcl 8.6- |
|
#*** !doctools |
|
#[item] [package {Tcl 8.6}] |
|
|
|
# #package require frobz |
|
# #*** !doctools |
|
# #[item] [package {frobz}] |
|
|
|
#*** !doctools |
|
#[list_end] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section API] |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# Base namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
tcl::namespace::eval punk::cesu { |
|
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase |
|
#variable xyz |
|
|
|
#*** !doctools |
|
#[subsection {Namespace punk::cesu}] |
|
#[para] Core API functions for punk::cesu |
|
#[list_begin definitions] |
|
|
|
|
|
|
|
proc formatQuery args { |
|
package require http |
|
http::config ;#call something in http lib - as http lib uses auto_index so is lazy about creating ::http namespace etc. |
|
set result "" |
|
set sep "" |
|
foreach i $args { |
|
append result $sep [mapReply $i] |
|
if {$sep eq "="} { |
|
set sep & |
|
} else { |
|
set sep = |
|
} |
|
} |
|
return $result |
|
} |
|
|
|
proc mapReply string { |
|
variable ::http::formMap |
|
|
|
set string [encoding convertto utf-8 $string] |
|
set string [cesu2utf $string] |
|
return [string map $formMap $string] |
|
} |
|
|
|
|
|
|
|
proc cesu2utf str { |
|
#hacked by JMN - as original seemed broken and intention as to input is unclear |
|
if {[regexp {\xED([\xA0-\xAF])([\x80-\xBF])\xED([\xB0-\xBF])([\x80-\xBF])} $str]} { |
|
#set str [string map {\ \\ \[ \\\[ \] \\\]} $str] ;#original -broken - unsure of usecase/intention |
|
set str [string map {\\ \\\\ \[ \\\[ \] \\\]} $str] ;#guess intention is to stop premature substitution of escapes and commands |
|
#return [subst -novariables [regsub -all {^\xED([\xA0-\xAF])([\x80-\xBF])\xED([\xB0-\xBF])([\x80-\xBF])$} $str {[cesu2utfR \1 \2 \3 \4]} ]] ;#original. anchoring seems unlikely to be desirable |
|
return [subst -novariables [regsub -all {\xED([\xA0-\xAF])([\x80-\xBF])\xED([\xB0-\xBF])([\x80-\xBF])} $str {[cesu2utfR \1 \2 \3 \4]} ]] |
|
} else { |
|
return $str |
|
} |
|
} |
|
|
|
proc cesu2utfR {1 2 3 4} { |
|
# UTF-8: 11110xxx 10xx xxxx 10xx xxxx 10xxxxxx |
|
# CESU-8: 11101101 1010 yy yy 10xxxx xx 11101101 1011xxxx 10xxxxxx |
|
binary scan $1 c 1 |
|
binary scan $2 c 2 |
|
binary scan $3 c 3 |
|
puts [list $1 $2 $3] |
|
#binary scan $4 c 4 |
|
incr 1 |
|
|
|
return [binary format ccca \ |
|
[expr {0xF0 | (($1 & 0xC) >> 2)}] \ |
|
[expr {0x80 | (($1 & 0x3) << 4) | (($2 & 0x3C) >> 2)}] \ |
|
[expr {0x80 | (($2 & 0x3) << 4) | ($3 & 0xF)}] \ |
|
$4] |
|
} |
|
|
|
# |
|
proc cesu2utfC char { |
|
# UTF-8: 11110xxx 10xx xxxx 10xx xxxx 10xxxxxx |
|
# CESU-8: 11101101 1010 yy yy 10xxxx xx 11101101 1011xxxx 10xxxxxx |
|
if {[regexp {^\xED([\xA0-\xAF])([\x80-\xBF])\xED([\xB0-\xBF])([\x80-\xBF])$} $char -> 1 2 3 4]} { |
|
binary scan $1 c 1 |
|
binary scan $2 c 2 |
|
binary scan $3 c 3 |
|
puts [list $1 $2 $3] |
|
#binary scan $4 c 4 |
|
incr 1 |
|
|
|
return [binary format ccca \ |
|
[expr {0xF0 | (($1 & 0xC) >> 2)}] \ |
|
[expr {0x80 | (($1 & 0x3) << 4) | (($2 & 0x3C) >> 2)}] \ |
|
[expr {0x80 | (($2 & 0x3) << 4) | ($3 & 0xF)}] \ |
|
$4] |
|
|
|
} else { |
|
puts "Invalid sequence: $char" |
|
return $char |
|
} |
|
} |
|
|
|
#data |
|
# surrogate pair \ud83d\udcc4 'document icon' = \U1F4C4 |
|
#\U1f400 'mouse' |
|
#\U1f600 'smiley' |
|
|
|
proc test1 {} { |
|
#JMN |
|
#package require Tcl 9- |
|
set c [encoding convertto cesu-8 \ud83d\ude10] |
|
set x [cesu2utfC $c] |
|
encoding convertfrom utf-8 $x |
|
} |
|
|
|
#e.g test2 "note \ud83f\udd1e etc" |
|
#e.g test2 "faces \ud83d\ude10 \ud83d\ude21 \ud83d\ude31" |
|
#note: test2 \U1f600 returns a mouse (\U1f400) instead of smiley |
|
# but test2 \U1f400 returns a mouse. |
|
# Either surrogated_string shouldn't include non BMP chars anyway (G.I.G.O?).. or we're doing something wrong. |
|
proc test2 {surrogated_string} { |
|
#JMN |
|
set cesu [encoding convertto cesu-8 $surrogated_string] |
|
set x [cesu2utf $cesu] |
|
encoding convertfrom utf-8 $x |
|
} |
|
|
|
# |
|
#test_enc_equivalency \U1f400 \U1f600 |
|
proc test_enc_equivalency {c1 c2} { |
|
package require punk::ansi |
|
namespace import ::punk::ansi::a+ ::punk::ansi::a |
|
foreach enc [lsort [encoding names]] { |
|
puts stdout "testing $enc" |
|
if {$enc in "iso2022 iso2022-jp iso2022-kr"} { |
|
puts stderr "skipping $enc - crashes tcl9 on non BMP codepoints" |
|
continue |
|
} |
|
if {[catch { |
|
set equiv [string equal [encoding convertto $enc $c1] [encoding convertto $enc $c2]] |
|
if {$equiv} { |
|
puts stdout "[a+ web-green]c1 = c2 under $enc[a]" |
|
} else { |
|
puts stdout "[a+ web-red]c1 not equal to c2 under $enc[a]" |
|
} |
|
} errM]} { |
|
puts stdout "error encoding c1/c2 under $enc\n $errM" |
|
} |
|
} |
|
} |
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace punk::cesu ---}] |
|
} |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# Secondary API namespace |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
tcl::namespace::eval punk::cesu::lib { |
|
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase |
|
tcl::namespace::path [tcl::namespace::parent] |
|
#*** !doctools |
|
#[subsection {Namespace punk::cesu::lib}] |
|
#[para] Secondary functions that are part of the API |
|
#[list_begin definitions] |
|
|
|
#proc utility1 {p1 args} { |
|
# #*** !doctools |
|
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] |
|
# #[para]Description of utility1 |
|
# return 1 |
|
#} |
|
|
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace punk::cesu::lib ---}] |
|
} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#*** !doctools |
|
#[section Internal] |
|
#tcl::namespace::eval punk::cesu::system { |
|
#*** !doctools |
|
#[subsection {Namespace punk::cesu::system}] |
|
#[para] Internal functions that are not part of the API |
|
|
|
|
|
|
|
#} |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Ready |
|
package provide punk::cesu [tcl::namespace::eval punk::cesu { |
|
variable pkg punk::cesu |
|
variable version |
|
set version 999999.0a1.0 |
|
}] |
|
return |
|
|
|
#*** !doctools |
|
#[manpage_end] |
|
|
|
|