Community discussions

MikroTik App
User avatar
Topic Author
Posts: 435
Joined: Fri Sep 25, 2020 3:30 pm
Location: Russia, Moscow


Mon Apr 03, 2023 12:40 pm

Rextended function:
# UCS2toUTF8 - convert a raw UCS-2 string (2 bytes per character, high byte
# first) into UTF-8 where every output byte is written as a "%XX" escape.
#   $1     : string containing the raw UCS-2 bytes
#   return : the escaped UTF-8 string; "" when $1 is empty or not a string
# An error is raised for code points above 0xFFFF.
:global UCS2toUTF8 do={
    # Format one byte value as two uppercase hexadecimal digits.
    :local numbyte2hex do={
        :local input [:tonum $1]
        :local hexchars "0123456789ABCDEF"
        :local convert [:pick $hexchars (($input >> 4) & 0xF)]
        :set convert ($convert.[:pick $hexchars ($input & 0xF)])
        :return $convert
    }

    # Build a 256-character string whose character at index N has byte value N;
    # it is the lookup table used by chr2int below.
    :local charsString ""
    :for x from=0 to=15 step=1 do={ :for y from=0 to=15 step=1 do={
        :local tmpHex "$[:pick "0123456789ABCDEF" $x ($x+1)]$[:pick "0123456789ABCDEF" $y ($y+1)]"
        :set $charsString "$charsString$[[:parse "(\"\\$tmpHex\")"]]"
    } }

    # chr2int <char> <table> : position of the single character $1 inside $2,
    # or -1 when $1 is empty, missing or longer than one character.
    :local chr2int do={:if (($1="") or ([:len $1] > 1) or ([:typeof $1] = "nothing")) do={:return -1}; :return [:find $2 $1 -1]}

    :local string $1
    :if (([:typeof $string] != "str") or ($string = "")) do={ :return "" }
    :local output ""

    :local lenstr [:len $string]
    # Consume the input two bytes at a time: one UCS-2 code unit per iteration.
    :for pos from=0 to=($lenstr - 1) step=2 do={
        # Both chr2int calls need the lookup table as second argument.
        :local input   (([$chr2int [:pick $string $pos ($pos + 1)] $charsString] * 0x100) + [$chr2int [:pick $string ($pos + 1) ($pos + 2)] $charsString])
        :local results [:toarray ""]
        :local utf   ""
        # Nested range checks select the 1-, 2- or 3-byte UTF-8 form.
        :if ($input > 0x7F) do={
            :if ($input > 0x7FF) do={
                :if ($input > 0xFFFF) do={
                    :if ($input > 0x10FFFF) do={
                        :error "UTF-8 do not have code point > of 0x10FFFF"
                    } else={
                        :error "UCS-2 do not have code point > of 0xFFFF"
# the following commented lines are not used on UCS-2
# but I have already prepared my script for future changes to work with all UNICODE code points from 0x000000 to 0x10FFFF as well...
#                        :set ($results->0) (0xF0 + ($input >> 18))
#                        :set ($results->1) (0x80 + (($input & 0x3FFFF) >> 12))
#                        :set ($results->2) (0x80 + (($input & 0xFFF) >> 6))
#                        :set ($results->3) (0x80 + ($input & 0x3F))
                    }
                } else={
                    :set ($results->0) (0xE0 + ($input >> 12))
                    :set ($results->1) (0x80 + (($input & 0xFFF) >> 6))
                    :set ($results->2) (0x80 + ($input & 0x3F))
                }
            } else={
                :set ($results->0) (0xC0 + ($input >> 6))
                :set ($results->1) (0x80 + ($input & 0x3F))
            }
        } else={
            :set ($results->0) $input
        }
        # Emit every produced byte as "%XX".
        :foreach item in=$results do={
            :set utf "$utf%$[$numbyte2hex $item]"
        }
        :set output "$output$utf"
    }
    :return $output
}

Pepelxl function:
# Lookup table with 256 entries written in ascending order "\00".."\FF": element N is the
# one-character string for byte value N. convertUcs2ToUtf8 indexes it to turn byte values into characters.
:global symbolsHex {"\00";"\01";"\02";"\03";"\04";"\05";"\06";"\07";"\08";"\09";"\0A";"\0B";"\0C";"\0D";"\0E";"\0F";"\10";"\11";"\12";"\13";"\14";"\15";"\16";"\17";"\18";"\19";"\1A";"\1B";"\1C";"\1D";"\1E";"\1F";"\20";"\21";"\22";"\23";"\24";"\25";"\26";"\27";"\28";"\29";"\2A";"\2B";"\2C";"\2D";"\2E";"\2F";"\30";"\31";"\32";"\33";"\34";"\35";"\36";"\37";"\38";"\39";"\3A";"\3B";"\3C";"\3D";"\3E";"\3F";"\40";"\41";"\42";"\43";"\44";"\45";"\46";"\47";"\48";"\49";"\4A";"\4B";"\4C";"\4D";"\4E";"\4F";"\50";"\51";"\52";"\53";"\54";"\55";"\56";"\57";"\58";"\59";"\5A";"\5B";"\5C";"\5D";"\5E";"\5F";"\60";"\61";"\62";"\63";"\64";"\65";"\66";"\67";"\68";"\69";"\6A";"\6B";"\6C";"\6D";"\6E";"\6F";"\70";"\71";"\72";"\73";"\74";"\75";"\76";"\77";"\78";"\79";"\7A";"\7B";"\7C";"\7D";"\7E";"\7F";"\80";"\81";"\82";"\83";"\84";"\85";"\86";"\87";"\88";"\89";"\8A";"\8B";"\8C";"\8D";"\8E";"\8F";"\90";"\91";"\92";"\93";"\94";"\95";"\96";"\97";"\98";"\99";"\9A";"\9B";"\9C";"\9D";"\9E";"\9F";"\A0";"\A1";"\A2";"\A3";"\A4";"\A5";"\A6";"\A7";"\A8";"\A9";"\AA";"\AB";"\AC";"\AD";"\AE";"\AF";"\B0";"\B1";"\B2";"\B3";"\B4";"\B5";"\B6";"\B7";"\B8";"\B9";"\BA";"\BB";"\BC";"\BD";"\BE";"\BF";"\C0";"\C1";"\C2";"\C3";"\C4";"\C5";"\C6";"\C7";"\C8";"\C9";"\CA";"\CB";"\CC";"\CD";"\CE";"\CF";"\D0";"\D1";"\D2";"\D3";"\D4";"\D5";"\D6";"\D7";"\D8";"\D9";"\DA";"\DB";"\DC";"\DD";"\DE";"\DF";"\E0";"\E1";"\E2";"\E3";"\E4";"\E5";"\E6";"\E7";"\E8";"\E9";"\EA";"\EB";"\EC";"\ED";"\EE";"\EF";"\F0";"\F1";"\F2";"\F3";"\F4";"\F5";"\F6";"\F7";"\F8";"\F9";"\FA";"\FB";"\FC";"\FD";"\FE";"\FF"};

# convertUcs2ToUtf8 - decode a UCS-2 string given as hexadecimal text
# (4 hex digits per character) into a raw UTF-8 string.
#   instring : named parameter holding the hex text, e.g. instring="0041"
#   return   : the decoded UTF-8 string
# Relies on the global table symbolsHex (byte value -> one-character string).
:global convertUcs2ToUtf8 do={
	:local decodedLine "";
	:global symbolsHex;
	# One UCS-2 code unit (4 hex digits) per iteration.
	:for curposition from=0 to=([:len $instring] -1) step=4 do={
		:local i [:tonum ("0x".[:pick $instring $curposition ($curposition +4)])];
		# 1-byte form: the code point is emitted unchanged.
		:if ($i < 0x80) do={
			:set $decodedLine ($decodedLine.($symbolsHex->$i));
		}
		# 2-byte form: 110xxxxx 10xxxxxx.
		:if (($i >= 0x80) and ($i < 0x800)) do={
			:local byteA (($i >> 6) | 192);
			:local byteB (($i & 63) | 128);
			:set $decodedLine ($decodedLine.($symbolsHex->$byteA).($symbolsHex->$byteB));
		}
		# 3-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
		:if ($i >= 0x800) do={
			:local byteA (($i >> 12) | 224);
			:local byteB ((($i >> 6) & 63) | 128);
			:local byteC (($i & 63) | 128);
			:set $decodedLine ($decodedLine.($symbolsHex->$byteA).($symbolsHex->$byteB).($symbolsHex->$byteC));
		}
	}
	:return $decodedLine;
}

I wonder whose algorithm is better?
User avatar
Forum Guru
Forum Guru
Posts: 12003
Joined: Tue Feb 25, 2014 12:49 pm
Location: Italy

Re: UCS2toUTF8

Mon Apr 03, 2023 11:06 pm

The two scripts are not directly comparable because they do not do the same thing.
The @pepelxl version returns a raw UTF-8 string, which cannot be used directly on MikroTik; it can only be sent by e-mail.
My script instead outputs a percent-escaped sequence, usable as a parameter for /tool fetch to send a message to Telegram (SMS 2 Telegram) or other social media,
or for any other use where Unicode must be escaped.
However, it can easily be modified to produce the same output.

This is not the correct @pepelxl version; you altered it...

That script must be decently formatted before it can be compared (not tested, just formatted):

reformatted code

# convertUcs2ToUtf8 - decode a UCS-2 string given as hexadecimal text
# (4 hex digits per character) into a raw UTF-8 string.
#   instring : named parameter holding the hex text, e.g. instring="0041"
#   return   : the decoded UTF-8 string
:global convertUcs2ToUtf8 do={
    :local decodedLine ""
    # Element N is the one-character string for byte value N (0x00..0xFF).
    :local symbolsHex {"\00";"\01";"\02";"\03";"\04";"\05";"\06";"\07";"\08";"\09";"\0A";"\0B";"\0C";"\0D";"\0E";"\0F";\
                       "\10";"\11";"\12";"\13";"\14";"\15";"\16";"\17";"\18";"\19";"\1A";"\1B";"\1C";"\1D";"\1E";"\1F";\
                       "\20";"\21";"\22";"\23";"\24";"\25";"\26";"\27";"\28";"\29";"\2A";"\2B";"\2C";"\2D";"\2E";"\2F";\
                       "\30";"\31";"\32";"\33";"\34";"\35";"\36";"\37";"\38";"\39";"\3A";"\3B";"\3C";"\3D";"\3E";"\3F";\
                       "\40";"\41";"\42";"\43";"\44";"\45";"\46";"\47";"\48";"\49";"\4A";"\4B";"\4C";"\4D";"\4E";"\4F";\
                       "\50";"\51";"\52";"\53";"\54";"\55";"\56";"\57";"\58";"\59";"\5A";"\5B";"\5C";"\5D";"\5E";"\5F";\
                       "\60";"\61";"\62";"\63";"\64";"\65";"\66";"\67";"\68";"\69";"\6A";"\6B";"\6C";"\6D";"\6E";"\6F";\
                       "\70";"\71";"\72";"\73";"\74";"\75";"\76";"\77";"\78";"\79";"\7A";"\7B";"\7C";"\7D";"\7E";"\7F";\
                       "\80";"\81";"\82";"\83";"\84";"\85";"\86";"\87";"\88";"\89";"\8A";"\8B";"\8C";"\8D";"\8E";"\8F";\
                       "\90";"\91";"\92";"\93";"\94";"\95";"\96";"\97";"\98";"\99";"\9A";"\9B";"\9C";"\9D";"\9E";"\9F";\
                       "\A0";"\A1";"\A2";"\A3";"\A4";"\A5";"\A6";"\A7";"\A8";"\A9";"\AA";"\AB";"\AC";"\AD";"\AE";"\AF";\
                       "\B0";"\B1";"\B2";"\B3";"\B4";"\B5";"\B6";"\B7";"\B8";"\B9";"\BA";"\BB";"\BC";"\BD";"\BE";"\BF";\
                       "\C0";"\C1";"\C2";"\C3";"\C4";"\C5";"\C6";"\C7";"\C8";"\C9";"\CA";"\CB";"\CC";"\CD";"\CE";"\CF";\
                       "\D0";"\D1";"\D2";"\D3";"\D4";"\D5";"\D6";"\D7";"\D8";"\D9";"\DA";"\DB";"\DC";"\DD";"\DE";"\DF";\
                       "\E0";"\E1";"\E2";"\E3";"\E4";"\E5";"\E6";"\E7";"\E8";"\E9";"\EA";"\EB";"\EC";"\ED";"\EE";"\EF";\
                       "\F0";"\F1";"\F2";"\F3";"\F4";"\F5";"\F6";"\F7";"\F8";"\F9";"\FA";"\FB";"\FC";"\FD";"\FE";"\FF"}
    # One UCS-2 code unit (4 hex digits) per iteration.
    :for curposition from=0 to=([:len $instring] - 1) step=4 do={
        :local i [:tonum ("0x".[:pick $instring $curposition ($curposition + 4)])]
        # 1-byte form: the code point is emitted unchanged.
        :if ($i < 0x80) do={
            :set $decodedLine ($decodedLine.($symbolsHex->$i))
        }
        # 2-byte form: 110xxxxx 10xxxxxx.
        :if (($i >= 0x80) and ($i < 0x800)) do={
            :local byteA (($i >> 6) | 192)
            :local byteB (($i & 63) | 128)
            :set $decodedLine ($decodedLine.($symbolsHex->$byteA).($symbolsHex->$byteB))
        }
        # 3-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
        :if ($i >= 0x800) do={
            :local byteA (($i >> 12) | 224)
            :local byteB ((($i >> 6) & 63) | 128)
            :local byteC (($i & 63) | 128)
            :set $decodedLine ($decodedLine.($symbolsHex->$byteA).($symbolsHex->$byteB).($symbolsHex->$byteC))
        }
    }
    :return $decodedLine
}

# :put [$convertUcs2ToUtf8 instring="string"]
The original requires the parameter to be passed by name as "instring"; in my version the parameter is positional, so its name does not matter.

The @pepelxl version does not have any error checks.

I prefer to build the needed "charsString" / "symbolsHex" with a quick loop rather than a multi-line variable, since it is just the 00..FF sequence, nothing special.

Comparing the two main blocks, my "if" checks are executed fewer times, because they are nested:

rextended fragment code

        # Nested range checks: at most three comparisons select the UTF-8 form.
        :if ($input > 0x7F) do={
            :if ($input > 0x7FF) do={
                :if ($input > 0xFFFF) do={
# [...] ignoring error checks [...]
                } else={
                    :set ($results->0) (0xE0 + ( $input >> 12        ))
                    :set ($results->1) (0x80 + (($input >>  6) & 0x3F))
                    :set ($results->2) (0x80 + ( $input        & 0x3F))
                }
            } else={
                :set ($results->0) (0xC0 + ($input >>    6))
                :set ($results->1) (0x80 + ($input  & 0x3F))
            }
        } else={
            :set ($results->0) $input
        }

In the other version, the "if" checks are executed needlessly, about 3 times more often:

pepelxl fragment code

        # Three independent range checks; every one is evaluated for each character.
        :if ($i < 0x80) do={
            :set $decodedLine ($decodedLine.($symbolsHex->$i))
        }
        :if (($i >= 0x80) and ($i < 0x800)) do={
            :local byteA (($i >> 6) | 192)
            :local byteB (($i & 63) | 128)
            :set $decodedLine ($decodedLine.($symbolsHex->$byteA).($symbolsHex->$byteB))
        }
        :if ($i >= 0x800) do={
            :local byteA (($i >> 12) | 224)
            :local byteB ((($i >> 6) & 63) | 128)
            :local byteC (($i & 63) | 128)
            :set $decodedLine ($decodedLine.($symbolsHex->$byteA).($symbolsHex->$byteB).($symbolsHex->$byteC))
        }

But both use the same algorithm: the encoding scheme is fixed by rules, and the way the bytes are obtained is the same....

1 Byte char:
in = out : the same...

2 Bytes char:
a = (($i >> 6) | 192) = ($i >> 6) + 0xC0 = 0xC0 + ($i >> 6) : the same...
b = (($i & 63) | 128) = ($i & 0x3F) + 0x80 = 0x80 + ($i & 0x3F) : the same...

3 Bytes char:
a = (($i >> 12) | 224) = ($i >> 12) + 0xE0 = 0xE0 + ($i >> 12) : the same...
b = ((($i >> 6) & 63) | 128) = (($i >> 6) & 0x3F) + 0x80 = 0x80 + (($i >> 6) & 0x3F)
c = (($i & 63) | 128) = ($i & 0x3F) + 0x80 = 0x80 + ($i & 0x3F) : the same...

1 Byte char:
in = out : the same...

2 Bytes char:
a = (0xC0 + ($input >> 6)) = 0xC0 + ($input >> 6) : the same...
b = (0x80 + ($input & 0x3F)) = 0x80 + ($input & 0x3F) : the same...

3 Bytes char:
a = (0xE0 + ($input >> 12)) = 0xE0 + ($input >> 12) : the same...
b = (0x80 + (($input & 0xFFF) >> 6)) = 0x80 + (($input & 0xFFF) >> 6)
c = (0x80 + ($input & 0x3F)) = 0x80 + ($input & 0x3F) : the same...

The only difference is how "b" is obtained for a 3-byte char: both add 0x80 to something, only the order of the operations changes:
($i >> 6) & 0x3F
Shift $i right by 6 and clear all unwanted bits...
(this is how @pepelxl wrote it; it looks nicer to me, so I will arrange mine in the same order as well)

($input & 0xFFF) >> 6
Clear all unwanted bits, then shift right by 6...

So, in the end, my judgment may be biased because I wrote it,
but objectively mine is clearer, is already set up to be expanded or to change the output format, and does fewer operations.
User avatar
Topic Author
Posts: 435
Joined: Fri Sep 25, 2020 3:30 pm
Location: Russia, Moscow

Re: UCS2toUTF8

Tue Apr 04, 2023 9:31 am

OK, Thank you very much for the detailed analysis.
User avatar
Forum Guru
Forum Guru
Posts: 12003
Joined: Tue Feb 25, 2014 12:49 pm
Location: Italy

Re: UCS2toUTF8

Sat Jul 15, 2023 2:45 am

Rextended function:

Pepelxl function:
I can not fix your post.
As a courtesy, please edit the first post: remove the functions and put a link to each function instead, because copy & paste also copies the errors....

Who is online

Users browsing this forum: No registered users and 37 guests