Code: Select all
# UCS2toUTF8 - converts a raw UCS-2 string into percent-encoded UTF-8.
#
# Input  ($1): string of raw bytes, two bytes per character, high byte first
#              (the first byte of each pair is multiplied by 0x100 below).
# Output     : string where every resulting UTF-8 byte is written as "%XX"
#              (upper-case hex). Returns "" when $1 is not a string or is empty.
# Errors     : raises an error for code points above 0xFFFF.
#
# Example: [$UCS2toUTF8 "\04\10"] returns "%D0%90" (U+0410).
:global UCS2toUTF8 do={
    # Formats one byte value as two upper-case hexadecimal digits.
    :local numbyte2hex do={
        :local input [:tonum $1]
        :local hexchars "0123456789ABCDEF"
        :local convert [:pick $hexchars (($input >> 4) & 0xF)]
        :set convert ($convert.[:pick $hexchars ($input & 0xF)])
        :return $convert
    }

    # Build a 256-character lookup string whose character at index N is the
    # byte with value N; the position of a character in it is its byte value.
    :local charsString ""
    :for x from=0 to=15 step=1 do={ :for y from=0 to=15 step=1 do={
        :local tmpHex "$[:pick "0123456789ABCDEF" $x ($x+1)]$[:pick "0123456789ABCDEF" $y ($y+1)]"
        :set $charsString "$charsString$[[:parse "(\"\\$tmpHex\")"]]"
    } }

    # Returns the index of the single character $1 inside the table $2,
    # or -1 when $1 is empty, missing, or longer than one character.
    :local chr2int do={:if (($1="") or ([:len $1] > 1) or ([:typeof $1] = "nothing")) do={:return -1}; :return [:find $2 $1 -1]}

    :local string $1
    :if (([:typeof $string] != "str") or ($string = "")) do={ :return "" }

    :local output ""
    :local lenstr [:len $string]
    # NOTE(review): with an odd-length input the final low-byte pick is empty,
    # so chr2int yields -1 for it; callers should pass whole 2-byte units.
    :for pos from=0 to=($lenstr - 1) step=2 do={
        # Both lookups must receive the table as second argument; without it
        # the helper has nothing to search and the high byte is lost.
        :local hiByte [$chr2int [:pick $string $pos ($pos + 1)] $charsString]
        :local loByte [$chr2int [:pick $string ($pos + 1) ($pos + 2)] $charsString]
        :local input (($hiByte * 0x100) + $loByte)
        :local results [:toarray ""]
        :local utf ""
        :if ($input > 0x7F) do={
            :if ($input > 0x7FF) do={
                :if ($input > 0xFFFF) do={
                    :if ($input > 0x10FFFF) do={
                        :error "UTF-8 do not have code point > of 0x10FFFF"
                    } else={
                        :error "UCS-2 do not have code point > of 0xFFFF"
                        # The following commented lines are not used for UCS-2;
                        # they are kept for a future extension to all Unicode
                        # code points from 0x000000 to 0x10FFFF.
                        # :set ($results->0) (0xF0 + ($input >> 18))
                        # :set ($results->1) (0x80 + (($input & 0x3FFFF) >> 12))
                        # :set ($results->2) (0x80 + (($input & 0xFFF) >> 6))
                        # :set ($results->3) (0x80 + ($input & 0x3F))
                    }
                } else={
                    # 0x800..0xFFFF: three-byte sequence
                    :set ($results->0) (0xE0 + ($input >> 12))
                    :set ($results->1) (0x80 + (($input & 0xFFF) >> 6))
                    :set ($results->2) (0x80 + ($input & 0x3F))
                }
            } else={
                # 0x80..0x7FF: two-byte sequence
                :set ($results->0) (0xC0 + ($input >> 6))
                :set ($results->1) (0x80 + ($input & 0x3F))
            }
        } else={
            # 0x00..0x7F: single byte, value unchanged
            :set ($results->0) $input
        }
        # Emit every produced byte as "%XX".
        :foreach item in=$results do={
            :set utf "$utf%$[$numbyte2hex $item]"
        }
        :set output "$output$utf"
    }
    :return $output
}
Pepelxl function:
Code: Select all
# symbolsHex - lookup table of 256 one-character strings written as \XX hex
# escapes in ascending order, so that ($symbolsHex->N) is the character whose
# byte value is N. Used to turn computed byte values into output characters.
:global symbolsHex {"\00";"\01";"\02";"\03";"\04";"\05";"\06";"\07";"\08";"\09";"\0A";"\0B";"\0C";"\0D";"\0E";"\0F";"\10";"\11";"\12";"\13";"\14";"\15";"\16";"\17";"\18";"\19";"\1A";"\1B";"\1C";"\1D";"\1E";"\1F";"\20";"\21";"\22";"\23";"\24";"\25";"\26";"\27";"\28";"\29";"\2A";"\2B";"\2C";"\2D";"\2E";"\2F";"\30";"\31";"\32";"\33";"\34";"\35";"\36";"\37";"\38";"\39";"\3A";"\3B";"\3C";"\3D";"\3E";"\3F";"\40";"\41";"\42";"\43";"\44";"\45";"\46";"\47";"\48";"\49";"\4A";"\4B";"\4C";"\4D";"\4E";"\4F";"\50";"\51";"\52";"\53";"\54";"\55";"\56";"\57";"\58";"\59";"\5A";"\5B";"\5C";"\5D";"\5E";"\5F";"\60";"\61";"\62";"\63";"\64";"\65";"\66";"\67";"\68";"\69";"\6A";"\6B";"\6C";"\6D";"\6E";"\6F";"\70";"\71";"\72";"\73";"\74";"\75";"\76";"\77";"\78";"\79";"\7A";"\7B";"\7C";"\7D";"\7E";"\7F";"\80";"\81";"\82";"\83";"\84";"\85";"\86";"\87";"\88";"\89";"\8A";"\8B";"\8C";"\8D";"\8E";"\8F";"\90";"\91";"\92";"\93";"\94";"\95";"\96";"\97";"\98";"\99";"\9A";"\9B";"\9C";"\9D";"\9E";"\9F";"\A0";"\A1";"\A2";"\A3";"\A4";"\A5";"\A6";"\A7";"\A8";"\A9";"\AA";"\AB";"\AC";"\AD";"\AE";"\AF";"\B0";"\B1";"\B2";"\B3";"\B4";"\B5";"\B6";"\B7";"\B8";"\B9";"\BA";"\BB";"\BC";"\BD";"\BE";"\BF";"\C0";"\C1";"\C2";"\C3";"\C4";"\C5";"\C6";"\C7";"\C8";"\C9";"\CA";"\CB";"\CC";"\CD";"\CE";"\CF";"\D0";"\D1";"\D2";"\D3";"\D4";"\D5";"\D6";"\D7";"\D8";"\D9";"\DA";"\DB";"\DC";"\DD";"\DE";"\DF";"\E0";"\E1";"\E2";"\E3";"\E4";"\E5";"\E6";"\E7";"\E8";"\E9";"\EA";"\EB";"\EC";"\ED";"\EE";"\EF";"\F0";"\F1";"\F2";"\F3";"\F4";"\F5";"\F6";"\F7";"\F8";"\F9";"\FA";"\FB";"\FC";"\FD";"\FE";"\FF"};
# convertUcs2ToUtf8 - decodes a hex-text UCS-2 string into raw UTF-8 bytes.
#
# Named parameter:
#   instring - text made of 4 hexadecimal digits per character
#              (e.g. "0410" for U+0410); read in steps of 4 characters.
# Returns      the UTF-8 encoded string (raw bytes, not percent-encoded).
# Requires     the global lookup table symbolsHex, where ($symbolsHex->N)
#              is the character with byte value N.
#
# Usage: [$convertUcs2ToUtf8 instring="04100411"]
:global convertUcs2ToUtf8 do={
    :local decodedLine "";
    :global symbolsHex;
    :for curposition from=0 to=([:len $instring] -1) step=4 do={
        # Parse the next 4 hex digits as one 16-bit code unit.
        :local i [:tonum ("0x".[:pick $instring $curposition ($curposition +4)])];
        # 0x00..0x7F: one byte, value unchanged.
        :if ($i < 0x80) do={
            :set $decodedLine ($decodedLine.($symbolsHex->$i));
        };
        # 0x80..0x7FF: two bytes, 110xxxxx 10xxxxxx.
        :if (($i >= 0x80) and ($i < 0x800)) do={
            :local byteA (($i >> 6) | 192);
            :local byteB (($i & 63) | 128);
            :set $decodedLine ($decodedLine.($symbolsHex->$byteA).($symbolsHex->$byteB));
        };
        # 0x800 and above: three bytes, 1110xxxx 10xxxxxx 10xxxxxx.
        :if ($i >= 0x800) do={
            :local byteA (($i >> 12) | 224);
            :local byteB ((($i >> 6) & 63) | 128);
            :local byteC (($i & 63) | 128);
            :set $decodedLine ($decodedLine.($symbolsHex->$byteA).($symbolsHex->$byteB).($symbolsHex->$byteC));
        };
    };
    :return $decodedLine;
}
I wonder whose algorithm is better?