utf8support.bi:
Code: Select all
/'
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
2022
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This file is the product of the original utf8.bas and utf8.bi files from Frank Hoogerbeets written in Sharp Basic (https://www.freebasic.net/forum/viewtopic.php?t=26170) and
have been merged and adapted to FreeBasic 1.09 and have been tested under Windows and Debian Linux 11.4. Also the margins have been increased from 72 to 182 columns. The IDE
being used is Geany.
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Copyright (C) 2017 Frank Hoogerbeets <frank@sharpbasic.com>
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
This library is free software; you can redistribute it and/or modify it under the terms of the Modified GNU Library General Public License either version 2.0 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE.
See the Modified GNU Library General Public License for more details:
https://sharpbasic.com/downloads/mlgpl.html
https://sharpbasic.com/downloads/mlgpl.txt
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
'/
'Version reporting to MultiFB
Dim Shared As String UTFSupVer
UTFSupVer = "20220808"
#include once "crt/string.bi"
' Repair codes &H01-&H1f and &H80-&Hff manually:
Dim Shared As String Utf8Rep (&Hffff)
Dim As Integer Utf8Tmp
For Utf8Tmp = 0 To &H7f
Utf8Rep (Utf8Tmp) = Chr (Utf8Tmp)
Next Utf8Tmp
Utf8Rep ( 1) = "☺": Utf8Rep ( 2) = "☻": Utf8Rep ( 3) = "♥": Utf8Rep ( 4) = "♦": Utf8Rep ( 5) = "♣": Utf8Rep ( 6) = "♠": Utf8Rep ( 14) = "♫": Utf8Rep ( 15) = "☼"
Utf8Rep ( 16) = "►": Utf8Rep ( 17) = "◄": Utf8Rep ( 18) = "↕": Utf8Rep ( 19) = "‼": Utf8Rep ( 20) = "¶": Utf8Rep ( 21) = "§": Utf8Rep ( 22) = "▬": Utf8Rep ( 23) = "↨"
Utf8Rep ( 24) = "↑": Utf8Rep ( 25) = "↓": Utf8Rep ( 26) = "→": Utf8Rep ( 27) = "←": Utf8Rep ( 28) = "∟": Utf8Rep ( 29) = "↔": Utf8Rep ( 30) = "▲": Utf8Rep ( 31) = "▼"
Utf8Rep (&H80) = "€": Utf8Rep (&H81) = "": Utf8Rep (&H82) = "‚": Utf8Rep (&H83) = "ƒ": Utf8Rep (&H84) = "„": Utf8Rep (&H85) = "…": Utf8Rep (&H86) = "†": Utf8Rep (&H87) = "‡"
Utf8Rep (&H88) = "ˆ": Utf8Rep (&H89) = "‰": Utf8Rep (&H8a) = "Š": Utf8Rep (&H8b) = "‹": Utf8Rep (&H8c) = "Œ": Utf8Rep (&H8d) = "": Utf8Rep (&H8e) = "Ž": Utf8Rep (&H8f) = ""
Utf8Rep (&H90) = "": Utf8Rep (&H91) = "‘": Utf8Rep (&H92) = "’": Utf8Rep (&H93) = "“": Utf8Rep (&H94) = "”": Utf8Rep (&H95) = "•": Utf8Rep (&H96) = "–": Utf8Rep (&H97) = "—"
Utf8Rep (&H98) = "˜": Utf8Rep (&H99) = "™": Utf8Rep (&H9a) = "š": Utf8Rep (&H9b) = "›": Utf8Rep (&H9c) = "œ": Utf8Rep (&H9d) = "": Utf8Rep (&H9e) = "ž": Utf8Rep (&H9f) = "Ÿ"
Utf8Rep (&Ha0) = "ツ": Utf8Rep (&Ha1) = "¡": Utf8Rep (&Ha2) = "¢": Utf8Rep (&Ha3) = "£": Utf8Rep (&Ha4) = "¤": Utf8Rep (&Ha5) = "¥": Utf8Rep (&Ha6) = "¦": Utf8Rep (&Ha7) = "§"
Utf8Rep (&Ha8) = "¨": Utf8Rep (&Ha9) = "©": Utf8Rep (&Haa) = "ª": Utf8Rep (&Hab) = "«": Utf8Rep (&Hac) = "¬": Utf8Rep (&Had) = " ": Utf8Rep (&Hae) = "®": Utf8Rep (&Haf) = "¯"
Utf8Rep (&Hb0) = "°": Utf8Rep (&Hb1) = "±": Utf8Rep (&Hb2) = "²": Utf8Rep (&Hb3) = "³": Utf8Rep (&Hb4) = "´": Utf8Rep (&Hb5) = "µ": Utf8Rep (&Hb6) = "¶": Utf8Rep (&Hb7) = "·"
Utf8Rep (&Hb8) = "¸": Utf8Rep (&Hb9) = "¹": Utf8Rep (&Hba) = "º": Utf8Rep (&Hbb) = "»": Utf8Rep (&Hbc) = "¼": Utf8Rep (&Hbd) = "½": Utf8Rep (&Hbe) = "¾": Utf8Rep (&Hbf) = "¿"
Utf8Rep (&Hc0) = "À": Utf8Rep (&Hc1) = "Á": Utf8Rep (&Hc2) = "Â": Utf8Rep (&Hc3) = "Ã": Utf8Rep (&Hc4) = "Ä": Utf8Rep (&Hc5) = "Å": Utf8Rep (&Hc6) = "Æ": Utf8Rep (&Hc7) = "Ç"
Utf8Rep (&Hc8) = "È": Utf8Rep (&Hc9) = "É": Utf8Rep (&Hca) = "Ê": Utf8Rep (&Hcb) = "Ë": Utf8Rep (&Hcc) = "Ì": Utf8Rep (&Hcd) = "Í": Utf8Rep (&Hce) = "Î": Utf8Rep (&Hcf) = "Ï"
Utf8Rep (&Hd0) = "Ð": Utf8Rep (&Hd1) = "Ñ": Utf8Rep (&Hd2) = "Ò": Utf8Rep (&Hd3) = "Ó": Utf8Rep (&Hd4) = "Ô": Utf8Rep (&Hd5) = "Õ": Utf8Rep (&Hd6) = "Ö": Utf8Rep (&Hd7) = "×"
Utf8Rep (&Hd8) = "Ø": Utf8Rep (&Hd9) = "Ù": Utf8Rep (&Hda) = "Ú": Utf8Rep (&Hdb) = "Û": Utf8Rep (&Hdc) = "Ü": Utf8Rep (&Hdd) = "Ý": Utf8Rep (&Hde) = "Þ": Utf8Rep (&Hdf) = "ß"
Utf8Rep (&He0) = "à": Utf8Rep (&He1) = "á": Utf8Rep (&He2) = "â": Utf8Rep (&He3) = "ã": Utf8Rep (&He4) = "ä": Utf8Rep (&He5) = "å": Utf8Rep (&He6) = "æ": Utf8Rep (&He7) = "ç"
Utf8Rep (&He8) = "è": Utf8Rep (&He9) = "é": Utf8Rep (&Hea) = "ê": Utf8Rep (&Heb) = "ë": Utf8Rep (&Hec) = "ì": Utf8Rep (&Hed) = "í": Utf8Rep (&Hee) = "î": Utf8Rep (&Hef) = "ï"
Utf8Rep (&Hf0) = "ð": Utf8Rep (&Hf1) = "ñ": Utf8Rep (&Hf2) = "ò": Utf8Rep (&Hf3) = "ó": Utf8Rep (&Hf4) = "ô": Utf8Rep (&Hf5) = "õ": Utf8Rep (&Hf6) = "ö": Utf8Rep (&Hf7) = "÷"
Utf8Rep (&Hf8) = "ø": Utf8Rep (&Hf9) = "ù": Utf8Rep (&Hfa) = "ú": Utf8Rep (&Hfb) = "û": Utf8Rep (&Hfc) = "ü": Utf8Rep (&Hfd) = "ý": Utf8Rep (&Hfe) = "þ": Utf8Rep (&Hff) = "ÿ"
Utf8Rep (&H0560) = "ՠ": Utf8Rep (&H0560) = "ֈ": Utf8Rep (&H0560) = "։": Utf8Rep (&H0560) = "֍": Utf8Rep (&H0560) = "֎": Utf8Rep (&H0560) = "֏"
' pointer nil indicator for easy reading
#define nil 0
' variable within range check macro
#define within_max_(x, y) iif(x > y, x, y)
#define within_min_(x, y) iif(x < y, x, y)
#define Within(x, y, v) iif((v) >= within_min_((x), (y)) andalso (v) <= within_max_((x), (y)), true, false)
type PChar8 as zstring ptr
type PCChar as const zstring ptr
' for easy reading and possible future adjustment
type UString as string
' general procedures
declare function ByteIndex(b as any ptr, byval size as uinteger, byval chval as ubyte) as integer
declare sub ResizeStr(byref s as string, byval size as uinteger)
' string routines supporting UTF-8
declare function UAsc(byref value as const ustring) as uinteger
declare function UChr(byval value as uinteger) as ustring
declare function UInsert overload (byref s1 as const ustring, byref s2 as const ustring, byval start as uinteger) as const ustring
declare function UInsert overload (byref s1 as const ustring, byref s2 as const ustring, byval start as uinteger, byval count as uinteger) as const ustring
declare function UInstr overload (byref sp as const ustring, byref sb as const ustring) as integer
declare function UInstr overload (byval spos as uinteger, byref sp as const ustring, byref sb as const ustring) as integer
declare function UIsAscii(byref s as const ustring) as boolean
declare function UIvstr(byref s as const string) as boolean
declare function ULCase(byref s as const ustring, byref lang as const string = "") as const ustring
declare function ULeft(byref s as const ustring, byval count as uinteger) as const ustring
declare function ULen(byref s as const ustring) as uinteger
declare function UMid overload (byref s as const ustring, byval start as uinteger, byval count as uinteger) as const ustring
declare function UMid overload (byref s as const ustring, byval start as uinteger) as const ustring
declare function URemove(byref s as ustring, byval start as uinteger, byval count as uinteger) as const ustring
declare sub URepair(byref s as string)
declare sub UReplace(byref t as string, byref i as const string, byref s as const string, byval a as integer = 1)
declare sub UReplaceAll(byref t as ustring, byref i as const ustring, byref s as const ustring, byval a as integer = 1)
declare function UReverse(byref s as const ustring) as ustring
declare function URight(byref s as const ustring, byval count as uinteger) as const ustring
declare function UUCase(byref s as const ustring, byref lang as const string = "") as const ustring
#define byte1_is_utf8 (((*p)[1] and &hC0) = &h80)
#define byte2_is_utf8 (((*p)[2] and &hC0) = &h80)
#define byte3_is_utf8 (((*p)[3] and &hC0) = &h80)
' house-keeping procedures
declare function UTF8Asc(p as PChar8) as uinteger
declare function UTF8Cod(byval value as uinteger) as ustring
declare function UTF8CpLenX(p as PChar8) as uinteger
declare function UTF8CpLen(p as PChar8) as uinteger
declare function UTF8CpStart(p as PChar8, byval length as uinteger, byval index as uinteger) as PChar8
declare function UTF8Eval(p as PChar8, byval count as uinteger, byval flag as boolean = true) as integer
declare function UTF8InstrP(sp as PChar8, byval splen as uinteger, sb as PChar8, byval sblen as uinteger) as PChar8
declare function UTF8Len(p as PChar8, byval bytes as uinteger) as uinteger
declare sub UTF8Repair(p as PChar8)
declare sub UTF8ResizeBuffer (byref s as ustring, byref outp as PChar8, byval icnt as uinteger, byval ocnt as uinteger, byval osize as integer, byval nsize as integer)
declare function UTF8Reverse(p as PChar8, byval count as uinteger) as ustring
' --- general procedures ---
function ByteIndex(b as any ptr, byval size as uinteger, byval chval as ubyte) as integer
' returns -1 if byte was not found
dim p as any ptr
dim result as integer = -1
p = memchr(b, chval, size)
if p > 0 then
result = p - b
end if
return result
end function
sub ResizeStr(byref s as string, byval size as uinteger)
var slen = len(s)
if size > slen then
s += space(size - slen)
elseif size < slen then
s = left(s, size)
end if
end sub
' string procedures supporting UTF-8
' UTF-8 value from unicode codepoint
function UAsc(byref value as const ustring) as uinteger
' generates error if codepoint is illegal
dim result as uinteger
result = UTF8Asc(value)
if result = 0 andalso (value <> chr(0)) then
' illegal codepoint
error 1 ' illegal function call
end if
return result
end function
' UTF-8 codepoint from unicode value
function UChr(byval value as uinteger) as ustring
' generates error if codepoint is illegal
dim result as ustring
result = UTF8Cod(value)
if result = "" then
' illegal codepoint
error 1 'illegal function call
end if
return result
end function
' UTF-8 insert string
function UInsert overload (byref s1 as const ustring, byref s2 as const ustring, byval start as uinteger) as const ustring
return ULeft(s1, start - 1) + s2 + UMid(s1, start)
end function
' UTF-8 insert string with overwrite, remove
function UInsert overload (byref s1 as const ustring, byref s2 as const ustring, byval start as uinteger, byval count as uinteger) as const ustring
return ULeft(s1, start - 1) + s2 + UMid(s1, start + count)
end function
' UTF-8 Instr (fixed start position)
function UInstr overload (byref sp as const ustring, byref sb as const ustring) as integer
' with default start position
return UInstr(1, sp, sb)
end function
' UTF-8 Instr (custom start position)
function UInstr overload (byval spos as uinteger, byref sp as const ustring, byref sb as const ustring) as integer
' with custom start position
dim as uinteger i, splen
dim as PChar8 p, sposp
if spos = 1 then
i = instr(sp, sb)
if i > 0 then
return UTF8Len(sp, i - 1) + 1
end if
elseif spos > 1 then
splen = len(sp)
sposp = UTF8CpStart(sp, splen, spos - 1)
if sposp = nil then
return 0
end if
p = UTF8InstrP(sposp, splen + (strptr(sp) - sposp), sb, len(sb))
if p = nil then
return 0
end if
return spos + UTF8Len(sposp, p - sposp)
end if
return 0
end function
' UTF-8 is ascii
function UIsAscii(byref s as const ustring) as boolean
' returns TRUE if string only contains codepoints < &h80
for i as uinteger = 0 to len(s)
if s[i] > &h7F then
return false
end if
next
return true
end function
' UTF-8 is valid string
function UIvstr(byref s as const string) as boolean
return UTF8Eval(s, len(s)) = -1
end function
' UTF-8 left part of string
function ULeft(byref s as const ustring, byval count as uinteger) as const ustring
return UMid(s, 1, count)
end function
' UTF-8 length
function ULen(byref s as const ustring) as uinteger
return UTF8Len(s, len(s))
end function
' UTF-8 mid rest of the string
function UMid overload (byref s as const ustring, byval start as uinteger) as const ustring
return UMid(s, start, ulen(s) - start + 1)
end function
' UTF-8 mid default
function UMid overload (byref s as const ustring, byval start as uinteger, byval count as uinteger) as const ustring
dim as PChar8 sbpos, ebpos
dim as uinteger maxb, slen
dim result as ustring
slen = len(s)
sbpos = UTF8CpStart(s, slen, start - 1)
result = ""
if sbpos = nil then
return result
else
maxb = strptr(s) + slen - sbpos
ebpos = UTF8CpStart(sbpos, maxb, count)
if ebpos = nil then
result = mid(s, sbpos - strptr(s) + 1, maxb)
else
result = mid(s, sbpos - strptr(s) + 1, ebpos - sbpos)
end if
end if
return result
end function
function URemove(byref s as ustring, byval start as uinteger, byval count as uinteger) as const ustring
return UInsert(s, "", start, count)
end function
' UTF-8 repair string
sub URepair(byref s as string)
' try to repair if not valid
if not UIvstr(s) then
UTF8Repair(s)
end if
end sub
sub UReplace(byref t as ustring, byref i as const ustring, byref s as const ustring, byval a as integer = 1)
dim as uinteger li, p
p = instr(a, t, i)
if p > 0 then
li = len(i)
if li <> len(s) then
t = left(t, p - 1) + s + mid(t, p + li)
else
mid(t, p) = s
end if
end if
end sub
sub UReplaceAll(byref t as ustring, byref i as const ustring, byref s as const ustring, byval a as integer = 1)
dim as uinteger li, ls, p
p = instr(a, t, i)
if p = 0 then
exit sub
end if
li = len(i)
ls = len(s)
if li = ls then
li = 0
end if
while p > 0
if li then
t = left(t, p - 1) + s + mid(t, p + li)
else
mid(t, p) = s
end if
p = instr(p + ls, t, i)
wend
end sub
function UReverse(byref s as const ustring) as ustring
dim as uinteger slen = len(s)
if slen <> 0 then
return UTF8Reverse(s, slen)
end if
end function
function URight(byref s as const ustring, byval count as uinteger) as const ustring
return UMid(s, ulen(s) - count + 1)
end function
function ULCase(byref s as const ustring, byref lang as const string = "") as const ustring
dim as uinteger cdiff, p
dim as pcchar istr, istrend
dim as PChar8 outp
dim as boolean IsTurkish
dim as ubyte c1, c2, c3, nc1, nc2, nc3
dim as ustring result
if len(s) = 0 then
exit function
end if
istr = strptr(s)
istrend = istr + len(s)
result = s
while istr < istrend
c1 = (*istr)[0]
select case c1
case &h41 to &h5A
exit while
case &hC3 to &hFF
select case c1
case &hC3 to &hC9, &hCE, &hCF, &hD0 to &hD5, &hE1, &hE2, &hE5
c2 = (*istr)[1]
select case c1
case &hC3
if within(&h80, &h9E, c2) then
exit while
end if
case &hC4
select case c2
case &h80 to &hAF, &hB2 to &hB6
if (c2 mod 2) = 0 then
exit while
end if
case &hB8 to &hFF
if (c2 mod 2) = 1 then
exit while
end if
case &hB0
exit while
end select
case &hC5
select case c2
case &h8A to &hB7
if (c2 mod 2) = 0 then
exit while
end if
case &h00 to &h88, &hB9 to &hFF
if (c2 mod 2) = 1 then
exit while
end if
case &hB8
exit while
end select
case &hE5
if c2 = &hBC then
if within(&hA1, &hBA, (*istr)[2]) then
exit while
end if
end if
case else
exit while
end select
end select
end select
istr += 1
wend
if istr >= istrend then
return result
end if
if lang = "tr" orelse lang = "az" then
IsTurkish = true
end if
outp = strptr(result) + (istr - strptr(s))
cdiff = 0
while istr < istrend
c1 = (*istr)[0]
select case c1
case &h41 to &h5A
' ASCII
if IsTurkish andalso (c1 = asc("I")) then
p = outp - strptr(result)
ResizeStr(result, len(result) + 1)
outp = strptr(result) + p
(*outp)[0] = &hC4
outp += 1
(*outp)[0] = &hB1
cdiff -= 1
else
(*outp)[0] = c1 + &h20
end if
istr += 1
outp += 1
case &hC3 to &hD5
' two bytes
c2 = (*istr)[1]
nc1 = c1
nc2 = c2
select case c1
case &hC3
select case c2
case &h80 to &h96, &h98 to &h9E
nc2 = c2 + &h20
end select
case &hC4
select case c2
case &h80 to &hAF, &hB2 to &hB7
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
case &hB0
(*outp)[0] = asc("i")
istr += 2
outp += 1
cdiff += 1
continue while
case &hB9 to &hBE
if (c2 mod 2) = 1 then
nc2 = c2 + 1
end if
case &hBF
nc1 = &hC5
nc2 = &h80
end select
case &hC5
select case c2
case &h8A to &hB7
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
case &h00 to &h88, &hB9 to &hBE
if (c2 mod 2) = 1 then
nc2 = c2 + 1
end if
case &hB8
nc1 = &hC3
nc2 = &hBF
end select
case &hC6
select case c2
case &h81
nc1 = &hC9
nc2 = &h93
case &h82 to &h85
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
case &h87, &h88, &h8B, &h8C
if (c2 mod 2) = 1 then
nc2 = c2 + 1
end if
case &h86
nc1 = &hC9
nc2 = &h94
case &h89
nc1 = &hC9
nc2 = &h96
case &h8A
nc1 = &hC9
nc2 = &h97
case &h8E
nc1 = &hC7
nc2 = &h9D
case &h8F
nc1 = &hC9
nc2 = &h99
case &h90
nc1 = &hC9
nc2 = &h9B
case &h91, &h98
nc2 = c2 + 1
case &h93
nc1 = &hC9
nc2 = &hA0
case &h94
nc1 = &hC9
nc2 = &hA3
case &h96
nc1 = &hC9
nc2 = &hA9
case &h97
nc1 = &hC9
nc2 = &hA8
case &h9C
nc1 = &hC9
nc2 = &hAF
case &h9D
nc1 = &hC9
nc2 = &hB2
case &h9F
nc1 = &hC9
nc2 = &hB5
case &hA0 to &hA5, &hAC
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
case &hA7, &hAF
if (c2 mod 2) = 1 then
nc2 = c2 + 1
end if
case &hA6
nc1 = &hCA
nc2 = &h80
case &hA9
nc1 = &hCA
nc2 = &h83
case &hAE
nc1 = &hCA
nc2 = &h88
case &hB8, &hBC
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
case &hB3 to &hB6
if (c2 mod 2) = 1 then
nc2 = c2 + 1
end if
case &hB1
nc1 = &hCA
nc2 = &h8A
case &hB2
nc1 = &hCA
nc2 = &h8B
case &hB7
nc1 = &hCA
nc2 = &h92
end select
case &hC7
select case c2
case &h84 to &h8C, &hB1 to &hB3
if ((c2 and &hF) mod 3) = 1 then
nc2 = c2 + 2
elseif ((c2 and &hF) mod 3) = 2 then
nc2 = c2 + 1
end if
case &h8D to &h9C
if (c2 mod 2) = 1 then
nc2 = c2 + 1
end if
case &h9E to &hAF, &hB4, &hB5, &hB8 to &hBF
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
case &hB6
nc1 = &hC6
nc2 = &h95
case &hB7
nc1 = &hC6
nc2 = &hBF
end select
case &hC8
if within(&h80, &hB3, c2) then
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
end if
select case c2
case &hA0
nc1 = &hC6
nc2 = &h9E
case &hA1
nc2 = c2
case &hBA, &hBE
p = outp - strptr(result)
ResizeStr(result, len(result) + 1)
outp = strptr(result) + p
(*outp)[0] = &hE2
outp += 1
(*outp)[0] = &hB1
outp += 1
if c2 = &hBA then
(*outp)[0] = &hA5
else
(*outp)[0] = &hA6
end if
cdiff -= 1
outp += 1
istr += 2
continue while
case &hBD
nc1 = &hC6
nc2 = &h9A
case &hBB
nc2 = c2 + 1
end select
case &hC9
select case c2
case &h81, &h82
if (c2 mod 2) = 1 then
nc2 = c2 + 1
end if
case &h86 to &h8F
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
case &h83
nc1 = &hC6
nc2 = &h80
case &h84
nc1 = &hCA
nc2 = &h89
case &h85
nc1 = &hCA
nc2 = &h8C
end select
case &hCE
select case c2
case &h86
nc2 = &hAC
case &h88
nc2 = &hAD
case &h89
nc2 = &hAE
case &h8A
nc2 = &hAF
case &h8C
' nc2 doesn't change
nc1 = &hCF
case &h8E
nc1 = &hCF
nc2 = &h8D
case &h8F
nc1 = &hCF
nc2 = &h8E
case &h91 to &h9F
nc2 = c2 + &h20
case &hA0 to &hAB
nc1 = &hCF
nc2 = c2 - &h20
end select
case &hCF
select case c2
case &h8F
nc2 = &h97
case &h98
nc2 = &h99
case &h9A
nc2 = &h9B
case &h9C
nc2 = &h9D
case &h9E
nc2 = &h9F
case &hA0 to &hAF
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
case &hB4
nc1 = &hCE
nc2 = &hB8
case &hB7
nc2 = &hB8
case &hB9
nc2 = &hB2
case &hBA
nc2 = &hBB
case &hBD
nc1 = &hCD
nc2 = &hBB
case &hBE
nc1 = &hCD
nc2 = &hBC
case &hBF
nc1 = &hCD
nc2 = &hBD
end select
case &hD0
c2 = (*istr)[1]
select case c2
case &h80 to &h8F
nc1 = c1 + 1
nc2 = c2 + &h10
case &h90 to &h9F
nc2 = c2 + &h20
case &hA0 to &hAF
nc1 = c1 + 1
nc2 = c2 - &h20
end select
case &hD1
if within(&hA0, &hBF, c2) then
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
end if
case &hD2
select case c2
case &h80
nc2 = c2 + 1
case &h8A to &hBF
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
end select
case &hD3
select case c2
case &h80
nc2 = &h8F
case &h81 to &h8E
if (c2 mod 2) = 1 then
nc2 = c2 + 1
end if
case &h90 to &hBF
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
end select
case &hD4
if (c2 mod 2) = 0 then
nc2 = c2 + 1
end if
if within(&hB1, &hBF, c2) then
nc1 = &hD5
nc2 = c2 - &h10
end if
case &hD5
select case c2
case &h80 to &h8F
nc2 = c2 + &h30
case &h90 to &h96
nc1 = &hD6
nc2 = c2 - &h10
end select
end select
' evaluate
if cdiff <> 0 then
(*outp)[0] = nc1
(*outp)[1] = nc2
else
if nc1 <> c1 then (*outp)[0] = nc1
if nc2 <> c2 then (*outp)[1] = nc2
end if
istr += 2
outp += 2
' three bytes
case &hE1
nc1 = c1
c2 = (*istr)[1]
c3 = (*istr)[2]
nc2 = c2
nc3 = c3
select case c2
case &h82
if within(&hA0, &hBF, c3) then
nc1 = &hE2
nc2 = &hB4
nc3 = c3 - &h20
end if
case &h83
if within(&h80, &h85, c3) then
nc1 = &hE2
nc2 = &hB4
nc3 = c3 + &h20
end if
case &hB8 to &hBB
if (c3 mod 2) = 0 then
nc3 = c3 + 1
end if
if c2 = &hBA then
if within(&h96, &h9F, c3) then
nc3 = c3
end if
end if
if c2 = &hBA andalso c3 = &h9E then
istr += 3
(*outp)[0] = &hC3
outp += 1
(*outp)[0] = &h9F
outp += 1
cdiff += 1
continue while
end if
case &hBC
if (c3 mod &h10) \ 8 = 1 then
nc3 = c3 - 8
end if
case &hBD
select case c3
case &h80 to &h8F, &hA0 to &hAF
if (c3 mod &h10) \ 8 = 1 then
nc3 = c3 - 8
end if
case &h99, &h9B, &h9D, &h9F
nc3 = c3 - 8
end select
case &hBE
select case c3
case &h80 to &hB9
if (c3 mod &h10) \ 8 = 1 then
nc3 = c3 - 8
end if
case &hBA
nc2 = &hBD
nc3 = &hB0
case &hBB
nc2 = &hBD
nc3 = &hB1
case &hBC
nc3 = &hB3
end select
end select
if cdiff <> 0 then
(*outp)[0] = nc1
(*outp)[1] = nc2
(*outp)[2] = nc3
else
if c1 <> nc1 then (*outp)[0] = nc1
if c2 <> nc2 then (*outp)[1] = nc2
if c3 <> nc3 then (*outp)[2] = nc3
end if
istr += 3
outp += 3
case &hE2
nc1 = c1
c2 = (*istr)[1]
c3 = (*istr)[2]
nc2 = c2
nc3 = c3
select case c2
case &h84
select case c3
case &hA6
istr += 3
(*outp)[0] = &hCF
outp += 1
(*outp)[0] = &h89
outp += 1
cdiff += 1
continue while
case &hAA
istr += 3
(*outp)[0] = &h6B
outp += 1
cdiff += 2
continue while
case &hAB
istr += 3
(*outp)[0] = &hC3
outp += 1
(*outp)[0] = &hA5
outp += 1
cdiff += 1
continue while
end select
case &h85
if within(&hA0, &hAF, c3) then
nc3 = c3 + &h10
end if
case &h86
if c3 = &h83 then
nc3 = c3 + 1
end if
case &h92
if within(&hB6, &hBF, c3) then
nc2 = &h93
nc3 = c3 - &h26
end if
case &h93
if within(&h80, &h8F, c3) then
nc3 = c3 + &h26
end if
case &hB0
select case c3
case &h80 to &h8F
nc3 = c3 + &h30
case &h90 to &hAE
nc2 = &hB1
nc3 = c3 - &h10
end select
case &hB1
select case c3
case &hA0
nc3 = c3 + 1
case &hA2, &hA4, &hAD to &hAF, &hB0
istr += 3
(*outp)[0] = &hC9
outp += 1
select case c3
case &hA2
(*outp)[0] = &hAB
case &hA4
(*outp)[0] = &hBD
case &hAD
(*outp)[0] = &h91
case &hAE
(*outp)[0] = &hB1
case &hAF
(*outp)[0] = &h90
case &hB0
(*outp)[0] = &h92
end select
outp += 1
cdiff += 1
continue while
case &hA3
nc2 = &hB5
nc3 = &hBD
case &hA7, &hA9, &hAB
nc3 = c3 + 1
case &hB2, &hB5
nc3 = c3 + 1
case &hBE, &hBF
istr += 3
(*outp)[0] = &hC8
outp += 1
if c3 = &hBE then
(*outp)[0] = &hBF
else
(*outp)[0] = &h80
end if
outp += 1
cdiff += 1
continue while
end select
case &hB2
if (c3 mod 2) = 0 then
nc3 = c3 + 1
end if
case &hB3
if within(&h80, &hA3, c3) then
if (c3 mod 2) = 0 then nc3 = c3 + 1
end if
end select
if cdiff <> 0 then
(*outp)[0] = nc1
(*outp)[1] = nc2
(*outp)[2] = nc3
else
if c1 <> nc1 then (*outp)[0] = nc1
if c2 <> nc2 then (*outp)[1] = nc2
if c3 <> nc3 then (*outp)[2] = nc3
end if
istr += 3
outp += 3
case &hEF
c2 = (*istr)[1]
c3 = (*istr)[2]
if c2 = &hBC then
if within(&hA1, &hBA, c3) then
(*outp)[0] = c1
(*outp)[1] = &hBD
(*outp)[2] = c3 - &h20
end if
end if
if cdiff <> 0 then
(*outp)[0] = c1
(*outp)[1] = c2
(*outp)[2] = c3
end if
istr += 3
outp += 3
case else
if cdiff <> 0 then (*outp)[0] = c1
istr += 1
outp += 1
end select
wend
' set final buffer size
ResizeStr(result, outp - strptr(result))
return result
end function
function UUCase(byref s as const ustring, byref lang as const string = "") as const ustring
dim as uinteger icnt, ocnt
dim as PChar8 outp
dim as ubyte cplen, ncplen
dim as ushort ncod, ocod
dim as boolean codpr, IsTurkish
dim as ustring result
if len(s) = 0 then
exit function
end if
result = s
outp = strptr(result)
if lang = "tr" orelse lang = "az" then
IsTurkish = true
end if
icnt = 0
ocnt = 0
while icnt <= len(s)
if within(&h61, &h7A, s[icnt]) then
if IsTurkish andalso (s[icnt] = asc("i")) then
ResizeStr(result, len(result) + 1)
outp = strptr(result)
(*outp)[ocnt] = &hC4
(*outp)[ocnt + 1] = &hB0
icnt += 1
ocnt += 2
else
(*outp)[ocnt] = s[icnt] - &h20
icnt += 1
ocnt += 1
end if
else
cplen = UTF8CpLen(cast(PChar8, @s[icnt]))
codpr = false
ncplen = cplen
if cplen = 2 then
ocod = (s[icnt] shl 8) or s[icnt + 1]
ncod = 0
select case ocod
case &hC39F
ncod = &h5353
case &hC3A0 to &hC3B6, &hC3B8 to &hC3BE
ncod = ocod - &h20
case &hC3BF
ncod = &hC5B8
case &hC481 to &hC4B0
if (ocod mod 2) = 1 then
ncod = ocod - 1
end if
case &hCB1
(*outp)[ocnt] = asc("I")
ncplen = 1
codpr = true
case &hC4B2 to &hC4B7
if (ocod mod 2) = 1 then
ncod = ocod - 1
end if
case &hC4B9 to &hC4BF
if (ocod mod 2) = 0 then
ncod = ocod - 1
end if
case &hC580
ncod = &hC4BF
case &hC581 to &hC588
if (ocod mod 2) = 0 then
ncod = ocod - 1
end if
case &hC58A to &hC5B7
if (ocod mod 2) = 1 then
ncod = ocod - 1
end if
case &hC5B9 to &hC5BE
if (ocod mod 2) = 0 then
ncod = ocod - 1
end if
case &hC5BF
(*outp)[ocnt] = asc("S")
ncplen = 1
codpr = true
case &hC680
ncod = &hC983
case &hC682 to &hC685
if (ocod mod 2) = 1 then
ncod = ocod - 1
end if
case &hC688
ncod = &hC687
case &hC68C
ncod = &hC68B
case &hC692
ncod = &hC691
case &hC695
ncod = &hC7B6
case &hC699
ncod = &hC698
case &hC69A
ncod = &hC8BD
case &hC69E
ncod = &hC8A0
case &hC6A0 to &hC6A5
if (ocod mod 2) = 1 then
ncod = ocod - 1
end if
case &hC6A8
ncod = &hC6A7
case &hC6AD
ncod = &hC6AC
case &hC6B0
ncod = &hC6AF
case &hC6B3 to &hC6B6
if (ocod mod 2) = 0 then
ncod = ocod - 1
end if
case &hC6B9
ncod = &hC6B8
case &hC6BD
ncod = &hC6BC
case &hC6BF
ncod = &hC7B7
case &hC784 to &hC786
ncod = &hC784
case &hC787 to &hC789
ncod = &hC787
case &hC78A to &hC78C
ncod = &hC78A
case &hC78E
ncod = &hC78D
case &hC790
ncod = &hC78F
case &hC791 to &hC79C
if (ocod mod 2) = 0 then
ncod = ocod - 1
end if
case &hC79D
ncod = &hC68E
case &hC79F
ncod = &hC79E
case &hC7A0 to &hC7AF
if (ocod mod 2) = 1 then
ncod = ocod - 1
end if
case &hC7B2 to &hC7B3
ncod = &hC7B1
case &hC7B5
ncod = &hC7B4
case &hC7B8 to &hC7BF
if ocod mod 2 = 1 then
ncod = ocod - 1
end if
case &hC880 to &hC89F
if ocod mod 2 = 1 then
ncod = ocod - 1
end if
case &hC8A2 to &hC8B3
if ocod mod 2 = 1 then
ncod = ocod - 1
end if
case &hC8BC
ncod = &hC8BB
case &hC8BF
UTF8ResizeBuffer(result, outp, icnt, ocnt, 2, 3)
(*outp)[ocnt] = &hE2
(*outp)[ocnt + 1] = &hB1
(*outp)[ocnt + 2] = &hBE
ncplen = 3
codpr = true
case &hC980
UTF8ResizeBuffer(result, outp, icnt, ocnt, 2, 3)
(*outp)[ocnt] = &hE2
(*outp)[ocnt + 1] = &hB1
(*outp)[ocnt + 2] = &hBF
ncplen = 3
codpr = true
case &hC982
ncod = &hC981
case &hC986 to &hC98F
if (ocod mod 2) = 1 then
ncod = ocod - 1
end if
case &hC990
UTF8ResizeBuffer(result, outp, icnt, ocnt, 2, 3)
(*outp)[ocnt] = &hE2
(*outp)[ocnt + 1] = &hB1
(*outp)[ocnt + 2] = &hAF
ncplen = 3
codpr = true
case &hC991
UTF8ResizeBuffer(result, outp, icnt, ocnt, 2, 3)
(*outp)[ocnt] = &hE2
(*outp)[ocnt + 1] = &hB1
(*outp)[ocnt + 2] = &hAD
ncplen = 3
codpr = true
case &hC992
UTF8ResizeBuffer(result, outp, icnt, ocnt, 2, 3)
(*outp)[ocnt] = &hE2
(*outp)[ocnt + 1] = &hB1
(*outp)[ocnt + 2] = &hB0
ncplen = 3
codpr = true
case &h993
ncod = &hC681
case &hC994
ncod = &hC686
case &hC996
ncod = &hC689
case &hC997
ncod = &hC68A
case &hC999
ncod = &hC68F
case &hC99B
ncod = &hC690
case &hC9A0
ncod = &hC693
case &hC9A3
ncod = &hC694
case &hC9A5
UTF8ResizeBuffer(result, outp, icnt, ocnt, 2, 3)
(*outp)[ocnt] = &hEA
(*outp)[ocnt + 1] = &h9E
(*outp)[ocnt + 2] = &h8D
ncplen = 3
codpr = true
case &hC9A8
ncod = &hC697
case &hC9A9
ncod = &hC696
case &hC9AB
UTF8ResizeBuffer(result, outp, icnt, ocnt, 2, 3)
(*outp)[ocnt] = &hE2
(*outp)[ocnt + 1] = &hB1
(*outp)[ocnt + 2] = &hA2
ncplen = 3
codpr = true
case &hC9AF
ncod = &hC69C
case &hC9B1
UTF8ResizeBuffer(result, outp, icnt, ocnt, 2, 3)
(*outp)[ocnt] = &hE2
(*outp)[ocnt + 1] = &hB1
(*outp)[ocnt + 2] = &hAE
ncplen = 3
codpr = true
case &hC9B2
ncod = &hC69D
case &hC9B5
ncod = &hC69F
case &hC9BD
UTF8ResizeBuffer(result, outp, icnt, ocnt, 2, 3)
(*outp)[ocnt] = &hE2
(*outp)[ocnt + 1] = &hB1
(*outp)[ocnt + 2] = &hA4
ncplen = 3
codpr = true
case &hCA80
ncod = &hC6A6
case &hCA83
ncod = &hC6A9
case &hCA88
ncod = &hC6AE
case &hCA89
ncod = &hC984
case &hCA8A
ncod = &hC6B1
case &hCA8B
ncod = &hC6B2
case &hCA8C
ncod = &hC985
case &hCA92
ncod = &hC6B7
case &hCEAC
ncod = &hCE86
case &hCEAD
ncod = &hCE88
case &hCEAE
ncod = &hCE89
case &hCEAF
ncod = &hCE8A
case &hCEB1 to &hCEBF
ncod = ocod - &h20
case &hCF80, &hCF81, &hCF83 to &hCF8B
ncod = ocod - &hE0
case &hCF82
ncod = &hCEA3
case &hCF8C
ncod = &hCE8C
case &hCF8D
ncod = &hCE8E
case &hCF8E
ncod = &hCE8F
case &hCF90
ncod = &hCE92
case &hCF91
ncod = &hCE98
case &hCF95
ncod = &hCEA6
case &hCF96
ncod = &hCEA0
case &hCF97
ncod = &hCF8F
case &hCF99 to &hCF9F, &hCFA0 to &hCFAF
if (ocod mod 2) = 1 then
ncod = ocod - 1
end if
case &hCFB0
ncod = &hCE9A
case &hCFB1
ncod = &hCEA1
case &hCFB2
ncod = &hCFB9
case &hCFB5
ncod = &hCE95
case &hCFB8
ncod = &hCFB7
case &hCFBB
ncod = &hCFBA
case &hD0B0 to &hD0BF
ncod = ocod - &h20
case &hD180 to &hD18F
ncod = ocod - &hE0
case &hD190 to &hD19F
ncod = ocod - &h110
end select
if ncod <> 0 then
(*outp)[ocnt] = hibyte(ncod)
(*outp)[ocnt + 1] = lobyte(ncod)
codpr = true
end if
end if
if (icnt <> ocnt + 1) andalso (codpr = false) then
for i as uinteger = 0 to cplen - 1
(*outp)[ocnt + i] = s[icnt + i]
next
end if
icnt += cplen
ocnt += ncplen
end if
wend
' -1 prevents trailing space character (tested with ASCII and UTF8)
ResizeStr(result, ocnt - 1)
return result
end function
' --- house-keeping routines ---
function UTF8Asc(p as PChar8) as uinteger
dim result as uinteger
if p <> nil then
if (*p)[0] < &hC0 then
' one byte
result = (*p)[0]
elseif ((*p)[0] and &hE0) = &hC0 then
' two bytes?
if byte1_is_utf8 then
result = (((*p)[0] and &h1F) shl 6) _
or ((*p)[1] and &h3F)
if result < &h80 then
' illegal encoding, perhaps XSS attack
result = 0
end if
else
' single character
result = (*p)[0]
end if
elseif ((*p)[0] and &hF0) = &hE0 then
' three bytes?
if byte1_is_utf8 _
andalso byte2_is_utf8 then
result = (((*p)[0] and &h1F) shl 12) _
or (((*p)[1] and &h3F) shl 6) _
or ((*p)[2] and &h3F)
if result < &h800 then
' illegal encoding, perhaps XSS attack
result = 0
end if
else
' single character
result = (*p)[0]
end if
elseif ((*p)[0] and &hF8) = &hF0 then
' four bytes?
if byte1_is_utf8 _
andalso byte2_is_utf8 _
andalso byte3_is_utf8 then
result = (((*p)[0] and &hF) shl 18) _
or (((*p)[1] and &h3F) shl 12) _
or (((*p)[2] and &h3F) shl 6) _
or ((*p)[3] and &h3F)
if result < &h10000 then
' illegal encoding, perhaps XSS attack
result = 0
end if
else
' single character
result = (*p)[0]
end if
else
' illegal character
result = 0
end if
end if
return result
end function
function UTF8CpLenX(p as PChar8) as uinteger
dim result as uinteger = 1
select case (*p)[0]
case &hC0 to &hDF
' two bytes
if byte1_is_utf8 then
result = 2
end if
case &hE0 to &hEF
' three bytes
if byte1_is_utf8 andalso byte2_is_utf8 then
result = 3
end if
case &hF0 to &hF7
' four bytes
if byte1_is_utf8 then
if byte2_is_utf8 andalso byte3_is_utf8 then
result = 4
end if
end if
end select
return result
end function
function UTF8CpLen(p as PChar8) as uinteger
if p = nil then
return 0
end if
if (*p)[0] < &hC0 then
return 1
end if
return UTF8CpLenX(p)
end function
function UTF8Cod(byval value as uinteger) as ustring
' unicode value to codepoint string
dim result as ustring
If Utf8Rep (value) = "" Then
select case value
case &h0 to &h7F
' one byte
result = space(1)
result[0] = value
case &h80 to &h7FF
' two bytes
result = space(2)
result[0] = &hC0 or (value shr 6)
result[1] = &h80 or (value and &h3F)
case &h800 to &hFFFF
' three bytes
result = space(3)
result[0] = &hE0 or (value shr 12)
result[1] = ((value shr 6) and &h3F) or &h80
result[2] = (value and &h3F) or &h80
case &h10000 to &h10FFFF
' four bytes
result = space(4)
result[0] = &hF0 or (value shr 18)
result[1] = ((value shr 12) and &h3F) or &h80
result[2] = ((value shr 6) and &h3F) or &h80
result[3] = (value and &h3F) or &h80
end select
Else
result = Utf8Rep (value) 'replace string if a replacement is available
End If
return result
end function
function UTF8CpStart(p as PChar8, byval length as uinteger, byval index as uinteger) as PChar8
' codepoint start
dim cplen as uinteger
dim result as PChar8 = p
if result <> 0 then
while (index > 0) andalso (length > 0)
cplen = UTF8CpLen(result)
length -= cplen
index -= 1
result += cplen
wend
if (index <> 0) orelse (length < 0) then
result = nil
end if
end if
return result
end function
function UTF8Eval(p as PChar8, byval count as uinteger, byval flag as boolean = true) as integer
dim cplen as integer
dim c as ubyte
dim result as integer
if p <> nil then
result = 0
while result < count
c = (*p)[0]
if c < &h80 then
cplen = 1
elseif c <= &hC1 then
' flag = true means exit if error
if flag orelse (c >= &hC0) then
return result
end if
cplen = 1
elseif c <= &hDF then
if (result < count - 1) then
if byte1_is_utf8 then
cplen = 2
end if
else
return result
end if
elseif c <= &hEF then
if (result < count - 2) then
if byte1_is_utf8 andalso byte2_is_utf8 then
if (c = &hE0) andalso ((*p)[1] <= &h9F) then
return result
end if
cplen = 3
end if
else
return result
end if
elseif c <= &hF7 then
if (result < count - 3) then
if byte1_is_utf8 _
andalso byte2_is_utf8 _
andalso byte3_is_utf8 then
if (c = &hF0) andalso ((*p)[1] <= &h8F) then
return result
end if
cplen = 4
else
return result
end if
end if
else
if flag then
return result
end if
cplen = 1
end if
result += cplen
p += cplen
if result > count then
result -= cplen
return result
end if
wend
end if
return -1
end function
sub UTF8Repair(p as PChar8)
dim c as uinteger
if p = nil then exit sub
while (*p)[0] <> 0
if (*p)[0] < &h80 then
' single byte
p += 1
elseif (*p)[0] < &hC0 then
' invalid
(*p)[0] = 32
p += 1
elseif ((*p)[0] and &hE0) = &hC0 then
' two bytes
if byte1_is_utf8 then
c = ((*p)[0] and &h1F) shl 6
if c < &h80 then
' fix XSS attack
(*p)[0] = 32
else
p += 2
end if
else
' invalid
(*p)[0] = 32
p += 1
end if
elseif ((*p)[0] and &hF0) = &hE0 then
' three bytes
if byte1_is_utf8 andalso byte2_is_utf8 then
c = (((*p)[0] and &h1F) shl 12) _
or (((*p)[1] and &h3F) shl 6)
if c < &h800 then
' fix XSS attack
(*p)[0] = 32
else
p += 3
end if
else
' invalid
(*p)[0] = 32
p += 1
end if
elseif ((*p)[0] and &hF8) = &hF0 then
if byte1_is_utf8 _
andalso byte2_is_utf8 andalso byte3_is_utf8 then
c = (((*p)[0] and &hF) shl 18) or (((*p)[1] and &h3F) shl 12) or (((*p)[2] and &h3F) shl 6)
if c < &h10000 then
' fix XSS attack
(*p)[0] = 32
else
p += 4
end if
else
' invalid
(*p)[0] = 32
p += 1
end if
else
' invalid
(*p)[0] = 32
p += 1
end if
wend
end sub
function UTF8InstrP(sp as PChar8, byval splen as uinteger, sb as PChar8, byval sblen as uinteger) as PChar8
' returns pointer to search string, or 0 if not found
dim p as integer
if (sp = nil) orelse (sb = nil) orelse (sblen = 0) then
return nil
end if
while splen > 0
p = ByteIndex(sp, splen, (*sb)[0])
if p < 0 then
exit function
end if
sp += p
splen -= p
if splen < sblen then
exit function
end if
if memcmp(sp, sb, sblen) = 0 then
return sp
end if
sp += 1
splen -= 1
wend
return 0
end function
function UTF8Len(p as PChar8, byval bytes as uinteger) as uinteger
dim cplen as uinteger
dim result as uinteger = 0
while bytes > 0
result += 1
cplen = UTF8CpLen(p)
p += cplen
bytes -= cplen
wend
return result
end function
sub UTF8ResizeBuffer (byref s as ustring, byref outp as PChar8, byval icnt as uinteger, byval ocnt as uinteger, byval osize as integer, nsize as integer)
if not (nsize > osize) then
exit sub
end if
if (nsize > 20) orelse (osize > 20) then
exit sub
end if
if (nsize > osize) andalso (ocnt >= icnt - 1) then
ResizeStr(s, len(s) + nsize - osize)
outp = strptr(s)
end if
end sub
function UTF8Reverse(p as PChar8, byval count as uinteger) as ustring
dim as uinteger rbpos
dim as ubyte cplen
dim as ustring result
ResizeStr(result, count)
rbpos = count
while rbpos > 0
cplen = UTF8CpLen(p)
rbpos -= cplen
memcpy(@result[rbpos], p, cplen)
p += cplen
wend
return result
end function
'UTF-8 conversion tools as extension:
'Define variables needed to function
Dim Shared As String Chrs (255), UChrs (255)
Dim As Integer TmpUtf8
Dim As Boolean UTF8Support
'If not, we set it to false (Windows / DOS)
UTF8Support = True
'Low characters:
For TmpUtf8 = 0 To 126
UChrs (TmpUtf8) = Chr (TmpUtf8)
Next TmpUtf8
'Prefab UTF-8 characters as Ascii 437 code:
UChrs ( 1) = "☺": UChrs ( 2) = "☻": UChrs ( 3) = "♥": UChrs ( 4) = "♦": UChrs ( 5) = "♣": UChrs ( 6) = "♠": UChrs ( 14) = "♫": UChrs ( 15) = "☼"
UChrs ( 16) = "►": UChrs ( 17) = "◄": UChrs ( 18) = "↕": UChrs ( 19) = "‼": UChrs ( 20) = "¶": UChrs ( 21) = "§": UChrs ( 22) = "▬": UChrs ( 23) = "↨"
UChrs ( 24) = "↑": UChrs ( 25) = "↓": UChrs ( 26) = "→": UChrs ( 27) = "←": UChrs ( 28) = "∟": UChrs ( 29) = "↔": UChrs ( 30) = "▲": UChrs ( 31) = "▼"
UChrs (127) = "⌂" 'The code numbers in UTF-8 are different, therefore we need to enter them manually.
UChrs (128) = "Ç": UChrs (129) = "ü": UChrs (130) = "é": UChrs (131) = "â": UChrs (132) = "ä": UChrs (133) = "à": UChrs (134) = "å": UChrs (135) = "ç"
UChrs (136) = "ê": UChrs (137) = "ë": UChrs (138) = "è": UChrs (139) = "ï": UChrs (140) = "î": UChrs (141) = "ì": UChrs (142) = "Ä": UChrs (143) = "Å"
UChrs (144) = "É": UChrs (145) = "æ": UChrs (146) = "Æ": UChrs (147) = "ô": UChrs (148) = "ö": UChrs (149) = "ò": UChrs (150) = "û": UChrs (151) = "ù"
UChrs (152) = "ÿ": UChrs (153) = "Ö": UChrs (154) = "Ü": UChrs (155) = "¢": UChrs (156) = "£": UChrs (157) = "¥": UChrs (158) = "₧": UChrs (159) = "ƒ"
UChrs (160) = "á": UChrs (161) = "í": UChrs (162) = "ó": UChrs (163) = "ú": UChrs (164) = "ñ": UChrs (165) = "Ñ": UChrs (166) = "ª": UChrs (167) = "º"
UChrs (168) = "¿": UChrs (169) = "⌐": UChrs (170) = "¬": UChrs (171) = "½": UChrs (172) = "¼": UChrs (173) = "¡": UChrs (174) = "«": UChrs (175) = "»"
UChrs (176) = "░": UChrs (177) = "▒": UChrs (178) = "▓": UChrs (179) = "│": UChrs (180) = "┤": UChrs (181) = "╡": UChrs (182) = "╢": UChrs (183) = "╖"
UChrs (184) = "╕": UChrs (185) = "╣": UChrs (186) = "║": UChrs (187) = "╗": UChrs (188) = "╝": UChrs (189) = "╜": UChrs (190) = "╛": UChrs (191) = "┐"
UChrs (192) = "└": UChrs (193) = "┴": UChrs (194) = "┬": UChrs (195) = "├": UChrs (196) = "─": UChrs (197) = "┼": UChrs (198) = "╞": UChrs (199) = "╟"
UChrs (200) = "╚": UChrs (201) = "╔": UChrs (202) = "╩": UChrs (203) = "╦": UChrs (204) = "╠": UChrs (205) = "═": UChrs (206) = "╬": UChrs (207) = "╧"
UChrs (208) = "╨": UChrs (209) = "╤": UChrs (210) = "╥": UChrs (211) = "╙": UChrs (212) = "╘": UChrs (213) = "╒": UChrs (214) = "╓": UChrs (215) = "╫"
UChrs (216) = "╪": UChrs (217) = "┘": UChrs (218) = "┌": UChrs (219) = "█": UChrs (220) = "▄": UChrs (221) = "▌": UChrs (222) = "▐": UChrs (223) = "▀"
UChrs (224) = "α": UChrs (225) = "ß": UChrs (226) = "Γ": UChrs (227) = "π": UChrs (228) = "Σ": UChrs (229) = "σ": UChrs (230) = "µ": UChrs (231) = "τ"
UChrs (232) = "Φ": UChrs (233) = "Θ": UChrs (234) = "Ω": UChrs (235) = "δ": UChrs (236) = "∞": UChrs (237) = "φ": UChrs (238) = "ε": UChrs (239) = "∩"
UChrs (240) = "≡": UChrs (241) = "±": UChrs (242) = "≥": UChrs (243) = "≤": UChrs (244) = "⌠": UChrs (245) = "⌡": UChrs (246) = "÷": UChrs (247) = "≈"
UChrs (248) = "°": UChrs (249) = "∙": UChrs (250) = "·": UChrs (251) = "√": UChrs (252) = "ⁿ": UChrs (253) = "²": UChrs (254) = "■": UChrs (255) = " "
'Windows hosts use the old 437 code page, in Cmd.exe window:
#If Defined (__FB_WIN32__)
#include once "windows.bi"
'Unfortunately UTF-8 doesn't work correctly in Windows, as well as shell command, and as internal command.
'Shell "chcp 65001"
'SetConsoleOutputCP 65001
'Try to switch to code page 437 if the system refuses to switch to UTF-8
If GetConsoleOutputCP = 850 Then
SetConsoleOutputCP 437
End If
If GetConsoleOutputCP <> 65001 Then
UTF8Support = False
For TmpUtf8 = 0 To 255
Chrs (TmpUtf8) = Chr (TmpUtf8)
Next TmpUtf8
End If
#EndIf
'Since Windows can't upgrade the console to UTF-8, we extend Unix-based consoles with 437 by using known UTF-8 characters as an Ascii code:
If UTF8Support = True Then
For TmpUtf8 = 0 To 255
Chrs (TmpUtf8) = UChrs (TmpUtf8)
Next TmpUtf8
End If
'UTF-8 converters, Ascii 437 to UTF-8:
Function Asc2Utf (InString As String) As String
Dim As String TmpTxt
Dim As Integer SPos
For SPos = 1 To Len (InString)
TmpTxt += UChrs(Asc(Mid(InString, SPos, 1)))
Next SPos
Return TmpTxt
End Function
'UTF-8 converters, UTF-8 to Ascii 437:
Function Utf2Asc (InString As String) As String
Dim As String TmpTxt0
Dim As Integer TmpInt0, SPos, NrChar
SPos = 1
Do Until SPos > Len (InString)
If Asc (Mid (InString, SPos, 1)) = 226 Then
NrChar = 3 'This UTF-8 string is 3 Ascii characters long
ElseIf (Asc (Mid (InString, SPos, 1)) >= 193 And Asc (Mid (InString, SPos, 1)) <= 195) Or Asc (Mid (InString, SPos, 1)) = 198 Or Asc (Mid (InString, SPos, 1)) = 206 Then
NrChar = 2 'This UTF-8 string is 2 Ascii characters long
ElseIf Mid (InString, SPos, 1) <> "" Then
NrChar = 1 'This UTF-8 string is 1 Ascii character long
Else
NrChar = 0 'This UTF-8 string is 0 Ascii characters long
End If
TmpInt0 = 0
Do Until TmpInt0 > 255
If Mid (InString, SPos, NrChar) = UChrs (TmpInt0) Then TmpTxt0 += Chr(TmpInt0): Exit Do
TmpInt0 += 1
Loop
SPos += NrChar
Loop
Return TmpTxt0
End Function
Code: Select all
#include once "utf8support.bi"
Dim As Integer TmpInt
Dim As String Utf8Txt (13), AscTxt (13)
Utf8Txt (0) = "▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄"
Utf8Txt (1) = "█░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░█"
Utf8Txt (2) = "█░█▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀█░░░█"
Utf8Txt (3) = "█░█────────────────────────────────────────█ ░█"
Utf8Txt (4) = "█░█───█▀▀▀▀▄───────────────────────────────█ ░█"
Utf8Txt (5) = "█░█───█────█────▄▄▄▄─────▄───▄─────▄▄▄▄────█ ░█"
Utf8Txt (6) = "█░█───█────█───█────█───█─▀▄▀─█───█────█───█ ░█"
Utf8Txt (7) = "█░█───█────█───█▀▀▀▀▀───█─────█───█────█───█ ░█"
Utf8Txt (8) = "█░█───█▄▄▄▄▀───▀▄▄▄▄▀───█─────█───▀▄▄▄▄▀───█ ░█"
Utf8Txt (9) = "█░█────────────────────────────────────────█ ░█"
Utf8Txt (10) = "█░█▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄█ ░█"
Utf8Txt (11) = "█░░░ ░█"
Utf8Txt (12) = "█░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░█"
Utf8Txt (13) = "▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀"
'Convert UTF-8 to Ascii 437 text format
For TmpInt = 0 To 13
AscTxt (TmpInt) = Utf2Asc (Utf8Txt (TmpInt))
Next TmpInt
'Convert Ascii 437 to UTF-8 text format
For TmpInt = 0 To 13
Utf8Txt (TmpInt) = Asc2Utf (AscTxt (TmpInt))
Next TmpInt
'Show text depending on console:
If UTF8Support = False Then
'Windows cmd.exe or DOS without UTF-8 support:
For TmpInt = 0 To 13
Print AscTxt (TmpInt)
Next TmpInt
Else
'All Unix-based systems (e.g. Linux, MacOs, FreeBSD) support UTF-8 by default:
For TmpInt = 0 To 13
Print Utf8Txt (TmpInt)
Next TmpInt
End If
'Save in Ascii format:
Open "demo.asc" For Output As #1
For TmpInt = 0 To 13
Print #1, AscTxt (TmpInt)
Next TmpInt
Close #1
'Save in UTF-8 format:
Open "demo.utf" For Output As #1
For TmpInt = 0 To 13
Print #1, Utf8Txt (TmpInt)
Next TmpInt
Close #1