2022-02-15 03:12:20 +00:00
|
|
|
/**
|
|
|
|
* Furnace Tracker - multi-system chiptune tracker
|
2024-01-17 02:26:57 +00:00
|
|
|
* Copyright (C) 2021-2024 tildearrow and contributors
|
2022-02-15 03:12:20 +00:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
*/
|
|
|
|
|
2021-12-16 20:51:19 +00:00
|
|
|
#include "utfutils.h"
|
|
|
|
|
2022-08-13 23:00:29 +00:00
|
|
|
/**
 * Decode one UTF-8 sequence starting at data[0].
 *
 * @param data pointer to the first byte of the sequence. Bytes after the
 *             first are only read while the previous byte was accepted, so
 *             a NUL terminator is never read past.
 * @param len  receives the number of bytes consumed (1 to 4). For a
 *             truncated sequence this is the length of the accepted prefix.
 * @return the decoded code point, or 0xfffd (U+FFFD) if the sequence is
 *         malformed, truncated, overlong, a surrogate (U+D800-U+DFFF) or
 *         at/above 0x110000.
 */
int decodeUTF8(const unsigned char* data, signed char& len) {
  const unsigned char lead=data[0];
  int continuation; // number of continuation bytes the lead byte announces
  int minimum;      // smallest code point allowed for this sequence length
  int ret;

  if (lead<0x80) {
    // plain ASCII
    len=1;
    return lead;
  }
  if (lead<0xc0) {
    // stray continuation byte
    len=1;
    return 0xfffd;
  }

  if (lead<0xe0) {
    continuation=1;
    minimum=0x80;
    ret=lead&31;
  } else if (lead<0xf0) {
    continuation=2;
    minimum=0x800;
    ret=lead&15;
  } else if (lead<0xf5) {
    continuation=3;
    minimum=0x10000;
    ret=lead&7;
  } else {
    // 0xf5-0xff never start a sequence
    len=1;
    return 0xfffd;
  }

  for (int i=1; i<=continuation; i++) {
    if (data[i]<0x80 || data[i]>=0xc0) {
      // sequence cut short: consume only the bytes accepted so far
      len=(signed char)i;
      return 0xfffd;
    }
    ret=(ret<<6)|(data[i]&63);
  }
  len=(signed char)(continuation+1);

  // overlong form: the value would have fit in a shorter sequence
  if (ret<minimum) return 0xfffd;
  // surrogates and values past the Unicode range are not valid scalar values
  if ((ret>=0xd800 && ret<=0xdfff) || ret>=0x110000) return 0xfffd;
  return ret;
}
|
|
|
|
|
|
|
|
size_t utf8len(const char* s) {
|
|
|
|
size_t p=0;
|
|
|
|
size_t r=0;
|
2022-08-13 23:00:29 +00:00
|
|
|
signed char cl;
|
2021-12-16 20:51:19 +00:00
|
|
|
while (s[p]!=0) {
|
|
|
|
r++;
|
|
|
|
decodeUTF8((const unsigned char*)&s[p],cl);
|
|
|
|
p+=cl;
|
|
|
|
}
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
char utf8csize(const unsigned char* c) {
|
2022-08-13 23:00:29 +00:00
|
|
|
signed char ret;
|
2021-12-16 20:51:19 +00:00
|
|
|
decodeUTF8(c,ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Convert a NUL-terminated UTF-8 string to UTF-16.
 *
 * Code points at or above 0x10000 are written as a surrogate pair. A decoded
 * value that falls in the surrogate range (0xd800-0xdfff) is skipped.
 *
 * @param s NUL-terminated UTF-8 string.
 * @return the converted wide string.
 */
WString utf8To16(const char* s) {
  WString out;
  signed char consumed;
  for (int pos=0; s[pos]!=0; pos+=consumed) {
    const int codePoint=decodeUTF8((const unsigned char*)&s[pos],consumed);

    if (codePoint>=0x10000) {
      // split into high and low surrogate
      const int offset=codePoint-0x10000;
      out+=(unsigned short)(0xd800|((offset>>10)&0x3ff));
      out+=(unsigned short)(0xdc00|(offset&0x3ff));
      continue;
    }

    // a bare surrogate value cannot be represented on its own; drop it
    if (codePoint>=0xd800 && codePoint<=0xdfff) continue;

    out+=(unsigned short)codePoint;
  }
  return out;
}
|
|
|
|
|
|
|
|
String utf16To8(const wchar_t* s) {
|
|
|
|
String ret;
|
2023-08-19 21:49:58 +00:00
|
|
|
unsigned int next=0;
|
2021-12-16 20:51:19 +00:00
|
|
|
for (size_t i=0; i<wcslen(s); i++) {
|
2023-08-19 21:49:58 +00:00
|
|
|
if (s[i]>=0xd800 && s[i]<0xdc00) {
|
|
|
|
next=0x10000+((s[i]&0x3ff)<<10);
|
|
|
|
continue;
|
|
|
|
} else if (s[i]>=0xdc00 && s[i]<0xe000) {
|
|
|
|
next|=s[i]&0x3ff;
|
|
|
|
} else {
|
|
|
|
next=s[i];
|
|
|
|
}
|
|
|
|
if (next<0x80) {
|
|
|
|
ret+=next;
|
|
|
|
} else if (next<0x800) {
|
|
|
|
ret+=(0xc0+((next>>6)&31));
|
|
|
|
ret+=(0x80+((next)&63));
|
|
|
|
} else if (next<0x10000) {
|
|
|
|
ret+=(0xe0+((next>>12)&15));
|
|
|
|
ret+=(0x80+((next>>6)&63));
|
|
|
|
ret+=(0x80+((next)&63));
|
2021-12-16 20:51:19 +00:00
|
|
|
} else {
|
2023-08-19 21:49:58 +00:00
|
|
|
ret+=(0xf0+((next>>18)&7));
|
|
|
|
ret+=(0x80+((next>>12)&63));
|
|
|
|
ret+=(0x80+((next>>6)&63));
|
|
|
|
ret+=(0x80+((next)&63));
|
2021-12-16 20:51:19 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|