handle non-BMP characters

issue #1376
This commit is contained in:
tildearrow 2023-08-19 16:49:58 -05:00
parent 14d3baae56
commit c8222fd491
2 changed files with 66 additions and 21 deletions

View File

@@ -1114,11 +1114,20 @@ static int u8to16s(wchar_t* dest, const char* src, size_t limit) {
int ch, p; int ch, p;
char chs; char chs;
p=0; p=0;
while (src[p]!=0 && ret<limit) { while (src[p]!=0 && ret<limit-1) {
ch=decodeUTF8s((const unsigned char*)&src[p],&chs); ch=decodeUTF8s((const unsigned char*)&src[p],&chs);
dest[ret++]=(unsigned short)ch; // surrogates
if (ch>=0x10000) {
ch-=0x10000;
if (ret+1>=limit-1) break;
dest[ret++]=(unsigned short)(0xd800|((ch>>10)&0x3ff));
dest[ret++]=(unsigned short)(0xdc00|(ch&0x3ff));
} else if (ch<0xd800 || ch>0xdfff) {
dest[ret++]=(unsigned short)ch;
}
p+=chs; p+=chs;
} }
dest[ret]=0;
return ret; return ret;
} }
@@ -1176,19 +1185,34 @@ dirent_mbstowcs_s(
static int u16to8s(char* dest, const wchar_t* src, size_t limit) { static int u16to8s(char* dest, const wchar_t* src, size_t limit) {
size_t ret=0; size_t ret=0;
unsigned int next=0;
for (; (*src)!=0; src++) { for (; (*src)!=0; src++) {
if ((*src)<0x80) { if ((*src)>=0xd800 && (*src)<0xdc00) {
if (ret+1>=limit-1) break; next=0x10000+(((*src)&0x3ff)<<10);
dest[ret++]=(*src); continue;
} else if ((*src)<0x800) { } else if ((*src)>=0xdc00 && (*src)<0xe000) {
if (ret+2>=limit-1) break; next|=(*src)&0x3ff;
dest[ret++]=(0xc0+(((*src)>>6)&31));
dest[ret++]=(0x80+((*src)&63));
} else { } else {
next=(*src);
}
if (next<0x80) {
if (ret+1>=limit-1) break;
dest[ret++]=next;
} else if (next<0x800) {
if (ret+2>=limit-1) break;
dest[ret++]=(0xc0+((next>>6)&31));
dest[ret++]=(0x80+(next&63));
} else if (next<0x10000) {
if (ret+3>=limit-1) break; if (ret+3>=limit-1) break;
dest[ret++]=(0xe0+(((*src)>>12)&15)); dest[ret++]=(0xe0+((next>>12)&15));
dest[ret++]=(0x80+(((*src)>>6)&63)); dest[ret++]=(0x80+((next>>6)&63));
dest[ret++]=(0x80+((*src)&63)); dest[ret++]=(0x80+(next&63));
} else {
if (ret+4>=limit-1) break;
dest[ret++]=(0xf0+((next>>18)&7));
dest[ret++]=(0x80+((next>>12)&63));
dest[ret++]=(0x80+((next>>6)&63));
dest[ret++]=(0x80+(next&63));
} }
} }
dest[ret]=0; dest[ret]=0;

View File

@@ -88,7 +88,14 @@ WString utf8To16(const char* s) {
p=0; p=0;
while (s[p]!=0) { while (s[p]!=0) {
ch=decodeUTF8((const unsigned char*)&s[p],chs); ch=decodeUTF8((const unsigned char*)&s[p],chs);
ret+=(unsigned short)ch; // surrogates
if (ch>=0x10000) {
ch-=0x10000;
ret+=(unsigned short)(0xd800|((ch>>10)&0x3ff));
ret+=(unsigned short)(0xdc00|(ch&0x3ff));
} else if (ch<0xd800 || ch>0xdfff) {
ret+=(unsigned short)ch;
}
p+=chs; p+=chs;
} }
return ret; return ret;
@@ -96,16 +103,30 @@ WString utf8To16(const char* s) {
String utf16To8(const wchar_t* s) { String utf16To8(const wchar_t* s) {
String ret; String ret;
unsigned int next=0;
for (size_t i=0; i<wcslen(s); i++) { for (size_t i=0; i<wcslen(s); i++) {
if (s[i]<0x80) { if (s[i]>=0xd800 && s[i]<0xdc00) {
ret+=s[i]; next=0x10000+((s[i]&0x3ff)<<10);
} else if (s[i]<0x800) { continue;
ret+=(0xc0+((s[i]>>6)&31)); } else if (s[i]>=0xdc00 && s[i]<0xe000) {
ret+=(0x80+((s[i])&63)); next|=s[i]&0x3ff;
} else { } else {
ret+=(0xe0+((s[i]>>12)&15)); next=s[i];
ret+=(0x80+((s[i]>>6)&63)); }
ret+=(0x80+((s[i])&63)); if (next<0x80) {
ret+=next;
} else if (next<0x800) {
ret+=(0xc0+((next>>6)&31));
ret+=(0x80+((next)&63));
} else if (next<0x10000) {
ret+=(0xe0+((next>>12)&15));
ret+=(0x80+((next>>6)&63));
ret+=(0x80+((next)&63));
} else {
ret+=(0xf0+((next>>18)&7));
ret+=(0x80+((next>>12)&63));
ret+=(0x80+((next>>6)&63));
ret+=(0x80+((next)&63));
} }
} }
return ret; return ret;