How to detect a UTF-8 file without a BOM?
Oct 16, 2014 at 5:05am UTC
I need to open UTF-8 files in a text editor, but I don't know how to detect UTF-8 when the file has no BOM. The Win32 IsTextUnicode function doesn't have a UTF-8 flag.
Oct 16, 2014 at 11:25am UTC
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
/*
 * Heuristically decide whether a byte buffer holds UTF-8 text.
 *
 * The buffer is scanned from start to end and every multi-byte sequence
 * is checked against the well-formed UTF-8 byte ranges:
 *
 *   00..7F                              one byte (ASCII)
 *   C2..DF 80..BF                       two bytes
 *   E0..EF 80..BF 80..BF                three bytes
 *   F0..F4 80..BF 80..BF 80..BF         four bytes
 *
 * with the usual restrictions on the first continuation byte so that
 * overlong forms (E0 80..9F, F0 80..8F), UTF-16 surrogates (ED A0..BF)
 * and values above U+10FFFF (F4 90..BF) are rejected.
 *
 * pBuffer  bytes to inspect; not modified.
 * size     number of bytes available at pBuffer.
 *
 * Returns true when no ill-formed sequence is found. A zero or negative
 * size returns true, since there is nothing to contradict UTF-8. A
 * multi-byte sequence that is cut off by the end of the buffer is
 * tolerated (the caller may be passing only a leading sample of a file),
 * but the continuation bytes that ARE present must still be valid.
 * A null pBuffer with a positive size returns false.
 *
 * Note that pure ASCII input also returns true.
 */
bool IsUTF8(const void * pBuffer, long size)
{
    const unsigned char *bytes = (const unsigned char *)pBuffer;
    long pos = 0;

    if (size <= 0)
        return true;
    if (!bytes)
        return false;

    /* Index arithmetic is used instead of "end - 1" style pointer math,
     * which would step before the buffer for very small sizes. */
    while (pos < size)
    {
        unsigned char lead = bytes[pos];
        unsigned char lowest = 0x80;   /* allowed range of the FIRST  */
        unsigned char highest = 0xBF;  /* continuation byte           */
        long need = 0;                 /* continuation bytes expected */
        long avail;
        long check;
        long k;

        if (lead < 0x80)               /* 0xxxxxxx: ASCII */
        {
            pos++;
            continue;
        }

        if (lead < 0xC2)
        {
            /* 80..BF is a stray continuation byte; C0/C1 can only start
             * an overlong encoding of an ASCII character. */
            return false;
        }
        else if (lead < 0xE0)          /* 110xxxxx */
        {
            need = 1;
        }
        else if (lead < 0xF0)          /* 1110xxxx */
        {
            need = 2;
            if (lead == 0xE0)
                lowest = 0xA0;         /* below that is overlong */
            else if (lead == 0xED)
                highest = 0x9F;        /* above that is a surrogate */
        }
        else if (lead < 0xF5)          /* 11110xxx, up to U+10FFFF */
        {
            need = 3;
            if (lead == 0xF0)
                lowest = 0x90;         /* below that is overlong */
            else if (lead == 0xF4)
                highest = 0x8F;        /* above that exceeds U+10FFFF */
        }
        else
        {
            return false;              /* F5..FF never appear in UTF-8 */
        }

        /* Validate as many continuation bytes as the buffer holds. */
        avail = size - pos - 1;
        check = (need < avail) ? need : avail;
        for (k = 1; k <= check; k++)
        {
            unsigned char c = bytes[pos + k];
            unsigned char lo = (k == 1) ? lowest : 0x80;
            unsigned char hi = (k == 1) ? highest : 0xBF;

            if (c < lo || c > hi)
                return false;
        }

        if (avail < need)
            break;                     /* sequence truncated by end of buffer */

        pos += need + 1;
    }

    return true;
}
Topic archived. No new replies allowed.