Skip to content

Instantly share code, notes, and snippets.

@japajoe
Created October 8, 2025 18:29
Show Gist options
  • Save japajoe/5915b098b6871048ed7baed68b25c44f to your computer and use it in GitHub Desktop.
Save japajoe/5915b098b6871048ed7baed68b25c44f to your computer and use it in GitHub Desktop.
Check if a string is valid UTF-8
/**
 * @brief Checks whether a byte buffer is well-formed UTF-8 and counts its characters.
 *
 * Validation follows the well-formed byte sequence table of the Unicode
 * Standard (also RFC 3629): besides checking the lead/continuation bit
 * patterns, it rejects overlong encodings, UTF-16 surrogate code points
 * (U+D800..U+DFFF) and code points above U+10FFFF.
 *
 * @param pText     Pointer to the bytes to validate. A null pointer is
 *                  reported as invalid.
 * @param size      Number of bytes to examine. When 0, the length is taken
 *                  from strlen(pText), so pText must then be null-terminated.
 * @param charCount Incremented once for every character whose first byte was
 *                  accepted. It is never reset here, so the caller must
 *                  initialise it (typically to 0). When the function returns
 *                  false it may already include characters seen before the
 *                  failure.
 * @return true if every character in the examined range is complete and
 *         well-formed, false otherwise.
 */
bool isValidUTF8(const char *pText, size_t size, size_t &charCount) {
    // strlen() on a null pointer is undefined behaviour; treat it as invalid input.
    if (pText == nullptr)
        return false;

    if (size == 0)
        size = strlen(pText);

    const uint8_t *pPayload = reinterpret_cast<const uint8_t*>(pText);

    int numBytes = 0;       // Continuation bytes still expected for the current character
    uint8_t minNext = 0x80; // Allowed range for the next continuation byte. Only the
    uint8_t maxNext = 0xBF; // first continuation byte is ever narrowed (see below).

    for (size_t i = 0; i < size; ++i) {
        const uint8_t byte = pPayload[i];

        if (numBytes == 0) {
            if (byte < 0x80) {
                // 1-byte character (ASCII)
                charCount++;
                continue;
            }

            minNext = 0x80;
            maxNext = 0xBF;

            if (byte >= 0xC2 && byte <= 0xDF) {
                // 2-byte character. 0xC0/0xC1 are excluded: they could only
                // produce overlong encodings of ASCII.
                numBytes = 1;
            } else if (byte >= 0xE0 && byte <= 0xEF) {
                // 3-byte character
                numBytes = 2;
                if (byte == 0xE0)
                    minNext = 0xA0; // below this is an overlong encoding
                else if (byte == 0xED)
                    maxNext = 0x9F; // above this is a surrogate (U+D800..U+DFFF)
            } else if (byte >= 0xF0 && byte <= 0xF4) {
                // 4-byte character. 0xF5..0xF7 would exceed U+10FFFF.
                numBytes = 3;
                if (byte == 0xF0)
                    minNext = 0x90; // below this is an overlong encoding
                else if (byte == 0xF4)
                    maxNext = 0x8F; // above this exceeds U+10FFFF
            } else {
                // Stray continuation byte or a lead byte that is never valid
                return false;
            }

            charCount++;
        } else {
            // Check continuation bytes
            if (byte < minNext || byte > maxNext)
                return false;

            // Any further continuation bytes may use the full 0x80..0xBF range.
            minNext = 0x80;
            maxNext = 0xBF;
            numBytes--;
        }
    }

    return numBytes == 0; // Ensure the last character was complete
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment