Skip to content

Instantly share code, notes, and snippets.

@VeryCrazyDog
Last active August 28, 2023 13:31
Show Gist options
  • Save VeryCrazyDog/c20b2cb83896e9975d22 to your computer and use it in GitHub Desktop.
Save VeryCrazyDog/c20b2cb83896e9975d22 to your computer and use it in GitHub Desktop.

Revisions

  1. VeryCrazyDog revised this gist Mar 9, 2015. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions ReadUtfFile.cpp
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,6 @@
    // Reading ASCII, UTF-8, UTF-16LE, UTF-16BE with auto BOM detection using C++11 on Windows platform
    // Code tested on Microsoft Visual Studio 2013 on Windows 7
    // Part of the code is referencing http://cfc.kizzx2.com/index.php/reading-a-unicode-utf16-file-in-windows-c/

    #include <stdio.h>
    #include <tchar.h>
  2. VeryCrazyDog revised this gist Mar 8, 2015. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions ReadUtfFile.cpp
    Original file line number Diff line number Diff line change
    @@ -65,6 +65,7 @@ std::string readFile(std::string path)
    else if (encoding == ENCODING_UTF16BE) {
    std::string src = ss.str();
    std::string dst = src;
    // Using Windows API
    _swab(&src[0u], &dst[0u], src.size() + 1);
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> utfconv;
    result = utfconv.to_bytes(std::wstring((wchar_t *)dst.c_str()));
  3. VeryCrazyDog renamed this gist Mar 8, 2015. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  4. VeryCrazyDog renamed this gist Mar 8, 2015. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  5. VeryCrazyDog created this gist Mar 8, 2015.
    90 changes: 90 additions & 0 deletions gistfile1.cpp
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,90 @@
    // Reading ASCII, UTF-8, UTF-16LE, UTF-16BE with auto BOM detection using C++11 on Windows platform
    // Code tested on Microsoft Visual Studio 2013 on Windows 7

    #include <stdio.h>
    #include <tchar.h>
    #include <string>
    #include <fstream>
    #include <sstream>
    #include <locale>
    #include <codecvt>
    #include <iostream>
    #include <io.h>
    #include <fcntl.h>

    // Path of the text file that the demo entry point reads.
    const char* const TEXT_FILE_PATH = "D:\\test.txt";

    // Identifiers for the text encodings distinguished by BOM detection.
    // An unnamed enum keeps the names usable wherever an int is expected,
    // while giving them a real scope and type instead of preprocessor text.
    enum {
        ENCODING_ASCII = 0,   // no BOM found; bytes are passed through unchanged
        ENCODING_UTF8 = 1,    // UTF-8 BOM (EF BB BF)
        ENCODING_UTF16LE = 2, // UTF-16 little-endian BOM (FF FE)
        ENCODING_UTF16BE = 3  // UTF-16 big-endian BOM (FE FF)
    };

    // Returns true when `data` begins with the `prefixLen` bytes at `prefix`.
    static bool startsWithBytes(const std::string& data, const char* prefix,
                                std::string::size_type prefixLen)
    {
        return data.size() >= prefixLen
            && data.compare(0, prefixLen, prefix, prefixLen) == 0;
    }

    // Converts the UTF-16 byte sequence stored in `bytes` from `offset` onwards
    // into UTF-8. `bigEndian` selects the byte order of each 16-bit code unit.
    // A dangling final byte (odd-length payload) is ignored because it cannot
    // form a complete code unit.
    // Throws std::range_error (from std::wstring_convert::to_bytes) when the
    // code units are not valid UTF-16.
    static std::string utf16BytesToUtf8(const std::string& bytes,
                                        std::string::size_type offset,
                                        bool bigEndian)
    {
        std::wstring units;
        units.reserve((bytes.size() - offset) / 2);
        // Assemble each code unit arithmetically instead of reinterpreting the
        // byte buffer as wchar_t*: this is independent of host endianness,
        // alignment and sizeof(wchar_t), and keeps embedded U+0000 characters.
        for (std::string::size_type i = offset; i + 1 < bytes.size(); i += 2) {
            const unsigned int first = static_cast<unsigned char>(bytes[i]);
            const unsigned int second = static_cast<unsigned char>(bytes[i + 1]);
            const unsigned int unit = bigEndian ? ((first << 8) | second)
                                                : ((second << 8) | first);
            units.push_back(static_cast<wchar_t>(unit));
        }
        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> utfconv;
        return utfconv.to_bytes(units);
    }

    // Reads the file at `path` and returns its text as UTF-8.
    //
    // The encoding is chosen from a leading byte-order mark:
    //   FF FE    -> UTF-16LE, converted to UTF-8
    //   FE FF    -> UTF-16BE, converted to UTF-8
    //   EF BB BF -> UTF-8, returned without the BOM
    //   (none)   -> bytes returned unchanged
    // The BOM itself is never part of the result, and no terminator is appended.
    //
    // Returns an empty string when the file cannot be opened or is empty.
    // Throws std::range_error if a UTF-16 file contains invalid code units.
    std::string readFile(std::string path)
    {
        std::ifstream ifs(path.c_str(), std::ios::binary);
        if (!ifs.is_open()) {
            // Unable to read file
            return std::string();
        }

        // Load the whole file first and inspect the BOM on the in-memory copy.
        // Probing with get() and seeking back fails for files shorter than
        // three bytes, because the stream is already in a failed state.
        std::stringstream ss;
        ss << ifs.rdbuf();
        const std::string bytes = ss.str();

        if (startsWithBytes(bytes, "\xFF\xFE", 2)) {
            return utf16BytesToUtf8(bytes, 2, false);
        }
        if (startsWithBytes(bytes, "\xFE\xFF", 2)) {
            return utf16BytesToUtf8(bytes, 2, true);
        }
        if (startsWithBytes(bytes, "\xEF\xBB\xBF", 3)) {
            return bytes.substr(3);
        }
        // The file does not have a BOM
        return bytes;
    }

    // Demo entry point: loads TEXT_FILE_PATH through readFile(), converts the
    // returned UTF-8 text to a wide string and writes it to stdout after
    // switching stdout to _O_U8TEXT mode. The command-line arguments are unused.
    int _tmain(int argc, _TCHAR* argv[])
    {
        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> toWide;
        const std::string fileUtf8 = readFile(std::string(TEXT_FILE_PATH));
        const std::wstring wideText = toWide.from_bytes(fileUtf8);

        // stdout must be in a wide/Unicode translation mode before wcout is used.
        _setmode(_fileno(stdout), _O_U8TEXT);
        std::wcout << wideText << std::endl;
        return 0;
    }