Last active
August 28, 2023 13:31
-
-
Save VeryCrazyDog/c20b2cb83896e9975d22 to your computer and use it in GitHub Desktop.
Revisions
-
VeryCrazyDog revised this gist
Mar 9, 2015. 1 changed file with 1 addition and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,6 @@ // Reading ASCII, UTF-8, UTF-16LE, UTF-16BE with auto BOM detection using C++11 on Windows platform // Code tested on Microsoft Visual Studio 2013 on Windows 7 // Part of the code is referencing http://cfc.kizzx2.com/index.php/reading-a-unicode-utf16-file-in-windows-c/ #include <stdio.h> #include <tchar.h> -
VeryCrazyDog revised this gist
Mar 8, 2015. 1 changed file with 1 addition and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -65,6 +65,7 @@ std::string readFile(std::string path) else if (encoding == ENCODING_UTF16BE) { std::string src = ss.str(); std::string dst = src; // Using Windows API _swab(&src[0u], &dst[0u], src.size() + 1); std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> utfconv; result = utfconv.to_bytes(std::wstring((wchar_t *)dst.c_str())); -
VeryCrazyDog renamed this gist
Mar 8, 2015. 1 changed file with 0 additions and 0 deletions. There are no files selected for viewing
File renamed without changes. -
VeryCrazyDog renamed this gist
Mar 8, 2015. 1 changed file with 0 additions and 0 deletions. There are no files selected for viewing
File renamed without changes. -
VeryCrazyDog created this gist
Mar 8, 2015. There are no files selected for viewing
// Reading ASCII, UTF-8, UTF-16LE, UTF-16BE with auto BOM detection using C++11 on Windows platform
// Code tested on Microsoft Visual Studio 2013 on Windows 7
//
// readFile() itself only uses the C++ standard library; the Windows-specific
// headers and the console demo entry point are compiled only when _WIN32 is
// defined.
#include <stdio.h>
#ifdef _WIN32
#include <tchar.h>
#endif
#include <cstddef>
#include <string>
#include <fstream>
#include <sstream>
#include <locale>
#include <codecvt>
#include <iostream>
#ifdef _WIN32
#include <io.h>
#include <fcntl.h>
#endif

#define TEXT_FILE_PATH "D:\\test.txt"

#define ENCODING_ASCII 0
#define ENCODING_UTF8 1
#define ENCODING_UTF16LE 2
#define ENCODING_UTF16BE 3

// Appends the UTF-8 encoding of the Unicode code point `cp` to `out`.
// `cp` is expected to be a scalar value no greater than 0x10FFFF.
static void appendUtf8(std::string &out, unsigned long cp)
{
	if (cp < 0x80)
	{
		out += static_cast<char>(cp);
	}
	else if (cp < 0x800)
	{
		out += static_cast<char>(0xC0 | (cp >> 6));
		out += static_cast<char>(0x80 | (cp & 0x3F));
	}
	else if (cp < 0x10000)
	{
		out += static_cast<char>(0xE0 | (cp >> 12));
		out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
		out += static_cast<char>(0x80 | (cp & 0x3F));
	}
	else
	{
		out += static_cast<char>(0xF0 | (cp >> 18));
		out += static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
		out += static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
		out += static_cast<char>(0x80 | (cp & 0x3F));
	}
}

// Reads the 16-bit code unit stored at bytes[pos] and bytes[pos + 1] using
// the requested byte order. The caller guarantees pos + 1 < bytes.size().
static unsigned long readUtf16Unit(const std::string &bytes, std::size_t pos, bool bigEndian)
{
	const unsigned long first = static_cast<unsigned char>(bytes[pos]);
	const unsigned long second = static_cast<unsigned char>(bytes[pos + 1]);
	return bigEndian ? ((first << 8) | second) : ((second << 8) | first);
}

// Transcodes the UTF-16 payload found in `bytes` from `offset` onwards into
// UTF-8. Working on the raw bytes avoids reinterpreting the buffer as
// wchar_t (which depends on alignment and on wchar_t being 16 bits wide) and
// keeps embedded U+0000 characters instead of stopping at them.
// Malformed input (an unpaired surrogate, or a dangling odd byte at the end)
// is replaced with U+FFFD rather than aborting the conversion.
static std::string utf16ToUtf8(const std::string &bytes, std::size_t offset, bool bigEndian)
{
	const unsigned long REPLACEMENT_CHAR = 0xFFFD;
	const std::size_t size = bytes.size();
	std::string out;
	out.reserve(size - offset);

	std::size_t pos = offset;
	while (pos + 1 < size)
	{
		const unsigned long unit = readUtf16Unit(bytes, pos, bigEndian);
		pos += 2;

		if (unit >= 0xD800 && unit <= 0xDBFF)
		{
			// High surrogate: only valid when immediately followed by a low one
			if (pos + 1 < size)
			{
				const unsigned long low = readUtf16Unit(bytes, pos, bigEndian);
				if (low >= 0xDC00 && low <= 0xDFFF)
				{
					pos += 2;
					appendUtf8(out, 0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00));
					continue;
				}
			}
			appendUtf8(out, REPLACEMENT_CHAR);
		}
		else if (unit >= 0xDC00 && unit <= 0xDFFF)
		{
			// Low surrogate without a preceding high surrogate
			appendUtf8(out, REPLACEMENT_CHAR);
		}
		else
		{
			appendUtf8(out, unit);
		}
	}

	if (pos < size)
	{
		// Odd number of payload bytes: the last code unit is truncated
		appendUtf8(out, REPLACEMENT_CHAR);
	}
	return out;
}

// Reads the whole file at `path` and returns its text as UTF-8.
//
// The encoding is chosen from the byte order mark at the start of the file:
//   FF FE     -> UTF-16LE, transcoded to UTF-8
//   FE FF     -> UTF-16BE, transcoded to UTF-8
//   EF BB BF  -> UTF-8, returned with the BOM stripped
//   otherwise -> treated as ASCII and returned byte-for-byte
//
// Returns an empty string when the file cannot be opened or is empty. The
// result contains exactly the file's text: no terminating NUL is appended.
std::string readFile(std::string path)
{
	std::ifstream ifs(path.c_str(), std::ios::binary);
	if (!ifs.is_open())
	{
		// Unable to read file
		return std::string();
	}

	// Slurp the raw bytes first and sniff the BOM from the buffer. Probing
	// the stream with get() and rewinding with seekg() breaks for files
	// shorter than the probe, because the failed read leaves failbit set.
	std::ostringstream buffer;
	buffer << ifs.rdbuf();
	const std::string raw = buffer.str();

	int encoding = ENCODING_ASCII;
	std::size_t bomSize = 0;
	if (raw.compare(0, 2, "\xff\xfe", 2) == 0)
	{
		// The file contains UTF-16LE BOM
		encoding = ENCODING_UTF16LE;
		bomSize = 2;
	}
	else if (raw.compare(0, 2, "\xfe\xff", 2) == 0)
	{
		// The file contains UTF-16BE BOM
		encoding = ENCODING_UTF16BE;
		bomSize = 2;
	}
	else if (raw.compare(0, 3, "\xef\xbb\xbf", 3) == 0)
	{
		// The file contains UTF-8 BOM
		encoding = ENCODING_UTF8;
		bomSize = 3;
	}

	if (encoding == ENCODING_UTF16LE)
	{
		return utf16ToUtf8(raw, bomSize, false);
	}
	if (encoding == ENCODING_UTF16BE)
	{
		return utf16ToUtf8(raw, bomSize, true);
	}
	// UTF-8 (BOM stripped) or BOM-less data: hand the bytes back unchanged
	return raw.substr(bomSize);
}

#ifdef _WIN32
// Demo entry point (Windows only): reads TEXT_FILE_PATH, converts the UTF-8
// result to UTF-16 and writes it to stdout after switching it to
// _O_U8TEXT mode.
int _tmain(int argc, _TCHAR* argv[])
{
	std::string path = TEXT_FILE_PATH;
	std::string utf8Content = readFile(path);
	std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> utfconv;
	std::wstring utf16LeContent = utfconv.from_bytes(utf8Content);
	_setmode(_fileno(stdout), _O_U8TEXT);
	std::wcout << utf16LeContent << std::endl;
	return 0;
}
#endif