#include <locale>
#include <codecvt>
#include <cassert>
#include <string>
#include <vector>
/*
 * The converter must be built from std::codecvt_utf8<char32_t>, i.e.
 * std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>.
 * It converts a UTF-8 encoded byte string into a std::u32string of
 * fixed-width 4-byte UCS-4 elements (and back).
 * Only a fixed 4-byte width guarantees that every char32_t element of the
 * std::u32string fully represents one Unicode character, so casting an
 * element to int yields the corresponding code point.
 */
using WstringConvertType =
    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t>;
std::u32string
StringToWideString(const std::string& s,
WstringConvertType* convert = nullptr) {
if (nullptr == convert) {
WstringConvertType conv;
try {
return conv.from_bytes(s);
} catch (std::range_error& ex) {
assert(false);
return std::u32string();
}
} else {
try {
return convert->from_bytes(s);
} catch (std::range_error& ex) {
assert(false);
return std::u32string();
}
}
}
std::string
WideStringToString(const std::u32string& wide_string,
WstringConvertType* convert = nullptr) {
if (nullptr == convert) {
WstringConvertType conv;
try {
return conv.to_bytes(wide_string);
} catch (std::range_error& ex) {
assert(false);
return std::string();
}
} else {
try {
return convert->to_bytes(wide_string);
} catch (std::range_error& ex) {
assert(false);
return std::string();
}
}
}
/*
 * Converts `s` to a std::u32string via StringToWideString() and returns
 * its elements as ints, one per char32_t, in order.
 *
 * s:       input byte string.
 * convert: optional converter, forwarded unchanged to StringToWideString().
 */
std::vector<int>
StringToCodePoints(const std::string& s,
                   WstringConvertType* convert = nullptr) {
    const std::u32string wide = StringToWideString(s, convert);
    // The range constructor converts each char32_t element to int.
    return std::vector<int>(wide.begin(), wide.end());
}
/*
 * Builds a std::u32string from `code_points` (one char32_t per int, in
 * order) and converts it to a byte string via WideStringToString().
 *
 * code_points: values cast to char32_t one by one.
 * convert:     optional converter, forwarded unchanged to
 *              WideStringToString().
 */
std::string
CodePointsToString(const std::vector<int>& code_points,
                   WstringConvertType* convert = nullptr) {
    std::u32string wide;
    wide.reserve(code_points.size());
    for (const int code_point : code_points) {
        wide.push_back(static_cast<char32_t>(code_point));
    }
    return WideStringToString(wide, convert);
}
/*
 * Self-check: round-trips a sample string through the conversion helpers
 * and asserts that every round trip reproduces the input.
 */
int main(int argc, char** argv) {
    const std::string original = "一伦红日冉冉升起,O(∩_∩)O哈哈~";

    // Byte string -> u32string -> byte string must give back the input.
    const std::u32string wide = StringToWideString(original);
    const std::string round_tripped = WideStringToString(wide);
    assert(original == round_tripped);

    // Both copies of the text must produce the same code point sequence.
    const std::vector<int> points_from_original = StringToCodePoints(original);
    const std::vector<int> points_from_round_trip =
        StringToCodePoints(round_tripped);
    assert(points_from_original == points_from_round_trip);

    // Code points -> byte string must give back the input as well.
    const std::string rebuilt = CodePointsToString(points_from_original);
    assert(original == rebuilt);

    return 0;
}
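// NOTE: leftover markdown-editor help text, not part of the program:
// **bold** _italic_ [link](http://example.com) `code` - list > quote.
// You can also use @ to notify other users.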