// Checks whether a NUL-terminated byte string is structurally valid GBK.
//
// Every byte below 0x80 is accepted as a single-byte (ASCII) character.
// Every other character must be a two-byte sequence:
//   lead byte  : 0x81..0xFE
//   trail byte : 0x40..0xFE, excluding 0x7F
// Only byte ranges are inspected, so a pure-ASCII string is reported as GBK,
// and text in another multi-byte encoding may also pass if its bytes happen to
// fall inside these ranges.
//
// Returns false for a null pointer, for a byte outside the ranges above, and
// for a string that ends in the middle of a two-byte character.
bool is_str_gbk(const char* str)
{
    if (str == nullptr) {
        return false;
    }

    bool expectTrailByte = false; // true right after a lead byte has been consumed
    for (const char* cursor = str; *cursor != '\0'; ++cursor) {
        const unsigned char chr = static_cast<unsigned char>(*cursor);

        if (expectTrailByte) {
            // 0x7F is the one hole inside the GBK trail-byte range.
            if (chr < 0x40 || chr == 0x7F || chr > 0xFE) {
                return false;
            }
            expectTrailByte = false;
        }
        else if (chr >= 0x80) {
            // 0x80 and 0xFF are not valid lead bytes.
            if (chr < 0x81 || chr > 0xFE) {
                return false;
            }
            expectTrailByte = true;
        }
        // chr < 0x80 outside a pair: plain ASCII, nothing to track.
    }

    // A dangling lead byte at the end of the string is malformed.
    return !expectTrailByte;
}
// Converts a GBK-encoded string to UTF-8 using only standard-library facets.
//
// The bytes are first decoded to wide characters through the codecvt facet of
// the locale named ".936" (the Windows locale name for GBK), then re-encoded
// as UTF-8.
//
// Throws std::runtime_error if the ".936" locale cannot be constructed (for
// example on platforms that do not know that locale name) and
// std::range_error if either conversion step fails.
std::string GBKStringToUTF8String(const std::string &gbkStr)
{
    typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> GbkFacetBase;

    // std::codecvt_byname has a protected destructor, but std::wstring_convert
    // owns and destroys the facet it is given. Deriving a type with a public
    // destructor keeps this portable across standard libraries.
    struct GbkFacet : GbkFacetBase {
        explicit GbkFacet(const char* localeName) : GbkFacetBase(localeName) {}
        ~GbkFacet() {}
    };

    const char* const GBK_LOCALE_NAME = ".936"; // locale name for GBK on Windows

    // wstring_convert takes ownership of the facet; no manual delete is needed.
    std::wstring_convert<GbkFacet> gbkConverter(new GbkFacet(GBK_LOCALE_NAME));
    const std::wstring wide = gbkConverter.from_bytes(gbkStr);

    std::wstring_convert<std::codecvt_utf8<wchar_t> > utf8Converter;
    return utf8Converter.to_bytes(wide);
}
// Converts a NUL-terminated GBK string to UTF-8 through the Win32 API.
//
// The input is decoded with code page 936 (GBK) explicitly rather than with
// CP_ACP: the ANSI code page depends on the system locale and is GBK only on
// Simplified-Chinese systems.
//
// Returns the UTF-8 text, or an empty string if strGBK is null or either
// conversion step reports failure.
std::string GBKToUTF8(const char* strGBK)
{
    if (strGBK == nullptr) {
        return std::string();
    }

    const unsigned int kGbkCodePage = 936;

    // First call: ask for the required length in wchar_t units. With a source
    // length of -1 the count includes the terminating NUL.
    const int wideLen = MultiByteToWideChar(kGbkCodePage, 0, strGBK, -1, nullptr, 0);
    if (wideLen <= 0) {
        return std::string();
    }
    std::wstring wide(static_cast<size_t>(wideLen), L'\0');
    if (MultiByteToWideChar(kGbkCodePage, 0, strGBK, -1, &wide[0], wideLen) <= 0) {
        return std::string();
    }

    // Same two-step pattern for UTF-16 -> UTF-8.
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, nullptr, 0, nullptr, nullptr);
    if (utf8Len <= 0) {
        return std::string();
    }
    std::string utf8(static_cast<size_t>(utf8Len), '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], utf8Len, nullptr, nullptr) <= 0) {
        return std::string();
    }

    // Drop the terminating NUL that the API counted and wrote.
    utf8.resize(static_cast<size_t>(utf8Len) - 1);
    return utf8;
}
// Converts inlen bytes of inbuf from from_charset to to_charset with iconv,
// writing the result into outbuf as a NUL-terminated string.
//
// outbuf is zero-filled before the conversion and its last byte is kept in
// reserve, so on success the output is always NUL-terminated; the converted
// text therefore has to fit in outlen - 1 bytes.
//
// Returns 0 on success. Returns -1 if outbuf is null, outlen is 0, the
// charset pair cannot be opened, or iconv reports an error (invalid input,
// truncated input, or not enough output space).
int code_convert(char *from_charset, char *to_charset, char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    if (outbuf == nullptr || outlen == 0) {
        return -1;
    }

    // iconv_open signals failure with (iconv_t)-1, not with 0.
    const iconv_t cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)(-1)) {
        return -1;
    }

    memset(outbuf, 0, outlen);

    // iconv advances the cursors and decrements the remaining counts.
    char *in_cursor = inbuf;
    char *out_cursor = outbuf;
    size_t out_room = outlen - 1; // keep one byte for the terminator

    const size_t status = iconv(cd, &in_cursor, &inlen, &out_cursor, &out_room);

    // Release the descriptor on every path, including conversion failure.
    iconv_close(cd);

    return status == static_cast<size_t>(-1) ? -1 : 0;
}
// Converts `in` from the charset named by fromEncode to the charset named by
// toEncode (despite the function name, the target is whatever toEncode says).
//
// Only the bytes of `in` up to its first NUL are converted, and the result is
// likewise read back as a NUL-terminated string.
//
// Returns the converted text, or `in` unchanged if the conversion fails.
std::string any2utf8(std::string in, std::string fromEncode, std::string toEncode)
{
    const size_t inlen = strlen(in.c_str());

    // Scratch buffer owned by a std::string so it is released on every path.
    // Four output bytes per input byte plus a little slack covers the usual
    // worst cases (e.g. a single-byte charset widened to a 4-byte encoding
    // with a byte-order mark); the buffer is pre-zeroed.
    std::string outbuf(inlen * 4 + 8, '\0');

    // Hand code_convert one byte less than the buffer holds so the final byte
    // always stays NUL and the result is terminated even when the output
    // fills all the space it was given.
    const int rst = code_convert(&fromEncode[0], &toEncode[0], &in[0], inlen,
                                 &outbuf[0], outbuf.size() - 1);
    if (rst != 0) {
        return in;
    }
    return std::string(outbuf.c_str());
}
// Converts a NUL-terminated GBK string to UTF-8 via any2utf8.
//
// Returns an empty string when `in` is null. Otherwise returns whatever
// any2utf8 returns for the "gbk" -> "utf-8" conversion.
std::string gbk2utf8(const char* in)
{
    // Constructing std::string from a null pointer is undefined behaviour.
    if (in == nullptr) {
        return std::string();
    }
    return any2utf8(std::string(in), std::string("gbk"), std::string("utf-8"));
}
以上一共三种方法,其中一种用的是可在 C++ 中使用的 iconv 库,库的下载地址:http://windows.php.net/downloads/php-sdk/deps/vc14/x64/ ,当然你也可以自己下载源码来编译。
- 本文固定链接: http://www.letg.top/?p=70
- 转载请注明: ubosm 于 点滴分享 发表