Base64是一种基于64个可打印字符来表示二进制数据的表示方法。由于我好久没写 C++ 了,于是突然一时兴起想要造一个 Base64 编解码的轮子。

转换的时候,将3字节的数据,先后放入一个24位的缓冲区中,先来的字节占高位。数据不足3字节的话,于缓冲器中剩下的比特用0补足。每次取出6比特,按照其值选择ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/中的字符作为编码后的输出,直到全部输入数据转换完成。若原数据长度不是3的倍数时且剩下1个输入数据,则在编码结果后加2个=;若剩下2个输入数据,则在编码结果后加1个=。

以上关于 Base64 编码方式的描述摘自维基百科的 Base64 条目。

在实现编解码之前,先定义 Base64 字符表。

// Base64 alphabet: the character at index i encodes the 6-bit value i.
// The array also holds the literal's terminating '\0', so
// sizeof(base64_char) is 65 rather than 64.
constexpr char base64_char[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"  // values  0-25
	"abcdefghijklmnopqrstuvwxyz"  // values 26-51
	"0123456789"                  // values 52-61
	"+/";                         // values 62-63

编码

首先实现编码函数:

// Encodes `len` bytes starting at `src` as Base64 text, using the alphabet in
// base64_char and '=' as padding.
//
// src: pointer to the input bytes; only src[0] .. src[len - 1] are read.
// len: number of input bytes; 0 yields an empty string.
// Returns the encoded text, whose length is always 4 * ceil(len / 3).
string base64_encode(const char* src, size_t len) {
	string ret;
	// Every (possibly partial) 3-byte group produces exactly 4 characters, so
	// reserve the exact final size; appending then never has to reallocate.
	ret.reserve((len / 3 + (len % 3 != 0)) * 4);
	for (size_t i = 0; i < len; i += 3) {   // one 3-byte group per iteration
		char first = src[i];
		char second = i + 1 < len ? src[i + 1] : 0;  // missing bytes count as 0 bits
		char third = i + 2 < len ? src[i + 2] : 0;

		// char may be signed, and >> on a negative value may shift in copies of
		// the sign bit, so every right shift is masked down to the wanted bits.
		ret += base64_char[(first >> 2) & 0b111111]; // 1st high 6 bits
		ret += base64_char[(first & 0b11) << 4 | ((second >> 4) & 0b1111)]; // 1st low 2 bits + 2nd high 4 bits
		if (i + 1 >= len) { ret += "=="; break; }    // only 1 byte in this group
		ret += base64_char[(second & 0b1111) << 2 | ((third >> 6) & 0b11)]; // 2nd low 4 bits + 3rd high 2 bits
		if (i + 2 >= len) { ret += "="; break; }     // only 2 bytes in this group
		ret += base64_char[third & 0b111111]; // 3rd low 6 bits
	}
	return ret;
}

需要注意的是位运算的处理方式:(first & 0b11) << 4 | ((second >> 4) & 0b1111) 这个表达式把 first 的低 2 位和 second 的高 4 位拼接在一起。还需要注意,char 在很多平台上是有符号类型,而对负数做 >> 运算的结果在 C++20 之前是 implementation-defined 的:编译器可能执行 arithmetic shift 而不是 logical shift。例如 8 位有符号值 0b11110000 右移 1 位后可能得到 0b11111000(移位时复制最高位)。因此,每次右移之后都要手动 & 一个掩码(这里是 & 0b1111),只保留需要的低位。

解码

然后再来实现解码函数。解码函数基本上就是编码的逆向过程。

首先定义一个 base64_char_convert 函数,用来把 Base64 字符转换回它所代表的 6 位数值。以下是一个简单的实现;出于效率考虑,也可以把这个函数改成查表实现。

// Exception type thrown when a character is not part of the Base64 alphabet.
struct InvalidBase64 {};

// Maps one Base64 alphabet character to the 6-bit value (0-63) it encodes:
// 'A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61, '+' -> 62, '/' -> 63.
//
// c: the character to convert.
// Returns the value in the range 0-63.
// Throws InvalidBase64 for every other character, including the padding
// character '=' and the NUL character '\0'.
//
// The range checks rely on the letters being contiguous in the execution
// character set, which holds for ASCII and its supersets.
char base64_char_convert(char c) {
	if (c >= 'A' && c <= 'Z') return static_cast<char>(c - 'A');
	if (c >= 'a' && c <= 'z') return static_cast<char>(c - 'a' + 26);
	if (c >= '0' && c <= '9') return static_cast<char>(c - '0' + 52);
	if (c == '+') return 62;
	if (c == '/') return 63;
	throw InvalidBase64(); // Invalid base64 character
}

然后再实现真正的解码函数:

vector<char> base64_decode(const string& src) {
	vector<char> ret;
    ret.reserve(src.length() * 3 / 4);
	for (size_t i = 0; i < src.length(); i += 4) {
		char b1 = base64_char_convert(src[i]);
		char b2 = base64_char_convert(src[i + 1]);

		ret.push_back((b1 << 2) | ((b2 >> 4 & 0b11))); // 1st 6bits, 2nd high 2bits

		if (src[i + 2] == '=') break;
		char b3 = base64_char_convert(src[i + 2]);
		ret.push_back(((b2 & 0b1111) << 4) | ((b3 >> 2) & 0b1111)); // 2nd low 4bits, 3rd high 4bits

		if (src[i + 3] == '=') break;
		char b4 = base64_char_convert(src[i + 3]);
		ret.push_back(((b3 & 0b11) << 6) | b4); // 3rd low 2bits, 4th 6bits
	}
	return ret;
}

测试

最后,写一个主程序测试一下以上代码吧。

// Copies the bytes of `v`, in order, into a new string of the same length.
// An empty vector yields an empty string.
string bytes_to_string(const vector<char>& v) {
	string text;
	text.assign(v.begin(), v.end());
	return text;
}

// Expands a string literal (or char array) into the "pointer, length" argument
// pair taken by base64_encode. The length excludes the terminating '\0'.
// Do not pass a plain pointer: sizeof would then give the pointer's size.
#define STR_AND_LEN(c_str) c_str, (sizeof(c_str)-1)

// Demo driver: encodes a few samples, decodes them again, and prints every
// intermediate result on its own line.
int main() {
	// Encode three short samples and print each encoding.
	const string a = base64_encode(STR_AND_LEN("hi, base64!"));
	cout << a << endl;
	const string b = base64_encode(STR_AND_LEN("hii, base64!"));
	cout << b << endl;
	const string c = base64_encode(STR_AND_LEN("hello, base64!"));
	cout << c << endl;

	// Decode the three encodings in the same order and print the results.
	const string* const encoded_samples[] = {&a, &b, &c};
	for (const string* encoded : encoded_samples) {
		cout << bytes_to_string(base64_decode(*encoded)) << endl;
	}

	// Encode a longer passage ...
	const char quote[] =
		"Man is distinguished, not only by his reason, but by this singular passion "
		"from other animals, which is a lust of the mind, that by a perseverance of "
		"delight in the continued and indefatigable generation of knowledge, exceeds "
		"the short vehemence of any carnal pleasure.";
	cout << base64_encode(STR_AND_LEN(quote)) << endl;

	// ... and decode a Base64 text given as a literal.
	const string encoded_quote =
		"TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz"
		"IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg"
		"dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu"
		"dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo"
		"ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=";
	cout << bytes_to_string(base64_decode(encoded_quote)) << endl;
}

运行结果:

aGksIGJhc2U2NCE=
aGlpLCBiYXNlNjQh
aGVsbG8sIGJhc2U2NCE=
hi, base64!
hii, base64!
hello, base64!
TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=
Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure.