Skip to content

Commit c98c471

Browse files
committed
Add experimental implementations of BASE64 encoding with a testing/benchmarking tool and conclusion
1 parent 95137e2 commit c98c471

14 files changed

+2083
-0
lines changed

CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ if (Boost_FOUND)
4949
enable_testing()
5050
add_subdirectory(libs/network/src)
5151
add_subdirectory(libs/network/test)
52+
add_subdirectory(libs/network/experiment)
5253
if (NOT MSVC)
5354
add_subdirectory(libs/mime/test)
5455
endif(NOT MSVC)
+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Copyright (c) Dean Michael Berris 2010.
2+
# Distributed under the Boost Software License, Version 1.0.
3+
# (See accompanying file LICENSE_1_0.txt or copy at
4+
# http://www.boost.org/LICENSE_1_0.txt)
5+
6+
# Build script for the BASE64 experiment: adds the project source root to the
# include path and, when Boost was found, builds one executable from
# utils_base64_experiment.cpp.
include_directories(${CPP-NETLIB_SOURCE_DIR})
7+
# NOTE(review): this overrides CMAKE_BUILD_TYPE for this directory regardless
# of what the user configured - presumably so that the benchmark is always
# measured on optimized code; confirm that this is intended.
set(CMAKE_BUILD_TYPE Release)
8+
if (Boost_FOUND)
9+
add_executable(cpp-netlib-utils_base64_experiment utils_base64_experiment.cpp)
10+
# Link against the Boost libraries and the platform thread library.
target_link_libraries(cpp-netlib-utils_base64_experiment ${Boost_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
11+
# On Linux the executable is additionally linked with librt
# (presumably for the timing functions used by the benchmark - TODO confirm).
if (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
12+
target_link_libraries(cpp-netlib-utils_base64_experiment rt)
13+
endif()
14+
# Place the resulting binary into the "tests" directory of the build tree.
set_target_properties(cpp-netlib-utils_base64_experiment
15+
PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CPP-NETLIB_BINARY_DIR}/tests)
16+
endif()

libs/network/experiment/utils/base64-standalone.hpp

+426
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
#ifndef BOOST_NETWORK_UTILS_BASE64_STATEFUL_BUFFER_HPP
2+
#define BOOST_NETWORK_UTILS_BASE64_STATEFUL_BUFFER_HPP
3+
4+
#include <boost/archive/iterators/base64_from_binary.hpp>
5+
#include <boost/archive/iterators/transform_width.hpp>
6+
#include <boost/range/begin.hpp>
7+
#include <boost/range/end.hpp>
8+
#include <boost/array.hpp>
9+
#include <algorithm>
10+
#include <iterator>
11+
#include <string>
12+
13+
namespace boost {
14+
namespace network {
15+
namespace utils {
16+
17+
// Uses base64_from_binary and transform_width to implement a BASE64
18+
// converter working on an iterator range. Because the transform_width
19+
// encodes immediately every input byte, while the BASE64 encoding processes
20+
// the input by byte-triplets, if the input sequence does not end at the
21+
// three-byte boundary, the rest is remembered in an encoding state to
22+
// be able to continue with the next chunk. It uses an internal buffer
23+
// of 4095 input octets to be able to read the input by octet-triplets.
24+
//
25+
// Summarized interface:
26+
//
27+
// struct state<Value> {
28+
// bool empty () const;
29+
// void clear();
30+
// }
31+
//
32+
// OutputIterator encode(InputIterator begin, InputIterator end,
33+
// OutputIterator output, State & rest)
34+
// OutputIterator encode_rest(OutputIterator output, State & rest)
35+
// OutputIterator encode(InputRange const & input, OutputIterator output,
36+
// State & rest)
37+
// OutputIterator encode(char const * value, OutputIterator output,
38+
// state<char> & rest)
39+
// std::basic_string<Char> encode(InputRange const & value, State & rest)
40+
// std::basic_string<Char> encode(char const * value, state<char> & rest)
41+
//
42+
// OutputIterator encode(InputIterator begin, InputIterator end,
43+
// OutputIterator output)
44+
// OutputIterator encode(InputRange const & input, OutputIterator output)
45+
// OutputIterator encode(char const * value, OutputIterator output)
46+
// std::basic_string<Char> encode(InputRange const & value)
47+
//   std::basic_string<Char> encode(char const * value)
48+
49+
namespace base64_stateful_buffer {
50+
51+
// force using the ostream_iterator from boost::archive to write wide
52+
// characters reliably, although wchar_t may not be a native character type
53+
using namespace boost::archive::iterators;
54+
55+
template <typename Value>
56+
struct state {
57+
state() : size(0) {}
58+
59+
state(state<Value> const & source)
60+
: data(source.data), size(source.size) {}
61+
62+
bool empty() const { return size == 0; }
63+
64+
void clear() { size = 0; }
65+
66+
private:
67+
typedef boost::array<Value, 3> data_type;
68+
typedef typename data_type::const_iterator const_iterator_type;
69+
70+
template <typename InputIterator>
71+
void fill(InputIterator begin, InputIterator end) {
72+
// make sure that there is always zero padding for the incomplete
73+
// triplet; the encode will read three bytes from the vector
74+
data.fill(0);
75+
size = std::copy(begin, end, data.begin()) - data.begin();
76+
}
77+
78+
template <typename OutputIterator>
79+
OutputIterator write(OutputIterator output) {
80+
return std::copy(data.begin(), data.begin() + size, output);
81+
}
82+
83+
const_iterator_type begin() const { return data.begin(); }
84+
85+
const_iterator_type end() const { return data.begin() + size; }
86+
87+
data_type data;
88+
std::size_t size;
89+
90+
template <
91+
typename InputIterator,
92+
typename OutputIterator,
93+
typename State
94+
>
95+
friend OutputIterator encode(InputIterator begin,
96+
InputIterator end,
97+
OutputIterator output,
98+
State & rest);
99+
template <
100+
typename State,
101+
typename OutputIterator
102+
>
103+
friend OutputIterator encode_rest(OutputIterator output,
104+
State & rest);
105+
};
106+
107+
template <
108+
typename InputIterator,
109+
typename OutputIterator,
110+
typename State
111+
>
112+
OutputIterator encode(InputIterator begin,
113+
InputIterator end,
114+
OutputIterator output,
115+
State & rest) {
116+
typedef typename iterator_value<InputIterator>::type value_type;
117+
// declare the buffer type for 1365 octet triplets; make sure that the
118+
// number is divisible by three if you change it (!)
119+
const std::size_t BufferSize = 4095;
120+
BOOST_STATIC_ASSERT(BufferSize / 3 * 3 == BufferSize);
121+
typedef boost::array<value_type, BufferSize> buffer_type;
122+
// declare the encoding iterator type
123+
typedef base64_from_binary<
124+
transform_width<InputIterator, 6, 8>
125+
> base64_text;
126+
if (begin != end) {
127+
// declare the buffer, a variable to remmeber its size and the size
128+
// which can be encoded (the nearest lower size divisible by three)
129+
buffer_type buffer;
130+
std::size_t buffer_size = 0, encode_size = 0;
131+
// if the previous state contained an incomplete octet triplet, put
132+
// it to the start of the buffer to get it prepended to the input
133+
if (!rest.empty()) {
134+
buffer_size = rest.size;
135+
rest.write(buffer.begin());
136+
rest.clear();
137+
}
138+
// iterate over the entire input
139+
while (begin != end) {
140+
// fill the buffer with the input as much as possible
141+
while (begin != end && buffer_size < buffer.size())
142+
buffer[buffer_size++] = *begin++;
143+
// if the buffer could not be filled completely, compute
144+
// the size which can be encoded immediately.
145+
encode_size = buffer_size / 3 * 3;
146+
if (encode_size > 0) {
147+
// encode the buffer part of the size divisible by three
148+
base64_text base64_begin(buffer.begin()),
149+
base64_end(buffer.begin() + encode_size);
150+
output = std::copy(base64_begin, base64_end, output);
151+
// zero the buffer size to prepare for the next iteration
152+
buffer_size = 0;
153+
}
154+
}
155+
// if the complete buffer could not be encoded, store the last
156+
// incomplete octet triplet to the transiting state
157+
if (buffer_size > encode_size)
158+
rest.fill(buffer.begin() + encode_size,
159+
buffer.begin() + buffer_size);
160+
}
161+
return output;
162+
}
163+
164+
template <
165+
typename State,
166+
typename OutputIterator
167+
>
168+
OutputIterator encode_rest(OutputIterator output,
169+
State & rest) {
170+
typedef typename State::const_iterator_type iterator_type;
171+
// declare the encoding iterator type
172+
typedef base64_from_binary<
173+
transform_width<iterator_type, 6, 8>
174+
> base64_text;
175+
if (!rest.empty()) {
176+
// encode the incomplete octet triplet using zeros as padding
177+
// (an artificial input continuation)
178+
base64_text base64_begin(rest.begin()), base64_end(rest.end());
179+
output = std::copy(base64_begin, base64_end, output);
180+
// at least one padding '=' will be always needed - at least two
181+
// bits are missing in the finally encoded 6-bit value
182+
if (rest.size > 0) {
183+
*output++ = '=';
184+
// if the last octet was the first in the triplet (the index was,
185+
// four bits are missing in the finally encoded 6-bit value;
186+
// another '=' character is needed for the another two bits
187+
if (rest.size == 1)
188+
*output++ = '=';
189+
}
190+
rest.clear();
191+
}
192+
return output;
193+
}
194+
195+
template <
196+
typename InputIterator,
197+
typename OutputIterator
198+
>
199+
OutputIterator encode(InputIterator begin,
200+
InputIterator end,
201+
OutputIterator output) {
202+
state<typename iterator_value<InputIterator>::type> rest;
203+
output = encode(begin, end, output, rest);
204+
return encode_rest(output, rest);
205+
}
206+
207+
template <
208+
typename InputRange,
209+
typename OutputIterator
210+
>
211+
OutputIterator encode(InputRange const & value,
212+
OutputIterator output) {
213+
return encode(boost::begin(value), boost::end(value), output);
214+
}
215+
216+
// Encodes a zero-terminated character string to BASE64 including the
// padding and writes the characters to output; the terminating zero is
// not encoded.  The value must point to a valid zero-terminated string.
// Returns the output iterator pointing past the last character written.
template <typename OutputIterator>
OutputIterator encode(char const * value,
                      OutputIterator output) {
    // std::char_traits<char>::length is declared in <string>, which this
    // header includes; calling the unqualified strlen relied on <cstring>
    // being pulled in by some other header
    return encode(value,
                  value + std::char_traits<char>::length(value),
                  output);
}
221+
222+
// Encodes all values of the input range to BASE64 including the padding
// and returns the result as a string of the character type Char, which
// has to be specified explicitly by the caller.
template <typename Char, typename InputRange>
std::basic_string<Char> encode(InputRange const & value) {
    std::basic_string<Char> encoded;
    // delegate to the overload writing to an output iterator
    encode(value, std::back_inserter(encoded));
    return encoded;
}
231+
232+
// Encodes a zero-terminated character string to BASE64 including the
// padding and returns the result as a string of the character type Char,
// which has to be specified explicitly by the caller.
template <typename Char>
std::basic_string<Char> encode(char const * value) {
    std::basic_string<Char> encoded;
    // delegate to the overload writing to an output iterator
    encode(value, std::back_inserter(encoded));
    return encoded;
}
238+
239+
// the function overloads for string literals encode the input without
240+
// the terminating zero, which is usually expected, because the trailing
241+
// zero byte is not considered a part of the string value; the overloads
242+
// for an input range would wrap the string literal by Boost.Range and
243+
// encode the full memory occupied by the string literal - including the
244+
// unwanted last zero byte
245+
246+
} // namespace base64_stateful_buffer
247+
248+
} // namespace utils
249+
} // namespace network
250+
} // namespace boost
251+
252+
#endif // BOOST_NETWORK_UTILS_BASE64_STATEFUL_BUFFER_HPP

0 commit comments

Comments
 (0)