Skip to content

Commit 673b853

Browse files
committed
Introduced new chunked transfer encoding parser to remove chunk markers in streaming clients.
1 parent 3183577 commit 673b853

File tree

7 files changed

+168
-53
lines changed

7 files changed

+168
-53
lines changed

boost/network/protocol/http/client/async_impl.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,15 @@ struct async_client
3939

4040
async_client(bool cache_resolved, bool follow_redirect,
4141
bool always_verify_peer, int timeout,
42+
bool remove_chunk_markers,
4243
std::shared_ptr<boost::asio::io_service> service,
4344
optional<string_type> certificate_filename,
4445
optional<string_type> verify_path,
4546
optional<string_type> certificate_file,
4647
optional<string_type> private_key_file,
4748
optional<string_type> ciphers,
4849
optional<string_type> sni_hostname, long ssl_options)
49-
: connection_base(cache_resolved, follow_redirect, timeout),
50+
: connection_base(cache_resolved, follow_redirect, timeout, remove_chunk_markers),
5051
service_ptr(service.get() ? service
5152
: std::make_shared<boost::asio::io_service>()),
5253
service_(*service_ptr),

boost/network/protocol/http/client/connection/async_base.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ struct async_connection_base {
4343
// tag.
4444
static connection_ptr new_connection(
4545
resolve_function resolve, resolver_type &resolver, bool follow_redirect,
46-
bool always_verify_peer, bool https, int timeout,
46+
bool always_verify_peer, bool https, int timeout, bool remove_chunk_markers,
4747
optional<string_type> certificate_filename = optional<string_type>(),
4848
optional<string_type> const &verify_path = optional<string_type>(),
4949
optional<string_type> certificate_file = optional<string_type>(),
@@ -59,7 +59,7 @@ struct async_connection_base {
5959
certificate_filename, verify_path, certificate_file, private_key_file,
6060
ciphers, sni_hostname, ssl_options);
6161
auto temp = std::make_shared<async_connection>(
62-
resolver, resolve, follow_redirect, timeout, std::move(delegate));
62+
resolver, resolve, follow_redirect, timeout, remove_chunk_markers, std::move(delegate));
6363
BOOST_ASSERT(temp != nullptr);
6464
return temp;
6565
}

boost/network/protocol/http/client/connection/async_normal.hpp

+134-37
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include <iterator>
1313
#include <cstdint>
14+
#include <iostream>
1415
#include <boost/algorithm/string/trim.hpp>
1516
#include <boost/asio/steady_timer.hpp>
1617
#include <boost/asio/placeholders.hpp>
@@ -37,6 +38,77 @@ namespace network {
3738
namespace http {
3839
namespace impl {
3940

41+
template <class Tag>
42+
struct chunk_encoding_parser {
43+
44+
chunk_encoding_parser() : state(state_t::header), chunk_size(0) {}
45+
46+
enum state_t { header, header_end, data, data_end };
47+
48+
state_t state;
49+
size_t chunk_size;
50+
std::array<typename char_<Tag>::type, 1024> buffer;
51+
52+
void update_chunk_size(boost::iterator_range<typename std::array<typename char_<Tag>::type, 1024>::const_iterator> const& range) {
53+
if (range.empty())
54+
return;
55+
std::stringstream ss;
56+
ss << std::hex << range;
57+
size_t size;
58+
ss >> size;
59+
chunk_size = (chunk_size << (range.size()*4)) + size;
60+
}
61+
62+
boost::iterator_range<typename std::array<typename char_<Tag>::type, 1024>::const_iterator> operator()(boost::iterator_range<typename std::array<typename char_<Tag>::type, 1024>::const_iterator> const& range) {
63+
auto iter = boost::begin(range);
64+
auto begin = iter;
65+
auto pos = boost::begin(buffer);
66+
67+
while (iter != boost::end(range))
68+
switch(state) {
69+
case state_t::header:
70+
iter = std::find(iter, boost::end(range), '\r');
71+
update_chunk_size(boost::make_iterator_range(begin, iter));
72+
if (iter != boost::end(range)) {
73+
state = state_t::header_end;
74+
++iter;
75+
}
76+
break;
77+
78+
case state_t::header_end:
79+
BOOST_ASSERT(*iter == '\n');
80+
++iter;
81+
state = state_t::data;
82+
break;
83+
84+
case state_t::data:
85+
if (chunk_size == 0) {
86+
BOOST_ASSERT(*iter == '\r');
87+
++iter;
88+
state = state_t::data_end;
89+
} else {
90+
auto len = std::min(chunk_size, (size_t)std::distance(iter, boost::end(range)));
91+
begin = iter;
92+
iter = std::next(iter, len);
93+
pos = std::copy(begin, iter, pos);
94+
chunk_size -= len;
95+
}
96+
break;
97+
98+
case state_t::data_end:
99+
BOOST_ASSERT (*iter == '\n');
100+
++iter;
101+
begin = iter;
102+
state = state_t::header;
103+
break;
104+
105+
default:
106+
BOOST_ASSERT(false && "Bug, report this to the developers!");
107+
}
108+
return boost::make_iterator_range(boost::begin(buffer), pos);
109+
}
110+
};
111+
40112
template <class Tag, unsigned version_major, unsigned version_minor>
41113
struct async_connection_base;
42114

@@ -72,8 +144,10 @@ struct http_async_connection
72144

73145
http_async_connection(resolver_type& resolver, resolve_function resolve,
74146
bool follow_redirect, int timeout,
147+
bool remove_chunk_markers,
75148
connection_delegate_ptr delegate)
76149
: timeout_(timeout),
150+
remove_chunk_markers_(remove_chunk_markers),
77151
timer_(resolver.get_io_service()),
78152
is_timedout_(false),
79153
follow_redirect_(follow_redirect),
@@ -348,8 +422,11 @@ struct http_async_connection
348422

349423
// The invocation of the callback is synchronous to allow us to
350424
// wait before scheduling another read.
351-
callback(make_iterator_range(begin, end), ec);
352-
425+
if (this->is_chunk_encoding && remove_chunk_markers_) {
426+
callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec);
427+
} else {
428+
callback(make_iterator_range(begin, end), ec);
429+
}
353430
auto self = this->shared_from_this();
354431
delegate_->read_some(
355432
boost::asio::mutable_buffers_1(this->part.data(),
@@ -388,14 +465,28 @@ struct http_async_connection
388465
// We call the callback function synchronously passing the error
389466
// condition (in this case, end of file) so that it can handle it
390467
// appropriately.
391-
callback(make_iterator_range(begin, end), ec);
468+
if (this->is_chunk_encoding && remove_chunk_markers_) {
469+
callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec);
470+
} else {
471+
callback(make_iterator_range(begin, end), ec);
472+
}
392473
} else {
393474
string_type body_string;
394-
std::swap(body_string, this->partial_parsed);
395-
body_string.append(this->part.begin(), this->part.begin() + bytes_transferred);
396-
if (this->is_chunk_encoding) {
397-
this->body_promise.set_value(parse_chunk_encoding(body_string));
475+
if (this->is_chunk_encoding && remove_chunk_markers_) {
476+
for (size_t i = 0; i < this->partial_parsed.size(); i += 1024) {
477+
auto range = parse_chunk_encoding(
478+
boost::make_iterator_range(this->partial_parsed.data() + i,
479+
this->partial_parsed.data() + std::min(i+1024, this->partial_parsed.size())));
480+
body_string.append(boost::begin(range), boost::end(range));
481+
}
482+
this->partial_parsed.clear();
483+
auto range = parse_chunk_encoding(boost::make_iterator_range(this->part.begin(),
484+
this->part.begin() + bytes_transferred));
485+
body_string.append(boost::begin(range), boost::end(range));
486+
this->body_promise.set_value(body_string);
398487
} else {
488+
std::swap(body_string, this->partial_parsed);
489+
body_string.append(this->part.begin(), this->part.begin() + bytes_transferred);
399490
this->body_promise.set_value(body_string);
400491
}
401492
}
@@ -417,7 +508,11 @@ struct http_async_connection
417508
this->part.begin();
418509
typename protocol_base::buffer_type::const_iterator end = begin;
419510
std::advance(end, bytes_transferred);
420-
callback(make_iterator_range(begin, end), ec);
511+
if (this->is_chunk_encoding && remove_chunk_markers_) {
512+
callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec);
513+
} else {
514+
callback(make_iterator_range(begin, end), ec);
515+
}
421516
auto self = this->shared_from_this();
422517
delegate_->read_some(
423518
boost::asio::mutable_buffers_1(this->part.data(),
@@ -476,38 +571,39 @@ struct http_async_connection
476571
}
477572
}
478573

479-
string_type parse_chunk_encoding(string_type& body_string) {
480-
string_type body;
481-
string_type crlf = "\r\n";
482-
483-
typename string_type::iterator begin = body_string.begin();
484-
for (typename string_type::iterator iter =
485-
std::search(begin, body_string.end(), crlf.begin(), crlf.end());
486-
iter != body_string.end();
487-
iter =
488-
std::search(begin, body_string.end(), crlf.begin(), crlf.end())) {
489-
string_type line(begin, iter);
490-
if (line.empty()) {
491-
break;
492-
}
493-
std::stringstream stream(line);
494-
int len;
495-
stream >> std::hex >> len;
496-
std::advance(iter, 2);
497-
if (len == 0) {
498-
break;
499-
}
500-
if (len <= body_string.end() - iter) {
501-
body.insert(body.end(), iter, iter + len);
502-
std::advance(iter, len + 2);
503-
}
504-
begin = iter;
505-
}
574+
// string_type parse_chunk_encoding(string_type& body_string) {
575+
// string_type body;
576+
// string_type crlf = "\r\n";
506577

507-
return body;
508-
}
578+
// typename string_type::iterator begin = body_string.begin();
579+
// for (typename string_type::iterator iter =
580+
// std::search(begin, body_string.end(), crlf.begin(), crlf.end());
581+
// iter != body_string.end();
582+
// iter =
583+
// std::search(begin, body_string.end(), crlf.begin(), crlf.end())) {
584+
// string_type line(begin, iter);
585+
// if (line.empty()) {
586+
// break;
587+
// }
588+
// std::stringstream stream(line);
589+
// int len;
590+
// stream >> std::hex >> len;
591+
// std::advance(iter, 2);
592+
// if (len == 0) {
593+
// break;
594+
// }
595+
// if (len <= body_string.end() - iter) {
596+
// body.insert(body.end(), iter, iter + len);
597+
// std::advance(iter, len + 2);
598+
// }
599+
// begin = iter;
600+
// }
601+
602+
// return body;
603+
// }
509604

510605
int timeout_;
606+
bool remove_chunk_markers_;
511607
boost::asio::steady_timer timer_;
512608
bool is_timedout_;
513609
bool follow_redirect_;
@@ -517,6 +613,7 @@ struct http_async_connection
517613
connection_delegate_ptr delegate_;
518614
boost::asio::streambuf command_streambuf;
519615
string_type method;
616+
chunk_encoding_parser<Tag> parse_chunk_encoding;
520617
};
521618

522619
} // namespace impl

boost/network/protocol/http/client/facade.hpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,8 @@ class basic_client_facade {
303303
options.openssl_verify_path(), options.openssl_certificate_file(),
304304
options.openssl_private_key_file(), options.openssl_ciphers(),
305305
options.openssl_sni_hostname(), options.openssl_options(),
306-
options.io_service(), options.timeout()));
306+
options.io_service(), options.timeout(),
307+
options.remove_chunk_markers()));
307308
}
308309
};
309310

boost/network/protocol/http/client/options.hpp

+14-2
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ class client_options {
3434
openssl_options_(0),
3535
io_service_(),
3636
always_verify_peer_(true),
37-
timeout_(0) {}
37+
timeout_(0),
38+
remove_chunk_markers_(false) {}
3839

3940
client_options(client_options const& other)
4041
: cache_resolved_(other.cache_resolved_),
@@ -48,7 +49,8 @@ class client_options {
4849
openssl_options_(other.openssl_options_),
4950
io_service_(other.io_service_),
5051
always_verify_peer_(other.always_verify_peer_),
51-
timeout_(other.timeout_) {}
52+
timeout_(other.timeout_),
53+
remove_chunk_markers_(other.remove_chunk_markers) {}
5254

5355
client_options& operator=(client_options other) {
5456
other.swap(*this);
@@ -69,6 +71,7 @@ class client_options {
6971
swap(io_service_, other.io_service_);
7072
swap(always_verify_peer_, other.always_verify_peer_);
7173
swap(timeout_, other.timeout_);
74+
swap(remove_chunk_markers_, other.remove_chunk_markers_);
7275
}
7376

7477
/// Specify whether the client should cache resolved endpoints.
@@ -154,6 +157,12 @@ class client_options {
154157
return *this;
155158
}
156159

160+
/// Set an overall timeout for HTTP requests.
161+
client_options& remove_chunk_markers(bool v) {
162+
remove_chunk_markers_ = v;
163+
return *this;
164+
}
165+
157166
bool cache_resolved() const { return cache_resolved_; }
158167

159168
bool follow_redirects() const { return follow_redirects_; }
@@ -190,6 +199,8 @@ class client_options {
190199

191200
int timeout() const { return timeout_; }
192201

202+
bool remove_chunk_markers() const { return remove_chunk_markers_; }
203+
193204
private:
194205
bool cache_resolved_;
195206
bool follow_redirects_;
@@ -203,6 +214,7 @@ class client_options {
203214
std::shared_ptr<boost::asio::io_service> io_service_;
204215
bool always_verify_peer_;
205216
int timeout_;
217+
bool remove_chunk_markers_;
206218
};
207219

208220
template <class Tag>

boost/network/protocol/http/client/pimpl.hpp

+6-4
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,12 @@ struct basic_client_impl
7474
optional<string_type> const& private_key_file,
7575
optional<string_type> const& ciphers,
7676
optional<string_type> const& sni_hostname, long ssl_options,
77-
std::shared_ptr<boost::asio::io_service> service, int timeout)
78-
: base_type(cache_resolved, follow_redirect, always_verify_peer, timeout,
79-
service, certificate_filename, verify_path, certificate_file,
80-
private_key_file, ciphers, sni_hostname, ssl_options) {}
77+
std::shared_ptr<boost::asio::io_service> service, int timeout,
78+
bool remove_chunk_markers)
79+
: base_type(cache_resolved, follow_redirect, always_verify_peer, timeout,
80+
remove_chunk_markers, service, certificate_filename, verify_path,
81+
certificate_file, private_key_file, ciphers, sni_hostname,
82+
ssl_options) {}
8183

8284
~basic_client_impl() = default;
8385
};

0 commit comments

Comments
 (0)