Skip to content

Commit b668d54

Browse files
authored
Refactored domain name processing (#141)
* Moved domain functions to header file * Refactored domain functions * Refactored domain functions some more * Updated host processing * Reworked domain processing to later allow parameterizing allocators * Reworked domain to u8 processing * Made more extensive use of range-v3 in domain name processing * Made more extensive use of range v3
1 parent a1059b5 commit b668d54

File tree

15 files changed

+517
-435
lines changed

15 files changed

+517
-435
lines changed

include/skyr/v1/containers/static_vector.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,13 @@ class static_vector {
111111

112112
///
113113
/// \return
114-
[[nodiscard]] constexpr auto data() const noexcept -> const value_type * {
114+
[[nodiscard]] constexpr auto data() noexcept -> pointer {
115+
return impl_.data();
116+
}
117+
118+
///
119+
/// \return
120+
[[nodiscard]] constexpr auto data() const noexcept -> const_pointer {
115121
return impl_.data();
116122
}
117123

@@ -133,6 +139,11 @@ class static_vector {
133139
return size_ == 0;
134140
}
135141

142+
///
143+
constexpr void clear() noexcept {
144+
size_ = 0;
145+
}
146+
136147
///
137148
/// \return
138149
[[nodiscard]] constexpr auto begin() noexcept -> iterator {

include/skyr/v1/core/check_input.hpp

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88

99
#include <locale>
1010
#include <string>
11+
#include <range/v3/distance.hpp>
12+
#include <range/v3/algorithm/find_if_not.hpp>
13+
#include <range/v3/algorithm/remove_if.hpp>
14+
#include <range/v3/view/reverse.hpp>
15+
#include <range/v3/action/erase.hpp>
1116

1217
namespace skyr {
1318
inline namespace v1 {
@@ -16,18 +21,17 @@ constexpr static auto is_c0_control_or_space = [] (auto byte) {
1621
};
1722

1823
inline auto remove_leading_c0_control_or_space(std::string_view input, bool *validation_error) {
19-
auto first = begin(input), last = end(input);
20-
auto it = std::find_if_not(first, last, is_c0_control_or_space);
21-
*validation_error |= (it != first);
22-
input.remove_prefix(std::distance(first, it));
24+
auto it = ranges::find_if_not(input, is_c0_control_or_space);
25+
*validation_error |= (it != ranges::cbegin(input));
26+
input.remove_prefix(std::distance(ranges::cbegin(input), it));
2327
return input;
2428
}
2529

2630
inline auto remove_trailing_c0_control_or_space(std::string_view input, bool *validation_error) {
27-
auto first = rbegin(input), last = rend(input);
28-
auto it = std::find_if_not(first, last, is_c0_control_or_space);
29-
*validation_error |= (it != first);
30-
input.remove_suffix(std::distance(first, it));
31+
auto reversed = ranges::reverse_view(input);
32+
auto it = ranges::find_if_not(reversed, is_c0_control_or_space);
33+
*validation_error |= (it != ranges::cbegin(reversed));
34+
input.remove_suffix(std::distance(ranges::cbegin(reversed), it));
3135
return input;
3236
}
3337

@@ -36,10 +40,9 @@ inline auto remove_tabs_and_newlines(std::string &input, bool *validation_error)
3640
return (byte == '\t') || (byte == '\r') || (byte == '\n');
3741
};
3842

39-
auto first = begin(input), last = end(input);
40-
auto it = std::remove_if(first, last, is_tab_or_newline);
41-
*validation_error |= (it != last);
42-
input.erase(it, last);
43+
auto it = ranges::remove_if(input, is_tab_or_newline);
44+
*validation_error |= (it != std::cend(input));
45+
ranges::erase(input, it, std::cend(input));
4346
}
4447
} // namespace v1
4548
} // namespace skyr

include/skyr/v1/core/host.hpp

Lines changed: 109 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,22 @@
99
#include <variant>
1010
#include <string>
1111
#include <cassert>
12+
#include <algorithm>
1213
#include <tl/expected.hpp>
14+
#include <range/v3/algorithm/find_if.hpp>
15+
#include <range/v3/range/access.hpp>
1316
#include <skyr/v1/core/errors.hpp>
1417
#include <skyr/v1/network/ipv4_address.hpp>
1518
#include <skyr/v1/network/ipv6_address.hpp>
19+
#include <skyr/v1/percent_encoding/percent_encoded_char.hpp>
20+
#include <skyr/v1/percent_encoding/percent_decode.hpp>
21+
#include <skyr/v1/domain/domain.hpp>
22+
1623

1724
namespace skyr {
1825
inline namespace v1 {
19-
/// Represents a domain in a [URL host](https://url.spec.whatwg.org/#host-representation)
20-
struct domain {
26+
/// Represents a domain name in a [URL host](https://url.spec.whatwg.org/#host-representation)
struct domain_name {
  /// The domain name string
  std::string name;
};
2330

@@ -35,7 +42,7 @@ class host {
3542
using host_types = std::variant<
3643
skyr::v1::ipv4_address,
3744
skyr::v1::ipv6_address,
38-
skyr::v1::domain,
45+
skyr::v1::domain_name,
3946
skyr::v1::opaque_host,
4047
skyr::v1::empty_host
4148
>;
@@ -54,13 +61,13 @@ class host {
5461

5562
/// Constructor
5663
/// \param host A domain name
57-
explicit host(skyr::v1::domain host)
58-
: host_(host) {}
64+
explicit host(skyr::v1::domain_name host)
65+
: host_(std::move(host)) {}
5966

6067
/// Constructor
6168
/// \param host An opaque host string
6269
explicit host(skyr::v1::opaque_host host)
63-
: host_(host) {}
70+
: host_(std::move(host)) {}
6471

6572
/// Constructor
6673
/// \param host An empty host
@@ -79,7 +86,7 @@ class host {
7986
else if constexpr (std::is_same_v<T, skyr::v1::ipv6_address>) {
8087
return "[" + host.serialize() + "]";
8188
}
82-
else if constexpr (std::is_same_v<T, skyr::v1::domain> ||
89+
else if constexpr (std::is_same_v<T, skyr::v1::domain_name> ||
8390
std::is_same_v<T, skyr::v1::opaque_host>) {
8491
return host.name;
8592
}
@@ -93,14 +100,14 @@ class host {
93100

94101
///
95102
/// \return \c true if the host is a domain, \c false otherwise
96-
[[nodiscard]] auto is_domain() const noexcept {
97-
return std::holds_alternative<skyr::v1::domain>(host_);
103+
[[nodiscard]] auto is_domain_name() const noexcept {
104+
return std::holds_alternative<skyr::v1::domain_name>(host_);
98105
}
99106

100107
///
101108
/// \return
102-
[[nodiscard]] auto domain() const noexcept -> std::optional<std::string> {
103-
return is_domain() ? std::make_optional(std::get<skyr::v1::domain>(host_).name) : std::nullopt;
109+
[[nodiscard]] auto domain_name() const noexcept -> std::optional<std::string> {
110+
return is_domain_name() ? std::make_optional(std::get<skyr::v1::domain_name>(host_).name) : std::nullopt;
104111
}
105112

106113
///
@@ -150,16 +157,105 @@ class host {
150157
host_types host_;
151158
};
152159

160+
namespace details {
161+
/// Tests whether a code unit is one of the characters this parser rejects in a host:
/// NUL, tab, LF, CR, space, and # % / : < > ? @ [ \ ] ^
constexpr static auto is_forbidden_host_point = [](auto byte) {
  switch (byte) {
    case '\0':
    case '\t':
    case '\n':
    case '\r':
    case ' ':
    case '#':
    case '%':
    case '/':
    case ':':
    case '<':
    case '>':
    case '?':
    case '@':
    case '[':
    case '\\':
    case ']':
    case '^':
      return true;
    default:
      return false;
  }
};
167+
168+
/// Parses an opaque host. The input is rejected if it contains a forbidden host
/// point other than '%'; otherwise every byte is passed through
/// \c percent_encode_byte with the C0 control encode set and the results are
/// concatenated.
/// \param input The host string to parse
/// \param validation_error Set to \c true if a forbidden host point is found;
///        an existing \c true value is left as it is
/// \return The encoded opaque host, or \c url_parse_errc::forbidden_host_point
inline auto parse_opaque_host(std::string_view input,
                              bool *validation_error) -> tl::expected<skyr::v1::opaque_host, url_parse_errc> {
  // '%' is exempt here even though is_forbidden_host_point flags it.
  constexpr static auto is_forbidden = [] (auto byte) -> bool {
    if (byte == '%') {
      return false;
    }
    return is_forbidden_host_point(byte);
  };

  const auto offender = ranges::find_if(input, is_forbidden);
  if (offender != std::cend(input)) {
    *validation_error |= true;
    return tl::make_unexpected(url_parse_errc::forbidden_host_point);
  }

  auto encoded = std::string();
  for (const auto code_unit : input) {
    encoded += percent_encode_byte(std::byte(code_unit), percent_encoding::encode_set::c0_control).to_string();
  }
  return skyr::v1::opaque_host{std::move(encoded)};
}
186+
} // namespace details
187+
153188
/// Parses a string to either a domain, IPv4 address or IPv6 address according to
154189
/// https://url.spec.whatwg.org/#host-parsing
155190
/// \param input An input string
156191
/// \param is_not_special \c true to process only non-special hosts, \c false otherwise
157192
/// \param validation_error Set to \c true if there was a validation error
158193
/// \return A host as a domain (std::string), ipv4_address or ipv6_address, or an error code
159-
auto parse_host(
194+
inline auto parse_host(
160195
std::string_view input,
161196
bool is_not_special,
162-
bool *validation_error) -> tl::expected<host, url_parse_errc>;
197+
bool *validation_error) -> tl::expected<host, url_parse_errc> {
198+
if (input.empty()) {
199+
return host{empty_host{}};
200+
}
201+
202+
if (input.front() == '[') {
203+
if (input.back() != ']') {
204+
*validation_error |= true;
205+
return tl::make_unexpected(url_parse_errc::invalid_ipv6_address);
206+
}
207+
208+
auto view = std::string_view(input);
209+
view.remove_prefix(1);
210+
view.remove_suffix(1);
211+
bool ipv6_validation_error = false;
212+
auto ipv6_address = parse_ipv6_address(view, &ipv6_validation_error);
213+
if (ipv6_address) {
214+
*validation_error = ipv6_validation_error;
215+
return skyr::v1::host{ipv6_address.value()};
216+
}
217+
else {
218+
return tl::make_unexpected(url_parse_errc::invalid_ipv6_address);
219+
}
220+
}
221+
222+
if (is_not_special) {
223+
return details::parse_opaque_host(input, validation_error).and_then(
224+
[] (auto &&h) -> tl::expected<host, url_parse_errc> { return host{h}; });
225+
}
226+
227+
auto domain_name = std::string{};
228+
auto range = percent_encoding::percent_decode_range{input};
229+
for (auto it = std::cbegin(range); it != std::cend(range); ++it) {
230+
if (!*it) {
231+
return tl::make_unexpected(url_parse_errc::cannot_decode_host_point);
232+
}
233+
domain_name.push_back((*it).value());
234+
}
235+
236+
auto ascii_domain = std::string{};
237+
if (!domain_to_ascii(domain_name, &ascii_domain)) {
238+
return tl::make_unexpected(url_parse_errc::domain_error);
239+
}
240+
241+
if (ranges::cend(ascii_domain) != ranges::find_if(ascii_domain, details::is_forbidden_host_point)) {
242+
*validation_error |= true;
243+
return tl::make_unexpected(url_parse_errc::domain_error);
244+
}
245+
246+
bool ipv4_validation_error = false;
247+
auto host = parse_ipv4_address(ascii_domain, &ipv4_validation_error);
248+
if (!host) {
249+
if (host.error() == ipv4_address_errc::overflow) {
250+
return tl::make_unexpected(url_parse_errc::invalid_ipv4_address);
251+
}
252+
else {
253+
return skyr::v1::host{skyr::v1::domain_name{std::move(ascii_domain)}};
254+
}
255+
}
256+
*validation_error = ipv4_validation_error;
257+
return skyr::v1::host{host.value()};
258+
}
163259

164260
/// Parses a string to either a domain, IPv4 address or IPv6 address according to
165261
/// https://url.spec.whatwg.org/#host-parsing

0 commit comments

Comments
 (0)