11
11
#include < skyr/v1/core/schemes.hpp>
12
12
#include < skyr/v1/core/host.hpp>
13
13
#include < skyr/v1/percent_encoding/percent_encoded_char.hpp>
14
- #include < skyr/v1/percent_encoding/percent_decode_range.hpp>
15
14
#include < skyr/v1/string/starts_with.hpp>
16
15
#include " url_parser_context.hpp"
17
16
@@ -21,14 +20,12 @@ using namespace std::string_literals;
21
20
using namespace std ::string_view_literals;
22
21
23
22
namespace {
24
/// Reports whether `byte` occurs anywhere in `view`.
///
/// \param view The characters to search.
/// \param byte The character to look for.
/// \returns `true` if `view` holds at least one `byte`; `false` otherwise
///          (including when `view` is empty).
auto contains(std::string_view view, char byte) noexcept {
  const auto first = std::cbegin(view);
  const auto last = std::cend(view);
  return std::find(first, last, byte) != last;
}
28
27
29
- auto remaining_starts_with (
30
- std::string_view input,
31
- std::string_view chars) noexcept {
28
+ auto remaining_starts_with (std::string_view input, std::string_view chars) noexcept {
32
29
return !input.empty () && starts_with (input.substr (1 ), chars);
33
30
}
34
31
@@ -37,8 +34,8 @@ auto port_number(std::string_view port) noexcept -> tl::expected<std::uint16_t,
37
34
return tl::make_unexpected (url_parse_errc::invalid_port);
38
35
}
39
36
40
- const char * port_first = port.data ();
41
- char * port_last = nullptr ;
37
+ const char * port_first = port.data ();
38
+ char * port_last = nullptr ;
42
39
auto port_value = std::strtoul (port_first, &port_last, 10 );
43
40
44
41
if (port_first == port_last) {
@@ -52,8 +49,7 @@ auto port_number(std::string_view port) noexcept -> tl::expected<std::uint16_t,
52
49
}
53
50
54
51
auto is_url_code_point (char byte) noexcept {
55
- return
56
- std::isalnum (byte, std::locale::classic ()) || contains (byte, " !$&'()*+,-./:;=?@_~" sv);
52
+ return std::isalnum (byte, std::locale::classic ()) || contains (" !$&'()*+,-./:;=?@_~" sv, byte);
57
53
}
58
54
59
55
inline auto is_windows_drive_letter (std::string_view segment) noexcept {
@@ -85,37 +81,26 @@ auto is_double_dot_path_segment(std::string_view segment) noexcept {
85
81
}
86
82
87
83
void shorten_path (std::string_view scheme, std::vector<std::string> &path) {
88
- if (path.empty ()) {
89
- return ;
84
+ if (! path.empty () && !((scheme == " file " sv) && (path. size () == 1 ) && is_windows_drive_letter (path. front ()) )) {
85
+ path. pop_back () ;
90
86
}
91
-
92
- if ((scheme == " file" sv) &&
93
- (path.size () == 1 ) &&
94
- is_windows_drive_letter (path.front ())) {
95
- return ;
96
- }
97
-
98
- path.pop_back ();
99
87
}
100
- } // namespace
101
-
102
- url_parser_context::url_parser_context (
103
- std::string_view input,
104
- bool *validation_error,
105
- const url_record *base,
106
- const url_record *url,
107
- std::optional<url_parse_state> state_override)
108
- : input(input)
109
- , it(begin(input))
110
- , validation_error(validation_error)
111
- , base(base)
112
- , url(http://github.com/url?%20*url%20:%20url_record{})
113
- , state(state_override? state_override.value() : url_parse_state::scheme_start)
114
- , state_override(state_override)
115
- , buffer()
116
- , at_flag(false )
117
- , square_braces_flag(false )
118
- , password_token_seen_flag(false ) {}
88
+ } // namespace
89
+
90
+ url_parser_context::url_parser_context (std::string_view input, bool *validation_error, const url_record *base,
91
+ const url_record *url, std::optional<url_parse_state> state_override)
92
+ : input(input),
93
+ it (begin(input)),
94
+ validation_error(validation_error),
95
+ base(base),
96
+ url(http://github.com/url%20?%20*url%20:%20url_record{}),
97
+ state(state_override ? state_override.value() : url_parse_state::scheme_start),
98
+ state_override(state_override),
99
+ buffer(),
100
+ at_flag(false ),
101
+ square_braces_flag(false ),
102
+ password_token_seen_flag(false ) {
103
+ }
119
104
120
105
auto url_parser_context::parse_scheme_start (char byte) -> tl::expected<url_parse_action, url_parse_errc> {
121
106
if (std::isalpha (byte, std::locale::classic ())) {
@@ -135,7 +120,7 @@ auto url_parser_context::parse_scheme_start(char byte) -> tl::expected<url_parse
135
120
}
136
121
137
122
auto url_parser_context::parse_scheme (char byte) -> tl::expected<url_parse_action, url_parse_errc> {
138
- if (std::isalnum (byte, std::locale::classic ()) || contains (byte, " +-." sv)) {
123
+ if (std::isalnum (byte, std::locale::classic ()) || contains (" +-." sv, byte )) {
139
124
auto lower = std::tolower (byte, std::locale::classic ());
140
125
buffer.push_back (lower);
141
126
} else if (byte == ' :' ) {
@@ -191,8 +176,7 @@ auto url_parser_context::parse_scheme(char byte) -> tl::expected<url_parse_actio
191
176
state = url_parse_state::no_scheme;
192
177
reset ();
193
178
return url_parse_action::continue_;
194
- }
195
- else {
179
+ } else {
196
180
return tl::make_unexpected (url_parse_errc::invalid_scheme_character);
197
181
}
198
182
@@ -256,8 +240,7 @@ auto url_parser_context::parse_relative(char byte) -> tl::expected<url_parse_act
256
240
url.port = base->port ;
257
241
url.path = base->path ;
258
242
url.query = base->query ;
259
- }
260
- else if (byte == ' /' ) {
243
+ } else if (byte == ' /' ) {
261
244
state = url_parse_state::relative_slash;
262
245
} else if (byte == ' ?' ) {
263
246
url.username = base->username ;
@@ -308,11 +291,9 @@ auto url_parser_context::parse_relative_slash(char byte) -> tl::expected<url_par
308
291
*validation_error |= true ;
309
292
}
310
293
state = url_parse_state::special_authority_ignore_slashes;
311
- }
312
- else if (byte == ' /' ) {
294
+ } else if (byte == ' /' ) {
313
295
state = url_parse_state::authority;
314
- }
315
- else {
296
+ } else {
316
297
url.username = base->username ;
317
298
url.password = base->password ;
318
299
url.host = base->host ;
@@ -338,7 +319,8 @@ auto url_parser_context::parse_special_authority_slashes(char byte) -> tl::expec
338
319
return url_parse_action::increment;
339
320
}
340
321
341
- auto url_parser_context::parse_special_authority_ignore_slashes (char byte) -> tl::expected<url_parse_action, url_parse_errc> {
322
+ auto url_parser_context::parse_special_authority_ignore_slashes (char byte)
323
+ -> tl::expected<url_parse_action, url_parse_errc> {
342
324
if ((byte != ' /' ) && (byte != ' \\ ' )) {
343
325
decrement ();
344
326
state = url_parse_state::authority;
@@ -370,9 +352,7 @@ auto url_parser_context::parse_authority(char byte) -> tl::expected<url_parse_ac
370
352
}
371
353
}
372
354
buffer.clear ();
373
- } else if (
374
- ((is_eof ()) || (byte == ' /' ) || (byte == ' ?' ) || (byte == ' #' )) ||
375
- (url.is_special () && (byte == ' \\ ' ))) {
355
+ } else if (((is_eof ()) || (byte == ' /' ) || (byte == ' ?' ) || (byte == ' #' )) || (url.is_special () && (byte == ' \\ ' ))) {
376
356
if (at_flag && buffer.empty ()) {
377
357
*validation_error |= true ;
378
358
return tl::make_unexpected (url_parse_errc::empty_hostname);
@@ -412,21 +392,15 @@ auto url_parser_context::parse_hostname(char byte) -> tl::expected<url_parse_act
412
392
if (state_override && (state_override.value () == url_parse_state::hostname)) {
413
393
return url_parse_action::success;
414
394
}
415
- } else if (
416
- (is_eof () || (byte == ' /' ) || (byte == ' ?' ) || (byte == ' #' )) ||
417
- (url.is_special () && (byte == ' \\ ' ))) {
395
+ } else if ((is_eof () || (byte == ' /' ) || (byte == ' ?' ) || (byte == ' #' )) || (url.is_special () && (byte == ' \\ ' ))) {
418
396
if (it != begin (input)) {
419
397
decrement ();
420
398
}
421
399
422
400
if (url.is_special () && buffer.empty ()) {
423
401
*validation_error |= true ;
424
402
return tl::make_unexpected (url_parse_errc::empty_hostname);
425
- }
426
- else if (
427
- state_override &&
428
- buffer.empty () &&
429
- (url.includes_credentials () || url.port )) {
403
+ } else if (state_override && buffer.empty () && (url.includes_credentials () || url.port )) {
430
404
*validation_error |= true ;
431
405
return url_parse_action::success;
432
406
}
@@ -456,10 +430,8 @@ auto url_parser_context::parse_hostname(char byte) -> tl::expected<url_parse_act
456
430
auto url_parser_context::parse_port (char byte) -> tl::expected<url_parse_action, url_parse_errc> {
457
431
if (std::isdigit (byte, std::locale::classic ())) {
458
432
buffer += byte;
459
- } else if (
460
- ((is_eof ()) || (byte == ' /' ) || (byte == ' ?' ) || (byte == ' #' )) ||
461
- (url.is_special () && (byte == ' \\ ' )) ||
462
- state_override) {
433
+ } else if (((is_eof ()) || (byte == ' /' ) || (byte == ' ?' ) || (byte == ' #' )) || (url.is_special () && (byte == ' \\ ' )) ||
434
+ state_override) {
463
435
if (!buffer.empty ()) {
464
436
auto port = port_number (buffer);
465
437
@@ -471,8 +443,7 @@ auto url_parser_context::parse_port(char byte) -> tl::expected<url_parse_action,
471
443
auto dport = default_port (url.scheme );
472
444
if (dport && (dport.value () == port.value ())) {
473
445
url.port = std::nullopt;
474
- }
475
- else {
446
+ } else {
476
447
url.port = port.value ();
477
448
}
478
449
buffer.clear ();
@@ -505,8 +476,7 @@ auto url_parser_context::parse_file(char byte) -> tl::expected<url_parse_action,
505
476
url.host = base->host ;
506
477
url.path = base->path ;
507
478
url.query = base->query ;
508
- }
509
- else if (byte == ' ?' ) {
479
+ } else if (byte == ' ?' ) {
510
480
url.host = base->host ;
511
481
url.path = base->path ;
512
482
url.query = std::string ();
@@ -522,8 +492,7 @@ auto url_parser_context::parse_file(char byte) -> tl::expected<url_parse_action,
522
492
url.host = base->host ;
523
493
url.path = base->path ;
524
494
shorten_path (url.scheme , url.path );
525
- }
526
- else {
495
+ } else {
527
496
*validation_error |= true ;
528
497
}
529
498
state = url_parse_state::path;
@@ -551,8 +520,7 @@ auto url_parser_context::parse_file_slash(char byte) -> tl::expected<url_parse_a
551
520
state = url_parse_state::file_host;
552
521
} else {
553
522
auto substr = input.substr (std::distance (begin (input), it));
554
- if (base &&
555
- ((base->scheme == " file" ) && !is_windows_drive_letter (substr))) {
523
+ if (base && ((base->scheme == " file" ) && !is_windows_drive_letter (substr))) {
556
524
if (!base->path .empty () && is_windows_drive_letter (base->path [0 ])) {
557
525
url.path .push_back (base->path [0 ]);
558
526
} else {
@@ -643,8 +611,7 @@ auto url_parser_context::parse_path_start(char byte) -> tl::expected<url_parse_a
643
611
}
644
612
645
613
auto url_parser_context::parse_path (char byte) -> tl::expected<url_parse_action, url_parse_errc> {
646
- if (((is_eof ()) || (byte == ' /' )) ||
647
- (url.is_special () && (byte == ' \\ ' )) ||
614
+ if (((is_eof ()) || (byte == ' /' )) || (url.is_special () && (byte == ' \\ ' )) ||
648
615
(!state_override && ((byte == ' ?' ) || (byte == ' #' )))) {
649
616
if (url.is_special () && (byte == ' \\ ' )) {
650
617
*validation_error |= true ;
@@ -655,13 +622,10 @@ auto url_parser_context::parse_path(char byte) -> tl::expected<url_parse_action,
655
622
if (!((byte == ' /' ) || (url.is_special () && (byte == ' \\ ' )))) {
656
623
url.path .emplace_back ();
657
624
}
658
- } else if (
659
- is_single_dot_path_segment (buffer) &&
660
- !((byte == ' /' ) || (url.is_special () && (byte == ' \\ ' )))) {
625
+ } else if (is_single_dot_path_segment (buffer) && !((byte == ' /' ) || (url.is_special () && (byte == ' \\ ' )))) {
661
626
url.path .emplace_back ();
662
627
} else if (!is_single_dot_path_segment (buffer)) {
663
- if ((url.scheme == " file" ) &&
664
- url.path .empty () && is_windows_drive_letter (buffer)) {
628
+ if ((url.scheme == " file" ) && url.path .empty () && is_windows_drive_letter (buffer)) {
665
629
if (!url.host || !url.host .value ().is_empty ()) {
666
630
*validation_error |= true ;
667
631
url.host = skyr::host{skyr::v1::empty_host{}};
@@ -716,8 +680,7 @@ auto url_parser_context::parse_cannot_be_a_base_url(http://github.com/char%20byte) -> tl::expected<u
716
680
auto substr = input.substr (std::distance (std::begin (input), it));
717
681
if (!is_eof () && (!is_url_code_point (byte) && (byte != ' %' ))) {
718
682
*validation_error |= true ;
719
- }
720
- else if ((byte == ' %' ) && !percent_encoding::is_percent_encoded (substr)) {
683
+ } else if ((byte == ' %' ) && !percent_encoding::is_percent_encoded (substr)) {
721
684
*validation_error |= true ;
722
685
}
723
686
if (!is_eof ()) {
@@ -733,10 +696,7 @@ auto url_parser_context::parse_query(char byte) -> tl::expected<url_parse_action
733
696
url.fragment = std::string ();
734
697
state = url_parse_state::fragment;
735
698
} else if (!is_eof ()) {
736
- if ((byte < ' !' ) ||
737
- (byte > ' ~' ) ||
738
- (contains (byte, R"( "#<>)" sv)) ||
739
- ((byte == ' \' ' ) && url.is_special ())) {
699
+ if ((byte < ' !' ) || (byte > ' ~' ) || (contains (R"( "#<>)" sv, byte)) || ((byte == ' \' ' ) && url.is_special ())) {
740
700
auto pct_encoded = percent_encode_byte (std::byte (byte), percent_encoding::encode_set::none);
741
701
url.query .value () += pct_encoded.to_string ();
742
702
} else {
0 commit comments