@@ -83,17 +83,16 @@ def canonicalize_url(http://github.com/url,%20keep_blank_values=True,%20keep_fragments=False,%3C/div%3E%3C/code%3E%3C/div%3E%3C/td%3E%3C/tr%3E%3Ctr%20class=%22diff-line-row%22%3E%3Ctd%20data-grid-cell-id=%22diff-5e903e2ea0d927846a0d01988f13b868e2a6a1b0c7099452370d9e411f224052-83-83-0%22%20data-selected=%22false%22%20role=%22gridcell%22%20style=%22background-color:var(--bgColor-default);text-align:center" tabindex="-1" valign="top" class="focusable-grid-cell diff-line-number position-relative diff-line-number-neutral left-side">83
83
try :
84
84
scheme , netloc , path , params , query , fragment = _safe_ParseResult (
85
85
parse_url (url ), encoding = encoding )
86
- except UnicodeError as e :
87
- if encoding != 'utf8' :
88
- scheme , netloc , path , params , query , fragment = _safe_ParseResult (
89
- parse_url (url ), encoding = 'utf8' )
90
- else :
91
- raise
86
+ except UnicodeEncodeError as e :
87
+ scheme , netloc , path , params , query , fragment = _safe_ParseResult (
88
+ parse_url (url ), encoding = 'utf8' )
92
89
93
90
# 1. decode query-string as UTF-8 (or keep raw bytes),
94
91
# sort values,
95
92
# and percent-encode them back
96
- if not six .PY2 :
93
+ if six .PY2 :
94
+ keyvals = parse_qsl (query , keep_blank_values )
95
+ else :
97
96
# Python3's urllib.parse.parse_qsl does not work as wanted
98
97
# for percent-encoded characters that do not match passed encoding,
99
98
# they get lost.
@@ -118,8 +117,6 @@ def canonicalize_url(http://github.com/url,%20keep_blank_values=True,%20keep_fragments=False,%3C/div%3E%3C/code%3E%3C/div%3E%3C/td%3E%3C/tr%3E%3Ctr%20class=%22diff-line-row%22%3E%3Ctd%20data-grid-cell-id=%22diff-5e903e2ea0d927846a0d01988f13b868e2a6a1b0c7099452370d9e411f224052-118-117-0%22%20data-selected=%22false%22%20role=%22gridcell%22%20style=%22background-color:var(--bgColor-default);text-align:center" tabindex="-1" valign="top" class="focusable-grid-cell diff-line-number position-relative diff-line-number-neutral left-side">118
117
# IRIs (namely, to be able to include non-ASCII characters) can only be
119
118
# used if the query part is encoded in UTF-8.
120
119
keyvals = parse_qsl_to_bytes (query , keep_blank_values )
121
- else :
122
- keyvals = parse_qsl (query , keep_blank_values )
123
120
keyvals .sort ()
124
121
query = urlencode (keyvals )
125
122
@@ -138,16 +135,17 @@ def _unquotepath(path):
138
135
for reserved in ('2f' , '2F' , '3f' , '3F' ):
139
136
path = path .replace ('%' + reserved , '%25' + reserved .upper ())
140
137
141
- if six .PY3 :
142
- # standard lib's unquote() does not work in Python 3
143
- # for non-UTF-8 percent-escaped characters, they get lost.
138
+ if six .PY2 :
139
+ # in Python 2, '%a3' becomes '\xa3', which is what we want
140
+ return unquote (path )
141
+ else :
142
+ # in Python 3,
143
+ # standard lib's unquote() does not work for non-UTF-8
144
+ # percent-escaped characters, they get lost.
144
145
# e.g., '%a3' becomes 'REPLACEMENT CHARACTER' (U+FFFD)
145
146
#
146
147
# unquote_to_bytes() returns raw bytes instead
147
148
return unquote_to_bytes (path )
148
- else :
149
- # in Python 2, '%a3' becomes '\xa3', which is what we want
150
- return unquote (path )
151
149
152
150
153
151
def parse_url (url , encoding = None ):
0 commit comments