Skip to content

Commit 26ebccd

Browse files
committed
Upgrade the parsel requirement to >=0.9.3 and use its create_root_node() function to instantiate the root node when finding the form
1 parent 2fe6d12 commit 26ebccd

File tree

3 files changed

+5
-10
lines changed

3 files changed

+5
-10
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ queuelib
77
six>=1.5.2
88
PyDispatcher>=2.0.5
99
service_identity
10-
parsel>=0.9.2
10+
parsel>=0.9.3

scrapy/http/request/form.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from six.moves.urllib.parse import urljoin, urlencode
99
import lxml.html
10-
from lxml import etree
10+
from parsel.selector import create_root_node
1111
import six
1212
from scrapy.http.request import Request
1313
from scrapy.utils.python import to_bytes, is_listlike
@@ -55,15 +55,10 @@ def _urlencode(seq, enc):
5555
return urlencode(values, doseq=1)
5656

5757

58-
def _create_parser_from_response(response, parser_cls):
59-
body = response.body_as_unicode().strip().encode('utf8') or b'<html/>'
60-
parser = parser_cls(recover=True, encoding='utf8')
61-
return etree.fromstring(body, parser=parser, base_url=response.url)
62-
63-
6458
def _get_form(response, formname, formid, formnumber, formxpath):
6559
"""Find the form element """
66-
root = _create_parser_from_response(response, lxml.html.HTMLParser)
60+
text = response.body_as_unicode()
61+
root = create_root_node(text, lxml.html.HTMLParser, base_url=response.url)
6762
forms = root.xpath('//form')
6863
if not forms:
6964
raise ValueError("No <form> element found in %s" % response)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
'pyOpenSSL',
4545
'cssselect>=0.9',
4646
'six>=1.5.2',
47-
'parsel>=0.9.2',
47+
'parsel>=0.9.3',
4848
'PyDispatcher>=2.0.5',
4949
'service_identity',
5050
],

0 commit comments

Comments
 (0)