Skip to content

Commit 6287fc3

Browse files
committed
remove lxmldocument dependency from http.request.form
1 parent 35c1dcd commit 6287fc3

File tree

1 file changed

+8
-2
lines changed

1 file changed

+8
-2
lines changed

scrapy/http/request/form.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
from six.moves.urllib.parse import urljoin, urlencode
99
import lxml.html
10+
from lxml import etree
1011
import six
1112
from scrapy.http.request import Request
1213
from scrapy.utils.python import to_bytes, is_listlike
@@ -54,10 +55,15 @@ def _urlencode(seq, enc):
5455
return urlencode(values, doseq=1)
5556

5657

58+
def _create_parser_from_response(response, parser_cls):
    """Parse the body of ``response`` and return the root lxml element.

    :param response: object exposing ``body_as_unicode()`` and ``url``.
    :param parser_cls: lxml parser class; it is instantiated with
        ``recover=True`` and ``encoding='utf8'``.
    :returns: the root element produced by ``etree.fromstring``; never
        ``None``.
    """
    # A body that is empty (or whitespace only) becomes b'' after strip();
    # substitute a minimal document so the parser has something to chew on.
    body = response.body_as_unicode().strip().encode('utf8') or b'<html/>'
    parser = parser_cls(recover=True, encoding='utf8')
    root = etree.fromstring(body, parser=parser, base_url=response.url)
    if root is None:
        # With recover=True the parser may swallow every error and still
        # find no root element. Fall back to an empty document so callers
        # can run xpath queries instead of failing on None.
        root = etree.fromstring(
            b'<html/>', parser=parser, base_url=response.url)
    return root
62+
63+
5764
def _get_form(response, formname, formid, formnumber, formxpath):
5865
"""Find the form element """
59-
from scrapy.selector.lxmldocument import LxmlDocument
60-
root = LxmlDocument(response, lxml.html.HTMLParser)
66+
root = _create_parser_from_response(response, lxml.html.HTMLParser)
6167
forms = root.xpath('//form')
6268
if not forms:
6369
raise ValueError("No <form> element found in %s" % response)

0 commit comments

Comments
 (0)