@@ -65,9 +65,8 @@ def _extract_links(self, selector, response_url, response_encoding, base_url):
65
65
if self .unique else links
66
66
67
67
def extract_links(self, response):
    """Extract links from ``response``.

    The base URL is resolved with ``get_base_url(response)`` and the work
    is delegated to ``self._extract_links``, which receives the response's
    selector together with the response URL and encoding.

    :param response: the response to extract links from; it must expose
        ``selector``, ``url`` and ``encoding`` attributes.
    :return: whatever ``self._extract_links`` returns for this response.
    """
    base_url = get_base_url(response)
    # Use the selector already attached to the response rather than
    # constructing a separate Selector(response) for every call.
    return self._extract_links(
        response.selector, response.url, response.encoding, base_url
    )
71
70
72
71
def _process_links (self , links ):
73
72
""" Normalize and filter extracted links
@@ -95,14 +94,13 @@ def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restric
95
94
canonicalize = canonicalize , deny_extensions = deny_extensions )
96
95
97
96
def extract_links (self , response ):
98
- html = Selector (response )
99
97
base_url = get_base_url (response )
100
98
if self .restrict_xpaths :
101
99
docs = [subdoc
102
100
for x in self .restrict_xpaths
103
- for subdoc in html .xpath (x )]
101
+ for subdoc in response .xpath (x )]
104
102
else :
105
- docs = [html ]
103
+ docs = [response . selector ]
106
104
all_links = []
107
105
for doc in docs :
108
106
links = self ._extract_links (doc , response .url , response .encoding , base_url )
0 commit comments