Skip to content

Commit 7ce95c0

Browse files
Fixed links regexp + newline at end of file
1 parent e14a6d0 commit 7ce95c0

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

PyCrawler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
51 51

52 52
# Compile keyword and link regex expressions
53 53
keywordregex = re.compile('<meta\sname=["\']keywords["\']\scontent=["\'](.*?)["\']\s/>')
54-
linkregex = re.compile('<a\s*href=[\'|"](.*?)[\'"].*?>')
54+
linkregex = re.compile('<a.*\shref=[\'"](.*?)[\'"].*?>')
55 55
crawled = []
56 56

57 57
# set crawling status and stick starting url into the queue
# set crawling status and stick starting url into the queue
@@ -204,4 +204,4 @@ def queue_links(self, url, links, cid, curdepth):
204 204
pass
205 205
if __name__ == '__main__':
206 206
# Run main loop
207-
threader().run()
207+
threader().run()

0 commit comments

Comments (0)