File tree (expand / collapse): 1 file changed, +2 -12 lines changed
web-scraping/download-images Expand file tree Collapse file tree 1 file changed +2
-12
lines changed Original file line number Diff line number Diff line change 55from urllib .parse import urljoin , urlparse
66
77
8- def is_absolute (url ):
9- """
10- Determines whether a `url` is absolute.
11- """
12- return bool (urlparse (url ).netloc )
13-
14-
158def is_valid (url ):
169 """
1710 Checks whether `url` is a valid URL.
@@ -28,14 +21,11 @@ def get_all_images(url):
2821 urls = []
2922 for img in tqdm (soup .find_all ("img" ), "Extracting images" ):
3023 img_url = img .attrs .get ("src" )
31-
3224 if not img_url :
3325 # if img does not contain src attribute, just skip
3426 continue
35-
36- if not is_absolute (img_url ):
37- # if img has relative URL, make it absolute by joining
38- img_url = urljoin (url , img_url )
27+ # make the URL absolute by resolving the extracted src against the page URL (urljoin leaves already-absolute URLs unchanged)
28+ img_url = urljoin (url , img_url )
3929 # remove URLs like '/hsts-pixel.gif?c=3.2.5'
4030 try :
4131 pos = img_url .index ("?" )
You can’t perform that action at this time.
0 commit comments