66import re
77import urllib2
88import csv
9+ import os
910
10- def get_isbn_from_douban (douban_url ):
11- isbn = ""
12- response = urllib2 .urlopen (douban_url )
13- html = response .read ()
14- isbn_url = r"ISBN:</span> \d{10,13}"
15- isbn = re .search (isbn_url , html ).group ()[13 :]
16- return isbn
11+ IMAGE_PATH = './images-zh-cn/{book_index}.jpg'
1712
18- def get_image (self , isbn , image_filename ):
13+ def get_image (isbn , image_filename ):
1914 """
2015 :type en_isbn: str
2116 :rtype: str
@@ -30,28 +25,34 @@ def get_image(self, isbn, image_filename):
3025 ft .write (image )
3126
def get_book_url(isbn):
    """
    Look up a book on the Douban book API by ISBN and return its URL.

    The API reply is parsed as JSON and the value stored under its
    ``alt`` key is returned. If the request, the read, the JSON parsing
    or the key lookup fails, the ISBN and the error are printed and a
    generic Douban books address is returned instead.

    :type isbn: str
    :rtype: str
    """
    url = "https://api.douban.com/v2/book/isbn/" + isbn
    result = "https://book.douban.com/"
    try:
        response = urllib2.urlopen(url)
        try:
            detail = response.read()
        finally:
            # Always release the connection, even when read() raises.
            response.close()
        return json.loads(detail)["alt"]
    except Exception as e:
        # Deliberately broad: this lookup is best-effort, and any failure
        # should fall back to the generic address rather than abort the run.
        print(isbn)
        print(e)
    return result
3938
def get_book_info(book_index):
    """
    Find the Chinese title and Douban URL for a book listed in ``isbn.csv``.

    Each usable row of ``isbn.csv`` has at least four comma-separated
    columns: the book index, the English ISBN, the Chinese title and the
    Chinese ISBN. Rows with fewer columns (for example blank lines) are
    skipped. When several rows share the same index, the last one wins.

    If a match is found and its cover image is not yet present at
    ``IMAGE_PATH``, the image is fetched with ``get_image``.

    :type book_index: str
    :rtype: tuple
    :return: ``(title, url)``, or ``(None, None)`` when no row matches or
        the matched title is the "未找到中文" (Chinese not found) marker.
    """
    not_found = "未找到中文"
    title = not_found
    zh_isbn = ""
    with open("isbn.csv") as ff:
        for line in csv.reader(ff, delimiter=','):
            # csv.reader yields [] for blank lines; indexing such a row
            # (or a truncated one) would raise IndexError.
            if len(line) < 4:
                continue
            if line[0] == book_index:
                title = line[2].strip()
                zh_isbn = line[3]

    if title == not_found:
        return None, None

    # The index may arrive wrapped in double quotes; the image file name
    # uses the bare index.
    image_path = IMAGE_PATH.format(book_index=book_index.strip('"'))
    if not os.path.exists(image_path):
        get_image(zh_isbn, image_path)
    return title, get_book_url(zh_isbn)
5556LABEL_DICT = {
5657 "" :"" ,
5758 "Recommended Path" : "推荐路线" ,
@@ -135,8 +136,6 @@ def get_book_info(book_index):
135136LABEL_LINE = '{label_index} [label="{label}"]\n '
136137RE_CONTENT_LINE = re .compile (r'[\w ]+\[color="#[\w]{6}", label=[<"]\d+\. [\w ()]+[">]\]' )
137138
138- IMAGE_PATH = './images-zh-cn/{book_index}.jpg'
139-
140139if __name__ == '__main__' :
141140 with open ("game-programmer.dot" ) as en_f , open ("game-programmer-zh-cn.dot" ,'w' ) as zh_f :
142141 for line in en_f :
0 commit comments