Skip to content

Commit d4ba837

Browse files
Initial commit
0 parents  commit d4ba837

12 files changed

+242
-0
lines changed

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Python history blog articles
2+
===========================
3+
4+
This script takes articles from the blog at http://python-history.blogspot.com/
5+
and generates a PDF file.
6+
7+

fonts/Courier_New.ttf

296 KB
Binary file not shown.

fonts/Courier_New_Bold.ttf

304 KB
Binary file not shown.

fonts/Courier_New_Bold_Italic.ttf

229 KB
Binary file not shown.

fonts/Courier_New_Italic.ttf

238 KB
Binary file not shown.

fonts/Verdana.ttf

136 KB
Binary file not shown.

fonts/Verdana_Bold.ttf

133 KB
Binary file not shown.

fonts/Verdana_Bold_Italic.ttf

150 KB
Binary file not shown.

fonts/Verdana_Italic.ttf

151 KB
Binary file not shown.

pdf_gen.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import xhtml2pdf.pisa as pisa
4+
import StringIO
5+
6+
# HTML/CSS page template handed to xhtml2pdf.  ``header`` opens the document
# (page geometry, footer frame, fonts, base styles, logo) and ``footer``
# closes it; the generated article markup is inserted between the two.
#
# Every @font-face URL points at a file in the ``fonts`` directory that ships
# with this script, using the exact file names found there
# (Courier_New*.ttf and Verdana*.ttf).
# NOTE(review): the paths are relative, so they are presumably resolved
# against the current working directory -- run the script from the
# repository root, or confirm how xhtml2pdf resolves them.
header = u'''

<html>
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
<head>

<style>

@page {

margin: 40px;
margin-left: 60px;
margin-bottom: 70px;
@frame footer {
-pdf-frame-content: footerContent;
bottom: 10px;
margin-left: 60px;
margin-right: 40px;
height: 40px;
}

}

@font-face {
font-family: courier;
src: url(fonts/Courier_New.ttf);
}

@font-face {
font-family: courier;
src: url(fonts/Courier_New_Bold.ttf);
font-weight: bold;
}

@font-face {
font-family: courier;
src: url(fonts/Courier_New_Bold_Italic.ttf);
font-weight: bold;
font-style: italic;
}

@font-face {
font-family: courier;
src: url(fonts/Courier_New_Italic.ttf);
font-style: italic;
}

@font-face {
font-family: verdana;
src: url(fonts/Verdana.ttf);
}

@font-face {
font-family: verdana;
src: url(fonts/Verdana_Bold.ttf);
font-weight: bold;
}

@font-face {
font-family: verdana;
src: url(fonts/Verdana_Italic.ttf);
font-style: italic;
}

@font-face {
font-family: verdana;
src: url(fonts/Verdana_Bold_Italic.ttf);
font-style: italic;
font-weight: bold;
}


img {
font-family: sans;
}

body, div {
font-family: verdana;
font-size: 14px;
color:#000;
background:#fff;
}


a[href] { color: #6da3bd; }
a[name] { color: #000000; font-size: 150%; text-decoration:none}

fieldset {border:0 solid transparent;}
input, select, textarea {
font-size: 100%;
font-family: verdana;
}

blockquote {
border-left:2px solid #bbb;
margin: .83em 10;
padding-left:15px;
clear: both;
}

ul,ol,li,h1,h2,h3,h4,h5,h6,pre,form,body,html,blockquote,fieldset,dl,dt,dd,caption {margin:0; padding:0;}
ul,ol {list-style: none;}
pre,code {font-size: 1em;}

h1, h2, h3, h4, h5, h6 {
color:#999999;
font-family: verdana;
font-weight:normal;
margin:0 0 0 0;
padding:0;
}

h1 {
font-size:162.5%;
letter-spacing:-1px;
margin-bottom:0.7em;
}

h2 {
font-size:150%;
}

h3 {
font-size: 137.5%;
}

h4 {
font-size: 120%;
}

h5 {
font-size: 110%;
}

h6 {
font-size: 100%;
}

pre {
font-size: 80%;
}
</style>
</head>
<body>


<div align="center"><img src="http://python.org/images/python-logo.gif"></div>'''

# Closing part of the document.  The ``footerContent`` div is the element the
# ``@frame footer`` rule in ``header`` refers to via -pdf-frame-content; its
# text is the Russian word for "Page", followed by the page number tag.
footer = u'''

<div id="footerContent" align="right">
<hr>
Страница #<pdf:pagenumber>
</div>
</body>
</html>
'''
163+
164+
165+
def go(content, filename):
    """Render ``content`` into a PDF file.

    The HTML fragment ``content`` is wrapped between the module-level
    ``header`` and ``footer`` templates and passed to ``pisa.CreatePDF``.

    content  -- HTML markup to place between ``header`` and ``footer``.
    filename -- path of the PDF to write; it is opened in ``'wb'`` mode, so
                an existing file is overwritten.
    """
    print('\n Prepare PDF...\n')
    cont = header + content + footer
    pisa.showLogging()
    # Use a context manager so the output file is flushed and closed even if
    # the conversion raises; the original left the handle open.
    with open(filename, 'wb') as pdf_file:
        pisa.CreatePDF(cont, pdf_file, encoding='UTF-8')

pyhistorypdf.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import urllib2
4+
import sys
5+
from lxml import etree
6+
from pdf_gen import *
7+
from datetime import date
8+
9+
def _first(values, default, what, link):
    """Return ``values[0]``; if ``values`` is empty, report it and return ``default``."""
    if values:
        return values[0]
    print("No %s for %s" % (what, link))
    return default


# Script flow: download the blog front page, collect the links to all posts,
# download each post, and assemble one HTML document that ``go`` turns into
# a PDF.
headpage = 'http://python-history.blogspot.com/'

try:
    headhtml = urllib2.urlopen(headpage).read()
except Exception:
    # Nothing can be generated without the index page.  ``Exception`` (not a
    # bare ``except``) keeps Ctrl-C and SystemExit working.
    print("Could not load head page:" + headpage)
    sys.exit(1)

# Title page: linked heading, table of contents, then a page break.
parts = [
    '<br /><h2 align="center"><a href="%s">' % (headpage,),
    'The History of Python blog articles</a></h2><br />',
    '<div><pdf:toc /></div>',
    '<div><pdf:nextpage /></div>',
]

headet = etree.HTML(headhtml)
postlinks = headet.xpath("//ul[@class='posts']/li/a/@href")
# The list is reversed before processing.
# NOTE(review): presumably the archive lists newest posts first -- confirm.
postlinks.reverse()

for postlink in postlinks:
    try:
        posthtml = urllib2.urlopen(postlink).read()
    except Exception:
        # Best effort: skip posts that cannot be downloaded.
        print("Can`t get page %s" % (postlink,))
        continue
    postet = etree.HTML(posthtml)

    bodyraw = postet.xpath("//div[@class='post-body entry-content']")
    if not bodyraw:
        # A post without a body has nothing worth printing.
        print("No body for %s" % postlink)
        continue
    body = etree.tostring(bodyraw[0])

    # Missing metadata is reported and replaced by a fallback value.  The
    # original only printed the message, then either crashed with NameError
    # (first post) or silently reused the previous post's value.
    title = _first(
        postet.xpath("//h3[@class='post-title entry-title']/text()"),
        postlink, "title", postlink)
    author = _first(
        postet.xpath("//a[@rel='author']/text()"),
        '', "author", postlink)
    # Named ``post_date`` so the imported ``datetime.date`` is not clobbered.
    post_date = _first(
        postet.xpath("//h2[@class='date-header']/*/text()"),
        '', "date", postlink)

    topic = '<h3><a href="%s">%s</a></h3> by %s<br /> %s <br />' % (
        postlink, title, author, post_date)
    topic += body
    topic += '<div><pdf:nextpage /></div>'
    parts.append('<div>' + topic + '</div>')

content = ''.join(parts)

# NOTE(review): the output name is spelled 'pyton-history.pdf'; it is kept
# because it matches the PDF file committed alongside this script.
go(content, 'pyton-history.pdf')

pyton-history.pdf

340 KB
Binary file not shown.

0 commit comments

Comments
 (0)