Skip to content

Commit c24f493

Browse files
author
pranjal dubey
committed
a script to sync reading page in offline and online reading
1 parent 48ff282 commit c24f493

File tree

1 file changed

+42
-0
lines changed

1 file changed

+42
-0
lines changed

bookmark.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Author : Pranjal Dubey
2+
# Created : 31 Dec 2015
3+
# Last Modified :
4+
# Version : 1.0
5+
# Modifications :
6+
# Description : Script to keep your reading position in sync between the soft copy (PDF) and the hard copy of a book
7+
# Known Bugs :
8+
9+
10+
import PyPDF2, re
11+
12+
# Path of the PDF (soft copy) whose text is searched for the bookmark.
PDF_PATH = "pdfs/the_girl_who_played_with_fire.pdf"


def read_pdf_text(path):
    """Return the extracted text of every page of the PDF at *path*, concatenated.

    The file is opened in binary mode and closed again as soon as all pages
    have been read.
    """
    # NOTE(review): PdfFileReader / numPages / getPage / extractText are the
    # legacy PyPDF2 names this script was written against; newer PyPDF2
    # releases appear to have renamed them - confirm the installed version.
    with open(path, "rb") as pdf_file:
        reader = PyPDF2.PdfFileReader(pdf_file)
        # join() instead of repeated "+=" keeps the concatenation linear.
        return "".join(
            reader.getPage(page_index).extractText()
            for page_index in range(reader.numPages)
        )


def chars_per_page(text, starting_words, ending_words):
    """Return the number of characters spanned by one sample page in *text*.

    The sample page runs from the first occurrence of *starting_words* to the
    nearest following occurrence of *ending_words* (both inclusive). Matching
    is case-insensitive and the words are treated as literal text.

    Raises:
        ValueError: if no such span exists in *text*.
    """
    # re.escape: the user types plain words, not a regular expression, so
    # characters such as "." or "(" must not be interpreted as regex syntax.
    # "(.*?)" is non-greedy on purpose: a greedy match would run to the LAST
    # occurrence of the ending words in the whole book and inflate the count.
    pattern = re.compile(
        re.escape(starting_words) + "(.*?)" + re.escape(ending_words),
        re.IGNORECASE | re.DOTALL,
    )
    match = pattern.search(text)
    if match is None:
        raise ValueError(
            "could not find a page starting with {!r} and ending with {!r}".format(
                starting_words, ending_words
            )
        )
    return match.end() - match.start()


def find_bookmark_offset(text, bookmark_words):
    """Return the character offset of the first occurrence of *bookmark_words*.

    Matching is case-insensitive and the words are treated as literal text.

    Raises:
        ValueError: if *bookmark_words* does not occur in *text*.
    """
    match = re.search(re.escape(bookmark_words), text, re.IGNORECASE)
    if match is None:
        raise ValueError("could not find {!r} in the document".format(bookmark_words))
    return match.start()


def estimate_page_number(offset, page_length):
    """Return *offset* divided by *page_length* as a fractional page estimate.

    The estimate treats every page as holding *page_length* characters, so an
    offset of 0 yields 0.0.

    Raises:
        ValueError: if *page_length* is not positive (e.g. empty sample page).
    """
    if page_length <= 0:
        raise ValueError("character count per page must be positive")
    return offset / page_length


def main():
    """Prompt for the sample page and bookmark words, then print the estimate."""
    pdfFileData = read_pdf_text(PDF_PATH)

    pageStartingWords = input('Enter first few words: ')
    pageEndingWords = input('Enter last few words: ')

    bookmarkPageWords = input('Enter few words of page to bookmark: ')

    try:
        charCountPerPage = chars_per_page(
            pdfFileData, pageStartingWords, pageEndingWords
        )
        print("Character count per page :", charCountPerPage)

        bookmarkOffset = find_bookmark_offset(pdfFileData, bookmarkPageWords)
        print(bookmarkOffset)

        pageNumber = estimate_page_number(bookmarkOffset, charCountPerPage)
    except ValueError as err:
        # Exit with a readable message instead of an AttributeError traceback
        # from calling .start() on a failed search.
        raise SystemExit("Error: {}".format(err)) from err

    print(pageNumber)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)