File tree 1 file changed +42
-0
lines changed
1 file changed +42
-0
lines changed Original file line number Diff line number Diff line change
1
+ # Author : Pranjal Dubey
2
+ # Created : 31 Dec 2015
3
+ # Last Modified :
4
+ # Version : 1.0
5
+ # Modifications :
6
+ # Description : Script to keep your place in sync between the soft copy and the hard copy of a file
7
+ # Known Bugs :
8
+
9
+
10
+ import PyPDF2 , re
11
+
12
# Path of the PDF (the "soft copy") whose text is searched.
PDF_PATH = "pdfs/the_girl_who_played_with_fire.pdf"


def extract_pdf_text(path):
    """Return the extracted text of every page of the PDF at *path*, concatenated.

    The file is opened in a ``with`` block so the handle is always closed,
    and extraction happens inside that block, while the file is still open.

    NOTE: this keeps the camelCase PyPDF2 API (``PdfFileReader``,
    ``numPages``, ``getPage``, ``extractText``) the script was written
    against; PyPDF2 3.0+ renamed these.
    """
    with open(path, "rb") as pdf_file:
        reader = PyPDF2.PdfFileReader(pdf_file)
        # join() avoids repeated string concatenation across many pages.
        return "".join(
            reader.getPage(index).extractText()
            for index in range(reader.numPages)
        )


def chars_per_page(text, first_words, last_words):
    """Return the character length of one printed page inside *text*.

    The page is the shortest span that starts at the first occurrence of
    *first_words* and ends with the next occurrence of *last_words*
    (case-insensitive, may cross line breaks).

    Raises:
        ValueError: if no such span exists, or if the span is empty
            (which would make the later division meaningless).
    """
    # re.escape: the words are literal text typed by the user, not a regex.
    # ".*?" is non-greedy so the span stops at the FIRST ending phrase
    # instead of running on to the last one in the whole document.
    pattern = re.compile(
        re.escape(first_words) + ".*?" + re.escape(last_words),
        re.IGNORECASE | re.DOTALL,
    )
    match = pattern.search(text)
    if match is None:
        raise ValueError(
            "No page found starting with {!r} and ending with {!r}".format(
                first_words, last_words
            )
        )
    length = match.end() - match.start()
    if length == 0:
        raise ValueError("The page's starting and ending words must not both be empty")
    return length


def find_offset(text, words):
    """Return the character offset of the first occurrence of *words* in *text*.

    The comparison is case-insensitive and *words* is treated as literal text.

    Raises:
        ValueError: if *words* does not occur in *text*.
    """
    match = re.search(re.escape(words), text, re.IGNORECASE | re.DOTALL)
    if match is None:
        raise ValueError("Could not find {!r} in the document".format(words))
    return match.start()


def main():
    """Prompt for page boundaries and a bookmark phrase, then print the estimate.

    Prints, in order: the character count of the sample page, the character
    offset of the bookmark phrase, and the offset divided by the per-page
    count (an estimated, fractional number of pages preceding the bookmark).
    Exits with an error message if any phrase cannot be found.
    """
    pdf_text = extract_pdf_text(PDF_PATH)

    first_words = input('Enter first few words: ')
    last_words = input('Enter last few words: ')
    bookmark_words = input('Enter few words of page to bookmark: ')

    try:
        per_page = chars_per_page(pdf_text, first_words, last_words)
        print("Character count per page :", per_page)

        bookmark_offset = find_offset(pdf_text, bookmark_words)
        print(bookmark_offset)
    except ValueError as err:
        # Report a readable message instead of an AttributeError traceback.
        raise SystemExit(str(err))

    print(bookmark_offset / per_page)


if __name__ == "__main__":
    main()
You can’t perform that action at this time.
0 commit comments