@@ -270,7 +270,7 @@ def detect_properties_uri(uri):
270270
271271
272272def detect_web (path ):
273- """detects web annotations given an image."""
273+ """Detects web annotations given an image."""
274274 vision_client = vision .Client ()
275275
276276 with io .open (path , 'rb' ) as image_file :
@@ -312,7 +312,7 @@ def detect_web(path):
312312
313313
314314def detect_web_uri (uri ):
315- """detects web annotations in the file located in google cloud storage ."""
315+ """Detects web annotations in the file located in Google Cloud Storage ."""
316316 vision_client = vision .Client ()
317317 image = vision_client .image (source_uri = uri )
318318
@@ -350,7 +350,7 @@ def detect_web_uri(uri):
350350
351351
352352def detect_crop_hints (path ):
353- """detects crop hints in an image."""
353+ """Detects crop hints in an image."""
354354 vision_client = vision .Client ()
355355 with io .open (path , 'rb' ) as image_file :
356356 content = image_file .read ()
@@ -368,7 +368,7 @@ def detect_crop_hints(path):
368368
369369
370370def detect_crop_hints_uri (uri ):
371- """detects crop hints in the file located in google cloud storage ."""
371+ """Detects crop hints in the file located in Google Cloud Storage ."""
372372 vision_client = vision .Client ()
373373 image = vision_client .image (source_uri = uri )
374374
@@ -382,54 +382,77 @@ def detect_crop_hints_uri(uri):
382382 print ('bounds: {}' .format (',' .join (vertices )))
383383
384384
# NOTE: the docstring below is also passed to the argument parser as the
# sub-command's help text (help=detect_document.__doc__), so it is kept as a
# single short sentence; further documentation lives in comments.
def detect_document(path):
    """Detects document features in an image."""
    # path: filesystem path of the image; it is opened in binary mode and the
    # raw bytes are handed to the Vision client.
    # Returns None; all results are written to stdout.
    vision_client = vision.Client()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision_client.image(content=content)
    document = image.detect_full_text()

    for page in document.pages:
        # Text of every block on this page, in the order the blocks appear.
        block_texts = []

        for block in page.blocks:
            # Flatten block -> paragraphs -> words -> symbols into one string.
            # NOTE(review): symbols are joined with no separator, so words
            # run together exactly as in the previous implementation.
            block_text = ''.join(
                symbol.text
                for paragraph in block.paragraphs
                for word in paragraph.words
                for symbol in word.symbols)

            print('\n --\n Content Block: {}'.format(block_text))
            print('Block Bounding Box:\n {}'.format(block.bounding_box))

            block_texts.append(block_text)

        print('Page Content:\n {}'.format(''.join(block_texts)))
        print('Page Dimensions: w: {} h: {}'.format(page.width, page.height))
410421
411422
# NOTE: the docstring below is also passed to the argument parser as the
# sub-command's help text (help=detect_document_uri.__doc__), so its wording
# is left untouched; further documentation lives in comments.
def detect_document_uri(uri):
    """Detects document features in the file located in Google Cloud
    Storage."""
    # uri: passed straight through to the Vision client as source_uri.
    # Returns None; all results are written to stdout.
    vision_client = vision.Client()
    image = vision_client.image(source_uri=uri)

    document = image.detect_full_text()

    for page in document.pages:
        # Text of every block on this page, in the order the blocks appear.
        block_texts = []

        for block in page.blocks:
            # Flatten block -> paragraphs -> words -> symbols into one string.
            # NOTE(review): symbols are joined with no separator, so words
            # run together exactly as in the previous implementation.
            block_text = ''.join(
                symbol.text
                for paragraph in block.paragraphs
                for word in paragraph.words
                for symbol in word.symbols)

            print('\n --\n Content Block: {}'.format(block_text))
            print('Block Bounding Box:\n {}'.format(block.bounding_box))

            block_texts.append(block_text)

        print('Page Content:\n {}'.format(''.join(block_texts)))
        print('Page Dimensions: w: {} h: {}'.format(page.width, page.height))
433456
434457
435458def run_local (args ):
@@ -451,8 +474,8 @@ def run_local(args):
451474 detect_web (args .path )
452475 elif args .command == 'crophints' :
453476 detect_crop_hints (args .path )
454- elif args .command == 'fulltext ' :
455- detect_fulltext (args .path )
477+ elif args .command == 'document ' :
478+ detect_document (args .path )
456479
457480
458481def run_uri (args ):
@@ -474,8 +497,8 @@ def run_uri(args):
474497 detect_web_uri (args .uri )
475498 elif args .command == 'crophints-uri' :
476499 detect_crop_hints_uri (args .uri )
477- elif args .command == 'fulltext -uri' :
478- detect_fulltext_uri (args .uri )
500+ elif args .command == 'document -uri' :
501+ detect_document_uri (args .uri )
479502
480503
481504if __name__ == '__main__' :
@@ -560,13 +583,13 @@ def run_uri(args):
560583 'crophints-uri' , help = detect_crop_hints_uri .__doc__ )
561584 crop_hints_uri_parser .add_argument ('uri' )
562585
563- fulltext_parser = subparsers .add_parser (
564- 'fulltext ' , help = detect_fulltext .__doc__ )
565- fulltext_parser .add_argument ('path' )
586+ document_parser = subparsers .add_parser (
587+ 'document ' , help = detect_document .__doc__ )
588+ document_parser .add_argument ('path' )
566589
567- fulltext_uri_parser = subparsers .add_parser (
568- 'fulltext -uri' , help = detect_fulltext_uri .__doc__ )
569- fulltext_uri_parser .add_argument ('uri' )
590+ document_uri_parser = subparsers .add_parser (
591+ 'document -uri' , help = detect_document_uri .__doc__ )
592+ document_uri_parser .add_argument ('uri' )
570593
571594 args = parser .parse_args ()
572595
0 commit comments