|
11 | 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | 12 | # See the License for the specific language governing permissions and |
13 | 13 | # limitations under the License. |
14 | | -# |
15 | | - |
16 | | -# flake8: noqa |
17 | | - |
18 | | -# [START documentai_quickstart] |
19 | | - |
20 | | -from google.api_core.client_options import ClientOptions |
21 | | -from google.cloud import documentai # type: ignore |
22 | 14 |
|
23 | | -# TODO(developer): Uncomment these variables before running the sample. |
24 | | -# project_id = "YOUR_PROJECT_ID" |
25 | | -# location = "YOUR_PROCESSOR_LOCATION" # Format is "us" or "eu" |
26 | | -# file_path = "/path/to/local/pdf" |
27 | | -# processor_display_name = "YOUR_PROCESSOR_DISPLAY_NAME" # Must be unique per project, e.g.: "My Processor" |
| 15 | +from google.cloud.documentai_v1.types.document import Document |
| 16 | +from google.cloud.documentai_v1.types.processor import Processor |
28 | 17 |
|
29 | 18 |
|
30 | 19 | def quickstart( |
31 | 20 | project_id: str, |
32 | 21 | location: str, |
33 | 22 | file_path: str, |
34 | | - processor_display_name: str = "My Processor", |
35 | | -): |
36 | | - # You must set the `api_endpoint` if you use a location other than "us". |
| 23 | + processor_display_name: str, |
| 24 | +) -> tuple[Processor, Document]: |
| 25 | + # [START documentai_quickstart] |
| 26 | + from google.api_core.client_options import ClientOptions |
| 27 | + from google.cloud import documentai_v1 # type: ignore |
| 28 | + |
| 29 | + # TODO(developer): Update and uncomment these variables before running the sample. |
| 30 | + # project_id = "MY_PROJECT_ID" |
| 31 | + |
| 32 | + # Processor location. For example: "us" or "eu". |
| 33 | + # location = "MY_PROCESSOR_LOCATION" |
| 34 | + |
| 35 | + # Path for file to process. |
| 36 | + # file_path = "/path/to/local/pdf" |
| 37 | + |
| 38 | + # Processor display name must be unique per project. |
| 39 | + # processor_display_name = "MY_PROCESSOR_DISPLAY_NAME" |
| 40 | + |
| 41 | + # Set `api_endpoint` if you use a location other than "us". |
37 | 42 | opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com") |
38 | 43 |
|
39 | | - client = documentai.DocumentProcessorServiceClient(client_options=opts) |
| 44 | + # Initialize Document AI client. |
| 45 | + client = documentai_v1.DocumentProcessorServiceClient(client_options=opts) |
40 | 46 |
|
41 | | - # The full resource name of the location, e.g.: |
42 | | - # `projects/{project_id}/locations/{location}` |
| 47 | + # Get the full resource name of the location. |
| 48 | + # For example: `projects/{project_id}/locations/{location}` |
43 | 49 | parent = client.common_location_path(project_id, location) |
44 | 50 |
|
45 | | - # Create a Processor |
| 51 | + # Create a Processor. |
| 52 | + # For available types, refer to https://cloud.google.com/document-ai/docs/create-processor |
46 | 53 | processor = client.create_processor( |
47 | 54 | parent=parent, |
48 | | - processor=documentai.Processor( |
49 | | - type_="OCR_PROCESSOR", # Refer to https://cloud.google.com/document-ai/docs/create-processor for how to get available processor types |
| 55 | + processor=documentai_v1.Processor( |
| 56 | + type_="OCR_PROCESSOR", |
50 | 57 | display_name=processor_display_name, |
51 | 58 | ), |
52 | 59 | ) |
53 | 60 |
|
54 | | - # Print the processor information |
| 61 | + # Print the processor information. |
55 | 62 | print(f"Processor Name: {processor.name}") |
56 | 63 |
|
57 | | - # Read the file into memory |
| 64 | + # Read the file into memory. |
58 | 65 | with open(file_path, "rb") as image: |
59 | 66 | image_content = image.read() |
60 | 67 |
|
61 | | - # Load binary data |
62 | | - raw_document = documentai.RawDocument( |
| 68 | + # Load binary data. |
| 69 | + # For supported MIME types, refer to https://cloud.google.com/document-ai/docs/file-types |
| 70 | + raw_document = documentai_v1.RawDocument( |
63 | 71 | content=image_content, |
64 | | - mime_type="application/pdf", # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types |
| 72 | + mime_type="application/pdf", |
65 | 73 | ) |
66 | 74 |
|
67 | | - # Configure the process request |
68 | | - # `processor.name` is the full resource name of the processor, e.g.: |
69 | | - # `projects/{project_id}/locations/{location}/processors/{processor_id}` |
70 | | - request = documentai.ProcessRequest(name=processor.name, raw_document=raw_document) |
| 75 | + # Configure the process request. |
| 76 | + # `processor.name` is the full resource name of the processor. |
| 77 | + # For example: `projects/{project_id}/locations/{location}/processors/{processor_id}` |
| 78 | + request = documentai_v1.ProcessRequest(name=processor.name, raw_document=raw_document) |
71 | 79 |
|
72 | 80 | result = client.process_document(request=request) |
| 81 | + document = result.document |
73 | 82 |
|
| 83 | + # Read the text recognition output from the processor. |
74 | 84 | # For a full list of `Document` object attributes, reference this page: |
75 | 85 | # https://cloud.google.com/document-ai/docs/reference/rest/v1/Document |
76 | | - document = result.document |
77 | | - |
78 | | - # Read the text recognition output from the processor |
79 | 86 | print("The document contains the following text:") |
80 | 87 | print(document.text) |
81 | 88 | # [END documentai_quickstart] |
82 | | - return processor |
| 89 | + |
| 90 | + return processor, document |
0 commit comments