Sends an online processing request to a Document OCR processor and parses the response. Extracts and prints full text, page dimensions, detected languages, paragraphs, blocks, lines, and tokens.
Explore further
For detailed documentation that includes this code sample, see the following:
Code sample
Java
For more information, see the Document AI Java API reference documentation.
To authenticate to Document AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import
com.google.cloud.documentai.v1beta3.Document
;
import
com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClient
;
import
com.google.cloud.documentai.v1beta3.DocumentProcessorServiceSettings
;
import
com.google.cloud.documentai.v1beta3.ProcessRequest
;
import
com.google.cloud.documentai.v1beta3.ProcessResponse
;
import
com.google.cloud.documentai.v1beta3.RawDocument
;
import
com.google.protobuf. ByteString
;
import
java.io.IOException
;
import
java.nio.file.Files
;
import
java.nio.file.Paths
;
import
java.util.List
;
import
java.util.concurrent.ExecutionException
;
import
java.util.concurrent.TimeoutException
;
public
class
ProcessOcrDocument
{
public
static
void
processOcrDocument
()
throws
IOException
,
InterruptedException
,
ExecutionException
,
TimeoutException
{
// TODO(developer): Replace these variables before running the sample.
String
projectId
=
"your-project-id"
;
String
location
=
"your-project-location"
;
// Format is "us" or "eu".
String
processerId
=
"your-processor-id"
;
String
filePath
=
"path/to/input/file.pdf"
;
processOcrDocument
(
projectId
,
location
,
processerId
,
filePath
);
}
public
static
void
processOcrDocument
(
String
projectId
,
String
location
,
String
processorId
,
String
filePath
)
throws
IOException
,
InterruptedException
,
ExecutionException
,
TimeoutException
{
// Initialize client that will be used to send requests. This client only needs
// to be created
// once, and can be reused for multiple requests. After completing all of your
// requests, call
// the "close" method on the client to safely clean up any remaining background
// resources.
String
endpoint
=
String
.
format
(
"%s-documentai.googleapis.com:443"
,
location
);
DocumentProcessorServiceSettings
settings
=
DocumentProcessorServiceSettings
.
newBuilder
().
setEndpoint
(
endpoint
).
build
();
try
(
DocumentProcessorServiceClient
client
=
DocumentProcessorServiceClient
.
create
(
settings
))
{
// The full resource name of the processor, e.g.:
// projects/project-id/locations/location/processor/processor-id
// You must create new processors in the Cloud Console first
String
name
=
String
.
format
(
"projects/%s/locations/%s/processors/%s"
,
projectId
,
location
,
processorId
);
// Read the file.
byte
[]
imageFileData
=
Files
.
readAllBytes
(
Paths
.
get
(
filePath
));
// Convert the image data to a Buffer and base64 encode it.
ByteString
content
=
ByteString
.
copyFrom
(
imageFileData
);
RawDocument
document
=
RawDocument
.
newBuilder
().
setContent
(
content
).
setMimeType
(
"application/pdf"
).
build
();
// Configure the process request.
ProcessRequest
request
=
ProcessRequest
.
newBuilder
().
setName
(
name
).
setRawDocument
(
document
).
build
();
// Recognizes text entities in the PDF document
ProcessResponse
result
=
client
.
processDocument
(
request
);
Document
documentResponse
=
result
.
getDocument
();
System
.
out
.
println
(
"Document processing complete."
);
// Read the text recognition output from the processor
// For a full list of Document object attributes,
// please reference this page:
// https://googleapis.dev/java/google-cloud-document-ai/latest/index.html
// Get all of the document text as one big string
String
text
=
documentResponse
.
getText
();
System
.
out
.
printf
(
"Full document text: '%s'\n"
,
escapeNewlines
(
text
));
// Read the text recognition output from the processor
List<Document
.
Page
>
pages
=
documentResponse
.
getPagesList
();
System
.
out
.
printf
(
"There are %s page(s) in this document.\n"
,
pages
.
size
());
for
(
Document
.
Page
page
:
pages
)
{
System
.
out
.
printf
(
"Page %d:\n"
,
page
.
getPageNumber
());
printPageDimensions
(
page
.
getDimension
());
printDetectedLanguages
(
page
.
getDetectedLanguagesList
());
printParagraphs
(
page
.
getParagraphsList
(),
text
);
printBlocks
(
page
.
getBlocksList
(),
text
);
printLines
(
page
.
getLinesList
(),
text
);
printTokens
(
page
.
getTokensList
(),
text
);
}
}
}
private
static
void
printPageDimensions
(
Document
.
Page
.
Dimension
dimension
)
{
String
unit
=
dimension
.
getUnit
();
System
.
out
.
printf
(
" Width: %.1f %s\n"
,
dimension
.
getWidth
(),
unit
);
System
.
out
.
printf
(
" Height: %.1f %s\n"
,
dimension
.
getHeight
(),
unit
);
}
private
static
void
printDetectedLanguages
(
List<Document
.
Page
.
DetectedLanguage
>
detectedLangauges
)
{
System
.
out
.
println
(
" Detected languages:"
);
for
(
Document
.
Page
.
DetectedLanguage
detectedLanguage
:
detectedLangauges
)
{
String
languageCode
=
detectedLanguage
.
getLanguageCode
();
float
confidence
=
detectedLanguage
.
getConfidence
();
System
.
out
.
printf
(
" %s (%.2f%%)\n"
,
languageCode
,
confidence
*
100.0
);
}
}
private
static
void
printParagraphs
(
List<Document
.
Page
.
Paragraph
>
paragraphs
,
String
text
)
{
System
.
out
.
printf
(
" %d paragraphs detected:\n"
,
paragraphs
.
size
());
Document
.
Page
.
Paragraph
firstParagraph
=
paragraphs
.
get
(
0
);
String
firstParagraphText
=
getLayoutText
(
firstParagraph
.
getLayout
().
getTextAnchor
(),
text
);
System
.
out
.
printf
(
" First paragraph text: %s\n"
,
escapeNewlines
(
firstParagraphText
));
Document
.
Page
.
Paragraph
lastParagraph
=
paragraphs
.
get
(
paragraphs
.
size
()
-
1
);
String
lastParagraphText
=
getLayoutText
(
lastParagraph
.
getLayout
().
getTextAnchor
(),
text
);
System
.
out
.
printf
(
" Last paragraph text: %s\n"
,
escapeNewlines
(
lastParagraphText
));
}
private
static
void
printBlocks
(
List<Document
.
Page
.
Block
>
blocks
,
String
text
)
{
System
.
out
.
printf
(
" %d blocks detected:\n"
,
blocks
.
size
());
Document
.
Page
.
Block
firstBlock
=
blocks
.
get
(
0
);
String
firstBlockText
=
getLayoutText
(
firstBlock
.
getLayout
().
getTextAnchor
(),
text
);
System
.
out
.
printf
(
" First block text: %s\n"
,
escapeNewlines
(
firstBlockText
));
Document
.
Page
.
Block
lastBlock
=
blocks
.
get
(
blocks
.
size
()
-
1
);
String
lastBlockText
=
getLayoutText
(
lastBlock
.
getLayout
().
getTextAnchor
(),
text
);
System
.
out
.
printf
(
" Last block text: %s\n"
,
escapeNewlines
(
lastBlockText
));
}
private
static
void
printLines
(
List<Document
.
Page
.
Line
>
lines
,
String
text
)
{
System
.
out
.
printf
(
" %d lines detected:\n"
,
lines
.
size
());
Document
.
Page
.
Line
firstLine
=
lines
.
get
(
0
);
String
firstLineText
=
getLayoutText
(
firstLine
.
getLayout
().
getTextAnchor
(),
text
);
System
.
out
.
printf
(
" First line text: %s\n"
,
escapeNewlines
(
firstLineText
));
Document
.
Page
.
Line
lastLine
=
lines
.
get
(
lines
.
size
()
-
1
);
String
lastLineText
=
getLayoutText
(
lastLine
.
getLayout
().
getTextAnchor
(),
text
);
System
.
out
.
printf
(
" Last line text: %s\n"
,
escapeNewlines
(
lastLineText
));
}
private
static
void
printTokens
(
List<Document
.
Page
.
Token
>
tokens
,
String
text
)
{
System
.
out
.
printf
(
" %d tokens detected:\n"
,
tokens
.
size
());
Document
.
Page
.
Token
firstToken
=
tokens
.
get
(
0
);
String
firstTokenText
=
getLayoutText
(
firstToken
.
getLayout
().
getTextAnchor
(),
text
);
System
.
out
.
printf
(
" First token text: %s\n"
,
escapeNewlines
(
firstTokenText
));
Document
.
Page
.
Token
lastToken
=
tokens
.
get
(
tokens
.
size
()
-
1
);
String
lastTokenText
=
getLayoutText
(
lastToken
.
getLayout
().
getTextAnchor
(),
text
);
System
.
out
.
printf
(
" Last token text: %s\n"
,
escapeNewlines
(
lastTokenText
));
}
// Extract shards from the text field
private
static
String
getLayoutText
(
Document
.
TextAnchor
textAnchor
,
String
text
)
{
if
(
textAnchor
.
getTextSegmentsList
().
size
()
>
0
)
{
int
startIdx
=
(
int
)
textAnchor
.
getTextSegments
(
0
).
getStartIndex
();
int
endIdx
=
(
int
)
textAnchor
.
getTextSegments
(
0
).
getEndIndex
();
return
text
.
substring
(
startIdx
,
endIdx
);
}
return
"[NO TEXT]"
;
}
private
static
String
escapeNewlines
(
String
s
)
{
return
s
.
replace
(
"\n"
,
"\\n"
).
replace
(
"\r"
,
"\\r"
);
}
}
Node.js
For more information, see the Document AI Node.js API reference documentation.
To authenticate to Document AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
/**
* TODO(developer): Uncomment these variables before running the sample.
*/
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION'; // Format is 'us' or 'eu'
// const processorId = 'YOUR_PROCESSOR_ID'; // Create processor in Cloud Console
// const filePath = '/path/to/local/pdf';
// Load the client from the v1beta3 API surface.
// Bug fix: the module id previously contained a stray leading space
// (' @google-cloud/documentai'), which makes require() fail.
const {DocumentProcessorServiceClient} =
  require('@google-cloud/documentai').v1beta3;

// Instantiates a client
const client = new DocumentProcessorServiceClient();
/**
 * Sends one online processDocument request to the configured processor and
 * prints the OCR structure of the response (full text, then per page its
 * dimensions, languages, paragraphs, blocks, lines, and tokens).
 *
 * NOTE(review): relies on `projectId`, `location`, `processorId`, and
 * `filePath` being defined at module scope (the commented-out TODO block
 * above) and on the module-level `client` and print helpers below.
 */
async function processDocument() {
  // The full resource name of the processor, e.g.:
  // projects/project-id/locations/location/processor/processor-id
  // You must create new processors in the Cloud Console first
  const name = `projects/${projectId}/locations/${location}/processors/${processorId}`;

  // Read the file into memory.
  const fs = require('fs').promises;
  const imageFile = await fs.readFile(filePath);

  // Convert the image data to a Buffer and base64 encode it.
  const encodedImage = Buffer.from(imageFile).toString('base64');

  const request = {
    name,
    rawDocument: {
      content: encodedImage,
      mimeType: 'application/pdf',
    },
  };

  // Recognizes text entities in the PDF document
  const [result] = await client.processDocument(request);
  console.log('Document processing complete.');

  // Read the text recognition output from the processor
  // For a full list of Document object attributes,
  // please reference this page: https://googleapis.dev/nodejs/documentai/latest/index.html
  const {document} = result;
  const {text} = document;

  // Read the text recognition output from the processor
  console.log(`Full document text: ${JSON.stringify(text)}`);
  console.log(`There are ${document.pages.length} page(s) in this document.`);

  for (const page of document.pages) {
    console.log(`Page ${page.pageNumber}`);
    printPageDimensions(page.dimension);
    printDetectedLanguages(page.detectedLanguages);
    printParagraphs(page.paragraphs, text);
    printBlocks(page.blocks, text);
    printLines(page.lines, text);
    printTokens(page.tokens, text);
  }
}
// Logs the width and height of one page's dimension object.
const printPageDimensions = dimension => {
  const {width, height} = dimension;
  console.log(` Width: ${width}`);
  console.log(` Height: ${height}`);
};
// Lists every detected language with its confidence as a percentage.
const printDetectedLanguages = detectedLanguages => {
  console.log(' Detected languages:');
  detectedLanguages.forEach(lang => {
    const pct = (lang.confidence * 100).toFixed(2);
    console.log(` ${lang.languageCode} (${pct}% confidence)`);
  });
};
// Prints the paragraph count and the text of the first and last paragraph.
// Bug fix: a page can have zero paragraphs; indexing paragraphs[0] threw.
const printParagraphs = (paragraphs, text) => {
  console.log(` ${paragraphs.length} paragraphs detected:`);
  if (paragraphs.length === 0) {
    return;
  }
  const firstParagraphText = getText(paragraphs[0].layout.textAnchor, text);
  console.log(` First paragraph text: ${JSON.stringify(firstParagraphText)}`);
  const lastParagraphText = getText(
    paragraphs[paragraphs.length - 1].layout.textAnchor,
    text
  );
  console.log(` Last paragraph text: ${JSON.stringify(lastParagraphText)}`);
};
// Prints the block count and the text of the first and last block.
// Bug fix: guard against an empty blocks array (previously threw on blocks[0]).
const printBlocks = (blocks, text) => {
  console.log(` ${blocks.length} blocks detected:`);
  if (blocks.length === 0) {
    return;
  }
  const firstBlockText = getText(blocks[0].layout.textAnchor, text);
  console.log(` First block text: ${JSON.stringify(firstBlockText)}`);
  const lastBlockText = getText(
    blocks[blocks.length - 1].layout.textAnchor,
    text
  );
  console.log(` Last block text: ${JSON.stringify(lastBlockText)}`);
};
// Prints the line count and the text of the first and last line.
// Bug fix: guard against an empty lines array (previously threw on lines[0]).
const printLines = (lines, text) => {
  console.log(` ${lines.length} lines detected:`);
  if (lines.length === 0) {
    return;
  }
  const firstLineText = getText(lines[0].layout.textAnchor, text);
  console.log(` First line text: ${JSON.stringify(firstLineText)}`);
  const lastLineText = getText(
    lines[lines.length - 1].layout.textAnchor,
    text
  );
  console.log(` Last line text: ${JSON.stringify(lastLineText)}`);
};
// Prints the token count, first/last token text, and their break types.
// Bug fixes: guard against an empty tokens array, and against a token with no
// detectedBreak (reading `.type` of undefined threw a TypeError).
const printTokens = (tokens, text) => {
  console.log(` ${tokens.length} tokens detected:`);
  if (tokens.length === 0) {
    return;
  }
  const firstToken = tokens[0];
  const firstTokenText = getText(firstToken.layout.textAnchor, text);
  console.log(` First token text: ${JSON.stringify(firstTokenText)}`);
  const firstTokenBreakType = (firstToken.detectedBreak || {}).type;
  console.log(` First token break type: ${firstTokenBreakType}`);
  const lastToken = tokens[tokens.length - 1];
  const lastTokenText = getText(lastToken.layout.textAnchor, text);
  console.log(` Last token text: ${JSON.stringify(lastTokenText)}`);
  const lastTokenBreakType = (lastToken.detectedBreak || {}).type;
  console.log(` Last token break type: ${lastTokenBreakType}`);
};
// Extract shards from the text field
// Bug fix: a text element that wraps can be stored in several textSegments;
// previously only the first segment was returned, truncating multi-shard
// anchors. All segments are now concatenated (matches the Python sample).
const getText = (textAnchor, text) => {
  if (!textAnchor.textSegments || textAnchor.textSegments.length === 0) {
    return '';
  }
  // First shard in document doesn't have startIndex property
  return textAnchor.textSegments
    .map(segment => text.substring(segment.startIndex || 0, segment.endIndex))
    .join('');
};
Python
For more information, see the Document AI Python API reference documentation.
To authenticate to Document AI, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
from
typing
import
Optional
,
Sequence
from
google.api_core.client_options
import
ClientOptions
from
google.cloud
import
documentai
# TODO(developer): Uncomment these variables before running the sample.
# project_id = "YOUR_PROJECT_ID"
# location = "YOUR_PROCESSOR_LOCATION" # Format is "us" or "eu"
# processor_id = "YOUR_PROCESSOR_ID" # Create processor before running sample
# processor_version = "rc" # Refer to https://cloud.google.com/document-ai/docs/manage-processor-versions for more information
# file_path = "/path/to/local/pdf"
# mime_type = "application/pdf" # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types
def process_document_ocr_sample(
    project_id: str,
    location: str,
    processor_id: str,
    processor_version: str,
    file_path: str,
    mime_type: str,
) -> None:
    """Runs a Document OCR processor on a local file and prints the results.

    Builds OCR-specific process options, sends one online process request via
    `process_document`, then prints the full text followed by per-page detail
    (dimensions, languages, blocks, paragraphs, lines, tokens, and — when
    present — symbols, image quality scores, and visual elements).

    Args:
        project_id: Google Cloud project id.
        location: Processor location, "us" or "eu".
        processor_id: Id of an existing Document OCR processor.
        processor_version: Processor version to call (e.g. "rc").
        file_path: Local path of the file to process.
        mime_type: MIME type of the file (see supported file types docs).
    """
    # Optional: Additional configurations for Document OCR Processor.
    # For more information: https://cloud.google.com/document-ai/docs/enterprise-document-ocr
    process_options = documentai.ProcessOptions(
        ocr_config=documentai.OcrConfig(
            enable_native_pdf_parsing=True,
            enable_image_quality_scores=True,
            enable_symbol=True,
            # OCR Add Ons https://cloud.google.com/document-ai/docs/ocr-add-ons
            premium_features=documentai.OcrConfig.PremiumFeatures(
                compute_style_info=True,
                enable_math_ocr=False,
                # Enable to use Math OCR Model
                enable_selection_mark_detection=True,
            ),
        )
    )
    # Online processing request to Document AI
    document = process_document(
        project_id,
        location,
        processor_id,
        processor_version,
        file_path,
        mime_type,
        process_options=process_options,
    )

    text = document.text
    print(f"Full document text: {text}\n")
    print(f"There are {len(document.pages)} page(s) in this document.\n")

    # Walk each page and print its layout hierarchy via the helpers below.
    for page in document.pages:
        print(f"Page {page.page_number}:")
        print_page_dimensions(page.dimension)
        print_detected_languages(page.detected_languages)
        print_blocks(page.blocks, text)
        print_paragraphs(page.paragraphs, text)
        print_lines(page.lines, text)
        print_tokens(page.tokens, text)

        # Symbols are only populated when enable_symbol is set above.
        if page.symbols:
            print_symbols(page.symbols, text)

        # Quality scores are only populated when enable_image_quality_scores is set.
        if page.image_quality_scores:
            print_image_quality_scores(page.image_quality_scores)

        # Visual elements (checkboxes, math) require the premium features above.
        if page.visual_elements:
            print_visual_elements(page.visual_elements, text)
def print_page_dimensions(dimension: documentai.Document.Page.Dimension) -> None:
    """Prints the width and height of a single page."""
    width, height = dimension.width, dimension.height
    print(f" Width: {str(width)}")
    print(f" Height: {str(height)}")
def print_detected_languages(
    detected_languages: Sequence[documentai.Document.Page.DetectedLanguage],
) -> None:
    """Prints each detected language code with its confidence."""
    print(" Detected languages:")
    for language in detected_languages:
        code = language.language_code
        print(f" {code} ({language.confidence:.1%} confidence)")
def print_blocks(
    blocks: Sequence[documentai.Document.Page.Block], text: str
) -> None:
    """Prints the block count and the text of the first and last block.

    Args:
        blocks: Blocks detected on one page (may be empty).
        text: Full document text the blocks' layouts anchor into.
    """
    print(f" {len(blocks)} blocks detected:")
    # Bug fix: a page can legitimately have no blocks; blocks[0] raised IndexError.
    if not blocks:
        return
    first_block_text = layout_to_text(blocks[0].layout, text)
    print(f" First text block: {repr(first_block_text)}")
    last_block_text = layout_to_text(blocks[-1].layout, text)
    print(f" Last text block: {repr(last_block_text)}")
def print_paragraphs(
    paragraphs: Sequence[documentai.Document.Page.Paragraph], text: str
) -> None:
    """Prints the paragraph count and the text of the first and last paragraph.

    Args:
        paragraphs: Paragraphs detected on one page (may be empty).
        text: Full document text the paragraphs' layouts anchor into.
    """
    print(f" {len(paragraphs)} paragraphs detected:")
    # Bug fix: guard against empty pages; paragraphs[0] raised IndexError.
    if not paragraphs:
        return
    first_paragraph_text = layout_to_text(paragraphs[0].layout, text)
    print(f" First paragraph text: {repr(first_paragraph_text)}")
    last_paragraph_text = layout_to_text(paragraphs[-1].layout, text)
    print(f" Last paragraph text: {repr(last_paragraph_text)}")
def print_lines(
    lines: Sequence[documentai.Document.Page.Line], text: str
) -> None:
    """Prints the line count and the text of the first and last line.

    Args:
        lines: Lines detected on one page (may be empty).
        text: Full document text the lines' layouts anchor into.
    """
    print(f" {len(lines)} lines detected:")
    # Bug fix: guard against empty pages; lines[0] raised IndexError.
    if not lines:
        return
    first_line_text = layout_to_text(lines[0].layout, text)
    print(f" First line text: {repr(first_line_text)}")
    last_line_text = layout_to_text(lines[-1].layout, text)
    print(f" Last line text: {repr(last_line_text)}")
def print_tokens(
    tokens: Sequence[documentai.Document.Page.Token], text: str
) -> None:
    """Prints token count, first/last token text, break types, and style info.

    Style info is only populated on processor versions that support
    `compute_style_info` (e.g. `pretrained-ocr-v2.0-2023-06-02`).

    Args:
        tokens: Tokens detected on one page (may be empty).
        text: Full document text the tokens' layouts anchor into.
    """
    print(f" {len(tokens)} tokens detected:")
    # Bug fix: guard against empty pages; tokens[0] raised IndexError.
    if not tokens:
        return
    first_token_text = layout_to_text(tokens[0].layout, text)
    first_token_break_type = tokens[0].detected_break.type_.name
    print(f" First token text: {repr(first_token_text)}")
    print(f" First token break type: {repr(first_token_break_type)}")
    if tokens[0].style_info:
        print_style_info(tokens[0].style_info)

    last_token_text = layout_to_text(tokens[-1].layout, text)
    last_token_break_type = tokens[-1].detected_break.type_.name
    print(f" Last token text: {repr(last_token_text)}")
    print(f" Last token break type: {repr(last_token_break_type)}")
    if tokens[-1].style_info:
        print_style_info(tokens[-1].style_info)
def print_symbols(
    symbols: Sequence[documentai.Document.Page.Symbol], text: str
) -> None:
    """Prints the symbol count and the first/last symbol text.

    Only called when the page has symbols (requires `enable_symbol`).
    """
    print(f" {len(symbols)} symbols detected:")
    first, last = symbols[0], symbols[-1]
    print(f" First symbol text: {repr(layout_to_text(first.layout, text))}")
    print(f" Last symbol text: {repr(layout_to_text(last.layout, text))}")
def print_image_quality_scores(
    image_quality_scores: documentai.Document.Page.ImageQualityScores,
) -> None:
    """Prints the page quality score and each detected image defect."""
    scores = image_quality_scores
    print(f" Quality score: {scores.quality_score:.1%}")
    print(" Detected defects:")
    for defect in scores.detected_defects:
        print(f" {defect.type_}: {defect.confidence:.1%}")
def print_style_info(style_info: documentai.Document.Page.Token.StyleInfo) -> None:
    """
    Prints font and styling attributes detected for a token.

    Only supported in version `pretrained-ocr-v2.0-2023-06-02`
    """
    color = style_info.text_color
    print(f" Font Size: {style_info.font_size}pt")
    print(f" Font Type: {style_info.font_type}")
    print(f" Bold: {style_info.bold}")
    print(f" Italic: {style_info.italic}")
    print(f" Underlined: {style_info.underlined}")
    print(f" Handwritten: {style_info.handwritten}")
    print(
        f" Text Color (RGBa): {color.red}, {color.green}, {color.blue}, {color.alpha}"
    )
def print_visual_elements(
    visual_elements: Sequence[documentai.Document.Page.VisualElement], text: str
) -> None:
    """
    Prints detected checkboxes and math formulas.

    Only supported in version `pretrained-ocr-v2.0-2023-06-02`
    """
    # Partition the visual elements by kind.
    checkboxes = [ve for ve in visual_elements if "checkbox" in ve.type]
    math_symbols = [ve for ve in visual_elements if ve.type == "math_formula"]

    if checkboxes:
        print(f" {len(checkboxes)} checkboxes detected:")
        print(f" First checkbox: {repr(checkboxes[0].type)}")
        print(f" Last checkbox: {repr(checkboxes[-1].type)}")

    if math_symbols:
        print(f" {len(math_symbols)} math symbols detected:")
        first_math_symbol_text = layout_to_text(math_symbols[0].layout, text)
        print(f" First math symbol: {repr(first_math_symbol_text)}")
def process_document(
    project_id: str,
    location: str,
    processor_id: str,
    processor_version: str,
    file_path: str,
    mime_type: str,
    process_options: Optional[documentai.ProcessOptions] = None,
) -> documentai.Document:
    """Sends one online (synchronous) process request and returns the Document.

    Args:
        project_id: Google Cloud project id.
        location: Processor location, "us" or "eu".
        processor_id: Id of an existing processor.
        processor_version: Processor version to call.
        file_path: Local path of the file to upload as the raw document.
        mime_type: MIME type of the file.
        process_options: Optional processor-specific options (Document OCR only).

    Returns:
        The parsed `documentai.Document` from the process response.
    """
    # You must set the `api_endpoint` if you use a location other than "us".
    client = documentai.DocumentProcessorServiceClient(
        client_options=ClientOptions(
            api_endpoint=f"{location}-documentai.googleapis.com"
        )
    )
    # The full resource name of the processor version, e.g.:
    # `projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}`
    # You must create a processor before running this sample.
    name = client.processor_version_path(
        project_id, location, processor_id, processor_version
    )
    # Read the file into memory
    with open(file_path, "rb") as image:
        image_content = image.read()

    # Configure the process request
    request = documentai.ProcessRequest(
        name=name,
        raw_document=documentai.RawDocument(content=image_content, mime_type=mime_type),
        # Only supported for Document OCR processor
        process_options=process_options,
    )

    result = client.process_document(request=request)

    # For a full list of `Document` object attributes, reference this page:
    # https://cloud.google.com/document-ai/docs/reference/rest/v1/Document
    return result.document
def layout_to_text(layout: documentai.Document.Page.Layout, text: str) -> str:
    """
    Document AI identifies text in different parts of the document by their
    offsets in the entirety of the document's text. This function converts
    offsets to a string.
    """
    # A logical element that spans several lines is stored as several
    # text segments; stitch them back together in order.
    pieces = []
    for segment in layout.text_anchor.text_segments:
        start = int(segment.start_index)
        end = int(segment.end_index)
        pieces.append(text[start:end])
    return "".join(pieces)
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.