Demonstrates finding sensitive data in a local text or image file.
Explore further
For detailed documentation that includes this code sample, see the following:
Code sample
C#
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
using
System
;
using
System.Collections.Generic
;
using
System.IO
;
using
System.Linq
;
using
Google.Api.Gax.ResourceNames
;
using
Google.Cloud.Dlp.V2
;
using
Google.Protobuf
;
using
static
Google
.
Cloud
.
Dlp
.
V2
.
ByteContentItem
.
Types
;
public class DlpInspectFile
{
    /// <summary>
    /// Sends the bytes of a local file to the Sensitive Data Protection (DLP)
    /// API, looks for phone numbers, email addresses and credit card numbers,
    /// and prints every finding to the console.
    /// </summary>
    /// <param name="projectId">Google Cloud project ID used to build the request parent.</param>
    /// <param name="filePath">Path of the local file whose bytes are inspected.</param>
    /// <param name="fileType">Content type assigned to the byte item in the request.</param>
    /// <returns>The findings contained in the inspection response.</returns>
    public static IEnumerable<Finding> InspectFile(string projectId, string filePath, BytesType fileType)
    {
        // Instantiate a client.
        var dlp = DlpServiceClient.Create();

        // Get the bytes from the file. File.OpenRead opens the file with
        // read-only access, so read-only files can be inspected as well
        // (new FileStream(path, FileMode.Open) would request read/write access).
        ByteString fileBytes;
        using (Stream f = File.OpenRead(filePath))
        {
            fileBytes = ByteString.FromStream(f);
        }

        // Construct a request.
        var request = new InspectContentRequest
        {
            Parent = new LocationName(projectId, "global").ToString(),
            Item = new ContentItem
            {
                ByteItem = new ByteContentItem()
                {
                    Data = fileBytes,
                    Type = fileType
                }
            },
            InspectConfig = new InspectConfig
            {
                // The info types of information to match
                InfoTypes =
                {
                    new InfoType { Name = "PHONE_NUMBER" },
                    new InfoType { Name = "EMAIL_ADDRESS" },
                    new InfoType { Name = "CREDIT_CARD_NUMBER" }
                },
                // The minimum likelihood before returning a match
                MinLikelihood = Likelihood.Unspecified,
                // Whether to include the matching string
                IncludeQuote = true,
                Limits = new InspectConfig.Types.FindingLimits
                {
                    // The maximum number of findings to report per request
                    // (0 = server maximum)
                    MaxFindingsPerRequest = 0
                }
            }
        };

        // Execute request
        var response = dlp.InspectContent(request);

        // Inspect response
        var findings = response.Result.Findings;
        if (findings.Any())
        {
            Console.WriteLine("Findings:");
            foreach (var finding in findings)
            {
                Console.WriteLine($"Quote: {finding.Quote}");
                Console.WriteLine($"InfoType: {finding.InfoType}");
                Console.WriteLine($"Likelihood: {finding.Likelihood}");
            }
        }
        else
        {
            Console.WriteLine("No findings.");
        }

        return findings;
    }
}
Go
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import
(
"context"
"fmt"
"io"
"os"
dlp
"cloud.google.com/go/dlp/apiv2"
"cloud.google.com/go/dlp/apiv2/dlppb"
)
// inspectTextFile inspects a text file at a given filePath, and prints results.
func
inspectTextFile
(
w
io
.
Writer
,
projectID
,
filePath
string
)
error
{
// projectID := "my-project-id"
// filePath := "path/to/image.png"
ctx
:=
context
.
Background
()
// Initialize client.
client
,
err
:=
dlp
.
NewClient
(
ctx
)
if
err
!=
nil
{
return
err
}
defer
client
.
Close
()
// Closing the client safely cleans up background resources.
// Gather the resources for the request.
data
,
err
:=
os
.
ReadFile
(
filePath
)
if
err
!=
nil
{
return
err
}
// Create and send the request.
req
:=
& dlppb
.
InspectContentRequest
{
Parent
:
fmt
.
Sprintf
(
"projects/%s/locations/global"
,
projectID
),
Item
:
& dlppb
.
ContentItem
{
DataItem
:
& dlppb
.
ContentItem_ByteItem
{
ByteItem
:
& dlppb
.
ByteContentItem
{
Type
:
dlppb
.
ByteContentItem_TEXT_UTF8
,
Data
:
data
,
},
},
},
InspectConfig
:
& dlppb
.
InspectConfig
{
InfoTypes
:
[]
*
dlppb
.
InfoType
{
{
Name
:
"PHONE_NUMBER"
},
{
Name
:
"EMAIL_ADDRESS"
},
{
Name
:
"CREDIT_CARD_NUMBER"
},
},
IncludeQuote
:
true
,
},
}
resp
,
err
:=
client
.
InspectContent
(
ctx
,
req
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
"InspectContent: %w"
,
err
)
}
// Process the results.
fmt
.
Fprintf
(
w
,
"Findings: %d\n"
,
len
(
resp
.
Result
.
Findings
))
for
_
,
f
:=
range
resp
.
Result
.
Findings
{
fmt
.
Fprintf
(
w
,
"\tQuote: %s\n"
,
f
.
Quote
)
fmt
.
Fprintf
(
w
,
"\tInfo type: %s\n"
,
f
.
InfoType
.
Name
)
fmt
.
Fprintf
(
w
,
"\tLikelihood: %s\n"
,
f
.
Likelihood
)
}
return
nil
}
Java
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import
com.google.cloud.dlp.v2. DlpServiceClient
;
import
com.google.privacy.dlp.v2. ByteContentItem
;
import
com.google.privacy.dlp.v2. ByteContentItem
. BytesType
;
import
com.google.privacy.dlp.v2. ContentItem
;
import
com.google.privacy.dlp.v2. Finding
;
import
com.google.privacy.dlp.v2. InfoType
;
import
com.google.privacy.dlp.v2. InspectConfig
;
import
com.google.privacy.dlp.v2. InspectContentRequest
;
import
com.google.privacy.dlp.v2. InspectContentResponse
;
import
com.google.privacy.dlp.v2. LocationName
;
import
com.google.protobuf. ByteString
;
import
java.io.FileInputStream
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.List
;
public class InspectTextFile {

  public static void main(String[] args) throws Exception {
    // TODO(developer): Replace these variables before running the sample.
    String projectId = "your-project-id";
    String filePath = "path/to/file.txt";
    inspectTextFile(projectId, filePath);
  }

  /**
   * Inspects the specified text file for phone numbers, email addresses and credit card numbers
   * and prints each finding to standard output.
   *
   * @param projectId Google Cloud project ID used to build the request parent.
   * @param filePath path of the local text file to inspect.
   * @throws IOException if the client cannot be created or the file cannot be read.
   */
  public static void inspectTextFile(String projectId, String filePath) throws IOException {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (DlpServiceClient dlp = DlpServiceClient.create()) {
      // Specify the type and content to be inspected. The input stream is opened in its own
      // try-with-resources block so the file handle is released even if reading fails.
      ByteString fileBytes;
      try (FileInputStream inputStream = new FileInputStream(filePath)) {
        fileBytes = ByteString.readFrom(inputStream);
      }
      ByteContentItem byteItem =
          ByteContentItem.newBuilder().setType(BytesType.TEXT_UTF8).setData(fileBytes).build();
      ContentItem item = ContentItem.newBuilder().setByteItem(byteItem).build();

      // Specify the type of info the inspection will look for.
      List<InfoType> infoTypes = new ArrayList<>();
      // See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
      for (String typeName : new String[] {"PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD_NUMBER"}) {
        infoTypes.add(InfoType.newBuilder().setName(typeName).build());
      }

      // Construct the configuration for the Inspect request.
      InspectConfig config =
          InspectConfig.newBuilder().addAllInfoTypes(infoTypes).setIncludeQuote(true).build();

      // Construct the Inspect request to be sent by the client.
      InspectContentRequest request =
          InspectContentRequest.newBuilder()
              .setParent(LocationName.of(projectId, "global").toString())
              .setItem(item)
              .setInspectConfig(config)
              .build();

      // Use the client to send the API request.
      InspectContentResponse response = dlp.inspectContent(request);

      // Parse the response and process results
      System.out.println("Findings: " + response.getResult().getFindingsCount());
      for (Finding f : response.getResult().getFindingsList()) {
        System.out.println("\tQuote: " + f.getQuote());
        System.out.println("\tInfo type: " + f.getInfoType().getName());
        System.out.println("\tLikelihood: " + f.getLikelihood());
      }
    }
  }
}
Node.js
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
// Imports the Google Cloud Data Loss Prevention library
const
DLP
=
require
(
' @google-cloud/dlp
'
);
// Import other required libraries
const
fs
=
require
(
'fs'
);
const
mime
=
require
(
'mime'
);
// Instantiates a client
const
dlp
=
new
DLP
.
DlpServiceClient
();
// The project ID to run the API call under
// const projectId = 'my-project';
// The path to a local file to inspect. Can be a text, JPG, or PNG file.
// const filepath = 'path/to/image.png';
// The minimum likelihood required before returning a match
// const minLikelihood = 'LIKELIHOOD_UNSPECIFIED';
// The maximum number of findings to report per request (0 = server maximum)
// const maxFindings = 0;
// The infoTypes of information to match
// const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];
// The customInfoTypes of information to match
// const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}},
// { infoType: { name: 'REGEX_TYPE' }, regex: {pattern: '\\(\\d{3}\\) \\d{3}-\\d{4}'}}];
// Whether to include the matching string
// const includeQuote = true;
/**
 * Inspects the local file at `filepath` with the Sensitive Data Protection
 * (DLP) API and logs each finding (quote, info type, likelihood).
 *
 * Reads `projectId`, `filepath`, `infoTypes`, `customInfoTypes`,
 * `minLikelihood`, `includeQuote` and `maxFindings` from the enclosing scope.
 *
 * @returns {Promise<void>} Resolves once the findings have been logged.
 */
async function inspectFile() {
  // Map the file's MIME type to the numeric bytes-type constant sent in the
  // request. The registered MIME type for SVG is 'image/svg+xml'; the bare
  // 'image/svg' key is kept for backward compatibility.
  const bytesTypeByMime = {
    'image/jpeg': 1,
    'image/bmp': 2,
    'image/png': 3,
    'image/svg': 4,
    'image/svg+xml': 4,
  };
  // Anything not listed (including text files and unknown extensions, for
  // which mime.getType() returns null) falls back to 0, i.e. unspecified.
  const fileTypeConstant = bytesTypeByMime[mime.getType(filepath)] || 0;

  // Construct file data to inspect. readFileSync already returns a Buffer,
  // so it can be base64-encoded directly.
  const fileBytes = fs.readFileSync(filepath).toString('base64');
  const item = {
    byteItem: {
      type: fileTypeConstant,
      data: fileBytes,
    },
  };

  // Construct request
  const request = {
    parent: `projects/${projectId}/locations/global`,
    inspectConfig: {
      infoTypes: infoTypes,
      customInfoTypes: customInfoTypes,
      minLikelihood: minLikelihood,
      includeQuote: includeQuote,
      limits: {
        maxFindingsPerRequest: maxFindings,
      },
    },
    item: item,
  };

  // Run request
  const [response] = await dlp.inspectContent(request);
  const findings = response.result.findings;
  if (findings.length > 0) {
    console.log('Findings:');
    findings.forEach(finding => {
      // The quote is only populated when it was requested.
      if (includeQuote) {
        console.log(`\tQuote: ${finding.quote}`);
      }
      console.log(`\tInfo type: ${finding.infoType.name}`);
      console.log(`\tLikelihood: ${finding.likelihood}`);
    });
  } else {
    console.log('No findings.');
  }
}
Python
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import
mimetypes
from
typing
import
List
from
typing
import
Optional
import
google.cloud.dlp
def inspect_file(
    project: str,
    filename: str,
    info_types: List[str],
    min_likelihood: Optional[str] = None,
    custom_dictionaries: Optional[List[str]] = None,
    custom_regexes: Optional[List[str]] = None,
    max_findings: Optional[int] = None,
    include_quote: bool = True,
    mime_type: Optional[str] = None,
) -> None:
    """Uses the Data Loss Prevention API to analyze a file for protected data.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        filename: The path to the file to inspect.
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API.
            If empty, FIRST_NAME, LAST_NAME and EMAIL_ADDRESS are used.
        min_likelihood: A string representing the minimum likelihood threshold
            that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
            'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'.
        custom_dictionaries: A list of comma-separated word lists; each entry
            becomes a custom dictionary info type named CUSTOM_DICTIONARY_<i>.
        custom_regexes: A list of regex patterns; each entry becomes a custom
            regex info type named CUSTOM_REGEX_<i>.
        max_findings: The maximum number of findings to report; 0 = no maximum.
        include_quote: Boolean for whether to display a quote of the detected
            information in the results.
        mime_type: The MIME type of the file. If not specified, the type is
            inferred via the Python standard library's mimetypes module.

    Returns:
        None; the response from the API is printed to the terminal.
    """
    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Prepare info_types by converting the list of strings into a list of
    # dictionaries (protos are also accepted).
    if not info_types:
        info_types = ["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"]
    info_types = [{"name": info_type} for info_type in info_types]

    # Prepare custom_info_types by parsing the dictionary word lists and
    # regex patterns.
    if custom_dictionaries is None:
        custom_dictionaries = []
    dictionaries = [
        {
            "info_type": {"name": f"CUSTOM_DICTIONARY_{i}"},
            "dictionary": {"word_list": {"words": custom_dict.split(",")}},
        }
        for i, custom_dict in enumerate(custom_dictionaries)
    ]
    if custom_regexes is None:
        custom_regexes = []
    regexes = [
        {
            "info_type": {"name": f"CUSTOM_REGEX_{i}"},
            "regex": {"pattern": custom_regex},
        }
        for i, custom_regex in enumerate(custom_regexes)
    ]
    custom_info_types = dictionaries + regexes

    # Construct the configuration dictionary. Keys which are None may
    # optionally be omitted entirely.
    inspect_config = {
        "info_types": info_types,
        "custom_info_types": custom_info_types,
        "min_likelihood": min_likelihood,
        "include_quote": include_quote,
        "limits": {"max_findings_per_request": max_findings},
    }

    # If mime_type is not specified, guess it from the filename.
    if mime_type is None:
        mime_guess = mimetypes.MimeTypes().guess_type(filename)
        mime_type = mime_guess[0]

    # Select the content type index from the list of supported types.
    # https://github.com/googleapis/googleapis/blob/master/google/privacy/dlp/v2/dlp.proto / message ByteContentItem
    # The original short keys are kept for callers that pass them explicitly;
    # the standard MIME names (the ones the mimetypes module is expected to
    # return) are added so guessed types resolve to the right constant
    # instead of silently falling back to 0.
    supported_content_types = {
        None: 0,  # "Unspecified" or BYTES_TYPE_UNSPECIFIED
        "image/jpeg": 1,  # IMAGE_JPEG
        "image/bmp": 2,  # IMAGE_BMP
        "image/png": 3,  # IMAGE_PNG
        "image/svg": 4,  # IMAGE_SVG (legacy key)
        "image/svg+xml": 4,  # IMAGE_SVG (registered MIME type)
        "text/plain": 5,  # TEXT_UTF8
        # Note: there is no MIME type for a general "image"; this key maps to
        # IMAGE only when passed explicitly.
        "image": 6,  # IMAGE - Any image type
        "application/msword": 7,  # WORD_DOCUMENT
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": 7,  # WORD_DOCUMENT (.docx)
        "application/pdf": 8,  # PDF
        "application/powerpoint": 9,  # POWERPOINT_DOCUMENT (legacy key)
        "application/vnd.ms-powerpoint": 9,  # POWERPOINT_DOCUMENT
        "application/vnd.openxmlformats-officedocument.presentationml.presentation": 9,  # POWERPOINT_DOCUMENT (.pptx)
        "application/msexcel": 10,  # EXCEL_DOCUMENT (legacy key)
        "application/vnd.ms-excel": 10,  # EXCEL_DOCUMENT
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": 10,  # EXCEL_DOCUMENT (.xlsx)
        "application/avro": 11,  # AVRO
        "text/csv": 12,  # CSV
        "text/tsv": 13,  # TSV (legacy key)
        "text/tab-separated-values": 13,  # TSV
    }
    # Unknown MIME types fall back to 0 (unspecified).
    content_type_index = supported_content_types.get(mime_type, 0)

    # Construct the item, containing the file's byte data.
    with open(filename, mode="rb") as f:
        item = {"byte_item": {"type_": content_type_index, "data": f.read()}}

    # Convert the project id into a full resource id. The global location is
    # spelled out explicitly, matching the other language samples.
    parent = f"projects/{project}/locations/global"

    # Call the API.
    response = dlp.inspect_content(
        request={"parent": parent, "inspect_config": inspect_config, "item": item}
    )

    # Print out the results.
    findings = response.result.findings
    if findings:
        for finding in findings:
            # Skip the quote line only when the finding has no quote
            # attribute, instead of swallowing AttributeError silently.
            quote = getattr(finding, "quote", None)
            if quote is not None:
                print(f"Quote: {quote}")
            print(f"Info type: {finding.info_type.name}")
            print(f"Likelihood: {finding.likelihood}")
    else:
        print("No findings.")
Ruby
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
# Inspects a local file with the Sensitive Data Protection (DLP) API and
# prints each finding's quote, info type and likelihood.
#
# project_id   = "Your Google Cloud project ID"
# filename     = "The file path to the file to inspect"
# max_findings = "Maximum number of findings to report per request (0 = server maximum)"

require "google/cloud/dlp"

dlp = Google::Cloud::Dlp.dlp_service

inspect_config = {
  # The types of information to match
  info_types:     [{ name: "PERSON_NAME" }, { name: "PHONE_NUMBER" }],

  # Only return results above a likelihood threshold (0 for all)
  min_likelihood: :POSSIBLE,

  # Limit the number of findings (0 for no limit)
  limits:         { max_findings_per_request: max_findings },

  # Whether to include the matching string in the response
  include_quote:  true
}

# The item to inspect. File.binread reads the whole file in binary mode and
# closes the handle itself, so no file descriptor is left open.
item_to_inspect = {
  byte_item: {
    type: :BYTES_TYPE_UNSPECIFIED,
    data: File.binread(filename)
  }
}

# Run request
parent = "projects/#{project_id}/locations/global"
response = dlp.inspect_content parent:         parent,
                               inspect_config: inspect_config,
                               item:           item_to_inspect

# Print the results
if response.result.findings.empty?
  puts "No findings"
else
  response.result.findings.each do |finding|
    puts "Quote:      #{finding.quote}"
    puts "Info type:  #{finding.info_type.name}"
    puts "Likelihood: #{finding.likelihood}"
  end
end
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

