This sample demonstrates how to inspect a Cloud Storage resource and create de-identified copies of the files.
Explore further
For detailed documentation that includes this code sample, see the following:
Code sample
C#
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
using
Google.Api.Gax.ResourceNames
;
using
Google.Cloud.Dlp.V2
;
using
System.Linq
;
/// <summary>
/// Sample that starts an inspection job over a Cloud Storage location and attaches a
/// de-identify action, so that de-identified copies of the inspected files are produced.
/// </summary>
public class DeidentifyDataStoredInCloudStorage
{
    /// <summary>
    /// Builds and submits an inspect job with a de-identify action.
    /// </summary>
    /// <param name="projectId">Project used for the job's parent location and for the BigQuery table.</param>
    /// <param name="gcsInputPath">Cloud Storage URL used as the file set to inspect.</param>
    /// <param name="unstructuredDeidentifyTemplatePath">Value assigned to the transformation config's DeidentifyTemplate.</param>
    /// <param name="structuredDeidentifyTemplatePath">Value assigned to the transformation config's StructuredDeidentifyTemplate.</param>
    /// <param name="imageRedactionTemplatePath">Value assigned to the transformation config's ImageRedactTemplate.</param>
    /// <param name="gcsOutputPath">Cloud Storage path that receives the de-identified files.</param>
    /// <param name="datasetId">BigQuery dataset of the table that receives the transformation details.</param>
    /// <param name="tableId">BigQuery table that receives the transformation details.</param>
    /// <returns>The job returned by the CreateDlpJob call.</returns>
    public static DlpJob Deidentify(
        string projectId,
        string gcsInputPath,
        string unstructuredDeidentifyTemplatePath,
        string structuredDeidentifyTemplatePath,
        string imageRedactionTemplatePath,
        string gcsOutputPath,
        string datasetId,
        string tableId)
    {
        // The client is created first so that the remaining code only assembles the request.
        var client = DlpServiceClient.Create();

        // Input: the Cloud Storage location to scan.
        var inputFiles = new CloudStorageOptions.Types.FileSet { Url = gcsInputPath };
        var storage = new StorageConfig
        {
            CloudStorageOptions = new CloudStorageOptions { FileSet = inputFiles }
        };

        // What to look for while scanning.
        var inspection = new InspectConfig
        {
            InfoTypes =
            {
                new InfoType { Name = "PERSON_NAME" },
                new InfoType { Name = "EMAIL_ADDRESS" }
            },
            IncludeQuote = true
        };

        // Templates that drive de-identification of unstructured files, images and structured files.
        var templates = new TransformationConfig
        {
            DeidentifyTemplate = unstructuredDeidentifyTemplatePath,
            ImageRedactTemplate = imageRedactionTemplatePath,
            StructuredDeidentifyTemplate = structuredDeidentifyTemplatePath,
        };

        // BigQuery table that records the details of the transformations performed.
        var detailsTable = new BigQueryTable
        {
            ProjectId = projectId,
            DatasetId = datasetId,
            TableId = tableId
        };

        // Action executed after the inspection portion of the job: write the de-identified
        // files to the GCS path in gcsOutputPath and the transformation details to BigQuery.
        var deidentifyAction = new Action
        {
            Deidentify = new Action.Types.Deidentify
            {
                CloudStorageOutput = gcsOutputPath,
                TransformationConfig = templates,
                TransformationDetailsStorageConfig = new TransformationDetailsStorageConfig
                {
                    Table = detailsTable
                }
            }
        };

        // Assemble the job from the storage config, inspect config and action.
        var jobConfig = new InspectJobConfig
        {
            StorageConfig = storage,
            InspectConfig = inspection,
            Actions = { deidentifyAction }
        };

        var request = new CreateDlpJobRequest
        {
            ParentAsLocationName = new LocationName(projectId, "global"),
            InspectJob = jobConfig
        };

        // Call the API and hand the created job back to the caller.
        return client.CreateDlpJob(request);
    }
}
Go
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import
(
"context"
"fmt"
"io"
dlp
"cloud.google.com/go/dlp/apiv2"
"cloud.google.com/go/dlp/apiv2/dlppb"
)
func
deidentifyCloudStorage
(
w
io
.
Writer
,
projectID
,
gcsUri
,
tableId
,
datasetId
,
outputDirectory
,
deidentifyTemplateId
,
structuredDeidentifyTemplateId
,
imageRedactTemplateId
string
)
error
{
// projectId := "my-project-id"
// gcsUri := "gs://" + "your-bucket-name" + "/path/to/your/file.txt"
// tableId := "your-bigquery-table-id"
// datasetId := "your-bigquery-dataset-id"
// outputDirectory := "your-output-directory"
// deidentifyTemplateId := "your-deidentify-template-id"
// structuredDeidentifyTemplateId := "your-structured-deidentify-template-id"
// imageRedactTemplateId := "your-image-redact-template-id"
ctx
:=
context
.
Background
()
// Initialize a client once and reuse it to send multiple requests. Clients
// are safe to use across goroutines. When the client is no longer needed,
// call the Close method to cleanup its resources.
client
,
err
:=
dlp
.
NewClient
(
ctx
)
if
err
!=
nil
{
return
err
}
// Closing the client safely cleans up background resources.
defer
client
.
Close
()
// Set path in Cloud Storage.
cloudStorageOptions
:=
& dlppb
.
CloudStorageOptions
{
FileSet
:
& dlppb
.
CloudStorageOptions_FileSet
{
Url
:
gcsUri
,
},
}
// Define the storage config options for cloud storage options.
storageConfig
:=
& dlppb
.
StorageConfig
{
Type
:
& dlppb
.
StorageConfig_CloudStorageOptions
{
CloudStorageOptions
:
cloudStorageOptions
,
},
}
// Specify the type of info the inspection will look for.
// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
infoTypes
:=
[]
*
dlppb
.
InfoType
{
{
Name
:
"PERSON_NAME"
},
{
Name
:
"EMAIL_ADDRESS"
},
}
// inspectConfig holds the configuration settings for data inspection and analysis
// within the context of the Google Cloud Data Loss Prevention (DLP) API.
inspectConfig
:=
& dlppb
.
InspectConfig
{
InfoTypes
:
infoTypes
,
IncludeQuote
:
true
,
}
// Types of files to include for de-identification.
fileTypesToTransform
:=
[]
dlppb
.
FileType
{
dlppb
.
FileType_CSV
,
dlppb
.
FileType_IMAGE
,
dlppb
.
FileType_TEXT_FILE
,
}
// Specify the BigQuery table to be inspected.
table
:=
& dlppb
.
BigQueryTable
{
ProjectId
:
projectID
,
DatasetId
:
datasetId
,
TableId
:
tableId
,
}
// transformationDetailsStorageConfig holds configuration settings for storing transformation
// details in the context of the Google Cloud Data Loss Prevention (DLP) API.
transformationDetailsStorageConfig
:=
& dlppb
.
TransformationDetailsStorageConfig
{
Type
:
& dlppb
.
TransformationDetailsStorageConfig_Table
{
Table
:
table
,
},
}
transformationConfig
:=
& dlppb
.
TransformationConfig
{
DeidentifyTemplate
:
deidentifyTemplateId
,
ImageRedactTemplate
:
imageRedactTemplateId
,
StructuredDeidentifyTemplate
:
structuredDeidentifyTemplateId
,
}
// Action to execute on the completion of a job.
deidentify
:=
& dlppb
.
Action_Deidentify
{
TransformationConfig
:
transformationConfig
,
TransformationDetailsStorageConfig
:
transformationDetailsStorageConfig
,
Output
:
& dlppb
.
Action_Deidentify_CloudStorageOutput
{
CloudStorageOutput
:
outputDirectory
,
},
FileTypesToTransform
:
fileTypesToTransform
,
}
action
:=
& dlppb
.
Action
{
Action
:
& dlppb
.
Action_Deidentify_
{
Deidentify
:
deidentify
,
},
}
// Configure the inspection job we want the service to perform.
inspectJobConfig
:=
& dlppb
.
InspectJobConfig
{
StorageConfig
:
storageConfig
,
InspectConfig
:
inspectConfig
,
Actions
:
[]
*
dlppb
.
Action
{
action
,
},
}
// Construct the job creation request to be sent by the client.
req
:=
& dlppb
.
CreateDlpJobRequest
{
Parent
:
fmt
.
Sprintf
(
"projects/%s/locations/global"
,
projectID
),
Job
:
& dlppb
.
CreateDlpJobRequest_InspectJob
{
InspectJob
:
inspectJobConfig
,
},
}
// Send the request.
resp
,
err
:=
client
.
CreateDlpJob
(
ctx
,
req
)
if
err
!=
nil
{
fmt
.
Fprintf
(
w
,
"error after resp: %v"
,
err
)
return
err
}
// Print the results.
fmt
.
Fprint
(
w
,
"Job created successfully: "
,
resp
.
Name
)
return
nil
}
Java
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import
com.google.cloud.dlp.v2. DlpServiceClient
;
import
com.google.privacy.dlp.v2. Action
;
import
com.google.privacy.dlp.v2. BigQueryTable
;
import
com.google.privacy.dlp.v2. CloudStorageOptions
;
import
com.google.privacy.dlp.v2. CreateDlpJobRequest
;
import
com.google.privacy.dlp.v2. DlpJob
;
import
com.google.privacy.dlp.v2. FileType
;
import
com.google.privacy.dlp.v2. InfoType
;
import
com.google.privacy.dlp.v2. InfoTypeStats
;
import
com.google.privacy.dlp.v2. InspectConfig
;
import
com.google.privacy.dlp.v2. InspectDataSourceDetails
;
import
com.google.privacy.dlp.v2. InspectJobConfig
;
import
com.google.privacy.dlp.v2. LocationName
;
import
com.google.privacy.dlp.v2. ProjectDeidentifyTemplateName
;
import
com.google.privacy.dlp.v2. StorageConfig
;
import
com.google.privacy.dlp.v2. TransformationConfig
;
import
com.google.privacy.dlp.v2. TransformationDetailsStorageConfig
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.Arrays
;
import
java.util.List
;
import
java.util.concurrent.TimeUnit
;
public
class
DeidentifyCloudStorage
{
// Set the timeout duration in minutes.
private
static
final
int
TIMEOUT_MINUTES
=
15
;
public
static
void
main
(
String
[]
args
)
throws
IOException
,
InterruptedException
{
// TODO(developer): Replace these variables before running the sample.
// The Google Cloud project id to use as a parent resource.
String
projectId
=
"your-project-id"
;
// Specify the cloud storage directory that you want to inspect.
String
gcsPath
=
"gs://"
+
"your-bucket-name"
+
"/path/to/your/file.txt"
;
// Specify the big query dataset id to store the transformation details.
String
datasetId
=
"your-bigquery-dataset-id"
;
// Specify the big query table id to store the transformation details.
String
tableId
=
"your-bigquery-table-id"
;
// Specify the cloud storage directory to store the de-identified files.
String
outputDirectory
=
"your-output-directory"
;
// Specify the de-identify template ID for unstructured files.
String
deidentifyTemplateId
=
"your-deidentify-template-id"
;
// Specify the de-identify template ID for structured files.
String
structuredDeidentifyTemplateId
=
"your-structured-deidentify-template-id"
;
// Specify the de-identify template ID for images.
String
imageRedactTemplateId
=
"your-image-redact-template-id"
;
deidentifyCloudStorage
(
projectId
,
gcsPath
,
tableId
,
datasetId
,
outputDirectory
,
deidentifyTemplateId
,
structuredDeidentifyTemplateId
,
imageRedactTemplateId
);
}
public
static
void
deidentifyCloudStorage
(
String
projectId
,
String
gcsPath
,
String
tableId
,
String
datasetId
,
String
outputDirectory
,
String
deidentifyTemplateId
,
String
structuredDeidentifyTemplateId
,
String
imageRedactTemplateId
)
throws
IOException
,
InterruptedException
{
try
(
DlpServiceClient
dlp
=
DlpServiceClient
.
create
())
{
// Set path in Cloud Storage.
CloudStorageOptions
cloudStorageOptions
=
CloudStorageOptions
.
newBuilder
()
.
setFileSet
(
CloudStorageOptions
.
FileSet
.
newBuilder
().
setUrl
(
gcsPath
))
.
build
();
// Set storage config indicating the type of cloud storage.
StorageConfig
storageConfig
=
StorageConfig
.
newBuilder
().
setCloudStorageOptions
(
cloudStorageOptions
).
build
();
// Specify the type of info the inspection will look for.
// See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types
List<InfoType>
infoTypes
=
new
ArrayList
<> ();
for
(
String
typeName
:
new
String
[]
{
"PERSON_NAME"
,
"EMAIL_ADDRESS"
})
{
infoTypes
.
add
(
InfoType
.
newBuilder
().
setName
(
typeName
).
build
());
}
InspectConfig
inspectConfig
=
InspectConfig
.
newBuilder
().
addAllInfoTypes
(
infoTypes
).
setIncludeQuote
(
true
).
build
();
// Types of files to include for de-identification.
List<FileType>
fileTypesToTransform
=
Arrays
.
asList
(
FileType
.
valueOf
(
"IMAGE"
),
FileType
.
valueOf
(
"CSV"
),
FileType
.
valueOf
(
"TEXT_FILE"
));
// Specify the big query table to store the transformation details.
BigQueryTable
table
=
BigQueryTable
.
newBuilder
()
.
setProjectId
(
projectId
)
.
setTableId
(
tableId
)
.
setDatasetId
(
datasetId
)
.
build
();
TransformationDetailsStorageConfig
transformationDetailsStorageConfig
=
TransformationDetailsStorageConfig
.
newBuilder
().
setTable
(
table
).
build
();
// Specify the de-identify template used for the transformation.
TransformationConfig
transformationConfig
=
TransformationConfig
.
newBuilder
()
.
setDeidentifyTemplate
(
ProjectDeidentifyTemplateName
.
of
(
projectId
,
deidentifyTemplateId
).
toString
())
.
setImageRedactTemplate
(
ProjectDeidentifyTemplateName
.
of
(
projectId
,
imageRedactTemplateId
).
toString
())
.
setStructuredDeidentifyTemplate
(
ProjectDeidentifyTemplateName
.
of
(
projectId
,
structuredDeidentifyTemplateId
)
.
toString
())
.
build
();
Action
.
Deidentify
deidentify
=
Action
.
Deidentify
.
newBuilder
()
.
setCloudStorageOutput
(
outputDirectory
)
.
setTransformationConfig
(
transformationConfig
)
.
setTransformationDetailsStorageConfig
(
transformationDetailsStorageConfig
)
.
addAllFileTypesToTransform
(
fileTypesToTransform
)
.
build
();
Action
action
=
Action
.
newBuilder
().
setDeidentify
(
deidentify
).
build
();
// Configure the long-running job we want the service to perform.
InspectJobConfig
inspectJobConfig
=
InspectJobConfig
.
newBuilder
()
.
setInspectConfig
(
inspectConfig
)
.
setStorageConfig
(
storageConfig
)
.
addActions
(
action
)
.
build
();
// Construct the job creation request to be sent by the client.
CreateDlpJobRequest
createDlpJobRequest
=
CreateDlpJobRequest
.
newBuilder
()
.
setParent
(
LocationName
.
of
(
projectId
,
"global"
).
toString
())
.
setInspectJob
(
inspectJobConfig
)
.
build
();
// Send the job creation request.
DlpJob
response
=
dlp
.
createDlpJob
(
createDlpJobRequest
);
// Get the current time.
long
startTime
=
System
.
currentTimeMillis
();
// Check if the job state is DONE.
while
(
response
.
getState
()
!=
DlpJob
.
JobState
.
DONE
)
{
// Sleep for 30 second.
Thread
.
sleep
(
30000
);
// Get the updated job status.
response
=
dlp
.
getDlpJob
(
response
.
getName
());
// Check if the timeout duration has exceeded.
long
elapsedTime
=
System
.
currentTimeMillis
()
-
startTime
;
if
(
TimeUnit
.
MILLISECONDS
.
toMinutes
(
elapsedTime
)
> =
TIMEOUT_MINUTES
)
{
System
.
out
.
printf
(
"Job did not complete within %d minutes.%n"
,
TIMEOUT_MINUTES
);
break
;
}
}
// Print the results.
System
.
out
.
println
(
"Job status: "
+
response
.
getState
());
System
.
out
.
println
(
"Job name: "
+
response
.
getName
());
InspectDataSourceDetails
.
Result
result
=
response
.
getInspectDetails
().
getResult
();
System
.
out
.
println
(
"Findings: "
);
for
(
InfoTypeStats
infoTypeStat
:
result
.
getInfoTypeStatsList
())
{
System
.
out
.
print
(
"\tInfo type: "
+
infoTypeStat
.
getInfoType
().
getName
());
System
.
out
.
println
(
"\tCount: "
+
infoTypeStat
.
getCount
());
}
}
}
}
Node.js
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
// Imports the Google Cloud client library
const DLP = require('@google-cloud/dlp');

// Instantiates a client
const dlp = new DLP.DlpServiceClient();

// The project ID to run the API call under
// const projectId = 'my-project';

// The Cloud Storage directory that needs to be inspected
// const inputDirectory = 'your-google-cloud-storage-path';

// The ID of the BigQuery dataset that stores the transformation details, e.g. 'my_dataset'
// const datasetId = 'my_dataset';

// The ID of the BigQuery table that stores the transformation details, e.g. 'my_table'
// const tableId = 'my_table';

// The Cloud Storage directory that will be used to store the de-identified files
// const outputDirectory = 'your-output-directory';

// The full resource name of the default de-identify template
// const deidentifyTemplateId = 'your-deidentify-template-id';

// The full resource name of the de-identify template for structured files
// const structuredDeidentifyTemplateId = 'your-structured-deidentify-template-id';

// The full resource name of the image redaction template for images
// const imageRedactTemplateId = 'your-image-redact-template-id';

/**
 * Creates an inspection job over `inputDirectory` with a de-identify action,
 * polls the job every 30 seconds for up to 30 attempts, and logs the number of
 * findings per infoType once the job is DONE.
 *
 * Reads its inputs (projectId, inputDirectory, datasetId, tableId,
 * outputDirectory and the three template names) from the enclosing scope.
 */
async function deidentifyCloudStorage() {
  // Specify storage configuration that uses file set.
  const storageConfig = {
    cloudStorageOptions: {
      fileSet: {
        url: inputDirectory,
      },
    },
  };

  // Specify the type of info the inspection will look for.
  const infoTypes = [{name: 'PERSON_NAME'}, {name: 'EMAIL_ADDRESS'}];

  // Construct inspect configuration
  const inspectConfig = {
    infoTypes: infoTypes,
    includeQuote: true,
  };

  // Types of files to include for de-identification. The action field is a
  // list of FileType enum values, so plain enum names are used here.
  const fileTypesToTransform = ['IMAGE', 'CSV', 'TEXT_FILE'];

  // Specify the big query table to store the transformation details.
  const transformationDetailsStorageConfig = {
    table: {
      projectId: projectId,
      tableId: tableId,
      datasetId: datasetId,
    },
  };

  // Specify the de-identify template used for the transformation.
  const transformationConfig = {
    deidentifyTemplate: deidentifyTemplateId,
    structuredDeidentifyTemplate: structuredDeidentifyTemplateId,
    imageRedactTemplate: imageRedactTemplateId,
  };

  // Construct action to de-identify sensitive data.
  const action = {
    deidentify: {
      cloudStorageOutput: outputDirectory,
      transformationConfig: transformationConfig,
      transformationDetailsStorageConfig: transformationDetailsStorageConfig,
      // The key must be `fileTypesToTransform`, matching the field the other
      // language samples set (`file_types_to_transform`).
      fileTypesToTransform: fileTypesToTransform,
    },
  };

  // Construct the inspect job configuration.
  const inspectJobConfig = {
    inspectConfig: inspectConfig,
    storageConfig: storageConfig,
    actions: [action],
  };

  // Construct the job creation request to be sent by the client.
  const createDlpJobRequest = {
    parent: `projects/${projectId}/locations/global`,
    inspectJob: inspectJobConfig,
  };

  // Send the job creation request and process the response.
  const [response] = await dlp.createDlpJob(createDlpJobRequest);
  const jobName = response.name;

  // Waiting for a maximum of 15 minutes (30 attempts, 30 seconds apart) for
  // the job to complete.
  let job;
  let numOfAttempts = 30;
  while (numOfAttempts > 0) {
    // Fetch DLP Job status
    [job] = await dlp.getDlpJob({name: jobName});

    // Check if the job has completed.
    if (job.state === 'DONE') {
      break;
    }
    if (job.state === 'FAILED') {
      console.log('Job Failed, Please check the configuration.');
      return;
    }

    // Sleep for a short duration before checking the job status again.
    await new Promise(resolve => {
      setTimeout(() => resolve(), 30000);
    });
    numOfAttempts -= 1;
  }

  // The loop can also end because the attempts ran out; in that case there
  // are no final results to report.
  if (job.state !== 'DONE') {
    console.log('Job did not complete within the polling window.');
    return;
  }

  // Print out the results.
  const infoTypeStats = job.inspectDetails.result.infoTypeStats;
  if (infoTypeStats.length > 0) {
    infoTypeStats.forEach(infoTypeStat => {
      console.log(
        `Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.`
      );
    });
  } else {
    console.log('No findings.');
  }
}
await deidentifyCloudStorage();
PHP
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
use Google\Cloud\Dlp\V2\Action;
use Google\Cloud\Dlp\V2\Action\Deidentify;
use Google\Cloud\Dlp\V2\BigQueryTable;
use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\CloudStorageOptions;
use Google\Cloud\Dlp\V2\CloudStorageOptions\FileSet;
use Google\Cloud\Dlp\V2\CreateDlpJobRequest;
use Google\Cloud\Dlp\V2\DlpJob\JobState;
use Google\Cloud\Dlp\V2\FileType;
use Google\Cloud\Dlp\V2\GetDlpJobRequest;
use Google\Cloud\Dlp\V2\InfoType;
use Google\Cloud\Dlp\V2\InspectConfig;
use Google\Cloud\Dlp\V2\InspectJobConfig;
use Google\Cloud\Dlp\V2\StorageConfig;
use Google\Cloud\Dlp\V2\TransformationConfig;
use Google\Cloud\Dlp\V2\TransformationDetailsStorageConfig;
/**
 * De-identify sensitive data stored in Cloud Storage using the API.
 * Create an inspection job that has a de-identification action, poll it every
 * 30 seconds for up to 10 attempts, and print the finding counts.
 *
 * @param string $callingProjectId                  The project ID to run the API call under. It is also used for the
 *                                                  BigQuery table and to build the template resource names.
 * @param string $inputgcsPath                      The Cloud Storage directory that you want to de-identify.
 * @param string $outgcsPath                        The Cloud Storage directory where you want to store the
 *                                                  de-identified files.
 * @param string $deidentifyTemplateName            The ID of the default de-identify template. It is passed to
 *                                                  `DlpServiceClient::projectDeidentifyTemplateName()` together with
 *                                                  the project ID, so pass the template ID rather than a full
 *                                                  resource name.
 * @param string $structuredDeidentifyTemplateName  The ID of the de-identify template for structured files,
 *                                                  expanded the same way.
 * @param string $imageRedactTemplateName           The ID of the image redaction template for images, expanded the
 *                                                  same way.
 * @param string $datasetId                         The ID of the BigQuery dataset where you want to store
 *                                                  the transformation details.
 * @param string $tableId                           The ID of the BigQuery table where you want to store the
 *                                                  transformation details.
 */
function deidentify_cloud_storage(
    // TODO(developer): Replace sample parameters before running the code.
    string $callingProjectId,
    string $inputgcsPath = 'gs://YOUR_GOOGLE_STORAGE_BUCKET',
    string $outgcsPath = 'gs://YOUR_GOOGLE_STORAGE_BUCKET',
    string $deidentifyTemplateName = 'YOUR_DEIDENTIFY_TEMPLATE_NAME',
    string $structuredDeidentifyTemplateName = 'YOUR_STRUCTURED_DEIDENTIFY_TEMPLATE_NAME',
    string $imageRedactTemplateName = 'YOUR_IMAGE_REDACT_DEIDENTIFY_TEMPLATE_NAME',
    string $datasetId = 'YOUR_DATASET_ID',
    string $tableId = 'YOUR_TABLE_ID'
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    $parent = "projects/$callingProjectId/locations/global";

    // Specify the GCS path to be de-identified.
    $cloudStorageOptions = (new CloudStorageOptions())
        ->setFileSet((new FileSet())
            ->setUrl($inputgcsPath));
    $storageConfig = (new StorageConfig())
        ->setCloudStorageOptions($cloudStorageOptions);

    // Specify the type of info the inspection will look for.
    $inspectConfig = (new InspectConfig())
        ->setInfoTypes([
            (new InfoType())->setName('PERSON_NAME'),
            (new InfoType())->setName('EMAIL_ADDRESS')
        ]);

    // Specify the big query table to store the transformation details.
    $transformationDetailsStorageConfig = (new TransformationDetailsStorageConfig())
        ->setTable((new BigQueryTable())
            ->setProjectId($callingProjectId)
            ->setDatasetId($datasetId)
            ->setTableId($tableId));

    // Specify the de-identify template used for the transformation.
    $transformationConfig = (new TransformationConfig())
        ->setDeidentifyTemplate(
            DlpServiceClient::projectDeidentifyTemplateName($callingProjectId, $deidentifyTemplateName)
        )
        ->setStructuredDeidentifyTemplate(
            DlpServiceClient::projectDeidentifyTemplateName($callingProjectId, $structuredDeidentifyTemplateName)
        )
        ->setImageRedactTemplate(
            DlpServiceClient::projectDeidentifyTemplateName($callingProjectId, $imageRedactTemplateName)
        );

    $deidentify = (new Deidentify())
        ->setCloudStorageOutput($outgcsPath)
        ->setTransformationConfig($transformationConfig)
        ->setTransformationDetailsStorageConfig($transformationDetailsStorageConfig)
        ->setFileTypesToTransform([FileType::TEXT_FILE, FileType::IMAGE, FileType::CSV]);

    $action = (new Action())
        ->setDeidentify($deidentify);

    // Configure the inspection job we want the service to perform.
    $inspectJobConfig = (new InspectJobConfig())
        ->setInspectConfig($inspectConfig)
        ->setStorageConfig($storageConfig)
        ->setActions([$action]);

    // Send the job creation request and process the response.
    $createDlpJobRequest = (new CreateDlpJobRequest())
        ->setParent($parent)
        ->setInspectJob($inspectJobConfig);
    $job = $dlp->createDlpJob($createDlpJobRequest);

    $numOfAttempts = 10;
    do {
        printf('Waiting for job to complete' . PHP_EOL);
        sleep(30);
        $getDlpJobRequest = (new GetDlpJobRequest())
            ->setName($job->getName());
        $job = $dlp->getDlpJob($getDlpJobRequest);
        // Stop on FAILED as well as DONE: a failed job will not change state,
        // and the switch below reports its errors.
        if ($job->getState() == JobState::DONE || $job->getState() == JobState::FAILED) {
            break;
        }
        $numOfAttempts--;
    } while ($numOfAttempts > 0);

    // Print finding counts.
    printf('Job %s status: %s' . PHP_EOL, $job->getName(), JobState::name($job->getState()));
    switch ($job->getState()) {
        case JobState::DONE:
            $infoTypeStats = $job->getInspectDetails()->getResult()->getInfoTypeStats();
            if (count($infoTypeStats) === 0) {
                printf('No findings.' . PHP_EOL);
            } else {
                foreach ($infoTypeStats as $infoTypeStat) {
                    printf(
                        '  Found %s instance(s) of infoType %s' . PHP_EOL,
                        $infoTypeStat->getCount(),
                        $infoTypeStat->getInfoType()->getName()
                    );
                }
            }
            break;
        case JobState::FAILED:
            printf('Job %s had errors:' . PHP_EOL, $job->getName());
            $errors = $job->getErrors();
            foreach ($errors as $error) {
                var_dump($error->getDetails());
            }
            break;
        case JobState::PENDING:
            printf('Job has not completed. Consider a longer timeout or an asynchronous execution model' . PHP_EOL);
            break;
        default:
            printf('Unexpected job state. Most likely, the job is either running or has not yet started.' . PHP_EOL);
    }
}
Python
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries.
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
import
time
from
typing
import
List
import
google.cloud.dlp
def deidentify_cloud_storage(
    project: str,
    input_gcs_bucket: str,
    output_gcs_bucket: str,
    info_types: List[str],
    deid_template_id: str,
    structured_deid_template_id: str,
    image_redact_template_id: str,
    dataset_id: str,
    table_id: str,
    timeout: int = 300,
) -> None:
    """
    Uses the Data Loss Prevention API to de-identify files in a Google Cloud
    Storage directory.

    Creates an inspection job with a de-identify action, polls it every 30
    seconds and prints the findings once the job is done.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_gcs_bucket: The name of google cloud storage bucket to inspect.
        output_gcs_bucket: The name of google cloud storage bucket where
            de-identified files would be stored.
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API.
        deid_template_id: The name of the de-identify template for
            unstructured and structured files.
        structured_deid_template_id: The name of the de-identify template
            for structured files.
        image_redact_template_id: The name of the image redaction template
            for images.
        dataset_id: The identifier of the BigQuery dataset where transformation
            details would be stored.
        table_id: The identifier of the BigQuery table where transformation
            details would be stored.
        timeout: The maximum number of seconds to keep polling the job. The
            job is checked every 30 seconds, so timeout // 30 sleeps are made.
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct the configuration dictionary.
    # Specify the type of info the inspection will look for.
    # See https://cloud.google.com/dlp/docs/infotypes-reference for complete list of info types.
    inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}

    # Construct cloud_storage_options dictionary with the bucket's URL.
    storage_config = {
        "cloud_storage_options": {"file_set": {"url": f"gs://{input_gcs_bucket}"}}
    }

    # Specify the big query table to store the transformation details.
    big_query_table = {
        "project_id": project,
        "dataset_id": dataset_id,
        "table_id": table_id,
    }

    # Convert the project id into a full resource id.
    parent = f"projects/{project}/locations/global"

    # Construct Transformation Configuration with de-identify Templates used
    # for transformation.
    transformation_config = {
        "deidentify_template": f"{parent}/deidentifyTemplates/{deid_template_id}",
        "structured_deidentify_template": f"{parent}/deidentifyTemplates/{structured_deid_template_id}",
        "image_redact_template": f"{parent}/deidentifyTemplates/{image_redact_template_id}",
    }

    # De-identify action: where to write the de-identified files, which
    # templates to apply, where to store the transformation details and which
    # file types to transform.
    actions = [
        {
            "deidentify": {
                "cloud_storage_output": f"gs://{output_gcs_bucket}",
                "transformation_config": transformation_config,
                "transformation_details_storage_config": {"table": big_query_table},
                "file_types_to_transform": ["IMAGE", "CSV", "TEXT_FILE"],
            }
        }
    ]

    # Construct the job definition.
    inspect_job = {
        "inspect_config": inspect_config,
        "storage_config": storage_config,
        "actions": actions,
    }

    # Call the API.
    response = dlp.create_dlp_job(
        request={
            "parent": parent,
            "inspect_job": inspect_job,
        }
    )

    job_name = response.name
    print(f"Inspection Job started : {job_name}")

    # Waiting for the job to get completed.
    job = dlp.get_dlp_job(request={"name": job_name})
    # Since the sleep time is kept as 30s, number of calls would be timeout/30.
    no_of_attempts = timeout // 30
    # "> 0" rather than "!= 0": a negative timeout must not make the counter
    # run away from zero and poll forever.
    while no_of_attempts > 0:
        # Check if the job has completed.
        if job.state == google.cloud.dlp_v2.DlpJob.JobState.DONE:
            break
        if job.state == google.cloud.dlp_v2.DlpJob.JobState.FAILED:
            # A failed job is final; return here so the timeout message below
            # is not printed for it as well.
            print("Job Failed, Please check the configuration.")
            return

        # Sleep for a short duration before checking the job status again.
        time.sleep(30)
        no_of_attempts -= 1

        # Get DLP job status.
        job = dlp.get_dlp_job(request={"name": job_name})

    if job.state != google.cloud.dlp_v2.DlpJob.JobState.DONE:
        # timeout is expressed in seconds (see no_of_attempts above).
        print(f"Job did not complete within {timeout} seconds.")
        return

    # Print out the results.
    print(f"Job name: {job.name}")
    result = job.inspect_details.result
    print(f"Processed Bytes: {result.processed_bytes}")
    if result.info_type_stats:
        for stats in result.info_type_stats:
            print(f"Info type: {stats.info_type.name}")
            print(f"Count: {stats.count}")
    else:
        print("No findings.")
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.

